Iris Classification with Machine Learning and Deep Learning
This walkthrough uses the small iris dataset to get familiar with the basic workflow of classical machine learning and of deep learning frameworks.
1. Load the data and inspect its structure
import numpy as np
from sklearn.datasets import load_iris
iris = load_iris()
data_feature = iris.data    # numpy.ndarray of features
data_label = iris.target    # numpy.ndarray of class labels
print(data_feature.shape)
out:
(150, 4)
print(data_label.shape)
out:
(150,)
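To see what the four feature columns and three classes actually are, the metadata on the returned Bunch can be printed; a minimal sketch (not part of the original walkthrough):
print(iris.feature_names)  # ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
print(iris.target_names)   # ['setosa' 'versicolor' 'virginica']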
2. Normalize and shuffle the data
feature_min = data_feature.min(axis=0)
feature_max = data_feature.max(axis=0)
data_norm = (data_feature - feature_min) / (feature_max - feature_min)
np.random.seed(116)
index = np.arange(len(data_norm))
# Shuffle by index so that features and labels stay aligned
np.random.shuffle(index)
data_feature_shuffled = data_norm[index]  # use the normalized features, not the raw ones
data_label_shuffled = data_label[index]
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    data_feature_shuffled, data_label_shuffled, test_size=0.2, random_state=116)
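The manual min-max formula above is equivalent to sklearn's MinMaxScaler; a hedged sketch of the same step done with the library (in a stricter setup the scaler would be fit on the training split only, to avoid leaking test statistics):
from sklearn.preprocessing import MinMaxScaler
data_norm_alt = MinMaxScaler().fit_transform(data_feature)  # same values as data_norm above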
3. Logistic regression model
from sklearn.linear_model import LogisticRegression
lr=LogisticRegression()
lr.fit(x_train,y_train)
y_predict=lr.predict(x_test)
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test,y_predict))
out:
0.97
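Accuracy alone hides which classes get confused; a short sketch for a per-class breakdown, assuming the variables above:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict, target_names=iris.target_names))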
4. XGBoost model
import xgboost as xgb
dtrain = xgb.DMatrix(x_train, label=y_train)
dtest = xgb.DMatrix(x_test, label=y_test)  # label is needed so dtest can serve as an eval set
num_round = 100
watchlist = [(dtrain, 'train'), (dtest, 'eval')]
param = {'verbosity': 2,
         'objective': 'multi:softmax',  # predict the class label directly
         'num_class': 3,                # number of classes
         'eval_metric': 'mlogloss',
         'eta': 0.01,                   # learning rate (shrinkage)
         'gamma': 0,                    # minimum loss reduction required to split
         'max_depth': 6,
         'subsample': 1,                # row (sample) subsampling
         'colsample_bytree': 1,         # column (feature) subsampling per tree
         'colsample_bylevel': 1,
         'colsample_bynode': 1,
         'lambda': 1,                   # L2 regularization
         'alpha': 0                     # L1 regularization
         }
xg = xgb.train(param, dtrain, num_round, evals=watchlist)
y_pred1 = xg.predict(dtest)
print(accuracy_score(y_test, y_pred1))
out:
0.967
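The same booster can also be trained through xgboost's scikit-learn wrapper, which keeps the fit/predict interface used for LogisticRegression above; a minimal sketch (objective and num_class are inferred from y_train):
from xgboost import XGBClassifier
clf = XGBClassifier(n_estimators=100, learning_rate=0.01, max_depth=6)
clf.fit(x_train, y_train)
print(accuracy_score(y_test, clf.predict(x_test)))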
5. Manual implementation with low-level TensorFlow
import tensorflow as tf
x_train = tf.cast(x_train, tf.float32)
x_test = tf.cast(x_test, tf.float32)
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(32)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
# Single-layer network: 4 input features -> 3 classes
w1 = tf.Variable(tf.random.truncated_normal([4, 3], stddev=0.1, seed=1))
b1 = tf.Variable(tf.random.truncated_normal([3], stddev=0.1, seed=1))
lr = 0.1      # learning rate
epochs = 500  # number of training epochs
loss_all = 0  # 120 training samples / batch size 32 = 4 steps per epoch; loss_all sums the 4 step losses
for epoch in range(epochs):
    # Batch variables are named x_batch/y_batch so they do not shadow
    # x_train/y_train, which sections 6 and 7 still need
    for step, (x_batch, y_batch) in enumerate(db_train):
        with tf.GradientTape() as tape:
            y = tf.matmul(x_batch, w1) + b1
            y = tf.nn.softmax(y)
            y_ = tf.one_hot(y_batch, depth=3)
            loss = tf.reduce_mean(tf.square(y_ - y))
            loss_all += loss.numpy()
        grads = tape.gradient(loss, [w1, b1])
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
    # Print the loss once per epoch
    # print("Epoch {}, loss: {}".format(epoch, loss_all / 4))
    loss_all = 0  # reset so the next epoch's loss starts from zero
    # total_correct counts correct predictions, total_number counts test samples
    total_correct, total_number = 0, 0
    for x_batch, y_batch in db_test:
        # Predict with the updated parameters
        y = tf.matmul(x_batch, w1) + b1
        y = tf.nn.softmax(y)
        pred = tf.argmax(y, axis=1)  # index of the largest probability, i.e. the predicted class
        # Cast pred to the label dtype before comparing
        pred = tf.cast(pred, dtype=y_batch.dtype)
        # correct is 1 where the prediction matches the label, 0 otherwise (bool -> int)
        correct = tf.cast(tf.equal(pred, y_batch), dtype=tf.int32)
        # Sum the correct predictions within this batch
        correct = tf.reduce_sum(correct)
        # Accumulate over all batches
        total_correct += int(correct)
        # shape[0] is the number of rows, i.e. samples, in this batch
        total_number += x_batch.shape[0]
    # Overall accuracy = total_correct / total_number
    acc = total_correct / total_number
    # print("Test_acc:", acc)
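The loss above is MSE between the one-hot labels and the softmax output, which works on this small problem but is not the standard choice for classification. A hedged drop-in alternative for the loss computation inside the GradientTape block, using cross-entropy on the integer labels directly (no one-hot needed):
# Replaces the tf.one_hot and MSE lines inside the GradientTape block
loss = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(y_batch, y))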
6. Keras implementation in TensorFlow
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras import Model

class IrisModel(Model):
    def __init__(self):
        super(IrisModel, self).__init__()
        self.d1 = Dense(10, activation='relu')    # hidden layer with 10 units
        self.d2 = Dense(3, activation='softmax')  # output layer, one unit per class

    def call(self, x):
        x = self.d1(x)
        y = self.d2(x)
        return y

model = IrisModel()
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=500,
          validation_data=(x_test, y_test), validation_freq=1)
model.summary()
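Once fit finishes, the held-out metrics can also be read off with evaluate; a minimal sketch assuming the model and data above:
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print("Test accuracy:", test_acc)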
7. PyTorch implementation
import torch
# np.asarray also covers the case where section 5 cast x_train/x_test to TF tensors
inputs = torch.FloatTensor(np.asarray(x_train))
labels = torch.LongTensor(np.asarray(y_train))

# Define a simple feed-forward (BP) network
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.output = torch.nn.Linear(n_hidden, n_output)

    def forward(self, X):
        X = torch.nn.functional.relu(self.hidden(X))
        X = self.output(X)  # raw logits; CrossEntropyLoss applies softmax internally
        return X

# 4 input features, 10 hidden units, 3 output classes
net = Net(n_feature=4, n_hidden=10, n_output=3)
opt = torch.optim.SGD(net.parameters(), lr=0.1)
loss_func = torch.nn.CrossEntropyLoss()
losses = []
for epoch in range(400):
    out = net(inputs)
    loss = loss_func(out, labels)
    losses.append(loss.item())
    opt.zero_grad()
    loss.backward()
    opt.step()
input_test = torch.FloatTensor(np.asarray(x_test))
label_test = torch.LongTensor(np.asarray(y_test))
print(losses[-1])  # final training loss
out = net(input_test)
predicted = torch.max(out, 1)[1]  # torch.max returns (values, indices); [1] is the predicted class index
print(type(predicted))  # <class 'torch.Tensor'>
pred_y = predicted.data.numpy()
target_y = label_test.data.numpy()
accuracy = float((pred_y == target_y).astype(int).sum()) / float(target_y.size)
print("Iris prediction accuracy:", accuracy)
out:
Iris prediction accuracy: 0.9666666666666667
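At inference time, the idiomatic PyTorch pattern also switches the network to eval mode and disables gradient tracking; a hedged sketch of the same check done that way:
net.eval()                # evaluation mode (matters for dropout/batch-norm layers; harmless here)
with torch.no_grad():     # no gradients are needed for inference
    pred = net(input_test).argmax(dim=1)
print((pred == label_test).float().mean().item())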