
Implementing Iris Classification with Machine Learning and Deep Learning

This post uses the small Iris classification dataset to walk through the basic workflow of several machine learning and deep learning frameworks.

1. Load the data and inspect its structure

import numpy as np
from sklearn.datasets import load_iris

iris = load_iris()
data_feature = iris.data    # feature matrix, numpy.ndarray
data_label = iris.target    # class labels 0/1/2
print(data_feature.shape)
out:
(150, 4)
print(data_label.shape)
out:
(150,)
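
Beyond the raw shapes, it helps to know which features and classes the dataset contains; a minimal sketch using the same iris object loaded above:

# Inspect the feature and class names of the loaded dataset
print(iris.feature_names)   # sepal/petal length and width, in cm
print(iris.target_names)    # ['setosa' 'versicolor' 'virginica']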

2. Normalize and shuffle the data

feature_min = data_feature.min(axis=0)
feature_max = data_feature.max(axis=0)
# Min-max normalization: scale each feature column to [0, 1]
data_norm = (data_feature - feature_min) / (feature_max - feature_min)

np.random.seed(116)
data_index = np.arange(len(data_feature))
# Shuffle by index so that features and labels stay aligned
np.random.shuffle(data_index)

data_feature_shuffle = data_norm[data_index]
data_label_shuffle = data_label[data_index]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(data_feature_shuffle, data_label_shuffle, test_size=0.2)
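
As a quick sanity check on the 80/20 split above, the shapes can be printed; with 150 samples this gives 120 training and 30 test samples:

# Verify the train/test split sizes (150 samples -> 120 train / 30 test)
print(x_train.shape, y_train.shape)   # (120, 4) (120,)
print(x_test.shape, y_test.shape)     # (30, 4) (30,)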

3. Logistic regression model

from sklearn.linear_model import LogisticRegression
lr=LogisticRegression()
lr.fit(x_train,y_train)
y_predict=lr.predict(x_test)

from sklearn.metrics import accuracy_score

print(accuracy_score(y_test,y_predict))

out:
0.97
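
A single accuracy number hides how the model behaves on each class; scikit-learn's built-in metrics give a per-class view. A minimal sketch reusing the predictions above:

# Per-class precision/recall and the confusion matrix for the logistic regression predictions
from sklearn.metrics import classification_report, confusion_matrix

print(classification_report(y_test, y_predict, target_names=iris.target_names))
print(confusion_matrix(y_test, y_predict))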

4. XGBoost model

import xgboost as xgb

dtrain = xgb.DMatrix(x_train, label=y_train)
dtest = xgb.DMatrix(x_test, label=y_test)
num_round = 100
watchlist = [(dtrain, 'train'), (dtest, 'eval')]
param = {'verbosity': 2,
         'objective': 'multi:softmax',   # multiclass objective, predicts the class label directly
         'num_class': 3,                 # number of classes
         'eval_metric': 'mlogloss',
         'eta': 0.01,                    # learning rate
         'gamma': 0,
         'max_depth': 6,
         'subsample': 1,                 # row subsampling
         'colsample_bytree': 1,          # column subsampling per tree
         'colsample_bylevel': 1,
         'colsample_bynode': 1,
         'lambda': 1,                    # L2 regularization
         'alpha': 0                      # L1 regularization
         }

xg = xgb.train(param, dtrain, num_round, evals=watchlist)
y_pred1 = xg.predict(dtest)

print(accuracy_score(y_test, y_pred1))

out:
0.967
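
The same model can also be trained through xgboost's scikit-learn wrapper, which skips the manual DMatrix construction. A rough sketch with roughly matching hyperparameters (these settings are carried over from the dict above, not tuned values; depending on the xgboost version, eval_metric may need to be passed to fit() instead of the constructor):

# Alternative: xgboost's scikit-learn API with roughly the same settings as above
from xgboost import XGBClassifier

clf = XGBClassifier(n_estimators=100, learning_rate=0.01, max_depth=6,
                    subsample=1, colsample_bytree=1, reg_lambda=1, reg_alpha=0,
                    objective='multi:softmax', eval_metric='mlogloss')
clf.fit(x_train, y_train)
print(accuracy_score(y_test, clf.predict(x_test)))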

5. TensorFlow with a hand-written training loop

import tensorflow as tf

# Build batched tf.data pipelines; cast the features to float32 inside the dataset
# so the original numpy arrays (reused in sections 6 and 7) are left untouched
db_train = tf.data.Dataset.from_tensor_slices((tf.cast(x_train, tf.float32), y_train)).batch(32)
db_test = tf.data.Dataset.from_tensor_slices((tf.cast(x_test, tf.float32), y_test)).batch(32)

# A single fully connected layer: 4 input features -> 3 classes
w1 = tf.Variable(tf.random.truncated_normal([4, 3], stddev=0.1, seed=1))
b1 = tf.Variable(tf.random.truncated_normal([3], stddev=0.1, seed=1))

lr = 0.1      # learning rate
epochs = 500  # number of training epochs
loss_all = 0  # 120 training samples / batch size 32 = 4 steps per epoch; loss_all sums the 4 step losses

for epoch in range(epochs):
    for step, (x_batch, y_batch) in enumerate(db_train):
        with tf.GradientTape() as tape:
            y = tf.matmul(x_batch, w1) + b1
            y = tf.nn.softmax(y)
            y_ = tf.one_hot(y_batch, depth=3)
            loss = tf.reduce_mean(tf.square(y_ - y))  # mean squared error against the one-hot labels
            loss_all += loss.numpy()
        grads = tape.gradient(loss, [w1, b1])
        w1.assign_sub(lr * grads[0])  # gradient-descent update of the weights
        b1.assign_sub(lr * grads[1])  # gradient-descent update of the bias

    # Per-epoch training loss (uncomment to print)
    # print("Epoch {}, loss: {}".format(epoch, loss_all / 4))
    loss_all = 0  # reset for the next epoch

    # Evaluate on the test set after each epoch:
    # total_correct counts correctly classified samples, total_number counts all test samples
    total_correct, total_number = 0, 0
    for x_batch, y_batch in db_test:
        y = tf.matmul(x_batch, w1) + b1
        y = tf.nn.softmax(y)
        pred = tf.argmax(y, axis=1)                # index of the largest probability = predicted class
        pred = tf.cast(pred, dtype=y_batch.dtype)  # match the label dtype before comparing
        correct = tf.cast(tf.equal(pred, y_batch), dtype=tf.int32)
        total_correct += int(tf.reduce_sum(correct))  # correctly classified samples in this batch
        total_number += x_batch.shape[0]              # batch size
    acc = total_correct / total_number
    # print("Test_acc:", acc)
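
The loop above follows the common textbook example of using mean squared error against the one-hot labels. Categorical cross-entropy is the more usual loss for a softmax classifier and can be dropped into the same training step; a minimal sketch of that substitution (only the loss line changes, everything else in the loop stays as written above):

# Same GradientTape step as above, with categorical cross-entropy instead of MSE
with tf.GradientTape() as tape:
    y = tf.nn.softmax(tf.matmul(x_batch, w1) + b1)
    y_ = tf.one_hot(y_batch, depth=3)
    loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_, y))
grads = tape.gradient(loss, [w1, b1])
w1.assign_sub(lr * grads[0])
b1.assign_sub(lr * grads[1])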
       

6. TensorFlow with the high-level Keras API

import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras import Model

class IrisModel(Model):
    def __init__(self):
        super(IrisModel, self).__init__()
        self.d1 = Dense(10, activation='relu')
        self.d2 = Dense(3, activation='softmax')

    def call(self, x):
        x = self.d1(x)
        y = self.d2(x)
        return y


model = IrisModel()

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

model.fit(x_train, y_train, batch_size=32, epochs=500, validation_data=(x_test, y_test), validation_freq=1)
model.summary()
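
After training, the Keras model can be scored directly on the held-out split and used for predictions. A minimal sketch reusing x_test and y_test from above (verbose=0 is only there to keep the output short):

# Evaluate the trained Keras model and compare a few predictions with the true labels
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print("Test accuracy:", test_acc)

probs = model.predict(x_test[:5], verbose=0)   # softmax probabilities for the first 5 test samples
print(np.argmax(probs, axis=1), y_test[:5])    # predicted vs. true class labels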

7. PyTorch

import torch

input = torch.FloatTensor(x_train)
label = torch.LongTensor(y_train)

# Define a simple BP (feed-forward) network: one hidden layer with ReLU
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.output = torch.nn.Linear(n_hidden, n_output)

    def forward(self, X):
        X = torch.nn.functional.relu(self.hidden(X))
        X = self.output(X)
        return X

# 4 input features -> 10 hidden units -> 3 classes
net = Net(n_feature=4, n_hidden=10, n_output=3)
opt = torch.optim.SGD(net.parameters(), lr=0.1)
loss_func = torch.nn.CrossEntropyLoss()  # expects raw logits, so forward() applies no softmax

losses = []  # training loss per epoch, can be plotted to inspect convergence
for epoch in range(400):
    out = net(input)
    loss = loss_func(out, label)
    losses.append(loss.item())
    opt.zero_grad()
    loss.backward()
    opt.step()

input_test = torch.FloatTensor(x_test)
label_test = torch.LongTensor(y_test)
with torch.no_grad():
    out = net(input_test)
predicted = torch.max(out, 1)[1]  # torch.max returns (values, indices); [1] is the predicted class index
pred_y = predicted.data.numpy()
target_y = label_test.data.numpy()
accuracy = float((pred_y == target_y).astype(int).sum()) / float(target_y.size)

print("Iris prediction accuracy", accuracy)

out:
Iris prediction accuracy 0.9666666666666667
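
The loop above trains on the full 120-sample batch at every step, which is fine for a dataset as small as Iris. For larger datasets the same model is usually trained in mini-batches; a minimal sketch of that variant using TensorDataset and DataLoader (the batch size of 32 and the reuse of Net, loss_func and the tensors built above are illustrative assumptions):

# Mini-batch variant of the same PyTorch training loop
from torch.utils.data import TensorDataset, DataLoader

train_ds = TensorDataset(input, label)                        # reuse the training tensors built above
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)  # shuffled mini-batches of 32 samples

net2 = Net(n_feature=4, n_hidden=10, n_output=3)
opt2 = torch.optim.SGD(net2.parameters(), lr=0.1)

for epoch in range(400):
    for xb, yb in train_dl:
        loss = loss_func(net2(xb), yb)  # CrossEntropyLoss on the mini-batch logits
        opt2.zero_grad()
        loss.backward()
        opt2.step()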

