MATLAB Code: Machine Learning Classifiers
This post collects MATLAB implementations of three machine-learning classifiers: support vector machines, decision trees, and logistic regression.
Table of Contents
SVM / Support Vector Machine
Theory
MATLAB Implementation
Example Code
Parameter Selection via Search
Decision Tree
Theory
MATLAB Implementation
Example Code
Logistic Regression
Theory
MATLAB Implementation
Example Code
SVM / Support Vector Machine
Theory
For the underlying theory, see: 机器学习(九):支持向量机SVM(超详细理论基础)_支持向量机的分类模型-CSDN博客
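For orientation, here is what the two hyperparameters tuned below control, in the standard (textbook, not post-specific) soft-margin formulation with an RBF kernel:

\[
\min_{\mathbf{w},\,b,\,\boldsymbol{\xi}}\ \frac{1}{2}\lVert\mathbf{w}\rVert^2 + C\sum_{i=1}^{N}\xi_i
\quad\text{s.t.}\quad y_i\bigl(\mathbf{w}^\top\phi(\mathbf{x}_i)+b\bigr) \ge 1-\xi_i,\quad \xi_i \ge 0,
\qquad
K(\mathbf{x},\mathbf{x}') = \exp\!\left(-\frac{\lVert\mathbf{x}-\mathbf{x}'\rVert^2}{s^2}\right).
\]

Here C corresponds to 'BoxConstraint' (larger C penalizes training errors more heavily and risks overfitting) and s to 'KernelScale' (MATLAB divides the predictors by KernelScale before applying the unit-width Gaussian kernel, which amounts to the kernel above).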
MATLAB Implementation
Example Code
1 - Data Import
%% Training set ratio
train_ratio = 0.8; % adjust as needed
%% Load data
load('features.mat'); % expects: features (N x D numeric), labels (N x 1, same row order)
n = randperm(size(features,1)); % shuffle sample indices
train_num = floor(train_ratio * size(features,1));
train_features = features(n(1:train_num),:);
train_labels = labels(n(1:train_num),:);
test_features = features(n(train_num + 1:end),:);
test_labels = labels(n(train_num + 1:end),:);
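If you do not have a features.mat at hand, a minimal synthetic stand-in can be generated as follows (a sketch with hypothetical sizes and class names; it only mirrors how the script indexes features and labels, and how accuracy is later computed with strcmp):

% Hypothetical test data, not the author's dataset.
% features: N x D numeric matrix; labels: N x 1 cell array of class names.
rng(0); % reproducible
N = 300; D = 10;
features = [randn(N/2,D); randn(N/2,D) + 1]; % two Gaussian blobs
labels = [repmat({'A'},N/2,1); repmat({'B'},N/2,1)];
save('features.mat','features','labels');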
2 - Settings and Preprocessing
%% Parameter settings
c = 20; % BoxConstraint; set as appropriate
g = 1.5; % KernelScale; set as appropriate
acc = 0; % initialize accuracy
%% Preprocessing
[Train_features,PS] = mapminmax(train_features'); % fit the normalization on the training set
Train_features = Train_features';
Test_features = mapminmax('apply',test_features',PS); % reuse the same mapping on the test set
Test_features = Test_features';
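mapminmax normalizes each row of its input to [-1, 1] by default, which is why the feature matrices are transposed before and after the call; reusing the training-set mapping PS on the test set avoids information leakage. A quick sanity check:

% Each feature (column) of the normalized training set should span [-1, 1].
disp([min(Train_features)' max(Train_features)']); % per-feature min and max
% Test-set values may fall slightly outside [-1, 1]; that is expected,
% because the mapping was fitted on the training set only.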
3 - Core SVM Code
template = templateSVM('KernelFunction','rbf','BoxConstraint',c,'KernelScale',g);
h_model = waitbar(0, 'Training multi-class SVM model...'); % progress bar (cosmetic: fitcecoc is a single blocking call)
model = fitcecoc(Train_features,train_labels,'Learners',template,'Coding','onevsone','Verbose',0);
close(h_model);
save('my_trained_model.mat','model'); % save the model
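Note that the saved model alone is not enough to classify new raw data: the normalization mapping PS is needed at prediction time as well. A minimal sketch (new_features is a hypothetical matrix of raw, unnormalized samples with the same columns as the training features):

save('my_trained_model.mat','model','PS'); % store the mapping with the model
S = load('my_trained_model.mat'); % later, possibly in another script
new_norm = mapminmax('apply', new_features', S.PS)'; % identical preprocessing
new_labels = predict(S.model, new_norm); % predicted class labels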
4 - Post-processing
%% Post-processing
[predict_label_train,~] = predict(model,Train_features);
train_accuracy = sum(strcmp(predict_label_train,train_labels)) / length(train_labels); % training accuracy (strcmp assumes cell arrays of string labels; use == for numeric labels)
[predict_label_test,~] = predict(model,Test_features);
test_accuracy = sum(strcmp(predict_label_test,test_labels)) / length(test_labels); % test accuracy
confusion_matrix_train = confusionmat(train_labels,predict_label_train);
figure;
confusionchart(confusion_matrix_train);
title('Training-set confusion matrix');
saveas(gcf, 'confusion_matrix_train.png');
confusion_matrix_test = confusionmat(test_labels,predict_label_test);
figure;
confusionchart(confusion_matrix_test);
title('Test-set confusion matrix');
saveas(gcf, 'confusion_matrix_test.png');
disp(['Training accuracy: ',num2str(train_accuracy)]);
disp(['Test accuracy: ',num2str(test_accuracy)]);
Parameter Selection via Search
A random search followed by a grid search can be used to find good parameter values: the random search coarsely locates a promising region in (log2 c, log2 g) space, and the grid search below then refines around it.
%% Search
% Adjust the initial values and search ranges as needed
v = 5; % number of cross-validation folds (must be defined before both searches)
num_random_search = 30;
bestc_random = 15;
bestg_random = 2;
bestacc_random = 0;
bestc = 15;
bestg = 0.7;
bestacc = 0;
h_random = waitbar(0, 'Random search for good parameters in progress...');
for k = 1:num_random_search
c_random = 2 + (6 - 2) * rand(); % log2(c) drawn uniformly from [2, 6]
g_random = -3 + (2 - (-3)) * rand(); % log2(g) drawn uniformly from [-3, 2]
template_random = templateSVM('KernelFunction','rbf','BoxConstraint',2^c_random,'KernelScale',2^g_random);
classifier_random = fitcecoc(Train_features,train_labels,'Learners',template_random,'Coding','onevsone','Verbose',0,'CrossVal','on','KFold',v);
cg_random = kfoldLoss(classifier_random);
if (1 - cg_random) > bestacc_random
bestacc_random = 1 - cg_random;
bestc_random = 2^c_random;
bestg_random = 2^g_random;
end
waitbar(k / num_random_search, h_random, sprintf('Random search %.2f%% complete', k / num_random_search * 100));
end
close(h_random);
fprintf('Best c: %f\n', bestc_random);
fprintf('Best g: %f\n', bestg_random);
c_center = log2(bestc_random); % refine around the random-search optimum
g_center = log2(bestg_random);
c_range = 5;
g_range = 0.3;
c_vec = linspace(c_center - c_range,c_center + c_range,10);
g_vec = linspace(g_center - g_range,g_center + g_range,10);
[c,g] = meshgrid(c_vec,g_vec);
[m,n] = size(c);
cg = zeros(m,n);
tol = 10^(-4); % tie-breaking tolerance (named tol to avoid shadowing the built-in eps)
h_grid = waitbar(0, 'Grid search for the best c/g parameters in progress...');
total_iterations_grid = m * n;
current_iteration_grid = 0;
for i = 1:m
for j = 1:n
template = templateSVM('KernelFunction','rbf','BoxConstraint',2^c(i,j),'KernelScale',2^g(i,j));
classifier = fitcecoc(Train_features,train_labels,'Learners',template,'Coding','onevsone','Verbose',0,'CrossVal','on','KFold',v);
cg(i,j) = kfoldLoss(classifier);
if (1 - cg(i,j)) > bestacc
bestacc = 1 - cg(i,j);
bestc = 2^c(i,j);
bestg = 2^g(i,j);
end
if abs((1 - cg(i,j)) - bestacc) <= tol && bestc > 2^c(i,j) % on ties, prefer the smaller c
bestacc = 1 - cg(i,j);
bestc = 2^c(i,j);
bestg = 2^g(i,j);
end
current_iteration_grid = current_iteration_grid + 1;
waitbar(current_iteration_grid / total_iterations_grid, h_grid, sprintf('Grid search %.2f%% complete', current_iteration_grid / total_iterations_grid * 100));
end
end
close(h_grid);
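After the search, the final model should be retrained on the full training set with the parameters found; a sketch, mirroring the core SVM code above with bestc and bestg substituted for the hand-set c and g:

% Retrain on the full training set using the searched parameters.
template = templateSVM('KernelFunction','rbf','BoxConstraint',bestc,'KernelScale',bestg);
model = fitcecoc(Train_features,train_labels,'Learners',template,'Coding','onevsone','Verbose',0);
save('my_trained_model.mat','model','PS'); % overwrite with the tuned model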
Decision Tree
Theory
For the underlying theory, see:
决策树(Decision Tree)-CSDN博客
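The grid search below compares two split criteria, 'gdi' (Gini's diversity index) and 'deviance' (cross-entropy). For a node with class proportions \(p_k\), these are the standard impurity measures

\[
G = 1 - \sum_k p_k^2, \qquad D = -\sum_k p_k \log p_k,
\]

and at each node the tree picks the split that most reduces the impurity.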
MATLAB Implementation
Example Code
1 - Data Import and Normalization
%% Training set ratio
train_ratio = 0.8;
%% Number of cross-validation folds
v = 5;
%% Load data
load('features.mat');
n = randperm(size(features,1));
train_num = floor(train_ratio * size(features,1));
% Training set
train_features = features(n(1:train_num),:);
train_labels = labels(n(1:train_num),:);
% Test set
test_features = features(n(train_num + 1:end),:);
test_labels = labels(n(train_num + 1:end),:);
%% Normalization
[Train_features,PS] = mapminmax(train_features');
Train_features = Train_features';
Test_features = mapminmax('apply',test_features',PS);
Test_features = Test_features';
2 - Parameter Grid Search
%% Grid search over the tree parameters (maximum number of splits, minimum leaf size, split criterion)
% Note: 'MaxNumSplits' caps the number of splits, not the literal tree depth.
best_depth = 180;
best_minLeaf = 3;
best_splitCriterion = 'gdi';
best_accuracy = 0;
depth_vec = 50:10:150;
minLeaf_vec = 1:10;
splitCriteria = {'gdi', 'deviance'};
h_search = waitbar(0, 'Searching for the best decision-tree parameters...');
total_iter = numel(depth_vec) * numel(minLeaf_vec) * numel(splitCriteria);
iter = 0;
for d = depth_vec
for m = minLeaf_vec
for sc = splitCriteria
template = templateTree('MaxNumSplits', d, 'MinLeafSize', m, 'SplitCriterion', sc{1});
classifier = fitcecoc(Train_features, train_labels, 'Learners', template, 'Coding', 'onevsone', 'Verbose', 0, 'CrossVal', 'on', 'KFold', v);
cg = kfoldLoss(classifier);
acc = 1 - cg;
if acc > best_accuracy
best_accuracy = acc;
best_depth = d;
best_minLeaf = m;
best_splitCriterion = sc{1};
end
iter = iter + 1;
waitbar(iter / total_iter, h_search, sprintf('Parameter search %.2f%% complete', iter / total_iter * 100));
end
end
end
close(h_search);
fprintf('Best MaxNumSplits: %d\n', best_depth);
fprintf('Best MinLeafSize: %d\n', best_minLeaf);
fprintf('Best SplitCriterion: %s\n', best_splitCriterion);
3 - Core Code
%% Model
template = templateTree('MaxNumSplits', best_depth, 'MinLeafSize', best_minLeaf, 'SplitCriterion', best_splitCriterion);
model = fitcecoc(Train_features, train_labels, 'Learners', template, 'Coding', 'onevsone', 'Verbose', 0);
save('my_trained_model_tree.mat', 'model');
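With 'onevsone' coding, fitcecoc trains K(K-1)/2 binary trees for K classes and stores them in the model's BinaryLearners property. To inspect one of them (a sketch; the viewer call is standard for MATLAB classification trees):

tree1 = model.BinaryLearners{1}; % first one-vs-one binary tree
view(tree1, 'Mode', 'graph'); % opens a graphical tree viewer
% view(tree1) prints the split rules as text instead.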
4 - Post-processing
%% Post-processing
[predict_label_train, ~] = predict(model, Train_features);
train_accuracy = sum(strcmp(predict_label_train, train_labels)) / length(train_labels); % training accuracy
[predict_label_test, ~] = predict(model, Test_features);
test_accuracy = sum(strcmp(predict_label_test, test_labels)) / length(test_labels); % test accuracy
confusion_matrix_train = confusionmat(train_labels, predict_label_train);
figure;
confusionchart(confusion_matrix_train);
title('Training-set confusion matrix');
saveas(gcf, 'confusion_matrix_tree2train.png');
confusion_matrix_test = confusionmat(test_labels, predict_label_test);
figure;
confusionchart(confusion_matrix_test);
title('Test-set confusion matrix');
saveas(gcf, 'confusion_matrix_tree2test.png');
disp(['Training accuracy: ', num2str(train_accuracy)]);
disp(['Test accuracy: ', num2str(test_accuracy)]);
Logistic Regression
Theory
For the underlying theory, see:
逻辑回归(Logistic Regression)-CSDN博客
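The code below tunes the ridge penalty Lambda of a linear logistic model. In textbook notation (up to MATLAB's exact scaling conventions), each binary learner models

\[
p(y=1 \mid \mathbf{x}) = \sigma\bigl(\mathbf{w}^\top\mathbf{x}+b\bigr), \qquad \sigma(z) = \frac{1}{1+e^{-z}},
\]

and is fitted by minimizing the ridge-penalized logistic loss

\[
\frac{1}{N}\sum_{i=1}^{N}\log\bigl(1+e^{-y_i(\mathbf{w}^\top\mathbf{x}_i+b)}\bigr) + \lambda\,\lVert\mathbf{w}\rVert_2^2,
\]

where larger \(\lambda\) (the 'Lambda' argument) shrinks the weights more strongly, trading variance for bias.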
MATLAB Implementation
Example Code
1 - Data Import and Normalization
%% Training set ratio
train_ratio = 0.85;
%% Number of cross-validation folds
v = 5;
%% Load data
load('features.mat');
n = randperm(size(features, 1));
train_num = floor(train_ratio * size(features, 1));
train_features = features(n(1:train_num), :);
train_labels = labels(n(1:train_num), :);
test_features = features(n(train_num + 1:end), :);
test_labels = labels(n(train_num + 1:end), :);
%% Normalization
[Train_features, PS] = mapminmax(train_features');
Train_features = Train_features';
Test_features = mapminmax('apply', test_features', PS);
Test_features = Test_features';
2 - Parameter Grid Search
%% Grid search
lambda_range = logspace(-5, 5, 50); % candidate Lambda values (log-spaced), iterated over below
best_lambda = 1e-7;
best_accuracy = 0;
h_grid = waitbar(0, 'Grid search over the Lambda parameter in progress...');
for i = 1:length(lambda_range)
lambda = lambda_range(i);
template = templateLinear('Learner', 'logistic', 'Regularization', 'ridge', 'Lambda', lambda);
cv = cvpartition(train_labels, 'KFold', v);
cv_accuracy = zeros(cv.NumTestSets, 1);
for j = 1:cv.NumTestSets
train_idx = cv.training(j);
test_idx = cv.test(j);
model = fitcecoc(Train_features(train_idx, :), train_labels(train_idx), 'Learners', template, 'Coding', 'onevsone', 'Verbose', 0);
predict_label = predict(model, Train_features(test_idx, :));
cv_accuracy(j) = sum(strcmp(predict_label, train_labels(test_idx))) / sum(test_idx); % test_idx is a logical mask, so sum(test_idx) is the fold size
end
mean_accuracy = mean(cv_accuracy);
if mean_accuracy > best_accuracy
best_accuracy = mean_accuracy;
best_lambda = lambda;
end
waitbar(i / length(lambda_range), h_grid, sprintf('Tuning Lambda... %.2f%% complete', i / length(lambda_range) * 100));
end
close(h_grid);
fprintf('Best Lambda: %g\n', best_lambda); % %g, since Lambda can be very small
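Optionally, recording the mean CV accuracy of each candidate inside the loop (e.g. acc_hist(i) = mean_accuracy; — acc_hist is a name introduced here, not in the original code) makes the choice of Lambda easy to sanity-check visually:

% Assumes acc_hist(i) = mean_accuracy; was added inside the search loop above.
figure;
semilogx(lambda_range, acc_hist, '-o');
xlabel('Lambda'); ylabel('Mean CV accuracy');
title('Cross-validated accuracy vs. Lambda');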
3 - Core Code
%% Model
template = templateLinear('Learner', 'logistic', 'Regularization', 'ridge', 'Lambda', best_lambda);
h_model = waitbar(0, 'Training logistic regression model...');
model = fitcecoc(Train_features, train_labels, 'Learners', template, 'Coding', 'onevsone', 'Verbose', 0);
close(h_model);
save('my_trained_model_logistic.mat', 'model');
4 - Post-processing
%% Post-processing
[predict_label_train, ~] = predict(model, Train_features);
train_accuracy = sum(strcmp(predict_label_train, train_labels)) / length(train_labels); % training accuracy
[predict_label_test, ~] = predict(model, Test_features);
test_accuracy = sum(strcmp(predict_label_test, test_labels)) / length(test_labels); % test accuracy
confusion_matrix_train = confusionmat(train_labels, predict_label_train);
figure;
confusionchart(confusion_matrix_train);
title('Training-set confusion matrix');
saveas(gcf, 'confusion_matrix_log2_train.png');
confusion_matrix_test = confusionmat(test_labels, predict_label_test);
figure;
confusionchart(confusion_matrix_test);
title('Test-set confusion matrix');
saveas(gcf, 'confusion_matrix_log2_test.png');
disp(['Training accuracy: ', num2str(train_accuracy)]);
disp(['Test accuracy: ', num2str(test_accuracy)]);