机器学习经典算法(scikit-learn)
安装库:pip install scikit-learn numpy pandas
- 线性回归 (Linear Regression)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_diabetes

# Load a regression dataset that ships with scikit-learn (no download needed).
# The load_boston() loader used previously was removed in scikit-learn 1.2,
# so importing it raises ImportError on current releases.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create the linear regression model and fit it on the training split.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict the target for the held-out samples and print the result.
predictions = model.predict(X_test)
print(predictions)
- Logistic 回归 (Logistic Regression)
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Load the iris dataset: feature matrix and integer class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Keep only the samples whose label is not 2, turning the task into a
# binary classification problem.
X_bin, y_bin = X[y != 2], y[y != 2]

# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X_bin,
    y_bin,
    test_size=0.2,
    random_state=42,
)

# Build the logistic regression classifier and fit it on the training split
# (fit() returns the estimator itself, so the call can be chained).
model = LogisticRegression().fit(X_train, y_train)

# Predict class labels for the held-out samples and print them.
predictions = model.predict(X_test)
print(predictions)
- 线性判别分析 (Linear Discriminant Analysis, LDA)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Reuses the iris train/test split created in the logistic regression
# section above (X_train, X_test, y_train, y_test).
# fit() returns the estimator itself, so construction and fitting are chained.
lda = LinearDiscriminantAnalysis().fit(X_train, y_train)

# Predict class labels for the held-out samples and print them.
predictions = lda.predict(X_test)
print(predictions)
- 决策树 (Decision Tree)
from sklearn.tree import DecisionTreeClassifier

# Build a decision tree classifier with a fixed seed and fit it on the
# training split (fit() returns the estimator itself).
tree_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict class labels for the held-out samples and print them.
predictions = tree_model.predict(X_test)
print(predictions)
- 朴素贝叶斯 (Naive Bayes)
from sklearn.naive_bayes import GaussianNB

# Build a Gaussian naive Bayes classifier and fit it on the training split
# (fit() returns the estimator itself).
nb_model = GaussianNB().fit(X_train, y_train)

# Predict class labels for the held-out samples and print them.
predictions = nb_model.predict(X_test)
print(predictions)
- K 最近邻算法 (K-Nearest Neighbors)
from sklearn.neighbors import KNeighborsClassifier

# Build a k-nearest-neighbours classifier that votes among the 3 nearest
# training samples, and fit it on the training split.
knn_model = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predict class labels for the held-out samples and print them.
predictions = knn_model.predict(X_test)
print(predictions)
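- 学习向量量化 (Learning Vector Quantization)
学习向量量化(LVQ)是一种基于原型的监督分类算法,可以看作 KNN 的变种:它不保存全部训练样本,而是为每个类别学习少量原型向量,预测时取距离最近的原型所属的类别。实际使用中常与 KNN 配合或对比使用。scikit-learn 本身没有内置 LVQ 实现,需要借助第三方库(例如 sklearn-lvq)或自行实现。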
- 支持向量机 (Support Vector Machine)
from sklearn.svm import SVC

# Build a support vector classifier with a linear kernel and fit it on the
# training split (fit() returns the estimator itself).
svm_model = SVC(kernel='linear').fit(X_train, y_train)

# Predict class labels for the held-out samples and print them.
predictions = svm_model.predict(X_test)
print(predictions)
- 袋装法和随机森林 (Bagging and Random Forest)
from sklearn.ensemble import RandomForestClassifier

# Build a random forest of 100 trees with a fixed seed and fit it on the
# training split (fit() returns the estimator itself).
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict class labels for the held-out samples and print them.
predictions = rf_model.predict(X_test)
print(predictions)