机器学习day8
自定义数据集，使用朴素贝叶斯对其进行分类
代码
import numpy as np
import matplotlib.pyplot as plt
# Hand-made 2-D training set: seven points per class, one row per sample.
class1_points = np.array([
    [2.1, 2.2],
    [2.4, 2.5],
    [2.2, 2.0],
    [2.0, 2.1],
    [2.3, 2.3],
    [2.6, 2.4],
    [2.5, 2.1],
])
class2_points = np.array([
    [4.0, 3.5],
    [4.2, 3.9],
    [4.1, 3.8],
    [3.7, 3.4],
    [4.4, 3.6],
    [4.5, 3.7],
    [4.3, 3.9],
])

# Stack both classes into one sample matrix; label class 1 as 0.0 and class 2 as 1.0.
X = np.vstack((class1_points, class2_points))
Y = np.concatenate((np.zeros(len(class1_points)), np.ones(len(class2_points))))
print(Y)

# Per-class estimates, indexed by label: prior P(c), mean vector, sample covariance.
# NOTE(review): a full covariance matrix is estimated per class, so features are
# not treated as independent (i.e. this is not the "naive" assumption) - confirm intent.
prior_prob = [np.sum(Y == label) / len(Y) for label in (0, 1)]
class_μ = [X[Y == label].mean(axis=0) for label in (0, 1)]
class_cov = [np.cov(X[Y == label], rowvar=False) for label in (0, 1)]
def pdf(x, mean, cov):
    """Return the multivariate Gaussian density N(mean, cov) evaluated at x.

    Args:
        x: Point at which to evaluate the density (length-n vector).
        mean: Mean vector of the distribution (length n).
        cov: n x n covariance matrix; must be non-singular.

    Returns:
        The density value
        exp(-0.5 * (x - mean)^T cov^-1 (x - mean)) / ((2*pi)^(n/2) * sqrt(det(cov))).

    Raises:
        numpy.linalg.LinAlgError: If cov is singular.
    """
    n = len(mean)
    diff = np.asarray(x, dtype=float) - np.asarray(mean, dtype=float)
    # The whole product (2*pi)^(n/2) * sqrt(det) belongs in the denominator;
    # without the parentheses the density would be multiplied by sqrt(det).
    coff = 1.0 / ((2 * np.pi) ** (n / 2) * np.sqrt(np.linalg.det(cov)))
    # Solve cov @ z = diff instead of forming the explicit inverse.
    exponent = np.exp(-0.5 * np.dot(diff.T, np.linalg.solve(cov, diff)))
    return coff * exponent
# Classify every node of a 0.05-spaced grid over [0, 5) x [0, 5) so the
# decision boundary between the two classes can be drawn.
xx, yy = np.meshgrid(np.arange(0, 5, 0.05), np.arange(0, 5, 0.05))
grid_points = np.c_[xx.ravel(), yy.ravel()]
grid_label = []
for point in grid_points:
    # Unnormalised posterior for each class: prior * likelihood.
    poster_prob = [
        prior * pdf(point, mean, cov)
        for prior, mean, cov in zip(prior_prob, class_μ, class_cov)
    ]
    # Predict the class with the largest posterior (ties resolve to the lower index).
    pre_class = np.argmax(poster_prob)
    grid_label.append(pre_class)

plt.scatter(class1_points[:, 0], class1_points[:, 1], c="blue", label="class 1")
plt.scatter(class2_points[:, 0], class2_points[:, 1], c="red", label="class 2")
plt.legend()

grid_label = np.array(grid_label)
pre_grid_label = grid_label.reshape(xx.shape)
# Labels are 0/1, so the 0.5 iso-line is the decision boundary. The keyword
# names are `levels` and `colors`; `level`/`color` are not contour parameters.
contour = plt.contour(xx, yy, pre_grid_label, levels=[0.5], colors='green')
plt.show()
效果