Hand-Writing an Artificial Neural Network from Scratch to Solve the XOR Problem (Python + C++, with Structured Versions)

I. A Python neural network that solves the XOR problem

The "raw" version below makes the underlying principles easy to follow, but it is rather verbose:
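For reference, the gradient formulas implemented by the listing below (standard backpropagation for a 2-2-1 sigmoid network trained on a squared-error loss) are:

$$L = \tfrac{1}{2}\,(y - \hat y)^2, \qquad \sigma'(z) = \sigma(z)\,\bigl(1 - \sigma(z)\bigr)$$
$$\delta_{\text{out}} = (\hat y - y)\,\hat y\,(1 - \hat y), \qquad \delta_h = \delta_{\text{out}}\; w^{(2)}_{h}\; a_h\,(1 - a_h)$$
$$\frac{\partial L}{\partial w^{(2)}_{h}} = \delta_{\text{out}}\, a_h, \quad \frac{\partial L}{\partial b^{(2)}} = \delta_{\text{out}}, \quad \frac{\partial L}{\partial w^{(1)}_{hi}} = \delta_h\, x_i, \quad \frac{\partial L}{\partial b^{(1)}_{h}} = \delta_h$$

where $\hat y$ is the network output, $a_h$ the activation of hidden neuron $h$, and $w^{(1)}, w^{(2)}$ the two weight matrices. In the code these quantities appear as output_delta, hidden_deltas, W1 and W2.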
import math
import random

# Fix the random seed for reproducibility; comment this out or change the seed if desired
random.seed(0)

# ============ 1. Data ============
# XOR inputs and expected outputs
X = [
    [0.0, 0.0],
    [0.0, 1.0],
    [1.0, 0.0],
    [1.0, 1.0]
]
y = [
    [0.0],
    [1.0],
    [1.0],
    [0.0]
]

# ============ 2. Hyperparameters ============
input_size = 2    # number of input neurons
hidden_size = 2   # number of hidden neurons
output_size = 1   # number of output neurons
learning_rate = 0.1
epochs = 40000

# ============ 3. Parameter initialization ============
# Hidden-layer parameters
W1 = [[(random.random() - 0.5) for _ in range(input_size)] for _ in range(hidden_size)]
b1 = [(random.random() - 0.5) for _ in range(hidden_size)]
# Output-layer parameters
W2 = [[(random.random() - 0.5) for _ in range(hidden_size)] for _ in range(output_size)]
b2 = [(random.random() - 0.5) for _ in range(output_size)]

def sigmoid(z):
    return 1.0 / (1.0 + math.exp(-z))

def sigmoid_deriv(a):
    # Derivative of the sigmoid expressed through its output:
    # if a = sigmoid(z), then sigmoid'(z) = a * (1 - a)
    return a * (1.0 - a)
# ============ 4. Training ============
for epoch in range(epochs):
    # One full-batch update over all 4 training samples.
    # Per-sample or mini-batch updates would also work; batch is used for the demo.
    # Initialize the accumulated gradients
    dW1 = [[0.0] * input_size for _ in range(hidden_size)]
    db1 = [0.0 for _ in range(hidden_size)]
    dW2 = [[0.0] * hidden_size for _ in range(output_size)]
    db2 = [0.0 for _ in range(output_size)]
    total_loss = 0.0

    # ========== Forward pass & backpropagation ==========
    for i in range(len(X)):
        # Take the i-th sample
        x_i = X[i]
        y_i = y[i]

        # ----- Forward pass -----
        # 1) Hidden layer
        hidden_net = [0.0 for _ in range(hidden_size)]
        for h in range(hidden_size):
            sum_val = 0.0
            for inp in range(input_size):
                sum_val += W1[h][inp] * x_i[inp]
            sum_val += b1[h]
            hidden_net[h] = sigmoid(sum_val)  # hidden-layer activation

        # 2) Output layer
        output_net = [0.0 for _ in range(output_size)]
        for out in range(output_size):
            sum_val = 0.0
            for h in range(hidden_size):
                sum_val += W2[out][h] * hidden_net[h]
            sum_val += b2[out]
            output_net[out] = sigmoid(sum_val)  # output-layer activation

        # Half of the squared error: 0.5 * (y - y_pred)^2
        # (a single output here; the same code generalizes to multiple outputs)
        loss = 0.5 * (y_i[0] - output_net[0])**2
        total_loss += loss

        # ----- Backpropagation -----
        # Output-layer error gradient:
        # output_delta = (y_pred - y) * sigmoid'(output)
        output_delta = (output_net[0] - y_i[0]) * sigmoid_deriv(output_net[0])

        # Hidden-layer error gradients: output_delta is propagated back to each hidden neuron
        hidden_deltas = [0.0 for _ in range(hidden_size)]
        for h in range(hidden_size):
            hidden_deltas[h] = output_delta * W2[0][h] * sigmoid_deriv(hidden_net[h])

        # Accumulate output-layer gradients
        for out in range(output_size):
            for h in range(hidden_size):
                dW2[out][h] += output_delta * hidden_net[h]
            db2[out] += output_delta

        # Accumulate hidden-layer gradients
        for h in range(hidden_size):
            for inp in range(input_size):
                dW1[h][inp] += hidden_deltas[h] * x_i[inp]
            db1[h] += hidden_deltas[h]

    # ========== Parameter update ==========
    m = float(len(X))  # number of samples
    for h in range(hidden_size):
        for inp in range(input_size):
            W1[h][inp] -= learning_rate * (dW1[h][inp] / m)
        b1[h] -= learning_rate * (db1[h] / m)
    for out in range(output_size):
        for h in range(hidden_size):
            W2[out][h] -= learning_rate * (dW2[out][h] / m)
        b2[out] -= learning_rate * (db2[out] / m)

    # Print some progress information
    if (epoch + 1) % 2000 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss = {total_loss}")
# ============ 5. Test the model ============
print("\nTraining finished; network predictions for XOR:")
for i in range(len(X)):
    x_i = X[i]
    # Forward pass
    hidden_net = [0.0 for _ in range(hidden_size)]
    for h in range(hidden_size):
        sum_val = 0.0
        for inp in range(input_size):
            sum_val += W1[h][inp] * x_i[inp]
        sum_val += b1[h]
        hidden_net[h] = sigmoid(sum_val)
    output_net = [0.0 for _ in range(output_size)]
    for out in range(output_size):
        sum_val = 0.0
        for h in range(hidden_size):
            sum_val += W2[out][h] * hidden_net[h]
        sum_val += b2[out]
        output_net[out] = sigmoid(sum_val)
    print(f"Input: {x_i}, predicted output: {output_net[0]:.4f}")
II. The structured Python version: the code is tidier, but it assumes some knowledge of matrix operations such as the dot product:
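As a quick refresher on the two operations the helpers below rely on (this snippet is only an illustration, separate from the listing):

# Dot product: sum of element-wise products
# [1, 2] . [3, 4] = 1*3 + 2*4 = 11
assert sum(a * b for a, b in zip([1, 2], [3, 4])) == 11

# Matrix-vector multiplication: one dot product per matrix row
# [[1, 2], [3, 4]] * [5, 6] = [1*5 + 2*6, 3*5 + 4*6] = [17, 39]
assert [sum(a * b for a, b in zip(row, [5, 6])) for row in [[1, 2], [3, 4]]] == [17, 39]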
# (Structured) Python version of a hand-written neural network solving the XOR problem
import math
import random

# ========== Activation function and its derivative ==========
def sigmoid(x):
    return 1.0 / (1.0 + math.exp(-x))

def sigmoid_derivative(x):
    # x is the output of sigmoid, i.e. an already-activated value
    return x * (1.0 - x)

# ========== Weight-initialization helper ==========
def init_weight(rows, cols):
    # Return a rows x cols random matrix with entries in (-1, 1)
    return [[random.uniform(-1.0, 1.0) for _ in range(cols)] for _ in range(rows)]

# ========== Dot-product and matrix helpers ==========
def dot_product(vec1, vec2):
    # Vector dot product
    return sum(a * b for a, b in zip(vec1, vec2))

def matrix_vector_mul(matrix, vector):
    # Matrix-vector multiplication
    result = []
    for row in matrix:
        result.append(dot_product(row, vector))
    return result
# ========== Training entry point ==========
def train_xor(epochs=10000, learning_rate=0.1):
    # Training data (4 samples)
    X = [
        [0.0, 0.0],  # -> 0
        [0.0, 1.0],  # -> 1
        [1.0, 0.0],  # -> 1
        [1.0, 1.0]   # -> 0
    ]
    Y = [0.0, 1.0, 1.0, 0.0]

    # Network structure: 2 inputs -> 2 hidden neurons -> 1 output
    input_size = 2
    hidden_size = 2
    output_size = 1

    # Initialize weights and biases
    # W1: [hidden_size x input_size], b1: [hidden_size]
    W1 = init_weight(hidden_size, input_size)
    b1 = [random.uniform(-1.0, 1.0) for _ in range(hidden_size)]
    # W2: [output_size x hidden_size], b2: [output_size]
    W2 = init_weight(output_size, hidden_size)
    b2 = [random.uniform(-1.0, 1.0) for _ in range(output_size)]

    for ep in range(epochs):
        # Accumulated error, useful for monitoring training progress
        total_error = 0.0
        for x, y_true in zip(X, Y):
            # ========== Forward pass ==========
            # 1) Input layer -> hidden layer
            z1 = matrix_vector_mul(W1, x)  # [hidden_size], biases not yet added
            # Add the biases
            for i in range(hidden_size):
                z1[i] += b1[i]
            # Activation
            a1 = [sigmoid(z) for z in z1]

            # 2) Hidden layer -> output layer
            z2 = matrix_vector_mul(W2, a1)  # [output_size], biases not yet added
            for i in range(output_size):
                z2[i] += b2[i]
            # Output-layer activation
            a2 = [sigmoid(z) for z in z2]  # [output_size] = a single value here
            y_pred = a2[0]

            # ========== Error ==========
            error = 0.5 * (y_true - y_pred)**2
            total_error += error

            # ========== Backpropagation ==========
            # Output-layer gradient
            d_output = (y_pred - y_true) * sigmoid_derivative(y_pred)  # scalar
            # Hidden-layer gradients: one per hidden neuron
            d_hidden = [0.0 for _ in range(hidden_size)]
            for i in range(hidden_size):
                # W2[0][i] is the weight from hidden unit i to the output unit
                d_hidden[i] = d_output * W2[0][i] * sigmoid_derivative(a1[i])

            # ========== Update W2, b2 ==========
            # W2 is [1 x hidden_size]: there is only one output neuron
            for i in range(hidden_size):
                W2[0][i] -= learning_rate * d_output * a1[i]
            b2[0] -= learning_rate * d_output

            # ========== Update W1, b1 ==========
            for i in range(hidden_size):
                for j in range(input_size):
                    W1[i][j] -= learning_rate * d_hidden[i] * x[j]
                b1[i] -= learning_rate * d_hidden[i]

        # To monitor the error during training, uncomment the lines below:
        # if (ep + 1) % 1000 == 0:
        #     print(f"Epoch {ep+1}, Error = {total_error}")

    # Return the trained weights and biases for later testing / prediction
    return W1, b1, W2, b2
def predict(x, W1, b1, W2, b2):
    # Forward pass
    # Hidden layer
    z1 = matrix_vector_mul(W1, x)
    for i in range(len(z1)):
        z1[i] += b1[i]
    a1 = [sigmoid(z) for z in z1]
    # Output layer
    z2 = matrix_vector_mul(W2, a1)
    for i in range(len(z2)):
        z2[i] += b2[i]
    a2 = [sigmoid(z) for z in z2]
    return a2[0]

if __name__ == "__main__":
    # Train the network
    W1, b1, W2, b2 = train_xor(epochs=10000, learning_rate=0.1)

    # Test
    test_data = [
        [0.0, 0.0],
        [0.0, 1.0],
        [1.0, 0.0],
        [1.0, 1.0]
    ]
    for t in test_data:
        pred = predict(t, W1, b1, W2, b2)
        print(f"Input: {t}, Predicted: {pred:.4f}")
III. A C++ version (reasonably well structured) that prints the loss value during training, supports saving the model, and can reload a saved model for further training, solving the XOR problem:

// 20250311 improved version: adds saving and loading of the model; the saved model's
// structure may differ from that of the network it is loaded into
#include <iostream>
#include <vector>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <string>
class NeuralNetwork {
private:
    // Network structure
    int inputSize;
    int hiddenSize;
    int outputSize;
    // Learning rate
    double learningRate;
    // Weights and biases
    std::vector<std::vector<double>> w1; // input layer -> hidden layer weights
    std::vector<std::vector<double>> w2; // hidden layer -> output layer weights
    std::vector<double> b1;              // hidden-layer biases
    std::vector<double> b2;              // output-layer biases

    // Helper: activation function
    double sigmoid(double x) {
        return 1.0 / (1.0 + std::exp(-x));
    }

    // Helper: derivative of the sigmoid
    double sigmoidDerivative(double x) {
        // Equals sigmoid(x) * (1 - sigmoid(x)); note that x here is normally
        // a value that has already been passed through the sigmoid
        return x * (1.0 - x);
    }

public:
    // Constructor: set up the network structure and initialize the weights
    NeuralNetwork(int inputSize, int hiddenSize, int outputSize, double learningRate)
        : inputSize(inputSize), hiddenSize(hiddenSize), outputSize(outputSize),
          learningRate(learningRate)
    {
        // Seed the random number generator
        std::srand(static_cast<unsigned>(std::time(nullptr)));
        // Allocate w1, w2
        w1.resize(inputSize, std::vector<double>(hiddenSize));
        w2.resize(hiddenSize, std::vector<double>(outputSize));
        // Allocate b1, b2
        b1.resize(hiddenSize);
        b2.resize(outputSize);
        // Random initialization in (-0.5, 0.5)
        for (int i = 0; i < inputSize; ++i) {
            for (int j = 0; j < hiddenSize; ++j) {
                w1[i][j] = ((double)std::rand() / RAND_MAX) - 0.5;
            }
        }
        for (int i = 0; i < hiddenSize; ++i) {
            for (int j = 0; j < outputSize; ++j) {
                w2[i][j] = ((double)std::rand() / RAND_MAX) - 0.5;
            }
        }
        for (int j = 0; j < hiddenSize; ++j) {
            b1[j] = ((double)std::rand() / RAND_MAX) - 0.5;
        }
        for (int j = 0; j < outputSize; ++j) {
            b2[j] = ((double)std::rand() / RAND_MAX) - 0.5;
        }
    }
    // Forward pass: given an input, return the network output
    std::vector<double> forward(const std::vector<double>& input) {
        // Hidden-layer activations
        std::vector<double> hidden(hiddenSize);
        for (int j = 0; j < hiddenSize; ++j) {
            double sum = b1[j];
            for (int i = 0; i < inputSize; ++i) {
                sum += input[i] * w1[i][j];
            }
            hidden[j] = sigmoid(sum);
        }
        // Output-layer activations
        std::vector<double> output(outputSize);
        for (int k = 0; k < outputSize; ++k) {
            double sum = b2[k];
            for (int j = 0; j < hiddenSize; ++j) {
                sum += hidden[j] * w2[j][k];
            }
            output[k] = sigmoid(sum);
        }
        return output;
    }
    // Training: run one forward/backward pass on a single sample and update the weights
    double trainSingleSample(const std::vector<double>& input,
                             const std::vector<double>& target)
    {
        // Forward pass
        std::vector<double> hidden(hiddenSize);
        for (int j = 0; j < hiddenSize; ++j) {
            double sum = b1[j];
            for (int i = 0; i < inputSize; ++i) {
                sum += input[i] * w1[i][j];
            }
            hidden[j] = sigmoid(sum);
        }
        std::vector<double> output(outputSize);
        for (int k = 0; k < outputSize; ++k) {
            double sum = b2[k];
            for (int j = 0; j < hiddenSize; ++j) {
                sum += hidden[j] * w2[j][k];
            }
            output[k] = sigmoid(sum);
        }

        // Output-layer error and error gradient (delta)
        std::vector<double> outputError(outputSize);
        std::vector<double> deltaOutput(outputSize);
        for (int k = 0; k < outputSize; ++k) {
            outputError[k] = target[k] - output[k];
            deltaOutput[k] = outputError[k] * sigmoidDerivative(output[k]);
        }

        // Hidden-layer error and error gradient
        std::vector<double> hiddenError(hiddenSize, 0.0);
        std::vector<double> deltaHidden(hiddenSize, 0.0);
        for (int j = 0; j < hiddenSize; ++j) {
            double error = 0.0;
            for (int k = 0; k < outputSize; ++k) {
                error += deltaOutput[k] * w2[j][k];
            }
            hiddenError[j] = error;
            deltaHidden[j] = error * sigmoidDerivative(hidden[j]);
        }

        // Update w2, b2
        for (int j = 0; j < hiddenSize; ++j) {
            for (int k = 0; k < outputSize; ++k) {
                w2[j][k] += learningRate * deltaOutput[k] * hidden[j];
            }
        }
        for (int k = 0; k < outputSize; ++k) {
            b2[k] += learningRate * deltaOutput[k];
        }

        // Update w1, b1
        for (int i = 0; i < inputSize; ++i) {
            for (int j = 0; j < hiddenSize; ++j) {
                w1[i][j] += learningRate * deltaHidden[j] * input[i];
            }
        }
        for (int j = 0; j < hiddenSize; ++j) {
            b1[j] += learningRate * deltaHidden[j];
        }

        // Per-sample loss: half of the squared error, summed over the outputs
        double loss = 0.0;
        for (int k = 0; k < outputSize; ++k) {
            loss += 0.5 * (target[k] - output[k]) * (target[k] - output[k]);
        }
        return loss;
    }
    // Train for multiple epochs
    void train(const std::vector<std::vector<double>>& inputs,
               const std::vector<std::vector<double>>& targets,
               int epochs)
    {
        for (int e = 0; e < epochs; ++e) {
            double totalLoss = 0.0;
            for (size_t i = 0; i < inputs.size(); ++i) {
                totalLoss += trainSingleSample(inputs[i], targets[i]);
            }
            // Report the average loss per epoch (with only 4 XOR samples,
            // the plain sum would work just as well)
            double avgLoss = totalLoss / inputs.size();
            if ((e + 1) % 100 == 0) {
                std::cout << "Epoch " << e + 1 << " / " << epochs
                          << ", Loss = " << avgLoss << std::endl;
            }
        }
    }
    // Save the model to a file
    bool saveModel(const std::string& filename) {
        std::ofstream out(filename.c_str());
        if (!out.is_open()) {
            std::cerr << "Error: cannot open file to save model.\n";
            return false;
        }
        // Save the network structure first
        out << inputSize << " " << hiddenSize << " " << outputSize << "\n";
        // Save w1
        for (int i = 0; i < inputSize; ++i) {
            for (int j = 0; j < hiddenSize; ++j) {
                out << w1[i][j] << " ";
            }
            out << "\n";
        }
        // Save b1
        for (int j = 0; j < hiddenSize; ++j) {
            out << b1[j] << " ";
        }
        out << "\n";
        // Save w2
        for (int j = 0; j < hiddenSize; ++j) {
            for (int k = 0; k < outputSize; ++k) {
                out << w2[j][k] << " ";
            }
            out << "\n";
        }
        // Save b2
        for (int k = 0; k < outputSize; ++k) {
            out << b2[k] << " ";
        }
        out << "\n";
        out.close();
        std::cout << "Model saved to " << filename << std::endl;
        return true;
    }
    // Load a model from a file
    bool loadModel(const std::string& filename) {
        std::ifstream in(filename.c_str());
        if (!in.is_open()) {
            std::cerr << "Error: cannot open file to load model.\n";
            return false;
        }
        int inSize, hidSize, outSize;
        in >> inSize >> hidSize >> outSize;
        // If the saved structure differs from the current network, rebuild the network
        // dynamically to match the file; this is what allows loading into a network
        // that was constructed with a different structure.
        if (inSize != inputSize || hidSize != hiddenSize || outSize != outputSize) {
            inputSize = inSize;
            hiddenSize = hidSize;
            outputSize = outSize;
            w1.assign(inputSize, std::vector<double>(hiddenSize));
            w2.assign(hiddenSize, std::vector<double>(outputSize));
            b1.assign(hiddenSize, 0.0);
            b2.assign(outputSize, 0.0);
        }
        // Read w1
        for (int i = 0; i < inputSize; ++i) {
            for (int j = 0; j < hiddenSize; ++j) {
                in >> w1[i][j];
            }
        }
        // Read b1
        for (int j = 0; j < hiddenSize; ++j) {
            in >> b1[j];
        }
        // Read w2
        for (int j = 0; j < hiddenSize; ++j) {
            for (int k = 0; k < outputSize; ++k) {
                in >> w2[j][k];
            }
        }
        // Read b2
        for (int k = 0; k < outputSize; ++k) {
            in >> b2[k];
        }
        in.close();
        std::cout << "Model loaded from " << filename << std::endl;
        return true;
    }
};
int main() {
    // Build a 2-3-1 network: 2 inputs, 3 hidden neurons, 1 output
    NeuralNetwork nn(2, 3, 1, 0.1);

    // XOR training data
    std::vector<std::vector<double>> inputs = {
        {0.0, 0.0},
        {0.0, 1.0},
        {1.0, 0.0},
        {1.0, 1.0}
    };
    // Expected XOR outputs
    std::vector<std::vector<double>> targets = {
        {0.0},
        {1.0},
        {1.0},
        {0.0}
    };

    // Train for 20000 epochs
    nn.train(inputs, targets, 20000);

    // Training finished; check the results
    std::cout << "\nTest after training:\n";
    for (size_t i = 0; i < inputs.size(); ++i) {
        std::vector<double> output = nn.forward(inputs[i]);
        std::cout << "[" << inputs[i][0] << ", " << inputs[i][1] << "] -> "
                  << output[0] << std::endl;
    }

    // Save the model
    nn.saveModel("xor_network.dat");

    // =============== Demonstrate loading and reusing the model ===============
    // This part can be commented out or moved into a separate program.
    // The loaded network is deliberately constructed with a different hidden size;
    // loadModel rebuilds it to match the structure stored in the file.
    NeuralNetwork nn_loaded(2, 4, 1, 0.1);
    if (nn_loaded.loadModel("xor_network.dat")) {
        std::cout << "\nTest with loaded model:\n";
        for (size_t i = 0; i < inputs.size(); ++i) {
            std::vector<double> output = nn_loaded.forward(inputs[i]);
            std::cout << "[" << inputs[i][0] << ", " << inputs[i][1] << "] -> "
                      << output[0] << std::endl;
        }
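        // To continue training from the loaded weights (the "reuse the model for further
        // training" case mentioned in the section title), one could, for example, call:
        // nn_loaded.train(inputs, targets, 5000);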
    }

    return 0;
}
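Since saveModel writes the weights as plain text (the three sizes on the first line, then w1 row by row, b1, w2 row by row, and b2), the saved file can also be reused outside C++. A minimal Python sketch, assuming the xor_network.dat file produced by the program above, that reloads those weights and runs the same forward pass:

import math

def load_cpp_model(filename="xor_network.dat"):
    # Read all whitespace-separated numbers in the order saveModel wrote them
    vals = open(filename).read().split()
    in_size, hid_size, out_size = int(vals[0]), int(vals[1]), int(vals[2])
    it = iter(float(v) for v in vals[3:])
    w1 = [[next(it) for _ in range(hid_size)] for _ in range(in_size)]
    b1 = [next(it) for _ in range(hid_size)]
    w2 = [[next(it) for _ in range(out_size)] for _ in range(hid_size)]
    b2 = [next(it) for _ in range(out_size)]
    return w1, b1, w2, b2

def forward(x, w1, b1, w2, b2):
    # Same forward pass as NeuralNetwork::forward
    sig = lambda z: 1.0 / (1.0 + math.exp(-z))
    hidden = [sig(b1[j] + sum(x[i] * w1[i][j] for i in range(len(x))))
              for j in range(len(b1))]
    return [sig(b2[k] + sum(hidden[j] * w2[j][k] for j in range(len(hidden))))
            for k in range(len(b2))]

# Example:
# w1, b1, w2, b2 = load_cpp_model()
# print(forward([1.0, 0.0], w1, b1, w2, b2))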
IV. The deliberately structured C++ code

// Deliberately structured C++ version (solving the XOR problem)
#include <iostream>
#include <vector>
#include <cmath>
#include <cstdlib> // srand, rand
#include <ctime> // time
#include <iomanip> // std::setprecision
// ========== Activation function and its derivative ==========
// Same sigmoid as the Python version
double sigmoid(double x) {
    return 1.0 / (1.0 + std::exp(-x));
}

// x is the result of sigmoid(x)
double sigmoid_derivative(double x) {
    return x * (1.0 - x);
}

// ========== Weight-initialization helper ==========
// Return a rows x cols random matrix with entries in (-1.0, 1.0)
std::vector<std::vector<double>> init_weight(int rows, int cols) {
    std::vector<std::vector<double>> mat(rows, std::vector<double>(cols, 0.0));
    for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
            // Generate a random value in -1.0 ~ +1.0
            double rnd = (double)rand() / RAND_MAX; // 0.0 ~ 1.0
            rnd = rnd * 2.0 - 1.0;                  // -1.0 ~ +1.0
            mat[r][c] = rnd;
        }
    }
    return mat;
}
// ========== Dot product and matrix operations ==========
double dot_product(const std::vector<double>& vec1, const std::vector<double>& vec2) {
    double sum = 0.0;
    for (size_t i = 0; i < vec1.size(); i++) {
        sum += vec1[i] * vec2[i];
    }
    return sum;
}

std::vector<double> matrix_vector_mul(const std::vector<std::vector<double>>& matrix,
                                      const std::vector<double>& vec)
{
    std::vector<double> result(matrix.size(), 0.0);
    for (size_t i = 0; i < matrix.size(); i++) {
        // Dot product of row i with the input vector
        double dp = dot_product(matrix[i], vec);
        result[i] = dp;
    }
    return result;
}

// Struct returned by training, holding the trained W1, b1, W2, b2
struct XORModel {
    std::vector<std::vector<double>> W1; // hidden_size x input_size
    std::vector<double> b1;              // hidden_size
    std::vector<std::vector<double>> W2; // output_size x hidden_size
    std::vector<double> b2;              // output_size
};
// ========== Training function (mirrors the Python train_xor) ==========
// epochs and learning_rate can be adjusted as needed
XORModel train_xor(int epochs = 10000, double learning_rate = 0.1)
{
    // Training data (4 samples)
    std::vector<std::vector<double>> X = {
        {0.0, 0.0}, // -> 0
        {0.0, 1.0}, // -> 1
        {1.0, 0.0}, // -> 1
        {1.0, 1.0}  // -> 0
    };
    std::vector<double> Y = { 0.0, 1.0, 1.0, 0.0 };

    // Network structure: 2 -> 2 -> 1
    int input_size = 2;
    int hidden_size = 2;
    int output_size = 1;

    // ========== Initialize weights and biases ==========
    // W1: [hidden_size x input_size], b1: [hidden_size]
    std::vector<std::vector<double>> W1 = init_weight(hidden_size, input_size);
    std::vector<double> b1(hidden_size);
    for (int i = 0; i < hidden_size; i++) {
        double rnd = (double)rand() / RAND_MAX; // 0 ~ 1
        b1[i] = rnd * 2.0 - 1.0;                // -1 ~ 1
    }
    // W2: [output_size x hidden_size], b2: [output_size]
    std::vector<std::vector<double>> W2 = init_weight(output_size, hidden_size);
    std::vector<double> b2(output_size);
    for (int i = 0; i < output_size; i++) {
        double rnd = (double)rand() / RAND_MAX; // 0 ~ 1
        b2[i] = rnd * 2.0 - 1.0;                // -1 ~ 1
    }
    // ========== Training loop ==========
    for (int ep = 0; ep < epochs; ep++) {
        double total_error = 0.0;
        // Loop over the 4 samples
        for (size_t idx = 0; idx < X.size(); idx++) {
            // Take one sample
            const std::vector<double>& x = X[idx];
            double y_true = Y[idx];

            // ---------- Forward pass ----------
            // 1) Input layer -> hidden layer
            std::vector<double> z1 = matrix_vector_mul(W1, x); // size = hidden_size
            // Add the biases
            for (int i = 0; i < hidden_size; i++) {
                z1[i] += b1[i];
            }
            // Activation
            std::vector<double> a1(hidden_size);
            for (int i = 0; i < hidden_size; i++) {
                a1[i] = sigmoid(z1[i]);
            }
            // 2) Hidden layer -> output layer
            std::vector<double> z2 = matrix_vector_mul(W2, a1); // size = output_size
            for (int i = 0; i < output_size; i++) {
                z2[i] += b2[i];
            }
            std::vector<double> a2(output_size);
            for (int i = 0; i < output_size; i++) {
                a2[i] = sigmoid(z2[i]);
            }
            double y_pred = a2[0];

            // ---------- Error ----------
            double error = 0.5 * (y_true - y_pred) * (y_true - y_pred);
            total_error += error;

            // ---------- Backpropagation ----------
            // Output-layer gradient
            double d_output = (y_pred - y_true) * sigmoid_derivative(y_pred); // scalar
            // Hidden-layer gradients
            std::vector<double> d_hidden(hidden_size, 0.0);
            // W2[0][i] is the weight from hidden unit i to the output unit (output_size = 1)
            for (int i = 0; i < hidden_size; i++) {
                d_hidden[i] = d_output * W2[0][i] * sigmoid_derivative(a1[i]);
            }

            // ---------- Update W2, b2 ----------
            // Since output_size = 1, W2[0] is the single row of W2
            for (int i = 0; i < hidden_size; i++) {
                W2[0][i] -= learning_rate * d_output * a1[i];
            }
            b2[0] -= learning_rate * d_output;

            // ---------- Update W1, b1 ----------
            for (int i = 0; i < hidden_size; i++) {
                for (int j = 0; j < input_size; j++) {
                    W1[i][j] -= learning_rate * d_hidden[i] * x[j];
                }
                b1[i] -= learning_rate * d_hidden[i];
            }
        }
        // To monitor the error during training, uncomment the lines below:
        // if ((ep + 1) % 1000 == 0) {
        //     std::cout << "Epoch " << (ep + 1)
        //               << ", Error = " << total_error << std::endl;
        // }
    }

    // Pack the trained weights and biases into an XORModel and return it
    XORModel model;
    model.W1 = W1;
    model.b1 = b1;
    model.W2 = W2;
    model.b2 = b2;
    return model;
}
// ========== Prediction function (corresponds to the Python predict) ==========
double predict(const std::vector<double>& x,
               const std::vector<std::vector<double>>& W1,
               const std::vector<double>& b1,
               const std::vector<std::vector<double>>& W2,
               const std::vector<double>& b2)
{
    // Forward pass
    // Hidden layer
    std::vector<double> z1 = matrix_vector_mul(W1, x);
    for (size_t i = 0; i < z1.size(); i++) {
        z1[i] += b1[i];
    }
    std::vector<double> a1(z1.size());
    for (size_t i = 0; i < z1.size(); i++) {
        a1[i] = sigmoid(z1[i]);
    }
    // Output layer
    std::vector<double> z2 = matrix_vector_mul(W2, a1);
    for (size_t i = 0; i < z2.size(); i++) {
        z2[i] += b2[i];
    }
    std::vector<double> a2(z2.size());
    for (size_t i = 0; i < z2.size(); i++) {
        a2[i] = sigmoid(z2[i]);
    }
    return a2[0];
}
int main()
{
    // Seed the random number generator so each run starts from different initial weights;
    // for reproducible results, use srand(0) instead
    srand((unsigned int)time(nullptr));

    // Train
    XORModel model = train_xor(10000, 0.1);

    // Test
    std::vector<std::vector<double>> test_data = {
        {0.0, 0.0},
        {0.0, 1.0},
        {1.0, 0.0},
        {1.0, 1.0}
    };
    std::cout << std::fixed << std::setprecision(4);
    for (auto& t : test_data) {
        double y_pred = predict(t, model.W1, model.b1, model.W2, model.b2);
        std::cout << "Input: [" << t[0] << ", " << t[1]
                  << "], Predicted: " << y_pred << std::endl;
    }
    return 0;
}