自定义数据集:使用 PyTorch 框架实现逻辑回归,训练完成后保存模型,再加载保存的模型进行预测
1. 数据准备
首先,我们需要一些示例数据。在这个例子中,我们使用 scikit-learn 的 make_classification 生成一些带标签的简单二维数据点,将其划分为训练集和测试集,并进行标准化。
2. 定义逻辑回归模型
接下来,我们定义一个简单的逻辑回归模型。
3. 训练模型
定义损失函数和优化器,然后进行模型训练。
4. 保存模型
训练完成后,我们可以保存模型的状态字典。
5. 加载模型并进行预测
加载保存的模型,并进行预测。
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Generate a synthetic two-class dataset with two features.
# make_classification defaults to n_informative=2 and n_redundant=2, whose sum
# exceeds n_features=2 and raises ValueError, so both are set explicitly.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features: the scaler is fitted on the training split only
# and the same transform is then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert the arrays to PyTorch tensors (float32 features, int64 labels).
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
# Logistic regression model.
class LogisticRegression(nn.Module):
    """Binary logistic regression: one linear layer followed by a sigmoid.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim: int) -> None:
        super().__init__()
        # A single output unit; the sigmoid maps it into (0, 1).
        self.linear = nn.Linear(input_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return sigmoid(linear(x)) with the trailing size-1 dimension removed.

        Args:
            x: Input whose last dimension has size ``input_dim``.

        Returns:
            The sigmoid outputs, one value per input sample.
        """
        out = self.sigmoid(self.linear(x))
        # squeeze(-1) instead of squeeze(1): identical for 2-D batches, but it
        # also accepts a single unbatched sample, where dim 1 does not exist.
        return out.squeeze(-1)
# Build the model, the loss function and the optimizer.
# The model's input width is the number of feature columns in the training set.
input_dim = X_train_tensor.size(1)
model = LogisticRegression(input_dim)
criterion = nn.BCELoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01)
# Cast the labels to floating point for use as BCELoss targets (values stay 0 and 1).
y_train_tensor_float = y_train_tensor.to(torch.float32)
# Train the model; every epoch is one full-batch pass over the training set.
num_epochs = 100
# Training mode only needs to be set once: nothing inside the loop changes it.
model.train()
for epoch in range(num_epochs):
    # Forward pass and loss on the whole training set.
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor_float)

    # Clear stale gradients, backpropagate, then update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss every 10 epochs.
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
# Save only the learned parameters (the state dict), not the whole module.
torch.save(model.state_dict(), 'logistic_regression_model.pth')

# Load the parameters into a freshly constructed model of the same shape.
loaded_model = LogisticRegression(input_dim)
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state dict holds, and avoids arbitrary-object unpickling.
loaded_model.load_state_dict(
    torch.load('logistic_regression_model.pth', weights_only=True)
)
# Switch to evaluation mode before running inference.
loaded_model.eval()
# Predict on the test set; gradients are not needed for inference.
with torch.no_grad():
    # Threshold the model outputs at 0.5 to obtain 0/1 class labels.
    predictions = (loaded_model(X_test_tensor) > 0.5).long()

# Accuracy: fraction of test samples whose predicted label matches the target.
accuracy = (predictions == y_test_tensor).sum().item() / y_test_tensor.size(0)
print(f'Accuracy: {accuracy:.4f}')