Deep Learning Labs
Lab 1: Building a Fully Connected Neural Network with NumPy
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # must be set before TensorFlow is imported to take effect

import numpy as np
from tensorflow.keras import datasets  # used only to download/load MNIST
# Prepare the data: load MNIST and scale pixel values to [0, 1]
def mnist_dataset():
    (x, y), (x_test, y_test) = datasets.mnist.load_data()
    x = x / 255.0
    x_test = x_test / 255.0
    return (x, y), (x_test, y_test)
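As a quick sanity check of the loader (illustrative, not part of the lab script), the Keras MNIST split contains 60,000 training and 10,000 test grayscale images of 28x28:

(x, y), (x_test, y_test) = mnist_dataset()
print(x.shape, y.shape)            # (60000, 28, 28) (60000,)
print(x_test.shape, y_test.shape)  # (10000, 28, 28) (10000,)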
# Define the matrix multiplication layer, caching its inputs for the backward pass
class Matmul():
    def __init__(self):
        self.mem = {}

    def forward(self, x, W):
        h = np.matmul(x, W)
        self.mem = {"x": x, "W": W}
        return h

    def backward(self, grad_y):
        # For h = xW: grad_x = grad_y W^T and grad_W = x^T grad_y
        x = self.mem["x"]
        W = self.mem["W"]
        grad_x = np.matmul(grad_y, W.T)
        grad_W = np.matmul(x.T, grad_y)
        return grad_x, grad_W
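To trust a hand-written backward pass, it helps to compare it against a finite-difference estimate. A minimal check of Matmul (an illustrative sketch using only the class above, not part of the original lab):

mm = Matmul()
x = np.random.randn(4, 3)
W = np.random.randn(3, 2)
grad_y = np.random.randn(4, 2)
h = mm.forward(x, W)
grad_x, grad_W = mm.backward(grad_y)
# Perturb one weight and compare the difference quotient with the analytic entry
eps = 1e-6
W_pert = W.copy()
W_pert[0, 0] += eps
numeric = (np.sum(np.matmul(x, W_pert) * grad_y) - np.sum(h * grad_y)) / eps
print(grad_W[0, 0], numeric)  # the two values should agree to several decimal places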
# Define the ReLU layer
class Relu():
    def __init__(self):
        self.mem = {}

    def forward(self, x):
        self.mem["x"] = x
        return np.where(x > 0, x, np.zeros_like(x))

    def backward(self, grad_y):
        # Pass the gradient through only where the input was positive
        x = self.mem["x"]
        return (x > 0).astype(np.float32) * grad_y
# Define the Softmax layer (row-wise over the class dimension)
class Softmax():
    def __init__(self):
        self.mem = {}
        self.epsilon = 1e-12  # guards against division by zero

    def forward(self, x):
        # Subtract the row-wise max before exponentiating for numerical stability
        x_exp = np.exp(x - np.max(x, axis=1, keepdims=True))
        denominator = np.sum(x_exp, axis=1, keepdims=True)
        out = x_exp / (denominator + self.epsilon)
        self.mem["out"] = out
        return out

    def backward(self, grad_y):
        # Jacobian-vector product: grad_x = grad_y * s - (grad_y . s) * s
        s = self.mem["out"]
        sisj = np.matmul(np.expand_dims(s, axis=2), np.expand_dims(s, axis=1))  # batched outer products s_i s_j
        g_y_exp = np.expand_dims(grad_y, axis=1)
        tmp = np.matmul(g_y_exp, sisj)  # rows of (grad_y . s) * s
        tmp = np.squeeze(tmp, axis=1)
        softmax_grad = -tmp + grad_y * s
        return softmax_grad
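The backward pass implements the softmax Jacobian-vector product. With $s = \mathrm{softmax}(x)$, the Jacobian is $\partial s_i / \partial x_j = s_i(\delta_{ij} - s_j)$, so for an upstream gradient $g$:

$$\frac{\partial L}{\partial x_j} = \sum_i g_i\, s_i(\delta_{ij} - s_j) = g_j s_j - s_j \sum_i g_i s_i,$$

which is exactly grad_y * s - tmp, where tmp holds the $(g \cdot s)\,s$ term computed through the batched outer products sisj.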
# Define the cross-entropy loss for one-hot labels
class Cross_entropy():
    def __init__(self):
        self.epsilon = 1e-12  # guards against log(0)
        self.mem = {}

    def forward(self, x, labels):
        log_prob = np.log(x + self.epsilon)
        out = np.mean(np.sum(-log_prob * labels, axis=1))
        self.mem["x"] = x
        return out

    def backward(self, labels):
        # Per-sample gradient -labels / x; the 1/batch factor of the mean is
        # omitted here and is effectively absorbed into the learning rate
        x = self.mem["x"]
        return -1 / (x + self.epsilon) * labels
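Composing the two backward passes above collapses to a familiar result: for one-hot labels, the gradient of the cross-entropy through the softmax is simply prob - labels. A minimal numerical check (illustrative, using only the classes defined above):

sm = Softmax()
ce = Cross_entropy()
logits = np.random.randn(5, 10)
labels = np.eye(10)[np.random.randint(0, 10, size=5)]
prob = sm.forward(logits)
ce.forward(prob, labels)
grad = sm.backward(ce.backward(labels))
print(np.allclose(grad, prob - labels, atol=1e-6))  # True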
# Build the model: 784 (+1 bias) -> 100 -> 10
class myModel():
    def __init__(self):
        self.W1 = np.random.normal(size=[28 * 28 + 1, 100])
        self.W2 = np.random.normal(size=[100, 10])
        self.mul_h1 = Matmul()
        self.relu = Relu()
        self.mul_h2 = Matmul()
        self.softmax = Softmax()
        self.cross_en = Cross_entropy()

    def forward(self, x, labels):
        x = x.reshape(-1, 28 * 28)
        # Append a constant 1 column so the last row of W1 acts as the bias
        bias = np.ones(shape=[x.shape[0], 1])
        x = np.concatenate([x, bias], axis=1)
        self.h1 = self.mul_h1.forward(x, self.W1)
        self.h1_relu = self.relu.forward(self.h1)
        self.h2 = self.mul_h2.forward(self.h1_relu, self.W2)
        self.h2_soft = self.softmax.forward(self.h2)
        self.loss = self.cross_en.forward(self.h2_soft, labels)

    def backward(self, labels):
        # Propagate gradients in reverse order of the forward pass
        self.loss_grad = self.cross_en.backward(labels)
        self.h2_soft_grad = self.softmax.backward(self.loss_grad)
        self.h2_grad, self.W2_grad = self.mul_h2.backward(self.h2_soft_grad)
        self.h1_relu_grad = self.relu.backward(self.h2_grad)
        self.h1_grad, self.W1_grad = self.mul_h1.backward(self.h1_relu_grad)
# Compute classification accuracy
def compute_accuracy(prob, labels):
    predictions = np.argmax(prob, axis=1)
    truth = np.argmax(labels, axis=1)
    return np.mean(predictions == truth)
# One full-batch training step (one step over the whole set, i.e. one epoch here)
def train_one_step(model, x, y):
    model.forward(x, y)
    model.backward(y)
    model.W1 -= 1e-5 * model.W1_grad
    model.W2 -= 1e-5 * model.W2_grad
    loss = model.loss
    accuracy = compute_accuracy(model.h2_soft, y)
    return loss, accuracy
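Each call performs one step of plain full-batch gradient descent, $W \leftarrow W - \eta\,\partial L/\partial W$ with fixed $\eta = 10^{-5}$. Since Cross_entropy.backward omits the $1/N$ factor of the mean, the update is in effect the mean-loss gradient scaled by $\eta N$, which is why such a small $\eta$ still makes progress.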
# Compute loss and accuracy on the test set
def test(model, x, y):
    model.forward(x, y)
    loss = model.loss
    accuracy = compute_accuracy(model.h2_soft, y)
    return loss, accuracy
# Train on the full dataset
train_data, test_data = mnist_dataset()
# One-hot encode the integer labels
train_label = np.zeros(shape=[train_data[0].shape[0], 10])
test_label = np.zeros(shape=[test_data[0].shape[0], 10])
train_label[np.arange(train_data[0].shape[0]), np.array(train_data[1])] = 1
test_label[np.arange(test_data[0].shape[0]), np.array(test_data[1])] = 1
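The fancy indexing above one-hot encodes the integer labels; an equivalent shorthand (illustrative alternative) is to index into an identity matrix:

# Equivalent one-hot encoding via an identity matrix
train_label = np.eye(10)[train_data[1]]
test_label = np.eye(10)[test_data[1]]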
model = myModel()
for epoch in range(50):
    loss, accuracy = train_one_step(model, train_data[0], train_label)
    print(f'epoch {epoch} : loss {loss} ; accuracy {accuracy}')
# Evaluate on the test set
loss, accuracy = test(model, test_data[0], test_label)
print(f'test loss {loss} ; accuracy {accuracy}')
Lab 2: A CNN in PyTorch
pip install torch==1.12.1+cu102 torchvision==0.13.1+cu102 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu102
import torch
from torch import nn
from torchvision import datasets, transforms
from tqdm import tqdm
# Hyperparameters
BATCH_SIZE = 100
EPOCHS = 10
LEARNING_RATE = 1e-4
KEEP_PROB_RATE = 0.7
# Set device to use
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Data transformation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])
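With mean=[0.5] and std=[0.5], Normalize rescales ToTensor's [0, 1] pixel range to [-1, 1] via $x \mapsto (x - 0.5)/0.5$: 0 maps to -1 and 1 maps to 1.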
# Download and load dataset
path = './data/'
train_data = datasets.MNIST(path, train=True, transform=transform, download=True)
test_data = datasets.MNIST(path, train=False, transform=transform)
# Create DataLoader
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=BATCH_SIZE)
# Define the CNN model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=7, padding=3, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),   # 28x28 -> 14x14
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),   # 14x14 -> 7x7
            nn.Flatten(),
            nn.Linear(in_features=7 * 7 * 64, out_features=1024),
            nn.ReLU(),
            nn.Dropout(1 - KEEP_PROB_RATE),  # nn.Dropout takes the drop probability, hence 1 - keep rate
            # No Softmax here: nn.CrossEntropyLoss expects raw logits and applies log-softmax internally
            nn.Linear(in_features=1024, out_features=10)
        )

    def forward(self, input):
        output = self.model(input)
        return output
net = Net()
net.to(device)
print(net)
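As a quick structural check (an illustrative sketch, not part of the lab script): both convolutions use "same" padding, so only the two pooling layers halve the spatial size, 28 -> 14 -> 7, which is where the 7*7*64 = 3136 input features of the first Linear layer come from. A forward pass on a dummy batch confirms the output shape:

with torch.no_grad():
    dummy = torch.randn(1, 1, 28, 28).to(device)
    print(net(dummy).shape)  # torch.Size([1, 10])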
# Define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=LEARNING_RATE)
# Training and testing process
history = {'Test Loss': [], 'Test Accuracy': []}
for epoch in range(1, EPOCHS + 1):
    process_bar = tqdm(train_loader, unit='step')
    net.train(True)
    for step, (train_imgs, labels) in enumerate(process_bar):
        train_imgs = train_imgs.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = net(train_imgs)
        loss = loss_fn(outputs, labels)
        # Backward pass and optimization
        net.zero_grad()
        loss.backward()
        optimizer.step()
        # Compute accuracy on this batch
        predictions = torch.argmax(outputs, dim=1)
        accuracy = torch.sum(predictions == labels) / labels.shape[0]
        # Update progress bar
        process_bar.set_description(f"[{epoch}/{EPOCHS}] Loss: {loss.item():.4f}, Acc: {accuracy.item():.4f}")
    # Evaluate on the test set after each epoch
    net.train(False)
    correct = 0
    total_loss = 0
    with torch.no_grad():
        for test_imgs, labels in test_loader:
            test_imgs = test_imgs.to(device)
            labels = labels.to(device)
            outputs = net(test_imgs)
            loss = loss_fn(outputs, labels)
            total_loss += loss
            predictions = torch.argmax(outputs, dim=1)
            correct += torch.sum(predictions == labels)
    test_accuracy = correct / len(test_data)  # robust even if the last batch is smaller than BATCH_SIZE
    test_loss = total_loss / len(test_loader)
    history['Test Loss'].append(test_loss.item())
    history['Test Accuracy'].append(test_accuracy.item())
    process_bar.set_description(
        f"[{epoch}/{EPOCHS}] Loss: {loss.item():.4f}, Acc: {accuracy.item():.4f}, Test Loss: {test_loss.item():.4f}, Test Acc: {test_accuracy.item():.4f}"
    )
    process_bar.close()
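The history dict collects per-epoch test metrics but is never visualized in the script above; a minimal plotting sketch, assuming matplotlib is installed:

import matplotlib.pyplot as plt

plt.plot(history['Test Loss'], label='Test Loss')
plt.plot(history['Test Accuracy'], label='Test Accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.show()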