👘 PyTorch 深度学习快速入门
·7 min read·1341 字
来源:《深度学习笔记》— PyTorch 深度学习快速入门
为什么选择 PyTorch
PyTorch 由 Meta AI 开发,采用动态计算图(Define-by-Run),写法接近普通 Python,调试方便,是学习深度学习原理的理想工具。
核心优势:
- Pythonic:代码直观,像写 NumPy 一样写神经网络
- 动态图:可以在运行时改变网络结构
- 自动求导:内置 autograd 引擎
- GPU 支持:一行代码切换 CPU/GPU
1. 张量(Tensor)
张量是 PyTorch 的核心数据结构,类似 NumPy 的 ndarray,但支持 GPU 加速和自动求导。
1.1 创建张量
import torch
import numpy as np
# Build a tensor from a Python list
t1 = torch.tensor([1.0, 2.0, 3.0])
print(t1.shape) # torch.Size([3])
print(t1.dtype) # torch.float32
# Convert from NumPy (the tensor shares memory with the source array)
arr = np.array([1, 2, 3])
t2 = torch.from_numpy(arr)
# Common initializers, each producing a 3x4 tensor unless noted otherwise
zeros = torch.zeros(3, 4)
ones = torch.ones(3, 4)
rand = torch.rand(3, 4) # uniform distribution on [0, 1)
randn = torch.randn(3, 4) # standard normal distribution
# 3x3 identity matrix
eye = torch.eye(3)
1.2 基本运算
# Two 2x2 float tensors used by every example below
a = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
b = torch.tensor([[5.0, 6.0], [7.0, 8.0]])
print(a + b) # addition
print(a * b) # element-wise multiplication
print(a @ b) # matrix multiplication (equivalent to torch.mm(a, b))
# Reshape the 2x2 tensor into a 4x1 column
print(a.reshape(4, 1))
# Transpose
print(a.T)
print(a.unsqueeze(0)) # (1,2,2)
# squeeze() drops size-1 axes; `a` is 2x2 and has none, so its shape is unchanged here
print(a.squeeze())
1.3 CPU / GPU 切换
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用设备:{device}")
# Create a 3x3 random tensor and move it to the selected device
t = torch.randn(3, 3).to(device)
# The model and its input data must both be moved to the same device
# NOTE(review): MyNet and x are placeholders that this snippet does not define — assumed to exist in the reader's own code
model = MyNet().to(device)
x = x.to(device)
2. 自动求导(Autograd)
2.1 requires_grad 与梯度追踪
# requires_grad=True asks autograd to track operations on x
x = torch.tensor([2.0], requires_grad=True)
y = x ** 2 + 3 * x + 1 # y = x² + 3x + 1
# Backpropagate from y; the gradient is stored in x.grad
y.backward()
# dy/dx = 2x + 3 = 7.0
print(x.grad) # tensor([7.])
2.2 梯度的累加与清零
PyTorch 默认累加梯度,训练时每次迭代前必须手动清零:
# Per-iteration pattern; optimizer and loss come from the surrounding training code
optimizer.zero_grad() # reset accumulated gradients
loss.backward() # compute gradients
optimizer.step() # update parameters
2.3 推理时关闭梯度
# Switch the model to evaluation mode before running inference
model.eval()
# Disable gradient tracking for the forward pass; no backward pass is needed here
with torch.no_grad():
    output = model(x_test)
3. 构建神经网络(nn.Module)
3.1 自定义网络
import torch.nn as nn
import torch.nn.functional as F
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the un-activated outputs of the final layer for ``x``."""
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)  # no activation on the output layer
        return x
model = MLP(input_size=784, hidden_size=128, output_size=10)
3.2 nn.Sequential 快速搭建
# Layers run in the order listed: 784 -> 128 -> 64 -> 10, with ReLU after each hidden layer
model = nn.Sequential(
nn.Linear(784, 128),
nn.ReLU(),
# Dropout with probability 0.2 after the first hidden layer
nn.Dropout(p=0.2),
nn.Linear(128, 64),
nn.ReLU(),
# Final layer has no activation
nn.Linear(64, 10)
)
3.3 常用层一览
| 层类型 | PyTorch 类 | 用途 |
|---|---|---|
| 全连接 | nn.Linear(in, out) | MLP、分类头 |
| 卷积 | nn.Conv2d(in_ch, out_ch, kernel) | 图像特征提取 |
| ReLU | nn.ReLU() | 隐藏层激活 |
| BatchNorm | nn.BatchNorm1d / 2d | 训练稳定性 |
| Dropout | nn.Dropout(p=0.5) | 防止过拟合 |
| 池化 | nn.MaxPool2d(kernel_size) | 下采样 |
4. 损失函数与优化器
# The assignments below are alternatives: each one rebinds `criterion` / `optimizer`,
# so only the last of each survives if the snippet is run top to bottom. Pick one.
# Cross-entropy (multi-class; applies Softmax internally)
criterion = nn.CrossEntropyLoss()
# MSE (regression)
criterion = nn.MSELoss()
# Binary cross-entropy (binary classification)
# NOTE(review): nn.BCELoss expects probabilities in [0, 1] — confirm the model ends in a
# sigmoid, or use nn.BCEWithLogitsLoss on raw logits
criterion = nn.BCELoss()
import torch.optim as optim
# SGD + Momentum
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Adam
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Learning-rate schedule: StepLR configured with step_size=5 and gamma=0.5
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
5. 标准训练循环
PyTorch 训练循环的"四步法":
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: Module to train; it is switched to training mode.
        dataloader: Sized iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable computing the loss from ``(outputs, targets)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The sum of per-batch loss values divided by ``len(dataloader)``.

    Raises:
        ZeroDivisionError: If ``dataloader`` has length zero.
    """
    model.train()
    total_loss = 0
    for x_batch, y_batch in dataloader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()               # 1. reset gradients
        outputs = model(x_batch)            # 2. forward pass
        loss = criterion(outputs, y_batch)  # 3. compute the loss
        loss.backward()                     # 4. backward pass
        optimizer.step()                    # + update parameters

        total_loss += loss.item()
    return total_loss / len(dataloader)
def evaluate(model, dataloader, device):
    """Return the classification accuracy of ``model`` over ``dataloader``.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Fraction of samples whose highest-scoring output index (along
        ``dim=1``) equals the label.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    # No gradients are needed for evaluation
    with torch.no_grad():
        for x_batch, y_batch in dataloader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            outputs = model(x_batch)
            # Index of the largest score per row is the predicted class
            predicted = outputs.argmax(dim=1)
            correct += (predicted == y_batch).sum().item()
            total += y_batch.size(0)
    return correct / total
6. 数据加载(DataLoader)
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
# Convert images to tensors, then normalize with the MNIST constants
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),  # MNIST normalization
])

# Both splits are downloaded into ./data when not already present
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Shuffle only the training split
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Peek at a single batch to check its shapes, then stop iterating
for x, y in train_loader:
    print(x.shape, y.shape)  # torch.Size([64, 1, 28, 28]) torch.Size([64])
    break
7. 完整示例:MNIST 手写数字分类
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
# 1. Setup: use the GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 2. Data: convert images to tensors and normalize, then wrap both splits in loaders
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
# Training split, shuffled
train_loader = DataLoader(
datasets.MNIST('./data', train=True, download=True, transform=transform),
batch_size=64, shuffle=True
)
# Test split, not shuffled
test_loader = DataLoader(
datasets.MNIST('./data', train=False, download=True, transform=transform),
batch_size=64, shuffle=False
)
# 3. 模型
class MnistNet(nn.Module):
    """Fully connected MNIST classifier: 784 -> 256 -> 128 -> 10."""

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),         # (64,1,28,28) -> (64,784)
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 10),   # 10 classes; no Softmax here
        )

    def forward(self, x):
        """Return the 10 raw class scores for each sample in ``x``."""
        return self.net(x)
# Instantiate the network and move it to the selected device
model = MnistNet().to(device)
# 4. Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# StepLR configured with step_size=3 and gamma=0.5
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)
# 5. 训练循环
# Train for 10 epochs; after each epoch, measure test accuracy and step the scheduler
for epoch in range(10):
    # --- training phase ---
    model.train()
    train_loss = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # --- evaluation phase: no gradients needed ---
    model.eval()
    correct = 0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            pred = model(x).argmax(dim=1)
            correct += (pred == y).sum().item()

    acc = correct / len(test_loader.dataset)
    print(f"Epoch {epoch+1:2d} | Loss: {train_loss/len(train_loader):.4f} | Test Acc: {acc:.4f}")

    # Advance the learning-rate schedule once per epoch, after the optimizer updates
    scheduler.step()
典型输出(CPU 上训练约 2-3 分钟):
Epoch 1 | Loss: 0.2831 | Test Acc: 0.9703
Epoch 2 | Loss: 0.1219 | Test Acc: 0.9783
...
Epoch 10 | Loss: 0.0401 | Test Acc: 0.9841
8. 模型的保存与加载
# Recommended: save only the parameters (small file, better version compatibility)
torch.save(model.state_dict(), 'mnist_model.pth')
# To load: rebuild the architecture first, then load the parameters into it
model = MnistNet()
# map_location="cpu" lets a checkpoint saved from a GPU model load on a CPU-only machine;
# weights_only=True restricts unpickling to tensors and plain containers.
model.load_state_dict(torch.load('mnist_model.pth', map_location="cpu", weights_only=True))
model.eval()
# Save the whole model object (strongly tied to the class's import path)
torch.save(model, 'mnist_full_model.pth')
# SECURITY: loading a fully pickled model can execute arbitrary code, so only load
# files you trust. weights_only=False is required here because recent PyTorch
# releases default to weights_only=True, which rejects pickled nn.Module objects.
loaded_model = torch.load('mnist_full_model.pth', weights_only=False)
9. 从 NumPy 感知机到 PyTorch:概念对照
| 前序章节(NumPy 手写) | PyTorch 等价 | 说明 |
|---|---|---|
| Affine 层 | nn.Linear | 自动管理 W、b 参数 |
| Relu 层 | nn.ReLU() / F.relu() | 等价 |
| SoftmaxWithLoss | nn.CrossEntropyLoss | 内含 Softmax |
| numerical_gradient | loss.backward() | 自动微分,快几千倍 |
| SGD.update() | optimizer.step() | Adam、SGD 均可替换 |
| 手动 grads['W1'] | param.grad | 自动存储在 .grad |
PyTorch 本质上是对前序章节所有手写实现的工业级封装——理解了手写版本,PyTorch 的每一行代码都将变得透明可读。