startup

  1. Create a Python project.
  $ python -m venv venv
  # Activate the virtual environment
  ## Linux
  $ source venv/bin/activate

  ## Windows
  $ venv\Scripts\activate

  $ pip install -r requirements.txt
  2. Install PyTorch

https://pytorch.org/get-started/locally/

$ pip install torch torchvision torchaudio

## GPU version (CUDA 12.1 build)
$ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

$ pip freeze > requirements.txt
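To confirm the install worked (and whether the CUDA build can actually see a GPU), a quick sanity check from Python:

import torch

print(torch.__version__)          # installed PyTorch version
print(torch.cuda.is_available())  # True only if a CUDA-capable GPU is visible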
  3. Use PyTorch
import torch
import numpy as np

# torch.from_numpy creates a tensor that shares memory with the numpy array
foo = torch.from_numpy(np.random.rand(3, 2))

# .numpy() converts back to a numpy array (again sharing the same memory)
foo.numpy()
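A small check (the variable names here are just for illustration) that from_numpy shares memory with the source array instead of copying it:

arr = np.ones((2, 2))
bar = torch.from_numpy(arr)
bar.mul_(2)   # in-place multiply on the tensor...
print(arr)    # ...also changes the original numpy array: all values are now 2.0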

Example: recognizing handwritten digits

The training set consists of 28*28-pixel images of handwritten digits, each flattened into a one-dimensional vector of width 784. The goal is to classify each image as one of the digits 0-9, so we first define a few constants:

INPUT_SIZE = 28 * 28 # 784
OUTPUT_SIZE = 10
BATCH_SIZE = 64

HIDDEN_SIZES = [128, 64]
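As a quick sketch of the flattening described above (the tensor here is random stand-in data, not MNIST):

# Flatten a fake batch of 64 single-channel 28x28 images into rows of 784 values
batch = torch.rand(BATCH_SIZE, 1, 28, 28)
flat = batch.view(BATCH_SIZE, -1)
print(flat.shape)  # torch.Size([64, 784])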

Create the model (a simple fully connected network)

There are three ways to write the model:

  1. basic
from torch import nn
import torch.nn.functional as F

class Network(nn.Module):
  def __init__(self):
    super().__init__()
    # Fully connected layers
    self.fc1 = nn.Linear(INPUT_SIZE, HIDDEN_SIZES[0]) # input size, output size
    self.fc2 = nn.Linear(HIDDEN_SIZES[0], HIDDEN_SIZES[1])
    # Output layer: handwritten digits fall into 10 classes (0-9)
    self.fc3 = nn.Linear(HIDDEN_SIZES[1], OUTPUT_SIZE)

  def forward(self, x):
    x = self.fc1(x) # pass the tensor through the first layer
    x = F.relu(x)
    x = self.fc2(x)
    x = F.relu(x)
    x = self.fc3(x)
    x = F.softmax(x, dim = 1)

    return x

model = Network()

model.fc1.bias.data.fill_(0)
model.fc1.weight.data.normal_(std = 0.01)
  2. Simplified with nn.Sequential
from torch import nn

model = nn.Sequential(
  nn.Linear(INPUT_SIZE, HIDDEN_SIZES[0]),
  nn.ReLU(),
  nn.Linear(HIDDEN_SIZES[0], HIDDEN_SIZES[1]),
  nn.ReLU(),
  nn.Linear(HIDDEN_SIZES[1], OUTPUT_SIZE),
  nn.Softmax(dim = 1)
)

  3. Simplified with named layers (OrderedDict)
from torch import nn
from collections import OrderedDict

model = nn.Sequential(OrderedDict([
  ('fc1', nn.Linear(INPUT_SIZE, HIDDEN_SIZES[0])),
  ('relu1', nn.ReLU()),
  ('fc2', nn.Linear(HIDDEN_SIZES[0], HIDDEN_SIZES[1])),
  ('relu2', nn.ReLU()),
  ('output', nn.Linear(HIDDEN_SIZES[1], OUTPUT_SIZE)),
  ('softmax', nn.Softmax(dim = 1))
]))

model.fc1.bias.data.fill_(0)
model.fc1.weight.data.normal_(std = 0.01)
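As a sanity check that the layer sizes match the constants (nn.Linear stores its weight as out_features x in_features):

print(model)                      # lists the named layers of the Sequential model
print(model.fc1.weight.shape)     # torch.Size([128, 784])
print(model.output.weight.shape)  # torch.Size([10, 64])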

Preprocessing

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import torch

import helper
import matplotlib.pyplot as plt
from torchvision import datasets, transforms

# Define a transform to normalize the data
transform = transforms.Compose([
  # 1. Convert to a tensor
  transforms.ToTensor(), 
  # 2. Normalize (MNIST images have a single channel, so one mean and one std)
  transforms.Normalize((0.5,), (0.5,))
])

# Download and load the training data (handwritten digit images)
trainset = datasets.MNIST(
  'MNIST_data/', 
  download = True, 
  train = True, 
  transform = transform
)

trainloader = torch.utils.data.DataLoader(trainset, batch_size = BATCH_SIZE, shuffle = True)

# Download and load the test data
testset = datasets.MNIST(
  'MNIST_data/',
  download = True,
  train = False,
  transform = transform
)

testloader = torch.utils.data.DataLoader(testset, batch_size = BATCH_SIZE, shuffle = True)

dataiter = iter(trainloader)
images, labels = next(dataiter)
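One batch from the DataLoader looks like this (the shapes follow from MNIST plus the BATCH_SIZE defined above):

print(images.shape)  # torch.Size([64, 1, 28, 28]) - 64 single-channel 28x28 images
print(labels.shape)  # torch.Size([64])            - one digit label per image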

## Forward pass

images, labels = next(iter(trainloader))

images.resize_(images.shape[0], 1, INPUT_SIZE) # reshape in place to (batch, 1, 784)

### "ps" stands for (class) probabilities
ps = model.forward(images[0])

### Visualize the image together with the predicted class probabilities
helper.view_classify(images[0].view(1, 28, 28), ps) 
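`helper` is an external helper script (it appears to come with the course notebooks these notes follow) and is not defined here. If it is missing, a rough stand-in for view_classify could look like this (a minimal sketch, not the original implementation):

import numpy as np
import matplotlib.pyplot as plt

def view_classify_sketch(img, ps):
  # img: tensor of shape (1, 28, 28); ps: tensor of shape (1, 10) with class probabilities
  probs = ps.detach().numpy().squeeze()
  fig, (ax1, ax2) = plt.subplots(ncols = 2, figsize = (6, 3))
  ax1.imshow(img.squeeze().numpy(), cmap = 'gray')
  ax1.axis('off')
  ax2.barh(np.arange(10), probs)
  ax2.set_yticks(np.arange(10))
  ax2.set_title('Class probability')
  plt.tight_layout()
  plt.show()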

Train the model

Simulating a single training step

from torch import optim

### Define the loss function
### (nn.CrossEntropyLoss applies log-softmax internally, so it expects raw scores)
criterion = nn.CrossEntropyLoss()

### Optimizer: stochastic gradient descent
optimizer = optim.SGD(model.parameters(), lr = 0.01) # learning rate

print('Before', model.fc1.weight)

### Get a batch of images and labels
images, labels = next(iter(trainloader))
images.resize_(images.shape[0], INPUT_SIZE)

### Zero the gradients so they do not accumulate across steps
optimizer.zero_grad() 

### Forward pass
output = model.forward(images)

### Compute the loss
loss = criterion(output, labels)

### Backward pass
loss.backward()

print('Gradient - ', model.fc1.weight.grad)

### Update the weights
optimizer.step()
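After the step, the weights should have moved; printing them again makes the update visible:

print('After', model.fc1.weight)  # values now differ from the 'Before' printout above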

The full training loop

### Define the number of epochs; one epoch is one full pass over the dataset
epochs = 3       # go through the whole training set 3 times
print_every = 40 # print progress every 40 steps
steps = 0        # step counter

for e in range(epochs):
  ### Track how the loss changes while training runs
  running_loss = 0
  for images, labels in trainloader:
    steps += 1
    images.resize_(images.shape[0], INPUT_SIZE)
    ### Zero the gradients
    optimizer.zero_grad()
    ### Forward pass
    output = model.forward(images)
    ### Compute the loss
    loss = criterion(output, labels)
    ### Backward pass
    loss.backward()
    ### Update the weights
    optimizer.step()

    ### Accumulate the loss
    running_loss += loss.item()

    if steps % print_every == 0:
      print(
        'Epoch: {}/{}...'.format(e+1, epochs), 
        'Loss: {:.4f}'.format(running_loss/print_every)
      )

      ### Reset the running loss
      running_loss = 0

Check the model's quality

images, labels = next(iter(trainloader))

img = images[0].view(1, INPUT_SIZE)

# Turn off gradients to speed up this part
with torch.no_grad():
  ps = model.forward(img)

# The model already ends in a Softmax layer, so its output is class probabilities
helper.view_classify(img.view(1, 28, 28), ps)
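The test set loaded earlier (testloader) is otherwise unused; a minimal sketch of measuring overall accuracy on it, using the model and constants defined above:

correct = 0
total = 0
with torch.no_grad():
  for images, labels in testloader:
    images = images.view(images.shape[0], INPUT_SIZE)  # flatten to (batch, 784)
    ps = model(images)              # class probabilities, shape (batch, 10)
    preds = ps.argmax(dim = 1)      # most likely class per image
    correct += (preds == labels).sum().item()
    total += labels.shape[0]

print('Test accuracy: {:.2%}'.format(correct / total))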

Save and load

## save
## Note: this assumes a Network class that stores its hidden layers in a
## `hidden_layers` ModuleList (e.g. fc_model.Network from the course helper files).
checkpoint = {
  'input_size': 784,
  'output_size': 10,
  'hidden_layers': [each.out_features for each in model.hidden_layers],
  'state_dict': model.state_dict()
}

torch.save(checkpoint, 'checkpoint.pth')

## load

def load_checkpoint(filepath):
  checkpoint = torch.load(filepath)
  model = fc_model.Network(
    checkpoint['input_size'],
    checkpoint['output_size'],
    checkpoint['hidden_layers'],
  )

  model.load_state_dict(checkpoint['state_dict'])

  return model
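Usage (still assuming fc_model is importable, as noted in the save step):

model = load_checkpoint('checkpoint.pth')
model.eval()  # switch to evaluation mode before inference
print(model)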

Refs