PyTorch Basics

  • Preface

These notes were written at the end of March 2025, alongside the Bilibili course PyTorch深度学习快速入门教程(绝对通俗易懂!).

Introduction to Related Tools

torch.utils.data.Dataset

Bilibili P6 - P7

from torch.utils.data import Dataset
from PIL import Image
import os

# Subclass Dataset and override __init__(), __getitem__(), and __len__()
class MyData(Dataset):
    def __init__(self, rootdir, labeldir):
        self.rootdir = rootdir
        self.labeldir = labeldir
        self.path = os.path.join(self.rootdir, self.labeldir)
        self.imgPath = os.listdir(self.path)

    def __getitem__(self, index):
        imgName = self.imgPath[index]
        imgItemPath = os.path.join(self.rootdir, self.labeldir, imgName)
        img = Image.open(imgItemPath)
        label = self.labeldir
        return img, label

    def __len__(self):
        return len(self.imgPath)

rootdir = "hymenoptera_data/train"
antslabeldir = "ants"
beeslabeldir = "bees"
ants_dataset = MyData(rootdir, antslabeldir)
bees_dataset = MyData(rootdir, beeslabeldir)

train_dataset = ants_dataset + bees_dataset

Reading data in PyTorch mainly involves two classes: Dataset and DataLoader.

  1. Dataset

    • Provides a way to obtain the data and its labels

    • Defines how to get each individual sample and its label

    • Tells us how many samples there are in total

  2. DataLoader

  • Packages the data into the different forms the network will consume later (batches); a quick usage sketch of the Dataset above follows this list
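
A minimal usage sketch (continuing the MyData example above; how DataLoader batches these samples is covered in its own section below):

img, label = ants_dataset[0]   # __getitem__: one sample and its label ("ants")
print(len(ants_dataset))       # __len__: number of ant images
print(len(train_dataset))      # the concatenated dataset: ants + bees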

torch.utils.tensorboard

Bilibili P8-9

from torch.utils.tensorboard import SummaryWriter  # import SummaryWriter
import numpy as np
from PIL import Image

writer = SummaryWriter("./logs")  # create a writer that logs to the "./logs" directory
img_PIL = Image.open("hymenoptera_data/train/bees/16838648_415acd9e3f.jpg")
img_arr = np.array(img_PIL)

# writer.add_image("test", img_arr, 2, dataformats="HWC")

# y = 2x
for i in range(100):
    writer.add_scalar("y=2x", 2*i, i)  # plot the function y = 2x
writer.close()

# add_scalar
add_scalar(tag, scalar_value, global_step=None, walltime=None, new_style=False, double_precision=False)
# tag: title of the chart; scalar_value: the y-axis value; global_step: the x-axis value

# add_image
add_image(tag, img_tensor, global_step=None, walltime=None, dataformats='CHW')
# tag: title of the image; img_tensor: the image; global_step: step number

To open the TensorBoard log files:

tensorboard --logdir=logs
  • Run this in the Command Prompt; make sure the environment name shown in parentheses in the prompt is the environment in which PyTorch is installed

  • Writing several logs with the same tag can cause display errors; delete the redundant log files

torchvision.transforms

Bilibili P10-13

The tensor type

How do we use transforms, and how do we convert an image to a tensor?

from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms  # import transforms
from PIL import Image

# How do we use transforms, and how do we convert to a tensor?
img_path = "hymenoptera_data/train/ants/0013035.jpg"
img = Image.open(img_path)
tensor_tans = transforms.ToTensor()  # create the tool: conversion to the Tensor type
img_tensor = tensor_tans(img)        # apply the tool

writer = SummaryWriter("logs_transforms")
writer.add_image("tensor image", img_tensor)
writer.close()

Why use the tensor type?

  • A tensor carries many of the attributes that neural-network computation needs (see the sketch below)
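
A minimal sketch (my own addition, not from the course) of the bookkeeping a tensor carries compared with a plain NumPy array or PIL image:

import torch

t = torch.ones(2, 3, requires_grad=True)
print(t.dtype)          # data type, e.g. torch.float32
print(t.device)         # where the tensor lives, e.g. cpu or cuda:0
print(t.requires_grad)  # whether autograd tracks operations on it
print(t.grad)           # gradient buffer (None until backward() has been called)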

Using some of the tools in transforms

from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

img = Image.open("hymenoptera_data/train/bees/16838648_415acd9e3f.jpg")
writer = SummaryWriter("logs_transforms")

# ToTensor(): convert to a tensor
trans_totensor = transforms.ToTensor()
img_tensor = trans_totensor(img)

writer.add_image("ToTensor image", img_tensor)

# Normalize(): normalize the image
trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
img_norm = trans_norm(img_tensor)
writer.add_image("Normalize image", img_norm)

# Resize(): resize the image; the result is still a PIL Image
trans_resize = transforms.Resize((224, 224))
img_resize = trans_resize(img)
img_resize = trans_totensor(img_resize)
writer.add_image("Resize image", img_resize)

# Compose(): chain transforms; the output of one is the input of the next
trans_resize_2 = transforms.Resize(512)
trans_compose = transforms.Compose([trans_resize_2, trans_totensor])
img_resize_2 = trans_compose(img)
writer.add_image("Resize_2 image", img_resize_2)

# RandomCrop(): randomly crop a region of the given size; it must not be larger than the original image
trans_randcrop = transforms.RandomCrop([128, 256])
trans_compose2 = transforms.Compose([trans_randcrop, trans_totensor])

for i in range(10):
    img_crop = trans_compose2(img)
    writer.add_image("Crop image", img_crop, i)
writer.close()

torchvision.datasets

Bilibili P14

Similar to the Dataset in torch.utils.data (in fact, each dataset here is a subclass of torch.utils.data.Dataset)

import torchvision
from torch.utils.tensorboard import SummaryWriter

dataset_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])
train_set = torchvision.datasets.CIFAR10(root="./tran_dataset", train=True, download=True, transform=dataset_transform)
test_set = torchvision.datasets.CIFAR10(root="./tran_dataset", train=False, download=True, transform=dataset_transform)

# img, target = test_set[0]  # each item is (image, class label)
# img.show()

print(train_set[0])
writer = SummaryWriter("logs_transforms_database")
for i in range(10):
    img, target = test_set[i]
    writer.add_image("test_set", img, i)
writer.close()

How to use torchvision.datasets (CIFAR10 as an example)

torchvision.datasets.CIFAR10(root: Union[str, Path], train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
# root: path to the dataset
# train: True for the training set, False for the test set
# transform: transforms applied to the images in the dataset, usually a transforms.Compose
# download: True to download the dataset automatically, False not to; usually kept as True

Official documentation for the various datasets

torch.utils.data.DataLoader

Bilibili P15

import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

test_set = torchvision.datasets.CIFAR10("./tran_dataset", train=False, download=True, transform=torchvision.transforms.ToTensor())

testloader = DataLoader(test_set, batch_size=64, shuffle=True, drop_last=True)  # use DataLoader

img, target = test_set[0]

writer = SummaryWriter("logs_transforms_database")

i = 0
for data in testloader:
    imgs, targets = data
    writer.add_images("test_set1", imgs, i)  # note: add_images, with an "s"
    i += 1
writer.close()

Note: the batched imgs returned by the DataLoader must be shown with SummaryWriter.add_images(), not add_image().

torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=None, sampler=None, batch_sampler=None, num_workers=0, collate_fn=None, pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None, multiprocessing_context=None, generator=None, *, prefetch_factor=None, persistent_workers=False, pin_memory_device='', in_order=True)
# dataset: a torch.utils.data.Dataset, i.e. our dataset
# batch_size: how many samples to fetch per batch
# shuffle: sample in random order; usually set to True
# num_workers: number of worker processes used for loading; defaults to 0, and values greater than 0 may cause errors on Windows
# drop_last: each fetch takes batch_size samples; whether to drop the leftover samples that do not fill a batch

Neural Networks: torch.nn

Official documentation

nn == neural network

Basic skeleton (Containers)

Official documentation

torch.nn.Module

Bilibili P16

Official documentation

import torch
from torch import nn  # import nn

# Build a neural network: subclass nn.Module and override these two functions
class Mynn(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):  # define the computation
        x += 1
        return x

mynn = Mynn()
x = torch.tensor(1.0)
output = mynn(x)
print(output)

torch.nn.Sequential

Bilibili P22

Official documentation

  • Similar to torchvision.transforms.Compose(): it combines many operations into one
  • Keeps the code clean and tidy

import torch
from torch import nn
from torch.nn import Flatten
from torch.utils.tensorboard import SummaryWriter


class Mynn(nn.Module):
    def __init__(self):
        super().__init__()
        # self.conv1 = nn.Conv2d(3, 32, 5, padding=2)
        # self.maxpool1 = nn.MaxPool2d(2)
        # self.conv2 = nn.Conv2d(32, 32, 5, padding=2)
        # self.maxpool2 = nn.MaxPool2d(2)
        # self.conv3 = nn.Conv2d(32, 64, 5, padding=2)
        # self.maxpool3 = nn.MaxPool2d(2)
        # self.flatten = Flatten()
        # self.linear1 = nn.Linear(1024, 64)
        # self.linear2 = nn.Linear(64, 10)

        # Wrap the operations above in nn.Sequential()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        # x = self.conv1(x)
        # x = self.maxpool1(x)
        # x = self.conv2(x)
        # x = self.maxpool2(x)
        # x = self.conv3(x)
        # x = self.maxpool3(x)
        # x = self.flatten(x)
        # x = self.linear1(x)
        # x = self.linear2(x)

        # Simply call self.model1()
        x = self.model1(x)
        return x

mynn = Mynn()
print(mynn)

input = torch.ones(64, 3, 32, 32)
output = mynn(input)
print(output.shape)


writer = SummaryWriter("./logs_sq")
writer.add_graph(mynn, input)  # visualize the structure graph of the network
writer.close()

Convolution Layers

Official documentation

The convolution operation

Bilibili P17

import torch
import torch.nn.functional as F  # torch.nn.functional is introduced first, but torch.nn is what we will usually use later

# Input
input_t = torch.tensor([[1, 2, 0, 3, 1],
                        [0, 1, 2, 3, 1],
                        [1, 2, 1, 0, 0],
                        [5, 2, 3, 1, 1],
                        [2, 1, 0, 1, 1]])
# Convolution kernel
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])

print(input_t.shape)  # output: torch.Size([5, 5])
print(kernel.shape)   # output: torch.Size([3, 3])

# torch.reshape() changes the shapes to what conv2d() expects: (batch, channels, H, W)
input_t = torch.reshape(input_t, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))

print(input_t.shape)  # output: torch.Size([1, 1, 5, 5])
print(kernel.shape)   # output: torch.Size([1, 1, 3, 3])

# Perform the convolution
output = F.conv2d(input_t, kernel, stride=1)
print(output)
'''
Output:
tensor([[[[10, 12, 12],
          [18, 16, 16],
          [13,  9,  3]]]])
'''

output2 = F.conv2d(input_t, kernel, stride=2)
print(output2)
'''
Output:
tensor([[[[10, 12],
          [13,  3]]]])
'''

output3 = F.conv2d(input_t, kernel, stride=1, padding=1)
print(output3)
'''
Output:
tensor([[[[ 1,  3,  4, 10,  8],
          [ 5, 10, 12, 12,  6],
          [ 7, 18, 16, 16,  8],
          [11, 13,  9,  3,  4],
          [14, 13,  9,  7,  4]]]])
'''
(Figures: convolution results for stride=1 padding=0, stride=1 padding=2, stride=2 padding=0, and stride=2 padding=1)
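
The spatial output size follows the standard Conv2d formula; a small sketch (my own addition) that reproduces the shapes seen above:

# Output height/width of a 2D convolution (assuming dilation=1)
def conv2d_out_size(in_size, kernel_size, stride=1, padding=0):
    return (in_size + 2 * padding - kernel_size) // stride + 1

print(conv2d_out_size(5, 3, stride=1, padding=0))  # 3
print(conv2d_out_size(5, 3, stride=2, padding=0))  # 2
print(conv2d_out_size(5, 3, stride=1, padding=1))  # 5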

torch.nn.Conv2d()

Bilibili P18

Official documentation (Conv2d, PyTorch 2.6)

torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)
# in_channels: number of channels of the input image
# out_channels: number of channels of the output
# kernel_size: size of the convolution kernel (3 -> 3x3; (2, 5) -> 2x5)
# stride: step by which the kernel moves horizontally and vertically; default 1
# padding: padding added to the borders; default 0

import torch
import torchvision
from torch import nn
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(root='./dataset', train=False, download=True, transform=torchvision.transforms.ToTensor())
dataloader = torch.utils.data.DataLoader(dataset, batch_size=64)

class Mynn(nn.Module):
    def __init__(self):
        super().__init__()
        # Define the convolution
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        return self.conv1(x)

writer = SummaryWriter("./logs")
i = 0
mynn = Mynn()
for data in dataloader:
    imgs, targets = data
    output = mynn(imgs)
    print(output.shape)  # output: torch.Size([64, 6, 30, 30])
    print(imgs.shape)    # output: torch.Size([64, 3, 32, 32])
    writer.add_images('input', imgs, i)
    # Color images have 3 channels and a 6-channel image cannot be displayed, so reshape output back to 3 channels;
    # -1 tells reshape to work out the batch dimension automatically
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images('output', output, i)
    i += 1

writer.close()

Pooling Layers

Official documentation

torch.nn.MaxPool2d()

Bilibili P19

Official documentation

torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
# kernel_size: size of the pooling window
# stride: step by which the pooling window moves; note the default equals kernel_size
# padding: padding added to the borders
# dilation: dilation ("atrous") spacing between window elements
# ceil_mode: when the window partially moves beyond the input border, whether to keep the partial window. True: keep; False: discard

Dilated ("atrous") convolution: the kernel elements are spaced apart (see the sketch below the figure)

(Figure: a dilated convolution kernel)
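
A minimal sketch (my own addition) of what dilation does, reusing a 5x5 input and a 3x3 kernel: with dilation=2 the 3x3 kernel effectively spans 5x5, so only one output position fits.

import torch
import torch.nn.functional as F

input_t = torch.ones(1, 1, 5, 5)
kernel = torch.ones(1, 1, 3, 3)

print(F.conv2d(input_t, kernel, stride=1, dilation=1).shape)  # torch.Size([1, 1, 3, 3])
print(F.conv2d(input_t, kernel, stride=1, dilation=2).shape)  # torch.Size([1, 1, 1, 1])
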
import torch
from torch import nn
from torch.nn import MaxPool2d

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)  # dtype makes the input floating point; recent versions also accept integers
input = torch.reshape(input, (-1, 1, 5, 5))  # reshape the input to what MaxPool2d expects

class Mynn(nn.Module):
    def __init__(self):
        super().__init__()
        self.maxpool_ceil_True = MaxPool2d(kernel_size=3, ceil_mode=True)
        self.maxpool_ceil_False = MaxPool2d(kernel_size=3, ceil_mode=False)

    def forward(self, x):
        output1 = self.maxpool_ceil_True(x)
        output2 = self.maxpool_ceil_False(x)
        return output1, output2

mynn = Mynn()
output1, output2 = mynn(input)
print(output1)
'''
Output:
tensor([[[[2., 3.],
          [5., 1.]]]])
'''

print(output2)
'''
Output:
tensor([[[[2.]]]])
'''

Why max pooling?

It greatly reduces the amount of data while keeping the salient features of the original data, which speeds up computation.

# Max-pool the images of a dataset
import torch
import torchvision
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(root='./dataset', train=False, download=True, transform=torchvision.transforms.ToTensor())
dataloader = torch.utils.data.DataLoader(dataset, batch_size=64)


class Mynn(nn.Module):
    def __init__(self):
        super().__init__()
        self.maxpool = MaxPool2d(kernel_size=3, ceil_mode=False)

    def forward(self, x):
        output = self.maxpool(x)
        return output

mynn = Mynn()
writer = SummaryWriter("./logs_maxpool")
i = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, i)
    output = mynn(imgs)
    writer.add_images("output", output, i)
    i += 1

writer.close()

Intuitively, max-pooled images look as if they had been pixelated (a mosaic effect).

Padding Layers

Official documentation

Bilibili P20

This is just the border padding described earlier; these layers are almost never needed directly.

Non-linear Activations

Official documentation

torch.nn.ReLU()

Bilibili P20

Official documentation

  • Zeroes out the negative values of the input and leaves the positive values unchanged

(Figure: the ReLU activation function)

import torch
from torch import nn

input = torch.tensor([[1, -0.5],
                      [-1, 3]])
torch.reshape(input, (-1, 1, 2, 2))  # note: the result is not assigned; ReLU works element-wise, so the 2x2 tensor is used as-is
print(input.size())

class Mynn(nn.Module):
    def __init__(self):
        super().__init__()
        self.relu1 = nn.ReLU()

    def forward(self, x):
        return self.relu1(x)

mynn = Mynn()
output = mynn(input)
print(output)
'''
Output:
tensor([[1., 0.],
        [0., 3.]])
'''

torch.nn.Sigmoid()

Bilibili P20

Official documentation

(Figure: the Sigmoid activation function)

from torch import nn
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(root="./dataset", train=False, download=True, transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=64)

class Mynn(nn.Module):
    def __init__(self):
        super().__init__()
        self.sigmoid1 = nn.Sigmoid()

    def forward(self, x):
        return self.sigmoid1(x)

mynn = Mynn()
writer = SummaryWriter("./logs_sigmoid")
i = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, i)
    output = mynn(imgs)
    writer.add_images("output", output, i)
    i += 1
writer.close()

The various non-linear activation functions are used in basically the same way; only the underlying formulas differ.

Linear Layers

Bilibili P21

Official documentation

import torch
import torchvision
from torch import nn

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor())
dataloader = torch.utils.data.DataLoader(dataset, batch_size=64, drop_last=True)


class Mynn(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(196608, 10)

    def forward(self, x):
        output = self.linear(x)
        return output

mynn = Mynn()

for data in dataloader:
    imgs, targets = data
    print(imgs.shape)  # output: torch.Size([64, 3, 32, 32])
    output_0 = torch.reshape(imgs, (1, 1, 1, -1))  # one option: reshape to (1, 1, 1, 196608)
    output = torch.flatten(imgs)  # equivalent in effect; torch.flatten() spreads the data out into one dimension
    print(output.shape)  # output: torch.Size([196608])
    output = mynn(output)
    print(output.shape)  # output: torch.Size([10])

Other less commonly used layers

Normalization Layers

Bilibili P21

Official documentation

  • Can speed up the training of a neural network (see the sketch below)
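
A minimal sketch (my own addition, not from the course) of the most common normalization layer, nn.BatchNorm2d; num_features must match the number of channels of the input:

import torch
from torch import nn

bn = nn.BatchNorm2d(num_features=3)  # one learned scale/shift pair per channel
x = torch.randn(64, 3, 32, 32)       # a batch of CIFAR10-sized images
y = bn(x)
print(y.shape)                       # torch.Size([64, 3, 32, 32]); the shape is unchanged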

Recurrent Layers

Bilibili P21

Official documentation

Transformer Layers

Bilibili P21

Official documentation

Dropout Layers

Bilibili P21

Official documentation

  • Randomly sets some elements of the input to zero, which helps prevent overfitting (see the sketch below)
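
A minimal sketch (my own addition) showing that nn.Dropout only takes effect in training mode:

import torch
from torch import nn

dropout = nn.Dropout(p=0.5)  # each element is zeroed with probability 0.5
x = torch.ones(2, 8)

dropout.train()              # training mode: elements are dropped and the rest are scaled by 1/(1-p)
print(dropout(x))
dropout.eval()               # evaluation mode: Dropout is a no-op
print(dropout(x))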

Sparse Layers

Bilibili P21

Official documentation

  • Mainly used for natural language processing (see the sketch below)
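
A minimal sketch (my own addition) of the main sparse layer, nn.Embedding, which maps integer token ids to learned vectors:

import torch
from torch import nn

embedding = nn.Embedding(num_embeddings=1000, embedding_dim=16)  # vocabulary of 1000 tokens, 16-dimensional vectors
token_ids = torch.tensor([[3, 41, 7], [12, 0, 999]])             # a batch of 2 sequences of length 3
vectors = embedding(token_ids)
print(vectors.shape)                                             # torch.Size([2, 3, 16])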

Loss Functions

Official documentation

  • Purpose:

    1. Measure the gap between the actual output and the target

    2. Provide a basis for updating the network (backpropagation)

torch.nn.L1Loss()

Bilibili P23

Official documentation

  • Computes the mean or the sum of the absolute differences between the input and the target

import torch
from torch.nn import L1Loss

input = torch.tensor([[1, 2, 3]], dtype=torch.float)  # L1Loss() requires floating-point data
target = torch.tensor([[1, 2, 5]], dtype=torch.float)

# input = torch.reshape(input, (1, 1, 1, 3))
# target = torch.reshape(target, (1, 1, 1, 3))
# Older versions required a batch dimension; current versions do not, so the reshape can be dropped

loss1 = L1Loss()                 # reduction defaults to 'mean', i.e. take the average
loss2 = L1Loss(reduction='sum')  # take the sum

ret1 = loss1(input, target)
ret2 = loss2(input, target)

print(ret1)  # output: tensor(0.6667)
print(ret2)  # output: tensor(2.)

torch.nn.MSELoss()

Bilibili P23

Official documentation

  • Computes the mean or the sum of the squared differences

import torch
from torch.nn import MSELoss

input = torch.tensor([[1, 2, 3]], dtype=torch.float)
target = torch.tensor([[1, 2, 5]], dtype=torch.float)

# input = torch.reshape(input, (1, 1, 1, 3))
# target = torch.reshape(target, (1, 1, 1, 3))

loss = MSELoss()
ret = loss(input, target)
print(ret)  # output: tensor(1.3333)

The reduction parameter of MSELoss() behaves the same as in L1Loss().

torch.nn.CrossEntropyLoss()

Bilibili P23

Official documentation

CrossEntropyLoss: cross entropy

  • The calculation is more involved; see the official documentation for the details

import torch
from torch.nn import CrossEntropyLoss

x = torch.tensor([0.1, 0.2, 0.3])
y = torch.tensor([1])

x = torch.reshape(x, (1, 3))  # reshape the input to (batch, number of classes)

loss = CrossEntropyLoss()
ret = loss(x, y)
print(ret)  # output: tensor(1.1019)
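
As a sanity check (my own addition, not from the course), 1.1019 is just the negative log-softmax of the target class: loss = -x[1] + log(exp(0.1) + exp(0.2) + exp(0.3)).

import torch

x = torch.tensor([0.1, 0.2, 0.3])
manual = -x[1] + torch.log(torch.exp(x).sum())
print(manual)  # tensor(1.1019)
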
  • Mainly used in classification problems, together with backpropagation

import torchvision
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import Flatten
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(root='./dataset', train=False, download=True, transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=1)

class Mynn(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x

loss = nn.CrossEntropyLoss()  # define the cross-entropy loss

mynn = Mynn()
for data in dataloader:
    imgs, targets = data
    outputs = mynn(imgs)

    ret = loss(outputs, targets)  # compute the cross entropy
    ret.backward()                # backpropagation; see the optimizer section below

Optimizers (torch.optim)

Bilibili P24

Official documentation

# ......
# The dataset preparation and the network definition are the same as above; that code is omitted here
mynn = Mynn()
loss = nn.CrossEntropyLoss()  # loss function
optim = torch.optim.SGD(mynn.parameters(), lr=0.01)  # define the optimizer (SGD as an example)

for epoch in range(20):  # number of training epochs
    running_loss = 0.0
    for data in dataloader:  # one pass over the data
        imgs, targets = data
        outputs = mynn(imgs)

        ret = loss(outputs, targets)
        optim.zero_grad()    # zero the gradients so the previous step does not interfere
        ret.backward()       # backpropagation; fills in the gradients of the network
        optim.step()         # optimizer step: adjust the network parameters using the gradients from backpropagation
        running_loss += ret  # accumulate this epoch's loss
    print(running_loss)
'''
Output:
tensor(18773.7949, grad_fn=<AddBackward0>)
tensor(16280.6289, grad_fn=<AddBackward0>)
tensor(15427.4502, grad_fn=<AddBackward0>)
tensor(15957.1074, grad_fn=<AddBackward0>)
tensor(17857.4180, grad_fn=<AddBackward0>)
......
'''

Using Network Models

Using and modifying existing network models

VGG16 as an example

Bilibili P25

import torch
import torchvision.datasets
import os
os.environ['TORCH_HOME'] = 'C:/Users/29969/PycharmProjects/vgg'  # change this environment variable to choose where models are downloaded
from torch import nn

# The ImageNet2012 dataset could be downloaded here, but it is enormous, so skip it
# train_data = torchvision.datasets.ImageNet("./dataset", split='train', download=True, transform=torchvision.transforms.ToTensor())

# Create VGG16; the pretrained weights are downloaded automatically
vgg16_f = torchvision.models.vgg16(weights=None)  # weights=None: no pretrained parameters, only the architecture
vgg16_t = torchvision.models.vgg16(weights='IMAGENET1K_V1')

train_data = torchvision.datasets.CIFAR10("./data", train=True, download=True, transform=torchvision.transforms.ToTensor())

The last step of the VGG16 model is a linear layer with 4096 inputs and 1000 outputs, which means the model classifies 1000 classes.

Our CIFAR10 dataset has only 10 classes, so how can VGG16 be applied to CIFAR10?

# Method 1
# Append a linear layer at the end of the model, with 1000 inputs and 10 outputs
vgg16_t.classifier.add_module("add_linear", nn.Linear(1000, 10))
print(vgg16_t)

# Method 2
# Replace the final linear layer so it takes 4096 inputs and produces 10 outputs
vgg16_f.classifier[6] = nn.Linear(4096, 10)
print(vgg16_f)

Saving and loading models

Bilibili P26

Saving

import torchvision
import torch

vgg16 = torchvision.models.vgg16(weights=None)

# Saving method 1
# Saves both the structure and the parameters
torch.save(vgg16, "vgg16_method1.pth")

# Saving method 2 (recommended)
# Saves only the parameters, as a dictionary (state_dict)
torch.save(vgg16.state_dict(), "vgg16_method2.pth")

# Pitfall
class Mynn(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(3, 4, 3)

    def forward(self, x):
        output = self.conv1(x)
        return output

mynn = Mynn()
torch.save(mynn, "mynn_method1.pth")

Method 2 is the officially recommended one; it produces smaller files than method 1.

Loading

import torchvision
import torch

# Loading method 1
model1 = torch.load("vgg16_method1.pth", weights_only=False)
print(model1)

# Loading method 2
model2 = torchvision.models.vgg16(weights=None)
model2.load_state_dict(torch.load("vgg16_method2.pth"))
print(model2)

# Pitfall
# The model class must be imported before loading
# from model_save import Mynn

model3 = torch.load("mynn_method1.pth", weights_only=False)
print(model3)
# Error caused by not importing Mynn: AttributeError: Can't get attribute 'Mynn' on <module '__main__' from 'C:\\Users\\29969\\PycharmProjects\\VGG\\model_load.py'>

  • Notes
    1. The loading method must match the saving method
    2. Older versions of PyTorch have the pitfall above (the class must be imported before loading); current versions seem not to require it (a state_dict sketch for the custom model follows below)
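
A minimal sketch (my own addition, not from the course) of the recommended state_dict workflow applied to the custom Mynn class from the saving example; the file name mynn_method2.pth is made up for illustration:

import torch

# assumes the Mynn class definition from the saving example is in scope
mynn = Mynn()
torch.save(mynn.state_dict(), "mynn_method2.pth")  # save only the parameters

mynn_loaded = Mynn()                               # rebuild the structure first
mynn_loaded.load_state_dict(torch.load("mynn_method2.pth"))
print(mynn_loaded)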

Model Training

The complete model-training workflow

Bilibili P27-29

  1. Build the network in model.py

import torch
from torch import nn

# Build the neural network
class Mynn(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(in_features=64 * 4 * 4, out_features=64),
            nn.Linear(in_features=64, out_features=10)
        )

    def forward(self, x):
        x = self.model(x)
        return x

# Check that the network is correct
if __name__ == '__main__':
    mynn = Mynn()
    input = torch.ones(64, 3, 32, 32)
    output = mynn(input)
    print(output.shape)
  2. Train the network in train.py

import torch
import torchvision
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

# Import our model
from model import Mynn

# Prepare the datasets
train_data = torchvision.datasets.CIFAR10(root='./data', download=True, train=True, transform=transforms.ToTensor())
test_data = torchvision.datasets.CIFAR10(root='./data', download=True, train=False, transform=transforms.ToTensor())

# Dataset sizes
train_data_size = len(train_data)
test_data_size = len(test_data)
print("Training set size: {}".format(train_data_size))  # 50000
print("Test set size: {}".format(test_data_size))       # 10000

# Load the datasets with DataLoader
train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=64)
test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=64)

# Create the network model
mynn = Mynn()

# Loss function
loss_func = nn.CrossEntropyLoss()

# Optimizer (stochastic gradient descent)
learning_rate = 0.001
optim = torch.optim.SGD(mynn.parameters(), lr=learning_rate)

# Add TensorBoard logging
writer = SummaryWriter("./logs")

# Train the network
total_train_step = 0  # number of training steps
total_test_step = 0   # number of test evaluations
epoch = 10            # number of training epochs
for i in range(epoch):
    print("-------------- Epoch {} --------------".format(i + 1))

    mynn.train()  # put the network into training mode; only required for certain layers (Dropout, BatchNorm, ...)
    for data in train_dataloader:
        imgs, targets = data
        outputs = mynn(imgs)                # forward pass through the network
        loss = loss_func(outputs, targets)  # compute the loss
        optim.zero_grad()                   # zero the gradients
        loss.backward()                     # backpropagation
        optim.step()                        # optimizer step: update the parameters

        total_train_step += 1
        if total_train_step % 100 == 0:
            print("Step {}, loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("loss_train", loss.item(), total_train_step)

    # Evaluation
    mynn.eval()  # put the network into evaluation mode; only required for certain layers
    total_test_loss = 0      # accumulated loss over the test set
    total_test_accuracy = 0  # number of correct predictions
    with torch.no_grad():    # no gradients needed, we are only evaluating
        for data in test_dataloader:
            imgs, targets = data
            outputs = mynn(imgs)
            loss = loss_func(outputs, targets)
            total_test_loss += loss.item()
            # Accumulate the number of correct predictions
            total_test_accuracy += (outputs.argmax(1) == targets).sum()
    print("Loss over the whole test set: {}".format(total_test_loss))
    print("Accuracy on the test set: {}".format(total_test_accuracy / test_data_size))
    writer.add_scalar("loss_test", total_test_loss, total_train_step)
    writer.add_scalar("accuracy", total_test_accuracy / test_data_size, total_train_step)
    total_test_step += 1

    # Save the model after each epoch
    torch.save(mynn, 'mynn_{}'.format(total_test_step))

writer.close()

Training on the GPU

  • Only the network model, the loss function, and the data can be moved to the GPU; the dataset object, the optimizer, and so on cannot
  • If you have no GPU, you can train on Google Colab (requires a connection outside the firewall); Bilibili P30

Method 1: .cuda()

Bilibili P30

# Network model
mynn = Mynn()
mynn = mynn.cuda()

# Loss function
loss_func = nn.CrossEntropyLoss()
loss_func = loss_func.cuda()

# Data
imgs, targets = data  # taken out of the DataLoader
imgs = imgs.cuda()
targets = targets.cuda()
  • A better practice is to check whether a GPU is available before calling .cuda(), for example:

if torch.cuda.is_available():
    mynn = mynn.cuda()

Method 2: .to()

Bilibili P31

# Define the training device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # use the GPU if one is available, otherwise the CPU

# Move things to the GPU
# Network model
mynn = Mynn()
mynn.to(device)
# Loss function
loss_func = nn.CrossEntropyLoss()
loss_func.to(device)
# Data
imgs, targets = data  # taken out of the DataLoader
imgs = imgs.to(device)
targets = targets.to(device)