GoAI

V1

2023/04/08阅读：13主题：全栈蓝

# CNN经典网络理论与实战总结

​ ✨写在前面：强烈推荐给大家一个优秀的人工智能学习网站，内容包括人工智能基础、机器学习、深度学习神经网络等，详细介绍各部分概念及实战教程，通俗易懂，非常适合人工智能领域初学者及研究者学习。➡️点击跳转到网站。

CNN网络结构的发展（最全整理） - 专知

1.LeNet-5

class LeNet(nn.Module):
    """LeNet-style CNN: two conv/sigmoid/max-pool stages followed by three linear layers.

    The flatten size ``16 * 4 * 4`` matches single-channel 28x28 inputs
    (28 -> 24 -> 12 -> 8 -> 4 through the two 5x5 convolutions and 2x2 pools).
    The network outputs 10 class scores per image.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 6, 5),  # in_channels, out_channels, kernel_size
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2),  # kernel_size, stride
            nn.Conv2d(6, 16, 5),  # second convolution layer
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2),
        )
        self.fc = nn.Sequential(
            # 16 channels * 4 * 4 spatial positions after the conv stack.
            nn.Linear(16 * 4 * 4, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10),
        )

    def forward(self, img):
        """Return class scores of shape (batch, 10) for a batch of images."""
        feature = self.conv(img)
        # Flatten everything except the batch dimension before the linear layers.
        output = self.fc(feature.view(img.shape[0], -1))
        return output


# Inspect the layers of the network.
net = LeNet()
print(net)

# Fetch the data used to train the model: Fashion-MNIST is the training dataset.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

def evaluate_accuracy(data_iter, net,
                      device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    :param data_iter: iterable yielding ``(X, y)`` batches.
    :param net: either a ``torch.nn.Module`` or a plain Python function model.
    :param device: device the batches are moved to when ``net`` is a Module.
    :return: fraction of samples whose arg-max prediction equals the label.
    """
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval()  # evaluation mode: this disables dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train()  # switch back to training mode
            else:
                # Hand-written function model; the GPU is not considered here.
                if 'is_training' in net.__code__.co_varnames:
                    # The function accepts an is_training flag: turn it off.
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n

def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    """Train ``net`` with cross-entropy loss and print per-epoch statistics.

    :param net: model to train; it is moved to ``device``.
    :param train_iter: iterable of ``(X, y)`` training batches.
    :param test_iter: iterable of ``(X, y)`` batches used for test accuracy.
    :param batch_size: kept for interface compatibility; not read by this function.
    :param optimizer: optimizer already bound to ``net``'s parameters.
    :param device: device that the model and every batch are moved to.
    :param num_epochs: number of passes over ``train_iter``.
    """
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        # Reset per epoch so the reported loss is this epoch's mean batch loss
        # (a counter shared across epochs would shrink it artificially).
        batch_count = 0
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        # Evaluate on the same device the model was moved to.
        test_acc = evaluate_accuracy(test_iter, net, device)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f,time % .1fsec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n,
                 test_acc, time.time() - start))


# Learning rate 0.001, Adam as the optimizer, cross-entropy as the loss.
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)
plt.show()

代码实战：

2.AlexNet

Alexnet代码实现：

import time import matplotlib.pyplot as plt import torch from torch import nn, optim import torchvision import sys sys.path.append("..") import d2lzh_pytorch as d2l device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class AlexNet(nn.Module):
    """A slightly simplified AlexNet for single-channel images and 10 classes.

    The first linear layer expects ``256 * 5 * 5`` features, which is what the
    convolutional stack produces for 224x224 inputs.
    """

    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 96, 11, 4),  # in_channels, out_channels, kernel_size, stride
            nn.ReLU(),
            nn.MaxPool2d(3, 2),  # kernel_size, stride
            # Smaller convolution window; padding of 2 keeps the input and
            # output height/width equal, and the channel count is increased.
            nn.Conv2d(96, 256, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
            # Three consecutive convolutions with an even smaller window.
            # Except for the last one, the channel count grows further.
            # No pooling follows the first two, so height/width are kept.
            nn.Conv2d(256, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
        )
        # The fully connected layers are several times wider than LeNet's;
        # dropout layers are used to mitigate overfitting.
        self.fc = nn.Sequential(
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            # Output layer: 10 classes for Fashion-MNIST rather than the
            # 1000 classes used in the paper.
            nn.Linear(4096, 10),
        )

    def forward(self, img):
        """Return class scores of shape (batch, 10) for a batch of images."""
        feature = self.conv(img)
        # Flatten everything except the batch dimension before the linear layers.
        output = self.fc(feature.view(img.shape[0], -1))
        return output

# Print the network structure.
net = AlexNet()
print(net)

def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST'):
    """Build Fashion-MNIST train/test data loaders.

    NOTE(review): the published listing lost its ``def`` line, the trailing
    arguments of both ``FashionMNIST(...)`` calls and the ``DataLoader(...)``
    heads. The signature, the default ``root`` and the ``train`` /
    ``download`` / ``transform`` arguments below are a reconstruction from
    the surviving fragments -- confirm against the original helper.

    :param batch_size: number of samples per batch for both loaders.
    :param resize: optional target size; when truthy, images are resized
        before being converted to tensors.
    :param root: directory where the dataset is stored.
    :return: ``(train_iter, test_iter)`` data loaders.
    """
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())

    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, download=True, transform=transform)
    # Only the training loader shuffles its samples.
    train_iter = torch.utils.data.DataLoader(
        mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)
    test_iter = torch.utils.data.DataLoader(
        mnist_test, batch_size=batch_size, shuffle=False, num_workers=4)

    return train_iter, test_iter

batch_size = 128
# If an "out of memory" error is reported, reduce batch_size or resize.
# Reload the data at 224x224: AlexNet's first linear layer (256 * 5 * 5
# inputs) does not fit the un-resized 28x28 iterators created for LeNet.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

"""训练"""
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)
plt.show()

3.Inception网络

Inception的核心思想是通过增加网络深度和宽度的同时减少参数的方法来解决问题。Inception v1有22层，比AlexNet的8层或VGGNet的19层更深。但其计算量只有15亿次浮点运算，同时只有500万的参数量，仅为AlexNet的1/12，却有着更高的准确率。

Inception的前身为MLP卷积层。

MLP卷积层的思想是将CNN高维度特征转成低维度特征，将神经网络的思想融合在具体的卷积操作当中。直白的理解就是在网络中再放一个网络，即使每个卷积的通道中包含一个微型的多层网络，用一个网络来代替原来具体的卷积运算过程（卷积核的每个值与样本对应的像素点相乘，再将相乘后的所有结果加在一起生成新的像素点的过程）

Inception v1模型在原有的Inception模型基础上做了一些改进，原因是由于Inception的原始模型是将所有的卷积核都在上一层的所有输出上来做，那么5x5的卷积核所需的计算量就比较大，造成了特征图厚度很大。为了避免这一现象，在3x3前、5x5前、最大池化层后分别加上了1x1的卷积核，起到了降低特征图厚度的作用（其中1x1卷积主要用来降维）

Inception v2模型在v1模型基础上应用当时的主流技术，在卷积后加入BN层，使每一层的输出都归一化处理，缓解了内部协变量偏移（internal covariate shift）问题；同时还使用了梯度截断技术，增加了训练的稳定性。另外，Inception学习了VGG，用2个3x3的conv替代5x5，这既降低了参数数量，也提升了计算速度。

Inception v3没有再加入其他的技术，只是将原来的结构进行了调整，其最重要的一个改进是分解。

Inception v4结合残差连接技术的特点进行结构的优化调整。

4.残差网络（ResNet） (1)Resnet概念

1. 左图为基本的residual block，residual mapping为两个64通道的3x3卷积，输入输出均为64通道，可直接相加。该block主要使用在相对浅层网络，比如ResNet-34；

2. 右图为针对深层网络提出的block，称为“bottleneck” block，主要目的就是为了降维。首先通过一个1x1卷积将256维通道（channel）降到64通道，再经过一个64通道的3x3卷积，最后通过一个256通道的1x1卷积恢复。

1. 相比于VGG-19，ResNet没有使用全连接层，而使用了全局平均池化层，可以减少大量参数。VGG-19大量参数集中在全连接层；

2. ResNet-34中跳跃连接“实线”为identity mapping和residual mapping通道数相同，“虚线”部分指的是两者通道数不同，需要使用1x1卷积调整通道维度，使其可以相加。

import torch from torch import nn from torch.nn import functional as F from torch.utils.data import DataLoader from torchvision import datasets from torchvision import transforms from torch import nn, optim

# from torchvision.models import resnet18

class ResBlk(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a shortcut branch."""

    def __init__(self, ch_in, ch_out):
        """
        :param ch_in: number of input channels.
        :param ch_out: number of output channels.
        """
        super(ResBlk, self).__init__()

        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)

        # An empty Sequential passes its input through unchanged, so the
        # shortcut is the identity when the channel counts already match.
        self.extra = nn.Sequential()
        if ch_out != ch_in:
            # [b, ch_in, h, w] => [b, ch_out, h, w]
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=1),
                nn.BatchNorm2d(ch_out),
            )

    def forward(self, x):
        """
        :param x: [b, ch_in, h, w]
        :return: [b, ch_out, h, w]
        """
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Shortcut: the extra module maps [b, ch_in, h, w] => [b, ch_out, h, w]
        # so that both branches can be summed.
        out = self.extra(x) + out

        return out

class ResNet18(nn.Module):
    """Small residual classifier: a conv stem, two ``ResBlk`` blocks and a linear head.

    The head is ``nn.Linear(32 * 32 * 32, 10)``, i.e. it expects 32 channels
    at 32x32 resolution, so the network takes 32x32 inputs and never
    downsamples them.
    """

    def __init__(self):
        super(ResNet18, self).__init__()

        # Stem: [b, 3, h, w] => [b, 16, h, w].
        # NOTE(review): the published listing only kept BatchNorm2d(16) here,
        # which cannot accept a 3-channel image. A size-preserving 3 -> 16
        # convolution is restored; kernel_size=3 / padding=1 is an assumption,
        # any choice that keeps h and w matches the linear head below.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
        )
        # Followed by two residual blocks.
        # [b, 16, h, w] => [b, 16, h, w]
        self.blk1 = ResBlk(16, 16)
        # [b, 16, h, w] => [b, 32, h, w]
        self.blk2 = ResBlk(16, 32)

        # 32 channels * 32 * 32 spatial positions => 10 class scores.
        self.outlayer = nn.Linear(32 * 32 * 32, 10)

    def forward(self, x):
        """
        :param x: [b, 3, 32, 32]
        :return: class scores, [b, 10]
        """
        x = F.relu(self.conv1(x))

        # [b, 16, h, w] => [b, 32, h, w]
        x = self.blk1(x)
        x = self.blk2(x)

        # Flatten everything except the batch dimension for the linear head.
        x = x.view(x.size(0), -1)
        x = self.outlayer(x)

        return x

def main():
    """Train ``ResNet18`` on CIFAR-10 and print the test accuracy after every epoch.

    NOTE(review): the published listing lost several lines (the closing of
    both ``CIFAR10(...)`` calls, the ``DataLoader`` wrapping that ``batchsz``
    was defined for, and the optimizer construction). They are restored
    below; ``download=True``, the shuffle flags and the Adam learning rate
    are assumptions to confirm.
    """
    batchsz = 32

    cifar_train = datasets.CIFAR10('cifar', True, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor()
    ]), download=True)
    cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)

    cifar_test = datasets.CIFAR10('cifar', False, transform=transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor()
    ]), download=True)
    cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=False)

    # Iterators have no .next() method in Python 3; use the builtin instead.
    x, label = next(iter(cifar_train))
    print('x:', x.shape, 'label:', label.shape)

    # Fall back to the CPU when CUDA is unavailable instead of crashing.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = ResNet18().to(device)

    criteon = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(model)

    for epoch in range(1000):

        model.train()
        for batchidx, (x, label) in enumerate(cifar_train):
            # x: [b, 3, 32, 32]
            # label: [b]
            x, label = x.to(device), label.to(device)

            logits = model(x)
            # logits: [b, 10]
            # label:  [b]
            # loss: tensor scalar
            loss = criteon(logits, label)

            # backprop; clear stale gradients first so they do not accumulate
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Loss of the last training batch of this epoch.
        print(epoch, 'loss:', loss.item())

        model.eval()
        # test; no gradients are needed for evaluation
        with torch.no_grad():
            total_correct = 0
            total_num = 0
            for x, label in cifar_test:
                # x: [b, 3, 32, 32]
                # label: [b]
                x, label = x.to(device), label.to(device)

                # [b, 10]
                logits = model(x)
                # [b]
                pred = logits.argmax(dim=1)
                # [b] vs [b] => scalar
                correct = torch.eq(pred, label).float().sum().item()
                total_correct += correct
                total_num += x.size(0)

            acc = total_correct / total_num
            print(epoch, 'acc:', acc)

if __name__ == '__main__':
    main()

使用keras实现ResNet-18

from keras.layers import Input
from keras.layers import Conv2D, MaxPool2D, Dense, BatchNormalization, Activation, add, GlobalAvgPool2D
from keras.models import Model
from keras import regularizers
from keras.utils import plot_model
from keras import backend as K

def conv2d_bn(x, nb_filter, kernel_size, strides=(1, 1), padding='same'):
    """Apply conv2d -> batch normalization -> relu activation to ``x``.

    :param x: input tensor.
    :param nb_filter: number of convolution filters.
    :param kernel_size: size of the convolution window.
    :param strides: convolution strides, default (1, 1).
    :param padding: padding mode passed to ``Conv2D``, default 'same'.
    :return: the activated output tensor.
    """
    x = Conv2D(nb_filter,
               kernel_size=kernel_size,
               strides=strides,
               padding=padding,
               kernel_regularizer=regularizers.l2(0.0001))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    return x

def shortcut(input, residual):
    """Merge the shortcut (identity mapping) branch with the residual branch.

    Shapes are read as (batch, height, width, channels). When the two tensors
    differ in spatial size or channel count, the input is first projected
    with a 1x1 convolution so that they can be summed.

    :param input: tensor entering the residual block.
    :param residual: tensor produced by the block's convolutions.
    :return: element-wise sum of the (possibly projected) input and ``residual``.
    """
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    stride_height = int(round(input_shape[1] / residual_shape[1]))
    stride_width = int(round(input_shape[2] / residual_shape[2]))
    equal_channels = input_shape[3] == residual_shape[3]

    identity = input
    # If the dimensions differ, adjust them with a 1x1 convolution.
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        identity = Conv2D(filters=residual_shape[3],
                          kernel_size=(1, 1),
                          # Conv2D strides are ordered (height, width).
                          strides=(stride_height, stride_width),
                          kernel_regularizer=regularizers.l2(0.0001))(input)

    # The sum has to be returned: callers use this function's result as the
    # block output.
    return add([identity, residual])

def basic_block(nb_filter, strides=(1, 1)):
    """Return the basic ResNet building block, as used by ResNet-18 and ResNet-34.

    :param nb_filter: number of filters of both 3x3 convolutions.
    :param strides: strides of the first convolution, default (1, 1).
    :return: a function mapping an input tensor to the block output.
    """
    def f(input):
        # Only the first convolution may be strided; the second keeps the size.
        conv1 = conv2d_bn(input, nb_filter, kernel_size=(3, 3), strides=strides)
        residual = conv2d_bn(conv1, nb_filter, kernel_size=(3, 3))

        return shortcut(input, residual)

    return f

def residual_block(nb_filter, repetitions, is_first_layer=False):
    """Build one stage of residual blocks (conv2_x -> conv5_x in the paper's table).

    :param nb_filter: number of filters of every block in the stage.
    :param repetitions: how many basic blocks are stacked.
    :param is_first_layer: when False, the first block of the stage uses
        strides (2, 2); when True, every block uses strides (1, 1).
    :return: a function mapping an input tensor to the stage output.
    """
    def f(input):
        for i in range(repetitions):
            strides = (1, 1)
            if i == 0 and not is_first_layer:
                strides = (2, 2)
            input = basic_block(nb_filter, strides)(input)
        return input

    return f

def resnet_18(input_shape=(224, 224, 3), nclass=1000):
    """Build a ResNet-18 model using Keras with the TensorFlow backend.

    :param input_shape: input shape of the network, default (224, 224, 3).
    :param nclass: number of classes (output size of the network), default 1000.
    :return: the ResNet-18 model; its summary is printed as a side effect.
    """
    input_ = Input(shape=input_shape)

    conv1 = conv2d_bn(input_, 64, kernel_size=(7, 7), strides=(2, 2))
    pool1 = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(conv1)

    # Only conv2_x follows the max-pool directly and keeps the resolution.
    # conv3_x .. conv5_x must start with a stride-2 block, so they use the
    # default is_first_layer=False; passing True there disables every
    # downsampling step after the stem.
    conv2 = residual_block(64, 2, is_first_layer=True)(pool1)
    conv3 = residual_block(128, 2)(conv2)
    conv4 = residual_block(256, 2)(conv3)
    conv5 = residual_block(512, 2)(conv4)

    pool2 = GlobalAvgPool2D()(conv5)
    output_ = Dense(nclass, activation='softmax')(pool2)

    model = Model(inputs=input_, outputs=output_)
    model.summary()

    return model

if __name__ == '__main__':
    model = resnet_18()
    plot_model(model, 'ResNet-18.png')  # 保存模型图

四、其他领域Paper

1.物体检测领域的相关模型：

RCNN（regions with CNN）模型增加特征的穷举范围，然后在其中发现有价值的特征。大概步骤如下：

（1）对于一幅输入的图片，通过选择性搜索，找出2000个候选窗口。

（2）利用CNN对它们提取特征向量，即将这2000个子图统一缩放到227x227，然后进行卷积操作。

（3）利用SVM算法对特征向量进行分类识别。

RCNN中对每一类都进行SVM训练，根据输出的特征类为每一个区域打分，最终决定保留或拒绝该区域特征。

SPP-Net：基于空间金字塔池化的优化RCNN方法。空间金字塔池化（Spatial Pyramid Pooling, SPP）最大的特点是，不再关心输入图片的尺寸，而是根据最后的输出类别个数，通过算法来生成多个不同范围的池化层，由它们对输入进行并行池化处理，使最终的输出特征个数与生成类别个数相等，接着再进行类别的比较和判定。

Fast-R-CNN在SPP-Net基础上进行了改进，并将它嫁接到VGG16上所形成的网络，实现了整个网络端到端的训练。

YOLO：能够一次性预测多个位置和类别的模型。先将图片分为SxS个网格，每个网格相当于一个任务，负责检测内部是否有物体的中心点落入该区域，一旦有的话，则启动该任务来检测n个bounding boxes对象。

SSD：比YOLO更快更准的模型，融合了RPN的思想

2.OCR

3.Mobile

4.Image Segmentation

5.GAN

6.文本分类(TC, text-classification)

V1