共计 4941 个字符,预计需要花费 13 分钟才能阅读完成。
在日常生活中总是有给图像分类的场景,比如垃圾分类、不同场景的图像分类等;今天的文章主要是基于图像识别场景进行模型构建。图像识别是通过 Python 深度学习来进行模型训练,再使用模型对上传的电子表单进行自动审核与比对后反馈相应的结果。主要是利用 Python Torchvision 来构造模型,Torchvision 服务于 PyTorch 深度学习框架,主要是用来生成图片、视频数据集以及训练模型。
模型构建
构建模型时为了直观,需要使用 Jupyter notebook 进行模型的构建。
导入所需包
图像识别需要用到深度学习相关模块,所以需要导入相应的包,具体导入的包如下:
%reload_ext autoreload
%autoreload 2
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision import transforms as tfs
from torchvision import models
from torch import nn
import matplotlib.pyplot as plt
%matplotlib inline
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
是否使用 GPU
模型的训练主要方式是基于 GPU 或者 CPU 训练,在没有 GPU 的条件下就在 CPU 下进行训练。模型的训练需要花费一定的时间,训练时长根据训练集的数据和硬件性能而定,训练结果的准确性根据数据的多少和准确性而定;深度学习需要大量的素材才能判断出准确的结果。这里声明使用 CPU 进行训练:
# Whether to use the GPU; when True the model and each batch are moved with .cuda().
use_gpu = False
数据增强
将拿到的数据进行训练集的数据预处理并设置训练分层数,再将拿到的图片进行水平翻转后对图片进行剪裁,剪裁后将图片进行随机翻转,增强随机对比度以及图片色彩变化。
# Data augmentation / preprocessing pipelines.
# Training images: resize to 224x224, random horizontal flip, random 128x128
# crop, convert to a tensor, then normalize the three channels with
# mean 0.5 and std 0.5.
train_transform = tfs.Compose(
    [
        tfs.Resize([224, 224]),
        tfs.RandomHorizontalFlip(),
        tfs.RandomCrop(128),
        tfs.ToTensor(),
        tfs.Normalize([0.5] * 3, [0.5] * 3),
    ]
)

# Validation images: same resize and normalization, but no random flip and
# no random crop.
test_transform = tfs.Compose(
    [
        tfs.Resize([224, 224]),
        tfs.ToTensor(),
        tfs.Normalize([0.5] * 3, [0.5] * 3),
    ]
)

# Number of samples in each training batch.
batch_size = 10
数据集和验证集准备
模型训练需要准备数据集和验证集,只有足够的照片才能得到更精准的答案。训练集和验证集部分代码如下:
# Build the training and validation datasets and their loaders.
train_set = ImageFolder('./dataset1/train', train_transform)
train_data = DataLoader(train_set, batch_size, shuffle=True, num_workers=0)
valid_set = ImageFolder('./dataset1/valid', test_transform)
# The validation loader uses twice the training batch size and is not shuffled.
valid_data = DataLoader(valid_set, 2*batch_size, shuffle=False, num_workers=0)
# NOTE(review): bare expressions, presumably displayed as notebook cell output.
train_set.class_to_idx
len(valid_data)

# Dataset sanity check: pull one batch from each loader and compare its size
# with the configured batch size.
try:
    # Use the built-in next(); loader iterators are not guaranteed to expose
    # a Python-2-style .next() method.
    if next(iter(train_data))[0].shape[0] == batch_size and \
            next(iter(valid_data))[0].shape[0] == 2*batch_size:
        print('Dataset is ready!')
    else:
        print('Not success, maybe the batch size is wrong')
except Exception as err:
    print('not success, image transform is wrong!')
    # Show the real cause instead of hiding it behind the generic message.
    print('{}: {}'.format(type(err).__name__, err))
模型构建并准备模型
# Build the model
def get_model(num_classes=3):
    """Return a ResNet-50 whose final layer is replaced by a new linear head.

    The backbone is created with ``models.resnet50(pretrained=True)``; only
    the ``fc`` layer is swapped out.

    Args:
        num_classes: Output size of the replacement ``fc`` layer. Defaults
            to 3, the value this notebook checks against.

    Returns:
        The model with ``model.fc`` set to a fresh ``nn.Linear``.
    """
    model = models.resnet50(pretrained=True)
    # Read the head's input width from the existing layer rather than
    # hard-coding 2048.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model
# Smoke-test the model on a single training batch before training starts.
try:
    model = get_model()
    # Probe in eval mode so layers such as BatchNorm do not update their
    # running statistics from this one batch; train() calls model.train()
    # at the start of every epoch, so training is unaffected.
    model.eval()
    with torch.no_grad():
        # Built-in next() instead of the Python-2-style iterator .next().
        scorce = model(next(iter(train_data))[0])
    print(scorce.shape[0], scorce.shape[1])
    # Expect one row per sample and one column per class.
    if scorce.shape[0] == batch_size and scorce.shape[1] == 3:
        print('Model is ready!')
    else:
        print('Model is failed!')
except Exception as err:
    print('model is wrong')
    # Show the real cause instead of hiding it behind the generic message.
    print('{}: {}'.format(type(err).__name__, err))
if use_gpu:
    model = model.cuda()
构建模型优化器
# Build the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-4)
# Number of training epochs
max_epoch = 20
模型训练和训练结果可视化
数据集和训练集准备好后进行模型训练和训练结果可视化,部分代码如下:
def _forward_batch(model, criterion, img, label):
    """Run one batch through ``model`` and return ``(loss, batch_accuracy)``."""
    if use_gpu:
        img = img.cuda()
        label = label.cuda()
    out = model(img)
    loss = criterion(out, label.long())
    # Predicted class = index of the largest score along dimension 1.
    _, pred = out.max(1)
    num_correct = (pred == label.long()).sum().item()
    return loss, num_correct / img.shape[0]


def _plot_metrics(metric_log):
    """Plot the logged loss (left) and accuracy (right) curves."""
    _, figs = plt.subplots(1, 2, figsize=(10, 5))
    for ax, metric in zip(figs, ('loss', 'acc')):
        for split in ('train', 'valid'):
            key = '{}_{}'.format(split, metric)
            # 'valid_*' keys only exist when a validation loader was given.
            if key in metric_log:
                ax.plot(metric_log[key], label='{} {}'.format(split, metric))
        # One point is logged per epoch, so x is the epoch index and the
        # metric itself belongs on the y axis.
        ax.set_xlabel('epoch')
        ax.set_ylabel(metric)
        ax.legend(loc='best')


def train(model, train_data, valid_data, max_epoch, criterion, optimizer):
    """Train ``model`` for ``max_epoch`` epochs, then plot loss/accuracy curves.

    Reads the module-level ``use_gpu`` flag to decide whether batches are
    moved with ``.cuda()``, and uses the module-level ``plt`` for plotting.
    Per-epoch metrics are the mean of the per-batch values.

    Args:
        model: Network called as ``model(img)``.
        train_data: Iterable of ``(img, label)`` batches supporting ``len()``.
        valid_data: Validation batches in the same form, or ``None`` to skip
            validation.
        max_epoch: Number of passes over ``train_data``.
        criterion: Loss callable invoked as ``criterion(out, label.long())``.
        optimizer: Optimizer; ``zero_grad()`` and ``step()`` are called once
            per training batch.
    """
    # Report progress about three times per epoch. Clamp to 1 so a loader
    # with fewer than three batches does not trigger a modulo by zero.
    freq_print = max(1, len(train_data) // 3)
    metric_log = {'train_loss': [], 'train_acc': []}
    if valid_data is not None:
        metric_log['valid_loss'] = []
        metric_log['valid_acc'] = []

    for e in range(max_epoch):
        model.train()
        running_loss = 0
        running_acc = 0
        for i, (img, label) in enumerate(train_data, 1):
            # Forward pass and loss
            loss, acc = _forward_batch(model, criterion, img, label)
            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_acc += acc
            if i % freq_print == 0:
                print('[{}]/[{}], train loss: {:.3f}, train acc: {:.3f}'
                      .format(i, len(train_data), running_loss / i, running_acc / i))
        metric_log['train_loss'].append(running_loss / len(train_data))
        metric_log['train_acc'].append(running_acc / len(train_data))

        if valid_data is not None:
            model.eval()
            running_loss = 0
            running_acc = 0
            # Evaluation needs no gradients; skip building the autograd graph.
            with torch.no_grad():
                for img, label in valid_data:
                    loss, acc = _forward_batch(model, criterion, img, label)
                    running_loss += loss.item()
                    running_acc += acc
            metric_log['valid_loss'].append(running_loss / len(valid_data))
            metric_log['valid_acc'].append(running_acc / len(valid_data))
            # Adjacent literals are concatenated, so no source indentation
            # leaks into the printed line.
            print_str = ('epoch: {}, train loss: {:.3f}, train acc: {:.3f}, '
                         'valid loss: {:.3f}, valid accuracy: {:.3f}').format(
                e + 1,
                metric_log['train_loss'][-1],
                metric_log['train_acc'][-1],
                metric_log['valid_loss'][-1],
                metric_log['valid_acc'][-1])
        else:
            print_str = 'epoch: {}, train loss: {:.3f}, train acc: {:.3f}'.format(
                e + 1,
                metric_log['train_loss'][-1],
                metric_log['train_acc'][-1])
        print(print_str)

    # Visualization
    _plot_metrics(metric_log)
调参进行模型训练
# Used for hyper-parameter tuning: run training with the settings defined above
train(model, train_data, valid_data, max_epoch, criterion, optimizer)
保存模型
# Save the model's state_dict.
# torch.save does not create missing parent directories, so make sure
# ./model exists before writing.
os.makedirs('./model', exist_ok=True)
torch.save(model.state_dict(), './model/save_model2.pth')
总结
今天的文章主要是讲图像识别模型如何构建。希望对大家有所帮助。
以上就是本次分享的所有内容,想要了解更多 python 知识欢迎前往公众号:Python 编程学习圈,发送“J”即可免费获取,每日干货分享