关于pytorch:PyTorch模型推理及多任务通用范式-课程5-作业

Course

PyTorch模型推理及多任务通用范式课程5:

给大家安利了大白老师之前的好文：关于Yolo系列算法的理论知识解读和实战训练代码。
根据pytorch模型推理的三板斧：数据预处理、数据进网络、数据后处理，逐行实现了Yolox的推理代码。

Assignment

必做题

自己找2张其他图，用Yolox_s进行目标检测，并注明输入尺寸和两个阈值。

思考题

Yolox_s：用time模块和for循环，对"./images/1.jpg"连续推理100次，统计时间开销。有CUDA的同学，改下代码：self.device=torch.device('cuda')，统计时间开销。
有CUDA的同学，分别用 Yolox_tiny、Yolox_s、Yolox_m、Yolox_l、Yolox_x 对"./images/1.jpg"连续推理100次，统计时间开销。

Solutions

Code

import os
import time

import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision

from models_yolox.visualize import vis
from models_yolox.yolo_head import YOLOXHead
from models_yolox.yolo_pafpn import YOLOPAFPN


def _read_image(path):
    """Read an image with OpenCV, failing loudly when it cannot be loaded.

    ``cv2.imread`` returns ``None`` instead of raising when the file is
    missing or unreadable; turning that into an exception here gives a clear
    error instead of an attribute error later in preprocessing.

    Args:
        path: Path of the image file to read.

    Returns:
        The image array returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: If OpenCV could not load the image.
    """
    image = cv2.imread(path)
    if image is None:
        raise FileNotFoundError('cannot read image: {}'.format(path))
    return image


def write_output(result, image, output, detector=None):
    """Draw detections and the run settings on ``image`` and save it.

    Args:
        result: ``None`` or the ``(bboxes, scores, labels)`` tuple returned by
            ``ModelPipline.predict``.
        image: Image array to annotate (the text is drawn onto it in place).
        output: Destination file path for the annotated image.
        detector: The ``ModelPipline`` whose image size, thresholds and label
            names are printed/used. When omitted, the module-level
            ``model_detect`` created by the script entry point is used, which
            keeps the original three-argument call working.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    if detector is None:
        # Backward-compatible fallback to the global set under __main__.
        detector = model_detect

    if result is not None:
        bboxes, scores, labels = result
        image = vis(image, bboxes, scores, labels, detector.label_names)

    captions = (
        'image size : {}*{}'.format(detector.w, detector.h),
        'conf threshold : {}'.format(detector.conf_threshold),
        'nms threshold : {}'.format(detector.nms_threshold),
    )
    font = cv2.FONT_HERSHEY_SIMPLEX
    # One caption per row, 30 px apart, starting at y=20.
    for row, caption in enumerate(captions):
        cv2.putText(image, caption, (10, 20 + 30 * row), font, 0.7,
                    (0, 255, 0), 1, cv2.LINE_AA)

    # cv2.imwrite returns False (no exception) when the target directory is
    # missing, so create it first and check the result.
    out_dir = os.path.dirname(output)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    if not cv2.imwrite(output, image):
        raise OSError('failed to write image: {}'.format(output))


def run_evl(times, model, device, image):
    """Run one model ``times`` times on one image and print the total time.

    Args:
        times: Number of inference iterations.
        model: Model name, a key of ``ModelPipline.model_info``
            (e.g. ``'yolox_s'``).
        device: Device to run on (``torch.device`` or device string).
        image: Path of the image file to run inference on.

    Returns:
        The accumulated inference time in seconds (also printed).

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    frame = _read_image(image)
    detector = ModelPipline(device=device, model_name=model)

    dev = torch.device(device)
    on_cuda = dev.type == 'cuda'

    t_all = 0.0
    for _ in range(times):
        # CUDA kernels are launched asynchronously; synchronize around the
        # timed region so the measurement covers the actual GPU work.
        if on_cuda:
            torch.cuda.synchronize(dev)
        t_start = time.perf_counter()
        detector.predict(frame)
        if on_cuda:
            torch.cuda.synchronize(dev)
        t_all += time.perf_counter() - t_start

    print('{} run model {} {} time lapse: {:.4f} seconds.'.format(device, model, times, t_all))
    return t_all


class YOLOX(nn.Module):
    """YOLOX network: a ``YOLOPAFPN`` backbone followed by a ``YOLOXHead``."""

    def __init__(self, num_classes, depth=0.33, width=0.5, in_channels=None):
        """Build the backbone and head.

        Args:
            num_classes: Number of object classes passed to the head.
            depth: Depth multiplier passed to the backbone
                (the defaults correspond to yolox_s).
            width: Width multiplier passed to the backbone and head.
            in_channels: Channel sizes passed to the backbone and head.
                Defaults to ``[256, 512, 1024]``; ``None`` is used as the
                default to avoid sharing a mutable list between instances.
        """
        super().__init__()
        if in_channels is None:
            in_channels = [256, 512, 1024]
        in_channels = list(in_channels)
        self.backbone = YOLOPAFPN(depth=depth, width=width, in_channels=in_channels)
        self.head = YOLOXHead(num_classes=num_classes, width=width, in_channels=in_channels)

    def forward(self, x):
        """Run the backbone, then the head, and return the head's output."""
        fpn_outs = self.backbone(x)
        outputs = self.head(fpn_outs)
        return outputs


class ModelPipline(object):
    """Inference pipeline: preprocess -> network forward -> postprocess."""

    def __init__(self, device=torch.device('cuda'), model_name='yolox_s'):
        """Load labels and the requested pretrained model.

        Args:
            device: Device the model and its inputs are placed on.
            model_name: Key of ``self.model_info`` selecting depth, width and
                weight file.
        """
        # Network input size (h, w); used by pre- and post-processing.
        # get_model() overrides it for yolox_tiny.
        self.inputs_size = (640, 640)

        # Device used for both the input tensor and the model.
        self.device = device

        # Post-processing thresholds.
        self.conf_threshold = 0.5
        self.nms_threshold = 0.4

        # Class labels, one per line; the context manager closes the file.
        with open('./labels/coco_label.txt', 'r') as label_file:
            self.label_names = [line.strip('\n') for line in label_file]

        # Size of the most recently preprocessed image (set in preprocess()).
        self.w = 0
        self.h = 0

        # Per-model settings: [depth, width, weight file path].
        self.model_info = {
            'yolox_tiny':   [0.33,  0.375,  './weights/yolox_tiny_coco.pth.tar'],
            'yolox_s':      [0.33,  0.5,    './weights/yolox_s_coco.pth.tar'],
            'yolox_m':      [0.67,  0.75,   './weights/yolox_m_coco.pth.tar'],
            'yolox_l':      [1.0,   1.0,    './weights/yolox_l_coco.pth.tar'],
            'yolox_x':      [1.33,  1.25,   './weights/yolox_x_coco.pth.tar'],
        }

        # Build the network and load its weights.
        self.num_classes = 80
        self.model = self.get_model(model_name)

    def predict(self, image):
        """Detect objects in one image.

        Args:
            image: Image array of shape (H, W, 3); channels are reversed in
                preprocess(), i.e. OpenCV-style BGR input is expected.

        Returns:
            ``None`` when nothing passes the confidence threshold, otherwise
            the tuple ``(bboxes, scores, labels)`` from postprocess().
        """
        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            inputs, r = self.preprocess(image)
            outputs = self.model(inputs)
            results = self.postprocess(outputs, r)
        return results

    def get_model(self, model_name):
        """Build the YOLOX variant ``model_name`` and load its weights.

        Also switches ``self.inputs_size`` to (416, 416) for ``yolox_tiny``.

        Args:
            model_name: Key of ``self.model_info``.

        Returns:
            The model, moved to ``self.device`` and set to eval mode.

        Raises:
            KeyError: If ``model_name`` is not in ``self.model_info``.
        """
        depth, width, path = self.model_info[model_name]
        if model_name == 'yolox_tiny':
            self.inputs_size = (416, 416)

        model = YOLOX(self.num_classes, depth, width)
        # Load the checkpoint onto CPU first; the model is moved afterwards.
        checkpoint = torch.load(path, map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint["model"], strict=True)
        model.to(self.device)
        model.eval()
        return model

    def preprocess(self, image):
        """Letterbox, normalize and tensorize one image.

        Records the original size in ``self.h`` / ``self.w``.

        Args:
            image: Image array of shape (H, W, 3).

        Returns:
            ``(inputs, r)`` where ``inputs`` is a float32 tensor of shape
            (1, 3, inputs_size[0], inputs_size[1]) on ``self.device`` and
            ``r`` is the resize ratio applied to the original image.
        """
        in_h, in_w = self.inputs_size

        # Original image size, kept for reporting in write_output().
        h, w = image.shape[:2]
        self.h = h
        self.w = w

        # Canvas of the network input size filled with the value 114.
        padded_img = np.ones((in_h, in_w, 3)) * 114.0

        # Aspect-preserving ratio so the resized image fits inside the canvas.
        r = min(in_h / h, in_w / w)
        new_w, new_h = int(w * r), int(h * r)
        resized_img = cv2.resize(image, (new_w, new_h),
                                 interpolation=cv2.INTER_LINEAR).astype(np.float32)

        # Paste the resized image into the top-left corner of the canvas.
        padded_img[:new_h, :new_w] = resized_img

        # Reverse the channel order (BGR -> RGB).
        padded_img = padded_img[:, :, ::-1]

        # Scale to [0, 1] and standardize (per the original author, this must
        # match what was used at training time).
        inputs = padded_img / 255
        inputs = (inputs - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225])

        # (H, W, C) -> (C, H, W) -> (1, C, H, W)
        inputs = inputs.transpose(2, 0, 1)
        inputs = inputs[np.newaxis, :, :, :]

        # NumPy array -> float32 tensor on the same device as the model.
        inputs = torch.from_numpy(inputs)
        inputs = inputs.type(torch.float32)
        inputs = inputs.to(self.device)
        return inputs, r

    def postprocess(self, prediction, r):
        """Turn raw network output into boxes in original-image coordinates.

        Unlike the original implementation, ``prediction`` is not modified.

        Args:
            prediction: Network output; per the original author's note its
                shape is [1, 8400, 85] for a 640x640 input, each row being
                (xc, yc, w, h, obj_score, class scores...). Only the first
                image of the batch is processed.
            r: Resize ratio returned by preprocess().

        Returns:
            ``None`` if no row passes ``self.conf_threshold``; otherwise
            ``(bboxes, scores, labels)`` as NumPy arrays, with ``bboxes`` as
            int64 (x0, y0, x1, y1) mapped back to the original image.
        """
        # Single image only.
        image_pred = prediction[0]

        # (xc, yc, w, h) -> (x0, y0, x1, y1)
        centers = image_pred[:, :2]
        half_sizes = image_pred[:, 2:4] / 2
        boxes = torch.cat((centers - half_sizes, centers + half_sizes), 1)

        # Best class score and its index for every row.
        class_conf, class_pred = torch.max(
            image_pred[:, 5: 5 + self.num_classes], 1, keepdim=True)
        conf_score = image_pred[:, 4].unsqueeze(dim=1) * class_conf
        # squeeze(1) keeps a 1-D mask even when there is only one row.
        conf_mask = (conf_score >= self.conf_threshold).squeeze(1)

        # Columns: x0, y0, x1, y1, obj_score * class_score, class index.
        detections = torch.cat((boxes, conf_score, class_pred.float()), 1)
        detections = detections[conf_mask]

        # Nothing left after thresholding.
        if not detections.size(0):
            return None

        # Class-wise non-maximum suppression.
        nms_out_index = torchvision.ops.batched_nms(
            detections[:, :4], detections[:, 4], detections[:, 5], self.nms_threshold)
        detections = detections[nms_out_index]

        # Map coordinates back to the original image.
        detections = detections.detach().cpu().numpy()
        bboxes = (detections[:, :4] / r).astype(np.int64)
        scores = detections[:, 4]
        labels = detections[:, 5].astype(np.int64)
        return bboxes, scores, labels


if __name__ == '__main__':
    has_cuda = torch.cuda.is_available()
    # Fall back to CPU so the demo still runs on machines without CUDA.
    main_device = torch.device('cuda' if has_cuda else 'cpu')

    model_detect = ModelPipline(main_device, 'yolox_s')
    label_names = model_detect.label_names

    # Detect and annotate images 2 and 3.
    for name in ('2', '3'):
        image = _read_image('./images/{}.jpg'.format(name))
        result = model_detect.predict(image)
        write_output(result, image, './demos/{}.jpg'.format(name), detector=model_detect)

    # Time 100 iterations on CPU, then every model size on CUDA if present.
    run_evl(100, 'yolox_s', torch.device('cpu'), './images/1.jpg')
    if has_cuda:
        for model_name in ('yolox_tiny', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x'):
            run_evl(100, model_name, torch.device('cuda'), './images/1.jpg')
    else:
        print('CUDA is not available; skipping CUDA benchmarks.')

必做题

提交2张图的检测结果，并注明输入尺寸和两个阈值。
图一

图二

思考题

CPU推理和CUDA推理，各自的时间开销。
运行结果：
cpu run model yolox_s 100 time lapse: 13.2529 seconds.
cuda run model yolox_s 100 time lapse: 3.6188 seconds.
GPU明显比CPU运算快。
不同Backbone各自的时间开销。
运行结果：
cuda run model yolox_tiny 100 time lapse: 2.5452 seconds.
cuda run model yolox_s 100 time lapse: 3.6188 seconds.
cuda run model yolox_m 100 time lapse: 4.0251 seconds.
cuda run model yolox_l 100 time lapse: 4.7148 seconds.
cuda run model yolox_x 100 time lapse: 6.5571 seconds.
模型越大，时间开销越大。

学习心得

这次课程对你的帮助有哪些？技术上、学习方法上、思路上等都行，随意发挥。
很久没写过作业了，督促了我写点东西，熟悉一下之前不重视的细节。推理通用范式有实用价值，后续可以用在实验代码里。
对课程的优化改进有什么建议？内容上、流程上、形式上等都行，随意发挥。
本课程给我的感觉是老师们很尽心，内容丰富，讲解清楚，有收获。但限于文字的形式，看得出来一些知识老师想讲但没法讲。希望以后能录视频，对着画板写写画画地讲，这样老师讲得清楚，学生学得明白。另外希望授课内容和代码能更贴近生产一线，多一些业界实践，能带入一些偏日常工作可能遇到的问题去思考和解决是最好的了。
最好可以附上自己所处的阶段，比如：学生，接触AI一年；在职，3年AI项目经验等。
本人研一，接触深度学习不到一年，课题是机器视觉相关。