关于 JavaScript:TensorFlow.js 视频/图片多目标检测

4次阅读

共计 5687 个字符,预计需要花费 15 分钟才能阅读完成。

前言

TensorFlow.js 官网提供了很多常用模型库,涵盖了平时开发中大部分场景的模型。例如,前面提到的图片识别,除此之外还有人体姿势识别、目标物体识别、语音文字识别等。其中一些可能是由 Python 模型转换而来,但都是开发人员用海量数据或资源训练的,个人感觉准确度能满足大部分功能开发要求。这里要介绍的是目标物体识别模型 —— Coco-SSD。

目标检测在机器视觉中已经很常见了,就是模型能够对图片或者视频中的物体进行识别,并预测其最大概率的名称和展示概率值。以下就先以 GitHub 上 Coco-SSD 图片目标检测为例,最后再做一个视频的目标实时识别。

demo 运行

TensorFlow.js 提供的例子是通过 yarn 运行的,因为我本地环境的原因,这里改用 npm 和 parcel 运行并查看效果。先在本地创建项目文件夹,然后再分别创建 index.html、script.js、package.json,并添加几张图片。

依赖包安装

package.json 配置,安装 tfjs-backend-cpu、tfjs-backend-webgl 和模型

{
  "name": "tfjs-coco-ssd-demo",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "dependencies": {
    "@tensorflow-models/coco-ssd": "^2.2.2",
    "@tensorflow/tfjs-backend-cpu": "^3.3.0",
    "@tensorflow/tfjs-backend-webgl": "^3.3.0",
    "@tensorflow/tfjs-converter": "^3.3.0",
    "@tensorflow/tfjs-core": "^3.3.0",
    "stats.js": "^0.17.0"
  },
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "",
  "license": "ISC",
  "browserslist": [
    "last 1 Chrome version"
  ]
}

在命令行中切换到项目目录,运行 npm install

代码

index.html

<!-- Demo page: choose a Coco-SSD base model, run detection on the displayed image and show the result on the canvas. -->
<h1>TensorFlow.js Object Detection</h1>
<!-- Base model selector; script.js reloads the model with the selected value whenever it changes. -->
<select id='base_model'>
    <option value="lite_mobilenet_v2">SSD Lite Mobilenet V2</option>
    <option value="mobilenet_v1">SSD Mobilenet v1</option>
    <option value="mobilenet_v2">SSD Mobilenet v2</option>
</select>
<!-- Runs detection on the image currently shown. -->
<button type="button" id="run">Run</button>
<!-- Switches between the two sample images imported by script.js. -->
<button type="button" id="toggle">Toggle Image</button>
<div>
<!-- Detection input; script.js assigns its src. -->
<img id="image" />
<!-- Detection output: script.js draws the image here together with the bounding boxes and labels. -->
<canvas id="canvas" width="600" height="399"></canvas>
</div>

<!-- script.js uses ES module imports; the article runs this page through parcel, which bundles them. -->
<script src="script.js"></script>

script.js

import '@tensorflow/tfjs-backend-cpu';
import '@tensorflow/tfjs-backend-webgl';

import * as cocoSsd from '@tensorflow-models/coco-ssd';

import imageURL from './image3.jpg';
import image2URL from './image5.jpg';

// Promise that resolves to the Coco-SSD model currently in use.
let modelPromise;

// Begin loading the default model as soon as the page has finished loading.
window.onload = () => {
  modelPromise = cocoSsd.load();
};

// "Toggle Image" button: flips the displayed picture between the two imported samples.
const button = document.getElementById('toggle');
button.onclick = () => {
  const showingFirst = image.src.endsWith(imageURL);
  image.src = showingFirst ? image2URL : imageURL;
};

const select = document.getElementById('base_model');

/**
 * Replaces the current detector with the base model chosen in the dropdown.
 *
 * The previous model is disposed first so its resources are released, then
 * `modelPromise` is pointed at the load of the newly selected model.
 *
 * @param {Event} event - change event fired by the #base_model select.
 */
select.onchange = async (event) => {
  try {
    const previous = await modelPromise;
    // modelPromise is still undefined if the user picks a model before
    // window.onload has started the first load, so guard the dispose call.
    previous?.dispose();
  } catch (err) {
    // The earlier load failed, so there is nothing to dispose. Switching to
    // another model must still be possible, so only report the failure.
    console.warn('previous model failed to load:', err);
  }

  // event.target is the select itself; its value is the selected option's
  // value. (event.srcElement is a deprecated alias of event.target.)
  modelPromise = cocoSsd.load({base: event.target.value});
};

// Image element used both as the detection input and as the picture drawn onto the canvas.
const image = document.getElementById('image');
// Show the first sample image initially.
image.src = imageURL;

const runButton = document.getElementById('run');

/**
 * "Run" button: waits for the model, runs detection on the current image and
 * paints the image plus one labelled green rectangle per detection onto the
 * canvas. Timing and raw results are written to the console.
 */
runButton.onclick = async () => {
  const detector = await modelPromise;
  console.log('model loaded');

  console.time('predict1');
  const result = await detector.detect(image);
  console.timeEnd('predict1');

  const ctx = document.getElementById('canvas').getContext('2d');
  ctx.drawImage(image, 0, 0);
  ctx.font = '10px Arial';

  console.log(result);
  console.log('number of detections:', result.length);

  for (const {bbox, score, class: label} of result) {
    const [x, y, width, height] = bbox;

    ctx.beginPath();
    ctx.rect(x, y, width, height);
    ctx.lineWidth = 1;
    ctx.strokeStyle = 'green';
    ctx.fillStyle = 'green';
    ctx.stroke();

    // Put the label just above the box, or at y = 10 when the box is too
    // close to the top edge for the text to fit above it.
    const labelY = y > 10 ? y - 5 : 10;
    ctx.fillText(`${score.toFixed(3)} ${label}`, x, labelY);
  }
};

切换到项目目录,运行 parcel index.html

运行效果

检测视频目标

通过上面 demo 的图片检测发现,用于对某资源(图片、视频)进行检测的函数是 detect()。查看该函数所在的 Coco-SSD 文件发现,detect 函数接收三个参数:第一个参数可以是 TensorFlow 张量,也可以是 DOM 里的图片、视频、画布等 HTML 元素;第二、第三个参数分别用于限制返回结果的最大识别目标数和最小概率阈值;而返回的自然就是一组 box(每项包含 bbox、class 和 score),按概率值降序排列。

实现流程

(1). 给视频标签添加播放监听

(2). 页面渲染完成后加载 Coco-SSD 模型

(3). 模型加载成功后轮询识别视频 (video 标签)

(4). 监听到视频播放停止时关闭轮询检测

编码
HTML 部分

<style>
    /* Positioning context for the absolutely positioned overlay (#img-box). */
    #big-box {position: relative;}

    /* Overlay container anchored to the top-left corner of #big-box; persons.js appends one div per detection to it. */
    #img-box {
        position: absolute;
        top: 0px;
        left: 0px;
    }

    /* A single detection box; persons.js sets its top/left/width/height and border inline. */
    #img-box div {
        position: absolute;
        /*border: 2px solid #f00;*/
        /* The boxes themselves never receive pointer events. */
        pointer-events: none;
    }

    /* Label inside a detection box: the span persons.js creates with class 'className'. */
    #img-box div .className {
        position: absolute;
        top: 0;
        /* background: #f00; */
        color: #fff;
    }

    /* The video fills the available width but is capped at 600px. */
    #myPlayer {
        max-width: 600px;
        width: 100%;
    }
</style>

<!-- Status line; persons.js overwrites this text while the model loads and detection runs. -->
<div id="showBox"> 期待模型加载...</div>
<br>

<!-- Wrapper that lets the detection overlay be positioned over the video. -->
<div id="big-box">
    <!-- Attributes must be separated by whitespace; boolean attributes are written bare. -->
    <video id="myPlayer" src="persons.mp4" muted autoplay controls playsinline webkit-playsinline></video>

    <!-- persons.js injects one positioned div per detected object here. -->
    <div id="img-box"></div>
</div>
<script src="persons.js"></script>

JS 部分

import '@tensorflow/tfjs-backend-cpu';
import '@tensorflow/tfjs-backend-webgl';

import * as cocoSsd from '@tensorflow-models/coco-ssd';

// Loaded Coco-SSD model (null until loading finishes).
let myModel = null;
// Video element handed to the model; assigned once the video can play.
let V = null;

let requestAnimationFrameIndex = null;
let myPlayer = document.getElementById("myPlayer");

// Intrinsic size of the video, recorded when it becomes playable.
let videoHeight = 0;
let videoWidth = 0;
// Displayed size of the video element, recorded at the same time.
let clientHeight = 0;
let clientWidth = 0;

// Readiness flags for the model and the video.
let modelLoad = false;
let videoLoad = false;

/**
 * Page start-up: records the video's dimensions once it is playable, marks
 * it ready for detection, and starts loading the model.
 */
window.onload = function () {
    // Captures the intrinsic and displayed sizes of the video and flags it as ready.
    function onVideoReady() {
        videoHeight = myPlayer.videoHeight;
        videoWidth = myPlayer.videoWidth;
        clientHeight = myPlayer.clientHeight;
        clientWidth = myPlayer.clientWidth;
        V = myPlayer;
        videoLoad = true;
    }

    myPlayer.addEventListener("canplay", onVideoReady);

    // "canplay" may already have fired before this listener was attached
    // (for example when the video comes from cache). In that case the event
    // is never seen, so check the current ready state as well.
    if (myPlayer.readyState >= HTMLMediaElement.HAVE_FUTURE_DATA) {
        onVideoReady();
    }

    loadModel();
};

/**
 * Loads the Coco-SSD model and starts the detection loop.
 *
 * Does nothing when the model has already been loaded (`modelLoad` is true).
 * On success it stores the model in `myModel`, sets `modelLoad` and calls
 * `detectImage()`. Progress and failure are shown in the #showBox element.
 */
function loadModel() {
    if (modelLoad) {
        return;
    }

    const showBox = document.getElementById("showBox");

    cocoSsd.load().then(model => {
        showBox.innerHTML = "载入胜利";
        myModel = model;
        // Set the flag before starting detection so it is already accurate
        // by the time detectImage() runs.
        modelLoad = true;
        detectImage();
    }).catch(err => {
        // Without this handler a failed download is an unhandled rejection
        // and the page stays on its initial "waiting" message forever.
        // Also reached if starting detection throws synchronously.
        console.error("Failed to load the Coco-SSD model:", err);
        showBox.innerHTML = "模型加载失败";
    });
}

/**
 * Runs one Coco-SSD detection pass on the video, redraws the overlay boxes,
 * and schedules the next pass 10 ms later.
 *
 * Reads the script's shared state: `myModel` (loaded model), `V` (video
 * element), `videoLoad` (video-ready flag) and the video/client dimensions
 * used to scale boxes to the displayed size. Writes status text into
 * #showBox and replaces the contents of #img-box with one div per detection.
 */
function detectImage() {
    const showBox = document.getElementById("showBox");
    // Border colours, cycled through in detection order
    const classColorMap = ["red", "green", "blue", "white"];

    showBox.innerHTML = "检测中...";

    if (!videoLoad) {
        // The model can finish loading before the video is ready. Check
        // again shortly instead of giving up, otherwise detection would
        // never start.
        setTimeout(detectImage, 10);
        return;
    }

    myModel.detect(V).then(predictions => {
        showBox.innerHTML = "检测完结";

        const $imgbox = document.getElementById('img-box');
        $imgbox.innerHTML = "";

        // Ratio between the displayed size and the video's intrinsic size,
        // used to map each bbox onto the overlay.
        const heightScale = clientHeight / videoHeight;
        const widthScale = clientWidth / videoWidth;

        predictions.forEach((box, index) => {
            console.log(box);

            const [left, top, width, height] = box.bbox;
            const borderColor = classColorMap[index % classColorMap.length];
            const score = box.score.toFixed(3);

            const $div = document.createElement('div');
            // The space after "solid" is required: without it the value is
            // invalid CSS and no border is drawn.
            $div.style.border = `2px solid ${borderColor}`;
            $div.style.top = `${top * heightScale}px`;
            $div.style.left = `${left * widthScale}px`;
            $div.style.width = `${width * widthScale}px`;
            $div.style.height = `${height * heightScale}px`;
            $div.innerHTML = `<span class='className'>${box.class} ${score}</span>`;

            $imgbox.appendChild($div);
        });

        setTimeout(detectImage, 10);
    }).catch(err => {
        // Report the failure instead of leaving an unhandled rejection;
        // polling stops here.
        console.error("Object detection failed:", err);
    });
}

演示效果

正文完
 0