关于图像识别:SkeyeRTSPLive高效转码之SkeyeVideoDecoder采用Intel集成显卡高效硬件解码解决方案-1

60次阅读

共计 8054 个字符,预计需要花费 21 分钟才能阅读完成。

在我之前写的一篇文章《SkeyeRTSPLive 传统视频监控互联网 + 实现利器解决方案》中提到了 RTSP 转 RTMP 的转流过程:简化流程就是通过 SkeyeRTSPClient 拉取 RTSP 流,获取音视频编码数据,然后再通过 SkeyeRTMPPusher 推送出去,流程非常简单。然而在实际开发过程中,我们发现这个过程并没有想象中那么简单。首先,RTSP 协议支持多种音视频编码格式,如音频支持 AAC、G711、G726 等,视频支持 H264、H265、MJPEG、MPEG 等各种格式,而 SkeyeRTMP 推流只支持 H264(已扩展支持 H265)格式。这时,音频可以通过 SkeyeAACEncoder 转码成 AAC 格式,视频则可以先通过 SkeyeVideoDecoder 解码成原始数据,再通过 SkeyeVideoEncoder 将原始数据转码成 RTMP 推送指定的格式。本文将重点讲述 SkeyeVideoDecoder 基于 Intel 硬解码库的硬解码流程。

SkeyeVideoDecoder 基于 Intel 硬解码库 SkeyeIntelHardDecoder

SkeyeIntelHardDecoder 库是基于 Intel 主板集成显卡的硬件解码程序,内部解码后采用 D3D 进行显示,其解码效率比 ffmpeg 软件解码提高至多 5-6 倍;

1. 接口声明如下:

// Intel Media Hardware Codec SDK Interface [8/17/2016 SwordTwelve]

#ifndef INTELHARDCODEC_INTERFACE_H
#define INTELHARDCODEC_INTERFACE_H

#ifdef __cplusplus

// Packs four character codes into one int: A occupies the lowest 8 bits,
// B the next 8, then C, and D the highest 8 bits.
// Each argument is parenthesized before the cast so that an expression
// argument is converted as a whole instead of only its first operand.
#define HARDCODEC_MAKEFOURCC(A,B,C,D)    (((int)(A)) + (((int)(B)) << 8) + (((int)(C)) << 16) + (((int)(D)) << 24))

/* Hardware Codec FourCC: pixel-format identifiers used by the decoder interface */
typedef enum tagINTELHARDCODEC_FORMAT
{
    HARDCODEC_FOURCC_NV12         = HARDCODEC_MAKEFOURCC('N','V','1','2'),   /* Native Format */
    HARDCODEC_FOURCC_YV12         = HARDCODEC_MAKEFOURCC('Y','V','1','2'),
    HARDCODEC_FOURCC_YUY2         = HARDCODEC_MAKEFOURCC('Y','U','Y','2'),
    HARDCODEC_FOURCC_RGB3         = HARDCODEC_MAKEFOURCC('R','G','B','3'),   /* RGB24 */
    HARDCODEC_FOURCC_RGB4         = HARDCODEC_MAKEFOURCC('R','G','B','4'),   /* RGB32 */
    HARDCODEC_FOURCC_P8           = 41,         /*  D3DFMT_P8   */
    HARDCODEC_FOURCC_P8_TEXTURE   = HARDCODEC_MAKEFOURCC('P','8','M','B')
}INTELHARDCODEC_FORMAT;


// Abstract decoder interface exported by the DLL.
// All members are pure virtual; the declaration order below is part of the
// binary interface and must not be rearranged.
class SkeyeIntelHardDecoder_Interface
{
public:
    // Initializes the decoder for the window hWnd.
    //   bDxv2Show    - display flag passed through to the implementation
    //   bShowToScale - scaling flag passed through to the implementation
    //   mode         - defaults to 1
    // NOTE(review): the exact meaning of the flags and of the int return
    // value is defined by the implementing class - confirm there.
    virtual int WINAPI Init(HWND hWnd,
                            bool bDxv2Show,
                            bool bShowToScale,
                            int mode = 1) = 0;

    // Decodes len bytes starting at pData.
    //   outFormat - output parameter (marked OUT) of type INTELHARDCODEC_FORMAT
    //   pYUVData  - output buffer (marked OUT); presumably caller-allocated,
    //               verify the required size against the implementation
    virtual int WINAPI Decode(unsigned char* pData,
                              int len,
                              OUT INTELHARDCODEC_FORMAT& outFormat,
                              OUT unsigned char* pYUVData) = 0;

    // Shuts the decoder down.
    virtual void WINAPI Close() = 0;
};

// Pointer type used to hold a SkeyeIntelHardDecoder_Interface instance
typedef    SkeyeIntelHardDecoder_Interface*    LPIntelHardDecoder;    

LPIntelHardDecoder    APIENTRY Create_SkeyeIntelHardDecoder();// Creates the decoder control interface pointer
void APIENTRY Release_SkeyeIntelHardDecoder(LPIntelHardDecoder lpHardDecoder);// Destroys the decoder control interface pointer

#endif//__cplusplus
#endif//INTELHARDCODEC_INTERFACE_H
2. SkeyeIntelHardDecoder 解码库调用流程
  • 第一步,初始化解码器
    首先,检查设备是否支持 DXVA2,代码如下所示:
// Probes whether a Direct3D 9 device and a DXVA2 device manager can be
// created for the given window.
//
// The function creates a temporary IDirect3D9 object, a HAL device on
// adapter 0 bound to hwnd, resets and clears that device, and finally asks
// DXVA2 for a Direct3D device manager. Every object created here is released
// before returning, on success and on every failure path (the original
// leaked the device and the IDirect3D9 object when Reset() failed).
//
// The function is named isSupportDxva2 because that is the name Init() calls;
// the previous spelling "sSupportDxva2" had lost its leading "i".
//
//   hwnd - window used both as focus window and as presentation window
// Returns true only when every step succeeded.
bool isSupportDxva2(HWND hwnd)
{
    const mfxU32 adapterNum = 0;

    IDirect3D9* pD3D9 = Direct3DCreate9(D3D_SDK_VERSION);
    if (!pD3D9)
        return false;

    // Windowed presentation sized to the primary screen
    D3DPRESENT_PARAMETERS d3dpp;
    ZeroMemory(&d3dpp, sizeof(d3dpp));
    d3dpp.Windowed = true;
    d3dpp.hDeviceWindow = hwnd;

    d3dpp.Flags                      = D3DPRESENTFLAG_VIDEO;
    d3dpp.FullScreen_RefreshRateInHz = D3DPRESENT_RATE_DEFAULT;
    d3dpp.PresentationInterval       = D3DPRESENT_INTERVAL_ONE;
    d3dpp.BackBufferCount            = 1;
    d3dpp.BackBufferFormat           = D3DFMT_X8R8G8B8;

    d3dpp.BackBufferWidth  = GetSystemMetrics(SM_CXSCREEN);
    d3dpp.BackBufferHeight = GetSystemMetrics(SM_CYSCREEN);

    d3dpp.Flags |= D3DPRESENTFLAG_LOCKABLE_BACKBUFFER;

    d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD; // D3DSWAPEFFECT_OVERLAY

    IDirect3DDevice9*        pD3DD9 = NULL;
    IDirect3DDeviceManager9* pDeviceManager9 = NULL;

    // Each step runs only if the previous one succeeded; the first failing
    // HRESULT is kept and decides the result.
    HRESULT hr = pD3D9->CreateDevice(
        adapterNum,
        D3DDEVTYPE_HAL,
        hwnd,
        D3DCREATE_SOFTWARE_VERTEXPROCESSING | D3DCREATE_MULTITHREADED | D3DCREATE_FPU_PRESERVE,
        &d3dpp,
        &pD3DD9);

    if (!FAILED(hr))
        hr = pD3DD9->Reset(&d3dpp);

    if (!FAILED(hr))
        hr = pD3DD9->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);

    if (!FAILED(hr))
    {
        UINT resetToken = 0;
        hr = DXVA2CreateDirect3DDeviceManager9(&resetToken, &pDeviceManager9);
    }

    const bool supported = !FAILED(hr);

    // Single cleanup point: release in reverse order of creation
    if (pDeviceManager9)
        pDeviceManager9->Release();
    if (pD3DD9)
        pD3DD9->Release();
    pD3D9->Release();

    return supported;
}

然后,初始化解码器,代码如下:

// Prepares the decoder for use with the window hWnd.
//
// Steps, in order: probe DXVA2 support, store the display options and the
// window's client rectangle, create the D3D renderer, open the Media SDK
// session, create the decoder object, preset the video parameters and
// allocate a 2 MiB input bitstream buffer.
//
//   hWnd         - target window
//   bDxv2Show    - stored in m_bDxv2Show
//   bShowToScale - stored in m_bShowToScale
//   mode         - 1: try MFX_IMPL_HARDWARE_ANY, then MFX_IMPL_HARDWARE, then
//                  MFX_IMPL_SOFTWARE; any other value: MFX_IMPL_SOFTWARE only
// Returns -1 when the DXVA2 probe fails, otherwise the status of
// InitMfxBitstream. The MSDK_CHECK_* macros presumably return early with the
// given status on failure - confirm against their definition.
int    Init(HWND hWnd, bool bDxv2Show, bool bShowToScale, int mode)
{
    // Refuse to start when hardware decoding is not available
    if (!isSupportDxva2(hWnd))
    {
        return -1;
    }

    // Remember the target window and the display options
    m_hWnd = hWnd;
    m_bDxv2Show = bDxv2Show;
    m_bShowToScale = bShowToScale;
    ::GetClientRect(m_hWnd, &m_WndRect);

    // Renderer bound to the same window
    m_pD3dRender = new CDecodeD3DRender();
    m_pD3dRender->Init(hWnd);

    // Same field order as the disabled {MFX_VERSION_MINOR, MFX_VERSION_MAJOR}
    // initializer, with the literal values 0 and 1
    mfxVersion version = {0, 1};
    mfxStatus sts = MFX_ERR_NONE;

    if (mode != 1)
    {
        sts = m_mfxSession.Init(MFX_IMPL_SOFTWARE, &version);
    }
    else
    {
        // Fall through the implementations until one of them initializes
        if (m_mfxSession.Init(MFX_IMPL_HARDWARE_ANY, &version) != MFX_ERR_NONE)
        {
            sts = m_mfxSession.Init(MFX_IMPL_HARDWARE, &version);
        }
        if (MFX_ERR_NONE != sts)
        {
            sts = m_mfxSession.Init(MFX_IMPL_SOFTWARE, &version);
        }
    }

    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Decoder object on top of the opened session
    m_pmfxDEC = new MFXVideoDECODE(m_mfxSession);
    MSDK_CHECK_POINTER(m_pmfxDEC, MFX_ERR_MEMORY_ALLOC);

    // Codec type, async depth and output pixel format
    m_mfxVideoParams.mfx.CodecId = MFX_CODEC_AVC;
    m_mfxVideoParams.AsyncDepth = 1;
    m_mfxVideoParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12; // alternative noted by the author: MFX_FOURCC_YV12

    // Memory type flag
    m_bd3dAlloc = true;

    // Fresh input bitstream with a 2 MiB buffer
    memset(&m_mfxBS, 0, sizeof(m_mfxBS));
    sts = InitMfxBitstream(&m_mfxBS, 1024*1024*2);

    return sts;
}
  • 第二步,调用解码函数
    解码器是异步工作的,所以在调用解码函数后,我们需要从缓冲区内把数据取出,进行 D3D 显示或者回调到上层显示,代码如下所示:
// Appends one chunk of compressed data to the input bitstream, submits it to
// the decoder and, when a frame becomes available, copies it to p_yuvData
// and/or renders it.
//
//   p_data    - compressed input; must be non-null when len > 0
//   len       - number of bytes at p_data; values <= 0 append nothing
//   p_yuvData - optional output buffer; when non-null it receives the Y plane
//               (m_nSrcWidth * m_nSrcHeight bytes) followed by the
//               interleaved UV plane (half that size), i.e. the NV12 layout
//               requested in Init()
//
// Returns the resulting mfxStatus. Notable values produced here:
//   MFX_ERR_NULL_PTR     - len > 0 but p_data is null
//   MFX_ERR_MEMORY_ALLOC - no free surface, or the packet is larger than the
//                          whole bitstream buffer (reuses a code this function
//                          already returned, so callers see no new status)
//   MFX_ERR_MORE_DATA    - fewer than 5 bytes are buffered
//
// Fixes over the previous version:
//   * a packet larger than m_mfxBS.MaxLength overflowed the bitstream buffer;
//   * a negative len was converted to a huge unsigned value and silently
//     discarded the buffered data;
//   * syncp was read without ever being initialized when the decoder did not
//     set it;
//   * the frame copy ignored the surface pitch, which skews the picture when
//     rows are padded.
mfxStatus CIntelMediaDecode::h264DecPacket(unsigned char * p_data, int len, unsigned char* p_yuvData)
{
    mfxSyncPoint    syncp = NULL;
    mfxStatus       sts = MFX_ERR_NONE;
    mfxU16          nIndex = 0; // index of free surface

    if (len > 0)
    {
        if (NULL == p_data)
            return MFX_ERR_NULL_PTR;

        // The buffer is never grown, so a packet bigger than the buffer can
        // never be stored; reject it instead of writing past the end.
        if (static_cast<unsigned int>(len) > m_mfxBS.MaxLength)
            return MFX_ERR_MEMORY_ALLOC;

        // Not enough room behind the pending data: drop what is buffered
        if (static_cast<unsigned int>(len) > (m_mfxBS.MaxLength - m_mfxBS.DataLength))
        {
            m_mfxBS.DataOffset = 0;
            m_mfxBS.DataLength = 0;
        }

        // Compact the pending data to the buffer start, then append the packet
        memmove(m_mfxBS.Data, m_mfxBS.Data + m_mfxBS.DataOffset, m_mfxBS.DataLength);
        m_mfxBS.DataOffset = 0;

        memcpy(m_mfxBS.Data + m_mfxBS.DataLength, p_data, len);
        m_mfxBS.DataLength += len;
    }

    if (m_mfxBS.DataLength < 5)
    {
        m_sPrevState = MFX_ERR_MORE_DATA;
        return MFX_ERR_MORE_DATA;
    }

    // A new working surface is picked only after these two previous results
    if (MFX_ERR_MORE_SURFACE == m_sPrevState || MFX_ERR_NONE == m_sPrevState)
    {
        nIndex = GetFreeSurfaceIndex(m_pmfxSurfaces, m_mfxResponse.NumFrameActual);
        if (MSDK_INVALID_SURF_IDX == nIndex)
        {
            return MFX_ERR_MEMORY_ALLOC;
        }
    }

    sts = m_pmfxDEC->DecodeFrameAsync(&m_mfxBS, &(m_pmfxSurfaces[nIndex]), &m_pmfxOutSurface, &syncp);
    if (MFX_ERR_DEVICE_LOST == sts || MFX_ERR_DEVICE_FAILED == sts)
        return sts;
    m_sPrevState = sts;

    // Ignore warnings if output is available
    if (MFX_ERR_NONE < sts && syncp)
    {
        sts = MFX_ERR_NONE;
    }

    // Wait for the asynchronous decode to finish
    if (MFX_ERR_NONE == sts)
    {
        sts = m_mfxSession.SyncOperation(syncp, MSDK_DEC_WAIT_INTERVAL);
    }

    if (MFX_ERR_NONE == sts)
    {
        // With an external allocator the surface must be locked before its
        // data pointers are read
        if (m_bExternalAlloc)
        {
            sts = m_pMFXAllocator->Lock(m_pMFXAllocator->pthis, m_pmfxOutSurface->Data.MemId, &(m_pmfxOutSurface->Data));
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
        }

        ++m_nFrameIndex;

        // Copy the frame out; the data stays NV12 (no I420 conversion)
        if (p_yuvData)
        {
            const int nYUVSize = m_nSrcWidth * m_nSrcHeight;
            const int nPitch = m_pmfxOutSurface->Data.Pitch;

            if (nPitch > m_nSrcWidth)
            {
                // Rows are padded: copy row by row and skip the padding
                unsigned char* pDst = p_yuvData;
                for (int row = 0; row < m_nSrcHeight; ++row)
                {
                    memcpy(pDst, m_pmfxOutSurface->Data.Y + row * nPitch, m_nSrcWidth);
                    pDst += m_nSrcWidth;
                }
                for (int row = 0; row < m_nSrcHeight / 2; ++row)
                {
                    memcpy(pDst, m_pmfxOutSurface->Data.UV + row * nPitch, m_nSrcWidth);
                    pDst += m_nSrcWidth;
                }
            }
            else
            {
                // Rows are contiguous: two block copies are enough
                memcpy(p_yuvData, m_pmfxOutSurface->Data.Y, nYUVSize);
                memcpy(p_yuvData + nYUVSize, m_pmfxOutSurface->Data.UV, nYUVSize >> 1);
            }
        }

        if (m_bExternalAlloc)
        {
            sts = m_pMFXAllocator->Unlock(m_pMFXAllocator->pthis, m_pmfxOutSurface->Data.MemId, &(m_pmfxOutSurface->Data));
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
        }

        if (m_bDxv2Show)
        {
            ::GetClientRect(m_hWnd, &m_WndRect);
            sts = m_pD3dRender->RenderFrame(m_pmfxOutSurface, m_pMFXAllocator,
                m_WndRect, m_DisplayTitle, m_bEndedDrag, m_bShowToScale, &m_TargetRect);
            // A null-pointer result from the renderer is not treated as a decode error
            if (sts == MFX_ERR_NULL_PTR)
                sts = MFX_ERR_NONE;
        }
    }

    return sts;
}
  • 第三步,关闭解码器
// Releases everything the decoder holds.
//
// The bitstream buffer, the decoder object and the D3D renderer are always
// released. Frames and the allocator are released only when
// m_bIntelSystemInitFinish is set. The session and the snapshot buffer are
// now released unconditionally: Init() opens the session without touching
// m_bIntelSystemInitFinish, so the previous early return left the session
// open (and the snapshot buffer allocated) whenever Close() ran before that
// flag was set.
void Close()
{
    WipeMfxBitstream(&m_mfxBS);
    MSDK_SAFE_DELETE(m_pmfxDEC);
    MSDK_SAFE_DELETE(m_pD3dRender);

    if (m_bIntelSystemInitFinish)
    {
        m_bIntelSystemInitFinish = false;

        DeleteFrames();

        // allocator if used as external for MediaSDK must be deleted after decoder
        DeleteAllocator();
    }

    // Closed last, after the decoder, frames and allocator are gone
    m_mfxSession.Close();

    if (NULL != m_pSnapShotBuf)
    {
        free(m_pSnapShotBuf);
        m_pSnapShotBuf = NULL;
    }
}

至此,我们已经完成了对 Intel 集显解码器的封装,通过阅读 [SkeyeRTSPPlayer] 的代码可以对该库的调用流程有一个系统的认识;此外,目前集成的 Intel 集显解码库是比较旧的版本,尚不支持 H265 解码,有兴趣的朋友可以下载最新的 Intel 集显编解码库 Demo 自行封装解码库,相信现在的版本会更加易用和高效。

有任何技术问题,欢迎大家和我进行技术交流:
295222688@qq.com

大家也可以加入 SkeyePlayer 流媒体播放器 QQ 群进行讨论:
102644504

正文完
 0