In the earlier "SkeyeRTSPLive Efficient Transcoding: SkeyeVideoDecoder Efficient Decoding" articles in this series, we decoded video into raw image data (YUV/RGB) and then encoded it according to the transcoding requirements at hand, such as scaling the video resolution, adjusting the bitrate, or producing multiple output bitrates. To address the time cost of encoding high-resolution, high-quality, or high-compression-ratio (e.g. H.265) video during transcoding, we use Nvidia's hardware-accelerated encoder, aiming for the most efficient transcoding and the lowest push latency.
SkeyeVideoEncoder: SkeyeNvEncoder, a hardware encoding library based on Nvidia discrete GPUs
1. The interface declaration is as follows:
```cpp
class SkeyeNvEncoder
{
public:
    // codec: encoding format, 0 = H.264, 1 = H.265/HEVC
    int InitNvEncoder(int width, int height, int fps = 25, int bitrate = 4096,
                      int gop = 50, int qp = 28,
                      int rcMode = /*NV_ENC_PARAMS_RC_2_PASS_QUALITY*/NV_ENC_PARAMS_RC_CONSTQP,
                      char* encoderPreset = "Default", int codec = 0,
                      int nDeviceType = 0, int nDeviceID = 0);

    // H.264: get the SPS and PPS
    int GetSPSAndPPS(unsigned char* sps, long& spslen, unsigned char* pps, long& ppslen);

    // H.265: get the VPS, SPS and PPS
    int GetH265VPSSPSAndPPS(unsigned char* vps, long& vpslen,
                            unsigned char* sps, long& spslen,
                            unsigned char* pps, long& ppslen);

    // The encoder InputFormat is fixed to YUV420P (I420); sources in NV12, YUY2, etc.
    // can be converted at Init() time. [12/18/2016 dingshuai]
    unsigned char* NvEncodeSync(unsigned char* pYUV420, int inLenth, int& outLenth, bool& bKeyFrame);

    // Close the encoder and stop encoding
    int CloseNvEncoder();
};
```
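The call sequence is covered step by step in the next section; as a quick orientation, here is a minimal usage sketch. `GetNextI420Frame()` and `PushFrame()` are hypothetical placeholders for your own capture and push logic, and the parameter values are only illustrative:

```cpp
// A minimal usage sketch, assuming a 1080p@25fps I420 source.
// GetNextI420Frame() and PushFrame() are hypothetical, not part of the library.
SkeyeNvEncoder encoder;
if (encoder.InitNvEncoder(1920, 1080, 25, 4096, 50, 28,
                          NV_ENC_PARAMS_RC_CONSTQP, "LowLatencyHQ",
                          0 /*H.264*/) < 0)
    return -1;

unsigned char sps[100], pps[100];
long spslen = 0, ppslen = 0;
encoder.GetSPSAndPPS(sps, spslen, pps, ppslen); // fetched once for the stream header

int inSize = 1920 * 1080 * 3 / 2; // I420 frame size: width * height * 1.5
while (unsigned char* pYUV = GetNextI420Frame())
{
    int outSize = 0;
    bool bKeyFrame = false;
    unsigned char* pNalu = encoder.NvEncodeSync(pYUV, inSize, outSize, bKeyFrame);
    if (pNalu && outSize > 0)
        PushFrame(pNalu, outSize, bKeyFrame); // hand off to the muxer/pusher
}
encoder.CloseNvEncoder();
```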
2. SkeyeNvEncoder encoding library call flow
- Step 1: initialize the encoder and its parameters
```cpp
// Initialize the encoder parameters
int InitNvEncoder(int width, int height, int fps, int bitrate, int gop, int qp,
                  int rcMode, char* encoderPreset, int codec, int nDeviceType, int nDeviceID)
{
    // Parameter setup -- Start
    memset(&m_encodeConfig, 0, sizeof(EncodeConfig));
    m_encodeConfig.width  = width;
    m_encodeConfig.height = height;
    m_nVArea = width * height;
    m_nCheckyuvsize = m_nVArea * 3 / 2;
    // The encoder expects the bitrate in bps, but our input is in kbps, so *1024
    m_encodeConfig.bitrate = bitrate * 1024;
    // Multi-pass encoding optimized for image quality only works in low-latency mode (LOW_LATENCY)
    m_encodeConfig.rcMode = rcMode; // NV_ENC_PARAMS_RC_2_PASS_QUALITY
    m_encodeConfig.encoderPreset = encoderPreset;
    // Default to the low-latency preset and picture compression profile (HQ, HP, LOSSLESS ...)
    m_encodeConfig.presetGUID = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
    // I-frame interval [12/16/2016 dingshuai]
    m_encodeConfig.gopLength = gop; // NVENC_INFINITE_GOPLENGTH;
    // CUDA
    m_encodeConfig.deviceType = nDeviceType;
    m_encodeConfig.deviceID = nDeviceID;
    m_encodeConfig.codec = codec; // NV_ENC_H264;
    m_encodeConfig.fps = fps;
    m_encodeConfig.qp = qp;
    m_encodeConfig.i_quant_factor = DEFAULT_I_QFACTOR;
    m_encodeConfig.b_quant_factor = DEFAULT_B_QFACTOR;
    m_encodeConfig.i_quant_offset = DEFAULT_I_QOFFSET;
    m_encodeConfig.b_quant_offset = DEFAULT_B_QOFFSET;
    m_encodeConfig.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
    // Asynchronous output mode: 1 = async, 0 = sync
    m_encodeConfig.enableAsyncMode = 0;
    // The encoder's default input format is NV12 (hence the YUV420 -> NV12 conversion)
    m_encodeConfig.inputFormat = NV_ENC_BUFFER_FORMAT_NV12;
    // Not yet sure what these parameters are for
    m_encodeConfig.invalidateRefFramesEnableFlag = 0;
    m_encodeConfig.endFrameIdx = INT_MAX;
    // No B-frames; the encoder does not support them anyway, so setting this has no effect
    m_encodeConfig.numB = 0;
    if (m_encodeConfig.numB > 0)
    {
        //PRINTERR("B-frames are not supported\n");
        return -1;
    }
    // Other parameters, additions welcome... [12/18/2016 dingshuai]
    // Parameter setup -- End

    // Encoder initialization -- Start
    NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
    switch (m_encodeConfig.deviceType)
    {
#if defined(NV_WINDOWS)
    case NV_ENC_DX9:
        nvStatus = InitD3D9(m_encodeConfig.deviceID);
        break;
    case NV_ENC_DX10:
        nvStatus = InitD3D10(m_encodeConfig.deviceID);
        break;
    case NV_ENC_DX11:
        nvStatus = InitD3D11(m_encodeConfig.deviceID);
        break;
#endif
    // initialize Cuda
    case NV_ENC_CUDA:
        nvStatus = InitCuda(m_encodeConfig.deviceID, 0);
        break;
    }
    if (nvStatus != NV_ENC_SUCCESS)
        return -1;

    if (m_encodeConfig.deviceType != NV_ENC_CUDA)
        nvStatus = m_pNvHWEncoder->Initialize(m_pDevice, NV_ENC_DEVICE_TYPE_DIRECTX);
    else
        nvStatus = m_pNvHWEncoder->Initialize(m_pDevice, NV_ENC_DEVICE_TYPE_CUDA);
    if (nvStatus != NV_ENC_SUCCESS)
        return -2;

    //nvStatus = InitCuda(m_encodeConfig.deviceID, 0);
    //nvStatus = m_pNvHWEncoder->Initialize((void*)m_cuContext, NV_ENC_DEVICE_TYPE_CUDA);
    //if (nvStatus != NV_ENC_SUCCESS)
    //    return -2;

    m_encodeConfig.presetGUID = m_pNvHWEncoder->GetPresetGUID(m_encodeConfig.encoderPreset, m_encodeConfig.codec);

    nvStatus = m_pNvHWEncoder->CreateEncoder(&m_encodeConfig);
    if (nvStatus != NV_ENC_SUCCESS)
    {
        Deinitialize();
        return -3;
    }

    // Number of buffered encode frames [12/16/2016 dingshuai]
    uint32_t uEncodeBufferCount = 1;
    // Allocate the encode I/O buffers
    nvStatus = AllocateIOBuffers(m_pNvHWEncoder->m_uMaxWidth, m_pNvHWEncoder->m_uMaxHeight, uEncodeBufferCount);
    if (nvStatus != NV_ENC_SUCCESS)
        return -4;

    m_spslen = 0;
    m_ppslen = 0;
    memset(m_sps, 0x00, 100);
    memset(m_pps, 0x00, 100);
    m_bWorking = true;
    return 1;
}
```
Here we need to set the encoding format (0 = H.264, 1 = H.265; only these two formats are currently supported), the video resolution, frame rate, bitrate, I-frame interval (GOP), encoding quality, and the hardware-encoder-specific parameters, explained in detail below:
```cpp
// rcMode: Rate Control Modes, see the following enum:
// typedef enum _NV_ENC_PARAMS_RC_MODE
// {
//     NV_ENC_PARAMS_RC_CONSTQP              = 0x0,  /**< Constant QP mode */
//     NV_ENC_PARAMS_RC_VBR                  = 0x1,  /**< Variable bitrate mode */
//     NV_ENC_PARAMS_RC_CBR                  = 0x2,  /**< Constant bitrate mode */
//     NV_ENC_PARAMS_RC_VBR_MINQP            = 0x4,  /**< Variable bitrate mode with MinQP */
//     NV_ENC_PARAMS_RC_2_PASS_QUALITY       = 0x8,  /**< Multi pass encoding optimized for image quality and works only with low latency mode */
//     NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP = 0x10, /**< Multi pass encoding optimized for maintaining frame size and works only with low latency mode */
// }

// encoderPreset: encoder preset
// The preset determines the encoder's latency/quality trade-off
// if      (encoderPreset && (stricmp(encoderPreset, "HQ") == 0))
// else if (encoderPreset && (stricmp(encoderPreset, "LowLatencyHP") == 0))
// else if (encoderPreset && (stricmp(encoderPreset, "HP") == 0))
// else if (encoderPreset && (stricmp(encoderPreset, "LowLatencyHQ") == 0))
// else if (encoderPreset && (stricmp(encoderPreset, "BD") == 0))
// else if (encoderPreset && (stricmp(encoderPreset, "LOSSLESS") == 0))
// else if (encoderPreset && (stricmp(encoderPreset, "LowLatencyDefault") == 0))
// else if (encoderPreset && (stricmp(encoderPreset, "LosslessDefault") == 0))
// See nvEncoderAPI.h: /* Preset GUIDS supported by the NvEncodeAPI interface. */
```
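As a concrete illustration, a plausible configuration for a bandwidth-capped live push might pair CBR with a low-latency preset, while a quality-first transcode might prefer constant QP; the values below are illustrative, not prescriptive:

```cpp
SkeyeNvEncoder encoder;

// Live push: hold a steady 2 Mbps at 25 fps with a 2-second GOP (illustrative values)
encoder.InitNvEncoder(1280, 720, 25, 2048, 50, 28,
                      NV_ENC_PARAMS_RC_CBR, "LowLatencyHQ", 0 /*H.264*/);

// Quality-first transcode: constant QP, H.265 for a higher compression ratio
encoder.InitNvEncoder(1920, 1080, 25, 4096, 50, 23,
                      NV_ENC_PARAMS_RC_CONSTQP, "HQ", 1 /*H.265*/);
```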
- Step 2: retrieve the codec parameter sets
If the encoding format is H.264, we obtain the SPS and PPS parameter-set headers via GetSPSAndPPS, as shown in the following code:
```cpp
// Get the SPS and PPS
int GetSPSAndPPS(unsigned char* sps, long& spslen, unsigned char* pps, long& ppslen)
{
    if (!m_bWorking)
    {
        return -1;
    }
    if (m_spslen == 0 || m_ppslen == 0)
    {
        unsigned char* pEncData = NULL;
        int nDataSize = 0;
        bool bKeyFrame = false;
        // Encode one blank frame so the parameter sets can be parsed from its bitstream
        unsigned char* pTempBuffer = new unsigned char[m_nCheckyuvsize];
        memset(pTempBuffer, 0x00, m_nCheckyuvsize);
        pEncData = NvEncodeSync(pTempBuffer, m_nCheckyuvsize, nDataSize, bKeyFrame);
        if (pEncData && nDataSize > 0)
        {
            GetH264SPSandPPS((char*)pEncData, nDataSize, (char*)m_sps, (int*)&m_spslen, (char*)m_pps, (int*)&m_ppslen);
        }
        // Force an IDR on the next encode so the real stream starts with a keyframe
        m_encPicCommand.bForceIDR = 1;
        if (pTempBuffer)
        {
            delete[] pTempBuffer;
            pTempBuffer = NULL;
        }
    }
    if (m_spslen > 0 && m_ppslen > 0)
    {
        memcpy(sps, m_sps, m_spslen);
        memcpy(pps, m_pps, m_ppslen);
        spslen = m_spslen;
        ppslen = m_ppslen;
    }
    return 1;
}
```
If the encoding format is H.265, we obtain the VPS, SPS and PPS parameter-set headers via GetH265VPSSPSAndPPS, as shown in the following code:
```cpp
// Get the VPS, SPS and PPS (H.265)
int GetH265VPSSPSAndPPS(unsigned char* vps, long& vpslen,
                        unsigned char* sps, long& spslen,
                        unsigned char* pps, long& ppslen)
{
    if (!m_bWorking)
    {
        return -1;
    }
    if (m_spslen == 0 || m_ppslen == 0)
    {
        unsigned char* pEncData = NULL;
        int nDataSize = 0;
        bool bKeyFrame = false;
        unsigned char* pTempBuffer = new unsigned char[m_nCheckyuvsize];
        memset(pTempBuffer, 0x00, m_nCheckyuvsize);
        pEncData = NvEncodeSync(pTempBuffer, m_nCheckyuvsize, nDataSize, bKeyFrame);
        if (pEncData && nDataSize > 0)
        {
            GetH265VPSandSPSandPPS((char*)pEncData, nDataSize,
                                   (char*)m_vps, (int*)&m_vpslen,
                                   (char*)m_sps, (int*)&m_spslen,
                                   (char*)m_pps, (int*)&m_ppslen);
        }
        m_encPicCommand.bForceIDR = 1;
        if (pTempBuffer)
        {
            delete[] pTempBuffer;
            pTempBuffer = NULL;
        }
    }
    spslen = m_spslen;
    ppslen = m_ppslen;
    vpslen = m_vpslen;
    if (m_spslen > 0)
        memcpy(sps, m_sps, m_spslen);
    if (m_ppslen > 0)
        memcpy(pps, m_pps, m_ppslen);
    if (m_vpslen > 0)
        memcpy(vps, m_vps, m_vpslen);
    return 1;
}
```
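Once retrieved, the parameter sets are typically prepended to the bitstream (or handed to the muxer/RTSP layer) with Annex-B start codes. A rough sketch of that packaging follows; `BuildAnnexBHeader` is a hypothetical helper, and it assumes the buffers hold raw NAL payloads without start codes (pass `vpslen = 0` for H.264, which has no VPS):

```cpp
#include <cstring>

// Hypothetical sketch: concatenate the parameter sets into an Annex-B stream header.
static const unsigned char kStartCode[4] = { 0x00, 0x00, 0x00, 0x01 };

int BuildAnnexBHeader(unsigned char* dst,
                      const unsigned char* vps, long vpslen,
                      const unsigned char* sps, long spslen,
                      const unsigned char* pps, long ppslen)
{
    int pos = 0;
    if (vps && vpslen > 0) // H.265 only
    {
        memcpy(dst + pos, kStartCode, 4); pos += 4;
        memcpy(dst + pos, vps, vpslen);   pos += vpslen;
    }
    memcpy(dst + pos, kStartCode, 4); pos += 4;
    memcpy(dst + pos, sps, spslen);   pos += spslen;
    memcpy(dst + pos, kStartCode, 4); pos += 4;
    memcpy(dst + pos, pps, ppslen);   pos += ppslen;
    return pos; // total header length in bytes
}
```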
- Step 3: call the encode function to encode video frames
The encoder input format (InputFormat) is fixed to YUV420P (I420); if the source image uses another color format such as NV12 or YUY2, it must be converted before being fed into the encoder.

```cpp
unsigned char* NvEncodeSync(unsigned char* pYUV420, int inLenth, int& outLenth, bool& bKeyFrame)
{
    // Initialization has not finished, or the input does not match the expected YUV size
    if (!m_bWorking || inLenth != m_nCheckyuvsize)
    {
        outLenth = 0;
        return NULL;
    }

    NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
    bool bError = false;
    EncodeBuffer* pEncodeBuffer = m_EncodeBufferQueue.GetAvailable();

    EncodeFrameConfig stEncodeFrame;
    memset(&stEncodeFrame, 0, sizeof(stEncodeFrame));
    stEncodeFrame.yuv[0] = pYUV420;                              // Y
    stEncodeFrame.yuv[1] = pYUV420 + m_nVArea;                   // U
    stEncodeFrame.yuv[2] = pYUV420 + m_nVArea + (m_nVArea >> 2); // V
    int nHelfWidth = m_encodeConfig.width >> 1;
    stEncodeFrame.stride[0] = m_encodeConfig.width;
    stEncodeFrame.stride[1] = nHelfWidth;
    stEncodeFrame.stride[2] = nHelfWidth;
    stEncodeFrame.width  = m_encodeConfig.width;
    stEncodeFrame.height = m_encodeConfig.height;

    if (m_encodeConfig.deviceType == 0) // CUDA
    {
        // CUDA lock
        CCudaAutoLock cuLock((CUcontext)m_pDevice); // m_cuContext
        nvStatus = PreProcessInput(pEncodeBuffer, stEncodeFrame.yuv,
                                   stEncodeFrame.width, stEncodeFrame.height,
                                   m_pNvHWEncoder->m_uCurWidth, m_pNvHWEncoder->m_uCurHeight,
                                   m_pNvHWEncoder->m_uMaxWidth, m_pNvHWEncoder->m_uMaxHeight);
        if (nvStatus != NV_ENC_SUCCESS)
        {
            outLenth = 0;
            return NULL;
        }
        nvStatus = m_pNvHWEncoder->NvEncMapInputResource(pEncodeBuffer->stInputBfr.nvRegisteredResource, &pEncodeBuffer->stInputBfr.hInputSurface);
        if (nvStatus != NV_ENC_SUCCESS)
        {
            PRINTERR("Failed to Map input buffer %p\n", pEncodeBuffer->stInputBfr.hInputSurface);
            bError = true;
            outLenth = 0;
            return NULL;
        }
    }
    else // DirectX or any others
    {
        unsigned char* pInputSurface = NULL;
        uint32_t lockedPitch = 0;
        while (pInputSurface == NULL)
        {
            nvStatus = m_pNvHWEncoder->NvEncLockInputBuffer(pEncodeBuffer->stInputBfr.hInputSurface, (void**)&pInputSurface, &lockedPitch);
            if (nvStatus != NV_ENC_SUCCESS)
                return NULL;
            if (pInputSurface == NULL)
            {
                nvStatus = m_pNvHWEncoder->NvEncUnlockInputBuffer(pEncodeBuffer->stInputBfr.hInputSurface);
                if (nvStatus != NV_ENC_SUCCESS)
                    return NULL;
                Sleep(1);
            }
        }
        if (pEncodeBuffer->stInputBfr.bufferFmt == NV_ENC_BUFFER_FORMAT_NV12_PL)
        {
            unsigned char* pInputSurfaceCh = pInputSurface + (pEncodeBuffer->stInputBfr.dwHeight * lockedPitch);
            CmnConvertYUVtoNV12(stEncodeFrame.yuv[0], stEncodeFrame.yuv[1], stEncodeFrame.yuv[2],
                                pInputSurface, pInputSurfaceCh,
                                stEncodeFrame.width, stEncodeFrame.height,
                                stEncodeFrame.width, lockedPitch);
        }
    }

    nvStatus = m_pNvHWEncoder->NvEncEncodeFrame(pEncodeBuffer, &m_encPicCommand,
                                                m_encodeConfig.width, m_encodeConfig.height,
                                                NV_ENC_PIC_STRUCT_FRAME, m_qpDeltaMapArray, m_qpDeltaMapArraySize);
    if (nvStatus != NV_ENC_SUCCESS)
    {
        bError = true;
        outLenth = 0;
        return NULL;
    }

    pEncodeBuffer = m_EncodeBufferQueue.GetAvailable();
    if (!pEncodeBuffer)
    {
        pEncodeBuffer = m_EncodeBufferQueue.GetPending();
        // Fetch the encoded H.264/H.265 data [12/15/2016 dingshuai]
        nvStatus = m_pNvHWEncoder->ProcessOutput(pEncodeBuffer, m_pOutputBuffer, m_nOutputBufLen);
        if (nvStatus != NV_ENC_SUCCESS)
        {
            bError = true;
            outLenth = 0;
        }
        if (m_encodeConfig.deviceType == 0) // CUDA
        {
            // Unmap the input buffer once the frame is done
            if (pEncodeBuffer->stInputBfr.hInputSurface)
            {
                nvStatus = m_pNvHWEncoder->NvEncUnmapInputResource(pEncodeBuffer->stInputBfr.hInputSurface);
                pEncodeBuffer->stInputBfr.hInputSurface = NULL;
            }
            //pEncodeBuffer = m_EncodeBufferQueue.GetAvailable();
        }
        else
        {
            nvStatus = m_pNvHWEncoder->NvEncUnlockInputBuffer(pEncodeBuffer->stInputBfr.hInputSurface);
            if (nvStatus != NV_ENC_SUCCESS)
                return NULL;
        }
    }
    else
    {
        outLenth = 0;
        return NULL;
    }

    if (m_encPicCommand.bForceIDR)
    {
        m_encPicCommand.bForceIDR = 0;
    }
    outLenth = m_nOutputBufLen;
    return m_pOutputBuffer;
}
```
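The DirectX path above relies on CmnConvertYUVtoNV12 to interleave the planar chroma; its body is not shown in this article, but a plain (non-SIMD) reference version of that I420-to-NV12 conversion would look roughly like this:

```cpp
#include <cstring>

// A plain reference sketch of an I420 -> NV12 conversion (the actual
// CmnConvertYUVtoNV12 implementation may differ): copy the Y plane row by row,
// then interleave the U and V planes into a single UV plane.
void I420ToNV12(const unsigned char* y, const unsigned char* u, const unsigned char* v,
                unsigned char* dstY, unsigned char* dstUV,
                int width, int height, int srcStride, int dstPitch)
{
    // Luma: one full-resolution plane
    for (int row = 0; row < height; ++row)
        memcpy(dstY + row * dstPitch, y + row * srcStride, width);

    // Chroma: half resolution in both dimensions, interleaved as UVUV...
    int chromaW = width >> 1, chromaH = height >> 1, chromaStride = srcStride >> 1;
    for (int row = 0; row < chromaH; ++row)
    {
        unsigned char* dst = dstUV + row * dstPitch;
        const unsigned char* su = u + row * chromaStride;
        const unsigned char* sv = v + row * chromaStride;
        for (int col = 0; col < chromaW; ++col)
        {
            dst[2 * col]     = su[col]; // U
            dst[2 * col + 1] = sv[col]; // V
        }
    }
}
```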
- Step 4: close the encoder and release the memory and GPU resources it allocated
```cpp
int CloseNvEncoder()
{
    m_bWorking = false;
    NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
    ReleaseIOBuffers();
    m_pNvHWEncoder->NvEncDestroyEncoder();
    if (m_cuContext)
    {
        __cu(cuCtxDestroy(m_cuContext));
    }
    return nvStatus;
}
```
For any technical questions, you are welcome to join the SkeyePlayer streaming media player QQ group to discuss them with me: 102644504