关于ffmpeg:音视频系列四ffmpeg之获取音视频帧数据

title: 音视频系列四：ffmpeg之获取音视频帧数据

categories:[ffmpeg]

tags:[音视频编程]

date: 2021/11/29

<div align = 'right'>作者：hackett</div>

<div align = 'right'>微信公众号：加班猿</div>

一、AVFrame解码视频

1.先贴一个ffmpeg解析flv文件20帧数据后的截图。AVFrame是包含码流参数较多的结构体，结构体源码位于libavutil/frame.h中

完整代码：

#include <stdio.h>#ifdef __cplusplusextern "C" {#endif#include <libavcodec/avcodec.h>#include <libavformat/avformat.h>#ifdef __cplusplus};#endifint openCodecContext(const AVFormatContext* pFormatCtx, int* pStreamIndex, enum AVMediaType type, AVCodecContext** ppCodecCtx) {    int streamIdx = -1;    // 获取流下标    for (int i = 0; i < pFormatCtx->nb_streams; i++) {        if (pFormatCtx->streams[i]->codec->codec_type == type) {            streamIdx = i;            break;        }    }    if (streamIdx == -1) {        printf("find video stream failed!\n");        exit(-1);    }    // 寻找解码器    AVCodecContext* pCodecCtx = pFormatCtx->streams[streamIdx]->codec;    AVCodec* pCodec = avcodec_find_decoder(pCodecCtx->codec_id);    if (NULL == pCodec) {        printf("avcode find decoder failed!\n");        exit(-1);    }    //关上解码器    if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0) {        printf("avcode open failed!\n");        exit(-1);    }    *ppCodecCtx = pCodecCtx;    *pStreamIndex = streamIdx;    return 0;}int main(void){    AVFormatContext* pInFormatCtx = NULL;    AVCodecContext* pVideoCodecCtx = NULL;    AVCodecContext* pAudioCodecCtx = NULL;    AVPacket* pPacket = NULL;    AVFrame* pFrame = NULL;    int ret;    /* 反对本地文件和网络url */    const char streamUrl[] = "./ouput_1min.flv";    /* 1. 注册 */    av_register_all();    pInFormatCtx = avformat_alloc_context();    /* 2. 关上流 */    if (avformat_open_input(&pInFormatCtx, streamUrl, NULL, NULL) != 0) {        printf("Couldn't open input stream.\n");        return -1;    }    /* 3. 获取流的信息 */    if (avformat_find_stream_info(pInFormatCtx, NULL) < 0) {        printf("Couldn't find stream information.\n");        return -1;    }    int videoStreamIdx = -1;    int audioStreamIdx = -1;    /* 4. 
寻找并关上解码器 */    openCodecContext(pInFormatCtx, &videoStreamIdx, AVMEDIA_TYPE_VIDEO, &pVideoCodecCtx);    openCodecContext(pInFormatCtx, &audioStreamIdx, AVMEDIA_TYPE_AUDIO, &pAudioCodecCtx);    pPacket = av_packet_alloc();    pFrame = av_frame_alloc();    int cnt = 20; // 读取20帧数据（音频和视频）    while (cnt--) {        /* 5. 读流数据, 未解码的数据寄存于pPacket */        ret = av_read_frame(pInFormatCtx, pPacket);        if (ret < 0) {            printf("av_read_frame error\n");            break;        }        /* 6. 解码, 解码后的数据寄存于pFrame */        /* 视频解码 */        if (pPacket->stream_index == videoStreamIdx) {            avcodec_decode_video2(pVideoCodecCtx, pFrame, &ret, pPacket);            if (ret == 0) {                printf("video decodec error!\n");                continue;            }            printf("* * * * * * video * * * * * * * * *\n");            printf("___height: [%d]\n", pFrame->height);            printf("____width: [%d]\n", pFrame->width);            printf("pict_type: [%d]\n", pFrame->pict_type);            printf("key_frame: [%d]\n", pFrame->key_frame); // 视频关键帧  1 -> 是 0 -> 否            printf("___format: [%d]\n", pFrame->format);            printf("* * * * * * * * * * * * * * * * * * *\n\n");        }        /* 音频解码 */        if (pPacket->stream_index == audioStreamIdx) {            avcodec_decode_audio4(pAudioCodecCtx, pFrame, &ret, pPacket);            if (ret < 0) {                printf("audio decodec error!\n");                continue;            }            printf("* * * * * * audio * * * * * * * * * *\n");            printf("____nb_samples: [%d]\n", pFrame->nb_samples);            printf("__samples_rate: [%d]\n", pFrame->sample_rate);            printf("channel_layout: [%lu]\n", pFrame->channel_layout);            printf("________format: [%d]\n", pFrame->format);            printf("* * * * * * * * * * * * * * * * * * *\n\n");        }        av_packet_unref(pPacket); /* 将缓存空间的援用计数-1，并将Packet中的其余字段设为初始值。如果援用计数为0，主动的开释缓存空间 */    }    /* 开释资源 */    
av_frame_free(&pFrame);    av_packet_free(&pPacket);    avcodec_close(pVideoCodecCtx);    avcodec_close(pAudioCodecCtx);    avformat_close_input(&pInFormatCtx);    return 0;}

2.简单介绍一下流程中各个函数的意义：

av_register_all()：注册FFmpeg所有编解码器。

avformat_open_input()：打开流的AVFormatContext。

avformat_find_stream_info()：获取流的信息。

avcodec_find_decoder()：查找解码器。

avcodec_open2()：打开解码器。

av_read_frame()：读流数据。

avcodec_decode_video2()：视频解码。

av_write_frame()：将编码后的视频码流写入文件（本文的示例代码中没有用到）。

av_packet_unref()：将缓存空间的引用计数-1，并将Packet中的其他字段设为初始值。如果引用计数为0，自动释放缓存空间。

二、AVFrame 数据结构

AVFrame结构体一般用于存储原始数据（即非压缩数据，例如对视频来说是YUV，RGB，对音频来说是PCM），此外还包含了一些相关的信息。

这里源码的注释太冗长所以省略了。

/*
 * AVFrame (excerpt with the original header comments stripped).
 * Holds decoded, uncompressed data - YUV/RGB for video, PCM for audio -
 * together with related per-frame information.
 */
typedef struct AVFrame {
#define AV_NUM_DATA_POINTERS 8
    /* Plane pointers: one entry per picture plane (video) or per channel plane (audio). */
    uint8_t *data[AV_NUM_DATA_POINTERS];
    /*
     * Video: size in bytes of one row of each plane; may exceed the picture
     * width because of alignment padding.
     * Audio: size in bytes of each plane.
     */
    int linesize[AV_NUM_DATA_POINTERS];
    uint8_t **extended_data;
    /* Video frame width and height. */
    int width, height;
    /* Number of audio samples per channel in this frame. */
    int nb_samples;
    /* Raw data format: enum AVPixelFormat for video, enum AVSampleFormat for audio. */
    int format;
    /* 1 if this is a key frame, 0 otherwise. */
    int key_frame;
    /* Picture type (I, P, B, ...). */
    enum AVPictureType pict_type;
    AVRational sample_aspect_ratio;
    /* Presentation timestamp, in time_base units. */
    int64_t pts;
#if FF_API_PKT_PTS
    attribute_deprecated
    int64_t pkt_pts;
#endif
    /* DTS copied from the packet that produced this frame. */
    int64_t pkt_dts;
    /* Picture number in coded order. */
    int coded_picture_number;
    /* Picture number in display order. */
    int display_picture_number;
    int quality;
    void *opaque;
#if FF_API_ERROR_FRAME
    attribute_deprecated
    uint64_t error[AV_NUM_DATA_POINTERS];
#endif
    int repeat_pict;
    /* Non-zero if the picture is interlaced. */
    int interlaced_frame;
    int top_field_first;
    int palette_has_changed;
    int64_t reordered_opaque;
    /* Audio sample rate. */
    int sample_rate;
    uint64_t channel_layout;
    /* Reference-counted buffers (AVBufferRef) that manage this frame's data. */
    AVBufferRef *buf[AV_NUM_DATA_POINTERS];
    AVBufferRef **extended_buf;
    int        nb_extended_buf;
    AVFrameSideData **side_data;
    int            nb_side_data;
#define AV_FRAME_FLAG_CORRUPT       (1 << 0)
#define AV_FRAME_FLAG_DISCARD   (1 << 2)
    int flags;
    enum AVColorRange color_range;
    enum AVColorPrimaries color_primaries;
    enum AVColorTransferCharacteristic color_trc;
    enum AVColorSpace colorspace;
    enum AVChromaLocation chroma_location;
    int64_t best_effort_timestamp;
    /* Byte offset in the input file of the last packet fed to the decoder. */
    int64_t pkt_pos;
    /* Duration of the corresponding packet, in AVStream->time_base units. */
    int64_t pkt_duration;
    AVDictionary *metadata;
    int decode_error_flags;
#define FF_DECODE_ERROR_INVALID_BITSTREAM   1
#define FF_DECODE_ERROR_MISSING_REFERENCE   2
#define FF_DECODE_ERROR_CONCEALMENT_ACTIVE  4
#define FF_DECODE_ERROR_DECODE_SLICES       8
    /* Number of audio channels. */
    int channels;
    /* Size of the corresponding packet. */
    int pkt_size;
#if FF_API_FRAME_QP
    attribute_deprecated
    int8_t *qscale_table;
    attribute_deprecated
    int qstride;
    attribute_deprecated
    int qscale_type;
    attribute_deprecated
    AVBufferRef *qp_table_buf;
#endif
    AVBufferRef *hw_frames_ctx;
    AVBufferRef *opaque_ref;
    /* Cropping: number of pixels to discard from the top/bottom/left/right border of the frame. */
    size_t crop_top;
    size_t crop_bottom;
    size_t crop_left;
    size_t crop_right;
    AVBufferRef *private_ref;
    } AVFrame;

接下来重点看常用的一些结构体成员：

2.1 data

 uint8_t *data[AV_NUM_DATA_POINTERS]; // 解码后原始数据（对视频来说是YUV，RGB，对音频来说是PCM）

data 是一个指针数组，数组的每一个元素是一个指针，指向视频中图像的某一 plane 或音频中某一声道的 plane。

2.2 linesize

int linesize[AV_NUM_DATA_POINTERS]; // data中“一行”数据的大小。注意：未必等于图像的宽，一般大于图像的宽

对于视频来说，linesize 每个元素是一个图像 plane 中一行图像的大小(字节数)。注意有对齐要求

对于音频来说，linesize 每个元素是一个音频 plane 的大小(字节数)

linesize 可能会因性能上的考虑而填充一些额外的数据，因此 linesize 可能比实际对应的音视频数据尺寸要大。

2.3 width, height;

int width, height; // 视频帧宽和高（1920x1080,1280x720...）

2.4 nb_samples

int nb_samples; // 音频帧中单个声道中包含的采样点数。

2.5 format

int format; // 解码后原始数据类型

对于视频帧，此值对应于enum AVPixelFormat

/* Pixel formats; AVFrame.format holds one of these values for video frames (excerpt). */
enum AVPixelFormat {
      AV_PIX_FMT_NONE = -1,
      AV_PIX_FMT_YUV420P,   ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
      AV_PIX_FMT_YUYV422,   ///< packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
      AV_PIX_FMT_RGB24,     ///< packed RGB 8:8:8, 24bpp, RGBRGB...
      AV_PIX_FMT_BGR24,     ///< packed RGB 8:8:8, 24bpp, BGRBGR...
      AV_PIX_FMT_YUV422P,   ///< planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
      AV_PIX_FMT_YUV444P,   ///< planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
      AV_PIX_FMT_YUV410P,   ///< planar YUV 4:1:0,  9bpp, (1 Cr & Cb sample per 4x4 Y samples)
      AV_PIX_FMT_YUV411P,   ///< planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
      AV_PIX_FMT_GRAY8,     ///<        Y        ,  8bpp
      AV_PIX_FMT_MONOWHITE, ///<        Y        ,  1bpp, 0 is white, 1 is black, in each byte pixels are ordered from the msb to the lsb
      AV_PIX_FMT_MONOBLACK, ///<        Y        ,  1bpp, 0 is black, 1 is white, in each byte pixels are ordered from the msb to the lsb
      AV_PIX_FMT_PAL8,      ///< 8 bit with PIX_FMT_RGB32 palette
      AV_PIX_FMT_YUVJ420P,  ///< planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of PIX_FMT_YUV420P and setting color_range
      /* ... (remaining pixel formats omitted in this excerpt) */
  }

对于音频帧，此值对应于enum AVSampleFormat

/* Audio sample formats; AVFrame.format holds one of these values for audio frames. */
enum AVSampleFormat {
      AV_SAMPLE_FMT_NONE = -1,
      AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
      AV_SAMPLE_FMT_S16,         ///< signed 16 bits
      AV_SAMPLE_FMT_S32,         ///< signed 32 bits
      AV_SAMPLE_FMT_FLT,         ///< float
      AV_SAMPLE_FMT_DBL,         ///< double
      AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
      AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
      AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
      AV_SAMPLE_FMT_FLTP,        ///< float, planar
      AV_SAMPLE_FMT_DBLP,        ///< double, planar
     AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
  };

2.6 key_frame

int key_frame; // 是否是关键帧

2.7 pict_type

enum AVPictureType pict_type; // 帧类型（I,B,P...）

视频帧类型(I、B、P 等)

/* Video picture types (I, P, B, ...); this is the type of AVFrame.pict_type. */
enum AVPictureType {
    AV_PICTURE_TYPE_NONE = 0, ///< Undefined
    AV_PICTURE_TYPE_I,     ///< Intra
    AV_PICTURE_TYPE_P,     ///< Predicted
    AV_PICTURE_TYPE_B,     ///< Bi-dir predicted
    AV_PICTURE_TYPE_S,     ///< S(GMC)-VOP MPEG-4
    AV_PICTURE_TYPE_SI,    ///< Switching Intra
    AV_PICTURE_TYPE_SP,    ///< Switching Predicted
    AV_PICTURE_TYPE_BI,    ///< BI type
};

2.8 sample_aspect_ratio

AVRational sample_aspect_ratio; // 采样（像素）宽高比 SAR，例如 1:1，未知时为 0/1；注意它不是 16:9、4:3 这类画面显示宽高比

2.9 pts

int64_t pts; // 显示时间戳，单位是 time_base

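2.10 pkt_pts / pkt_dts

int64_t pkt_pts; // 已废弃（受 FF_API_PKT_PTS 控制）
int64_t pkt_dts;

pkt_pts 是从对应 packet(解码生成此 frame)中拷贝的显示时间戳 PTS。

pkt_dts 是此 frame 对应的 packet 中的解码时间戳。是从对应 packet(解码生成此 frame)中拷贝 DTS 得到此值。
如果对应的 packet 中只有 dts 而未设置 pts，则此值也是此 frame 的 pts。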

2.11 coded_picture_number

int coded_picture_number; // 编码帧序号

2.12 display_picture_number

int display_picture_number; // 显示帧序号

2.13 interlaced_frame

int interlaced_frame; // 是否是隔行扫描

2.14 sample_rate

int sample_rate; // 音频采样率

2.15 buf

AVBufferRef *buf[AV_NUM_DATA_POINTERS];

此帧的数据可以由 AVBufferRef 管理，AVBufferRef 提供 AVBuffer 引用机制

AVBuffer 是 FFmpeg 中很常用的一种缓冲区，缓冲区使用引用计数(reference-counted)机制

2.16 pkt_pos

int64_t pkt_pos; // 最后一个送入解码器的 packet 在输入文件中的位置偏移量

2.17 pkt_duration

int64_t pkt_duration;// 对应 packet 的时长，单位是 AVStream->time_base

2.18 channels

int channels;// 音频声道数量

2.19 pkt_size

int pkt_size;// 对应 packet 的大小

2.20 crop_

size_t crop_top;size_t crop_bottom;size_t crop_left;size_t crop_right;

用于视频帧图像裁切。四个值分别为从 frame 的上/下/左/右边界裁切的像素数。

如果你觉得文章还不错，可以给个"三连"

我是加班猿，咱们下期见