ijkplayer框架简析 -- FFmpeg中重要结构体 · Android杂文

A complete, cross-platform solution to record, convert and stream audio and video.

FFmpeg 是一个多媒体框架，能够处理解编码、转码、mux、demux、流、滤镜和播放。它支持很多格式，且具有高度的移植性。

类库

libavformat - 用于解析和生成音视频格式
libavcodec - 编解码音视频
libavutil - 工具库
libswscale - 提供比例缩放、色彩映射转换、图像颜色空间或格式转换的功能
libswresample - 音频重采样，采样格式转换和混合等
libavfilter - 滤波器，如宽高比裁剪、格式化、非格式化、伸缩
libpostproc - 后期效果处理，如图像的去块效应等
libavdevice - 硬件采集、加速、显示

结构体之间关系

协议（http, rtsp, rtmp, mms）

AVIOContext，URLProtocol，URLContext 主要存储视音频使用的协议的类型以及状态。URLProtocol 存储输入视音频使用的封装格式；每种协议都对应一个URLProtocol结构（注意：FFMPEG中文件也被当做一种协议 “file” ）
封装（flv, avi, rmvb, mp4）

AVFormatContext 主要存储视音频封装格式中包含的信息；AVInputFormat 存储输入视音频使用的封装格式。每种视音频封装格式都对应一个 AVInputFormat 结构
编解码（h264, mpeg2, aac, mp3）

每个 AVStream 存储一个视频/音频流的相关数据；每个 AVStream 对应一个 AVCodecContext，存储该视频 / 音频流使用解码方式的相关数据；每个 AVCodecContext 中对应一个 AVCodec，包含该视频/音频对应的解码器。每种解码器都对应一个 AVCodec 结构
数据

视频的话，每个结构一般是存一帧（音频可能有好几帧）

解码前数据：AVPacket；解码后数据：AVFrame

结构体

网络协议

AVIOContext

管理输入输出数据的结构体，在 avformat_open_input() 中进行初始化

typedef struct AVIOContext{
    const AVClass *av_class;
    unsigned char *buffer;  /**< Start of the buffer. */
    int buffer_size;        /**< Maximum buffer size */
    unsigned char *buf_ptr; /**< Current position in the buffer */
    unsigned char *buf_end; /**< End of the data */
    int (*read_packet)(void *opaque, uint8_t *buf, int buf_size);
    int (*write_packet)(void *opaque, uint8_t *buf, int buf_size);
    int64_t (*seek)(void *opaque, int64_t offset, int whence);
    int64_t pos;            /**< position in the file of the current buffer */
    int must_flush;         /**< true if the next seek should flush */
    int eof_reached;        /**< true if eof reached */
    int write_flag;         /**< true if open for writing */
    int max_packet_size;
    unsigned long checksum;
    unsigned char *checksum_ptr;
    unsigned long (*update_checksum)(unsigned long checksum, const uint8_t *buf, unsigned int size);
    int error;              /**< contains the error code or 0 if no error happened */
    int (*read_pause)(void *opaque, int pause);
    int64_t (*read_seek)(void *opaque, int stream_index,
                         int64_t timestamp, int flags);
    int seekable;
    int64_t maxsize;
    int direct;
    int64_t bytes_read;
    int seek_count;
    int writeout_count;
    int orig_buffer_size;
    int short_seek_threshold;
}

unsigned char *buffer : 缓存开始位置
int buffer_size : 缓存大小（默认32768）
unsigned char *buf_ptr : 当前指针读取到的位置
unsigned char *buf_end : 缓存结束的位置
void *opaque : URLContext 结构体

在解码的情况下，buffer 用于存储 ffmpeg 读入的数据。例如打开一个视频文件的时候，先把数据从硬盘读入buffer，然后在送给解码器用于解码。

URLProtocol

述了音视频数据传输所使用的协议，每种传输协议 (例如 HTTP、RTMP) 等，都会对应一个 URLProtocol 结构

typedef struct URLProtocol {
	const char *name;
	int (*url_open)(URLContext *h, const char *url, int flags);
	int (*url_read)(URLContext *h, unsigned char *buf, int size);
	int (*url_write)(URLContext *h, const unsigned char *buf, int size);
	int64_t (*url_seek)(URLContext *h, int64_t pos, int whence);
	int (*url_close)(URLContext *h);
	struct URLProtocol *next;
	int (*url_read_pause)(URLContext *h, int pause);
	int64_t (*url_read_seek)(URLContext *h, int stream_index,
		int64_t timestamp, int flags);
	int (*url_get_file_handle)(URLContext *h);
	int priv_data_size;
	const AVClass *priv_data_class;
	int flags;
	int (*url_check)(URLContext *h, int mask);
}

const char *name : 存储协议的名称，每一种输入协议都对应这样一个结构体

URLProtocol ff_rtmp_protocol = {
    .name                = "rtmp",
    .url_open            = rtmp_open,
    .url_read            = rtmp_read,
    .url_write           = rtmp_write,
    .url_close           = rtmp_close,
    .url_read_pause      = rtmp_read_pause,
    .url_read_seek       = rtmp_read_seek,
    .url_get_file_handle = rtmp_get_file_handle,
    .priv_data_size      = sizeof(RTMP),
    .flags               = URL_PROTOCOL_FLAG_NETWORK,
};

等号右边的函数是完成具体读写功能的函数。可以看一下 file 协议的几个函数（file.c）

static int file_read(URLContext *h, unsigned char *buf, int size)
{
    int fd = (intptr_t) h->priv_data;
    int r = read(fd, buf, size);
    return (-1 == r)?AVERROR(errno):r;
}
 
static int file_write(URLContext *h, const unsigned char *buf, int size)
{
    int fd = (intptr_t) h->priv_data;
    int r = write(fd, buf, size);
    return (-1 == r)?AVERROR(errno):r;
}
 
static int file_get_handle(URLContext *h)
{
    return (intptr_t) h->priv_data;
}
 
static int file_check(URLContext *h, int mask)
{
    struct stat st;
    int ret = stat(h->filename, &st);
    if (ret < 0)
        return AVERROR(errno);
 
    ret |= st.st_mode&S_IRUSR ? mask&AVIO_FLAG_READ  : 0;
    ret |= st.st_mode&S_IWUSR ? mask&AVIO_FLAG_WRITE : 0;
 
    return ret;
}
 
#if CONFIG_FILE_PROTOCOL
 
static int file_open(URLContext *h, const char *filename, int flags)
{
    int access;
    int fd;
 
    av_strstart(filename, "file:", &filename);
 
    if (flags & AVIO_FLAG_WRITE && flags & AVIO_FLAG_READ) {
        access = O_CREAT | O_TRUNC | O_RDWR;
    } else if (flags & AVIO_FLAG_WRITE) {
        access = O_CREAT | O_TRUNC | O_WRONLY;
    } else {
        access = O_RDONLY;
    }
#ifdef O_BINARY
    access |= O_BINARY;
#endif
    fd = open(filename, access, 0666);
    if (fd == -1)
        return AVERROR(errno);
    h->priv_data = (void *) (intptr_t) fd;
    return 0;
}
 
/* XXX: use llseek */
static int64_t file_seek(URLContext *h, int64_t pos, int whence)
{
    int fd = (intptr_t) h->priv_data;
    if (whence == AVSEEK_SIZE) {
        struct stat st;
        int ret = fstat(fd, &st);
        return ret < 0 ? AVERROR(errno) : st.st_size;
    }
    return lseek(fd, pos, whence);
}
 
static int file_close(URLContext *h)
{
    int fd = (intptr_t) h->priv_data;
    return close(fd);
}

URLContext

封装了协议对象及协议操作对象

封装格式

AVFormatContext

描述了媒体文件的构成及基本信息，是统领全局的基本结构体，贯穿程序始终，很多函数都要用它作为参数

typedef struct AVFormatContext {
	const AVClass *av_class;
	struct AVInputFormat *iformat;
	struct AVOutputFormat *oformat;
	void *priv_data;
	AVIOContext *pb;
	int ctx_flags;
	unsigned int nb_streams;
	AVStream **streams;
	char filename[1024];
	int64_t start_time;
	int64_t duration;
	int bit_rate;
    unsigned int packet_size;
    int max_delay;
    int flags;
    const uint8_t *key;
    int keylen;

    unsigned int nb_programs;
    AVProgram **programs;

    enum AVCodecID video_codec_id;
    enum AVCodecID audio_codec_id;
    enum AVCodecID subtitle_codec_id;

    unsigned int max_index_size;
    unsigned int max_picture_buffer;
    unsigned int nb_chapters;
    AVChapter **chapters;
    AVDictionary *metadata;
    int64_t start_time_realtime;
    int fps_probe_size;
    int error_recognition;
    AVIOInterruptCB interrupt_callback;
    int64_t max_interleave_delta;
    int strict_std_compliance;
    int event_flags;
    int max_ts_probe;
    int avoid_negative_ts;
    int ts_id;
    int audio_preload;
    int max_chunk_duration;
    int max_chunk_size;
    int use_wallclock_as_timestamps;
    int avio_flags;
    enum AVDurationEstimationMethod duration_estimation_method;
    int64_t skip_initial_bytes;
    unsigned int correct_ts_overflow;
    int seek2any;
    int probe_score;
    int format_probesize;
    char *codec_whitelist;
    char *format_whitelist;
    AVFormatInternal *internal;
    int io_repositioned;
    AVCodec *video_codec;
    AVCodec *audio_codec;
    AVCodec *subtitle_codec;
    AVCodec *data_codec;
    int metadata_header_padding;
    void *opaque;
    av_format_control_message control_message_cb;
    int64_t output_ts_offset;
    uint8_t *dump_separator;
    enum AVCodecID data_codec_id;
    int (*open_cb)(struct AVFormatContext *s, AVIOContext **p, const char *url, int flags, const AVIOInterruptCB *int_cb, AVDictionary **options);	
}

struct AVInputFormat *iformat : 输入数据的封装格式，由 avformat_open_input 设置，仅仅在 Demuxing 使用。
struct AVOutputFormat *oformat : 输出数据的封装格式，必须由使用者在avformat_write_header前设置，由 Muxing 使用
priv_data : 在 muxing 中，由 avformat_write_header 设置；在 demuxing 中，由 avformat_open_input 设置
AVIOContext *pb : 输入数据的缓存。如果iformat/oformat.flags 设置为 AVFMT_NOFILE 的话，该字段不需要设置。对于 Demuxing ，需要在avformat_open_input 前设置，或由 avformat_open_input 设置；对于 Muxing,在 avformat_write_header 前设置
ctx_flags : 码流的信息，表明码流属性的的信号。由 libavformat设置，例如 AVFMTCTX_NOHEADER
nb_streams : 指 AVFormatContext.streams 的数量，必须由 avformat_new_stream设置
AVStream **streams : 文件中所有码流的列表，新的码流创建使用 avformat_new_stream 函数。Demuxing 中，码流由 avformat_open_input 创建。如果 AVFMTCTX_NOHEADER 被设置，新的码流可以出现在 av_read_frame 中。Muxing 中，码流在 avformat_write_header 之前由用户创建，它的释放是由 avformat_free_context完成的
filename : 输入或输出的文件名，Demuxing 中由 avformat_open_input 设置，Muxing 中在使用 avformat_write_header 前由调用者设置
int64_t duration : 码流的时长
bit_rate : 比特率
AVDictionary *metadata : 元数据，适用于整个文件。通过 av_dict_get() 函数获得视频的原数据

AVInputFormat

解复用器对象，每种作为输入的封装格式 (例如 FLV、MP4、TS 等) 对应一个该结构体

AVOutputFormat

复用器对象，每种作为输出的封装格式（例如 FLV, MP4、TS 等）对应一个该结构体

AVStream

用于描述一个视频 / 音频流的相关数据信息

编解码

AVCodecContext

描述编解码器上下文的数据结构，包含了众多编解码器需要的参数信息

typedef struct AVCodecContext{
	const AVClass *av_class;
	int log_level_offset;
	enum AVMediaType codec_type;
	const struct AVCodec *codec;
	enum AVCodecID     codec_id;
	unsigned int codec_tag;
	void *priv_data;
	struct AVCodecInternal *internal;
	void *opaque;
	int bit_rate;
	int bit_rate_tolerance;
	int global_quality;
	int compression_level;
	int flags;
	int flags2;
	uint8_t *extradata;
	int extradata_size;
	AVRational time_base;
	int ticks_per_frame;
	int delay;
	int width, height;
	int coded_width, coded_height;
	int gop_size;
	enum AVPixelFormat pix_fmt;
	void (*draw_horiz_band)(struct AVCodecContext *s,
                            const AVFrame *src, int offset[AV_NUM_DATA_POINTERS],
                            int y, int type, int height);
	enum AVPixelFormat (*get_format)(struct AVCodecContext *s, const enum AVPixelFormat * fmt);
	int max_b_frames;
	float b_quant_factor;
	int b_frame_strategy;
	float b_quant_offset;
	int has_b_frames;
	int mpeg_quant; 		/*decoding: unused*/
	float i_quant_factor; 	/*decoding: unused*/
	float i_quant_offset; 	/*decoding: unused*/
	float lumi_masking;		/*decoding: unused*/
	float temporal_cplx_masking; /*decoding: unused*/
	float spatial_cplx_masking;  /*decoding: unused*/
	float p_masking;		/*decoding: unused*/
	float dark_masking;		/*decoding: unused*/
	int slice_count;
	int prediction_method;	/*decoding: unused*/
	int *slice_offset;
	AVRational sample_aspect_ratio;
	int me_cmp;				/*decoding: unused*/
	int me_sub_cmp;			/*decoding: unused*/
	int mb_cmp;				/*decoding: unused*/
    ...
}AVCodecContext;

AVMediaType codec_type : 编解码器的类型，如音频、视频、字幕
AVCdec *codec : 采用的解码器 AVCodec
int bit_rate : 平均比特率
uint8_t *extradata; int extradata_size : 针对特定编码器包含的附加信息（例如对于H.264解码器来说，存储SPS，PPS等）
AVRational time_base : 根据该参数，可以把PTS转化为实际的时间（单位为秒s）
int width, height : 视频的宽高
int refs : 运动估计参考帧的个数
int sample_rate : 采样率
int channels : 声道数
enum AVSampleFormat sample_fmt : 采样格式
int profile : 型（H.264里面就有，其他编码标准应该也有）
int level : 级（和profile差不太多）

AVCodecContext 使用 avcodec_alloc_context3 分配，该函数除了分配 AVCodecContext 外，还会初始化默认的字段。分配的内存必须通过 avcodec_free_context 释放。AVCodecContext 中很多的参数是编码的时候使用的，而不是解码的时候使用的

avcodec_register_all();
...
codec = avcodec_find_decoder(AV_CODEC_ID_H264);
if(!codec)
    exit(1);

context = avcodec_alloc_context3(codec);

if(avcodec_open2(context, codec, opts) < 0)
    exit(1);

codec_type(编解码器类型)

enum AVMediaType {
    AVMEDIA_TYPE_UNKNOWN = -1,  ///< Usually treated as AVMEDIA_TYPE_DATA
    AVMEDIA_TYPE_VIDEO,
    AVMEDIA_TYPE_AUDIO,
    AVMEDIA_TYPE_DATA,          ///< Opaque data information usually continuous
    AVMEDIA_TYPE_SUBTITLE,
    AVMEDIA_TYPE_ATTACHMENT,    ///< Opaque data information usually sparse
    AVMEDIA_TYPE_NB
};

sample_fmt(音频采样格式)

enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double
 
    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar
 
    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};

profile

#define FF_PROFILE_UNKNOWN -99
#define FF_PROFILE_RESERVED -100
 
#define FF_PROFILE_AAC_MAIN 0
#define FF_PROFILE_AAC_LOW  1
#define FF_PROFILE_AAC_SSR  2
#define FF_PROFILE_AAC_LTP  3
#define FF_PROFILE_AAC_HE   4
#define FF_PROFILE_AAC_HE_V2 28
#define FF_PROFILE_AAC_LD   22
#define FF_PROFILE_AAC_ELD  38
 
#define FF_PROFILE_DTS         20
#define FF_PROFILE_DTS_ES      30
#define FF_PROFILE_DTS_96_24   40
#define FF_PROFILE_DTS_HD_HRA  50
#define FF_PROFILE_DTS_HD_MA   60
 
#define FF_PROFILE_MPEG2_422    0
#define FF_PROFILE_MPEG2_HIGH   1
#define FF_PROFILE_MPEG2_SS     2
#define FF_PROFILE_MPEG2_SNR_SCALABLE  3
#define FF_PROFILE_MPEG2_MAIN   4
#define FF_PROFILE_MPEG2_SIMPLE 5
 
#define FF_PROFILE_H264_CONSTRAINED  (1<<9)  // 8+1; constraint_set1_flag
#define FF_PROFILE_H264_INTRA        (1<<11) // 8+3; constraint_set3_flag
 
#define FF_PROFILE_H264_BASELINE             66
#define FF_PROFILE_H264_CONSTRAINED_BASELINE (66|FF_PROFILE_H264_CONSTRAINED)
#define FF_PROFILE_H264_MAIN                 77
#define FF_PROFILE_H264_EXTENDED             88
#define FF_PROFILE_H264_HIGH                 100
#define FF_PROFILE_H264_HIGH_10              110
#define FF_PROFILE_H264_HIGH_10_INTRA        (110|FF_PROFILE_H264_INTRA)
#define FF_PROFILE_H264_HIGH_422             122
#define FF_PROFILE_H264_HIGH_422_INTRA       (122|FF_PROFILE_H264_INTRA)
#define FF_PROFILE_H264_HIGH_444             144
#define FF_PROFILE_H264_HIGH_444_PREDICTIVE  244
#define FF_PROFILE_H264_HIGH_444_INTRA       (244|FF_PROFILE_H264_INTRA)
#define FF_PROFILE_H264_CAVLC_444            44
 
#define FF_PROFILE_VC1_SIMPLE   0
#define FF_PROFILE_VC1_MAIN     1
#define FF_PROFILE_VC1_COMPLEX  2
#define FF_PROFILE_VC1_ADVANCED 3
 
#define FF_PROFILE_MPEG4_SIMPLE                     0
#define FF_PROFILE_MPEG4_SIMPLE_SCALABLE            1
#define FF_PROFILE_MPEG4_CORE                       2
#define FF_PROFILE_MPEG4_MAIN                       3
#define FF_PROFILE_MPEG4_N_BIT                      4
#define FF_PROFILE_MPEG4_SCALABLE_TEXTURE           5
#define FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION      6
#define FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE     7
#define FF_PROFILE_MPEG4_HYBRID                     8
#define FF_PROFILE_MPEG4_ADVANCED_REAL_TIME         9
#define FF_PROFILE_MPEG4_CORE_SCALABLE             10
#define FF_PROFILE_MPEG4_ADVANCED_CODING           11
#define FF_PROFILE_MPEG4_ADVANCED_CORE             12
#define FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE 13
#define FF_PROFILE_MPEG4_SIMPLE_STUDIO             14
#define FF_PROFILE_MPEG4_ADVANCED_SIMPLE           15

AVCodec

编解码器对象，每种编解码格式 (例如 H.264、AAC 等）对应一个该结构体；每个 AVCodecContext 中含有一个 AVCodec

typedef struct AVCodec{
	const char *name;
    const char *long_name;
    enum AVMediaType type;
    enum AVCodecID id;
    int capabilities;
    const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0}
    const enum AVPixelFormat *pix_fmts;     ///< array of supported pixel formats, or NULL if unknown, array is terminated by -1
    const int *supported_samplerates;       ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
    const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
    const uint64_t *channel_layouts;         ///< array of support channel layouts, or NULL if unknown. array is terminated by 0
    uint8_t max_lowres;                     ///< maximum value for lowres supported by the decoder, no direct access, use av_codec_get_max_lowres()
    const AVClass *priv_class;              ///< AVClass for the private context
    const AVProfile *profiles;              ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN}
    int priv_data_size;
    struct AVCodec *next;
    int (*init_thread_copy)(AVCodecContext *);
    int (*update_thread_context)(AVCodecContext *dst, const AVCodecContext *src);
    const AVCodecDefault *defaults;
    void (*init_static_data)(struct AVCodec *codec);

    int (*init)(AVCodecContext *);
    int (*encode_sub)(AVCodecContext *, uint8_t *buf, int buf_size,
    int (*encode2)(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame,
                   int *got_packet_ptr);
    int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, AVPacket *avpkt);
    int (*close)(AVCodecContext *);
    void (*flush)(AVCodecContext *);
    int caps_internal;
}

name : 具体的 CODEC 的名称的简短描述，比如 “HEVC”/“H264” 等
long_name : CODEC 名称的详细描述，比如 “HEVC (High Efficiency Video Coding)”
type : 媒体类型的字段，它是 enum 型的，表示视频、音频、字幕等，比如AVMEDIA_TYPE_VIDEO、AVMEIDA_TYPE_AUDIO
id : 唯一标识的 CODEC 类型，比如 AV_CODEC_ID_HEVC
supported_framerates : 支持的视频帧率的数组，以{0，0}作为结束
pix_fmts : 编解码器支持的图像格式的数组，以 -1 作为结束
profiles : 编解码器支持的 Profile，以 HEVC 为例，包含 “Main” / “Main10” / “Main Still Picture”
supported_samplerates : 支持的音频采样率
sample_fmts : 支持的音频采样格式
channel_layouts : 支持的音频声道数
priv_data_size : 私有数据的大小

enum AVMediaType type

enum AVMediaType {
    AVMEDIA_TYPE_UNKNOWN = -1,  ///< Usually treated as AVMEDIA_TYPE_DATA
    AVMEDIA_TYPE_VIDEO,
    AVMEDIA_TYPE_AUDIO,
    AVMEDIA_TYPE_DATA,          ///< Opaque data information usually continuous
    AVMEDIA_TYPE_SUBTITLE,
    AVMEDIA_TYPE_ATTACHMENT,    ///< Opaque data information usually sparse
    AVMEDIA_TYPE_NB
}

enum AVCodecID id

enum AVCodecID {
    AV_CODEC_ID_NONE,
 
    /* video codecs */
    AV_CODEC_ID_MPEG1VIDEO,
    AV_CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
    AV_CODEC_ID_MPEG2VIDEO_XVMC,
    AV_CODEC_ID_H261,
    AV_CODEC_ID_H263,
    AV_CODEC_ID_RV10,
    AV_CODEC_ID_RV20,
    AV_CODEC_ID_MJPEG,
    AV_CODEC_ID_MJPEGB,
    AV_CODEC_ID_LJPEG,
    AV_CODEC_ID_SP5X,
    AV_CODEC_ID_JPEGLS,
    AV_CODEC_ID_MPEG4,
    AV_CODEC_ID_RAWVIDEO,
    AV_CODEC_ID_MSMPEG4V1,
    AV_CODEC_ID_MSMPEG4V2,
    AV_CODEC_ID_MSMPEG4V3,
    AV_CODEC_ID_WMV1,
    AV_CODEC_ID_WMV2,
    AV_CODEC_ID_H263P,
    AV_CODEC_ID_H263I,
    AV_CODEC_ID_FLV1,
    AV_CODEC_ID_SVQ1,
    AV_CODEC_ID_SVQ3,
    AV_CODEC_ID_DVVIDEO,
    AV_CODEC_ID_HUFFYUV,
    AV_CODEC_ID_CYUV,
    AV_CODEC_ID_H264,
    ...
}

const enum AVPixelFormat *pix_fmts

enum AVPixelFormat {
    AV_PIX_FMT_NONE = -1,
    AV_PIX_FMT_YUV420P,   ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
    AV_PIX_FMT_YUYV422,   ///< packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
    AV_PIX_FMT_RGB24,     ///< packed RGB 8:8:8, 24bpp, RGBRGB...
    AV_PIX_FMT_BGR24,     ///< packed RGB 8:8:8, 24bpp, BGRBGR...
    AV_PIX_FMT_YUV422P,   ///< planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
    AV_PIX_FMT_YUV444P,   ///< planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
    AV_PIX_FMT_YUV410P,   ///< planar YUV 4:1:0,  9bpp, (1 Cr & Cb sample per 4x4 Y samples)
    AV_PIX_FMT_YUV411P,   ///< planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
    AV_PIX_FMT_GRAY8,     ///<        Y        ,  8bpp
    AV_PIX_FMT_MONOWHITE, ///<        Y        ,  1bpp, 0 is white, 1 is black, in each byte pixels are ordered from the msb to the lsb
    AV_PIX_FMT_MONOBLACK, ///<        Y        ,  1bpp, 0 is black, 1 is white, in each byte pixels are ordered from the msb to the lsb
    AV_PIX_FMT_PAL8,      ///< 8 bit with PIX_FMT_RGB32 palette
    AV_PIX_FMT_YUVJ420P,  ///< planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of PIX_FMT_YUV420P and setting color_range
    AV_PIX_FMT_YUVJ422P,  ///< planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of PIX_FMT_YUV422P and setting color_range
    AV_PIX_FMT_YUVJ444P,  ///< planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of PIX_FMT_YUV444P and setting color_range
    AV_PIX_FMT_XVMC_MPEG2_MC,///< XVideo Motion Acceleration via common packet passing
    AV_PIX_FMT_XVMC_MPEG2_IDCT,
    ...
}

const enum AVSampleFormat *sample_fmts

enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double
 
    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar
 
    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};

每一个编解码器对应一个 AVCodec 该结构体

AVCodec ff_h264_decoder = {
    .name           = "h264",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .close          = ff_h264_decode_end,
    .decode         = decode_frame,
    .capabilities   = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
                      CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
    .profiles = NULL_IF_CONFIG_SMALL(profiles),
    .priv_class     = &h264_class,
};

AVCodecParameters

编解码参数，每个 AVStream 中都含有一个 AVCodecParameters，用来存放当前流的编解码参数

数据

AVPacket

存放编码后、解码前的压缩数据，即 ES 数据

typedef struct AVPacket{
	AVBufferRef *buf;
	int64_t      pts;
	int64_t      dts;
	uint8_t    *data;
	int         size;
	int stream_index;
	int        flags;
	AVPacketSideData *side_data;
	int side_data_elems;
	int   duration;
	int64_t pos;
	int64_t convergence_duration;
}

pts: 显示时间戳，它的单位是 AVStream->time_base；如果在文件中没有保存这个值，它被设置为 AV_NOPTS_VALUE。由于图像显示不可能早于图像解压，因此 PTS 必须比 DTS（解码时间戳）大或者相等。某些文件格式中可能会使用 PTS/DTS 表示其他含义，此时时间戳必须转为真正的时间戳才能保存到 AVPacket 结构中
dts : 解码时间戳，它的单位是 AVStream->time_base，表示压缩视频解码的时间，如果文件中没有保存该值，它被设置为 AV_NOPTS_VALUE
data : 指向真正的压缩编码的数据
size : 表示该 AVPacket 结构中 data 字段所指向的压缩数据的大小
stream_index : 标识该 AVPacket 结构所属的视频流或音频流
duration : 该 AVPacket 包以 AVStream->time_base 为单位，所持续的时间，0 表示未知，或者为显示时间戳的差值(next_pts - this pts)
pos : 表示该 AVPacket 数据在媒体中的位置，即字节偏移量

AVFrame

存放编码前、解码后的原始数据，如 YUV 格式的视频数据或 PCM 格式的音频数据等

AVFrame 结构体必须使用 av_frame_alloc() 分配，注意该函数只是分配了 AVFrame 结构本身，它的 data 区域要用其他方式管理；该结构体的释放要用 av_frame_free()。

AVFrame 结构体通常只需分配一次，之后即可通过保存不同的数据来重复多次使用，比如一个 AVFrame 结构可以保存从解码器中解码出的多帧数据。此时，就可以使用av_frame_unref()释放任何由 Frame 保存的参考帧并还原回最原始的状态。

typedef struct AVFrame{
	uint8_t *data[AV_NUM_DATA_POINTERS];
	int linesize[AV_NUM_DATA_POINTERS];
	uint8_t **extended_data;
	int width, height;
	int nb_samples; /* number of audio samples(per channel) described by this frame */
	int format;
	int key_frame; /* 1->keyframe, 0->not*/
	enum AVPictureType pict_type;
	AVRational sample_aspect_ratio;
	int64_t pts;
	int64_t pkt_pts;
	int64_t pkt_dts;
	int coded_picture_number;
	int display_picture_number;
	int quality;
	void *opaque; /* for some private data of the user */
	uint64_t error[AV_NUM_DATA_POINTERS];
	int repeat_pict;
	int interlaced_frame;
	int top_field_first;	/* If the content is interlaced, is top field displayed first */
	int palette_has_changed;
    int64_t reordered_opaque;
    int sample_rate;    /*Sample rate of the audio data*/
    uint64_t channel_layout; /*channel layout of the audio data*/
    AVBufferRef *buf[AV_NUM_DATA_POINTERS];
    AVBufferRef **extended_buf;
    int nb_exteneded_buf;
    AVFrameSideData **side_data;
    int nb_side_data;

    int flags;
    enum AVColorRange color_range;
    enum AVColorPrimaries color_primaries;
    enum AVColorTransferCharacteristic color_trc;
    enum AVColorSpace colorspace;
    enum AVChromaLocation chroma_location;

    int64_t best_effort_timestamp;
    int64_t pkt_pos;
    int64_t pkt_duration;
    AVDictionary *metadata;
    int decode _error_flags;

    int channels;
    int pkt_size;
    AVBufferRef *qp_table_buf;
}

data : 指向图片或信道的指针，与初始化时分配的大小可能不同，一些解码器取数据范围超出 (0,0)-(width, height) ，具体请查看 avcodec_align_dimensions2() 方法。一些过滤器或扫描器读数据时可能会超过 16 字节，所以当它们使用时，必须额外分配 16 字节。对于 packed 格式的数据(例如 RGB24)，会存放到 data[0] 里面；对于 planar 格式的数据(例如 YUV420P)，则会分开 data[0]/data[1]/data[2]（YUV420P 中 data[0] 存放 Y，data[1] 存放 U，data[2] 存放 V）
linesize : 对于视频数据，表示每个图像行的字节大小；对于音频数据，表示每个 Plane 的字节大小，只有linesize[0]可以设置，对于plane 音频，每个信道 channel 必须是相同的。对于视频的 linesize 应为 CPU 的对准要求的倍数，一般为 32。注意 linesize 可大于可用的数据的尺寸，有可能存在由于性能原因额外填充
width/height : 视频的宽高
format : 帧格式,-1表示未设置的帧格式。对于视频帧，该值为 enum 类型的 AVPixelFormat，例如 AV_PIX_FMT_YUV420P；对于音频帧，该值为 enum 型的 AVSampleFormat，例如 AV_SAMPLE_FMT_S16
key_frame : 关键帧，1 表示关键帧，0 表示非关键帧
pict_type : 帧图片类型，例如 I/P/B
sample_aspect_ration : 帧像素的宽高比，使用 AVRational 表示（16:9，4:3…）
pts : 显示时间戳，单位为 time_base
pkt_pts : 该 PTS 是从 AVPacket 结构中拷贝过来的；与之对应的是 pkt_dts
coded_picture_number/display_picture_number : 解码序列号和显示序列号（Display Order/Decoded Order）
interlaced_frame : 表示该帧为 interlace 码流或者为 progressive 码流
top_field_first : 对于 interlace 码流，表示该它是 top first or bottom first
qscale_table : QP 表
mbskip_table : 跳过宏块表
(*motion_val[2])[2] : 运动矢量表
mb_type : 宏块类型表
dct_coeff : DCT 系数，这个没有提取过
ref_index[2] : 运动估计参考帧列表（貌似 H.264 这种比较新的标准才会涉及到多参考帧）
interlaced_frame : 是否是隔行扫描

pict_type

enum AVPictureType {
    AV_PICTURE_TYPE_NONE = 0, ///< Undefined
    AV_PICTURE_TYPE_I,     ///< Intra
    AV_PICTURE_TYPE_P,     ///< Predicted
    AV_PICTURE_TYPE_B,     ///< Bi-dir predicted
    AV_PICTURE_TYPE_S,     ///< S(GMC)-VOP MPEG4
    AV_PICTURE_TYPE_SI,    ///< Switching Intra
    AV_PICTURE_TYPE_SP,    ///< Switching Predicted
    AV_PICTURE_TYPE_BI,    ///< BI type
};

参考

雷霄骅 - FFMPEG