/*
 * Video Decode Acceleration API Specification (Proposal)
 *
 * Rev. 0.10
 * Jonathan Bian
 *
 * Revision History:
 *   rev 0.10 (03/20/07) - Initial draft
 *
 * Acknowledgement:
 */

/*****************************************************************************/
/*
 * Overview
 *
 * This is a decode only interface currently. The basic steps are:
 *
 * - Negotiate a mutually acceptable configuration with the accelerator to
 *   lock down profile, entrypoints, and other attributes that will not
 *   change on a frame-by-frame basis.
 * - Create a decode context which represents a "virtualized" hardware
 *   decode device.
 * - Get and fill decode buffers with picture level, slice level and
 *   macroblock level data (depending on entrypoints).
 * - Pass the decode buffers to the accelerator to decode the current frame.
 */
/*****************************************************************************/

/*****************************************************************************/
/*
 * Initialization & Configuration Management
 *
 * - Find out supported profiles
 * - Find out entrypoints for a given profile
 * - Find out configuration attributes for a given profile/entrypoint pair
 * - Create a configuration for use by the decoder
 */
/*****************************************************************************/

typedef void* VADisplay;        /* window system dependent */

typedef enum
{
    VAStatusOK       = 0,
    VAStatusError    = 1,       /* more error codes need to be defined */
} VAStatus;

/* Window-system native display handle; implementation specific
 * (NOTE(review): the draft used this type without declaring it) */
typedef void* NativeDisplay;

/*
 * For X Windows, native_dpy would be from XOpenDisplay()
 */
VADisplay vaGetDisplay (
    NativeDisplay native_dpy    /* implementation specific */
);

VAStatus vaInitialize (
    VADisplay dpy,
    int *major_version,         /* out */
    int *minor_version          /* out */
);

/*
 * After this call, all library internal resources will be cleaned up
 */
VAStatus vaTerminate (
    VADisplay dpy
);

typedef enum
{
    VAProfileMPEG2Simple          = 0,
    VAProfileMPEG2Main            = 1,
    VAProfileMPEG4Simple          = 2,
    VAProfileMPEG4AdvancedSimple  = 3,
    VAProfileMPEG4Main            = 4,
    VAProfileH264Baseline         = 5,
    VAProfileH264Main             = 6,
    VAProfileH264High             = 7,
    VAProfileVC1Simple            = 8,
    VAProfileVC1Main              = 9,
    VAProfileVC1Advanced          = 10,
} VAProfile;

typedef enum
{
    VAEntrypointNone       = 0, /* no entrypoint for this profile */
    VAEntrypointVLD        = 1,
    VAEntrypointIZZ        = 2,
    VAEntrypointIDCT       = 3,
    VAEntrypointMoComp     = 4,
    VAEntrypointDeblocking = 5,
} VAEntrypoint;

/* Configuration attribute types */
typedef enum
{
    VAConfigAttribRTFormat        = 0,
    VAConfigAttribSpatialResidual = 1,
    VAConfigAttribSpatialClipping = 2,
    VAConfigAttribIntraResidual   = 3,
    VAConfigAttribEncryption      = 4,
} VAConfigAttribType;

/* attribute value for VAConfigAttribRTFormat */
#define VA_RT_FORMAT_YUV420     0x00000001
#define VA_RT_FORMAT_YUV422     0x00000002
#define VA_RT_FORMAT_YUV444     0x00000004  /* fixed: draft repeated YUV422 */

/* Configuration attributes */
typedef struct _VAConfigAttrib
{
    VAConfigAttribType type;
    unsigned int value;         /* OR'd flags (bits) for this attribute */
} VAConfigAttrib;

/* Query entrypoints for a given profile */
VAStatus vaQueryConfigEntrypoints (
    VADisplay dpy,
    VAProfile profile,
    VAEntrypoint *entrypoints,  /* out */
    int *num_entrypoints        /* out */
);

/* Query attributes for a given profile/entrypoint pair */
VAStatus vaQueryConfigAttributes (
    VADisplay dpy,
    VAProfile profile,
    VAEntrypoint entrypoint,
    VAConfigAttrib *attrib_list,/* in/out */
    int num_attribs
);

typedef int VAConfigID;

/*
 * Create a configuration for the decode pipeline.
 * It passes in the attribute list that specifies the attributes it cares
 * about, with the rest taking default values.
 */
VAStatus vaCreateConfig (
    VADisplay dpy,
    VAProfile profile,
    VAEntrypoint entrypoint,
    VAConfigAttrib *attrib_list,
    int num_attribs,
    VAConfigID *config_id       /* out */
);

/*
 * Get max number of attributes supported by the accelerator.
 * NOTE(review): the draft declared no output parameter (and had a stray
 * trailing comma); added an out parameter so the call can return a value —
 * confirm the intended signature.
 */
VAStatus vaGetMaxNumConfigAttributes (
    VADisplay dpy,
    int *max_num_attribs        /* out */
);

/*
 * Get all attributes for a given configuration
 */
VAStatus vaGetConfigAttributes (
    VADisplay dpy,
    VAConfigID config_id,
    VAProfile *profile,         /* out */
    VAEntrypoint *entrypoint,   /* out */
    VAConfigAttrib *attrib_list,/* out */
    int *num_attribs            /* out */
);

/*****************************************************************************/
/*
 * Context
 *
 * Context represents a "virtual" video decode pipeline.
 */
/*****************************************************************************/

/* generic context ID type, can be re-typed for specific implementation */
typedef int VAContextID;

/* Forward declarations: surfaces are defined in the Surface Management
 * section below, but contexts reference them here. */
typedef int VASurfaceID;
typedef struct _VASurface VASurface;

typedef struct _VAContext
{
    VAContextID context_id;     /* to identify this context */
    VAConfigID config_id;
    unsigned short picture_width;
    unsigned short picture_height;
    VASurfaceID *render_targets;
    int num_render_targets;
    int flags;
    void *privData;
} VAContext;

/*
 * flags - Any combination of the following:
 *   VA_PROGRESSIVE (only progressive frame pictures in the sequence when set)
 */
/* NOTE(review): flag value not specified in the draft; defined here so the
 * sample program compiles — confirm the intended bit assignment. */
#define VA_PROGRESSIVE  0x1

VAStatus vaCreateContext (
    VADisplay dpy,
    VAConfigID config_id,
    int picture_width,
    int picture_height,
    int flags,
    VASurface *render_targets,
    int num_render_targets,
    VAContext *context          /* out */
);

VAStatus vaDestroyContext (
    VADisplay dpy,
    VAContext *context
);

/*****************************************************************************/
/*
 * Surface Management
 *
 * Surfaces are render targets for a given context. The data in the surfaces
 * are not accessible to the client and the internal data format of the
 * surface is implementation specific.
 *
 * Question: Is there a need to know the data format (fourcc) or is just
 * differentiating between 420/422/444 sufficient?
 */
/*****************************************************************************/ typedef int VASurfaceID; typedef struct _VASurface { VASurfaceID surface_id; /* uniquely identify this surface */ VAContextID context_id; /* which context does this surface belong */ unsigned short width; unsigned short height; int format; /* 420/422/444 */ void *privData; /* private to the library */ } VASurface; /* * Surfaces will be bound to a context when the context is created. Once * a surface is bound to a given context, it can not be used to create * another context. The association is removed when the context is destroyed */ /* Surface Functions */ VAStatus vaCreateSurfaces ( VADisplay dpy, int width, int height, int format, int num_surfaces, VASurface *surfaces /* out */ ) /* * surfaces can only be destroyed after the context associated has been * destroyed */ VAStatus vaDestroySurface ( VADisplay dpy, VASurface *surface_list, int num_surfaces ) /*****************************************************************************/ Decode Buffers /*****************************************************************************/ typedef int VADecodeBufferID; typedef enum { VAPictureParameterBufferType = 0, VAPictureBitPlaneBufferType = 1, VAIQMatrixBufferType = 2, VASliceParameterBufferType = 3, VASliceDataBufferType = 4, VAMacroblockParameterBufferType = 5, VAResidualDataBufferType = 6, VADeblockingParameterBufferType = 7, } VADecodeBufferType; /* Decode buffer descriptor */ typedef struct _VADecodeBufferDescriptor { VADecodeBufferID buffer_id; /* uniquely identify this buffer */ VADecodeBufferType type; int size; /* total size */ int num_macroblocks; int mb_x; int mb_y; void *privData; /* additional data such as decryptor */ } VADecodeBufferDescriptor; /****************************/ MPEG-2 data structures /****************************/ /* Picture Parameter Buffer */ typedef struct _VAPictureParameterBufferMPEG2 { VASurfaceID forward_reference_picture; VASurfaceID 
backward_reference_picture; /* meanings of the following fields are the same as in the standard */ int picture_coding_type; int f_code; /* pack all four fcode into this */ union { struct { unsigned char intra_dc_precision : 2; unsigned char picture_structure : 2; unsigned char top_field_first : 1; unsigned char frame_pred_frame_dct : 1; unsigned char concealment_motion_vectors : 1; unsigned char q_scale_type : 1; unsigned char intra_vlc_format : 1; unsigned char alternate_scan : 1; unsigned char int repeat_first_field : 1; unsigned char progressive_frame : 1; }; unsigned int picture_coding_extension; }; } VAPictureParameterBufferMPEG2; /* Inverse Quantization Matrix Buffer */ typedef struct _VAIQMatrixBufferMPEG2 { int load_intra_quantiser_matrix; int load_non_intra_quantiser_matrix; int load_chroma_intra_quantiser_matrix; int load_chroma_non_intra_quantiser_matrix; unsigned char intra_quantiser_matrix[64]; unsigned char non_intra_quantiser_matrix[64]; unsigned char chroma_intra_quantiser_matrix[64]; unsigned char chroma_non_intra_quantiser_matrix[64]; } VAIQMatrixBufferMPEG2; /* Slice Parameter Buffer */ typedef struct _VASliceParameterBufferMPEG2 { int slice_data_offset;/* the offset to the first byte of slice data */ int macroblock_offset;/* the offset to the first bit of MB from the first byte of slice data */ int slice_vertical_position; int quantiser_scale_code; int intra_slice_flag; } VASliceParameterBufferMPEG2; /****************************/ VC-1 data structures /****************************/ /* Picture Parameter Buffer */ typedef struct _VAPictureParameterBufferVC1 { VASurfaceID forward_reference_picture; VASurfaceID backward_reference_picture; /* if out-of-loop post-processing is done on the render target, then we need to keep the in-loop decoded picture as a reference picture */ VASurfaceID inloop_decoded_picture; unsigned char closed_entry; /* ENTRY_POINT_LAYER::CLOSED_ENTRY */ unsigned char broken_link; /* ENTRY_POINT_LAYER::BROKEN_LINK */ unsigned 
char conditional_overlap_flag; /* ENTRY_POINT_LAYER::CONDOVER */ unsigned char fast_uvmc_flag; /* ENTRY_POINT_LAYER::FASTUVMC */ unsigned char b_picture_fraction; /* PICTURE_LAYER::BFRACTION */ unsigned char cbp_table; /* PICTURE_LAYER::CBPTAB/ICBPTAB */ unsigned char mb_mode_table; /* PICTURE_LAYER::MBMODETAB */ unsigned char range_reduction_frame;/* PICTURE_LAYER::RNDCTRL */ unsigned char rounding_control; /* PICTURE_LAYER::RNDCTRL */ unsigned char post_processing; /* PICTURE_LAYER::POSTPROC */ unsigned char picture_resolution_index; /* PICTURE_LAYER::RESPIC */ unsigned char luma_scale; /* PICTURE_LAYER::LUMSCALE */ unsigned char luma_shift; /* PICTURE_LAYER::LUMSHIFT */ union { struct { unsigned char picture_type : 2; /* PICTURE_LAYER::PTYPE */ unsigned char frame_coding_mode : 3;/* PICTURE_LAYER::FCM */ unsigned char top_field_first : 1;/* PICTURE_LAYER::TFF */ }; unsigned char picture_fields; }; union { struct { unsigned char mv_type_mb : 1; /* PICTURE::MVTYPEMB */ unsigned char direct_mb : 1; /* PICTURE::DIRECTMB */ unsigned char skip_mb : 1; /* PICTURE::SKIPMB */ unsigned char field_tx : 1; /* PICTURE::FIELDTX */ unsigned char foward_mb : 1; /* PICTURE::FORWARDMB */ unsigned char ac_pred : 1; /* PICTURE::ACPRED */ unsigned char overflags : 1; /* PICTURE::OVERFLAGS */ }; unsigned char raw_coding_flag; }; union { struct { unsigned char reference_distance_flag : 1;/* PICTURE_LAYER::REFDIST_FLAG */ unsigned char reference_distance : 1;/* PICTURE_LAYER::REFDIST */ unsigned char num_reference_pictures: 1;/* PICTURE_LAYER::NUMREF */ unsigned char reference_field_pic_indicator : 1;/* PICTURE_LAYER::REFFIELD */ }; unsigned short reference_fields; }; union { struct { unsigned char mv_mode : 2; /* PICTURE_LAYER::MVMODE */ unsigned char mv_mode2 : 2; /* PICTURE_LAYER::MVMODE2 */ unsigned char mv_table : 3;/* PICTURE_LAYER::MVTAB/IMVTAB */ unsigned char 2mv_block_pattern_table: 2;/* PICTURE_LAYER::2MVBPTAB */ unsigned char four_mv_switch: 1; /* PICTURE_LAYER::4MVSWITCH 
*/ unsigned char 4mv_block_pattern_table : 2;/* PICTURE_LAYER::4MVBPTAB */ unsigned char extended_mv_flag: 1;/* ENTRY_POINT_LAYER::EXTENDED_MV */ unsigned char extended_mv_range : 2;/* PICTURE_LAYER::MVRANGE */ unsigned char extended_dmv_flag : 1;/* ENTRY_POINT_LAYER::EXTENDED_DMV */ unsigned char extended_dmv_range : 2;/* PICTURE_LAYER::DMVRANGE */ }; unsigned int mv_fields; }; union { struct { unsigned char dquant : 2; /* ENTRY_POINT_LAYER::DQUANT */ unsigned char half_qp : 1; /* PICTURE_LAYER::HALFQP */ unsigned char pic_quantizer_scale : 1;/* PICTURE_LAYER::PQUANT */ unsigned char pic_quantizer_type : 1;/* PICTURE_LAYER::PQUANTIZER */ unsigned char dq_frame : 1; /* VOPDQUANT::DQUANTFRM */ unsigned char dq_profile : 1; /* VOPDQUANT::DQPROFILE */ unsigned char dq_binary_level : 1; /* VOPDQUANT::DQBILEVEL */ unsigned char alt_pic_quantizer : 5;/* VOPDQUANT::ALTPQUANT */ }; unsigned short pic_quantizer_fields; }; union { struct { unsigned char variable_sized_transform_flag : 1;/* ENTRY_POINT_LAYER::VSTRANSFORM */ unsigned char mb_level_transform_type_flag : 1;/* PICTURE_LAYER::TTMBF */ unsigned char frame_level_transform_type : 2;/* PICTURE_LAYER::TTFRM */ unsigned char transform_ac_codingset_idx1 : 2;/* PICTURE_LAYER::TRANSACFRM */ unsigned char transform_ac_codingset_idx2 : 2;/* PICTURE_LAYER::TRANSACFRM2 */ unsigned char intra_transform_dc_table : 1;/* PICTURE_LAYER::TRANSDCTAB */ }; unsigned short transform_fields; }; } VAPictureParameterBufferVC1; /* Bitplane Buffer */ Exact format TBD /* Slice Parameter Buffer */ typedef struct _VASliceParameterBufferVC1 { int slice_data_offset;/* the offset to the first byte of slice data */ int macroblock_offset;/* the offset to the first bit of MB from the first byte of slice data */ int slice_vertical_position; } VASliceParameterBufferVC1; /* Slice Data Buffer */ This is simplely a buffer containing raw bit-stream bytes /* Macroblock Parameter Buffer */ /* Residual Data Buffer */ /* Deblocking Parameter Buffer */ /* 
Buffer functions */ VAStatus vaCreateDecodeBuffer ( VADisplay dpy, VADecodeBufferDescriptor *buf_desc, /* in/out */ void **buffer_pointer, /* out */ ); VAStatus vaDestroyDecodeBuffer ( VADisplay dpy, VADecodeBufferID buffer_id ); /*****************************************************************************/ Render (Decode) Pictures A picture represents either a frame or a field. The Begin/Render/End sequence sends the decode buffers to the accelerator /*****************************************************************************/ /* * Get ready to decode a picture to a target surface */ VAStatus vaBeginPicture ( VADisplay dpy, VAContext *context, VASurface *render_target ); /* * Send decode buffers to the accelerator. * Buffers can be used again after returning from this function. */ VAStatus vaRenderPicture ( VADisplay dpy, VAContext *context, VASurface *surface, VADecodeBufferDescriptor *buffers, int num_buffers ); /* * Make the end of buffers for a picture. * The accelerator should start processing all pending operations for this * surface. This call is non-blocking. The client can start another * Begin/Render/End sequence on a different render target. */ VAStatus vaEndPicture ( VADisplay dpy, VAContext *context, VASurface *render_target ); /*****************************************************************************/ Synchronization /*****************************************************************************/ /* * This function blocks until all pending operations on the render target * have been completed. Upon return it is safe to use the render target for a * different picture. 
*/ VAStatus vaSyncSurface ( VADisplay dpy, VAContext *context, VASurface *render_target ); typedef enum { VASurfaceRendering = 0, VASurfaceReady = 1, } VASurfaceStatus; /* * Find out any pending ops on the render target */ VAStatus vaQuerySurfaceStatus ( VADisplay dpy, VAContext *context, VASurface *render_target, VAPictureStatus *status /* out */ ); /*****************************************************************************/ Sample Program (w/ pseudo code) Mostly to demonstrate program flow with no error handling ... /*****************************************************************************/ /* MPEG-2 VLD decode for a 720x480 frame */ int major_ver, minor_ver; vaInitialize(dpy, &major_ver, &minor_ver); VAEntrypoint entrypoints[5]; int num_entrypoints; vaQueryConfigEntrypoints(dpy, VAProfileMPEG2Main, entrypoints, &num_entrypoints); /* traverse entrypoints arrary to see whether VLD is there */ /* Assuming finding VLD, find out the format for the render target */ VAConfigAttrib attrib; attrib.type = VAConfigAttribRTFormat; vaQueryConfigAttributes(dpy, VAProfileMPEG2Main, VAEntrypointVLD, &attrib, 1); if (attrib.value & VA_RT_FORMAT_YUV420) /* Found desired RT format, keep going */ VAConfigID config_id; vaCreateConfig(dpy, VAProfileMPEG2Main, VAEntrypointVLD, &attrib, 1, &config_id); /* * create surfaces for the current target as well as reference frames * we can get by with 4 surfaces for MPEG-2 */ VASurface surfaces[4]; vaCreateSurfaces(dpy, 720, 480, VA_RT_FORMAT_YUV420, 4, surfaces); /* * Create a context for this decode pipe */ VAContext context; vaCreateContext(dpy, config_id, 720, 480, VA_PROGRESSIVE, surfaces, 4, &context); /* Create a picture parameter buffer for this frame */ VAPictureParameterBufferMPEG2 *picture_param; VADecodeBufferDescriptor picture_param_desc; picture_param_desc.type = VAPictureParameterBufferType; picture_param_desc.size = sizeof(VAPictureParameterBufferMPEG2); vaCreateDecodeBuffer(dpy, picture_param_desc, &picture_param); 
picture_param->picture_coding_type = 1; /* I-frame */ /* fill in picture_coding_extension fields here */ /* Create a IQ matrix buffer for this frame */ VAIQMatrixBufferMPEG2 *iq_matrix; VADecodeBufferDescriptor iq_matrix_desc; iq_matrix_desc.type = VAIQMatrixBufferType; iq_matrix_desc.size = sizeof(VAIQMatrixBufferMPEG2); vaCreateDecodeBuffer(dpy, iq_matrix_desc, &iq_matrix); /* fill values for IQ_matrix here */ /* send the picture and IQ matrix buffers to the accelerator */ vaBeginPicture(dpy, context, &surfaces[0]); vaRenderPicture(dpy, context, &surfaces[0], &picture_param_desc, 1); vaRenderPicture(dpy, context, &surfaces[0], &iq_matrix_desc, 1); /* * Send slices in this frame to the accelerator. * For MPEG-2, each slice is one row of macroblocks, and * we have 30 slices for a 720x480 frame */ for (int i = 1; i <= 30; i++) { /* Create a slice parameter buffer */ VASliceParameterBufferMPEG2 *slice_param; VADecodeBufferDescriptor slice_param_desc; slice_param_desc.type = VASliceParameterBufferType; slice_param_desc.size = sizeof(VAIQMatrixBufferMPEG2); vaCreateDecodeBuffer(dpy, slice_param_desc, &slice_param); slice_param->slice_data_offset = 0; /* Let's say all slices in this bit-stream has 64-bit header */ slice_param->macroblock_offset = 64; slice_param->vertical_position = i; /* set up the rest based on what is in the slice header ... 
*/ /* send the slice parameter buffer */ vaRenderPicture(dpy, context, &surfaces[0], &slice_param_desc, 1); /* Create a slice data buffer */ unsigned char *slice_data; VADecodeBufferDescriptor slice_data_desc; slice_data_desc.type = VASliceDataBufferType; slice_data_desc.size = x; /* decoder can figure this out */ vaCreateDecodeBuffer(dpy, slice_data_desc, &slice_data); /* decoder will supply the src for the following memcpy */ memcpy(slice_data, slice_data_src, slice_data_desc.size); /* send the slice data buffer */ vaRenderPicture(dpy, context, &surfaces[0], &slice_data_desc, 1); } /* all slices have been sent, mark the end for this frame */ vaEndPicture(dpy, context, &surfaces[0]);