/*
 * Video Decode Acceleration API Specification (Proposal)
 *
 * Rev. 0.10
 * Jonathan Bian
 *
 * Revision History:
 *   rev 0.10 (03/20/07) - Initial draft
 *
 * Acknowledgement:
 */

/*****************************************************************************/
/*
 * Overview
 *
 * This is a decode only interface currently. The basic steps are:
 *
 * - Negotiate a mutually acceptable configuration with the accelerator to
 *   lock down profile, entrypoints, and other attributes that will not
 *   change on a frame-by-frame basis.
 * - Create a decode context which represents a "virtualized" hardware
 *   decode device.
 * - Get and fill decode buffers with picture level, slice level and
 *   macroblock level data (depending on entrypoints).
 * - Pass the decode buffers to the accelerator to decode the current frame.
 */
/*****************************************************************************/

/*****************************************************************************/
/*
 * Initialization & Configuration Management
 *
 * - Find out supported profiles
 * - Find out entrypoints for a given profile
 * - Find out configuration attributes for a given profile/entrypoint pair
 * - Create a configuration for use by the decoder
 */
/*****************************************************************************/

typedef void* VADisplay;        /* window system dependent */

typedef enum
{
    VAStatusOK       = 0,
    VAStatusError    = 1,       /* more error codes need to be defined */
} VAStatus;

/* Window-system native display handle; implementation specific
 * (NOTE(review): the draft used this type without declaring it) */
typedef void* NativeDisplay;

/*
 * For X Windows, native_dpy would be from XOpenDisplay()
 */
VADisplay vaGetDisplay (
    NativeDisplay native_dpy    /* implementation specific */
);

VAStatus vaInitialize (
    VADisplay dpy,
    int *major_version,         /* out */
    int *minor_version          /* out */
);

/*
 * After this call, all library internal resources will be cleaned up
 */
VAStatus vaTerminate (
    VADisplay dpy
);

typedef enum
{
    VAProfileMPEG2Simple          = 0,
    VAProfileMPEG2Main            = 1,
    VAProfileMPEG4Simple          = 2,
    VAProfileMPEG4AdvancedSimple  = 3,
    VAProfileMPEG4Main            = 4,
    VAProfileH264Baseline         = 5,
    VAProfileH264Main             = 6,
    VAProfileH264High             = 7,
    VAProfileVC1Simple            = 8,
    VAProfileVC1Main              = 9,
    VAProfileVC1Advanced          = 10,
} VAProfile;

typedef enum
{
    VAEntrypointNone       = 0, /* no entrypoint for this profile */
    VAEntrypointVLD        = 1,
    VAEntrypointIZZ        = 2,
    VAEntrypointIDCT       = 3,
    VAEntrypointMoComp     = 4,
    VAEntrypointDeblocking = 5,
} VAEntrypoint;

/* Configuration attribute types */
typedef enum
{
    VAConfigAttribRTFormat        = 0,
    VAConfigAttribSpatialResidual = 1,
    VAConfigAttribSpatialClipping = 2,
    VAConfigAttribIntraResidual   = 3,
    VAConfigAttribEncryption      = 4,
} VAConfigAttribType;

/* attribute value for VAConfigAttribRTFormat */
#define VA_RT_FORMAT_YUV420     0x00000001
#define VA_RT_FORMAT_YUV422     0x00000002
#define VA_RT_FORMAT_YUV444     0x00000004  /* fixed: draft repeated YUV422 */

/* Configuration attributes */
typedef struct _VAConfigAttrib
{
    VAConfigAttribType type;
    unsigned int value;         /* OR'd flags (bits) for this attribute */
} VAConfigAttrib;

/* Query entrypoints for a given profile */
VAStatus vaQueryConfigEntrypoints (
    VADisplay dpy,
    VAProfile profile,
    VAEntrypoint *entrypoints,  /* out */
    int *num_entrypoints        /* out */
);

/* Query attributes for a given profile/entrypoint pair */
VAStatus vaQueryConfigAttributes (
    VADisplay dpy,
    VAProfile profile,
    VAEntrypoint entrypoint,
    VAConfigAttrib *attrib_list,/* in/out */
    int num_attribs
);

typedef int VAConfigID;

/*
 * Create a configuration for the decode pipeline.
 * It passes in the attribute list that specifies the attributes it cares
 * about, with the rest taking default values.
 */
VAStatus vaCreateConfig (
    VADisplay dpy,
    VAProfile profile,
    VAEntrypoint entrypoint,
    VAConfigAttrib *attrib_list,
    int num_attribs,
    VAConfigID *config_id       /* out */
);

/*
 * Get max number of attributes supported by the accelerator.
 * NOTE(review): the draft declared no output parameter (and had a stray
 * trailing comma); added an out parameter so the call can return a value —
 * confirm the intended signature.
 */
VAStatus vaGetMaxNumConfigAttributes (
    VADisplay dpy,
    int *max_num_attribs        /* out */
);

/*
 * Get all attributes for a given configuration
 */
VAStatus vaGetConfigAttributes (
    VADisplay dpy,
    VAConfigID config_id,
    VAProfile *profile,         /* out */
    VAEntrypoint *entrypoint,   /* out */
    VAConfigAttrib *attrib_list,/* out */
    int *num_attribs            /* out */
);

/*****************************************************************************/
/*
 * Context
 *
 * Context represents a "virtual" video decode pipeline.
 */
/*****************************************************************************/

/* generic context ID type, can be re-typed for specific implementation */
typedef int VAContextID;

/* Forward declarations: surfaces are defined in the Surface Management
 * section below, but contexts reference them here. */
typedef int VASurfaceID;
typedef struct _VASurface VASurface;

typedef struct _VAContext
{
    VAContextID context_id;     /* to identify this context */
    VAConfigID config_id;
    unsigned short picture_width;
    unsigned short picture_height;
    VASurfaceID *render_targets;
    int num_render_targets;
    int flags;
    void *privData;
} VAContext;

/*
 * flags - Any combination of the following:
 *   VA_PROGRESSIVE (only progressive frame pictures in the sequence when set)
 */
/* NOTE(review): flag value not specified in the draft; defined here so the
 * sample program compiles — confirm the intended bit assignment. */
#define VA_PROGRESSIVE  0x1

VAStatus vaCreateContext (
    VADisplay dpy,
    VAConfigID config_id,
    int picture_width,
    int picture_height,
    int flags,
    VASurface *render_targets,
    int num_render_targets,
    VAContext *context          /* out */
);

VAStatus vaDestroyContext (
    VADisplay dpy,
    VAContext *context
);

/*****************************************************************************/
/*
 * Surface Management
 *
 * Surfaces are render targets for a given context. The data in the surfaces
 * are not accessible to the client and the internal data format of the
 * surface is implementation specific.
 *
 * Question: Is there a need to know the data format (fourcc) or is just
 * differentiating between 420/422/444 sufficient?
 */
/*****************************************************************************/ typedef int VASurfaceID; typedef struct _VASurface { VASurfaceID surface_id; /* uniquely identify this surface */ VAContextID context_id; /* which context does this surface belong */ unsigned short width; unsigned short height; int format; /* 420/422/444 */ void *privData; /* private to the library */ } VASurface; /* * Surfaces will be bound to a context when the context is created. Once * a surface is bound to a given context, it can not be used to create * another context. The association is removed when the context is destroyed */ /* Surface Functions */ VAStatus vaCreateSurfaces ( VADisplay dpy, int width, int height, int format, int num_surfaces, VASurface *surfaces /* out */ ) /* * surfaces can only be destroyed after the context associated has been * destroyed */ VAStatus vaDestroySurface ( VADisplay dpy, VASurface *surface_list, int num_surfaces ) /*****************************************************************************/ Decode Buffers /*****************************************************************************/ typedef int VADecodeBufferID; typedef enum { VAPictureParameterBufferType = 0, VAPictureBitPlaneBufferType = 1, VAIQMatrixBufferType = 2, VASliceParameterBufferType = 3, VASliceDataBufferType = 4, VAMacroblockParameterBufferType = 5, VAResidualDataBufferType = 6, VADeblockingParameterBufferType = 7, } VADecodeBufferType; /* Decode buffer descriptor */ typedef struct _VADecodeBufferDescriptor { VADecodeBufferID buffer_id; /* uniquely identify this buffer */ VADecodeBufferType type; int size; /* total size */ int num_macroblocks; int mb_x; int mb_y; void *privData; /* additional data such as decryptor */ } VADecodeBufferDescriptor; /****************************/ MPEG-2 data structures /****************************/ /* Picture Parameter Buffer */ typedef struct _VAPictureParameterBufferMPEG2 { VASurfaceID forward_reference_picture; VASurfaceID 
backward_reference_picture; /* meanings of the following fields are the same as in the standard */ int picture_coding_type; int f_code; /* pack all four fcode into this */ union { struct { unsigned char intra_dc_precision : 2; unsigned char picture_structure : 2; unsigned char top_field_first : 1; unsigned char frame_pred_frame_dct : 1; unsigned char concealment_motion_vectors : 1; unsigned char q_scale_type : 1; unsigned char intra_vlc_format : 1; unsigned char alternate_scan : 1; unsigned char int repeat_first_field : 1; unsigned char progressive_frame : 1; }; unsigned int picture_coding_extension; }; } VAPictureParameterBufferMPEG2; /* Inverse Quantization Matrix Buffer */ typedef struct _VAIQMatrixBufferMPEG2 { int load_intra_quantiser_matrix; int load_non_intra_quantiser_matrix; int load_chroma_intra_quantiser_matrix; int load_chroma_non_intra_quantiser_matrix; unsigned char intra_quantiser_matrix[64]; unsigned char non_intra_quantiser_matrix[64]; unsigned char chroma_intra_quantiser_matrix[64]; unsigned char chroma_non_intra_quantiser_matrix[64]; } VAIQMatrixBufferMPEG2; /* Slice Parameter Buffer */ typedef struct _VASliceParameterBufferMPEG2 { int slice_data_offset;/* the offset to the first byte of slice data */ int macroblock_offset;/* the offset to the first bit of MB from the first byte of slice data */ int slice_vertical_position; int quantiser_scale_code; int intra_slice_flag; } VASliceParameterBufferMPEG2; /****************************/ VC-1 data structures /****************************/ /* Picture Parameter Buffer */ typedef struct _VAPictureParameterBufferVC1 { VASurfaceID forward_reference_picture; VASurfaceID backward_reference_picture; /* if out-of-loop post-processing is done on the render target, then we need to keep the in-loop decoded picture as a reference picture */ VASurfaceID inloop_decoded_picture; unsigned char closed_entry; /* ENTRY_POINT_LAYER::CLOSED_ENTRY */ unsigned char broken_link; /* ENTRY_POINT_LAYER::BROKEN_LINK */ unsigned 
char conditional_overlap_flag; /* ENTRY_POINT_LAYER::CONDOVER */ unsigned char fast_uvmc_flag; /* ENTRY_POINT_LAYER::FASTUVMC */ unsigned char b_picture_fraction; /* PICTURE_LAYER::BFRACTION */ unsigned char cbp_table; /* PICTURE_LAYER::CBPTAB/ICBPTAB */ unsigned char mb_mode_table; /* PICTURE_LAYER::MBMODETAB */ unsigned char range_reduction_frame;/* PICTURE_LAYER::RNDCTRL */ unsigned char rounding_control; /* PICTURE_LAYER::RNDCTRL */ unsigned char post_processing; /* PICTURE_LAYER::POSTPROC */ unsigned char picture_resolution_index; /* PICTURE_LAYER::RESPIC */ unsigned char luma_scale; /* PICTURE_LAYER::LUMSCALE */ unsigned char luma_shift; /* PICTURE_LAYER::LUMSHIFT */ union { struct { unsigned char picture_type : 2; /* PICTURE_LAYER::PTYPE */ unsigned char frame_coding_mode : 3;/* PICTURE_LAYER::FCM */ unsigned char top_field_first : 1;/* PICTURE_LAYER::TFF */ }; unsigned char picture_fields; }; union { struct { unsigned char mv_type_mb : 1; /* PICTURE::MVTYPEMB */ unsigned char direct_mb : 1; /* PICTURE::DIRECTMB */ unsigned char skip_mb : 1; /* PICTURE::SKIPMB */ unsigned char field_tx : 1; /* PICTURE::FIELDTX */ unsigned char foward_mb : 1; /* PICTURE::FORWARDMB */ unsigned char ac_pred : 1; /* PICTURE::ACPRED */ unsigned char overflags : 1; /* PICTURE::OVERFLAGS */ }; unsigned char raw_coding_flag; }; union { struct { unsigned char reference_distance_flag : 1;/* PICTURE_LAYER::REFDIST_FLAG */ unsigned char reference_distance : 1;/* PICTURE_LAYER::REFDIST */ unsigned char num_reference_pictures: 1;/* PICTURE_LAYER::NUMREF */ unsigned char reference_field_pic_indicator : 1;/* PICTURE_LAYER::REFFIELD */ }; unsigned short reference_fields; }; union { struct { unsigned char mv_mode : 2; /* PICTURE_LAYER::MVMODE */ unsigned char mv_mode2 : 2; /* PICTURE_LAYER::MVMODE2 */ unsigned char mv_table : 3;/* PICTURE_LAYER::MVTAB/IMVTAB */ unsigned char 2mv_block_pattern_table: 2;/* PICTURE_LAYER::2MVBPTAB */ unsigned char four_mv_switch: 1; /* PICTURE_LAYER::4MVSWITCH 
*/ unsigned char 4mv_block_pattern_table : 2;/* PICTURE_LAYER::4MVBPTAB */ unsigned char extended_mv_flag: 1;/* ENTRY_POINT_LAYER::EXTENDED_MV */ unsigned char extended_mv_range : 2;/* PICTURE_LAYER::MVRANGE */ unsigned char extended_dmv_flag : 1;/* ENTRY_POINT_LAYER::EXTENDED_DMV */ unsigned char extended_dmv_range : 2;/* PICTURE_LAYER::DMVRANGE */ }; unsigned int mv_fields; }; union { struct { unsigned char dquant : 2; /* ENTRY_POINT_LAYER::DQUANT */ unsigned char half_qp : 1; /* PICTURE_LAYER::HALFQP */ unsigned char pic_quantizer_scale : 1;/* PICTURE_LAYER::PQUANT */ unsigned char pic_quantizer_type : 1;/* PICTURE_LAYER::PQUANTIZER */ unsigned char dq_frame : 1; /* VOPDQUANT::DQUANTFRM */ unsigned char dq_profile : 1; /* VOPDQUANT::DQPROFILE */ unsigned char dq_binary_level : 1; /* VOPDQUANT::DQBILEVEL */ unsigned char alt_pic_quantizer : 5;/* VOPDQUANT::ALTPQUANT */ }; unsigned short pic_quantizer_fields; }; union { struct { unsigned char variable_sized_transform_flag : 1;/* ENTRY_POINT_LAYER::VSTRANSFORM */ unsigned char mb_level_transform_type_flag : 1;/* PICTURE_LAYER::TTMBF */ unsigned char frame_level_transform_type : 2;/* PICTURE_LAYER::TTFRM */ unsigned char transform_ac_codingset_idx1 : 2;/* PICTURE_LAYER::TRANSACFRM */ unsigned char transform_ac_codingset_idx2 : 2;/* PICTURE_LAYER::TRANSACFRM2 */ unsigned char intra_transform_dc_table : 1;/* PICTURE_LAYER::TRANSDCTAB */ }; unsigned short transform_fields; }; } VAPictureParameterBufferVC1; /* Bitplane Buffer */ Exact format TBD /* Slice Parameter Buffer */ typedef struct _VASliceParameterBufferVC1 { int slice_data_offset;/* the offset to the first byte of slice data */ int macroblock_offset;/* the offset to the first bit of MB from the first byte of slice data */ int slice_vertical_position; } VASliceParameterBufferVC1; /* Slice Data Buffer */ This is simplely a buffer containing raw bit-stream bytes /* Macroblock Parameter Buffer */ /* Residual Data Buffer */ /* Deblocking Parameter Buffer */ /* 
Buffer functions */ VAStatus vaCreateDecodeBuffer ( VADisplay dpy, VADecodeBufferDescriptor *buf_desc, /* in/out */ void **buffer_pointer, /* out */ ); VAStatus vaDestroyDecodeBuffer ( VADisplay dpy, VADecodeBufferID buffer_id ); /*****************************************************************************/ Render (Decode) Pictures A picture represents either a frame or a field. The Begin/Render/End sequence sends the decode buffers to the accelerator /*****************************************************************************/ /* * Get ready to decode a picture to a target surface */ VAStatus vaBeginPicture ( VADisplay dpy, VAContext *context, VASurface *render_target ); /* * Send decode buffers to the accelerator. * Buffers can be used again after returning from this function. */ VAStatus vaRenderPicture ( VADisplay dpy, VAContext *context, VASurface *surface, VADecodeBufferDescriptor *buffers, int num_buffers ); /* * Make the end of buffers for a picture. * The accelerator should start processing all pending operations for this * surface. This call is non-blocking. The client can start another * Begin/Render/End sequence on a different render target. */ VAStatus vaEndPicture ( VADisplay dpy, VAContext *context, VASurface *render_target ); /*****************************************************************************/ Synchronization /*****************************************************************************/ /* * This function blocks until all pending operations on the render target * have been completed. Upon return it is safe to use the render target for a * different picture. 
*/ VAStatus vaSyncSurface ( VADisplay dpy, VAContext *context, VASurface *render_target ); typedef enum { VASurfaceRendering = 0, VASurfaceReady = 1, } VASurfaceStatus; /* * Find out any pending ops on the render target */ VAStatus vaQuerySurfaceStatus ( VADisplay dpy, VAContext *context, VASurface *render_target, VAPictureStatus *status /* out */ ); /*****************************************************************************/ Sample Program (w/ pseudo code) Mostly to demonstrate program flow with no error handling ... /*****************************************************************************/ /* MPEG-2 VLD decode for a 720x480 frame */ int major_ver, minor_ver; vaInitialize(dpy, &major_ver, &minor_ver); VAEntrypoint entrypoints[5]; int num_entrypoints; vaQueryConfigEntrypoints(dpy, VAProfileMPEG2Main, entrypoints, &num_entrypoints); /* traverse entrypoints arrary to see whether VLD is there */ /* Assuming finding VLD, find out the format for the render target */ VAConfigAttrib attrib; attrib.type = VAConfigAttribRTFormat; vaQueryConfigAttributes(dpy, VAProfileMPEG2Main, VAEntrypointVLD, &attrib, 1); if (attrib.value & VA_RT_FORMAT_YUV420) /* Found desired RT format, keep going */ VAConfigID config_id; vaCreateConfig(dpy, VAProfileMPEG2Main, VAEntrypointVLD, &attrib, 1, &config_id); /* * create surfaces for the current target as well as reference frames * we can get by with 4 surfaces for MPEG-2 */ VASurface surfaces[4]; vaCreateSurfaces(dpy, 720, 480, VA_RT_FORMAT_YUV420, 4, surfaces); /* * Create a context for this decode pipe */ VAContext context; vaCreateContext(dpy, config_id, 720, 480, VA_PROGRESSIVE, surfaces, 4, &context); /* Create a picture parameter buffer for this frame */ VAPictureParameterBufferMPEG2 *picture_param; VADecodeBufferDescriptor picture_param_desc; picture_param_desc.type = VAPictureParameterBufferType; picture_param_desc.size = sizeof(VAPictureParameterBufferMPEG2); vaCreateDecodeBuffer(dpy, picture_param_desc, &picture_param); 
picture_param->picture_coding_type = 1; /* I-frame */ /* fill in picture_coding_extension fields here */ /* Create a IQ matrix buffer for this frame */ VAIQMatrixBufferMPEG2 *iq_matrix; VADecodeBufferDescriptor iq_matrix_desc; iq_matrix_desc.type = VAIQMatrixBufferType; iq_matrix_desc.size = sizeof(VAIQMatrixBufferMPEG2); vaCreateDecodeBuffer(dpy, iq_matrix_desc, &iq_matrix); /* fill values for IQ_matrix here */ /* send the picture and IQ matrix buffers to the accelerator */ vaBeginPicture(dpy, context, &surfaces[0]); vaRenderPicture(dpy, context, &surfaces[0], &picture_param_desc, 1); vaRenderPicture(dpy, context, &surfaces[0], &iq_matrix_desc, 1); /* * Send slices in this frame to the accelerator. * For MPEG-2, each slice is one row of macroblocks, and * we have 30 slices for a 720x480 frame */ for (int i = 1; i <= 30; i++) { /* Create a slice parameter buffer */ VASliceParameterBufferMPEG2 *slice_param; VADecodeBufferDescriptor slice_param_desc; slice_param_desc.type = VASliceParameterBufferType; slice_param_desc.size = sizeof(VAIQMatrixBufferMPEG2); vaCreateDecodeBuffer(dpy, slice_param_desc, &slice_param); slice_param->slice_data_offset = 0; /* Let's say all slices in this bit-stream has 64-bit header */ slice_param->macroblock_offset = 64; slice_param->vertical_position = i; /* set up the rest based on what is in the slice header ... 
*/ /* send the slice parameter buffer */ vaRenderPicture(dpy, context, &surfaces[0], &slice_param_desc, 1); /* Create a slice data buffer */ unsigned char *slice_data; VADecodeBufferDescriptor slice_data_desc; slice_data_desc.type = VASliceDataBufferType; slice_data_desc.size = x; /* decoder can figure this out */ vaCreateDecodeBuffer(dpy, slice_data_desc, &slice_data); /* decoder will supply the src for the following memcpy */ memcpy(slice_data, slice_data_src, slice_data_desc.size); /* send the slice data buffer */ vaRenderPicture(dpy, context, &surfaces[0], &slice_data_desc, 1); } /* all slices have been sent, mark the end for this frame */ vaEndPicture(dpy, context, &surfaces[0]);