xf86-video-intel: 9 commits - src/intel_module.c src/render_program/exa_sf.g5a src/render_program/exa_sf.g5b src/render_program/exa_sf_mask.g5a src/render_program/exa_sf_mask.g5b src/render_program/exa_wm_ca.g5a src/render_program/exa_wm_ca.g5b src/render_program/exa_wm_ca_srcalpha.g5a src/render_program/exa_wm_ca_srcalpha.g5b src/render_program/exa_wm_mask_affine.g5a src/render_program/exa_wm_mask_affine.g5b src/render_program/exa_wm_mask_projective.g5a src/render_program/exa_wm_mask_projective.g5b src/render_program/exa_wm_mask_sample_a.g5a src/render_program/exa_wm_mask_sample_a.g5b src/render_program/exa_wm_mask_sample_argb.g5a src/render_program/exa_wm_mask_sample_argb.g5b src/render_program/exa_wm_noca.g5a src/render_program/exa_wm_noca.g5b src/render_program/exa_wm_src_affine.g5a src/render_program/exa_wm_src_affine.g5b src/render_program/exa_wm_src_projective.g5a src/render_program/exa_wm_src_projective.g5b src/render_program/exa_wm_src_sample_a.g5a src/render_program/exa_wm_src_sample_a.g5b src/render_program/exa_wm_src_sample_argb.g5a src/render_program/exa_wm_src_sample_argb.g5b src/render_program/exa_wm_src_sample_planar.g5a src/render_program/exa_wm_src_sample_planar.g5b src/render_program/exa_wm_write.g5a src/render_program/exa_wm_write.g5b src/render_program/exa_wm_xy.g5a src/render_program/exa_wm_xy.g5b src/render_program/exa_wm_yuv_rgb.g5a src/render_program/exa_wm_yuv_rgb.g5b src/render_program/Makefile.am src/sna/gen4_render.c src/sna/gen5_render.c src/sna/kgem.c src/sna/sna_accel.c src/sna/sna.h src/sna/sna_io.c src/sna/sna_video.c

Chris Wilson ickle at kemper.freedesktop.org
Thu Nov 24 14:08:23 PST 2011


 src/intel_module.c                              |    2 
 src/render_program/Makefile.am                  |   78 +++++++++++-
 src/render_program/exa_sf.g5a                   |   99 ++++++++++++++++
 src/render_program/exa_sf.g5b                   |    7 +
 src/render_program/exa_sf_mask.g5a              |   99 ++++++++++++++++
 src/render_program/exa_sf_mask.g5b              |    7 +
 src/render_program/exa_wm_ca.g5a                |   38 ++++++
 src/render_program/exa_wm_ca.g5b                |    4 
 src/render_program/exa_wm_ca_srcalpha.g5a       |   37 ++++++
 src/render_program/exa_wm_ca_srcalpha.g5b       |    4 
 src/render_program/exa_wm_mask_affine.g5a       |   31 +++++
 src/render_program/exa_wm_mask_affine.g5b       |    4 
 src/render_program/exa_wm_mask_projective.g5a   |   53 ++++++++
 src/render_program/exa_wm_mask_projective.g5b   |   16 ++
 src/render_program/exa_wm_mask_sample_a.g5a     |   49 ++++++++
 src/render_program/exa_wm_mask_sample_a.g5b     |    3 
 src/render_program/exa_wm_mask_sample_argb.g5a  |   49 ++++++++
 src/render_program/exa_wm_mask_sample_argb.g5b  |    3 
 src/render_program/exa_wm_noca.g5a              |   38 ++++++
 src/render_program/exa_wm_noca.g5b              |    4 
 src/render_program/exa_wm_src_affine.g5a        |   31 +++++
 src/render_program/exa_wm_src_affine.g5b        |    4 
 src/render_program/exa_wm_src_projective.g5a    |   49 ++++++++
 src/render_program/exa_wm_src_projective.g5b    |   16 ++
 src/render_program/exa_wm_src_sample_a.g5a      |   48 +++++++
 src/render_program/exa_wm_src_sample_a.g5b      |    3 
 src/render_program/exa_wm_src_sample_argb.g5a   |   46 +++++++
 src/render_program/exa_wm_src_sample_argb.g5b   |    2 
 src/render_program/exa_wm_src_sample_planar.g5a |   32 +++++
 src/render_program/exa_wm_src_sample_planar.g5b |    5 
 src/render_program/exa_wm_write.g5a             |   55 +++++++++
 src/render_program/exa_wm_write.g5b             |    6 
 src/render_program/exa_wm_xy.g5a                |   52 ++++++++
 src/render_program/exa_wm_xy.g5b                |    4 
 src/render_program/exa_wm_yuv_rgb.g5a           |   98 ++++++++++++++++
 src/render_program/exa_wm_yuv_rgb.g5b           |   12 +
 src/sna/gen4_render.c                           |    2 
 src/sna/gen5_render.c                           |  146 ++++++++++++------------
 src/sna/kgem.c                                  |    6 
 src/sna/sna.h                                   |    1 
 src/sna/sna_accel.c                             |  144 ++++++++++++++++++-----
 src/sna/sna_io.c                                |    2 
 src/sna/sna_video.c                             |    3 
 43 files changed, 1272 insertions(+), 120 deletions(-)

New commits:
commit 394ad51c421f41e4bc6545fcee10b1757cb5f518
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Nov 24 17:35:36 2011 +0000

    sna/video: Check for Xv presence in the server before initialising
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
index 068b803..bd5ff14 100644
--- a/src/sna/sna_video.c
+++ b/src/sna/sna_video.c
@@ -493,6 +493,9 @@ void sna_video_init(struct sna *sna, ScreenPtr screen)
 	int prefer_overlay =
 	    xf86ReturnOptValBool(sna->Options, OPTION_PREFER_OVERLAY, FALSE);
 
+	if (!xf86LoaderCheckSymbol("xf86XVListGenericAdaptors"))
+		return;
+
 	num_adaptors = xf86XVListGenericAdaptors(sna->scrn, &adaptors);
 	newAdaptors =
 	    malloc((num_adaptors + 2) * sizeof(XF86VideoAdaptorPtr *));
commit a2fab9560ec5dd6efc352917fe7626e59bed69e3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Nov 24 17:14:08 2011 +0000

    sna: Downgrade Y-tiling if we must use the BLT
    
    There are many operations, usually the core drawing acceleration, where
    using the BLT is far preferable to using the CPU. However, the BLT is
    limited to only using X-tiling, so if we encounter a Y-tiled pixmap
    target we need to recreate it with X-tiling before proceeding. Hopefully,
    the pixmap is then kept around and rendered multiple times to amortize
    the cost of the copy.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index afc9256..1a35c4d 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -252,6 +252,41 @@ static uint32_t sna_pixmap_choose_tiling(PixmapPtr pixmap, uint32_t tiling)
 				  pixmap->drawable.bitsPerPixel);
 }
 
+static bool sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling)
+{
+	struct sna_pixmap *priv = sna_pixmap(pixmap);
+	struct sna *sna = to_sna_from_pixmap(pixmap);
+	struct kgem_bo *bo;
+	BoxRec box;
+
+	if (priv->pinned)
+		return false;
+
+	bo = kgem_create_2d(&sna->kgem,
+			    pixmap->drawable.width,
+			    pixmap->drawable.height,
+			    pixmap->drawable.bitsPerPixel,
+			    tiling, 0);
+	if (bo == NULL)
+		return false;
+
+	box.x1 = box.y1 = 0;
+	box.x2 = pixmap->drawable.width;
+	box.y2 = pixmap->drawable.height;
+
+	if (!sna->render.copy_boxes(sna, GXcopy,
+				    pixmap, priv->gpu_bo, 0, 0,
+				    pixmap, bo, 0, 0,
+				    &box, 1)) {
+		kgem_bo_destroy(&sna->kgem, bo);
+		return false;
+	}
+
+	kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
+	priv->gpu_bo = bo;
+	return true;
+}
+
 static inline void sna_set_pixmap(PixmapPtr pixmap, struct sna_pixmap *sna)
 {
 	dixSetPrivate(&pixmap->devPrivates, &sna_pixmap_index, sna);
@@ -1474,8 +1509,10 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 				     &damage))
 		return false;
 
-	if (priv->gpu_bo->tiling == I915_TILING_Y)
-		return false;
+	if (priv->gpu_bo->tiling == I915_TILING_Y) {
+		if (!sna_pixmap_change_tiling(pixmap, I915_TILING_X))
+			return false;
+	}
 
 	assert_pixmap_contains_box(pixmap, RegionExtents(region));
 	if (damage)
@@ -1592,8 +1629,12 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 				     &damage))
 		return false;
 
-	if (bo->tiling == I915_TILING_Y)
-		return false;
+	if (bo->tiling == I915_TILING_Y) {
+		if (!sna_pixmap_change_tiling(pixmap, I915_TILING_X))
+			return false;
+
+		bo = priv->gpu_bo;
+	}
 
 	assert_pixmap_contains_box(pixmap, RegionExtents(region));
 	if (damage)
@@ -3208,7 +3249,8 @@ sna_copy_plane(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 
 	if (sna_drawable_use_gpu_bo(dst, &region.extents, &damage)) {
 		struct sna_pixmap *priv = sna_pixmap(pixmap);
-		if (priv->gpu_bo->tiling != I915_TILING_Y) {
+		if (priv->gpu_bo->tiling != I915_TILING_Y ||
+		    sna_pixmap_change_tiling(pixmap, I915_TILING_X)) {
 			RegionUninit(&region);
 			return miDoCopy(src, dst, gc,
 					src_x, src_y,
@@ -6819,8 +6861,17 @@ sna_poly_fill_rect_stippled_blt(DrawablePtr drawable,
 
 	PixmapPtr stipple = gc->stipple;
 
-	if (bo->tiling == I915_TILING_Y)
-		return false;
+	if (bo->tiling == I915_TILING_Y) {
+		PixmapPtr pixmap = get_drawable_pixmap(drawable);
+
+		/* This is cheating, but only the gpu_bo can be tiled */
+		assert(bo == sna_pixmap(pixmap)->gpu_bo);
+
+		if (!sna_pixmap_change_tiling(pixmap, I915_TILING_X))
+			return false;
+
+		bo = sna_pixmap(pixmap)->gpu_bo;
+	}
 
 	sna_drawable_move_to_cpu(&stipple->drawable, false);
 
@@ -7121,8 +7172,11 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
 
 	bo = priv->gpu_bo;
 	if (bo->tiling == I915_TILING_Y) {
-		DBG(("%s -- fallback, dst uses Y-tiling\n", __FUNCTION__));
-		return false;
+		if (!sna_pixmap_change_tiling(pixmap, I915_TILING_X)) {
+			DBG(("%s -- fallback, dst uses Y-tiling\n", __FUNCTION__));
+			return false;
+		}
+		bo = priv->gpu_bo;
 	}
 
 	get_drawable_deltas(drawable, pixmap, &dx, &dy);
@@ -7664,8 +7718,10 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc,
 	uint8_t rop = transparent ? copy_ROP[gc->alu] : ROP_S;
 
 	if (priv->gpu_bo->tiling == I915_TILING_Y) {
-		DBG(("%s -- fallback, dst uses Y-tiling\n", __FUNCTION__));
-		return false;
+		if (!sna_pixmap_change_tiling(pixmap, I915_TILING_X)) {
+			DBG(("%s -- fallback, dst uses Y-tiling\n", __FUNCTION__));
+			return false;
+		}
 	}
 
 	get_drawable_deltas(drawable, pixmap, &dx, &dy);
@@ -7949,8 +8005,10 @@ sna_push_pixels_solid_blt(GCPtr gc,
 	int n;
 	uint8_t rop = copy_ROP[gc->alu];
 
-	if (priv->gpu_bo->tiling == I915_TILING_Y)
-		return false;
+	if (priv->gpu_bo->tiling == I915_TILING_Y) {
+		if (!sna_pixmap_change_tiling(pixmap, I915_TILING_X))
+			return false;
+	}
 
 	if (!sna_drawable_use_gpu_bo(drawable, &region->extents, &damage))
 		return false;
commit e0a4492c8b51d6ae09cde0b895da6177284e95e0
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Nov 24 17:00:18 2011 +0000

    sna: Use Y-tiling for source pixmaps
    
    Y-tiling is slightly faster with RENDER operations, so attempt to
    allocate source-only pixmaps using this tiling mode. Actually using
    Y-tiling is a delicate balance because it then prevents the use of the
    BLT. For instance, enabling Y-tiling by default gives a 30% performance
    improvement on the fish-demo (compositing benchmark) at 2560x1440 on
    Ironlake but regresses tiger-demo by 2x (spans benchmark).
    
    So experiment with this compromise and allow for changing the default
    tiling.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/intel_module.c b/src/intel_module.c
index 80b5da8..f452e1b 100644
--- a/src/intel_module.c
+++ b/src/intel_module.c
@@ -56,7 +56,7 @@ static const struct intel_device_info intel_i855_info = {
 	.gen = 21,
 };
 static const struct intel_device_info intel_i865_info = {
-	.gen = 21,
+	.gen = 22,
 };
 
 static const struct intel_device_info intel_i915_info = {
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 9492fc2..4377774 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -278,6 +278,7 @@ struct sna {
 		struct gen7_render_state gen7;
 	} render_state;
 	uint32_t have_render;
+	uint32_t default_tiling;
 
 	Bool directRenderingOpen;
 	char *deviceName;
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 0d5c0bf..afc9256 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -212,15 +212,31 @@ static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
 	return true;
 }
 
-static uint32_t sna_pixmap_choose_tiling(PixmapPtr pixmap)
+static inline uint32_t default_tiling(PixmapPtr pixmap)
+{
+	struct sna_pixmap *priv = sna_pixmap(pixmap);
+	struct sna *sna = to_sna_from_pixmap(pixmap);
+
+	/* Try to avoid hitting the Y-tiling GTT mapping bug on 855GM */
+	if (sna->kgem.gen == 21)
+		return I915_TILING_X;
+
+	return sna_damage_is_all(&priv->cpu_damage,
+				 pixmap->drawable.width,
+				 pixmap->drawable.height) ? I915_TILING_Y : sna->default_tiling;
+}
+
+static uint32_t sna_pixmap_choose_tiling(PixmapPtr pixmap, uint32_t tiling)
 {
 	struct sna *sna = to_sna_from_pixmap(pixmap);
-	uint32_t tiling, bit;
+	uint32_t bit;
 
 	/* Use tiling by default, but disable per user request */
-	tiling = I915_TILING_X;
-	bit = pixmap->usage_hint == SNA_CREATE_FB ?
-		SNA_TILING_FB : SNA_TILING_2D;
+	if (pixmap->usage_hint == SNA_CREATE_FB) {
+		tiling = I915_TILING_X;
+		bit = SNA_TILING_FB;
+	} else
+		bit = SNA_TILING_2D;
 	if ((sna->tiling && (1 << bit)) == 0)
 		tiling = I915_TILING_NONE;
 
@@ -282,7 +298,7 @@ struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap)
 					pixmap->drawable.width,
 					pixmap->drawable.height,
 					pixmap->drawable.bitsPerPixel,
-					sna_pixmap_choose_tiling(pixmap)))
+					I915_TILING_NONE))
 			return NULL;
 		break;
 	}
@@ -979,7 +995,8 @@ sna_pixmap_force_to_gpu(PixmapPtr pixmap)
 					      pixmap->drawable.width,
 					      pixmap->drawable.height,
 					      pixmap->drawable.bitsPerPixel,
-					      sna_pixmap_choose_tiling(pixmap),
+					      sna_pixmap_choose_tiling(pixmap,
+								       default_tiling(pixmap)),
 					      flags);
 		if (priv->gpu_bo == NULL)
 			return NULL;
@@ -1017,7 +1034,8 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap)
 					       pixmap->drawable.width,
 					       pixmap->drawable.height,
 					       pixmap->drawable.bitsPerPixel,
-					       sna_pixmap_choose_tiling(pixmap),
+					       sna_pixmap_choose_tiling(pixmap,
+									default_tiling(pixmap)),
 					       priv->cpu_damage ? CREATE_INACTIVE : 0);
 		if (priv->gpu_bo == NULL) {
 			assert(list_is_empty(&priv->list));
@@ -1950,7 +1968,8 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	if (dst_priv && dst_priv->gpu_bo == NULL &&
 	    src_priv && src_priv->gpu_bo != NULL) {
 		uint32_t tiling =
-			sna_pixmap_choose_tiling(dst_pixmap);
+			sna_pixmap_choose_tiling(dst_pixmap,
+						 src_priv->gpu_bo->tiling);
 
 		DBG(("%s: create dst GPU bo for upload\n", __FUNCTION__));
 
@@ -8507,6 +8526,7 @@ Bool sna_accel_init(ScreenPtr screen, struct sna *sna)
 
 	backend = "no";
 	sna->have_render = false;
+	sna->default_tiling = I915_TILING_X;
 	no_render_init(sna);
 
 #if !DEBUG_NO_RENDER
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 04366be..2930713 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -42,6 +42,8 @@
 
 #define PITCH(x, y) ALIGN((x)*(y), 4)
 
+/* XXX Need to avoid using GTT fenced access for I915_TILING_Y on 855GM */
+
 static void read_boxes_inplace(struct kgem *kgem,
 			       struct kgem_bo *bo, int16_t src_dx, int16_t src_dy,
 			       PixmapPtr pixmap, int16_t dst_dx, int16_t dst_dy,
commit 7e4a1b7ed25a881113e197acde62b07f26f2d6ef
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Nov 24 11:34:51 2011 +0000

    src/sna/gen5: Replace the precompiled shaders
    
    Take advantage of a couple of new instructions introduced with Cantiga
    to reduce the instruction count inside the shaders and improve
    performance by around 10% in the fish-demo.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/render_program/Makefile.am b/src/render_program/Makefile.am
index c70b1f7..1704036 100644
--- a/src/render_program/Makefile.am
+++ b/src/render_program/Makefile.am
@@ -15,14 +15,16 @@ INTEL_G4A =				\
 	exa_wm_ca_srcalpha.g4a		\
 	exa_wm_write.g4a 		\
 	exa_wm_yuv_rgb.g4a		\
-	exa_wm_xy.g4a
+	exa_wm_xy.g4a			\
+	$(NULL)
 
 INTEL_G4I =				\
 	exa_wm.g4i			\
 	exa_wm_affine.g4i		\
 	exa_wm_projective.g4i		\
 	exa_wm_sample_planar.g4i	\
-	exa_wm_src_sample_argb.g4i
+	exa_wm_src_sample_argb.g4i	\
+	$(NULL)
 
 INTEL_G4B = 				\
 	exa_sf.g4b 			\
@@ -41,7 +43,8 @@ INTEL_G4B = 				\
 	exa_wm_ca_srcalpha.g4b		\
 	exa_wm_write.g4b 		\
 	exa_wm_yuv_rgb.g4b		\
-	exa_wm_xy.g4b
+	exa_wm_xy.g4b			\
+	$(NULL)
 
 
 INTEL_G4B_GEN5 = 				\
@@ -61,11 +64,53 @@ INTEL_G4B_GEN5 = 				\
 	exa_wm_ca_srcalpha.g4b.gen5		\
 	exa_wm_write.g4b.gen5 			\
 	exa_wm_yuv_rgb.g4b.gen5			\
-	exa_wm_xy.g4b.gen5
+	exa_wm_xy.g4b.gen5			\
+	$(NULL)
+
+INTEL_G5A =				\
+	exa_sf.g5a 			\
+	exa_sf_mask.g5a 		\
+	exa_wm_src_affine.g5a 		\
+	exa_wm_src_projective.g5a 	\
+	exa_wm_src_sample_argb.g5a 	\
+	exa_wm_src_sample_a.g5a 	\
+	exa_wm_src_sample_planar.g5a 	\
+	exa_wm_mask_affine.g5a 		\
+	exa_wm_mask_projective.g5a 	\
+	exa_wm_mask_sample_argb.g5a 	\
+	exa_wm_mask_sample_a.g5a 	\
+	exa_wm_noca.g5a			\
+	exa_wm_ca.g5a			\
+	exa_wm_ca_srcalpha.g5a		\
+	exa_wm_write.g5a 		\
+	exa_wm_yuv_rgb.g5a		\
+	exa_wm_xy.g5a			\
+	$(NULL)
+
+INTEL_G5B = 				\
+	exa_sf.g5b 			\
+	exa_sf_mask.g5b 		\
+	exa_wm_src_affine.g5b 		\
+	exa_wm_src_projective.g5b 	\
+	exa_wm_src_sample_argb.g5b 	\
+	exa_wm_src_sample_a.g5b 	\
+	exa_wm_src_sample_planar.g5b 	\
+	exa_wm_mask_affine.g5b 		\
+	exa_wm_mask_projective.g5b 	\
+	exa_wm_mask_sample_argb.g5b 	\
+	exa_wm_mask_sample_a.g5b 	\
+	exa_wm_noca.g5b			\
+	exa_wm_ca.g5b			\
+	exa_wm_ca_srcalpha.g5b		\
+	exa_wm_write.g5b 		\
+	exa_wm_yuv_rgb.g5b		\
+	exa_wm_xy.g5b			\
+	$(NULL)
 
 INTEL_G6I =				\
 	exa_wm_affine.g6i		\
-	exa_wm_write.g6i
+	exa_wm_write.g6i		\
+	$(NULL)
 
 INTEL_G6A =				\
 	exa_wm_src_affine.g6a 		\
@@ -81,7 +126,8 @@ INTEL_G6A =				\
 	exa_wm_ca_srcalpha.g6a		\
 	exa_wm_noca.g6a			\
 	exa_wm_write.g6a 		\
-	exa_wm_yuv_rgb.g6a
+	exa_wm_yuv_rgb.g6a		\
+	$(NULL)
 
 INTEL_G6B =				\
 	exa_wm_src_affine.g6b 		\
@@ -97,7 +143,8 @@ INTEL_G6B =				\
 	exa_wm_ca_srcalpha.g6b		\
 	exa_wm_noca.g6b			\
 	exa_wm_write.g6b 		\
-	exa_wm_yuv_rgb.g6b
+	exa_wm_yuv_rgb.g6b		\
+	$(NULL)
 
 INTEL_G7A =				\
 	exa_wm_mask_affine.g7a		\
@@ -110,7 +157,8 @@ INTEL_G7A =				\
 	exa_wm_src_sample_argb.g7a 	\
 	exa_wm_src_sample_planar.g7a 	\
 	exa_wm_write.g7a 		\
-	exa_wm_yuv_rgb.g7a
+	exa_wm_yuv_rgb.g7a		\
+	$(NULL)
 
 INTEL_G7B =				\
 	exa_wm_mask_affine.g7b		\
@@ -123,13 +171,16 @@ INTEL_G7B =				\
 	exa_wm_src_sample_argb.g7b 	\
 	exa_wm_src_sample_planar.g7b 	\
 	exa_wm_write.g7b 		\
-	exa_wm_yuv_rgb.g7b
+	exa_wm_yuv_rgb.g7b		\
+	$(NULL)
 
 EXTRA_DIST = 		\
 	$(INTEL_G4A)	\
 	$(INTEL_G4I)	\
 	$(INTEL_G4B)	\
 	$(INTEL_G4B_GEN5)\
+	$(INTEL_G5A)	\
+	$(INTEL_G5B)	\
 	$(INTEL_G6A)	\
 	$(INTEL_G6B)	\
 	$(INTEL_G6I)	\
@@ -138,10 +189,13 @@ EXTRA_DIST = 		\
 
 if HAVE_GEN4ASM
 
-SUFFIXES = .g4a .g4b .g6a .g6b .g7a .g7b
+SUFFIXES = .g4a .g4b .g5a .g5b .g6a .g6b .g7a .g7b
 .g4a.g4b:
 	m4 -I$(srcdir) -s $< > $*.g4m && intel-gen4asm -o $@ $*.g4m && intel-gen4asm -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
 
+.g5a.g5b:
+	m4 -I$(srcdir) -s $< > $*.g5m && intel-gen4asm -g 5 -o $@ $*.g5m && rm $*.g5m
+
 .g6a.g6b:
 	m4 -I$(srcdir) -s $< > $*.g6m && intel-gen4asm -g 6 -o $@ $*.g6m && rm $*.g6m
 
@@ -149,13 +203,15 @@ SUFFIXES = .g4a .g4b .g6a .g6b .g7a .g7b
 	m4 -I$(srcdir) -s $< > $*.g7m && intel-gen4asm -g 7 -o $@ $*.g7m && rm $*.g7m
 
 $(INTEL_G4B): $(INTEL_G4I)
+$(INTEL_G5B): $(INTEL_G4I)
 $(INTEL_G6B): $(INTEL_G4I) $(INTEL_G6I)
 $(INTEL_G7B): $(INTEL_G4I) $(INTEL_G6I)
 
-BUILT_SOURCES= $(INTEL_G4B) $(INTEL_G6B) $(INTEL_G7B)
+BUILT_SOURCES= $(INTEL_G4B) $(INTEL_G5B) $(INTEL_G6B) $(INTEL_G7B)
 
 clean-local:
 	-rm -f $(INTEL_G4B) $(INTEL_G4B_GEN5)
+	-rm -f $(INTEL_G5B)
 	-rm -f $(INTEL_G6B)
 	-rm -f $(INTEL_G7B)
 endif
diff --git a/src/render_program/exa_sf.g5a b/src/render_program/exa_sf.g5a
new file mode 100644
index 0000000..a5dd0ee
--- /dev/null
+++ b/src/render_program/exa_sf.g5a
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Packard <keithp at keithp.com>
+ *    Eric Anholt <eric at anholt.net>
+ *
+ */
+
+/*
+ * Inputs (note all sub-register addresses are bytes, not float indices)
+ *
+ * Note that the vertices will have been reordered:
+ *
+ * V0 is topmost (leftmost among topmost) (upper left)
+ * V1 is next clockwise (lower right)
+ * V2 is remaining (lower left)
+ *
+ *  V0 ...................... XX
+ *  |                          .
+ *  |                          .
+ *  |                          .
+ *  V2------------------------V1
+ *
+ *  G0	    thread state -- just pass along
+ *
+ *  G1 and G2 are fixed by SF spec
+ *
+ *  G1.0    reserved
+ *  G1.4    Provoking vertex
+ *  G1.8    Determinant
+ *  G1.12   X1 - X0
+ *  G1.16   X2 - X0
+ *  G1.20   Y1 - Y0
+ *  G1.24   Y2 - Y0
+ *  G1.30   reserved
+ *
+ *  G2.0    Z0
+ *  G2.4    1/W0
+ *  G2.8    Z1
+ *  G2.12   1/W1
+ *  G2.16   Z2
+ *  G2.20   1/W2
+ *  G2.24   reserved
+ *  G2.30   reserved
+ *
+ *  G3 is V0 Vertex Attribute Data from URB (upper left)
+ *
+ *  G3.0    u0
+ *  G3.4    v0
+ *
+ *  G4 is V1 Vertex Attribute Data from URB (lower right)
+ *
+ *  G4.0    u1
+ *  G4.4    v1
+ *
+ *  G5 is V2 Vertex Attribute Data from URB (lower left)
+ *
+ */
+
+/* Compute inverses of the input deltas */
+send (4) 0 g6<1>F g1.12<4,4,1>F math inv mlen 1 rlen 1 { align1 };
+
+/* texture location at V0 */
+mov (4) m3<1>F g3<4,4,1>F { align1 };
+
+/* compute V1 - V2 (motion in X) for texture coordinates */
+add (4) g7<1>F g4<4,4,1>F -g5<4,4,1>F { align1 };
+
+/* multiply by 1/dx */
+mul (4) m1<1>F g7<4,4,1>F g6.0<0,1,0>F { align1 };
+
+/* Compute V2 - V0 (motion in Y) for texture coordinates */
+add (4) g7<1>F g5<4,4,1>F -g3<4,4,1>F { align1 };
+
+/* multiply by 1/dy */
+mul (4) m2<1>F g7<4,4,1>F g6.8<0,1,0>F {align1 };
+
+/* and we're done */
+send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
diff --git a/src/render_program/exa_sf.g5b b/src/render_program/exa_sf.g5b
new file mode 100644
index 0000000..d1035ae
--- /dev/null
+++ b/src/render_program/exa_sf.g5b
@@ -0,0 +1,7 @@
+   { 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 },
+   { 0x00400001, 0x206003be, 0x00690060, 0x00000000 },
+   { 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 },
+   { 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 },
+   { 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 },
+   { 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 },
+   { 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 },
diff --git a/src/render_program/exa_sf_mask.g5a b/src/render_program/exa_sf_mask.g5a
new file mode 100644
index 0000000..b4e984c
--- /dev/null
+++ b/src/render_program/exa_sf_mask.g5a
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Packard <keithp at keithp.com>
+ *    Eric Anholt <eric at anholt.net>
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ */
+
+/*
+ * Inputs (note all sub-register addresses are bytes, not float indices)
+ *
+ * Note that the vertices will have been reordered:
+ *
+ * V0 is topmost (leftmost among topmost) (upper left)
+ * V1 is next clockwise (lower right)
+ * V2 is remaining (lower left)
+ *
+ *  V0 ...................... XX
+ *  |                          .
+ *  |                          .
+ *  |                          .
+ *  V2------------------------V1
+ *
+ *  G0	    thread state -- just pass along
+ *
+ *  G1 and G2 are fixed by SF spec
+ *
+ *  G1.0    reserved
+ *  G1.4    Provoking vertex
+ *  G1.8    Determinant
+ *  G1.12   X1 - X0
+ *  G1.16   X2 - X0
+ *  G1.20   Y1 - Y0
+ *  G1.24   Y2 - Y0
+ *  G1.30   reserved
+ *
+ *  G2.0    Z0
+ *  G2.4    1/W0
+ *  G2.8    Z1
+ *  G2.12   1/W1
+ *  G2.16   Z2
+ *  G2.20   1/W2
+ *  G2.24   reserved
+ *  G2.30   reserved
+ *
+ *  G3 is V0 Vertex Attribute Data from URB (upper left)
+ *
+ *  G3.0    u0
+ *  G3.4    v0
+ *
+ *  G4 is V1 Vertex Attribute Data from URB (lower right)
+ *
+ *  G4.0    u1
+ *  G4.4    v1
+ *
+ *  G5 is V2 Vertex Attribute Data from URB (lower left)
+ *
+ */
+
+/* Compute inverses of the input deltas */
+send (4) 0 g6<1>F g1.12<4,4,1>F math inv mlen 1 rlen 1 { align1 };
+
+/* texture location at V0 */
+mov (8) m3<1>F g3<8,8,1>F { align1 };
+
+/* compute V1 - V2 (motion in X) for texture coordinates */
+add (8) g7<1>F g4<8,8,1>F -g5<8,8,1>F { align1 };
+
+/* multiply by 1/dx */
+mul (8) m1<1>F g7<8,8,1>F g6.0<0,1,0>F { align1 };
+
+/* Compute V2 - V0 (motion in Y) for texture coordinates */
+add (8) g7<1>F g5<8,8,1>F -g3<8,8,1>F { align1 };
+
+/* multiply by 1/dy */
+mul (8) m2<1>F g7<8,8,1>F g6.8<0,1,0>F {align1 };
+
+/* and we're done */
+send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
diff --git a/src/render_program/exa_sf_mask.g5b b/src/render_program/exa_sf_mask.g5b
new file mode 100644
index 0000000..76a03f8
--- /dev/null
+++ b/src/render_program/exa_sf_mask.g5b
@@ -0,0 +1,7 @@
+   { 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 },
+   { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+   { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 },
+   { 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
+   { 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 },
+   { 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
+   { 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 },
diff --git a/src/render_program/exa_wm_ca.g5a b/src/render_program/exa_wm_ca.g5a
new file mode 100644
index 0000000..5d982b3
--- /dev/null
+++ b/src/render_program/exa_wm_ca.g5a
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+ 
+/*
+ * Composite src and mask together, with component alpha
+ */
+
+include(`exa_wm.g4i')
+
+/* mul mask rgba channels to src */
+mul (16)    src_sample_r_01<1>F	src_sample_r_01<8,8,1>F	mask_sample_r_01<8,8,1>F { compr align1 };
+mul (16)    src_sample_g_01<1>F src_sample_g_01<8,8,1>F	mask_sample_g_01<8,8,1>F { compr align1 };
+mul (16)    src_sample_b_01<1>F src_sample_b_01<8,8,1>F	mask_sample_b_01<8,8,1>F { compr align1 };
+mul (16)    src_sample_a_01<1>F src_sample_a_01<8,8,1>F	mask_sample_a_01<8,8,1>F { compr align1 };
diff --git a/src/render_program/exa_wm_ca.g5b b/src/render_program/exa_wm_ca.g5b
new file mode 100644
index 0000000..372e8b2
--- /dev/null
+++ b/src/render_program/exa_wm_ca.g5b
@@ -0,0 +1,4 @@
+   { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
+   { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
+   { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
+   { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
diff --git a/src/render_program/exa_wm_ca_srcalpha.g5a b/src/render_program/exa_wm_ca_srcalpha.g5a
new file mode 100644
index 0000000..d1f847f
--- /dev/null
+++ b/src/render_program/exa_wm_ca_srcalpha.g5a
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+ 
+/*
+ * Composite src alpha and mask together, with component alpha
+ */
+
+include(`exa_wm.g4i')
+
+mul (16)    src_sample_r_01<1>F mask_sample_r_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 };
+mul (16)    src_sample_g_01<1>F mask_sample_g_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 };
+mul (16)    src_sample_b_01<1>F mask_sample_b_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 };
+mul (16)    src_sample_a_01<1>F mask_sample_a_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 };
diff --git a/src/render_program/exa_wm_ca_srcalpha.g5b b/src/render_program/exa_wm_ca_srcalpha.g5b
new file mode 100644
index 0000000..963d676
--- /dev/null
+++ b/src/render_program/exa_wm_ca_srcalpha.g5b
@@ -0,0 +1,4 @@
+   { 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
+   { 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 },
+   { 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 },
+   { 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 },
diff --git a/src/render_program/exa_wm_mask_affine.g5a b/src/render_program/exa_wm_mask_affine.g5a
new file mode 100644
index 0000000..d3f498e
--- /dev/null
+++ b/src/render_program/exa_wm_mask_affine.g5a
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+include(`exa_wm.g4i')
+/* mask u: the line result goes to null, then mac combines the y-gradient term; NOTE(review): presumably chained through the accumulator - confirm in the ISA reference */
+line (16) null mask_du_dx dst_x<8,8,1>F { compr align1 };
+mac (16) mask_u mask_du_dy dst_y<8,8,1>F { compr align1 };
+/* mask v: the same instruction pair, using the v gradients */
+line (16) null mask_dv_dx dst_x<8,8,1>F { compr align1 };
+mac (16) mask_v mask_dv_dy dst_y<8,8,1>F { compr align1 };
diff --git a/src/render_program/exa_wm_mask_affine.g5b b/src/render_program/exa_wm_mask_affine.g5b
new file mode 100644
index 0000000..e265bee
--- /dev/null
+++ b/src/render_program/exa_wm_mask_affine.g5b
@@ -0,0 +1,4 @@
+   { 0x00802059, 0x200077bc, 0x000000a0, 0x008d0100 },
+   { 0x00802048, 0x210077be, 0x000000a4, 0x008d0140 },
+   { 0x00802059, 0x200077bc, 0x000000b0, 0x008d0100 },
+   { 0x00802048, 0x214077be, 0x000000b4, 0x008d0140 },
diff --git a/src/render_program/exa_wm_mask_projective.g5a b/src/render_program/exa_wm_mask_projective.g5a
new file mode 100644
index 0000000..9acaace
--- /dev/null
+++ b/src/render_program/exa_wm_mask_projective.g5a
@@ -0,0 +1,53 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+include(`exa_wm.g4i')
+/* Alias the generic coordinate names to the mask registers, so that the shared projective code included at the end operates on the mask */
+define(`du_dx',	`mask_du_dx')
+define(`du_dy',	`mask_du_dy')
+define(`uo',	`mask_uo')
+
+define(`dv_dx',	`mask_dv_dx')
+define(`dv_dy',	`mask_dv_dy')
+define(`vo',	`mask_vo')
+
+define(`dw_dx',	`mask_dw_dx')
+define(`dw_dy',	`mask_dw_dy')
+define(`wo',	`mask_wo')
+
+define(`u',	`mask_u')
+define(`v',	`mask_v')
+define(`w',	`mask_w')
+
+define(`u_0',	`mask_u_0')
+define(`v_0',	`mask_v_0')
+define(`u_1',	`mask_u_1')
+define(`v_1',	`mask_v_1')
+define(`w_0',	`mask_w_0')
+define(`w_1',	`mask_w_1')
+
+include(`exa_wm_projective.g4i')
diff --git a/src/render_program/exa_wm_mask_projective.g5b b/src/render_program/exa_wm_mask_projective.g5b
new file mode 100644
index 0000000..c357459
--- /dev/null
+++ b/src/render_program/exa_wm_mask_projective.g5b
@@ -0,0 +1,16 @@
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc },
+   { 0x00600031, 0x21801fbd, 0x108d03c0, 0x02100001 },
+   { 0x00600031, 0x21a01fbd, 0x108d03e0, 0x02100001 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac },
+   { 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc },
+   { 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 },
diff --git a/src/render_program/exa_wm_mask_sample_a.g5a b/src/render_program/exa_wm_mask_sample_a.g5a
new file mode 100644
index 0000000..b1c75af
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_a.g5a
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the mask surface, alpha channel only */
+
+include(`exa_wm.g4i')
+
+/* build the sampler message; the reply is read back into a gX register */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* payload loading, assuming the texture coordinates start from g4 */
+
+/* load only alpha */
+mov (1) g0.8<1>UD	0x00007000UD { align1 mask_disable };
+mov (8) mask_msg<1>UD g0<8,8,1>UD { align1 }; /* copy g0 to the first message register */
+
+/* the message header is the copy of g0 made above, including the field patched by the first mov */
+/* emit the sampler send */
+send (16) mask_msg_ind		/* msg reg index */
+	mask_sample_a_01<1>UW 	/* readback */
+	null
+	sampler (2,1,F)		/* sampler message description, (binding_table,sampler_index,datatype) */
+				/* this is the mask fetch: binding table entry 2, sampler 1 */
+	mlen 5 rlen 2 { align1 };   /* message length 5, readback length 2 */
+
diff --git a/src/render_program/exa_wm_mask_sample_a.g5b b/src/render_program/exa_wm_mask_sample_a.g5b
new file mode 100644
index 0000000..472c2bb
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_a.g5b
@@ -0,0 +1,3 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
+   { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
+   { 0x07800031, 0x23801c09, 0x20000000, 0x0a2a0102 },
diff --git a/src/render_program/exa_wm_mask_sample_argb.g5a b/src/render_program/exa_wm_mask_sample_argb.g5a
new file mode 100644
index 0000000..78bfc92
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_argb.g5a
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the mask surface, all four channels */
+
+include(`exa_wm.g4i')
+
+/* build the sampler message; the reply is read back into gX registers */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* payload loading, assuming the texture coordinates start from g4 */
+
+/* load argb */
+mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
+mov (8) mask_msg<1>UD g0<8,8,1>UD { align1 }; /* copy g0 to the first message register */
+
+/* the message header is the copy of g0 made above, including the field patched by the first mov */
+/* emit the sampler send */
+send (16) mask_msg_ind		/* msg reg index */
+	mask_sample_base<1>UW 	/* readback */
+	null
+	sampler (2,1,F)		/* sampler message description, (binding_table,sampler_index,datatype) */
+				/* this is the mask fetch: binding table entry 2, sampler 1 */
+	mlen 5 rlen 8 { align1 };   /* message length 5, readback length 8 */
+
diff --git a/src/render_program/exa_wm_mask_sample_argb.g5b b/src/render_program/exa_wm_mask_sample_argb.g5b
new file mode 100644
index 0000000..cb112d5
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_argb.g5b
@@ -0,0 +1,3 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
+   { 0x07800031, 0x22c01c09, 0x20000000, 0x0a8a0102 },
diff --git a/src/render_program/exa_wm_noca.g5a b/src/render_program/exa_wm_noca.g5a
new file mode 100644
index 0000000..d0d60fa
--- /dev/null
+++ b/src/render_program/exa_wm_noca.g5a
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+ 
+/*
+ * Composite src and mask together, no component alpha
+ */
+
+include(`exa_wm.g4i')
+/* scale all four source channels, in place, by the mask alpha */
+
+mul (16)    src_sample_r_01<1>F	src_sample_r_01<8,8,1>F	mask_sample_a_01<8,8,1>F { compr align1 };
+mul (16)    src_sample_g_01<1>F src_sample_g_01<8,8,1>F	mask_sample_a_01<8,8,1>F { compr align1 };
+mul (16)    src_sample_b_01<1>F src_sample_b_01<8,8,1>F	mask_sample_a_01<8,8,1>F { compr align1 };
+mul (16)    src_sample_a_01<1>F src_sample_a_01<8,8,1>F	mask_sample_a_01<8,8,1>F { compr align1 };
diff --git a/src/render_program/exa_wm_noca.g5b b/src/render_program/exa_wm_noca.g5b
new file mode 100644
index 0000000..1506334
--- /dev/null
+++ b/src/render_program/exa_wm_noca.g5b
@@ -0,0 +1,4 @@
+   { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
+   { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 },
+   { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 },
+   { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
diff --git a/src/render_program/exa_wm_src_affine.g5a b/src/render_program/exa_wm_src_affine.g5a
new file mode 100644
index 0000000..cf2b421
--- /dev/null
+++ b/src/render_program/exa_wm_src_affine.g5a
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+include(`exa_wm.g4i')
+/* source u: the line result goes to null, then mac combines the y-gradient term; NOTE(review): presumably chained through the accumulator - confirm in the ISA reference */
+line (16) null src_du_dx dst_x<8,8,1>F { compr align1 };
+mac (16) src_u src_du_dy dst_y<8,8,1>F { compr align1 };
+/* source v: the same instruction pair, using the v gradients */
+line (16) null src_dv_dx dst_x<8,8,1>F { compr align1 };
+mac (16) src_v src_dv_dy dst_y<8,8,1>F { compr align1 };
diff --git a/src/render_program/exa_wm_src_affine.g5b b/src/render_program/exa_wm_src_affine.g5b
new file mode 100644
index 0000000..f526adf
--- /dev/null
+++ b/src/render_program/exa_wm_src_affine.g5b
@@ -0,0 +1,4 @@
+   { 0x00802059, 0x200077bc, 0x00000060, 0x008d0100 },
+   { 0x00802048, 0x204077be, 0x00000064, 0x008d0140 },
+   { 0x00802059, 0x200077bc, 0x00000070, 0x008d0100 },
+   { 0x00802048, 0x208077be, 0x00000074, 0x008d0140 },
diff --git a/src/render_program/exa_wm_src_projective.g5a b/src/render_program/exa_wm_src_projective.g5a
new file mode 100644
index 0000000..16c9cd5
--- /dev/null
+++ b/src/render_program/exa_wm_src_projective.g5a
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+
+include(`exa_wm.g4i')
+define(`du_dx',	`src_du_dx')
+define(`du_dy',	`src_du_dy')
+define(`uo',	`src_uo')
+define(`dv_dx',	`src_dv_dx')
+define(`dv_dy',	`src_dv_dy')
+define(`vo',	`src_vo')
+define(`dw_dx',	`src_dw_dx')
+define(`dw_dy',	`src_dw_dy')
+define(`wo',	`src_wo')
+define(`u',	`src_u')
+define(`v',	`src_v')
+define(`w',	`src_w')
+define(`u_0',	`src_u_0')
+define(`v_0',	`src_v_0')
+define(`u_1',	`src_u_1')
+define(`v_1',	`src_v_1')
+define(`w_0',	`src_w_0')
+define(`w_1',	`src_w_1')
+/* the generic names now alias the source registers, so the shared projective code pulled in next operates on the source */
+include(`exa_wm_projective.g4i')
diff --git a/src/render_program/exa_wm_src_projective.g5b b/src/render_program/exa_wm_src_projective.g5b
new file mode 100644
index 0000000..ae3db8c
--- /dev/null
+++ b/src/render_program/exa_wm_src_projective.g5b
@@ -0,0 +1,16 @@
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c },
+   { 0x00600031, 0x21801fbd, 0x108d03c0, 0x02100001 },
+   { 0x00600031, 0x21a01fbd, 0x108d03e0, 0x02100001 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c },
+   { 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c },
+   { 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 },
diff --git a/src/render_program/exa_wm_src_sample_a.g5a b/src/render_program/exa_wm_src_sample_a.g5a
new file mode 100644
index 0000000..667bfb3
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_a.g5a
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the src surface, alpha channel only */
+
+include(`exa_wm.g4i')
+
+/* build the sampler message; the reply is read back into a gX register */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* payload loading, assuming the texture coordinates start from g4 */
+
+/* load alpha */
+mov (1) g0.8<1>UD	0x00007000UD { align1 mask_disable };
+mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy g0 to the first message register */
+
+/* the message header is the copy of g0 made above, including the field patched by the first mov */
+/* emit the sampler send */
+send (16) src_msg_ind		/* msg reg index */
+	src_sample_a_01<1>UW 	/* readback */
+	null
+	sampler (1,0,F)		/* sampler message description, (binding_table,sampler_index,datatype) */
+				/* this is the source fetch: binding table entry 1, sampler 0 */
+	mlen 5 rlen 2 { align1 };   /* message length 5, readback length 2 */
diff --git a/src/render_program/exa_wm_src_sample_a.g5b b/src/render_program/exa_wm_src_sample_a.g5b
new file mode 100644
index 0000000..0e4eebe
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_a.g5b
@@ -0,0 +1,3 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
+   { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
+   { 0x01800031, 0x22801c09, 0x20000000, 0x0a2a0001 },
diff --git a/src/render_program/exa_wm_src_sample_argb.g5a b/src/render_program/exa_wm_src_sample_argb.g5a
new file mode 100644
index 0000000..b598de6
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_argb.g5a
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the src surface, all four channels */
+
+include(`exa_wm.g4i')
+
+/* build the sampler message; the reply is read back into gX registers */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* payload loading, assuming the texture coordinates start from g4 */
+
+/* load argb */
+mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
+
+/* no explicit header copy in this variant: g0 is handed to the send as its source operand */
+send (16) src_msg_ind		/* msg reg index */
+	src_sample_base<1>UW 	/* readback */
+	g0<8,8,1>UW
+	sampler (1,0,F)		/* sampler message description, (binding_table,sampler_index,datatype) */
+				/* this is the source fetch: binding table entry 1, sampler 0 */
+	mlen 5 rlen 8 { align1 };   /* message length 5, readback length 8 */
diff --git a/src/render_program/exa_wm_src_sample_argb.g5b b/src/render_program/exa_wm_src_sample_argb.g5b
new file mode 100644
index 0000000..f8cb41e
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_argb.g5b
@@ -0,0 +1,2 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
+   { 0x01800031, 0x21c01d29, 0x208d0000, 0x0a8a0001 },
diff --git a/src/render_program/exa_wm_src_sample_planar.g5a b/src/render_program/exa_wm_src_sample_planar.g5a
new file mode 100644
index 0000000..59678f5
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_planar.g5a
@@ -0,0 +1,32 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the src surface in planar format */
+
+include(`exa_wm.g4i')
+/* this file adds no instructions of its own: the planar sampling code comes from the shared file included next */
+include(`exa_wm_sample_planar.g4i')
diff --git a/src/render_program/exa_wm_src_sample_planar.g5b b/src/render_program/exa_wm_src_sample_planar.g5b
new file mode 100644
index 0000000..ce3670b
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_planar.g5b
@@ -0,0 +1,5 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+   { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
+   { 0x01800031, 0x22001c09, 0x20000000, 0x0a2a0001 },
+   { 0x01800031, 0x21c01c09, 0x20000000, 0x0a2a0003 },
+   { 0x01800031, 0x22401c09, 0x20000000, 0x0a2a0005 },
diff --git a/src/render_program/exa_wm_write.g5a b/src/render_program/exa_wm_write.g5a
new file mode 100644
index 0000000..c472a9b
--- /dev/null
+++ b/src/render_program/exa_wm_write.g5a
@@ -0,0 +1,55 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+include(`exa_wm.g4i')
+
+/*
+ * Prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2).
+ */
+/* NOTE(review): m130..m133 presumably mean m2..m5 with the COMPR4 bit set, so each 16-wide mov also fills mN+4 - confirm in the PRM */
+mov (16) m130<1>F	src_sample_r_01<8,8,1>F { align1 compr };
+mov (16) m131<1>F	src_sample_g_01<8,8,1>F { align1 compr };
+mov (16) m132<1>F	src_sample_b_01<8,8,1>F { align1 compr };
+mov (16) m133<1>F	src_sample_a_01<8,8,1>F { align1 compr };
+
+/* message register 1 gets a copy of payload register g1; g0 is passed as the source operand of the send */
+mov (8) data_port_msg_1<1>F	g1<8,8,1>F		{ mask_disable align1 };
+
+/* write the pixels to the render target and end the thread (EOT) */
+send (16) 
+	data_port_msg_0_ind 
+	acc0<1>UW
+	g0<8,8,1>UW 
+	write (
+	       0,  /* binding_table */
+	       8,  /* pixel scoreboard clear, msg type simd16 single source */
+	       4,  /* render target write */
+	       0   /* no write commit message */
+	) 
+	mlen 10
+	rlen 0
+	{ align1 EOT };
diff --git a/src/render_program/exa_wm_write.g5b b/src/render_program/exa_wm_write.g5b
new file mode 100644
index 0000000..aff2ce0
--- /dev/null
+++ b/src/render_program/exa_wm_write.g5b
@@ -0,0 +1,6 @@
+   { 0x00802001, 0x304003be, 0x008d01c0, 0x00000000 },
+   { 0x00802001, 0x306003be, 0x008d0200, 0x00000000 },
+   { 0x00802001, 0x308003be, 0x008d0240, 0x00000000 },
+   { 0x00802001, 0x30a003be, 0x008d0280, 0x00000000 },
+   { 0x00600201, 0x202003be, 0x008d0020, 0x00000000 },
+   { 0x00800031, 0x24001d28, 0x548d0000, 0x94084800 },
diff --git a/src/render_program/exa_wm_xy.g5a b/src/render_program/exa_wm_xy.g5a
new file mode 100644
index 0000000..e99f5ac
--- /dev/null
+++ b/src/render_program/exa_wm_xy.g5a
@@ -0,0 +1,52 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+ 
+/*
+ * Register assignments (NOTE(review): possibly stale - the names used below come from exa_wm.g4i, which is authoritative):
+ *
+ *  x		    g6/g7
+ *  y		    g8/g9
+ *
+ *  temp x	    g10/g11
+ *  temp y	    g12/g13
+ *
+ *  src w	    g14/g15
+ *  src u	    m1/m2
+ *  src v	    m3/m4
+ */
+ 
+/* Fragment to compute per-pixel XY values */
+
+include(`exa_wm.g4i')
+    
+    /* Load the X and Y coordinates and add the vector immediates to get per-pixel coordinates (kept as UW) */
+add (16)	temp_x_uw<1>UW	dst_x_uw		0x10101010V	{ align1 };
+add (16)	temp_y_uw<1>UW	dst_y_uw		0x11001100V	{ align1 };
+
+    /* subtract the screen-space origin of vertex 0; the result is stored as float */
+add (16)	dst_x<1>F	temp_x_uw<8,8,1>UW	-screen_x0	{ compr align1 };
+add (16)	dst_y<1>F	temp_y_uw<8,8,1>UW	-screen_y0	{ compr align1 };
diff --git a/src/render_program/exa_wm_xy.g5b b/src/render_program/exa_wm_xy.g5b
new file mode 100644
index 0000000..327fc29
--- /dev/null
+++ b/src/render_program/exa_wm_xy.g5b
@@ -0,0 +1,4 @@
+   { 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
+   { 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
+   { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
+   { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
diff --git a/src/render_program/exa_wm_yuv_rgb.g5a b/src/render_program/exa_wm_yuv_rgb.g5a
new file mode 100644
index 0000000..4fb2576
--- /dev/null
+++ b/src/render_program/exa_wm_yuv_rgb.g5a
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Packard <keithp at keithp.com>
+ *    Eric Anholt <eric at anholt.net>
+ *
+ */
+
+include(`exa_wm.g4i')
+
+define(`YCbCr_base',	`src_sample_base')
+
+define(`Cr',		`src_sample_r')
+define(`Cr_01',		`src_sample_r_01')
+define(`Cr_23',		`src_sample_r_23')
+
+define(`Y',		`src_sample_g')
+define(`Y_01',		`src_sample_g_01')
+define(`Y_23',		`src_sample_g_23')
+
+define(`Cb',		`src_sample_b')
+define(`Cb_01',		`src_sample_b_01')
+define(`Cb_23',		`src_sample_b_23')
+
+define(`Crn',		`mask_sample_r')
+define(`Crn_01',	`mask_sample_r_01')
+define(`Crn_23',	`mask_sample_r_23')
+
+define(`Yn',		`mask_sample_g')
+define(`Yn_01',		`mask_sample_g_01')
+define(`Yn_23',		`mask_sample_g_23')
+
+define(`Cbn',		`mask_sample_b')
+define(`Cbn_01',	`mask_sample_b_01')
+define(`Cbn_23',	`mask_sample_b_23')
+
+    /* color space conversion function:
+     * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1)
+     * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1)
+     * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1)
+     */
+
+    /* Normalize Y, Cb and Cr:
+     *
+     * Yn = (Y - 16/255) * 1.164
+     * Crn = Cr - 128 / 255
+     * Cbn = Cb - 128 / 255
+     */
+add (16)    Yn<1>F		Y<8,8,1>F	-0.0627451F { compr align1 };
+mul (16)    Yn<1>F		Yn<8,8,1>F	1.164F	    { compr align1 };
+
+add (16)    Crn<1>F		Cr<8,8,1>F	-0.501961F  { compr align1 };
+
+add (16)    Cbn<1>F		Cb<8,8,1>F	-0.501961F  { compr align1 };
+
+    /* 
+     * R = Y + Cr * 1.596
+     */
+mov (16)    acc0<1>F		Yn<8,8,1>F		    { compr align1 };
+mac.sat(16) src_sample_r<1>F	Crn<8,8,1>F	1.596F	    { compr align1 };
+     
+    /*
+     * G = Crn * -0.813 + Cbn * -0.392 + Y
+     */
+mov (16)    acc0<1>F		Yn<8,8,1>F		    { compr align1 };
+mac (16)    acc0<1>F		Crn<8,8,1>F    	-0.813F	    { compr align1 };
+mac.sat(16) src_sample_g<1>F	Cbn<8,8,1>F    	-0.392F	    { compr align1 };
+
+    /*
+     * B = Cbn * 2.017 + Y
+     */
+mov (16)    acc0<1>F	        Yn<8,8,1>F		    { compr align1 };
+mac.sat(16) src_sample_b<1>F	Cbn<8,8,1>F     2.017F	    { compr align1 };
+
+    /*
+     * A = 1.0
+     */
+mov (16)    src_sample_a<1>F	1.0F			    { compr align1 };
diff --git a/src/render_program/exa_wm_yuv_rgb.g5b b/src/render_program/exa_wm_yuv_rgb.g5b
new file mode 100644
index 0000000..01f6e2b
--- /dev/null
+++ b/src/render_program/exa_wm_yuv_rgb.g5b
@@ -0,0 +1,12 @@
+   { 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 },
+   { 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
+   { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
+   { 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 },
+   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
+   { 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
+   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
+   { 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
+   { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
+   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
+   { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 },
+   { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 },
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 4f5f6ad..7c9d88e 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -88,101 +88,101 @@
 #define PS_MAX_THREADS	    48
 
 static const uint32_t sf_kernel[][4] = {
-#include "exa_sf.g4b.gen5"
+#include "exa_sf.g5b"
 };
 
 static const uint32_t sf_kernel_mask[][4] = {
-#include "exa_sf_mask.g4b.gen5"
+#include "exa_sf_mask.g5b"
 };
 
 static const uint32_t ps_kernel_nomask_affine[][4] = {
-#include "exa_wm_xy.g4b.gen5"
-#include "exa_wm_src_affine.g4b.gen5"
-#include "exa_wm_src_sample_argb.g4b.gen5"
-#include "exa_wm_write.g4b.gen5"
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_write.g5b"
 };
 
 static const uint32_t ps_kernel_nomask_projective[][4] = {
-#include "exa_wm_xy.g4b.gen5"
-#include "exa_wm_src_projective.g4b.gen5"
-#include "exa_wm_src_sample_argb.g4b.gen5"
-#include "exa_wm_write.g4b.gen5"
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_write.g5b"
 };
 
 static const uint32_t ps_kernel_maskca_affine[][4] = {
-#include "exa_wm_xy.g4b.gen5"
-#include "exa_wm_src_affine.g4b.gen5"
-#include "exa_wm_src_sample_argb.g4b.gen5"
-#include "exa_wm_mask_affine.g4b.gen5"
-#include "exa_wm_mask_sample_argb.g4b.gen5"
-#include "exa_wm_ca.g4b.gen5"
-#include "exa_wm_write.g4b.gen5"
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_affine.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca.g5b"
+#include "exa_wm_write.g5b"
 };
 
 static const uint32_t ps_kernel_maskca_projective[][4] = {
-#include "exa_wm_xy.g4b.gen5"
-#include "exa_wm_src_projective.g4b.gen5"
-#include "exa_wm_src_sample_argb.g4b.gen5"
-#include "exa_wm_mask_projective.g4b.gen5"
-#include "exa_wm_mask_sample_argb.g4b.gen5"
-#include "exa_wm_ca.g4b.gen5"
-#include "exa_wm_write.g4b.gen5"
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_projective.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca.g5b"
+#include "exa_wm_write.g5b"
 };
 
 static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
-#include "exa_wm_xy.g4b.gen5"
-#include "exa_wm_src_affine.g4b.gen5"
-#include "exa_wm_src_sample_a.g4b.gen5"
-#include "exa_wm_mask_affine.g4b.gen5"
-#include "exa_wm_mask_sample_argb.g4b.gen5"
-#include "exa_wm_ca_srcalpha.g4b.gen5"
-#include "exa_wm_write.g4b.gen5"
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_a.g5b"
+#include "exa_wm_mask_affine.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca_srcalpha.g5b"
+#include "exa_wm_write.g5b"
 };
 
 static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
-#include "exa_wm_xy.g4b.gen5"
-#include "exa_wm_src_projective.g4b.gen5"
-#include "exa_wm_src_sample_a.g4b.gen5"
-#include "exa_wm_mask_projective.g4b.gen5"
-#include "exa_wm_mask_sample_argb.g4b.gen5"
-#include "exa_wm_ca_srcalpha.g4b.gen5"
-#include "exa_wm_write.g4b.gen5"
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_a.g5b"
+#include "exa_wm_mask_projective.g5b"
+#include "exa_wm_mask_sample_argb.g5b"
+#include "exa_wm_ca_srcalpha.g5b"
+#include "exa_wm_write.g5b"
 };
 
 static const uint32_t ps_kernel_masknoca_affine[][4] = {
-#include "exa_wm_xy.g4b.gen5"
-#include "exa_wm_src_affine.g4b.gen5"
-#include "exa_wm_src_sample_argb.g4b.gen5"
-#include "exa_wm_mask_affine.g4b.gen5"
-#include "exa_wm_mask_sample_a.g4b.gen5"
-#include "exa_wm_noca.g4b.gen5"
-#include "exa_wm_write.g4b.gen5"
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_affine.g5b"
+#include "exa_wm_mask_sample_a.g5b"
+#include "exa_wm_noca.g5b"
+#include "exa_wm_write.g5b"
 };
 
 static const uint32_t ps_kernel_masknoca_projective[][4] = {
-#include "exa_wm_xy.g4b.gen5"
-#include "exa_wm_src_projective.g4b.gen5"
-#include "exa_wm_src_sample_argb.g4b.gen5"
-#include "exa_wm_mask_projective.g4b.gen5"
-#include "exa_wm_mask_sample_a.g4b.gen5"
-#include "exa_wm_noca.g4b.gen5"
-#include "exa_wm_write.g4b.gen5"
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_projective.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_mask_projective.g5b"
+#include "exa_wm_mask_sample_a.g5b"
+#include "exa_wm_noca.g5b"
+#include "exa_wm_write.g5b"
 };
 
 static const uint32_t ps_kernel_packed_static[][4] = {
-#include "exa_wm_xy.g4b.gen5"
-#include "exa_wm_src_affine.g4b.gen5"
-#include "exa_wm_src_sample_argb.g4b.gen5"
-#include "exa_wm_yuv_rgb.g4b.gen5"
-#include "exa_wm_write.g4b.gen5"
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_argb.g5b"
+#include "exa_wm_yuv_rgb.g5b"
+#include "exa_wm_write.g5b"
 };
 
 static const uint32_t ps_kernel_planar_static[][4] = {
-#include "exa_wm_xy.g4b.gen5"
-#include "exa_wm_src_affine.g4b.gen5"
-#include "exa_wm_src_sample_planar.g4b.gen5"
-#include "exa_wm_yuv_rgb.g4b.gen5"
-#include "exa_wm_write.g4b.gen5"
+#include "exa_wm_xy.g5b"
+#include "exa_wm_src_affine.g5b"
+#include "exa_wm_src_sample_planar.g5b"
+#include "exa_wm_yuv_rgb.g5b"
+#include "exa_wm_write.g5b"
 };
 
 #define KERNEL(kernel_enum, kernel, masked) \
commit 67ea8808d7f0ce47733a66d8708dcfa0dbec14c1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Nov 24 11:45:38 2011 +0000

    sna/gen5: Remove the unused SIP
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 54f5d68..4f5f6ad 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1121,14 +1121,6 @@ gen5_composite_get_binding_table(struct sna *sna,
 }
 
 static void
-gen5_emit_sip(struct sna *sna)
-{
-	/* Set system instruction pointer */
-	OUT_BATCH(GEN5_STATE_SIP | 0);
-	OUT_BATCH(0);
-}
-
-static void
 gen5_emit_urb(struct sna *sna)
 {
 	int urb_vs_start, urb_vs_size;
@@ -1206,7 +1198,6 @@ gen5_emit_invariant(struct sna *sna)
 	 */
 	OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
-	gen5_emit_sip(sna);
 	gen5_emit_state_base_address(sna);
 
 	sna->render_state.gen5.needs_invariant = FALSE;
commit 499ed74e979d99bbe1670ad54875fba6055fce5d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Nov 24 11:02:01 2011 +0000

    sna: Tweak cancellation of deferred flush
    
    The goal is to keep running until the tick after everything stops,
    irrespective of forced flushes.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 478367c..9fecb1f 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -745,8 +745,7 @@ bool kgem_retire(struct kgem *kgem)
 	kgem->need_retire = !list_is_empty(&kgem->requests);
 	if (!kgem->need_retire && kgem->ring)
 		kgem->ring = kgem->mode;
-	kgem->busy &= kgem->need_retire;
-	DBG(("%s -- busy=%d\n", __FUNCTION__, kgem->busy));
+	DBG(("%s -- need_retire=%d\n", __FUNCTION__, kgem->need_retire));
 
 	return retired;
 }
@@ -1096,7 +1095,7 @@ void _kgem_submit(struct kgem *kgem)
 		kgem_cleanup(kgem);
 
 	kgem_reset(kgem);
-	kgem->flush_now = kgem->busy = 1;
+	kgem->flush_now = 1;
 }
 
 void kgem_throttle(struct kgem *kgem)
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 0f41003..0d5c0bf 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -8328,19 +8328,23 @@ static Bool sna_accel_do_flush(struct sna *sna)
 	struct itimerspec to;
 	struct sna_pixmap *priv;
 
-	if (sna->kgem.flush_now) {
-		sna_accel_drain_timer(sna, FLUSH_TIMER);
-		return TRUE;
-	}
-
-	return_if_timer_active(FLUSH_TIMER);
-
 	priv = sna_accel_scanout(sna);
 	if (priv == NULL) {
 		DBG(("%s -- no scanout attached\n", __FUNCTION__));
 		return FALSE;
 	}
 
+	if (sna->kgem.flush_now) {
+		sna->kgem.flush_now = 0;
+		if (priv->gpu_bo->rq != NULL) {
+			DBG(("%s -- forcing flush\n", __FUNCTION__));
+			sna_accel_drain_timer(sna, FLUSH_TIMER);
+			return TRUE;
+		}
+	}
+
+	return_if_timer_active(FLUSH_TIMER);
+
 	if (priv->cpu_damage == NULL && priv->gpu_bo->rq == NULL) {
 		DBG(("%s -- no pending write to scanout\n", __FUNCTION__));
 		return FALSE;
@@ -8419,6 +8423,7 @@ static bool sna_accel_flush(struct sna *sna)
 	struct sna_pixmap *priv = sna_accel_scanout(sna);
 	bool nothing_to_do =
 		priv->cpu_damage == NULL && priv->gpu_bo->rq == NULL;
+	bool need_throttle = sna->kgem.busy;
 
 	DBG(("%s (time=%ld), nothing_to_do=%d, busy? %d\n",
 	     __FUNCTION__, (long)GetTimeInMillis(),
@@ -8430,8 +8435,7 @@ static bool sna_accel_flush(struct sna *sna)
 		sna_pixmap_move_to_gpu(priv->pixmap);
 	sna->kgem.busy = !nothing_to_do;
 	kgem_bo_flush(&sna->kgem, priv->gpu_bo);
-	sna->kgem.flush_now = 0;
-	return !nothing_to_do;
+	return need_throttle;
 }
 
 static void sna_accel_expire(struct sna *sna)
commit 35239d207c38e1ae0cb843ed7a72621711cb001e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Nov 24 02:51:51 2011 +0000

    sna: Always call retire following a mmap
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 89caf96..478367c 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -746,6 +746,7 @@ bool kgem_retire(struct kgem *kgem)
 	if (!kgem->need_retire && kgem->ring)
 		kgem->ring = kgem->mode;
 	kgem->busy &= kgem->need_retire;
+	DBG(("%s -- busy=%d\n", __FUNCTION__, kgem->busy));
 
 	return retired;
 }
@@ -1827,7 +1828,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 		return NULL;
 
 	bo->needs_flush = false;
-	if (prot & PROT_WRITE && bo->gpu)
+	if (bo->gpu)
 		kgem_retire(kgem);
 
 	return ptr;
commit 11e0b456de93dae9f7ba4298d8a32b6b60edc503
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Nov 24 03:04:09 2011 +0000

    sna/gen5: Use the BLT rather than flush when copying
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 36f236f..54f5d68 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2471,8 +2471,17 @@ gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
 		kgem_submit(&sna->kgem);
 
-	if (kgem_bo_is_dirty(src_bo))
+	if (kgem_bo_is_dirty(src_bo)) {
+		if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+		    sna_blt_copy_boxes(sna, alu,
+				       src_bo, src_dx, src_dy,
+				       dst_bo, dst_dx, dst_dy,
+				       dst->drawable.bitsPerPixel,
+				       box, n))
+			return TRUE;
+
 		kgem_emit_flush(&sna->kgem);
+	}
 
 	gen5_copy_bind_surfaces(sna, &tmp);
 	gen5_align_vertex(sna, &tmp);
commit a44663af9335136cba59b12078c6717992283b62
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Nov 24 02:54:43 2011 +0000

    sna/gen4,gen5: Fix typo and only emit a flush if the video source is dirty
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index dbb7c43..5e9bd37 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1630,7 +1630,7 @@ gen4_render_video(struct sna *sna,
 	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
 		kgem_submit(&sna->kgem);
 
-	if (!kgem_bo_is_dirty(frame->bo))
+	if (kgem_bo_is_dirty(frame->bo))
 		kgem_emit_flush(&sna->kgem);
 
 	gen4_video_bind_surfaces(sna, &tmp, frame);
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index e9a61f0..36f236f 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1677,7 +1677,7 @@ gen5_render_video(struct sna *sna,
 	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
 		kgem_submit(&sna->kgem);
 
-	if (!kgem_bo_is_dirty(frame->bo))
+	if (kgem_bo_is_dirty(frame->bo))
 		kgem_emit_flush(&sna->kgem);
 
 	gen5_video_bind_surfaces(sna, &tmp, frame);


More information about the xorg-commit mailing list