xf86-video-intel: Branch 'projective-965' - 8 commits - src/exa_sf_mask.g4a src/exa_sf_mask.g4b src/exa_sf_mask_prog.h src/exa_sf_prog.h src/exa_wm_affine.g4i src/exa_wm_ca.g4b src/exa_wm_ca_srcalpha.g4a src/exa_wm_ca_srcalpha.g4b src/exa_wm.g4i src/exa_wm_mask_affine.g4a src/exa_wm_mask_affine.g4b src/exa_wm_maskca.g4a src/exa_wm_maskca.g4b src/exa_wm_maskca_prog.h src/exa_wm_maskca_srcalpha.g4a src/exa_wm_maskca_srcalpha.g4b src/exa_wm_maskca_srcalpha_prog.h src/exa_wm_masknoca.g4a src/exa_wm_masknoca.g4b src/exa_wm_masknoca_prog.h src/exa_wm_mask_projective.g4a src/exa_wm_mask_projective.g4b src/exa_wm_mask_sample_a.g4a src/exa_wm_mask_sample_a.g4b src/exa_wm_mask_sample_argb.g4a src/exa_wm_mask_sample_argb.g4b src/exa_wm_mask_sample.g4a src/exa_wm_mask_sample.g4b src/exa_wm_noca.g4b src/exa_wm_nomask.g4a src/exa_wm_nomask_prog.h src/exa_wm_projective.g4i src/exa_wm_src_affine.g4a src/exa_wm_src_affine.g4b src/exa_wm_src_projective.g4a src/exa_wm_src_projective.g4b src/exa_wm_src_sample_a.g4a src/exa_wm_src_sample_a.g4b src/exa_wm_src_sample_argb.g4a src/exa_wm_src_sample_argb.g4b src/exa_wm_src_sample.g4a src/exa_wm_src_sample.g4b src/exa_wm_write.g4a src/exa_wm_write.g4b src/exa_wm_xy.g4b src/i810_reg.h src/i965_render.c src/i965_video.c src/Makefile.am src/sf_prog.h src/wm_prog.h

Keith Packard keithp at kemper.freedesktop.org
Tue Apr 1 00:19:51 PDT 2008


 src/Makefile.am                   |   41 ++----
 src/exa_sf_mask.g4a               |  105 +++++++++++------
 src/exa_sf_mask.g4b               |   20 ---
 src/exa_sf_mask_prog.h            |   15 --
 src/exa_sf_prog.h                 |   15 --
 src/exa_wm.g4i                    |   87 +++++++-------
 src/exa_wm_affine.g4i             |    1 
 src/exa_wm_ca.g4b                 |    4 
 src/exa_wm_ca_srcalpha.g4a        |    9 -
 src/exa_wm_ca_srcalpha.g4b        |    8 -
 src/exa_wm_mask_affine.g4a        |    4 
 src/exa_wm_mask_affine.g4b        |   16 +-
 src/exa_wm_mask_projective.g4a    |    5 
 src/exa_wm_mask_projective.g4b    |   32 ++---
 src/exa_wm_mask_sample.g4a        |   49 --------
 src/exa_wm_mask_sample.g4b        |    1 
 src/exa_wm_mask_sample_a.g4a      |   48 ++++++++
 src/exa_wm_mask_sample_a.g4b      |    2 
 src/exa_wm_mask_sample_argb.g4a   |   48 ++++++++
 src/exa_wm_mask_sample_argb.g4b   |    2 
 src/exa_wm_maskca.g4a             |  228 --------------------------------------
 src/exa_wm_maskca.g4b             |   95 ---------------
 src/exa_wm_maskca_prog.h          |   95 ---------------
 src/exa_wm_maskca_srcalpha.g4a    |  228 --------------------------------------
 src/exa_wm_maskca_srcalpha.g4b    |   95 ---------------
 src/exa_wm_maskca_srcalpha_prog.h |   95 ---------------
 src/exa_wm_masknoca.g4a           |  228 --------------------------------------
 src/exa_wm_masknoca.g4b           |   95 ---------------
 src/exa_wm_masknoca_prog.h        |   95 ---------------
 src/exa_wm_noca.g4b               |    8 -
 src/exa_wm_nomask.g4a             |    2 
 src/exa_wm_nomask_prog.h          |   34 -----
 src/exa_wm_projective.g4i         |    4 
 src/exa_wm_src_affine.g4a         |    4 
 src/exa_wm_src_affine.g4b         |   16 +-
 src/exa_wm_src_projective.g4a     |    4 
 src/exa_wm_src_projective.g4b     |   32 ++---
 src/exa_wm_src_sample.g4a         |   49 --------
 src/exa_wm_src_sample.g4b         |    1 
 src/exa_wm_src_sample_a.g4a       |   47 +++++++
 src/exa_wm_src_sample_a.g4b       |    2 
 src/exa_wm_src_sample_argb.g4a    |   47 +++++++
 src/exa_wm_src_sample_argb.g4b    |    2 
 src/exa_wm_write.g4a              |    6 -
 src/exa_wm_write.g4b              |   20 +--
 src/exa_wm_xy.g4b                 |    8 -
 src/i810_reg.h                    |    1 
 src/i965_render.c                 |  171 +++++++++++++++++-----------
 src/i965_video.c                  |    4 
 src/sf_prog.h                     |   17 --
 src/wm_prog.h                     |   82 -------------
 51 files changed, 536 insertions(+), 1791 deletions(-)

New commits:
commit 0836373dc6e2f8612f120074980561f7ac11f6f7
Author: Keith Packard <keithp at keithp.com>
Date:   Tue Apr 1 00:16:05 2008 -0700

    Add projective versions of the PS kernels

diff --git a/src/i965_render.c b/src/i965_render.c
index dc5bd5e..921ea80 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -348,7 +348,7 @@ static const uint32_t ps_kernel_static_nomask_projective [][4] = {
 #include "exa_wm_write.g4b"
 };
 
-static const uint32_t ps_kernel_static_maskca [][4] = {
+static const uint32_t ps_kernel_static_maskca_affine [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
 #include "exa_wm_src_sample_argb.g4b"
@@ -358,7 +358,17 @@ static const uint32_t ps_kernel_static_maskca [][4] = {
 #include "exa_wm_write.g4b"
 };
 
-static const uint32_t ps_kernel_static_maskca_srcalpha [][4] = {
+static const uint32_t ps_kernel_static_maskca_projective [][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_static_maskca_srcalpha_affine [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
 #include "exa_wm_src_sample_a.g4b"
@@ -368,7 +378,17 @@ static const uint32_t ps_kernel_static_maskca_srcalpha [][4] = {
 #include "exa_wm_write.g4b"
 };
 
-static const uint32_t ps_kernel_static_masknoca [][4] = {
+static const uint32_t ps_kernel_static_maskca_srcalpha_projective [][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_a.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca_srcalpha.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_static_masknoca_affine [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
 #include "exa_wm_src_sample_argb.g4b"
@@ -378,6 +398,16 @@ static const uint32_t ps_kernel_static_masknoca [][4] = {
 #include "exa_wm_write.g4b"
 };
 
+static const uint32_t ps_kernel_static_masknoca_projective [][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_a.g4b"
+#include "exa_wm_noca.g4b"
+#include "exa_wm_write.g4b"
+};
+
 static uint32_t 
 i965_get_card_format(PicturePtr pPict)
 {
@@ -484,15 +514,22 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 	if (pMaskPicture->componentAlpha && 
                 PICT_FORMAT_RGB(pMaskPicture->format)) {
             if (i965_blend_op[op].src_alpha) {
-                next_offset = ps_kernel_offset + 
-                    sizeof(ps_kernel_static_maskca_srcalpha);
+		if (is_affine)
+		    next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_srcalpha_affine);
+		else
+		    next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_srcalpha_projective);
             } else {
-                next_offset = ps_kernel_offset + 
-                    sizeof(ps_kernel_static_maskca);
+		if (is_affine)
+		    next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_affine);
+		else
+		    next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca_projective);
             }
-        } else
-	    next_offset = ps_kernel_offset + 
-                          sizeof(ps_kernel_static_masknoca);
+        } else {
+	    if (is_affine)
+		next_offset = ps_kernel_offset + sizeof(ps_kernel_static_masknoca_affine);
+	    else
+		next_offset = ps_kernel_offset + sizeof(ps_kernel_static_masknoca_projective);
+	}
     } else {
 	if (is_affine)
 	    next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask_affine);
@@ -865,22 +902,28 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     if (pMask) {
 	if (pMaskPicture->componentAlpha && 
                 PICT_FORMAT_RGB(pMaskPicture->format)) {
-            if (i965_blend_op[op].src_alpha) 
-                memcpy(ps_kernel, ps_kernel_static_maskca_srcalpha,
-                        sizeof (ps_kernel_static_maskca_srcalpha));
-            else
-                memcpy(ps_kernel, ps_kernel_static_maskca,
-                        sizeof (ps_kernel_static_maskca));
-        } else
-   	    memcpy(ps_kernel, ps_kernel_static_masknoca,
-		   sizeof (ps_kernel_static_masknoca));
+            if (i965_blend_op[op].src_alpha) {
+		if (is_affine)
+		    memcpy(ps_kernel, ps_kernel_static_maskca_srcalpha_affine, sizeof (ps_kernel_static_maskca_srcalpha_affine));
+		else
+                    memcpy(ps_kernel, ps_kernel_static_maskca_srcalpha_projective, sizeof (ps_kernel_static_maskca_srcalpha_projective));
+            } else {
+		if (is_affine)
+		    memcpy(ps_kernel, ps_kernel_static_maskca_affine, sizeof (ps_kernel_static_maskca_affine));
+		else
+		    memcpy(ps_kernel, ps_kernel_static_maskca_projective, sizeof (ps_kernel_static_maskca_projective));
+	    }
+        } else {
+	    if (is_affine)
+		memcpy(ps_kernel, ps_kernel_static_masknoca_affine, sizeof (ps_kernel_static_masknoca_affine));
+	    else
+		memcpy(ps_kernel, ps_kernel_static_masknoca_projective, sizeof (ps_kernel_static_masknoca_projective));
+	}
     } else {
 	if (is_affine)
-	    memcpy(ps_kernel, ps_kernel_static_nomask_affine,
-		   sizeof (ps_kernel_static_nomask_affine));
+	    memcpy(ps_kernel, ps_kernel_static_nomask_affine, sizeof (ps_kernel_static_nomask_affine));
 	else
-	    memcpy(ps_kernel, ps_kernel_static_nomask_projective,
-		   sizeof (ps_kernel_static_nomask_projective));
+	    memcpy(ps_kernel, ps_kernel_static_nomask_projective, sizeof (ps_kernel_static_nomask_projective));
     }
 
     wm_state = &wm_state_local;
commit bfd803e085e938866efb45c67a79facef78ec399
Author: Keith Packard <keithp at keithp.com>
Date:   Tue Apr 1 00:06:08 2008 -0700

    Shrink WM thread to 32 registers and 1024 scratch space.
    
    Saving registers means we can run more in parallel.

diff --git a/src/exa_wm.g4i b/src/exa_wm.g4i
index 724ef2b..10e630e 100644
--- a/src/exa_wm.g4i
+++ b/src/exa_wm.g4i
@@ -57,7 +57,7 @@ define(`mask_dw_dy', `g6.4<0,1,0>F')
 define(`mask_wo',    `g6.12<0,1,0>F')
 
 /*
- * Local variables
+ * Local variables. Pairs must be aligned on an even register boundary
  */
 
 /* this holds the X dest coordinates */
@@ -71,14 +71,14 @@ define(`dst_y_0',   `dst_y')
 define(`dst_y_1',   `g11')
 
 /* When computing x * dn/dx, use this */
-define(`temp_x',    `g34')
+define(`temp_x',    `g30')
 define(`temp_x_0',  `temp_x')
-define(`temp_x_1',  `g35')
+define(`temp_x_1',  `g31')
 
 /* When computing y * dn/dy, use this */
-define(`temp_y',    `g32')
+define(`temp_y',    `g28')
 define(`temp_y_0',  temp_y)
-define(`temp_y_1',  `g33')
+define(`temp_y_1',  `g29')
 
 /* when loading x/y, use these to hold them in UW format */
 define(`temp_x_uw', temp_x)
@@ -90,33 +90,33 @@ define(`src_msg_ind',`1')
 define(`src_u',	    `m2')
 define(`src_v',	    `m4')
 define(`src_w',	    `g12')
-define(`src_w_0',   `g12')
+define(`src_w_0',   `src_w')
 define(`src_w_1',   `g13')
 
 define(`mask_msg',  `m7')
 define(`mask_msg_ind',`7')
 define(`mask_u',    `m8')
 define(`mask_v',    `m10')
-define(`mask_w',    `g14')
-define(`mask_w_0',  `g14')
-define(`mask_w_1',  `g15')
+define(`mask_w',    `src_w')
+define(`mask_w_0',  `src_w_0')
+define(`mask_w_1',  `src_w_1')
 
 /* sample src to these registers */
-define(`src_sample0',	`g16')
-define(`src_sample1',	`g17')
-define(`src_sample2',	`g18')
-define(`src_sample3',	`g19')
-define(`src_sample4',	`g20')
-define(`src_sample5',	`g21')
-define(`src_sample6',	`g22')
-define(`src_sample7',	`g23')
+define(`src_sample0',	`g14')
+define(`src_sample1',	`g15')
+define(`src_sample2',	`g16')
+define(`src_sample3',	`g17')
+define(`src_sample4',	`g18')
+define(`src_sample5',	`g19')
+define(`src_sample6',	`g20')
+define(`src_sample7',	`g21')
 
 /* sample mask to these registers */
-define(`mask_sample0',	`g24')
-define(`mask_sample1',	`g25')
-define(`mask_sample2',	`g26')
-define(`mask_sample3',	`g27')
-define(`mask_sample4',	`g28')
-define(`mask_sample5',	`g29')
-define(`mask_sample6',	`g30')
-define(`mask_sample7',	`g31')
+define(`mask_sample0',	`g22')
+define(`mask_sample1',	`g23')
+define(`mask_sample2',	`g24')
+define(`mask_sample3',	`g25')
+define(`mask_sample4',	`g26')
+define(`mask_sample5',	`g27')
+define(`mask_sample6',	`g28')
+define(`mask_sample7',	`g29')
diff --git a/src/exa_wm_ca.g4b b/src/exa_wm_ca.g4b
index 28bd6c6..372e8b2 100644
--- a/src/exa_wm_ca.g4b
+++ b/src/exa_wm_ca.g4b
@@ -1,4 +1,4 @@
+   { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
    { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
    { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
    { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
-   { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 },
diff --git a/src/exa_wm_ca_srcalpha.g4b b/src/exa_wm_ca_srcalpha.g4b
index 94f1516..963d676 100644
--- a/src/exa_wm_ca_srcalpha.g4b
+++ b/src/exa_wm_ca_srcalpha.g4b
@@ -1,4 +1,4 @@
-   { 0x00802041, 0x220077bd, 0x008d0300, 0x008d02c0 },
-   { 0x00802041, 0x224077bd, 0x008d0340, 0x008d02c0 },
-   { 0x00802041, 0x228077bd, 0x008d0380, 0x008d02c0 },
-   { 0x00802041, 0x22c077bd, 0x008d03c0, 0x008d02c0 },
+   { 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
+   { 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 },
+   { 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 },
+   { 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 },
diff --git a/src/exa_wm_mask_affine.g4b b/src/exa_wm_mask_affine.g4b
index 35dec6f..14a5451 100644
--- a/src/exa_wm_mask_affine.g4b
+++ b/src/exa_wm_mask_affine.g4b
@@ -1,8 +1,8 @@
-   { 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 },
-   { 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
-   { 0x00802040, 0x210077be, 0x008d0440, 0x000000ac },
-   { 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 },
-   { 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
-   { 0x00802040, 0x214077be, 0x008d0440, 0x000000bc },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x210077be, 0x008d03c0, 0x000000ac },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x214077be, 0x008d03c0, 0x000000bc },
diff --git a/src/exa_wm_mask_projective.g4b b/src/exa_wm_mask_projective.g4b
index 0684882..78cb9ae 100644
--- a/src/exa_wm_mask_projective.g4b
+++ b/src/exa_wm_mask_projective.g4b
@@ -1,16 +1,16 @@
-   { 0x00802041, 0x244077bd, 0x008d0100, 0x000000c0 },
-   { 0x00802041, 0x240077bd, 0x008d0140, 0x000000c4 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x000000cc },
-   { 0x00600031, 0x21c01fbd, 0x008d0440, 0x01110001 },
-   { 0x00600031, 0x21e01fbd, 0x008d0460, 0x01110001 },
-   { 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 },
-   { 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x000000ac },
-   { 0x00802041, 0x210077be, 0x008d0440, 0x008d01c0 },
-   { 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 },
-   { 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x000000bc },
-   { 0x00802041, 0x214077be, 0x008d0440, 0x008d01c0 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc },
+   { 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 },
+   { 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac },
+   { 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc },
+   { 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 },
diff --git a/src/exa_wm_mask_sample_a.g4b b/src/exa_wm_mask_sample_a.g4b
index 01fc8d5..018bd36 100644
--- a/src/exa_wm_mask_sample_a.g4b
+++ b/src/exa_wm_mask_sample_a.g4b
@@ -1,2 +1,2 @@
    { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
-   { 0x07800031, 0x23c01d29, 0x008d0000, 0x02520102 },
+   { 0x07800031, 0x23801d29, 0x008d0000, 0x02520102 },
diff --git a/src/exa_wm_mask_sample_argb.g4b b/src/exa_wm_mask_sample_argb.g4b
index 97d3803..b159cba 100644
--- a/src/exa_wm_mask_sample_argb.g4b
+++ b/src/exa_wm_mask_sample_argb.g4b
@@ -1,2 +1,2 @@
    { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
-   { 0x07800031, 0x23001d29, 0x008d0000, 0x02580102 },
+   { 0x07800031, 0x22c01d29, 0x008d0000, 0x02580102 },
diff --git a/src/exa_wm_noca.g4b b/src/exa_wm_noca.g4b
index 1c9d948..1506334 100644
--- a/src/exa_wm_noca.g4b
+++ b/src/exa_wm_noca.g4b
@@ -1,4 +1,4 @@
-   { 0x00802041, 0x220077bd, 0x008d0200, 0x008d03c0 },
-   { 0x00802041, 0x224077bd, 0x008d0240, 0x008d03c0 },
-   { 0x00802041, 0x228077bd, 0x008d0280, 0x008d03c0 },
-   { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 },
+   { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
+   { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 },
+   { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 },
+   { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
diff --git a/src/exa_wm_src_affine.g4b b/src/exa_wm_src_affine.g4b
index 9fef62c..d30da87 100644
--- a/src/exa_wm_src_affine.g4b
+++ b/src/exa_wm_src_affine.g4b
@@ -1,8 +1,8 @@
-   { 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 },
-   { 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
-   { 0x00802040, 0x204077be, 0x008d0440, 0x0000006c },
-   { 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 },
-   { 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
-   { 0x00802040, 0x208077be, 0x008d0440, 0x0000007c },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c },
diff --git a/src/exa_wm_src_projective.g4b b/src/exa_wm_src_projective.g4b
index 2d20395..198bab3 100644
--- a/src/exa_wm_src_projective.g4b
+++ b/src/exa_wm_src_projective.g4b
@@ -1,16 +1,16 @@
-   { 0x00802041, 0x244077bd, 0x008d0100, 0x00000080 },
-   { 0x00802041, 0x240077bd, 0x008d0140, 0x00000084 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x0000008c },
-   { 0x00600031, 0x21801fbd, 0x008d0440, 0x01110001 },
-   { 0x00600031, 0x21a01fbd, 0x008d0460, 0x01110001 },
-   { 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 },
-   { 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x0000006c },
-   { 0x00802041, 0x204077be, 0x008d0440, 0x008d0180 },
-   { 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 },
-   { 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
-   { 0x00802040, 0x244077bd, 0x008d0440, 0x0000007c },
-   { 0x00802041, 0x208077be, 0x008d0440, 0x008d0180 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c },
+   { 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 },
+   { 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c },
+   { 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c },
+   { 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 },
diff --git a/src/exa_wm_src_sample_a.g4b b/src/exa_wm_src_sample_a.g4b
index 8505757..ce8650a 100644
--- a/src/exa_wm_src_sample_a.g4b
+++ b/src/exa_wm_src_sample_a.g4b
@@ -1,2 +1,2 @@
    { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
-   { 0x01800031, 0x22c01d29, 0x008d0000, 0x02520001 },
+   { 0x01800031, 0x22801d29, 0x008d0000, 0x02520001 },
diff --git a/src/exa_wm_src_sample_argb.g4b b/src/exa_wm_src_sample_argb.g4b
index 1d4a730..c5b9274 100644
--- a/src/exa_wm_src_sample_argb.g4b
+++ b/src/exa_wm_src_sample_argb.g4b
@@ -1,2 +1,2 @@
    { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
-   { 0x01800031, 0x22001d29, 0x008d0000, 0x02580001 },
+   { 0x01800031, 0x21c01d29, 0x008d0000, 0x02580001 },
diff --git a/src/exa_wm_write.g4b b/src/exa_wm_write.g4b
index b7421c2..785fe32 100644
--- a/src/exa_wm_write.g4b
+++ b/src/exa_wm_write.g4b
@@ -1,11 +1,11 @@
-   { 0x00600001, 0x204003be, 0x008d0200, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0240, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d0280, 0x00000000 },
-   { 0x00600001, 0x20a003be, 0x008d02c0, 0x00000000 },
-   { 0x00600001, 0x20c003be, 0x008d0220, 0x00000000 },
-   { 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 },
-   { 0x00600001, 0x210003be, 0x008d02a0, 0x00000000 },
-   { 0x00600001, 0x212003be, 0x008d02e0, 0x00000000 },
+   { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
+   { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
+   { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
+   { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
+   { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
+   { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
+   { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
+   { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
    { 0x00600001, 0x20200022, 0x008d0020, 0x00000000 },
    { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_xy.g4b b/src/exa_wm_xy.g4b
index c5620cd..327fc29 100644
--- a/src/exa_wm_xy.g4b
+++ b/src/exa_wm_xy.g4b
@@ -1,4 +1,4 @@
-   { 0x00800040, 0x24406d29, 0x00480028, 0x10101010 },
-   { 0x00800040, 0x24006d29, 0x0048002a, 0x11001100 },
-   { 0x00802040, 0x2100753d, 0x008d0440, 0x00004020 },
-   { 0x00802040, 0x2140753d, 0x008d0400, 0x00004024 },
+   { 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
+   { 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
+   { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
+   { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
diff --git a/src/i965_render.c b/src/i965_render.c
index c2260eb..dc5bd5e 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -329,10 +329,10 @@ static const uint32_t sf_kernel_static_mask[][4] = {
 };
 
 /* ps kernels */
-#define PS_KERNEL_NUM_GRF   48
+#define PS_KERNEL_NUM_GRF   32
 #define PS_MAX_THREADS	    32
-#define PS_SCRATCH_SPACE    2048
-#define PS_SCRATCH_SPACE_LOG	1   /* log2 (PS_SCRATCH_SPACE) - 10  (1024 is 0, 2048 is 1) */
+#define PS_SCRATCH_SPACE    1024
+#define PS_SCRATCH_SPACE_LOG	0   /* log2 (PS_SCRATCH_SPACE) - 10  (1024 is 0, 2048 is 1) */
 
 static const uint32_t ps_kernel_static_nomask_affine [][4] = {
 #include "exa_wm_xy.g4b"
commit a6492661ae07310128eb73c3ef037c42ce7ab184
Author: Keith Packard <keithp at keithp.com>
Date:   Mon Mar 31 23:50:20 2008 -0700

    Fix composite with mask using new compositing thread code
    
    Clean up register allocation to never overlap
    Always write 4 values for each texture vertex.

diff --git a/src/Makefile.am b/src/Makefile.am
index 81d9596..9b5d653 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -131,18 +131,14 @@ INTEL_G4A =				\
 	packed_yuv_wm.g4a		\
 	exa_sf.g4a 			\
 	exa_sf_mask.g4a 		\
-	exa_sf_rotation.g4a		\
-	exa_wm_maskca.g4a 		\
-	exa_wm_maskca_srcalpha.g4a 	\
-	exa_wm_masknoca.g4a 		\
-	exa_wm_nomask.g4a		\
-	exa_wm_rotation.g4a		\
 	exa_wm_src_affine.g4a 		\
 	exa_wm_src_projective.g4a 	\
-	exa_wm_src_sample.g4a 		\
+	exa_wm_src_sample_argb.g4a 	\
+	exa_wm_src_sample_a.g4a 	\
 	exa_wm_mask_affine.g4a 		\
 	exa_wm_mask_projective.g4a 	\
-	exa_wm_mask_sample.g4a 		\
+	exa_wm_mask_sample_argb.g4a 	\
+	exa_wm_mask_sample_a.g4a 	\
 	exa_wm_noca.g4a			\
 	exa_wm_ca.g4a			\
 	exa_wm_ca_srcalpha.g4a		\
@@ -153,29 +149,21 @@ INTEL_G4I =				\
 	exa_wm.g4i			\
 	exa_wm_affine.g4i		\
 	exa_wm_projective.g4i
+	
 
 INTEL_G4B = 				\
 	packed_yuv_sf.g4b		\
-	packed_yuv_wm.g4b 		\
-	exa_sf_mask.g4b			\
+	packed_yuv_wm.g4b		\
 	exa_sf.g4b 			\
-	exa_sf_rotation.g4b		\
-	exa_wm_maskca.g4b		\
-	exa_wm_maskca_srcalpha.g4b	\
-	exa_wm_masknoca.g4b		\
-	exa_wm_nomask.g4b		\
-	exa_wm_rotation.g4b		\
-	exa_wm_maskca.g4b 		\
-	exa_wm_maskca_srcalpha.g4b 	\
-	exa_wm_masknoca.g4b 		\
-	exa_wm_nomask.g4b		\
-	exa_wm_rotation.g4b		\
+	exa_sf_mask.g4b 		\
 	exa_wm_src_affine.g4b 		\
 	exa_wm_src_projective.g4b 	\
-	exa_wm_src_sample.g4b 		\
+	exa_wm_src_sample_argb.g4b 	\
+	exa_wm_src_sample_a.g4b 	\
 	exa_wm_mask_affine.g4b 		\
 	exa_wm_mask_projective.g4b 	\
-	exa_wm_mask_sample.g4b 		\
+	exa_wm_mask_sample_argb.g4b 	\
+	exa_wm_mask_sample_a.g4b 	\
 	exa_wm_noca.g4b			\
 	exa_wm_ca.g4b			\
 	exa_wm_ca_srcalpha.g4b		\
@@ -194,8 +182,11 @@ if HAVE_GEN4ASM
 
 SUFFIXES = .g4a .g4b
 .g4a.g4b:
-	m4 -s $*.g4a > $*.g4m
-	intel-gen4asm -o $@ $*.g4m && rm $*.g4m
+	m4 -s $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && rm $*.g4m
+
+$(INTEL_G4B): $(INTEL_G4I)
+
+BUILT_SOURCES= $(INTEL_G4B)
 
 endif
 
diff --git a/src/exa_wm.g4i b/src/exa_wm.g4i
index 1be40e7..724ef2b 100644
--- a/src/exa_wm.g4i
+++ b/src/exa_wm.g4i
@@ -71,47 +71,52 @@ define(`dst_y_0',   `dst_y')
 define(`dst_y_1',   `g11')
 
 /* When computing x * dn/dx, use this */
-define(`temp_x',    `g12')
+define(`temp_x',    `g34')
 define(`temp_x_0',  `temp_x')
-define(`temp_x_1',  `g13')
+define(`temp_x_1',  `g35')
 
 /* When computing y * dn/dy, use this */
-define(`temp_y',    `g14')
+define(`temp_y',    `g32')
 define(`temp_y_0',  temp_y)
-define(`temp_y_1',  `g15')
+define(`temp_y_1',  `g33')
 
 /* when loading x/y, use these to hold them in UW format */
 define(`temp_x_uw', temp_x)
 define(`temp_y_uw', temp_y)
 
 /* compute source and mask u/v to this pair to send to sampler */
-define(`src_u',	    `m1')
-define(`src_v',	    `m3')
-define(`mask_u',    src_u)
-define(`mask_v',    src_v)
-define(`src_w',	    `g16')
-define(`src_w_0',   src_w)
-define(`src_w_1',   `g17')
-define(`mask_w',    src_w)
-define(`mask_w_0',  src_w_0)
-define(`mask_w_1',  src_w_1)
+define(`src_msg',   `m1')
+define(`src_msg_ind',`1')
+define(`src_u',	    `m2')
+define(`src_v',	    `m4')
+define(`src_w',	    `g12')
+define(`src_w_0',   `g12')
+define(`src_w_1',   `g13')
+
+define(`mask_msg',  `m7')
+define(`mask_msg_ind',`7')
+define(`mask_u',    `m8')
+define(`mask_v',    `m10')
+define(`mask_w',    `g14')
+define(`mask_w_0',  `g14')
+define(`mask_w_1',  `g15')
 
 /* sample src to these registers */
-define(`src_sample0',	`g18')
-define(`src_sample1',	`g19')
-define(`src_sample2',	`g20')
-define(`src_sample3',	`g21')
-define(`src_sample4',	`g22')
-define(`src_sample5',	`g23')
-define(`src_sample6',	`g24')
-define(`src_sample7',	`g25')
+define(`src_sample0',	`g16')
+define(`src_sample1',	`g17')
+define(`src_sample2',	`g18')
+define(`src_sample3',	`g19')
+define(`src_sample4',	`g20')
+define(`src_sample5',	`g21')
+define(`src_sample6',	`g22')
+define(`src_sample7',	`g23')
 
 /* sample mask to these registers */
-define(`mask_sample0',	`g26')
-define(`mask_sample1',	`g27')
-define(`mask_sample2',	`g28')
-define(`mask_sample3',	`g29')
-define(`mask_sample4',	`g30')
-define(`mask_sample5',	`g31')
-define(`mask_sample6',	`g32')
-define(`mask_sample7',	`g33')
+define(`mask_sample0',	`g24')
+define(`mask_sample1',	`g25')
+define(`mask_sample2',	`g26')
+define(`mask_sample3',	`g27')
+define(`mask_sample4',	`g28')
+define(`mask_sample5',	`g29')
+define(`mask_sample6',	`g30')
+define(`mask_sample7',	`g31')
diff --git a/src/exa_wm_affine.g4i b/src/exa_wm_affine.g4i
index 8fc6450..e72656b 100644
--- a/src/exa_wm_affine.g4i
+++ b/src/exa_wm_affine.g4i
@@ -42,4 +42,3 @@ mul (16)	temp_x<1>F	dst_x<8,8,1>F	dv_dx		{ compr align1 };
 mul (16)	temp_y<1>F	dst_y<8,8,1>F	dv_dy		{ compr align1 };
 add (16)	temp_x<1>F	temp_x<8,8,1>F	temp_y<8,8,1>F	{ compr align1 };
 add (16)	v<1>F		temp_x<8,8,1>F	vo		{ compr align1 };
-
diff --git a/src/exa_wm_ca.g4b b/src/exa_wm_ca.g4b
index d0f3519..28bd6c6 100644
--- a/src/exa_wm_ca.g4b
+++ b/src/exa_wm_ca.g4b
@@ -1,4 +1,4 @@
+   { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
    { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
    { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
    { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 },
-   { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0400 },
diff --git a/src/exa_wm_ca_srcalpha.g4a b/src/exa_wm_ca_srcalpha.g4a
index a1be28e..e252e19 100644
--- a/src/exa_wm_ca_srcalpha.g4a
+++ b/src/exa_wm_ca_srcalpha.g4a
@@ -31,8 +31,7 @@
 
 include(`exa_wm.g4i')
 
-/* mul mask rgba channels to src */
-mul (16)    src_sample0<1>F src_sample0<8,8,1>F	src_sample6<8,8,1>F	{ compr align1 };
-mul (16)    src_sample2<1>F src_sample2<8,8,1>F	src_sample6<8,8,1>F	{ compr align1 };
-mul (16)    src_sample4<1>F src_sample4<8,8,1>F	src_sample6<8,8,1>F	{ compr align1 };
-mul (16)    src_sample6<1>F src_sample6<8,8,1>F	src_sample6<8,8,1>F	{ compr align1 };
+mul (16)    src_sample0<1>F mask_sample0<8,8,1>F    src_sample6<8,8,1>F	{ compr align1 };
+mul (16)    src_sample2<1>F mask_sample2<8,8,1>F    src_sample6<8,8,1>F	{ compr align1 };
+mul (16)    src_sample4<1>F mask_sample4<8,8,1>F    src_sample6<8,8,1>F	{ compr align1 };
+mul (16)    src_sample6<1>F mask_sample6<8,8,1>F    src_sample6<8,8,1>F	{ compr align1 };
diff --git a/src/exa_wm_ca_srcalpha.g4b b/src/exa_wm_ca_srcalpha.g4b
index 780e704..94f1516 100644
--- a/src/exa_wm_ca_srcalpha.g4b
+++ b/src/exa_wm_ca_srcalpha.g4b
@@ -1,4 +1,4 @@
-   { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0300 },
-   { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0300 },
-   { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d0300 },
-   { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0300 },
+   { 0x00802041, 0x220077bd, 0x008d0300, 0x008d02c0 },
+   { 0x00802041, 0x224077bd, 0x008d0340, 0x008d02c0 },
+   { 0x00802041, 0x228077bd, 0x008d0380, 0x008d02c0 },
+   { 0x00802041, 0x22c077bd, 0x008d03c0, 0x008d02c0 },
diff --git a/src/exa_wm_mask_affine.g4a b/src/exa_wm_mask_affine.g4a
index 4c096cb..9c52d2f 100644
--- a/src/exa_wm_mask_affine.g4a
+++ b/src/exa_wm_mask_affine.g4a
@@ -26,12 +26,16 @@
  */
 
 include(`exa_wm.g4i')
+
 define(`du_dx',	`mask_du_dx')
 define(`du_dy',	`mask_du_dy')
 define(`uo',	`mask_uo')
+
 define(`dv_dx',	`mask_dv_dx')
 define(`dv_dy',	`mask_dv_dy')
 define(`vo',	`mask_vo')
+
 define(`u',	`mask_u')
 define(`v',	`mask_v')
+
 include(`exa_wm_affine.g4i')
diff --git a/src/exa_wm_mask_affine.g4b b/src/exa_wm_mask_affine.g4b
index 62b46e0..35dec6f 100644
--- a/src/exa_wm_mask_affine.g4b
+++ b/src/exa_wm_mask_affine.g4b
@@ -1,8 +1,8 @@
-   { 0x00802041, 0x218077bd, 0x008d0100, 0x00000090 },
-   { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000094 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
-   { 0x00802040, 0x202077be, 0x008d0180, 0x0000009c },
-   { 0x00802041, 0x218077bd, 0x008d0100, 0x000000a0 },
-   { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000a4 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
-   { 0x00802040, 0x206077be, 0x008d0180, 0x000000ac },
+   { 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 },
+   { 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+   { 0x00802040, 0x210077be, 0x008d0440, 0x000000ac },
+   { 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 },
+   { 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+   { 0x00802040, 0x214077be, 0x008d0440, 0x000000bc },
diff --git a/src/exa_wm_mask_projective.g4a b/src/exa_wm_mask_projective.g4a
index 464f6c5..9acaace 100644
--- a/src/exa_wm_mask_projective.g4a
+++ b/src/exa_wm_mask_projective.g4a
@@ -42,6 +42,11 @@ define(`wo',	`mask_wo')
 define(`u',	`mask_u')
 define(`v',	`mask_v')
 define(`w',	`mask_w')
+
+define(`u_0',	`mask_u_0')
+define(`v_0',	`mask_v_0')
+define(`u_1',	`mask_u_1')
+define(`v_1',	`mask_v_1')
 define(`w_0',	`mask_w_0')
 define(`w_1',	`mask_w_1')
 
diff --git a/src/exa_wm_mask_projective.g4b b/src/exa_wm_mask_projective.g4b
index ac4faa3..0684882 100644
--- a/src/exa_wm_mask_projective.g4b
+++ b/src/exa_wm_mask_projective.g4b
@@ -1,16 +1,16 @@
-   { 0x00802041, 0x218077bd, 0x008d0100, 0x000000b0 },
-   { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000b4 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x000000bc },
-   { 0x00600031, 0x22001fbd, 0x008d0180, 0x01110001 },
-   { 0x00600031, 0x22201fbd, 0x008d01a0, 0x01110001 },
-   { 0x00802041, 0x218077bd, 0x008d0100, 0x00000090 },
-   { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000094 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x0000009c },
-   { 0x00802041, 0x202077be, 0x008d0180, 0x008d0200 },
-   { 0x00802041, 0x218077bd, 0x008d0100, 0x000000a0 },
-   { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000a4 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x000000ac },
-   { 0x00802041, 0x206077be, 0x008d0180, 0x008d0200 },
+   { 0x00802041, 0x244077bd, 0x008d0100, 0x000000c0 },
+   { 0x00802041, 0x240077bd, 0x008d0140, 0x000000c4 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x000000cc },
+   { 0x00600031, 0x21c01fbd, 0x008d0440, 0x01110001 },
+   { 0x00600031, 0x21e01fbd, 0x008d0460, 0x01110001 },
+   { 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 },
+   { 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x000000ac },
+   { 0x00802041, 0x210077be, 0x008d0440, 0x008d01c0 },
+   { 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 },
+   { 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x000000bc },
+   { 0x00802041, 0x214077be, 0x008d0440, 0x008d01c0 },
diff --git a/src/exa_wm_mask_sample.g4a b/src/exa_wm_mask_sample.g4a
deleted file mode 100644
index 45dc3c4..0000000
--- a/src/exa_wm_mask_sample.g4a
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright © 2006 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Wang Zhenyu <zhenyu.z.wang at intel.com>
- *    Keith Packard <keithp at keithp.com>
- */
-
-/* Sample the mask surface */
-
-include(`exa_wm.g4i')
-
-/* prepare sampler read back gX register, which would be written back to output */
-
-/* use simd16 sampler, param 0 is u, param 1 is v. */
-/* 'payload' loading, assuming tex coord start from g4 */
-
-/* m0 will be copied with g0, as it contains send desc */
-/* emit sampler 'send' cmd */
-send (16) 0			/* msg reg index */
-	mask_sample0<1>UW 	/* readback */
-	g0<8,8,1>UW		/* copy to msg start reg*/
-	sampler (1,0,F)		/* sampler message description, (binding_table,sampler_index,datatype)
-				/* here(src->dst) we should use src_sampler and src_surface */
-	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
-
-// mov (8)  mask_sample7<1>UD	mask_sample7<8,8,1>UD	    { align1 };  /* wait sampler return */
-
-/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
-
diff --git a/src/exa_wm_mask_sample.g4b b/src/exa_wm_mask_sample.g4b
deleted file mode 100644
index 45f7ead..0000000
--- a/src/exa_wm_mask_sample.g4b
+++ /dev/null
@@ -1 +0,0 @@
-   { 0x00800031, 0x23401d29, 0x008d0000, 0x02580001 },
diff --git a/src/exa_wm_mask_sample_a.g4a b/src/exa_wm_mask_sample_a.g4a
new file mode 100644
index 0000000..c06611d
--- /dev/null
+++ b/src/exa_wm_mask_sample_a.g4a
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the mask surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load only alpha */
+mov (1) g0.8<1>UD	0x00007000UD { align1 mask_disable };
+
+/* mask_msg will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) mask_msg_ind		/* msg reg index */
+	mask_sample6<1>UW 	/* readback */
+	g0<8,8,1>UW		/* copy to msg start reg*/
+	sampler (2,1,F)		/* sampler message description, (binding_table,sampler_index,datatype) */
+				/* mask sampling: use the mask sampler and mask surface */
+	mlen 5 rlen 2 { align1 };   /* required message len 5, readback len 2 (alpha only) */
+
diff --git a/src/exa_wm_mask_sample_a.g4b b/src/exa_wm_mask_sample_a.g4b
new file mode 100644
index 0000000..01fc8d5
--- /dev/null
+++ b/src/exa_wm_mask_sample_a.g4b
@@ -0,0 +1,2 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
+   { 0x07800031, 0x23c01d29, 0x008d0000, 0x02520102 },
diff --git a/src/exa_wm_mask_sample_argb.g4a b/src/exa_wm_mask_sample_argb.g4a
new file mode 100644
index 0000000..7f0815f
--- /dev/null
+++ b/src/exa_wm_mask_sample_argb.g4a
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the mask surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load argb */
+mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
+
+/* mask_msg will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) mask_msg_ind		/* msg reg index */
+	mask_sample0<1>UW 	/* readback */
+	g0<8,8,1>UW		/* copy to msg start reg*/
+	sampler (2,1,F)		/* sampler message description, (binding_table,sampler_index,datatype) */
+				/* mask sampling: use the mask sampler and mask surface */
+	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
+
diff --git a/src/exa_wm_mask_sample_argb.g4b b/src/exa_wm_mask_sample_argb.g4b
new file mode 100644
index 0000000..97d3803
--- /dev/null
+++ b/src/exa_wm_mask_sample_argb.g4b
@@ -0,0 +1,2 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
+   { 0x07800031, 0x23001d29, 0x008d0000, 0x02580102 },
diff --git a/src/exa_wm_noca.g4b b/src/exa_wm_noca.g4b
index ba01d1a..1c9d948 100644
--- a/src/exa_wm_noca.g4b
+++ b/src/exa_wm_noca.g4b
@@ -1,4 +1,4 @@
-   { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0400 },
-   { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0400 },
-   { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d0400 },
-   { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0400 },
+   { 0x00802041, 0x220077bd, 0x008d0200, 0x008d03c0 },
+   { 0x00802041, 0x224077bd, 0x008d0240, 0x008d03c0 },
+   { 0x00802041, 0x228077bd, 0x008d0280, 0x008d03c0 },
+   { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 },
diff --git a/src/exa_wm_nomask.g4a b/src/exa_wm_nomask.g4a
index 97426ec..eb535fe 100644
--- a/src/exa_wm_nomask.g4a
+++ b/src/exa_wm_nomask.g4a
@@ -119,7 +119,7 @@ mov (8) m8<1>F g17<8,8,1>F { align1 };
 mov (8) m9<1>F g19<8,8,1>F { align1 };
 
 /* m0, m1 are all direct passed by PS thread payload */
-mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+mov (8) m1<1>UD g1<8,8,1>UD { align1 };
 
 /* write */
 send (16) 0 acc0<1>UW g0<8,8,1>UW write (
diff --git a/src/exa_wm_src_affine.g4a b/src/exa_wm_src_affine.g4a
index 3bf8717..3194b5a 100644
--- a/src/exa_wm_src_affine.g4a
+++ b/src/exa_wm_src_affine.g4a
@@ -30,12 +30,16 @@
  */
 
 include(`exa_wm.g4i')
+
 define(`du_dx',	`src_du_dx')
 define(`du_dy',	`src_du_dy')
 define(`uo',	`src_uo')
+
 define(`dv_dx',	`src_dv_dx')
 define(`dv_dy',	`src_dv_dy')
 define(`vo',	`src_vo')
+
 define(`u',	`src_u')
 define(`v',	`src_v')
+
 include(`exa_wm_affine.g4i')
diff --git a/src/exa_wm_src_affine.g4b b/src/exa_wm_src_affine.g4b
index f18ea1e..9fef62c 100644
--- a/src/exa_wm_src_affine.g4b
+++ b/src/exa_wm_src_affine.g4b
@@ -1,8 +1,8 @@
-   { 0x00802041, 0x218077bd, 0x008d0100, 0x00000060 },
-   { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000064 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
-   { 0x00802040, 0x202077be, 0x008d0180, 0x0000006c },
-   { 0x00802041, 0x218077bd, 0x008d0100, 0x00000070 },
-   { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000074 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
-   { 0x00802040, 0x206077be, 0x008d0180, 0x0000007c },
+   { 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 },
+   { 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+   { 0x00802040, 0x204077be, 0x008d0440, 0x0000006c },
+   { 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 },
+   { 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+   { 0x00802040, 0x208077be, 0x008d0440, 0x0000007c },
diff --git a/src/exa_wm_src_projective.g4a b/src/exa_wm_src_projective.g4a
index 6bd2d6a..16c9cd5 100644
--- a/src/exa_wm_src_projective.g4a
+++ b/src/exa_wm_src_projective.g4a
@@ -39,6 +39,10 @@ define(`wo',	`src_wo')
 define(`u',	`src_u')
 define(`v',	`src_v')
 define(`w',	`src_w')
+define(`u_0',	`src_u_0')
+define(`v_0',	`src_v_0')
+define(`u_1',	`src_u_1')
+define(`v_1',	`src_v_1')
 define(`w_0',	`src_w_0')
 define(`w_1',	`src_w_1')
 
diff --git a/src/exa_wm_src_projective.g4b b/src/exa_wm_src_projective.g4b
index 68bfc92..2d20395 100644
--- a/src/exa_wm_src_projective.g4b
+++ b/src/exa_wm_src_projective.g4b
@@ -1,16 +1,16 @@
-   { 0x00802041, 0x218077bd, 0x008d0100, 0x00000080 },
-   { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000084 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x0000008c },
-   { 0x00600031, 0x22001fbd, 0x008d0180, 0x01110001 },
-   { 0x00600031, 0x22201fbd, 0x008d01a0, 0x01110001 },
-   { 0x00802041, 0x218077bd, 0x008d0100, 0x00000060 },
-   { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000064 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x0000006c },
-   { 0x00802041, 0x202077be, 0x008d0180, 0x008d0200 },
-   { 0x00802041, 0x218077bd, 0x008d0100, 0x00000070 },
-   { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000074 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
-   { 0x00802040, 0x218077bd, 0x008d0180, 0x0000007c },
-   { 0x00802041, 0x206077be, 0x008d0180, 0x008d0200 },
+   { 0x00802041, 0x244077bd, 0x008d0100, 0x00000080 },
+   { 0x00802041, 0x240077bd, 0x008d0140, 0x00000084 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x0000008c },
+   { 0x00600031, 0x21801fbd, 0x008d0440, 0x01110001 },
+   { 0x00600031, 0x21a01fbd, 0x008d0460, 0x01110001 },
+   { 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 },
+   { 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x0000006c },
+   { 0x00802041, 0x204077be, 0x008d0440, 0x008d0180 },
+   { 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 },
+   { 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+   { 0x00802040, 0x244077bd, 0x008d0440, 0x0000007c },
+   { 0x00802041, 0x208077be, 0x008d0440, 0x008d0180 },
diff --git a/src/exa_wm_src_sample.g4a b/src/exa_wm_src_sample.g4a
deleted file mode 100644
index 04cd3e3..0000000
--- a/src/exa_wm_src_sample.g4a
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright © 2006 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Wang Zhenyu <zhenyu.z.wang at intel.com>
- *    Keith Packard <keithp at keithp.com>
- */
-
-/* Sample the src surface */
-
-include(`exa_wm.g4i')
-
-/* prepare sampler read back gX register, which would be written back to output */
-
-/* use simd16 sampler, param 0 is u, param 1 is v. */
-/* 'payload' loading, assuming tex coord start from g4 */
-
-/* m0 will be copied with g0, as it contains send desc */
-/* emit sampler 'send' cmd */
-send (16) 0			/* msg reg index */
-	src_sample0<1>UW 	/* readback */
-	g0<8,8,1>UW		/* copy to msg start reg*/
-	sampler (1,0,F)		/* sampler message description, (binding_table,sampler_index,datatype)
-				/* here(src->dst) we should use src_sampler and src_surface */
-	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
-
-// mov (8)  src_sample7<1>UD	src_sample7<8,8,1>UD	    { align1 };  /* wait sampler return */
-
-/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
-
diff --git a/src/exa_wm_src_sample.g4b b/src/exa_wm_src_sample.g4b
deleted file mode 100644
index 5ca33f5..0000000
--- a/src/exa_wm_src_sample.g4b
+++ /dev/null
@@ -1 +0,0 @@
-   { 0x00800031, 0x22401d29, 0x008d0000, 0x02580001 },
diff --git a/src/exa_wm_src_sample_a.g4a b/src/exa_wm_src_sample_a.g4a
new file mode 100644
index 0000000..803c358
--- /dev/null
+++ b/src/exa_wm_src_sample_a.g4a
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the src surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load alpha */
+mov (1) g0.8<1>UD	0x00007000UD { align1 mask_disable };
+
+/* src_msg will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) src_msg_ind		/* msg reg index */
+	src_sample6<1>UW 	/* readback */
+	g0<8,8,1>UW		/* copy to msg start reg*/
+	sampler (1,0,F)		/* sampler message description, (binding_table,sampler_index,datatype)
+				/* here(src->dst) we should use src_sampler and src_surface */
+	mlen 5 rlen 2 { align1 };   /* required message len 5, readback len 2 (alpha only) */
diff --git a/src/exa_wm_src_sample_a.g4b b/src/exa_wm_src_sample_a.g4b
new file mode 100644
index 0000000..8505757
--- /dev/null
+++ b/src/exa_wm_src_sample_a.g4b
@@ -0,0 +1,2 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
+   { 0x01800031, 0x22c01d29, 0x008d0000, 0x02520001 },
diff --git a/src/exa_wm_src_sample_argb.g4a b/src/exa_wm_src_sample_argb.g4a
new file mode 100644
index 0000000..4fcf276
--- /dev/null
+++ b/src/exa_wm_src_sample_argb.g4a
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the src surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load argb */
+mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
+
+/* src_msg will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) src_msg_ind		/* msg reg index */
+	src_sample0<1>UW 	/* readback */
+	g0<8,8,1>UW		/* copy to msg start reg*/
+	sampler (1,0,F)		/* sampler message description, (binding_table,sampler_index,datatype)
+				/* here(src->dst) we should use src_sampler and src_surface */
+	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
diff --git a/src/exa_wm_src_sample_argb.g4b b/src/exa_wm_src_sample_argb.g4b
new file mode 100644
index 0000000..1d4a730
--- /dev/null
+++ b/src/exa_wm_src_sample_argb.g4b
@@ -0,0 +1,2 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
+   { 0x01800031, 0x22001d29, 0x008d0000, 0x02580001 },
diff --git a/src/exa_wm_write.g4a b/src/exa_wm_write.g4a
index 9a821d7..5d3e6b1 100644
--- a/src/exa_wm_write.g4a
+++ b/src/exa_wm_write.g4a
@@ -31,9 +31,6 @@
 
 include(`exa_wm.g4i')
 
-/* m0, m1 are all direct passed by PS thread payload */
-mov (8) m1<1>F g1<8,8,1>F { align1 };
-
 /* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
 /* src_sample0 -> m2
    src_sample1 -> m6
@@ -55,7 +52,7 @@ mov (8) m8<1>F src_sample5<8,8,1>F { align1 };
 mov (8) m9<1>F src_sample7<8,8,1>F { align1 };
 
 /* m0, m1 are all direct passed by PS thread payload */
-mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+mov (8) m1<1>UD g1<8,8,1>UD { align1 };
 
 /* write */
 send (16) 0 acc0<1>UW g0<8,8,1>UW write (
@@ -76,5 +73,4 @@ nop;
 nop;
 nop;
 nop;
-nop;
 
diff --git a/src/exa_wm_write.g4b b/src/exa_wm_write.g4b
index dd266a3..b7421c2 100644
--- a/src/exa_wm_write.g4b
+++ b/src/exa_wm_write.g4b
@@ -1,13 +1,12 @@
-   { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0240, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0280, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d02c0, 0x00000000 },
-   { 0x00600001, 0x20a003be, 0x008d0300, 0x00000000 },
-   { 0x00600001, 0x20c003be, 0x008d0260, 0x00000000 },
-   { 0x00600001, 0x20e003be, 0x008d02a0, 0x00000000 },
-   { 0x00600001, 0x210003be, 0x008d02e0, 0x00000000 },
-   { 0x00600001, 0x212003be, 0x008d0320, 0x00000000 },
-   { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+   { 0x00600001, 0x204003be, 0x008d0200, 0x00000000 },
+   { 0x00600001, 0x206003be, 0x008d0240, 0x00000000 },
+   { 0x00600001, 0x208003be, 0x008d0280, 0x00000000 },
+   { 0x00600001, 0x20a003be, 0x008d02c0, 0x00000000 },
+   { 0x00600001, 0x20c003be, 0x008d0220, 0x00000000 },
+   { 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 },
+   { 0x00600001, 0x210003be, 0x008d02a0, 0x00000000 },
+   { 0x00600001, 0x212003be, 0x008d02e0, 0x00000000 },
+   { 0x00600001, 0x20200022, 0x008d0020, 0x00000000 },
    { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
@@ -17,4 +16,3 @@
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_xy.g4b b/src/exa_wm_xy.g4b
index 7784a3d..c5620cd 100644
--- a/src/exa_wm_xy.g4b
+++ b/src/exa_wm_xy.g4b
@@ -1,4 +1,4 @@
-   { 0x00800040, 0x21806d29, 0x00480028, 0x10101010 },
-   { 0x00800040, 0x21c06d29, 0x0048002a, 0x11001100 },
-   { 0x00802040, 0x2100753d, 0x008d0180, 0x00004020 },
-   { 0x00802040, 0x2140753d, 0x008d01c0, 0x00004024 },
+   { 0x00800040, 0x24406d29, 0x00480028, 0x10101010 },
+   { 0x00800040, 0x24006d29, 0x0048002a, 0x11001100 },
+   { 0x00802040, 0x2100753d, 0x008d0440, 0x00004020 },
+   { 0x00802040, 0x2140753d, 0x008d0400, 0x00004024 },
diff --git a/src/i810_reg.h b/src/i810_reg.h
index d799e77..834b948 100644
--- a/src/i810_reg.h
+++ b/src/i810_reg.h
@@ -2322,6 +2322,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_FLUSH			(0x04<<23)
 #define MI_WRITE_DIRTY_STATE		(1<<4)
 #define MI_END_SCENE			(1<<3)
+#define MI_GLOBAL_SNAPSHOT_COUNT_RESET	(1<<3)
 #define MI_INHIBIT_RENDER_CACHE_FLUSH	(1<<2)
 #define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1)
 #define MI_INVALIDATE_MAP_CACHE		(1<<0)
diff --git a/src/i965_render.c b/src/i965_render.c
index e348c2b..c2260eb 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -285,7 +285,7 @@ static int next_offset, total_state_size;
 static char *state_base;
 static int state_base_offset;
 static float *vb;
-static int vb_size = (6 * 4) * 4 ; /* 6 DWORDS per vertex - and mask*/
+static int vb_size = (2 + 3 + 3) * 3 * 4;   /* (dst, src, mask) 3 vertices, 4 bytes */
 
 static uint32_t src_blend, dst_blend;
 
@@ -318,7 +318,7 @@ static const uint32_t sip_kernel_static[][4] = {
  */
 
 #define SF_KERNEL_NUM_GRF  16
-#define SF_MAX_THREADS	   2
+#define SF_MAX_THREADS	   1
 
 static const uint32_t sf_kernel_static[][4] = {
 #include "exa_sf.g4b"
@@ -329,29 +329,31 @@ static const uint32_t sf_kernel_static_mask[][4] = {
 };
 
 /* ps kernels */
-#define PS_KERNEL_NUM_GRF   32
-#define PS_MAX_THREADS	   32
+#define PS_KERNEL_NUM_GRF   48
+#define PS_MAX_THREADS	    32
+#define PS_SCRATCH_SPACE    2048
+#define PS_SCRATCH_SPACE_LOG	1   /* log2 (PS_SCRATCH_SPACE) - 10  (1024 is 0, 2048 is 1) */
 
 static const uint32_t ps_kernel_static_nomask_affine [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
-#include "exa_wm_src_sample.g4b"
+#include "exa_wm_src_sample_argb.g4b"
 #include "exa_wm_write.g4b"
 };
 
 static const uint32_t ps_kernel_static_nomask_projective [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_projective.g4b"
-#include "exa_wm_src_sample.g4b"
+#include "exa_wm_src_sample_argb.g4b"
 #include "exa_wm_write.g4b"
 };
 
 static const uint32_t ps_kernel_static_maskca [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
-#include "exa_wm_src_sample.g4b"
+#include "exa_wm_src_sample_argb.g4b"
 #include "exa_wm_mask_affine.g4b"
-#include "exa_wm_mask_sample.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
 #include "exa_wm_ca.g4b"
 #include "exa_wm_write.g4b"
 };
@@ -359,9 +361,9 @@ static const uint32_t ps_kernel_static_maskca [][4] = {
 static const uint32_t ps_kernel_static_maskca_srcalpha [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
-#include "exa_wm_src_sample.g4b"
+#include "exa_wm_src_sample_a.g4b"
 #include "exa_wm_mask_affine.g4b"
-#include "exa_wm_mask_sample.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
 #include "exa_wm_ca_srcalpha.g4b"
 #include "exa_wm_write.g4b"
 };
@@ -369,9 +371,9 @@ static const uint32_t ps_kernel_static_maskca_srcalpha [][4] = {
 static const uint32_t ps_kernel_static_masknoca [][4] = {
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
-#include "exa_wm_src_sample.g4b"
+#include "exa_wm_src_sample_argb.g4b"
 #include "exa_wm_mask_affine.g4b"
-#include "exa_wm_mask_sample.g4b"
+#include "exa_wm_mask_sample_a.g4b"
 #include "exa_wm_noca.g4b"
 #include "exa_wm_write.g4b"
 };
@@ -432,21 +434,21 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 
     pI830->transform[0] = pSrcPicture->transform;
     is_affine_src = i830_transform_is_affine (pI830->transform[0]);
-    is_affine_mask = i830_transform_is_affine (pI830->transform[1]);
-    is_affine = is_affine_src && is_affine_mask;
 
     if (!pMask) {
 	pI830->transform[1] = NULL;
 	pI830->scale_units[1][0] = -1;
 	pI830->scale_units[1][1] = -1;
+	is_affine_mask = TRUE;
     } else {
 	pI830->transform[1] = pMaskPicture->transform;
-	if (pI830->transform[1])
-	    I830FALLBACK("i965 mask transform not implemented!\n");
 	pI830->scale_units[1][0] = pMask->drawable.width;
 	pI830->scale_units[1][1] = pMask->drawable.height;
+	is_affine_mask = i830_transform_is_affine (pI830->transform[1]);
     }
 
+    is_affine = is_affine_src && is_affine_mask;
+
     /* setup 3d pipeline state */
 
     binding_table_entries = 2; /* default no mask */
@@ -463,7 +465,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     next_offset = wm_offset + sizeof(*wm_state);
 
     wm_scratch_offset = ALIGN(next_offset, 1024);
-    next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS;
+    next_offset = wm_scratch_offset + PS_SCRATCH_SPACE * PS_MAX_THREADS;
 
     cc_offset = ALIGN(next_offset, 32);
     next_offset = cc_offset + sizeof(*cc_state);
@@ -782,6 +784,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 	    I830FALLBACK("Bad filter 0x%x\n", pMaskPicture->filter);
    	}
 
+	mask_sampler_state->ss0.default_color_mode = 0; /* GL mode */
    	if (!pMaskPicture->repeat) {
    	    mask_sampler_state->ss1.r_wrap_mode =
 		BRW_TEXCOORDMODE_CLAMP_BORDER;
@@ -885,7 +888,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     wm_state->thread0.kernel_start_pointer =
 	(state_base_offset + ps_kernel_offset) >> 6;
     wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
-    wm_state->thread1.single_program_flow = 0;
+    wm_state->thread1.single_program_flow = 1;
     if (!pMask)
 	wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
     else
@@ -893,7 +896,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 
     wm_state->thread2.scratch_space_base_pointer = (state_base_offset +
 						    wm_scratch_offset)>>10;
-    wm_state->thread2.per_thread_scratch_space = 0;
+    wm_state->thread2.per_thread_scratch_space = PS_SCRATCH_SPACE_LOG; 
     wm_state->thread3.const_urb_entry_read_length = 0;
     wm_state->thread3.const_urb_entry_read_offset = 0;
     /* Each pair of attributes (src/mask coords) is one URB entry */
@@ -1044,12 +1047,12 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 	if (is_affine)
 	{
 	    src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
-	    w_component = BRW_VFCOMPONENT_NOSTORE;
+	    w_component = BRW_VFCOMPONENT_STORE_1_FLT;
 	}
 	else
 	{
 	    src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
-	    w_component = BRW_VFCOMPONENT_NOSTORE;
+	    w_component = BRW_VFCOMPONENT_STORE_SRC;
 	}
 	BEGIN_BATCH(pMask?12:10);
 	/* Set up the pointer to our (single) vertex buffer */
@@ -1083,7 +1086,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 	OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_0_SHIFT) |
 		  (BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_1_SHIFT) |
 		  (w_component			<< VE1_VFCOMPONENT_2_SHIFT) |
-		  (BRW_VFCOMPONENT_NOSTORE	<< VE1_VFCOMPONENT_3_SHIFT) |
+		  (BRW_VFCOMPONENT_STORE_1_FLT	<< VE1_VFCOMPONENT_3_SHIFT) |
 		  ((4 + 4)			<< VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
 	/* u1, v1, w1 */
    	if (pMask) {
@@ -1095,15 +1098,15 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 	    OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC    << VE1_VFCOMPONENT_0_SHIFT) |
 		      (BRW_VFCOMPONENT_STORE_SRC    << VE1_VFCOMPONENT_1_SHIFT) |
 		      (w_component		    << VE1_VFCOMPONENT_2_SHIFT) |
-		      (BRW_VFCOMPONENT_NOSTORE	    << VE1_VFCOMPONENT_3_SHIFT) |
-		      ((4 + 2 + 4)		    << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
+		      (BRW_VFCOMPONENT_STORE_1_FLT  << VE1_VFCOMPONENT_3_SHIFT) |
+		      ((4 + 4 + 4)		    << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
    	}
 
 	ADVANCE_BATCH();
     }
 
 #ifdef I830DEBUG
-    ErrorF("try to sync to show any errors...");
+    ErrorF("try to sync to show any errors...\n");
     I830Sync(pScrn);
 #endif
     return TRUE;
@@ -1119,7 +1122,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     Bool is_affine_src, is_affine_mask, is_affine;
     float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
     int i;
-    int per_vertex = 2; /* dst x/y */
 
     is_affine_src = i830_transform_is_affine (pI830->transform[0]);
     is_affine_mask = i830_transform_is_affine (pI830->transform[1]);
@@ -1139,7 +1141,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 					      pI830->transform[0],
 					      &src_x[2], &src_y[2]))
 	    return;
-	per_vertex += 2;    /* src u/v */
     }
     else
     {
@@ -1158,14 +1159,13 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 						 &src_x[2], &src_y[2],
 						 &src_w[2]))
 	    return;
-	per_vertex += 3;    /* src u/v/w */
     }
 
     if (pI830->scale_units[1][0] == -1 || pI830->scale_units[1][1] == -1) {
 	has_mask = FALSE;
     } else {
 	has_mask = TRUE;
-	if (is_affine_mask) {
+	if (is_affine) {
 	    if (!i830_get_transformed_coordinates(maskX, maskY,
 						  pI830->transform[1],
 						  &mask_x[0], &mask_y[0]))
@@ -1178,7 +1178,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 						  pI830->transform[1],
 						  &mask_x[2], &mask_y[2]))
 		return;
-	    per_vertex += 2;	/* mask u/v */
 	} else {
 	    if (!i830_get_transformed_coordinates_3d(maskX, maskY,
 						     pI830->transform[1],
@@ -1195,10 +1194,17 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 						     &mask_x[2], &mask_y[2],
 						     &mask_w[2]))
 		return;
-	    per_vertex += 3;	/* mask u/v/w */
 	}
     }
 
+    {
+	BEGIN_BATCH(2);
+	OUT_BATCH(MI_FLUSH |
+		  MI_STATE_INSTRUCTION_CACHE_FLUSH |
+		  BRW_MI_GLOBAL_SNAPSHOT_RESET);
+	OUT_BATCH(MI_NOOP);
+	ADVANCE_BATCH();
+    }
     /* Wait for any existing composite rectangles to land before we overwrite
      * the VB with the next one.
      */
@@ -1246,6 +1252,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	if (!is_affine)
 	    vb[i++] = mask_w[0];
     }
+    assert (i * 4 <= vb_size);
 
     {
       BEGIN_BATCH(6);
@@ -1262,7 +1269,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
       ADVANCE_BATCH();
     }
 #ifdef I830DEBUG
-    ErrorF("sync after 3dprimitive");
+    ErrorF("sync after 3dprimitive\n");
     I830Sync(pScrn);
 #endif
     /* we must be sure that the pipeline is flushed before next exa draw,
diff --git a/src/i965_video.c b/src/i965_video.c
index 41f56a9..1d2c3f5 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -78,7 +78,7 @@ static const uint32_t sip_kernel_static[][4] = {
 #define SF_MAX_THREADS	   1
 
 static const uint32_t sf_kernel_static[][4] = {
-#include "sf_prog.h"
+#include "packed_yuv_sf.g4b"
 };
 
 /*
@@ -94,7 +94,7 @@ static const uint32_t sf_kernel_static[][4] = {
 #define BRW_GRF_BLOCKS(nreg)	((nreg + 15) / 16 - 1)
 
 static const uint32_t ps_kernel_static[][4] = {
-#include "wm_prog.h"
+#include "packed_yuv_wm.g4b"
 };
 
 #define ALIGN(i,m)    (((i) + (m) - 1) & ~((m) - 1))
commit f8081178eb6fda0e405967cbacad532561619262
Author: Keith Packard <keithp at keithp.com>
Date:   Mon Mar 31 12:06:37 2008 -0700

    remove old shader source files

diff --git a/src/exa_sf_mask_prog.h b/src/exa_sf_mask_prog.h
deleted file mode 100644
index be0a77b..0000000
--- a/src/exa_sf_mask_prog.h
+++ /dev/null
@@ -1,15 +0,0 @@
-   { 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 },
-   { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
-   { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 },
-   { 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
-   { 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 },
-   { 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
-   { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_sf_prog.h b/src/exa_sf_prog.h
deleted file mode 100644
index 223c9c9..0000000
--- a/src/exa_sf_prog.h
+++ /dev/null
@@ -1,15 +0,0 @@
-   { 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 },
-   { 0x00400001, 0x206003be, 0x00690060, 0x00000000 },
-   { 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 },
-   { 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 },
-   { 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 },
-   { 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 },
-   { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_maskca_prog.h b/src/exa_wm_maskca_prog.h
deleted file mode 100644
index d936412..0000000
--- a/src/exa_wm_maskca_prog.h
+++ /dev/null
@@ -1,95 +0,0 @@
-   { 0x00000001, 0x20c0013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20c40d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x20c8013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20cc0d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x2100013d, 0x0000002a, 0x00000000 },
-   { 0x00000001, 0x2104013d, 0x0000002a, 0x00000000 },
-   { 0x00000040, 0x21080d3d, 0x0000002a, 0x00000001 },
-   { 0x00000040, 0x210c0d3d, 0x0000002a, 0x00000001 },
-   { 0x00000001, 0x20d0013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20d40d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x20d8013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20dc0d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x2110013d, 0x0000002e, 0x00000000 },
-   { 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
-   { 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
-   { 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
-   { 0x00000001, 0x20e0013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20e40d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x20e8013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20ec0d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
-   { 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
-   { 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
-   { 0x00000040, 0x212c0d3d, 0x00000032, 0x00000001 },
-   { 0x00000001, 0x20f0013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20f40d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x20f8013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20fc0d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x2130013d, 0x00000036, 0x00000000 },
-   { 0x00000001, 0x2134013d, 0x00000036, 0x00000000 },
-   { 0x00000040, 0x21380d3d, 0x00000036, 0x00000001 },
-   { 0x00000040, 0x213c0d3d, 0x00000036, 0x00000001 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000060 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000060 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000006c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000006c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000064 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000064 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000007c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000007c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x21c01d29, 0x008d0000, 0x02580001 },
-   { 0x00600001, 0x22a00021, 0x008d02a0, 0x00000000 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000080 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000080 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000008c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000008c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000084 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000084 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000009c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000009c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x22c01d29, 0x008d0000, 0x02580102 },
-   { 0x00600001, 0x23a00021, 0x008d03a0, 0x00000000 },
-   { 0x00600041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
-   { 0x00600041, 0x21e077bd, 0x008d01e0, 0x008d02e0 },
-   { 0x00600041, 0x220077bd, 0x008d0200, 0x008d0300 },
-   { 0x00600041, 0x222077bd, 0x008d0220, 0x008d0320 },
-   { 0x00600041, 0x224077bd, 0x008d0240, 0x008d0340 },
-   { 0x00600041, 0x226077bd, 0x008d0260, 0x008d0360 },
-   { 0x00600041, 0x228077bd, 0x008d0280, 0x008d0380 },
-   { 0x00600041, 0x22a077bd, 0x008d02a0, 0x008d03a0 },
-   { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
-   { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
-   { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
-   { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
-   { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
-   { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
-   { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
-   { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_maskca_srcalpha_prog.h b/src/exa_wm_maskca_srcalpha_prog.h
deleted file mode 100644
index d83b119..0000000
--- a/src/exa_wm_maskca_srcalpha_prog.h
+++ /dev/null
@@ -1,95 +0,0 @@
-   { 0x00000001, 0x20c0013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20c40d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x20c8013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20cc0d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x2100013d, 0x0000002a, 0x00000000 },
-   { 0x00000001, 0x2104013d, 0x0000002a, 0x00000000 },
-   { 0x00000040, 0x21080d3d, 0x0000002a, 0x00000001 },
-   { 0x00000040, 0x210c0d3d, 0x0000002a, 0x00000001 },
-   { 0x00000001, 0x20d0013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20d40d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x20d8013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20dc0d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x2110013d, 0x0000002e, 0x00000000 },
-   { 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
-   { 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
-   { 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
-   { 0x00000001, 0x20e0013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20e40d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x20e8013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20ec0d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
-   { 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
-   { 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
-   { 0x00000040, 0x212c0d3d, 0x00000032, 0x00000001 },
-   { 0x00000001, 0x20f0013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20f40d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x20f8013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20fc0d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x2130013d, 0x00000036, 0x00000000 },
-   { 0x00000001, 0x2134013d, 0x00000036, 0x00000000 },
-   { 0x00000040, 0x21380d3d, 0x00000036, 0x00000001 },
-   { 0x00000040, 0x213c0d3d, 0x00000036, 0x00000001 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000060 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000060 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000006c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000006c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000064 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000064 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000007c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000007c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x21c01d29, 0x008d0000, 0x02580001 },
-   { 0x00600001, 0x22a00021, 0x008d02a0, 0x00000000 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000080 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000080 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000008c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000008c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000084 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000084 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000009c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000009c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x22c01d29, 0x008d0000, 0x02580102 },
-   { 0x00600001, 0x23a00021, 0x008d03a0, 0x00000000 },
-   { 0x00600041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
-   { 0x00600041, 0x21e077bd, 0x008d02e0, 0x008d02a0 },
-   { 0x00600041, 0x220077bd, 0x008d0300, 0x008d0280 },
-   { 0x00600041, 0x222077bd, 0x008d0320, 0x008d02a0 },
-   { 0x00600041, 0x224077bd, 0x008d0340, 0x008d0280 },
-   { 0x00600041, 0x226077bd, 0x008d0360, 0x008d02a0 },
-   { 0x00600041, 0x228077bd, 0x008d0380, 0x008d0280 },
-   { 0x00600041, 0x22a077bd, 0x008d03a0, 0x008d02a0 },
-   { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
-   { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
-   { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
-   { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
-   { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
-   { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
-   { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
-   { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_masknoca_prog.h b/src/exa_wm_masknoca_prog.h
deleted file mode 100644
index 5fcf3b5..0000000
--- a/src/exa_wm_masknoca_prog.h
+++ /dev/null
@@ -1,95 +0,0 @@
-   { 0x00000001, 0x20c0013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20c40d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x20c8013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20cc0d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x2100013d, 0x0000002a, 0x00000000 },
-   { 0x00000001, 0x2104013d, 0x0000002a, 0x00000000 },
-   { 0x00000040, 0x21080d3d, 0x0000002a, 0x00000001 },
-   { 0x00000040, 0x210c0d3d, 0x0000002a, 0x00000001 },
-   { 0x00000001, 0x20d0013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20d40d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x20d8013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20dc0d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x2110013d, 0x0000002e, 0x00000000 },
-   { 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
-   { 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
-   { 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
-   { 0x00000001, 0x20e0013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20e40d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x20e8013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20ec0d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
-   { 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
-   { 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
-   { 0x00000040, 0x212c0d3d, 0x00000032, 0x00000001 },
-   { 0x00000001, 0x20f0013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20f40d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x20f8013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20fc0d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x2130013d, 0x00000036, 0x00000000 },
-   { 0x00000001, 0x2134013d, 0x00000036, 0x00000000 },
-   { 0x00000040, 0x21380d3d, 0x00000036, 0x00000001 },
-   { 0x00000040, 0x213c0d3d, 0x00000036, 0x00000001 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000060 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000060 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000006c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000006c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000064 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000064 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000007c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000007c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x21c01d29, 0x008d0000, 0x02580001 },
-   { 0x00600001, 0x22a00021, 0x008d02a0, 0x00000000 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000080 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000080 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000008c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000008c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000084 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000084 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000009c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000009c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x22c01d29, 0x008d0000, 0x02580102 },
-   { 0x00600001, 0x23a00021, 0x008d03a0, 0x00000000 },
-   { 0x00600041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
-   { 0x00600041, 0x21e077bd, 0x008d01e0, 0x008d03a0 },
-   { 0x00600041, 0x220077bd, 0x008d0200, 0x008d0380 },
-   { 0x00600041, 0x222077bd, 0x008d0220, 0x008d03a0 },
-   { 0x00600041, 0x224077bd, 0x008d0240, 0x008d0380 },
-   { 0x00600041, 0x226077bd, 0x008d0260, 0x008d03a0 },
-   { 0x00600041, 0x228077bd, 0x008d0280, 0x008d0380 },
-   { 0x00600041, 0x22a077bd, 0x008d02a0, 0x008d03a0 },
-   { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
-   { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
-   { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
-   { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
-   { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
-   { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
-   { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
-   { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_nomask_prog.h b/src/exa_wm_nomask_prog.h
deleted file mode 100644
index c73bdbc..0000000
--- a/src/exa_wm_nomask_prog.h
+++ /dev/null
@@ -1,34 +0,0 @@
-   { 0x00800040, 0x20806d29, 0x00480028, 0x10101010 },
-   { 0x00800040, 0x20c06d29, 0x0048002a, 0x11001100 },
-   { 0x00802040, 0x2180753d, 0x008d0080, 0x00004020 },
-   { 0x00802040, 0x2200753d, 0x008d00c0, 0x00004024 },
-   { 0x00802041, 0x210077bd, 0x008d0180, 0x00000060 },
-   { 0x00802041, 0x214077bd, 0x008d0200, 0x00000064 },
-   { 0x00802040, 0x210077bd, 0x008d0100, 0x008d0140 },
-   { 0x00802040, 0x202077be, 0x008d0100, 0x0000006c },
-   { 0x00802041, 0x210077bd, 0x008d0180, 0x00000070 },
-   { 0x00802041, 0x214077bd, 0x008d0200, 0x00000074 },
-   { 0x00802040, 0x210077bd, 0x008d0100, 0x008d0140 },
-   { 0x00802040, 0x206077be, 0x008d0100, 0x0000007c },
-   { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
-   { 0x00600001, 0x22600021, 0x008d0260, 0x00000000 },
-   { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
-   { 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
-   { 0x00600001, 0x20c003be, 0x008d01a0, 0x00000000 },
-   { 0x00600001, 0x20e003be, 0x008d01e0, 0x00000000 },
-   { 0x00600001, 0x210003be, 0x008d0220, 0x00000000 },
-   { 0x00600001, 0x212003be, 0x008d0260, 0x00000000 },
-   { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
-   { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/sf_prog.h b/src/sf_prog.h
deleted file mode 100644
index 830d176..0000000
--- a/src/sf_prog.h
+++ /dev/null
@@ -1,17 +0,0 @@
-   { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
-   { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
-   { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
-   { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
-   { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
-   { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
-   { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/wm_prog.h b/src/wm_prog.h
deleted file mode 100644
index d72c651..0000000
--- a/src/wm_prog.h
+++ /dev/null
@@ -1,82 +0,0 @@
-   { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
-   { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
-   { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
-   { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
-   { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
-   { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
-   { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
-   { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
-   { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
-   { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
-   { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
-   { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
-   { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
-   { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
-   { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
-   { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
-   { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
-   { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
-   { 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 },
-   { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 },
-   { 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c },
-   { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c },
-   { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
-   { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
-   { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 },
-   { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 },
-   { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c },
-   { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c },
-   { 0x00600001, 0x202003be, 0x008d0080, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 },
-   { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
-   { 0x00600001, 0x22600129, 0x008d0260, 0x00000000 },
-   { 0x00600040, 0x21c07fbd, 0x008d01c0, 0xbd808081 },
-   { 0x00600040, 0x21807fbd, 0x008d0180, 0xbf008084 },
-   { 0x00600040, 0x22007fbd, 0x008d0200, 0xbf008084 },
-   { 0x00600041, 0x21c07fbd, 0x008d01c0, 0x3f94fdf4 },
-   { 0x00600041, 0x20007fbc, 0x008d0180, 0x3fcc49ba },
-   { 0x80600048, 0x20407fbe, 0x008d01c0, 0x3f800000 },
-   { 0x00600041, 0x20007fbc, 0x008d0180, 0xbf5020c5 },
-   { 0x00600048, 0x20007fbc, 0x008d0200, 0xbec8b439 },
-   { 0x80600048, 0x20607fbe, 0x008d01c0, 0x3f800000 },
-   { 0x00600041, 0x20007fbc, 0x008d0200, 0x40011687 },
-   { 0x80600048, 0x20807fbe, 0x008d01c0, 0x3f800000 },
-   { 0x00600040, 0x21e07fbd, 0x008d01e0, 0xbd808081 },
-   { 0x00600040, 0x21a07fbd, 0x008d01a0, 0xbf008084 },
-   { 0x00600040, 0x22207fbd, 0x008d0220, 0xbf008084 },
-   { 0x00600041, 0x21e07fbd, 0x008d01e0, 0x3f94fdf4 },
-   { 0x00600041, 0x20007fbc, 0x008d01a0, 0x3fcc49ba },
-   { 0x80600048, 0x20c07fbe, 0x008d01e0, 0x3f800000 },
-   { 0x00600041, 0x20007fbc, 0x008d01a0, 0xbf5020c5 },
-   { 0x00600048, 0x20007fbc, 0x008d0220, 0xbec8b439 },
-   { 0x80600048, 0x20e07fbe, 0x008d01e0, 0x3f800000 },
-   { 0x00600041, 0x20007fbc, 0x008d0220, 0x40011687 },
-   { 0x80600048, 0x21007fbe, 0x008d01e0, 0x3f800000 },
-   { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
-   { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
commit 879f8717b09f79156b218ee9cc2107700190d586
Author: Keith Packard <keithp at keithp.com>
Date:   Mon Mar 31 12:05:56 2008 -0700

    remove old monolithic shaders

diff --git a/src/exa_wm_maskca.g4b b/src/exa_wm_maskca.g4b
deleted file mode 100644
index d936412..0000000
--- a/src/exa_wm_maskca.g4b
+++ /dev/null
@@ -1,95 +0,0 @@
-   { 0x00000001, 0x20c0013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20c40d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x20c8013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20cc0d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x2100013d, 0x0000002a, 0x00000000 },
-   { 0x00000001, 0x2104013d, 0x0000002a, 0x00000000 },
-   { 0x00000040, 0x21080d3d, 0x0000002a, 0x00000001 },
-   { 0x00000040, 0x210c0d3d, 0x0000002a, 0x00000001 },
-   { 0x00000001, 0x20d0013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20d40d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x20d8013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20dc0d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x2110013d, 0x0000002e, 0x00000000 },
-   { 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
-   { 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
-   { 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
-   { 0x00000001, 0x20e0013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20e40d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x20e8013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20ec0d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
-   { 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
-   { 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
-   { 0x00000040, 0x212c0d3d, 0x00000032, 0x00000001 },
-   { 0x00000001, 0x20f0013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20f40d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x20f8013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20fc0d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x2130013d, 0x00000036, 0x00000000 },
-   { 0x00000001, 0x2134013d, 0x00000036, 0x00000000 },
-   { 0x00000040, 0x21380d3d, 0x00000036, 0x00000001 },
-   { 0x00000040, 0x213c0d3d, 0x00000036, 0x00000001 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000060 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000060 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000006c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000006c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000064 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000064 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000007c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000007c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x21c01d29, 0x008d0000, 0x02580001 },
-   { 0x00600001, 0x22a00021, 0x008d02a0, 0x00000000 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000080 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000080 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000008c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000008c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000084 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000084 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000009c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000009c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x22c01d29, 0x008d0000, 0x02580102 },
-   { 0x00600001, 0x23a00021, 0x008d03a0, 0x00000000 },
-   { 0x00600041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
-   { 0x00600041, 0x21e077bd, 0x008d01e0, 0x008d02e0 },
-   { 0x00600041, 0x220077bd, 0x008d0200, 0x008d0300 },
-   { 0x00600041, 0x222077bd, 0x008d0220, 0x008d0320 },
-   { 0x00600041, 0x224077bd, 0x008d0240, 0x008d0340 },
-   { 0x00600041, 0x226077bd, 0x008d0260, 0x008d0360 },
-   { 0x00600041, 0x228077bd, 0x008d0280, 0x008d0380 },
-   { 0x00600041, 0x22a077bd, 0x008d02a0, 0x008d03a0 },
-   { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
-   { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
-   { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
-   { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
-   { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
-   { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
-   { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
-   { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_maskca_srcalpha.g4b b/src/exa_wm_maskca_srcalpha.g4b
deleted file mode 100644
index d83b119..0000000
--- a/src/exa_wm_maskca_srcalpha.g4b
+++ /dev/null
@@ -1,95 +0,0 @@
-   { 0x00000001, 0x20c0013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20c40d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x20c8013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20cc0d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x2100013d, 0x0000002a, 0x00000000 },
-   { 0x00000001, 0x2104013d, 0x0000002a, 0x00000000 },
-   { 0x00000040, 0x21080d3d, 0x0000002a, 0x00000001 },
-   { 0x00000040, 0x210c0d3d, 0x0000002a, 0x00000001 },
-   { 0x00000001, 0x20d0013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20d40d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x20d8013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20dc0d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x2110013d, 0x0000002e, 0x00000000 },
-   { 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
-   { 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
-   { 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
-   { 0x00000001, 0x20e0013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20e40d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x20e8013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20ec0d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
-   { 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
-   { 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
-   { 0x00000040, 0x212c0d3d, 0x00000032, 0x00000001 },
-   { 0x00000001, 0x20f0013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20f40d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x20f8013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20fc0d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x2130013d, 0x00000036, 0x00000000 },
-   { 0x00000001, 0x2134013d, 0x00000036, 0x00000000 },
-   { 0x00000040, 0x21380d3d, 0x00000036, 0x00000001 },
-   { 0x00000040, 0x213c0d3d, 0x00000036, 0x00000001 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000060 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000060 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000006c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000006c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000064 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000064 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000007c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000007c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x21c01d29, 0x008d0000, 0x02580001 },
-   { 0x00600001, 0x22a00021, 0x008d02a0, 0x00000000 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000080 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000080 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000008c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000008c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000084 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000084 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000009c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000009c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x22c01d29, 0x008d0000, 0x02580102 },
-   { 0x00600001, 0x23a00021, 0x008d03a0, 0x00000000 },
-   { 0x00600041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
-   { 0x00600041, 0x21e077bd, 0x008d02e0, 0x008d02a0 },
-   { 0x00600041, 0x220077bd, 0x008d0300, 0x008d0280 },
-   { 0x00600041, 0x222077bd, 0x008d0320, 0x008d02a0 },
-   { 0x00600041, 0x224077bd, 0x008d0340, 0x008d0280 },
-   { 0x00600041, 0x226077bd, 0x008d0360, 0x008d02a0 },
-   { 0x00600041, 0x228077bd, 0x008d0380, 0x008d0280 },
-   { 0x00600041, 0x22a077bd, 0x008d03a0, 0x008d02a0 },
-   { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
-   { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
-   { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
-   { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
-   { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
-   { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
-   { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
-   { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_masknoca.g4b b/src/exa_wm_masknoca.g4b
deleted file mode 100644
index 5fcf3b5..0000000
--- a/src/exa_wm_masknoca.g4b
+++ /dev/null
@@ -1,95 +0,0 @@
-   { 0x00000001, 0x20c0013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20c40d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x20c8013d, 0x00000028, 0x00000000 },
-   { 0x00000040, 0x20cc0d3d, 0x00000028, 0x00000001 },
-   { 0x00000001, 0x2100013d, 0x0000002a, 0x00000000 },
-   { 0x00000001, 0x2104013d, 0x0000002a, 0x00000000 },
-   { 0x00000040, 0x21080d3d, 0x0000002a, 0x00000001 },
-   { 0x00000040, 0x210c0d3d, 0x0000002a, 0x00000001 },
-   { 0x00000001, 0x20d0013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20d40d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x20d8013d, 0x0000002c, 0x00000000 },
-   { 0x00000040, 0x20dc0d3d, 0x0000002c, 0x00000001 },
-   { 0x00000001, 0x2110013d, 0x0000002e, 0x00000000 },
-   { 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
-   { 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
-   { 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
-   { 0x00000001, 0x20e0013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20e40d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x20e8013d, 0x00000030, 0x00000000 },
-   { 0x00000040, 0x20ec0d3d, 0x00000030, 0x00000001 },
-   { 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
-   { 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
-   { 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
-   { 0x00000040, 0x212c0d3d, 0x00000032, 0x00000001 },
-   { 0x00000001, 0x20f0013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20f40d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x20f8013d, 0x00000034, 0x00000000 },
-   { 0x00000040, 0x20fc0d3d, 0x00000034, 0x00000001 },
-   { 0x00000001, 0x2130013d, 0x00000036, 0x00000000 },
-   { 0x00000001, 0x2134013d, 0x00000036, 0x00000000 },
-   { 0x00000040, 0x21380d3d, 0x00000036, 0x00000001 },
-   { 0x00000040, 0x213c0d3d, 0x00000036, 0x00000001 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000060 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000060 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000006c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000006c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000064 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000064 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000007c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000007c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x21c01d29, 0x008d0000, 0x02580001 },
-   { 0x00600001, 0x22a00021, 0x008d02a0, 0x00000000 },
-   { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
-   { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
-   { 0x00600041, 0x214077bd, 0x008d0140, 0x00000080 },
-   { 0x00600041, 0x216077bd, 0x008d0160, 0x00000080 },
-   { 0x00600040, 0x214077bd, 0x008d0140, 0x0000008c },
-   { 0x00600040, 0x216077bd, 0x008d0160, 0x0000008c },
-   { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
-   { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
-   { 0x00600041, 0x218077bd, 0x008d0180, 0x00000084 },
-   { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000084 },
-   { 0x00600040, 0x218077bd, 0x008d0180, 0x0000009c },
-   { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000009c },
-   { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
-   { 0x00800031, 0x22c01d29, 0x008d0000, 0x02580102 },
-   { 0x00600001, 0x23a00021, 0x008d03a0, 0x00000000 },
-   { 0x00600041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
-   { 0x00600041, 0x21e077bd, 0x008d01e0, 0x008d03a0 },
-   { 0x00600041, 0x220077bd, 0x008d0200, 0x008d0380 },
-   { 0x00600041, 0x222077bd, 0x008d0220, 0x008d03a0 },
-   { 0x00600041, 0x224077bd, 0x008d0240, 0x008d0380 },
-   { 0x00600041, 0x226077bd, 0x008d0260, 0x008d03a0 },
-   { 0x00600041, 0x228077bd, 0x008d0280, 0x008d0380 },
-   { 0x00600041, 0x22a077bd, 0x008d02a0, 0x008d03a0 },
-   { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
-   { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
-   { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
-   { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
-   { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
-   { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
-   { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
-   { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
-   { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
-   { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
commit 6db8faeb754897b21af045d00f50db9640b080bb
Author: Keith Packard <keithp at keithp.com>
Date:   Mon Mar 31 12:04:00 2008 -0700

    remove old monolithic shaders

diff --git a/src/exa_sf_mask.g4a b/src/exa_sf_mask.g4a
index 8701a10..5078d01 100644
--- a/src/exa_sf_mask.g4a
+++ b/src/exa_sf_mask.g4a
@@ -23,7 +23,7 @@
  * Authors:
  *    Keith Packard <keithp at keithp.com>
  *    Eric Anholt <eric at anholt.net>
- *
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
  */
 
 /*
diff --git a/src/exa_wm_maskca.g4a b/src/exa_wm_maskca.g4a
deleted file mode 100644
index d030467..0000000
--- a/src/exa_wm_maskca.g4a
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright © 2007 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Wang Zhenyu <zhenyu.z.wang at intel.com>
- */
-
-/*
- * This's for exa composite operation in no mask picture case.
- * The simplest case is just sending what src picture has to dst picture.
- * XXX: This is still experimental, and should be fixed to support multiple texture
- * map, and conditional mul actions. 
- */
-
-/* I think this should be same as in g4a program for texture video,
-   as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. */
-
-/* The initial payload of the thread is always g0.
- * WM_URB (incoming URB entries) is g3
-   As mask texture coeffient needs extra setup urb starting from g4, we should
-   shift this location. 
-
- * X0_R is g4->g6
- * X1_R is g5->g7
- * Y0_R is g6->g8
- * Y1_R is g7->g9
-
-     * X0: {ss0.x, ss0.x+1, ss0.x,   ss0.x+1, ss1.x, ss1.x+1, ss1.x,   ss1.x+y}
-     * Y0: {ss0.y, ss0.y,   ss0.y+1, ss0.y+1, ss1.y, ss1.y,   ss1.y+1, ss1.y+1}
-     * X1: {ss2.x, ss2.x+1, ss2.x,   ss2.x+1, ss3.x, ss3.x+1, ss3.x,   ss3.x+y}
-     * Y1: {ss2.y, ss2.y,   ss2.y+1, ss2.y+1, ss3.y, ss3.y,   ss3.y+1, ss3.y+1}
- */
-
-/* multitexture program with src and mask texture */
-/* - load src texture */
-/* - load mask texture */
-/* - mul src.X with mask's alpha */
-/* - write out src.X */
-
-    /* Set up ss0.x coordinates*/
-mov (1) g6<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.4<1>F g1.8<0,1,0>UW 1UD { align1 };
-mov (1) g6.8<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.12<1>F g1.8<0,1,0>UW 1UD { align1 };
-    /* Set up ss0.y coordinates */
-mov (1) g8<1>F g1.10<0,1,0>UW { align1 };
-mov (1) g8.4<1>F g1.10<0,1,0>UW { align1 };
-add (1) g8.8<1>F g1.10<0,1,0>UW 1UD { align1 };
-add (1) g8.12<1>F g1.10<0,1,0>UW 1UD { align1 };
-    /* set up ss1.x coordinates */
-mov (1) g6.16<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.20<1>F g1.12<0,1,0>UW 1UD { align1 };
-mov (1) g6.24<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.28<1>F g1.12<0,1,0>UW 1UD { align1 };
-    /* set up ss1.y coordinates */
-mov (1) g8.16<1>F g1.14<0,1,0>UW { align1 };
-mov (1) g8.20<1>F g1.14<0,1,0>UW { align1 };
-add (1) g8.24<1>F g1.14<0,1,0>UW 1UD { align1 };
-add (1) g8.28<1>F g1.14<0,1,0>UW 1UD { align1 };
-    /* Set up ss2.x coordinates */
-mov (1) g7<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.4<1>F g1.16<0,1,0>UW 1UD { align1 };
-mov (1) g7.8<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.12<1>F g1.16<0,1,0>UW 1UD { align1 };
-    /* Set up ss2.y coordinates */
-mov (1) g9<1>F g1.18<0,1,0>UW { align1 };
-mov (1) g9.4<1>F g1.18<0,1,0>UW { align1 };
-add (1) g9.8<1>F g1.18<0,1,0>UW 1UD { align1 };
-add (1) g9.12<1>F g1.18<0,1,0>UW 1UD { align1 };
-    /* Set up ss3.x coordinates */
-mov (1) g7.16<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.20<1>F g1.20<0,1,0>UW 1UD { align1 };
-mov (1) g7.24<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.28<1>F g1.20<0,1,0>UW 1UD { align1 };
-    /* Set up ss3.y coordinates */
-mov (1) g9.16<1>F g1.22<0,1,0>UW { align1 };
-mov (1) g9.20<1>F g1.22<0,1,0>UW { align1 };
-add (1) g9.24<1>F g1.22<0,1,0>UW 1UD { align1 };
-add (1) g9.28<1>F g1.22<0,1,0>UW 1UD { align1 };
-
-    /* Now, map these screen space coordinates into texture coordinates. */
-/* This is for src texture */
-/* I don't want to change origin ssX coords, as it will be used later in mask */
-/* so store tex coords in g10, g11, g12, g13 */
-
-    /* subtract screen-space X origin of vertex 0. */
-add (8) g10<1>F g6<8,8,1>F -g1<0,1,0>F { align1 };
-add (8) g11<1>F g7<8,8,1>F -g1<0,1,0>F { align1 };
-    /* scale by texture X increment */
-/* Cx[0] */
-mul (8) g10<1>F g10<8,8,1>F g3<0,1,0>F { align1 };
-mul (8) g11<1>F g11<8,8,1>F g3<0,1,0>F { align1 };
-    /* add in texture X offset */
-/* Co[0] */
-add (8) g10<1>F g10<8,8,1>F g3.12<0,1,0>F { align1 };
-add (8) g11<1>F g11<8,8,1>F g3.12<0,1,0>F { align1 };
-    /* subtract screen-space Y origin of vertex 0. */
-add (8) g12<1>F g8<8,8,1>F -g1.4<0,1,0>F { align1 };
-add (8) g13<1>F g9<8,8,1>F -g1.4<0,1,0>F { align1 };
-    /* scale by texture Y increment */
-/* Cy[0] */
-mul (8) g12<1>F g12<8,8,1>F g3.4<0,1,0>F { align1 };
-mul (8) g13<1>F g13<8,8,1>F g3.4<0,1,0>F { align1 };
-    /* add in texture Y offset */
-/* Co[1] */
-add (8) g12<1>F g12<8,8,1>F g3.28<0,1,0>F { align1 };
-add (8) g13<1>F g13<8,8,1>F g3.28<0,1,0>F { align1 };
-
-/* prepare sampler read back gX register, which would be written back to output */
-
-/* use simd16 sampler, param 0 is u, param 1 is v. */
-/* 'payload' loading, assuming tex coord start from g4 */
-mov (8) m1<1>F g10<8,8,1>F { align1 };
-mov (8) m2<1>F g11<8,8,1>F { align1 }; /* param 0 u in m1, m2 */
-mov (8) m3<1>F g12<8,8,1>F { align1 };
-mov (8) m4<1>F g13<8,8,1>F { align1 }; /* param 1 v in m3, m4 */
-
-/* m0 will be copied with g0, as it contains send desc */
-/* emit sampler 'send' cmd */
-
-/* src texture readback: g14-g21 */
-send (16) 0 		/* msg reg index */
-	g14<1>UW 	/* readback */
-	g0<8,8,1>UW  	/* copy to msg start reg*/
-	sampler (1,0,F)  /* sampler message description, 
-				(binding_table,sampler_index,datatype). 
-			    here(src->dst) we should use src_sampler and 
-			    src_surface */
-	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
-
-mov (8) g21<1>UD g21<8,8,1>UD { align1 };  /* wait sampler return */
-
-/* sampler mask texture, use g10, g11, g12, g13 */
-    /* subtract screen-space X origin of vertex 0. */
-add (8) g10<1>F g6<8,8,1>F -g1<0,1,0>F { align1 };
-add (8) g11<1>F g7<8,8,1>F -g1<0,1,0>F { align1 };
-    /* scale by texture X increment */
-/* Cx[2] */
-mul (8) g10<1>F g10<8,8,1>F g4<0,1,0>F { align1 };
-mul (8) g11<1>F g11<8,8,1>F g4<0,1,0>F { align1 };
-    /* add in texture X offset */
-/* Co[2] */
-add (8) g10<1>F g10<8,8,1>F g4.12<0,1,0>F { align1 };
-add (8) g11<1>F g11<8,8,1>F g4.12<0,1,0>F { align1 };
-    /* subtract screen-space Y origin of vertex 0. */
-add (8) g12<1>F g8<8,8,1>F -g1.4<0,1,0>F { align1 };
-add (8) g13<1>F g9<8,8,1>F -g1.4<0,1,0>F { align1 };
-    /* scale by texture Y increment */
-/* Cy[2] */
-mul (8) g12<1>F g12<8,8,1>F g4.4<0,1,0>F { align1 };
-mul (8) g13<1>F g13<8,8,1>F g4.4<0,1,0>F { align1 };
-    /* add in texture Y offset */
-/* Co[3] */
-add (8) g12<1>F g12<8,8,1>F g4.28<0,1,0>F { align1 };
-add (8) g13<1>F g13<8,8,1>F g4.28<0,1,0>F { align1 };
-
-mov (8) m1<1>F g10<8,8,1>F { align1 };
-mov (8) m2<1>F g11<8,8,1>F { align1 }; 
-mov (8) m3<1>F g12<8,8,1>F { align1 };
-mov (8) m4<1>F g13<8,8,1>F { align1 };
-
-/* mask sampler g22-g29 */
-/* binding_table (2), sampler (1) */
-send (16) 0 g22<1>UW g0<8,8,1>UW sampler (2,1,F) mlen 5 rlen 8 { align1 };
-mov (8) g29<1>UD g29<8,8,1>UD { align1 };  /* wait sampler return */
-
-/* mul mask's channel to src, then write out src */
-mul (8) g14<1>F g14<8,8,1>F g22<8,8,1>F { align1 };
-mul (8) g15<1>F g15<8,8,1>F g23<8,8,1>F { align1 };
-mul (8) g16<1>F g16<8,8,1>F g24<8,8,1>F { align1 };
-mul (8) g17<1>F g17<8,8,1>F g25<8,8,1>F { align1 };
-mul (8) g18<1>F g18<8,8,1>F g26<8,8,1>F { align1 };
-mul (8) g19<1>F g19<8,8,1>F g27<8,8,1>F { align1 };
-mul (8) g20<1>F g20<8,8,1>F g28<8,8,1>F { align1 };
-mul (8) g21<1>F g21<8,8,1>F g29<8,8,1>F { align1 };
-
-/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
-mov (8) m2<1>F g14<8,8,1>F { align1 };
-mov (8) m3<1>F g16<8,8,1>F { align1 };
-mov (8) m4<1>F g18<8,8,1>F { align1 };
-mov (8) m5<1>F g20<8,8,1>F { align1 };
-mov (8) m6<1>F g15<8,8,1>F { align1 };
-mov (8) m7<1>F g17<8,8,1>F { align1 };
-mov (8) m8<1>F g19<8,8,1>F { align1 };
-mov (8) m9<1>F g21<8,8,1>F { align1 };
-
-/* m0, m1 are all direct passed by PS thread payload */
-mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
-
-/* write */
-send (16) 0 acc0<1>UW g0<8,8,1>UW write (
-	0,  /* binding_table */
-	8,  /* pixel scordboard clear, msg type simd16 single source */
-	4,  /* render target write */
-	0   /* no write commit message */
-	) 
-	mlen 10
-	rlen 0
-	{ align1 EOT };
-
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
diff --git a/src/exa_wm_maskca_srcalpha.g4a b/src/exa_wm_maskca_srcalpha.g4a
deleted file mode 100644
index 133c9f0..0000000
--- a/src/exa_wm_maskca_srcalpha.g4a
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright © 2007 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Wang Zhenyu <zhenyu.z.wang at intel.com>
- */
-
-/*
- * This's for exa composite operation in no mask picture case.
- * The simplest case is just sending what src picture has to dst picture.
- * XXX: This is still experimental, and should be fixed to support multiple texture
- * map, and conditional mul actions. 
- */
-
-/* I think this should be same as in g4a program for texture video,
-   as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. */
-
-/* The initial payload of the thread is always g0.
- * WM_URB (incoming URB entries) is g3
-   As mask texture coeffient needs extra setup urb starting from g4, we should
-   shift this location. 
-
- * X0_R is g4->g6
- * X1_R is g5->g7
- * Y0_R is g6->g8
- * Y1_R is g7->g9
-
-     * X0: {ss0.x, ss0.x+1, ss0.x,   ss0.x+1, ss1.x, ss1.x+1, ss1.x,   ss1.x+y}
-     * Y0: {ss0.y, ss0.y,   ss0.y+1, ss0.y+1, ss1.y, ss1.y,   ss1.y+1, ss1.y+1}
-     * X1: {ss2.x, ss2.x+1, ss2.x,   ss2.x+1, ss3.x, ss3.x+1, ss3.x,   ss3.x+y}
-     * Y1: {ss2.y, ss2.y,   ss2.y+1, ss2.y+1, ss3.y, ss3.y,   ss3.y+1, ss3.y+1}
- */
-
-/* multitexture program with src and mask texture */
-/* - load src texture */
-/* - load mask texture */
-/* - mul src.X with mask's alpha */
-/* - write out src.X */
-
-    /* Set up ss0.x coordinates*/
-mov (1) g6<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.4<1>F g1.8<0,1,0>UW 1UD { align1 };
-mov (1) g6.8<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.12<1>F g1.8<0,1,0>UW 1UD { align1 };
-    /* Set up ss0.y coordinates */
-mov (1) g8<1>F g1.10<0,1,0>UW { align1 };
-mov (1) g8.4<1>F g1.10<0,1,0>UW { align1 };
-add (1) g8.8<1>F g1.10<0,1,0>UW 1UD { align1 };
-add (1) g8.12<1>F g1.10<0,1,0>UW 1UD { align1 };
-    /* set up ss1.x coordinates */
-mov (1) g6.16<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.20<1>F g1.12<0,1,0>UW 1UD { align1 };
-mov (1) g6.24<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.28<1>F g1.12<0,1,0>UW 1UD { align1 };
-    /* set up ss1.y coordinates */
-mov (1) g8.16<1>F g1.14<0,1,0>UW { align1 };
-mov (1) g8.20<1>F g1.14<0,1,0>UW { align1 };
-add (1) g8.24<1>F g1.14<0,1,0>UW 1UD { align1 };
-add (1) g8.28<1>F g1.14<0,1,0>UW 1UD { align1 };
-    /* Set up ss2.x coordinates */
-mov (1) g7<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.4<1>F g1.16<0,1,0>UW 1UD { align1 };
-mov (1) g7.8<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.12<1>F g1.16<0,1,0>UW 1UD { align1 };
-    /* Set up ss2.y coordinates */
-mov (1) g9<1>F g1.18<0,1,0>UW { align1 };
-mov (1) g9.4<1>F g1.18<0,1,0>UW { align1 };
-add (1) g9.8<1>F g1.18<0,1,0>UW 1UD { align1 };
-add (1) g9.12<1>F g1.18<0,1,0>UW 1UD { align1 };
-    /* Set up ss3.x coordinates */
-mov (1) g7.16<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.20<1>F g1.20<0,1,0>UW 1UD { align1 };
-mov (1) g7.24<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.28<1>F g1.20<0,1,0>UW 1UD { align1 };
-    /* Set up ss3.y coordinates */
-mov (1) g9.16<1>F g1.22<0,1,0>UW { align1 };
-mov (1) g9.20<1>F g1.22<0,1,0>UW { align1 };
-add (1) g9.24<1>F g1.22<0,1,0>UW 1UD { align1 };
-add (1) g9.28<1>F g1.22<0,1,0>UW 1UD { align1 };
-
-    /* Now, map these screen space coordinates into texture coordinates. */
-/* This is for src texture */
-/* I don't want to change origin ssX coords, as it will be used later in mask */
-/* so store tex coords in g10, g11, g12, g13 */
-
-    /* subtract screen-space X origin of vertex 0. */
-add (8) g10<1>F g6<8,8,1>F -g1<0,1,0>F { align1 };
-add (8) g11<1>F g7<8,8,1>F -g1<0,1,0>F { align1 };
-    /* scale by texture X increment */
-/* Cx[0] */
-mul (8) g10<1>F g10<8,8,1>F g3<0,1,0>F { align1 };
-mul (8) g11<1>F g11<8,8,1>F g3<0,1,0>F { align1 };
-    /* add in texture X offset */
-/* Co[0] */
-add (8) g10<1>F g10<8,8,1>F g3.12<0,1,0>F { align1 };
-add (8) g11<1>F g11<8,8,1>F g3.12<0,1,0>F { align1 };
-    /* subtract screen-space Y origin of vertex 0. */
-add (8) g12<1>F g8<8,8,1>F -g1.4<0,1,0>F { align1 };
-add (8) g13<1>F g9<8,8,1>F -g1.4<0,1,0>F { align1 };
-    /* scale by texture Y increment */
-/* Cy[0] */
-mul (8) g12<1>F g12<8,8,1>F g3.4<0,1,0>F { align1 };
-mul (8) g13<1>F g13<8,8,1>F g3.4<0,1,0>F { align1 };
-    /* add in texture Y offset */
-/* Co[1] */
-add (8) g12<1>F g12<8,8,1>F g3.28<0,1,0>F { align1 };
-add (8) g13<1>F g13<8,8,1>F g3.28<0,1,0>F { align1 };
-
-/* prepare sampler read back gX register, which would be written back to output */
-
-/* use simd16 sampler, param 0 is u, param 1 is v. */
-/* 'payload' loading, assuming tex coord start from g4 */
-mov (8) m1<1>F g10<8,8,1>F { align1 };
-mov (8) m2<1>F g11<8,8,1>F { align1 }; /* param 0 u in m1, m2 */
-mov (8) m3<1>F g12<8,8,1>F { align1 };
-mov (8) m4<1>F g13<8,8,1>F { align1 }; /* param 1 v in m3, m4 */
-
-/* m0 will be copied with g0, as it contains send desc */
-/* emit sampler 'send' cmd */
-
-/* src texture readback: g14-g21 */
-send (16) 0 		/* msg reg index */
-	g14<1>UW 	/* readback */
-	g0<8,8,1>UW  	/* copy to msg start reg*/
-	sampler (1,0,F)  /* sampler message description, 
-				(binding_table,sampler_index,datatype). 
-			    here(src->dst) we should use src_sampler and 
-			    src_surface */
-	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
-
-mov (8) g21<1>UD g21<8,8,1>UD { align1 };  /* wait sampler return */
-
-/* sampler mask texture, use g10, g11, g12, g13 */
-    /* subtract screen-space X origin of vertex 0. */
-add (8) g10<1>F g6<8,8,1>F -g1<0,1,0>F { align1 };
-add (8) g11<1>F g7<8,8,1>F -g1<0,1,0>F { align1 };
-    /* scale by texture X increment */
-/* Cx[2] */
-mul (8) g10<1>F g10<8,8,1>F g4<0,1,0>F { align1 };
-mul (8) g11<1>F g11<8,8,1>F g4<0,1,0>F { align1 };
-    /* add in texture X offset */
-/* Co[2] */
-add (8) g10<1>F g10<8,8,1>F g4.12<0,1,0>F { align1 };
-add (8) g11<1>F g11<8,8,1>F g4.12<0,1,0>F { align1 };
-    /* subtract screen-space Y origin of vertex 0. */
-add (8) g12<1>F g8<8,8,1>F -g1.4<0,1,0>F { align1 };
-add (8) g13<1>F g9<8,8,1>F -g1.4<0,1,0>F { align1 };
-    /* scale by texture Y increment */
-/* Cy[2] */
-mul (8) g12<1>F g12<8,8,1>F g4.4<0,1,0>F { align1 };
-mul (8) g13<1>F g13<8,8,1>F g4.4<0,1,0>F { align1 };
-    /* add in texture Y offset */
-/* Co[3] */
-add (8) g12<1>F g12<8,8,1>F g4.28<0,1,0>F { align1 };
-add (8) g13<1>F g13<8,8,1>F g4.28<0,1,0>F { align1 };
-
-mov (8) m1<1>F g10<8,8,1>F { align1 };
-mov (8) m2<1>F g11<8,8,1>F { align1 }; 
-mov (8) m3<1>F g12<8,8,1>F { align1 };
-mov (8) m4<1>F g13<8,8,1>F { align1 };
-
-/* mask sampler g22-g29 */
-/* binding_table (2), sampler (1) */
-send (16) 0 g22<1>UW g0<8,8,1>UW sampler (2,1,F) mlen 5 rlen 8 { align1 };
-mov (8) g29<1>UD g29<8,8,1>UD { align1 };  /* wait sampler return */
-
-/* src channel has no more use, src.A * mask.C */
-mul (8) g14<1>F g22<8,8,1>F g20<8,8,1>F { align1 };
-mul (8) g15<1>F g23<8,8,1>F g21<8,8,1>F { align1 };
-mul (8) g16<1>F g24<8,8,1>F g20<8,8,1>F { align1 };
-mul (8) g17<1>F g25<8,8,1>F g21<8,8,1>F { align1 };
-mul (8) g18<1>F g26<8,8,1>F g20<8,8,1>F { align1 };
-mul (8) g19<1>F g27<8,8,1>F g21<8,8,1>F { align1 };
-mul (8) g20<1>F g28<8,8,1>F g20<8,8,1>F { align1 };
-mul (8) g21<1>F g29<8,8,1>F g21<8,8,1>F { align1 };
-
-/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
-mov (8) m2<1>F g14<8,8,1>F { align1 };
-mov (8) m3<1>F g16<8,8,1>F { align1 };
-mov (8) m4<1>F g18<8,8,1>F { align1 };
-mov (8) m5<1>F g20<8,8,1>F { align1 };
-mov (8) m6<1>F g15<8,8,1>F { align1 };
-mov (8) m7<1>F g17<8,8,1>F { align1 };
-mov (8) m8<1>F g19<8,8,1>F { align1 };
-mov (8) m9<1>F g21<8,8,1>F { align1 };
-
-/* m0, m1 are all direct passed by PS thread payload */
-mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
-
-/* write */
-send (16) 0 acc0<1>UW g0<8,8,1>UW write (
-	0,  /* binding_table */
-	8,  /* pixel scordboard clear, msg type simd16 single source */
-	4,  /* render target write */
-	0   /* no write commit message */
-	) 
-	mlen 10
-	rlen 0
-	{ align1 EOT };
-
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
diff --git a/src/exa_wm_masknoca.g4a b/src/exa_wm_masknoca.g4a
deleted file mode 100644
index 44f6953..0000000
--- a/src/exa_wm_masknoca.g4a
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright © 2006 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Wang Zhenyu <zhenyu.z.wang at intel.com>
- */
-
-/*
- * This's for exa composite operation in no mask picture case.
- * The simplest case is just sending what src picture has to dst picture.
- * XXX: This is still experimental, and should be fixed to support multiple texture
- * map, and conditional mul actions. 
- */
-
-/* I think this should be same as in g4a program for texture video,
-   as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. */
-
-/* The initial payload of the thread is always g0.
- * WM_URB (incoming URB entries) is g3
-   As mask texture coeffient needs extra setup urb starting from g4, we should
-   shift this location. 
-
- * X0_R is g4->g6
- * X1_R is g5->g7
- * Y0_R is g6->g8
- * Y1_R is g7->g9
-
-     * X0: {ss0.x, ss0.x+1, ss0.x,   ss0.x+1, ss1.x, ss1.x+1, ss1.x,   ss1.x+y}
-     * Y0: {ss0.y, ss0.y,   ss0.y+1, ss0.y+1, ss1.y, ss1.y,   ss1.y+1, ss1.y+1}
-     * X1: {ss2.x, ss2.x+1, ss2.x,   ss2.x+1, ss3.x, ss3.x+1, ss3.x,   ss3.x+y}
-     * Y1: {ss2.y, ss2.y,   ss2.y+1, ss2.y+1, ss3.y, ss3.y,   ss3.y+1, ss3.y+1}
- */
-
-/* multitexture program with src and mask texture */
-/* - load src texture */
-/* - load mask texture */
-/* - mul src.X with mask's alpha */
-/* - write out src.X */
-
-    /* Set up ss0.x coordinates*/
-mov (1) g6<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.4<1>F g1.8<0,1,0>UW 1UD { align1 };
-mov (1) g6.8<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.12<1>F g1.8<0,1,0>UW 1UD { align1 };
-    /* Set up ss0.y coordinates */
-mov (1) g8<1>F g1.10<0,1,0>UW { align1 };
-mov (1) g8.4<1>F g1.10<0,1,0>UW { align1 };
-add (1) g8.8<1>F g1.10<0,1,0>UW 1UD { align1 };
-add (1) g8.12<1>F g1.10<0,1,0>UW 1UD { align1 };
-    /* set up ss1.x coordinates */
-mov (1) g6.16<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.20<1>F g1.12<0,1,0>UW 1UD { align1 };
-mov (1) g6.24<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.28<1>F g1.12<0,1,0>UW 1UD { align1 };
-    /* set up ss1.y coordinates */
-mov (1) g8.16<1>F g1.14<0,1,0>UW { align1 };
-mov (1) g8.20<1>F g1.14<0,1,0>UW { align1 };
-add (1) g8.24<1>F g1.14<0,1,0>UW 1UD { align1 };
-add (1) g8.28<1>F g1.14<0,1,0>UW 1UD { align1 };
-    /* Set up ss2.x coordinates */
-mov (1) g7<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.4<1>F g1.16<0,1,0>UW 1UD { align1 };
-mov (1) g7.8<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.12<1>F g1.16<0,1,0>UW 1UD { align1 };
-    /* Set up ss2.y coordinates */
-mov (1) g9<1>F g1.18<0,1,0>UW { align1 };
-mov (1) g9.4<1>F g1.18<0,1,0>UW { align1 };
-add (1) g9.8<1>F g1.18<0,1,0>UW 1UD { align1 };
-add (1) g9.12<1>F g1.18<0,1,0>UW 1UD { align1 };
-    /* Set up ss3.x coordinates */
-mov (1) g7.16<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.20<1>F g1.20<0,1,0>UW 1UD { align1 };
-mov (1) g7.24<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.28<1>F g1.20<0,1,0>UW 1UD { align1 };
-    /* Set up ss3.y coordinates */
-mov (1) g9.16<1>F g1.22<0,1,0>UW { align1 };
-mov (1) g9.20<1>F g1.22<0,1,0>UW { align1 };
-add (1) g9.24<1>F g1.22<0,1,0>UW 1UD { align1 };
-add (1) g9.28<1>F g1.22<0,1,0>UW 1UD { align1 };
-
-    /* Now, map these screen space coordinates into texture coordinates. */
-/* This is for src texture */
-/* I don't want to change origin ssX coords, as it will be used later in mask */
-/* so store tex coords in g10, g11, g12, g13 */
-
-    /* subtract screen-space X origin of vertex 0. */
-add (8) g10<1>F g6<8,8,1>F -g1<0,1,0>F { align1 };
-add (8) g11<1>F g7<8,8,1>F -g1<0,1,0>F { align1 };
-    /* scale by texture X increment */
-/* Cx[0] */
-mul (8) g10<1>F g10<8,8,1>F g3<0,1,0>F { align1 };
-mul (8) g11<1>F g11<8,8,1>F g3<0,1,0>F { align1 };
-    /* add in texture X offset */
-/* Co[0] */
-add (8) g10<1>F g10<8,8,1>F g3.12<0,1,0>F { align1 };
-add (8) g11<1>F g11<8,8,1>F g3.12<0,1,0>F { align1 };
-    /* subtract screen-space Y origin of vertex 0. */
-add (8) g12<1>F g8<8,8,1>F -g1.4<0,1,0>F { align1 };
-add (8) g13<1>F g9<8,8,1>F -g1.4<0,1,0>F { align1 };
-    /* scale by texture Y increment */
-/* Cy[0] */
-mul (8) g12<1>F g12<8,8,1>F g3.4<0,1,0>F { align1 };
-mul (8) g13<1>F g13<8,8,1>F g3.4<0,1,0>F { align1 };
-    /* add in texture Y offset */
-/* Co[1] */
-add (8) g12<1>F g12<8,8,1>F g3.28<0,1,0>F { align1 };
-add (8) g13<1>F g13<8,8,1>F g3.28<0,1,0>F { align1 };
-
-/* prepare sampler read back gX register, which would be written back to output */
-
-/* use simd16 sampler, param 0 is u, param 1 is v. */
-/* 'payload' loading, assuming tex coord start from g4 */
-mov (8) m1<1>F g10<8,8,1>F { align1 };
-mov (8) m2<1>F g11<8,8,1>F { align1 }; /* param 0 u in m1, m2 */
-mov (8) m3<1>F g12<8,8,1>F { align1 };
-mov (8) m4<1>F g13<8,8,1>F { align1 }; /* param 1 v in m3, m4 */
-
-/* m0 will be copied with g0, as it contains send desc */
-/* emit sampler 'send' cmd */
-
-/* src texture readback: g14-g21 */
-send (16) 0 		/* msg reg index */
-	g14<1>UW 	/* readback */
-	g0<8,8,1>UW  	/* copy to msg start reg*/
-	sampler (1,0,F)  /* sampler message description, 
-				(binding_table,sampler_index,datatype). 
-			    here(src->dst) we should use src_sampler and 
-			    src_surface */
-	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
-
-mov (8) g21<1>UD g21<8,8,1>UD { align1 };  /* wait sampler return */
-
-/* sampler mask texture, use g10, g11, g12, g13 */
-    /* subtract screen-space X origin of vertex 0. */
-add (8) g10<1>F g6<8,8,1>F -g1<0,1,0>F { align1 };
-add (8) g11<1>F g7<8,8,1>F -g1<0,1,0>F { align1 };
-    /* scale by texture X increment */
-/* Cx[2] */
-mul (8) g10<1>F g10<8,8,1>F g4<0,1,0>F { align1 };
-mul (8) g11<1>F g11<8,8,1>F g4<0,1,0>F { align1 };
-    /* add in texture X offset */
-/* Co[2] */
-add (8) g10<1>F g10<8,8,1>F g4.12<0,1,0>F { align1 };
-add (8) g11<1>F g11<8,8,1>F g4.12<0,1,0>F { align1 };
-    /* subtract screen-space Y origin of vertex 0. */
-add (8) g12<1>F g8<8,8,1>F -g1.4<0,1,0>F { align1 };
-add (8) g13<1>F g9<8,8,1>F -g1.4<0,1,0>F { align1 };
-    /* scale by texture Y increment */
-/* Cy[2] */
-mul (8) g12<1>F g12<8,8,1>F g4.4<0,1,0>F { align1 };
-mul (8) g13<1>F g13<8,8,1>F g4.4<0,1,0>F { align1 };
-    /* add in texture Y offset */
-/* Co[3] */
-add (8) g12<1>F g12<8,8,1>F g4.28<0,1,0>F { align1 };
-add (8) g13<1>F g13<8,8,1>F g4.28<0,1,0>F { align1 };
-
-mov (8) m1<1>F g10<8,8,1>F { align1 };
-mov (8) m2<1>F g11<8,8,1>F { align1 }; 
-mov (8) m3<1>F g12<8,8,1>F { align1 };
-mov (8) m4<1>F g13<8,8,1>F { align1 };
-
-/* mask sampler g22-g29 */
-/* binding_table (2), sampler (1) */
-send (16) 0 g22<1>UW g0<8,8,1>UW sampler (2,1,F) mlen 5 rlen 8 { align1 };
-mov (8) g29<1>UD g29<8,8,1>UD { align1 };  /* wait sampler return */
-
-/* mul mask's alpha channel g28,g29 to src (g14-g21), then write out src */
-mul (8) g14<1>F g14<8,8,1>F g28<8,8,1>F { align1 };
-mul (8) g15<1>F g15<8,8,1>F g29<8,8,1>F { align1 };
-mul (8) g16<1>F g16<8,8,1>F g28<8,8,1>F { align1 };
-mul (8) g17<1>F g17<8,8,1>F g29<8,8,1>F { align1 };
-mul (8) g18<1>F g18<8,8,1>F g28<8,8,1>F { align1 };
-mul (8) g19<1>F g19<8,8,1>F g29<8,8,1>F { align1 };
-mul (8) g20<1>F g20<8,8,1>F g28<8,8,1>F { align1 };
-mul (8) g21<1>F g21<8,8,1>F g29<8,8,1>F { align1 };
-
-/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
-mov (8) m2<1>F g14<8,8,1>F { align1 };
-mov (8) m3<1>F g16<8,8,1>F { align1 };
-mov (8) m4<1>F g18<8,8,1>F { align1 };
-mov (8) m5<1>F g20<8,8,1>F { align1 };
-mov (8) m6<1>F g15<8,8,1>F { align1 };
-mov (8) m7<1>F g17<8,8,1>F { align1 };
-mov (8) m8<1>F g19<8,8,1>F { align1 };
-mov (8) m9<1>F g21<8,8,1>F { align1 };
-
-/* m0, m1 are all direct passed by PS thread payload */
-mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
-
-/* write */
-send (16) 0 acc0<1>UW g0<8,8,1>UW write (
-	0,  /* binding_table */
-	8,  /* pixel scordboard clear, msg type simd16 single source */
-	4,  /* render target write */
-	0   /* no write commit message */
-	) 
-	mlen 10
-	rlen 0
-	{ align1 EOT };
-
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
commit 6bb92213374f278387c539bbe05b773e87e11b90
Author: Keith Packard <keithp at keithp.com>
Date:   Mon Mar 31 11:59:14 2008 -0700

    Back to new shaders, fix urb read length

diff --git a/src/exa_sf_mask.g4a b/src/exa_sf_mask.g4a
index a0d6efc..8701a10 100644
--- a/src/exa_sf_mask.g4a
+++ b/src/exa_sf_mask.g4a
@@ -21,52 +21,81 @@
  * IN THE SOFTWARE.
  *
  * Authors:
- *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ *    Eric Anholt <eric at anholt.net>
+ *
  */
 
-/* FIXME how to setup second coeffient for mask tex coord */
-
-/* 
-   g3 (v0) { u0, v0, 1.0, 1.0 }  ==> {u0, v0, 1.0, 1.0, mu0, mv0, 1.0, 1.0}  Co[0](u0) Co[1](v0) Co[2](mu0) Co[3](mv0)
-   g4 (v1) { u1, v1, 1.0, 1.0 }  ==> {u1, v1, 1.0, 1.0, mu1, mv1, 1.0, 1.0}
-   g5 (v2) { u2, v2 }  ==> (u2, v2, mu2, mv2}
-   g6      { 1/(x1-x0), 1/(y1-y0) }
-   g7      { u1-u0, v1-v0, 0, 0}  ==>{u1-u0, v1-v0,0, 0, mu1-mu0, mv1-mv0, 0, 0}
-	   -> { (u1-u0)/(x1-x0), (v1-v0)/(y1-y0) }  ==>{(u1-u0)/(x1-x0), (v1-v0)/(y1-y0),(mu1-mu0)/(x1-x0), (mv1-mv0)/(y1-y0)
-		Cx,		 Cy 			Cx[0],		 Cy[0],		 Cx[1], 	    Cy[1]
+/*
+ * Inputs (note all sub-register addresses are bytes, not float indices)
+ *
+ * Note that the vertices will have been reordered:
+ *
+ * V0 is topmost (leftmost among topmost) (upper left)
+ * V1 is next clockwise (lower right)
+ * V2 is remaining (lower left)
+ *
+ *  V0 ...................... XX
+ *  |                          .
+ *  |                          .
+ *  |                          .
+ *  V2------------------------V1
+ *
+ *  G0	    thread state -- just pass along
+ *
+ *  G1 and G2 are fixed by SF spec
+ *
+ *  G1.0    reserved
+ *  G1.4    Provoking vertex
+ *  G1.8    Determinant
+ *  G1.12   X1 - X0
+ *  G1.16   X2 - X0
+ *  G1.20   Y1 - Y0
+ *  G1.24   Y2 - Y0
+ *  G1.28   reserved
+ *
+ *  G2.0    Z0
+ *  G2.4    1/W0
+ *  G2.8    Z1
+ *  G2.12   1/W1
+ *  G2.16   Z2
+ *  G2.20   1/W2
+ *  G2.24   reserved
+ *  G2.28   reserved
+ *
+ *  G3 is V0 Vertex Attribute Data from URB (upper left)
+ *
+ *  G3.0    u0
+ *  G3.4    v0
+ *
+ *  G4 is V1 Vertex Attribute Data from URB (lower right)
+ *
+ *  G4.0    u1
+ *  G4.4    v1
+ *
+ *  G5 is V2 Vertex Attribute Data from URB (lower left)
+ *
  */
 
-/* assign Cx[0], Cx[1] to src, same to Cy, Co 
-          Cx[2], Cx[3] to mask, same to Cy, Co */
+/* Compute inverses of the input deltas */
+send (4) 0 g6<1>F g1.12<4,4,1>F math inv mlen 1 rlen 1 { align1 };
 
-send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
-send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
-add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 };
-/* Cx[0] */
-mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 };
-/* Cy[0] */
-mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 };
-/* Cx[2] */
-mul (1) g7.8<1>F g7.8<0,1,0>F g6<0,1,0>F { align1 };
-/* Cy[2] */
-mul (1) g7.12<1>F g7.12<0,1,0>F g6.4<0,1,0>F { align1 };
-
-/* src Cx[0], Cx[1] */
-mov (8) m1<1>F g7<0,1,0>F { align1 };
-/* mask Cx[2], Cx[3] */
-mov (1) m1.8<1>F g7.8<0,1,0>F { align1 };
-mov (1) m1.12<1>F g7.8<0,1,0>F { align1 };
-/* src Cy[0], Cy[1] */
-mov (8) m2<1>F g7.4<0,1,0>F { align1 };
-/* mask Cy[2], Cy[3] */
-mov (1) m2.8<1>F g7.12<0,1,0>F { align1 };
-mov (1) m2.12<1>F g7.12<0,1,0>F { align1 };
-/* src Co[0], Co[1] */
+/* texture location at V0 */
 mov (8) m3<1>F g3<8,8,1>F { align1 };
-/* mask Co[2], Co[3] */
-mov (1) m3.8<1>F g3.8<0,1,0>F { align1 };
-mov (1) m3.12<1>F g3.12<0,1,0>F { align1 };
 
+/* compute V1 - V2 (motion in X) for texture coordinates */
+add (8) g7<1>F g4<8,8,1>F -g5<8,8,1>F { align1 };
+
+/* multiply by 1/dx */
+mul (8) m1<1>F g7<8,8,1>F g6.0<0,1,0>F { align1 };
+
+/* Compute V2 - V0 (motion in Y) for texture coordinates */
+add (8) g7<1>F g5<8,8,1>F -g3<8,8,1>F { align1 };
+
+/* multiply by 1/dy */
+mul (8) m2<1>F g7<8,8,1>F g6.8<0,1,0>F {align1 };
+
+/* and we're done */
 send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
 nop;
 nop;
diff --git a/src/exa_sf_mask.g4b b/src/exa_sf_mask.g4b
index 4e9114d..be0a77b 100644
--- a/src/exa_sf_mask.g4b
+++ b/src/exa_sf_mask.g4b
@@ -1,19 +1,9 @@
-   { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
-   { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
-   { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
-   { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
-   { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
-   { 0x00000041, 0x20e877bd, 0x000000e8, 0x000000c0 },
-   { 0x00000041, 0x20ec77bd, 0x000000ec, 0x000000c4 },
-   { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
-   { 0x00000001, 0x202803be, 0x000000e8, 0x00000000 },
-   { 0x00000001, 0x202c03be, 0x000000e8, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
-   { 0x00000001, 0x204803be, 0x000000ec, 0x00000000 },
-   { 0x00000001, 0x204c03be, 0x000000ec, 0x00000000 },
+   { 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 },
    { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
-   { 0x00000001, 0x206803be, 0x00000068, 0x00000000 },
-   { 0x00000001, 0x206c03be, 0x0000006c, 0x00000000 },
+   { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 },
+   { 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
+   { 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 },
+   { 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
    { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm.g4i b/src/exa_wm.g4i
index c7ecb09..1be40e7 100644
--- a/src/exa_wm.g4i
+++ b/src/exa_wm.g4i
@@ -29,8 +29,6 @@
  * Input parameters
  */
 
-define(`quote', `ifelse(`$#', `0', `', ``$*'')')
-
 /* Destination X/Y */
 define(`dst_x_uw',  `g1.8<2,4,0>UW')
 define(`dst_y_uw',  `g1.10<2,4,0>UW')
@@ -48,15 +46,15 @@ define(`src_dw_dx', `g4.0<0,1,0>F')
 define(`src_dw_dy', `g4.4<0,1,0>F')
 define(`src_wo',    `g4.12<0,1,0>F')
 
-define(`mask_du_dx', `g4.16<0,1,0>F')
-define(`mask_du_dy', `g4.20<0,1,0>F')
-define(`mask_uo',    `g4.28<0,1,0>F')
-define(`mask_dv_dx', `g5.0<0,1,0>F')
-define(`mask_dv_dy', `g5.4<0,1,0>F')
-define(`mask_vo',    `g5.12<0,1,0>F')
-define(`mask_dw_dx', `g5.16<0,1,0>F')
-define(`mask_dw_dy', `g5.20<0,1,0>F')
-define(`mask_wo',    `g5.28<0,1,0>F')
+define(`mask_du_dx', `g5.0<0,1,0>F')
+define(`mask_du_dy', `g5.4<0,1,0>F')
+define(`mask_uo',    `g5.12<0,1,0>F')
+define(`mask_dv_dx', `g5.16<0,1,0>F')
+define(`mask_dv_dy', `g5.20<0,1,0>F')
+define(`mask_vo',    `g5.28<0,1,0>F')
+define(`mask_dw_dx', `g6.0<0,1,0>F')
+define(`mask_dw_dy', `g6.4<0,1,0>F')
+define(`mask_wo',    `g6.12<0,1,0>F')
 
 /*
  * Local variables
diff --git a/src/i965_render.c b/src/i965_render.c
index 7668779..e348c2b 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -347,8 +347,6 @@ static const uint32_t ps_kernel_static_nomask_projective [][4] = {
 };
 
 static const uint32_t ps_kernel_static_maskca [][4] = {
-#include "exa_wm_maskca.g4b"
-#if 0
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
 #include "exa_wm_src_sample.g4b"
@@ -356,12 +354,9 @@ static const uint32_t ps_kernel_static_maskca [][4] = {
 #include "exa_wm_mask_sample.g4b"
 #include "exa_wm_ca.g4b"
 #include "exa_wm_write.g4b"
-#endif
 };
 
 static const uint32_t ps_kernel_static_maskca_srcalpha [][4] = {
-#include "exa_wm_maskca_srcalpha.g4b"
-#if 0
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
 #include "exa_wm_src_sample.g4b"
@@ -369,12 +364,9 @@ static const uint32_t ps_kernel_static_maskca_srcalpha [][4] = {
 #include "exa_wm_mask_sample.g4b"
 #include "exa_wm_ca_srcalpha.g4b"
 #include "exa_wm_write.g4b"
-#endif
 };
 
 static const uint32_t ps_kernel_static_masknoca [][4] = {
-#include "exa_wm_masknoca.g4b"
-#if 0
 #include "exa_wm_xy.g4b"
 #include "exa_wm_src_affine.g4b"
 #include "exa_wm_src_sample.g4b"
@@ -382,7 +374,6 @@ static const uint32_t ps_kernel_static_masknoca [][4] = {
 #include "exa_wm_mask_sample.g4b"
 #include "exa_wm_noca.g4b"
 #include "exa_wm_write.g4b"
-#endif
 };
 
 static uint32_t 
@@ -907,9 +898,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     wm_state->thread3.const_urb_entry_read_offset = 0;
     /* Each pair of attributes (src/mask coords) is one URB entry */
     if (pMask)
-	wm_state->thread3.urb_entry_read_length = 2;
+	wm_state->thread3.urb_entry_read_length = 4;
     else
-	wm_state->thread3.urb_entry_read_length = 1;
+	wm_state->thread3.urb_entry_read_length = 2;
     wm_state->thread3.urb_entry_read_offset = 0;
     /* wm kernel use urb from 3, see wm_program in compiler module */
     wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
commit 05710145b6fc4ed2c528128b2e6022591a53d050
Author: Keith Packard <keithp at keithp.com>
Date:   Mon Mar 31 11:31:31 2008 -0700

    Dont set the compr bit on 8-unit sends

diff --git a/src/exa_wm_projective.g4i b/src/exa_wm_projective.g4i
index 13da99c..3c3bbf0 100644
--- a/src/exa_wm_projective.g4i
+++ b/src/exa_wm_projective.g4i
@@ -31,8 +31,8 @@ mul (16)	temp_x<1>F	dst_x<8,8,1>F	dw_dx		{ compr align1 };
 mul (16)	temp_y<1>F	dst_y<8,8,1>F	dw_dy		{ compr align1 };
 add (16)	temp_x<1>F	temp_x<8,8,1>F	temp_y<8,8,1>F	{ compr align1 };
 add (16)	temp_x<1>F	temp_x<8,8,1>F	wo		{ compr align1 };
-send (8) 0	w_0<1>F		temp_x_0<8,8,1>F math inv mlen 1 rlen 1	{ compr align1 };
-send (8) 0	w_1<1>F		temp_x_1<8,8,1>F math inv mlen 1 rlen 1	{ compr align1 };
+send (8) 0	w_0<1>F		temp_x_0<8,8,1>F math inv mlen 1 rlen 1	{ align1 };
+send (8) 0	w_1<1>F		temp_x_1<8,8,1>F math inv mlen 1 rlen 1	{ align1 };
 
 /********** Compute u *************/
 


More information about the xorg-commit mailing list