[PATCH xf86-video-ati v2 1/2] EXA/6xx/7xx: fast solid pixmap support

Tan Hu tan.hu at zte.com.cn
Wed Jun 1 09:02:47 UTC 2016


Solid pixmaps are currently implemented with scratch pixmaps, which
is slow. This replaces the hack with a proper implementation. The
Composite shader can now either sample a src/mask or use a constant
value.

r6xx hardware is still in use on some machines, so this change is
ported from commit 94d0d14914a025525a0766669b556eaa6681def7.

Signed-off-by: Tan Hu <tan.hu at zte.com.cn>
---
 src/r600_exa.c    | 257 ++++++++++++++++++++++++---------
 src/r600_shader.c | 418 +++++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 526 insertions(+), 149 deletions(-)

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 8d11ce7..10df4ec 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1165,6 +1165,134 @@ static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP
 
 }
 
+static void R600SetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit)
+{ /* Convert packed solid colour fg into four floats (R, G, B, A) written to buf[0..3]. */
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0; /* remain 0 after the switch if format is not recognised */
+
+    uint32_t w = (fg >> 24) & 0xff; /* bits 31:24 of fg */
+    uint32_t z = (fg >> 16) & 0xff; /* bits 23:16 of fg */
+    uint32_t y = (fg >> 8) & 0xff; /* bits 15:8 of fg */
+    uint32_t x = (fg >> 0) & 0xff; /* bits 7:0 of fg */
+    float xf = (float)x / 255; /* bits 7:0 scaled to 0..1; channel meaning depends on format */
+    float yf = (float)y / 255; /* bits 15:8 scaled to 0..1 */
+    float zf = (float)z / 255; /* bits 23:16 scaled to 0..1 */
+    float wf = (float)w / 255; /* bits 31:24 scaled to 0..1 */
+
+    /* component swizzles: pick which scaled byte feeds each of R, G, B, A for this format */
+    switch (format) {
+	case PICT_a1r5g5b5:
+	case PICT_a8r8g8b8:
+	    pix_r = zf; /* R */
+	    pix_g = yf; /* G */
+	    pix_b = xf; /* B */
+	    pix_a = wf; /* A */
+	    break;
+	case PICT_a8b8g8r8:
+	    pix_r = xf; /* R */
+	    pix_g = yf; /* G */
+	    pix_b = zf; /* B */
+	    pix_a = wf; /* A */
+	    break;
+	case PICT_x8b8g8r8:
+	    pix_r = xf; /* R */
+	    pix_g = yf; /* G */
+	    pix_b = zf; /* B */
+	    pix_a = 1.0; /* A */
+	    break;
+	case PICT_b8g8r8a8:
+	    pix_r = yf; /* R */
+	    pix_g = zf; /* G */
+	    pix_b = wf; /* B */
+	    pix_a = xf; /* A */
+	    break;
+	case PICT_b8g8r8x8:
+	    pix_r = yf; /* R */
+	    pix_g = zf; /* G */
+	    pix_b = wf; /* B */
+	    pix_a = 1.0; /* A */
+	    break;
+	case PICT_x1r5g5b5:
+	case PICT_x8r8g8b8:
+	case PICT_r5g6b5:
+	    pix_r = zf; /* R */
+	    pix_g = yf; /* G */
+	    pix_b = xf; /* B */
+	    pix_a = 1.0; /* A */
+	    break;
+	case PICT_a8:
+	    pix_r = 0.0; /* R */
+	    pix_g = 0.0; /* G */
+	    pix_b = 0.0; /* B */
+	    pix_a = xf; /* A */
+	    break;
+	default: /* only logs; execution continues with all four components still 0 */
+	    ErrorF("Bad format 0x%x\n", format);
+    }
+
+    if (unit == 0) { /* in this patch R600PrepareComposite passes unit 0 for the solid source */
+	if (!accel_state->msk_pic) { /* no mask picture recorded in the accel state */
+	    if (PICT_FORMAT_RGB(format) == 0) { /* format reports no RGB bits: colour becomes black */
+		pix_r = 0.0;
+		pix_g = 0.0;
+		pix_b = 0.0;
+	    }
+
+	    if (PICT_FORMAT_A(format) == 0) /* format reports no alpha bits: alpha becomes 1 */
+		pix_a = 1.0;
+	} else {
+	    if (accel_state->component_alpha) {
+		if (accel_state->src_alpha) { /* output the alpha value in every channel */
+		    if (PICT_FORMAT_A(format) == 0) {
+			pix_r = 1.0;
+			pix_g = 1.0;
+			pix_b = 1.0;
+			pix_a = 1.0;
+		    } else {
+			pix_r = pix_a;
+			pix_g = pix_a;
+			pix_b = pix_a;
+		    }
+		} else { /* colour channels kept as decoded; only missing alpha is filled in */
+		    if (PICT_FORMAT_A(format) == 0)
+			pix_a = 1.0;
+		}
+	    } else { /* mask without component alpha: same fix-ups as the no-mask case */
+		if (PICT_FORMAT_RGB(format) == 0) {
+		    pix_r = 0;
+		    pix_g = 0;
+		    pix_b = 0;
+		}
+
+		if (PICT_FORMAT_A(format) == 0)
+		    pix_a = 1.0;
+	    }
+	}
+    } else { /* any non-zero unit; in this patch the caller passes 1 for the solid mask */
+	if (accel_state->component_alpha) { /* per-channel values kept; only missing alpha is filled in */
+	    if (PICT_FORMAT_A(format) == 0)
+		pix_a = 1.0;
+	} else { /* without component alpha every channel carries the alpha value */
+	    if (PICT_FORMAT_A(format) == 0) {
+		pix_r = 1.0;
+		pix_g = 1.0;
+		pix_b = 1.0;
+		pix_a = 1.0;
+	    } else {
+		pix_r = pix_a;
+		pix_g = pix_a;
+		pix_b = pix_a;
+	    }
+	}
+    }
+
+    buf[0] = pix_r; /* output order is R, G, B, A */
+    buf[1] = pix_g;
+    buf[2] = pix_b;
+    buf[3] = pix_a;
+}
+
 static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
 				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
@@ -1177,31 +1305,27 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     cb_config_t cb_conf;
     shader_config_t vs_conf, ps_conf;
     struct r600_accel_object src_obj, mask_obj, dst_obj;
+    uint32_t ps_bool_consts = 0;
+    float ps_alu_consts[8];
 
     if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
 	return FALSE;
 
-    if (!pSrc) {
-	pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color);
-	if (!pSrc)
-	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
+    if (pSrc) {
+	src_obj.bo = radeon_get_pixmap_bo(pSrc);
+	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
+	src_obj.surface = radeon_get_pixmap_surface(pSrc);
+	src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
+	src_obj.width = pSrc->drawable.width;
+	src_obj.height = pSrc->drawable.height;
+	src_obj.bpp = pSrc->drawable.bitsPerPixel;
+	src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
     }
 
     dst_obj.bo = radeon_get_pixmap_bo(pDst);
-    src_obj.bo = radeon_get_pixmap_bo(pSrc);
     dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
-    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
     dst_obj.surface = radeon_get_pixmap_surface(pDst);
-    src_obj.surface = radeon_get_pixmap_surface(pSrc);
-
-    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
     dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
-
-    src_obj.width = pSrc->drawable.width;
-    src_obj.height = pSrc->drawable.height;
-    src_obj.bpp = pSrc->drawable.bitsPerPixel;
-    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
-
     dst_obj.width = pDst->drawable.width;
     dst_obj.height = pDst->drawable.height;
     dst_obj.bpp = pDst->drawable.bitsPerPixel;
@@ -1211,34 +1335,17 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 	dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
 
     if (pMaskPicture) {
-	if (!pMask) {
-	    pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color);
-	    if (!pMask) {
-		if (!pSrcPicture->pDrawable)
-		    pScreen->DestroyPixmap(pSrc);
-		RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
-	    }
+	if (pMask) {
+	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
+	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
+	    mask_obj.surface = radeon_get_pixmap_surface(pMask);
+	    mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
+	    mask_obj.width = pMask->drawable.width;
+	    mask_obj.height = pMask->drawable.height;
+	    mask_obj.bpp = pMask->drawable.bitsPerPixel;
+	    mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
 	}
 
-	mask_obj.bo = radeon_get_pixmap_bo(pMask);
-	mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
-	mask_obj.surface = radeon_get_pixmap_surface(pMask);
-
-	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
-
-	mask_obj.width = pMask->drawable.width;
-	mask_obj.height = pMask->drawable.height;
-	mask_obj.bpp = pMask->drawable.bitsPerPixel;
-	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
-
-	if (!R600SetAccelState(pScrn,
-			       &src_obj,
-			       &mask_obj,
-			       &dst_obj,
-			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
-			       3, 0xffffffff))
-	    return FALSE;
-
 	accel_state->msk_pic = pMaskPicture;
 	if (pMaskPicture->componentAlpha) {
 	    accel_state->component_alpha = TRUE;
@@ -1251,19 +1358,19 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 	    accel_state->src_alpha = FALSE;
 	}
     } else {
-	if (!R600SetAccelState(pScrn,
-			       &src_obj,
-			       NULL,
-			       &dst_obj,
-			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
-			       3, 0xffffffff))
-	    return FALSE;
-
 	accel_state->msk_pic = NULL;
 	accel_state->component_alpha = FALSE;
 	accel_state->src_alpha = FALSE;
     }
 
+    if (!R600SetAccelState(pScrn,
+			   pSrc ? &src_obj : NULL,
+			   (pMaskPicture && pMask) ? &mask_obj : NULL,
+			   &dst_obj,
+			   accel_state->comp_vs_offset, accel_state->comp_ps_offset,
+			   3, 0xffffffff))
+	return FALSE;
+
     if (!R600GetDestFormat(pDstPicture, &dst_format))
 	return FALSE;
 
@@ -1284,10 +1391,13 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
     r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
 
-    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
-        R600IBDiscard(pScrn);
-        return FALSE;
-    }
+    if (pSrc) {
+        if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
+            R600IBDiscard(pScrn);
+            return FALSE;
+        }
+    } else
+        accel_state->is_transform[0] = FALSE;
 
     if (pMask) {
         if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
@@ -1297,12 +1407,16 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     } else
         accel_state->is_transform[1] = FALSE;
 
+    if (pSrc)
+	ps_bool_consts |= (1 << 0);
+    if (pMask)
+	ps_bool_consts |= (1 << 1);
+    r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts);
+
     if (pMask) {
 	r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
-	r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
     } else {
 	r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
-	r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
     }
 
     /* Shader */
@@ -1315,7 +1429,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 
     ps_conf.shader_addr         = accel_state->ps_mc_addr;
     ps_conf.shader_size         = accel_state->ps_size;
-    ps_conf.num_gprs            = 3;
+    ps_conf.num_gprs            = 2;
     ps_conf.stack_size          = 1;
     ps_conf.uncached_first_inst = 1;
     ps_conf.clamp_consts        = 0;
@@ -1381,6 +1495,27 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     else
 	r600_set_spi(pScrn, (1 - 1), 1);
 
+    if (!pSrc) {
+	/* solid src color */
+	R600SetSolidConsts(pScrn, &ps_alu_consts[0], pSrcPicture->format,
+			   pSrcPicture->pSourcePict->solidFill.color, 0);
+    }
+
+    if (!pMaskPicture) {
+	/* use identity constant if there is no mask */
+	ps_alu_consts[4] = 1.0;
+	ps_alu_consts[5] = 1.0;
+	ps_alu_consts[6] = 1.0;
+	ps_alu_consts[7] = 1.0;
+    } else if (!pMask) {
+	/* solid mask color */
+	R600SetSolidConsts(pScrn, &ps_alu_consts[4], pMaskPicture->format,
+			   pMaskPicture->pSourcePict->solidFill.color, 1);
+    }
+
+    r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps,
+			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+
     if (accel_state->vsync)
 	RADEONVlineHelperClear(pScrn);
 
@@ -1405,7 +1540,7 @@ static void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
 			       accel_state->vline_y1,
 			       accel_state->vline_y2);
 
-    vtx_size = accel_state->msk_pic ? 24 : 16;
+    vtx_size = accel_state->msk_pix ? 24 : 16;
 
     r600_finish_op(pScrn, vtx_size);
 }
@@ -1418,12 +1553,6 @@ static void R600DoneComposite(PixmapPtr pDst)
     struct radeon_accel_state *accel_state = info->accel_state;
 
     R600FinishComposite(pScrn, pDst, accel_state);
-
-    if (!accel_state->src_pic->pDrawable)
-	pScreen->DestroyPixmap(accel_state->src_pix);
-
-    if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
-	pScreen->DestroyPixmap(accel_state->msk_pix);
 }
 
 static void R600Composite(PixmapPtr pDst,
@@ -1455,7 +1584,7 @@ static void R600Composite(PixmapPtr pDst,
     if (accel_state->vsync)
 	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
 
-    if (accel_state->msk_pic) {
+    if (accel_state->msk_pix) {
 
 	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
 
diff --git a/src/r600_shader.c b/src/r600_shader.c
index 4cb2fc8..26a6ab6 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -2318,9 +2318,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     int i = 0;
 
     /* 0 */
-    shader[i++] = CF_DWORD0(ADDR(3));
+    /* call fetch-mask if boolean1 == true */
+    shader[i++] = CF_DWORD0(ADDR(10));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
-                            CF_CONST(0),
+                            CF_CONST(1),
                             COND(SQ_CF_COND_BOOL),
                             I_COUNT(0),
                             CALL_COUNT(0),
@@ -2330,9 +2331,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                             WHOLE_QUAD_MODE(0),
                             BARRIER(0));
     /* 1 */
-    shader[i++] = CF_DWORD0(ADDR(7));
+    /* call read-constant-mask if boolean1 == false */
+    shader[i++] = CF_DWORD0(ADDR(12));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
-                            CF_CONST(0),
+                            CF_CONST(1),
                             COND(SQ_CF_COND_NOT_BOOL),
                             I_COUNT(0),
                             CALL_COUNT(0),
@@ -2342,33 +2344,36 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                             WHOLE_QUAD_MODE(0),
                             BARRIER(0));
     /* 2 */
-    shader[i++] = CF_DWORD0(ADDR(0));
+    /* call fetch-src if boolean0 == true */
+    shader[i++] = CF_DWORD0(ADDR(6));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                             CF_CONST(0),
-                            COND(SQ_CF_COND_ACTIVE),
+                            COND(SQ_CF_COND_BOOL),
                             I_COUNT(0),
                             CALL_COUNT(0),
-                            END_OF_PROGRAM(1),
+                            END_OF_PROGRAM(0),
                             VALID_PIXEL_MODE(0),
-                            CF_INST(SQ_CF_INST_NOP),
+                            CF_INST(SQ_CF_INST_CALL),
                             WHOLE_QUAD_MODE(0),
-                            BARRIER(1));
+                            BARRIER(0));
 
-    /* 3 - mask sub */
-    shader[i++] = CF_DWORD0(ADDR(14));
+    /* 3 */
+    /* call read-constant-src if boolean0 == false */
+    shader[i++] = CF_DWORD0(ADDR(8));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
 			    CF_CONST(0),
-			    COND(SQ_CF_COND_ACTIVE),
-			    I_COUNT(2),
+			    COND(SQ_CF_COND_NOT_BOOL),
+			    I_COUNT(0),
 			    CALL_COUNT(0),
 			    END_OF_PROGRAM(0),
 			    VALID_PIXEL_MODE(0),
-			    CF_INST(SQ_CF_INST_TEX),
+			    CF_INST(SQ_CF_INST_CALL),
 			    WHOLE_QUAD_MODE(0),
-			    BARRIER(1));
+			    BARRIER(0));
 
     /* 4 */
-    shader[i++] = CF_ALU_DWORD0(ADDR(10),
+    /* src IN mask (GPR0 := GPR1 .* GPR0) */
+    shader[i++] = CF_ALU_DWORD0(ADDR(14),
 				KCACHE_BANK0(0),
 				KCACHE_BANK1(0),
 				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
@@ -2382,9 +2387,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 				BARRIER(1));
 
     /* 5 */
+    /* export pixel data */
     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
 					  TYPE(SQ_EXPORT_PIXEL),
-					  RW_GPR(2),
+					  RW_GPR(0),
 					  RW_REL(ABSOLUTE),
 					  INDEX_GPR(0),
 					  ELEM_SIZE(1));
@@ -2394,55 +2400,57 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 					       SRC_SEL_W(SQ_SEL_W),
 					       R6xx_ELEM_LOOP(0),
 					       BURST_COUNT(1),
-					       END_OF_PROGRAM(0),
+					       END_OF_PROGRAM(1),
 					       VALID_PIXEL_MODE(0),
 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
 					       WHOLE_QUAD_MODE(0),
 					       BARRIER(1));
+    /* subroutine fetch src */
     /* 6 */
-    shader[i++] = CF_DWORD0(ADDR(0));
+    /* fetch src into GPR0*/
+    shader[i++] = CF_DWORD0(ADDR(26));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
 			    CF_CONST(0),
 			    COND(SQ_CF_COND_ACTIVE),
-			    I_COUNT(0),
+			    I_COUNT(1),
 			    CALL_COUNT(0),
 			    END_OF_PROGRAM(0),
 			    VALID_PIXEL_MODE(0),
-			    CF_INST(SQ_CF_INST_RETURN),
+			    CF_INST(SQ_CF_INST_TEX),
 			    WHOLE_QUAD_MODE(0),
 			    BARRIER(1));
 
-    /* 7 non-mask sub */
-    shader[i++] = CF_DWORD0(ADDR(18));
+    /* 7 */
+    /* return */
+    shader[i++] = CF_DWORD0(ADDR(0));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
 			    CF_CONST(0),
 			    COND(SQ_CF_COND_ACTIVE),
-			    I_COUNT(1),
+			    I_COUNT(0),
 			    CALL_COUNT(0),
 			    END_OF_PROGRAM(0),
 			    VALID_PIXEL_MODE(0),
-			    CF_INST(SQ_CF_INST_TEX),
+			    CF_INST(SQ_CF_INST_RETURN),
 			    WHOLE_QUAD_MODE(0),
 			    BARRIER(1));
+
+    /* subroutine read-constant-src*/
     /* 8 */
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
-					  TYPE(SQ_EXPORT_PIXEL),
-					  RW_GPR(0),
-					  RW_REL(ABSOLUTE),
-					  INDEX_GPR(0),
-					  ELEM_SIZE(1));
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					       SRC_SEL_Y(SQ_SEL_Y),
-					       SRC_SEL_Z(SQ_SEL_Z),
-					       SRC_SEL_W(SQ_SEL_W),
-					       R6xx_ELEM_LOOP(0),
-					       BURST_COUNT(1),
-					       END_OF_PROGRAM(0),
-					       VALID_PIXEL_MODE(0),
-					       CF_INST(SQ_CF_INST_EXPORT_DONE),
-					       WHOLE_QUAD_MODE(0),
-					       BARRIER(1));
+    /* read constants into GPR0 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(18),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				USES_WATERFALL(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
     /* 9 */
+    /* return */
     shader[i++] = CF_DWORD0(ADDR(0));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
 			    CF_CONST(0),
@@ -2455,8 +2463,67 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 			    WHOLE_QUAD_MODE(0),
 			    BARRIER(1));
 
-    /* 10 - alu 0 */
-    /* MUL gpr[2].x gpr[1].x gpr[0].x */
+    /* subroutine fetch mask */
+    /* 10 */
+    /* fetch mask into GPR1*/
+    shader[i++] = CF_DWORD0(ADDR(28));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(1),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_TEX),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+
+    /* 11 */
+    /* return */
+    shader[i++] = CF_DWORD0(ADDR(0));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(0),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_RETURN),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+
+    /* subroutine read-constant-mask*/
+    /* 12 */
+    /* read constants into GPR1 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(22),
+                                KCACHE_BANK0(0),
+                                KCACHE_BANK1(0),
+                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+                                KCACHE_ADDR0(0),
+                                KCACHE_ADDR1(0),
+                                I_COUNT(4),
+                                USES_WATERFALL(0),
+                                CF_INST(SQ_CF_INST_ALU),
+                                WHOLE_QUAD_MODE(0),
+                                BARRIER(1));
+    /* 13 */
+    /* return */
+    shader[i++] = CF_DWORD0(ADDR(0));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(0),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_RETURN),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+    /* ALU clauses */
+
+    /* 14 - alu 0 */
+    /* MUL gpr[0].x gpr[1].x gpr[0].x */
     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
 			     SRC0_REL(ABSOLUTE),
 			     SRC0_ELEM(ELEM_X),
@@ -2478,12 +2545,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 				 OMOD(SQ_ALU_OMOD_OFF),
 				 ALU_INST(SQ_OP2_INST_MUL),
 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
+				 DST_GPR(0),
 				 DST_REL(ABSOLUTE),
 				 DST_ELEM(ELEM_X),
 				 CLAMP(1));
-    /* 11 - alu 1 */
-    /* MUL gpr[2].y gpr[1].y gpr[0].y */
+    /* 15 - alu 1 */
+    /* MUL gpr[0].y gpr[1].y gpr[0].y */
     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
 			     SRC0_REL(ABSOLUTE),
 			     SRC0_ELEM(ELEM_Y),
@@ -2505,12 +2572,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 				 OMOD(SQ_ALU_OMOD_OFF),
 				 ALU_INST(SQ_OP2_INST_MUL),
 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
+				 DST_GPR(0),
 				 DST_REL(ABSOLUTE),
 				 DST_ELEM(ELEM_Y),
 				 CLAMP(1));
-    /* 12 - alu 2 */
-    /* MUL gpr[2].z gpr[1].z gpr[0].z */
+    /* 16 - alu 2 */
+    /* MUL gpr[0].z gpr[1].z gpr[0].z */
     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
 			     SRC0_REL(ABSOLUTE),
 			     SRC0_ELEM(ELEM_Z),
@@ -2532,12 +2599,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 				 OMOD(SQ_ALU_OMOD_OFF),
 				 ALU_INST(SQ_OP2_INST_MUL),
 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
+				 DST_GPR(0),
 				 DST_REL(ABSOLUTE),
 				 DST_ELEM(ELEM_Z),
 				 CLAMP(1));
-    /* 13 - alu 3 */
-    /* MUL gpr[2].w gpr[1].w gpr[0].w */
+    /* 17 - alu 3 */
+    /* MUL gpr[0].w gpr[1].w gpr[0].w */
     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
 			     SRC0_REL(ABSOLUTE),
 			     SRC0_ELEM(ELEM_W),
@@ -2559,12 +2626,222 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 				 OMOD(SQ_ALU_OMOD_OFF),
 				 ALU_INST(SQ_OP2_INST_MUL),
 				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(1));
+
+    /* 18 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(1));
+    /* 19 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Y),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(1));
+    /* 20 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Z),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Z),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(1));
+    /* 21 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_W),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_W),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
 				 DST_REL(ABSOLUTE),
 				 DST_ELEM(ELEM_W),
 				 CLAMP(1));
 
-    /* 14/15 - src - mask */
+    /* 22 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(1));
+    /* 23 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Y),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(1));
+    /* 24 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Z),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Z),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(1));
+    /* 25 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_W),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_W),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(1));
+
+    /* 26/27 - src */
     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			     BC_FRAC_MODE(0),
 			     FETCH_WHOLE_QUAD(0),
@@ -2592,7 +2869,7 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 			     SRC_SEL_Z(SQ_SEL_0),
 			     SRC_SEL_W(SQ_SEL_1));
     shader[i++] = TEX_DWORD_PAD;
-    /* 16/17 - mask */
+    /* 28/29 - mask */
     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			     BC_FRAC_MODE(0),
 			     FETCH_WHOLE_QUAD(0),
@@ -2621,34 +2898,5 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 			     SRC_SEL_W(SQ_SEL_1));
     shader[i++] = TEX_DWORD_PAD;
 
-    /* 18/19 - src - non-mask */
-    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			     BC_FRAC_MODE(0),
-			     FETCH_WHOLE_QUAD(0),
-			     RESOURCE_ID(0),
-			     SRC_GPR(0),
-			     SRC_REL(ABSOLUTE),
-			     R7xx_ALT_CONST(0));
-    shader[i++] = TEX_DWORD1(DST_GPR(0),
-			     DST_REL(ABSOLUTE),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_Z),
-			     DST_SEL_W(SQ_SEL_W),
-			     LOD_BIAS(0),
-			     COORD_TYPE_X(TEX_NORMALIZED),
-			     COORD_TYPE_Y(TEX_NORMALIZED),
-			     COORD_TYPE_Z(TEX_NORMALIZED),
-			     COORD_TYPE_W(TEX_NORMALIZED));
-    shader[i++] = TEX_DWORD2(OFFSET_X(0),
-			     OFFSET_Y(0),
-			     OFFSET_Z(0),
-			     SAMPLER_ID(0),
-			     SRC_SEL_X(SQ_SEL_X),
-			     SRC_SEL_Y(SQ_SEL_Y),
-			     SRC_SEL_Z(SQ_SEL_0),
-			     SRC_SEL_W(SQ_SEL_1));
-    shader[i++] = TEX_DWORD_PAD;
-
     return i;
 }
-- 
1.8.3.1

--------------------------------------------------------
ZTE Information Security Notice: The information contained in this mail (and any attachment transmitted herewith) is privileged and confidential and is intended for the exclusive use of the addressee(s).  If you are not an intended recipient, any disclosure, reproduction, distribution or other dissemination or use of the information contained is strictly prohibited.  If you have received this mail in error, please delete it and notify us immediately.


More information about the xorg-driver-ati mailing list