[PATCH xf86-video-ati 1/2] EXA/6xx/7xx: fast solid pixmap support

tan.hu at zte.com.cn tan.hu at zte.com.cn
Tue May 17 10:17:30 UTC 2016


From: Tan Hu <tan.hu at zte.com.cn>

Solid pixmaps are currently implemented with scratch pixmaps, which
is slow. This replaces the hack with a proper implementation. The
Composite shader can now either sample a src/mask or use a constant
value. 

r6xx hardware is still in use on some machines, so this change is
ported from commit 94d0d14914a025525a0766669b556eaa6681def7.

Signed-off-by: Tan Hu <tan.hu at zte.com.cn>
---
 src/r600_exa.c    | 257 ++++++++++++++++++++++++--------
 src/r600_shader.c | 430 ++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 532 insertions(+), 155 deletions(-)

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 8d11ce7..a95f320 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1165,6 +1165,134 @@ static Bool R600CheckComposite(int op, PicturePtr 
pSrcPicture, PicturePtr pMaskP
 
 }
 
+static void R600SetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, 
uint32_t fg, int unit)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0;
+
+    uint32_t w = (fg >> 24) & 0xff;
+    uint32_t z = (fg >> 16) & 0xff;
+    uint32_t y = (fg >> 8) & 0xff;
+    uint32_t x = (fg >> 0) & 0xff;
+    float xf = (float)x / 255; /* R */
+    float yf = (float)y / 255; /* G */
+    float zf = (float)z / 255; /* B */
+    float wf = (float)w / 255; /* A */
+
+    /* component swizzles */
+    switch (format) {
+       case PICT_a1r5g5b5:
+       case PICT_a8r8g8b8:
+           pix_r = zf; /* R */
+           pix_g = yf; /* G */
+           pix_b = xf; /* B */
+           pix_a = wf; /* A */
+           break;
+       case PICT_a8b8g8r8:
+           pix_r = xf; /* R */
+           pix_g = yf; /* G */
+           pix_b = zf; /* B */
+           pix_a = wf; /* A */
+           break;
+       case PICT_x8b8g8r8:
+           pix_r = xf; /* R */
+           pix_g = yf; /* G */
+           pix_b = zf; /* B */
+           pix_a = 1.0; /* A */
+           break;
+       case PICT_b8g8r8a8:
+           pix_r = yf; /* R */
+           pix_g = zf; /* G */
+           pix_b = wf; /* B */
+           pix_a = xf; /* A */
+           break;
+       case PICT_b8g8r8x8:
+           pix_r = yf; /* R */
+           pix_g = zf; /* G */
+           pix_b = wf; /* B */
+           pix_a = 1.0; /* A */
+           break;
+       case PICT_x1r5g5b5:
+       case PICT_x8r8g8b8:
+       case PICT_r5g6b5:
+           pix_r = zf; /* R */
+           pix_g = yf; /* G */
+           pix_b = xf; /* B */
+           pix_a = 1.0; /* A */
+           break;
+       case PICT_a8:
+           pix_r = 0.0; /* R */
+           pix_g = 0.0; /* G */
+           pix_b = 0.0; /* B */
+           pix_a = xf; /* A */
+           break;
+       default:
+           ErrorF("Bad format 0x%x\n", format);
+    }
+
+    if (unit == 0) {
+       if (!accel_state->msk_pic) {
+           if (PICT_FORMAT_RGB(format) == 0) {
+               pix_r = 0.0;
+               pix_g = 0.0;
+               pix_b = 0.0;
+           }
+
+           if (PICT_FORMAT_A(format) == 0)
+               pix_a = 1.0;
+       } else {
+           if (accel_state->component_alpha) {
+               if (accel_state->src_alpha) {
+                   if (PICT_FORMAT_A(format) == 0) {
+                       pix_r = 1.0;
+                       pix_g = 1.0;
+                       pix_b = 1.0;
+                       pix_a = 1.0;
+                   } else {
+                       pix_r = pix_a;
+                       pix_g = pix_a;
+                       pix_b = pix_a;
+                   }
+               } else {
+                   if (PICT_FORMAT_A(format) == 0)
+                       pix_a = 1.0;
+               }
+           } else {
+               if (PICT_FORMAT_RGB(format) == 0) {
+                   pix_r = 0;
+                   pix_g = 0;
+                   pix_b = 0;
+               }
+
+               if (PICT_FORMAT_A(format) == 0)
+                   pix_a = 1.0;
+           }
+       }
+    } else {
+       if (accel_state->component_alpha) {
+           if (PICT_FORMAT_A(format) == 0)
+               pix_a = 1.0;
+       } else {
+           if (PICT_FORMAT_A(format) == 0) {
+               pix_r = 1.0;
+               pix_g = 1.0;
+               pix_b = 1.0;
+               pix_a = 1.0;
+           } else {
+               pix_r = pix_a;
+               pix_g = pix_a;
+               pix_b = pix_a;
+           }
+       }
+    }
+
+    buf[0] = pix_r;
+    buf[1] = pix_g;
+    buf[2] = pix_b;
+    buf[3] = pix_a;
+}
+
 static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
                                 PicturePtr pMaskPicture, PicturePtr 
pDstPicture,
                                 PixmapPtr pSrc, PixmapPtr pMask, 
PixmapPtr pDst)
@@ -1177,31 +1305,27 @@ static Bool R600PrepareComposite(int op, 
PicturePtr pSrcPicture,
     cb_config_t cb_conf;
     shader_config_t vs_conf, ps_conf;
     struct r600_accel_object src_obj, mask_obj, dst_obj;
+    uint32_t ps_bool_consts = 0;
+    float ps_alu_consts[8];
 
     if (pDst->drawable.bitsPerPixel < 8 || (pSrc && 
pSrc->drawable.bitsPerPixel < 8))
        return FALSE;
 
-    if (!pSrc) {
-       pSrc = RADEONSolidPixmap(pScreen, 
pSrcPicture->pSourcePict->solidFill.color);
-       if (!pSrc)
-           RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
+    if (pSrc) {
+       src_obj.bo = radeon_get_pixmap_bo(pSrc);
+       src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
+       src_obj.surface = radeon_get_pixmap_surface(pSrc);
+       src_obj.pitch = exaGetPixmapPitch(pSrc) / 
(pSrc->drawable.bitsPerPixel / 8);
+       src_obj.width = pSrc->drawable.width;
+       src_obj.height = pSrc->drawable.height;
+       src_obj.bpp = pSrc->drawable.bitsPerPixel;
+       src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
     }
 
     dst_obj.bo = radeon_get_pixmap_bo(pDst);
-    src_obj.bo = radeon_get_pixmap_bo(pSrc);
     dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
-    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
     dst_obj.surface = radeon_get_pixmap_surface(pDst);
-    src_obj.surface = radeon_get_pixmap_surface(pSrc);
-
-    src_obj.pitch = exaGetPixmapPitch(pSrc) / 
(pSrc->drawable.bitsPerPixel / 8);
     dst_obj.pitch = exaGetPixmapPitch(pDst) / 
(pDst->drawable.bitsPerPixel / 8);
-
-    src_obj.width = pSrc->drawable.width;
-    src_obj.height = pSrc->drawable.height;
-    src_obj.bpp = pSrc->drawable.bitsPerPixel;
-    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
-
     dst_obj.width = pDst->drawable.width;
     dst_obj.height = pDst->drawable.height;
     dst_obj.bpp = pDst->drawable.bitsPerPixel;
@@ -1211,34 +1335,17 @@ static Bool R600PrepareComposite(int op, 
PicturePtr pSrcPicture,
        dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
 
     if (pMaskPicture) {
-       if (!pMask) {
-           pMask = RADEONSolidPixmap(pScreen, 
pMaskPicture->pSourcePict->solidFill.color);
-           if (!pMask) {
-               if (!pSrcPicture->pDrawable)
-                   pScreen->DestroyPixmap(pSrc);
-               RADEON_FALLBACK(("Failed to create solid scratch 
pixmap\n"));
-           }
+       if (pMask) {
+           mask_obj.bo = radeon_get_pixmap_bo(pMask);
+           mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
+           mask_obj.surface = radeon_get_pixmap_surface(pMask);
+            mask_obj.pitch = exaGetPixmapPitch(pMask) / 
(pMask->drawable.bitsPerPixel / 8);
+           mask_obj.width = pMask->drawable.width;
+           mask_obj.height = pMask->drawable.height;
+           mask_obj.bpp = pMask->drawable.bitsPerPixel;
+           mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | 
RADEON_GEM_DOMAIN_GTT;
        }
 
-       mask_obj.bo = radeon_get_pixmap_bo(pMask);
-       mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
-       mask_obj.surface = radeon_get_pixmap_surface(pMask);
-
-       mask_obj.pitch = exaGetPixmapPitch(pMask) / 
(pMask->drawable.bitsPerPixel / 8);
-
-       mask_obj.width = pMask->drawable.width;
-       mask_obj.height = pMask->drawable.height;
-       mask_obj.bpp = pMask->drawable.bitsPerPixel;
-       mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
-
-       if (!R600SetAccelState(pScrn,
-                              &src_obj,
-                              &mask_obj,
-                              &dst_obj,
-                              accel_state->comp_vs_offset, 
accel_state->comp_ps_offset,
-                              3, 0xffffffff))
-           return FALSE;
-
        accel_state->msk_pic = pMaskPicture;
        if (pMaskPicture->componentAlpha) {
            accel_state->component_alpha = TRUE;
@@ -1251,19 +1358,19 @@ static Bool R600PrepareComposite(int op, 
PicturePtr pSrcPicture,
            accel_state->src_alpha = FALSE;
        }
     } else {
-       if (!R600SetAccelState(pScrn,
-                              &src_obj,
-                              NULL,
-                              &dst_obj,
-                              accel_state->comp_vs_offset, 
accel_state->comp_ps_offset,
-                              3, 0xffffffff))
-           return FALSE;
-
        accel_state->msk_pic = NULL;
        accel_state->component_alpha = FALSE;
        accel_state->src_alpha = FALSE;
     }
 
+    if (!R600SetAccelState(pScrn,
+               pSrc ? &src_obj : NULL,
+               (pMaskPicture && pMask) ? &mask_obj : NULL,
+               &dst_obj,
+               accel_state->comp_vs_offset, accel_state->comp_ps_offset,
+               3, 0xffffffff))
+       return FALSE;
+
     if (!R600GetDestFormat(pDstPicture, &dst_format))
        return FALSE;
 
@@ -1284,10 +1391,13 @@ static Bool R600PrepareComposite(int op, 
PicturePtr pSrcPicture,
     r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, 
accel_state->dst_obj.height);
     r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, 
accel_state->dst_obj.height);
 
-    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
-        R600IBDiscard(pScrn);
-        return FALSE;
-    }
+    if (pSrc) {
+        if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
+            R600IBDiscard(pScrn);
+            return FALSE;
+        }
+    } else
+        accel_state->is_transform[0] = FALSE;
 
     if (pMask) {
         if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
@@ -1297,12 +1407,16 @@ static Bool R600PrepareComposite(int op, 
PicturePtr pSrcPicture,
     } else
         accel_state->is_transform[1] = FALSE;
 
+    if (pSrc)
+       ps_bool_consts |= (1 << 0);
+    if (pMask)
+       ps_bool_consts |= (1 << 1);
+    r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts);
+
     if (pMask) {
        r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
-       r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
     } else {
        r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
-       r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
     }
 
     /* Shader */
@@ -1315,7 +1429,7 @@ static Bool R600PrepareComposite(int op, PicturePtr 
pSrcPicture,
 
     ps_conf.shader_addr         = accel_state->ps_mc_addr;
     ps_conf.shader_size         = accel_state->ps_size;
-    ps_conf.num_gprs            = 3;
+    ps_conf.num_gprs            = 2;
     ps_conf.stack_size          = 1;
     ps_conf.uncached_first_inst = 1;
     ps_conf.clamp_consts        = 0;
@@ -1381,6 +1495,27 @@ static Bool R600PrepareComposite(int op, PicturePtr 
pSrcPicture,
     else
        r600_set_spi(pScrn, (1 - 1), 1);
 
+    if (!pSrc) {
+       /* solid src color */
+       R600SetSolidConsts(pScrn, &ps_alu_consts[0], pSrcPicture->format,
+               pSrcPicture->pSourcePict->solidFill.color, 0);
+    }
+
+    if (!pMaskPicture) {
+       /* use identity constant if there is no mask */
+       ps_alu_consts[4] = 1.0;
+       ps_alu_consts[5] = 1.0;
+       ps_alu_consts[6] = 1.0;
+       ps_alu_consts[7] = 1.0;
+    } else if (!pMask) {
+       /* solid mask color */
+       R600SetSolidConsts(pScrn, &ps_alu_consts[4], pMaskPicture->format,
+               pMaskPicture->pSourcePict->solidFill.color, 1);
+    }
+
+    r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps,
+                       sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, 
ps_alu_consts);
+
     if (accel_state->vsync)
        RADEONVlineHelperClear(pScrn);
 
@@ -1405,7 +1540,7 @@ static void R600FinishComposite(ScrnInfoPtr pScrn, 
PixmapPtr pDst,
                               accel_state->vline_y1,
                               accel_state->vline_y2);
 
-    vtx_size = accel_state->msk_pic ? 24 : 16;
+    vtx_size = accel_state->msk_pix ? 24 : 16;
 
     r600_finish_op(pScrn, vtx_size);
 }
@@ -1418,12 +1553,6 @@ static void R600DoneComposite(PixmapPtr pDst)
     struct radeon_accel_state *accel_state = info->accel_state;
 
     R600FinishComposite(pScrn, pDst, accel_state);
-
-    if (!accel_state->src_pic->pDrawable)
-       pScreen->DestroyPixmap(accel_state->src_pix);
-
-    if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
-       pScreen->DestroyPixmap(accel_state->msk_pix);
 }
 
 static void R600Composite(PixmapPtr pDst,
@@ -1455,7 +1584,7 @@ static void R600Composite(PixmapPtr pDst,
     if (accel_state->vsync)
        RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
 
-    if (accel_state->msk_pic) {
+    if (accel_state->msk_pix) {
 
        vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
 
diff --git a/src/r600_shader.c b/src/r600_shader.c
index 4cb2fc8..af32cfe 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -2318,9 +2318,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, 
uint32_t* shader)
     int i = 0;
 
     /* 0 */
-    shader[i++] = CF_DWORD0(ADDR(3));
+    /* call fetch-mask if boolean1 == true */
+    shader[i++] = CF_DWORD0(ADDR(10));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
-                            CF_CONST(0),
+                            CF_CONST(1),
                             COND(SQ_CF_COND_BOOL),
                             I_COUNT(0),
                             CALL_COUNT(0),
@@ -2330,9 +2331,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, 
uint32_t* shader)
                             WHOLE_QUAD_MODE(0),
                             BARRIER(0));
     /* 1 */
-    shader[i++] = CF_DWORD0(ADDR(7));
+    /* call read-constant-mask if boolean1 == false */
+    shader[i++] = CF_DWORD0(ADDR(12));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
-                            CF_CONST(0),
+                            CF_CONST(1),
                             COND(SQ_CF_COND_NOT_BOOL),
                             I_COUNT(0),
                             CALL_COUNT(0),
@@ -2342,33 +2344,36 @@ int R600_comp_ps(RADEONChipFamily ChipSet, 
uint32_t* shader)
                             WHOLE_QUAD_MODE(0),
                             BARRIER(0));
     /* 2 */
-    shader[i++] = CF_DWORD0(ADDR(0));
+    /* call fetch-src if boolean0 == true */
+    shader[i++] = CF_DWORD0(ADDR(6));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                             CF_CONST(0),
-                            COND(SQ_CF_COND_ACTIVE),
+                            COND(SQ_CF_COND_BOOL),
                             I_COUNT(0),
                             CALL_COUNT(0),
-                            END_OF_PROGRAM(1),
+                            END_OF_PROGRAM(0),
                             VALID_PIXEL_MODE(0),
-                            CF_INST(SQ_CF_INST_NOP),
+                            CF_INST(SQ_CF_INST_CALL),
                             WHOLE_QUAD_MODE(0),
-                            BARRIER(1));
+                            BARRIER(0));
 
-    /* 3 - mask sub */
-    shader[i++] = CF_DWORD0(ADDR(14));
+    /* 3 */
+    /* call read-constant-src if boolean0 == false */
+    shader[i++] = CF_DWORD0(ADDR(8));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
-                           CF_CONST(0),
-                           COND(SQ_CF_COND_ACTIVE),
-                           I_COUNT(2),
-                           CALL_COUNT(0),
-                           END_OF_PROGRAM(0),
-                           VALID_PIXEL_MODE(0),
-                           CF_INST(SQ_CF_INST_TEX),
-                           WHOLE_QUAD_MODE(0),
-                           BARRIER(1));
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_NOT_BOOL),
+                            I_COUNT(0),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(0));
 
     /* 4 */
-    shader[i++] = CF_ALU_DWORD0(ADDR(10),
+    /* src IN mask (GPR0 := GPR1 .* GPR0) */
+   shader[i++] = CF_ALU_DWORD0(ADDR(14),
                                KCACHE_BANK0(0),
                                KCACHE_BANK1(0),
                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
@@ -2382,9 +2387,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, 
uint32_t* shader)
                                BARRIER(1));
 
     /* 5 */
+    /* export pixel data */
     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
                                          TYPE(SQ_EXPORT_PIXEL),
-                                         RW_GPR(2),
+                                         RW_GPR(0),
                                          RW_REL(ABSOLUTE),
                                          INDEX_GPR(0),
                                          ELEM_SIZE(1));
@@ -2394,55 +2400,57 @@ int R600_comp_ps(RADEONChipFamily ChipSet, 
uint32_t* shader)
                                               SRC_SEL_W(SQ_SEL_W),
                                               R6xx_ELEM_LOOP(0),
                                               BURST_COUNT(1),
-                                              END_OF_PROGRAM(0),
+                                              END_OF_PROGRAM(1),
                                               VALID_PIXEL_MODE(0),
 CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0),
                                               BARRIER(1));
+    /* subroutine fetch src */
     /* 6 */
-    shader[i++] = CF_DWORD0(ADDR(0));
+    /* fetch src into GPR0*/
+    shader[i++] = CF_DWORD0(ADDR(26));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
-                           I_COUNT(0),
+                           I_COUNT(1),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
-                           CF_INST(SQ_CF_INST_RETURN),
+                           CF_INST(SQ_CF_INST_TEX),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));
-
-    /* 7 non-mask sub */
-    shader[i++] = CF_DWORD0(ADDR(18));
+ 
+    /* 7 */
+    /* return */
+    shader[i++] = CF_DWORD0(ADDR(0));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE),
-                           I_COUNT(1),
+                           I_COUNT(0),
                            CALL_COUNT(0),
                            END_OF_PROGRAM(0),
                            VALID_PIXEL_MODE(0),
-                           CF_INST(SQ_CF_INST_TEX),
+                           CF_INST(SQ_CF_INST_RETURN),
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));
+
+    /* subroutine read-constant-src*/
     /* 8 */
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
-                                         TYPE(SQ_EXPORT_PIXEL),
-                                         RW_GPR(0),
-                                         RW_REL(ABSOLUTE),
-                                         INDEX_GPR(0),
-                                         ELEM_SIZE(1));
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-                                              SRC_SEL_Y(SQ_SEL_Y),
-                                              SRC_SEL_Z(SQ_SEL_Z),
-                                              SRC_SEL_W(SQ_SEL_W),
-                                              R6xx_ELEM_LOOP(0),
-                                              BURST_COUNT(1),
-                                              END_OF_PROGRAM(0),
-                                              VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
-                                              WHOLE_QUAD_MODE(0),
-                                              BARRIER(1));
+    /* read constants into GPR0 */
+   shader[i++] = CF_ALU_DWORD0(ADDR(18),
+                                KCACHE_BANK0(0),
+                                KCACHE_BANK1(0),
+                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+                                KCACHE_ADDR0(0),
+                                KCACHE_ADDR1(0),
+                                I_COUNT(4),
+                                USES_WATERFALL(0),
+                                CF_INST(SQ_CF_INST_ALU),
+                                WHOLE_QUAD_MODE(0),
+                                BARRIER(1)); 
     /* 9 */
+    /* return */
     shader[i++] = CF_DWORD0(ADDR(0));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                            CF_CONST(0),
@@ -2455,8 +2463,67 @@ int R600_comp_ps(RADEONChipFamily ChipSet, 
uint32_t* shader)
                            WHOLE_QUAD_MODE(0),
                            BARRIER(1));
 
-    /* 10 - alu 0 */
-    /* MUL gpr[2].x gpr[1].x gpr[0].x */
+    /* subroutine fetch mask */
+    /* 10 */
+    /* fetch mask into GPR1*/
+    shader[i++] = CF_DWORD0(ADDR(28));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(1),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_TEX),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+
+    /* 11 */
+    /* return */
+    shader[i++] = CF_DWORD0(ADDR(0));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(0),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_RETURN),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+
+    /* subroutine read-constant-mask*/
+    /* 12 */
+    /* read constants into GPR1 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(22),
+                                KCACHE_BANK0(0),
+                                KCACHE_BANK1(0),
+                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+                                KCACHE_ADDR0(0),
+                                KCACHE_ADDR1(0),
+                                I_COUNT(4),
+                                USES_WATERFALL(0),
+                                CF_INST(SQ_CF_INST_ALU),
+                                WHOLE_QUAD_MODE(0),
+                                BARRIER(1));
+    /* 13 */
+    /* return */
+    shader[i++] = CF_DWORD0(ADDR(0));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(0),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_RETURN),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+    /* ALU clauses */
+
+    /* 14 - alu 0 */
+    /* MUL gpr[0].x gpr[1].x gpr[0].x */
     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_X),
@@ -2478,12 +2545,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, 
uint32_t* shader)
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
-                                DST_GPR(2),
+                                DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_X),
                                 CLAMP(1));
-    /* 11 - alu 1 */
-    /* MUL gpr[2].y gpr[1].y gpr[0].y */
+    /* 15 - alu 1 */
+    /* MUL gpr[0].y gpr[1].y gpr[0].y */
     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Y),
@@ -2505,12 +2572,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, 
uint32_t* shader)
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
-                                DST_GPR(2),
+                                DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Y),
                                 CLAMP(1));
-    /* 12 - alu 2 */
-    /* MUL gpr[2].z gpr[1].z gpr[0].z */
+    /* 16 - alu 2 */
+    /* MUL gpr[0].z gpr[1].z gpr[0].z */
     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_Z),
@@ -2532,12 +2599,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, 
uint32_t* shader)
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
-                                DST_GPR(2),
+                                DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_Z),
                                 CLAMP(1));
-    /* 13 - alu 3 */
-    /* MUL gpr[2].w gpr[1].w gpr[0].w */
+    /* 17 - alu 3 */
+    /* MUL gpr[0].w gpr[1].w gpr[0].w */
     shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
                             SRC0_REL(ABSOLUTE),
                             SRC0_ELEM(ELEM_W),
@@ -2559,12 +2626,222 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                                 OMOD(SQ_ALU_OMOD_OFF),
                                 ALU_INST(SQ_OP2_INST_MUL),
                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
-                                DST_GPR(2),
+                                DST_GPR(0),
                                 DST_REL(ABSOLUTE),
                                 DST_ELEM(ELEM_W),
                                 CLAMP(1));
 
-    /* 14/15 - src - mask */
+    /* 18 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+                            SRC0_REL(ABSOLUTE),
+                            SRC0_ELEM(ELEM_X),
+                            SRC0_NEG(0),
+                            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                            SRC1_REL(ABSOLUTE),
+                            SRC1_ELEM(ELEM_X),
+                            SRC1_NEG(0),
+                            INDEX_MODE(SQ_INDEX_AR_X),
+                            PRED_SEL(SQ_PRED_SEL_OFF),
+                            LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                SRC0_ABS(0),
+                                SRC1_ABS(0),
+                                UPDATE_EXECUTE_MASK(0),
+                                UPDATE_PRED(0),
+                                WRITE_MASK(1),
+                                FOG_MERGE(0),
+                                OMOD(SQ_ALU_OMOD_OFF),
+                                ALU_INST(SQ_OP2_INST_MOV),
+                                BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                DST_GPR(0),
+                                DST_REL(ABSOLUTE),
+                                DST_ELEM(ELEM_X),
+                                CLAMP(1));
+    /* 19 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+                            SRC0_REL(ABSOLUTE),
+                            SRC0_ELEM(ELEM_Y),
+                            SRC0_NEG(0),
+                            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                            SRC1_REL(ABSOLUTE),
+                            SRC1_ELEM(ELEM_Y),
+                            SRC1_NEG(0),
+                            INDEX_MODE(SQ_INDEX_AR_X),
+                            PRED_SEL(SQ_PRED_SEL_OFF),
+                            LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                SRC0_ABS(0),
+                                SRC1_ABS(0),
+                                UPDATE_EXECUTE_MASK(0),
+                                UPDATE_PRED(0),
+                                WRITE_MASK(1),
+                                FOG_MERGE(0),
+                                OMOD(SQ_ALU_OMOD_OFF),
+                                ALU_INST(SQ_OP2_INST_MOV),
+                                BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                DST_GPR(0),
+                                DST_REL(ABSOLUTE),
+                                DST_ELEM(ELEM_Y),
+                                CLAMP(1));
+    /* 20 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+                            SRC0_REL(ABSOLUTE),
+                            SRC0_ELEM(ELEM_Z),
+                            SRC0_NEG(0),
+                            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                            SRC1_REL(ABSOLUTE),
+                            SRC1_ELEM(ELEM_Z),
+                            SRC1_NEG(0),
+                            INDEX_MODE(SQ_INDEX_AR_X),
+                            PRED_SEL(SQ_PRED_SEL_OFF),
+                            LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                SRC0_ABS(0),
+                                SRC1_ABS(0),
+                                UPDATE_EXECUTE_MASK(0),
+                                UPDATE_PRED(0),
+                                WRITE_MASK(1),
+                                FOG_MERGE(0),
+                                OMOD(SQ_ALU_OMOD_OFF),
+                                ALU_INST(SQ_OP2_INST_MOV),
+                                BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                DST_GPR(0),
+                                DST_REL(ABSOLUTE),
+                                DST_ELEM(ELEM_Z),
+                                CLAMP(1));
+    /* 21 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+                            SRC0_REL(ABSOLUTE),
+                            SRC0_ELEM(ELEM_W),
+                            SRC0_NEG(0),
+                            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                            SRC1_REL(ABSOLUTE),
+                            SRC1_ELEM(ELEM_W),
+                            SRC1_NEG(0),
+                            INDEX_MODE(SQ_INDEX_AR_X),
+                            PRED_SEL(SQ_PRED_SEL_OFF),
+                            LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                SRC0_ABS(0),
+                                SRC1_ABS(0),
+                                UPDATE_EXECUTE_MASK(0),
+                                UPDATE_PRED(0),
+                                WRITE_MASK(1),
+                                FOG_MERGE(0),
+                                OMOD(SQ_ALU_OMOD_OFF),
+                                ALU_INST(SQ_OP2_INST_MOV),
+                                BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                DST_GPR(0),
+                                DST_REL(ABSOLUTE),
+                                DST_ELEM(ELEM_W),
+                                CLAMP(1));
+
+    /* 22 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+                            SRC0_REL(ABSOLUTE),
+                            SRC0_ELEM(ELEM_X),
+                            SRC0_NEG(0),
+                            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                            SRC1_REL(ABSOLUTE),
+                            SRC1_ELEM(ELEM_X),
+                            SRC1_NEG(0),
+                            INDEX_MODE(SQ_INDEX_AR_X),
+                            PRED_SEL(SQ_PRED_SEL_OFF),
+                            LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                SRC0_ABS(0),
+                                SRC1_ABS(0),
+                                UPDATE_EXECUTE_MASK(0),
+                                UPDATE_PRED(0),
+                                WRITE_MASK(1),
+                                FOG_MERGE(0),
+                                OMOD(SQ_ALU_OMOD_OFF),
+                                ALU_INST(SQ_OP2_INST_MOV),
+                                BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                DST_GPR(1),
+                                DST_REL(ABSOLUTE),
+                                DST_ELEM(ELEM_X),
+                                CLAMP(1));
+    /* 23 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+                            SRC0_REL(ABSOLUTE),
+                            SRC0_ELEM(ELEM_Y),
+                            SRC0_NEG(0),
+                            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                            SRC1_REL(ABSOLUTE),
+                            SRC1_ELEM(ELEM_Y),
+                            SRC1_NEG(0),
+                            INDEX_MODE(SQ_INDEX_AR_X),
+                            PRED_SEL(SQ_PRED_SEL_OFF),
+                            LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                SRC0_ABS(0),
+                                SRC1_ABS(0),
+                                UPDATE_EXECUTE_MASK(0),
+                                UPDATE_PRED(0),
+                                WRITE_MASK(1),
+                                FOG_MERGE(0),
+                                OMOD(SQ_ALU_OMOD_OFF),
+                                ALU_INST(SQ_OP2_INST_MOV),
+                                BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                DST_GPR(1),
+                                DST_REL(ABSOLUTE),
+                                DST_ELEM(ELEM_Y),
+                                CLAMP(1));
+    /* 24 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+                            SRC0_REL(ABSOLUTE),
+                            SRC0_ELEM(ELEM_Z),
+                            SRC0_NEG(0),
+                            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                            SRC1_REL(ABSOLUTE),
+                            SRC1_ELEM(ELEM_Z),
+                            SRC1_NEG(0),
+                            INDEX_MODE(SQ_INDEX_AR_X),
+                            PRED_SEL(SQ_PRED_SEL_OFF),
+                            LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                SRC0_ABS(0),
+                                SRC1_ABS(0),
+                                UPDATE_EXECUTE_MASK(0),
+                                UPDATE_PRED(0),
+                                WRITE_MASK(1),
+                                FOG_MERGE(0),
+                                OMOD(SQ_ALU_OMOD_OFF),
+                                ALU_INST(SQ_OP2_INST_MOV),
+                                BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                DST_GPR(1),
+                                DST_REL(ABSOLUTE),
+                                DST_ELEM(ELEM_Z),
+                                CLAMP(1));
+    /* 25 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+                            SRC0_REL(ABSOLUTE),
+                            SRC0_ELEM(ELEM_W),
+                            SRC0_NEG(0),
+                            SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                            SRC1_REL(ABSOLUTE),
+                            SRC1_ELEM(ELEM_W),
+                            SRC1_NEG(0),
+                            INDEX_MODE(SQ_INDEX_AR_X),
+                            PRED_SEL(SQ_PRED_SEL_OFF),
+                            LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                SRC0_ABS(0),
+                                SRC1_ABS(0),
+                                UPDATE_EXECUTE_MASK(0),
+                                UPDATE_PRED(0),
+                                WRITE_MASK(1),
+                                FOG_MERGE(0),
+                                OMOD(SQ_ALU_OMOD_OFF),
+                                ALU_INST(SQ_OP2_INST_MOV),
+                                BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                DST_GPR(1),
+                                DST_REL(ABSOLUTE),
+                                DST_ELEM(ELEM_W),
+                                CLAMP(1));
+
+    /* 26/27 - src */
     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
                             BC_FRAC_MODE(0),
                             FETCH_WHOLE_QUAD(0),
@@ -2592,7 +2869,7 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                             SRC_SEL_Z(SQ_SEL_0),
                             SRC_SEL_W(SQ_SEL_1));
     shader[i++] = TEX_DWORD_PAD;
-    /* 16/17 - mask */
+    /* 28/29 - mask */
     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
                             BC_FRAC_MODE(0),
                             FETCH_WHOLE_QUAD(0),
@@ -2621,34 +2898,5 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                             SRC_SEL_W(SQ_SEL_1));
     shader[i++] = TEX_DWORD_PAD;
 
-    /* 18/19 - src - non-mask */
-    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-                            BC_FRAC_MODE(0),
-                            FETCH_WHOLE_QUAD(0),
-                            RESOURCE_ID(0),
-                            SRC_GPR(0),
-                            SRC_REL(ABSOLUTE),
-                            R7xx_ALT_CONST(0));
-    shader[i++] = TEX_DWORD1(DST_GPR(0),
-                            DST_REL(ABSOLUTE),
-                            DST_SEL_X(SQ_SEL_X),
-                            DST_SEL_Y(SQ_SEL_Y),
-                            DST_SEL_Z(SQ_SEL_Z),
-                            DST_SEL_W(SQ_SEL_W),
-                            LOD_BIAS(0),
-                            COORD_TYPE_X(TEX_NORMALIZED),
-                            COORD_TYPE_Y(TEX_NORMALIZED),
-                            COORD_TYPE_Z(TEX_NORMALIZED),
-                            COORD_TYPE_W(TEX_NORMALIZED));
-    shader[i++] = TEX_DWORD2(OFFSET_X(0),
-                            OFFSET_Y(0),
-                            OFFSET_Z(0),
-                            SAMPLER_ID(0),
-                            SRC_SEL_X(SQ_SEL_X),
-                            SRC_SEL_Y(SQ_SEL_Y),
-                            SRC_SEL_Z(SQ_SEL_0),
-                            SRC_SEL_W(SQ_SEL_1));
-    shader[i++] = TEX_DWORD_PAD;
-
     return i;
 }
-- 
2.1.0

--------------------------------------------------------
ZTE Information Security Notice: The information contained in this mail (and any attachment transmitted herewith) is privileged and confidential and is intended for the exclusive use of the addressee(s).  If you are not an intended recipient, any disclosure, reproduction, distribution or other dissemination or use of the information contained is strictly prohibited.  If you have received this mail in error, please delete it and notify us immediately.


More information about the xorg-driver-ati mailing list