[PATCH xf86-video-ati resend 1/2] EXA/6xx/7xx: fast solid pixmap support
Grigori Goronzy
greg at chown.ath.cx
Tue May 31 11:28:19 UTC 2016
On 2016-05-27 11:05, Tan Hu wrote:
> Solid pixmaps are currently implemented with scratch pixmaps, which
> is slow. This replaces the hack with a proper implementation. The
> Composite shader can now either sample a src/mask or use a constant
> value.
>
> r6xx hardware is still used on some machines, so this change is
> ported from commit 94d0d14914a025525a0766669b556eaa6681def7.
>
Reviewed-by: Grigori Goronzy <greg at chown.ath.cx>
Note that I don't have any R600 class hardware and I cannot test this.
However, AFAIR, any problems with these code paths are very obvious. If
GTK+ applications render fine, the code should work correctly.
> Signed-off-by: Tan Hu <tan.hu at zte.com.cn>
> ---
> src/r600_exa.c | 257 ++++++++++++++++++++++++--------
> src/r600_shader.c | 428
> ++++++++++++++++++++++++++++++++++++++++++------------
> 2 files changed, 531 insertions(+), 154 deletions(-)
>
> diff --git a/src/r600_exa.c b/src/r600_exa.c
> index 8d11ce7..a95f320 100644
> --- a/src/r600_exa.c
> +++ b/src/r600_exa.c
> @@ -1165,6 +1165,134 @@ static Bool R600CheckComposite(int op,
> PicturePtr pSrcPicture, PicturePtr pMaskP
>
> }
>
> +static void R600SetSolidConsts(ScrnInfoPtr pScrn, float *buf, int
> format, uint32_t fg, int unit)
> +{
> + RADEONInfoPtr info = RADEONPTR(pScrn);
> + struct radeon_accel_state *accel_state = info->accel_state;
> + float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0;
> +
> + uint32_t w = (fg >> 24) & 0xff;
> + uint32_t z = (fg >> 16) & 0xff;
> + uint32_t y = (fg >> 8) & 0xff;
> + uint32_t x = (fg >> 0) & 0xff;
> + float xf = (float)x / 255; /* R */
> + float yf = (float)y / 255; /* G */
> + float zf = (float)z / 255; /* B */
> + float wf = (float)w / 255; /* A */
> +
> + /* component swizzles */
> + switch (format) {
> + case PICT_a1r5g5b5:
> + case PICT_a8r8g8b8:
> + pix_r = zf; /* R */
> + pix_g = yf; /* G */
> + pix_b = xf; /* B */
> + pix_a = wf; /* A */
> + break;
> + case PICT_a8b8g8r8:
> + pix_r = xf; /* R */
> + pix_g = yf; /* G */
> + pix_b = zf; /* B */
> + pix_a = wf; /* A */
> + break;
> + case PICT_x8b8g8r8:
> + pix_r = xf; /* R */
> + pix_g = yf; /* G */
> + pix_b = zf; /* B */
> + pix_a = 1.0; /* A */
> + break;
> + case PICT_b8g8r8a8:
> + pix_r = yf; /* R */
> + pix_g = zf; /* G */
> + pix_b = wf; /* B */
> + pix_a = xf; /* A */
> + break;
> + case PICT_b8g8r8x8:
> + pix_r = yf; /* R */
> + pix_g = zf; /* G */
> + pix_b = wf; /* B */
> + pix_a = 1.0; /* A */
> + break;
> + case PICT_x1r5g5b5:
> + case PICT_x8r8g8b8:
> + case PICT_r5g6b5:
> + pix_r = zf; /* R */
> + pix_g = yf; /* G */
> + pix_b = xf; /* B */
> + pix_a = 1.0; /* A */
> + break;
> + case PICT_a8:
> + pix_r = 0.0; /* R */
> + pix_g = 0.0; /* G */
> + pix_b = 0.0; /* B */
> + pix_a = xf; /* A */
> + break;
> + default:
> + ErrorF("Bad format 0x%x\n", format);
> + }
> +
> + if (unit == 0) {
> + if (!accel_state->msk_pic) {
> + if (PICT_FORMAT_RGB(format) == 0) {
> + pix_r = 0.0;
> + pix_g = 0.0;
> + pix_b = 0.0;
> + }
> +
> + if (PICT_FORMAT_A(format) == 0)
> + pix_a = 1.0;
> + } else {
> + if (accel_state->component_alpha) {
> + if (accel_state->src_alpha) {
> + if (PICT_FORMAT_A(format) == 0) {
> + pix_r = 1.0;
> + pix_g = 1.0;
> + pix_b = 1.0;
> + pix_a = 1.0;
> + } else {
> + pix_r = pix_a;
> + pix_g = pix_a;
> + pix_b = pix_a;
> + }
> + } else {
> + if (PICT_FORMAT_A(format) == 0)
> + pix_a = 1.0;
> + }
> + } else {
> + if (PICT_FORMAT_RGB(format) == 0) {
> + pix_r = 0;
> + pix_g = 0;
> + pix_b = 0;
> + }
> +
> + if (PICT_FORMAT_A(format) == 0)
> + pix_a = 1.0;
> + }
> + }
> + } else {
> + if (accel_state->component_alpha) {
> + if (PICT_FORMAT_A(format) == 0)
> + pix_a = 1.0;
> + } else {
> + if (PICT_FORMAT_A(format) == 0) {
> + pix_r = 1.0;
> + pix_g = 1.0;
> + pix_b = 1.0;
> + pix_a = 1.0;
> + } else {
> + pix_r = pix_a;
> + pix_g = pix_a;
> + pix_b = pix_a;
> + }
> + }
> + }
> +
> + buf[0] = pix_r;
> + buf[1] = pix_g;
> + buf[2] = pix_b;
> + buf[3] = pix_a;
> +}
> +
> static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
> PicturePtr pMaskPicture, PicturePtr pDstPicture,
> PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
> @@ -1177,31 +1305,27 @@ static Bool R600PrepareComposite(int op,
> PicturePtr pSrcPicture,
> cb_config_t cb_conf;
> shader_config_t vs_conf, ps_conf;
> struct r600_accel_object src_obj, mask_obj, dst_obj;
> + uint32_t ps_bool_consts = 0;
> + float ps_alu_consts[8];
>
> if (pDst->drawable.bitsPerPixel < 8 || (pSrc &&
> pSrc->drawable.bitsPerPixel < 8))
> return FALSE;
>
> - if (!pSrc) {
> - pSrc = RADEONSolidPixmap(pScreen,
> pSrcPicture->pSourcePict->solidFill.color);
> - if (!pSrc)
> - RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
> + if (pSrc) {
> + src_obj.bo = radeon_get_pixmap_bo(pSrc);
> + src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
> + src_obj.surface = radeon_get_pixmap_surface(pSrc);
> + src_obj.pitch = exaGetPixmapPitch(pSrc) /
> (pSrc->drawable.bitsPerPixel / 8);
> + src_obj.width = pSrc->drawable.width;
> + src_obj.height = pSrc->drawable.height;
> + src_obj.bpp = pSrc->drawable.bitsPerPixel;
> + src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
> }
>
> dst_obj.bo = radeon_get_pixmap_bo(pDst);
> - src_obj.bo = radeon_get_pixmap_bo(pSrc);
> dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
> - src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
> dst_obj.surface = radeon_get_pixmap_surface(pDst);
> - src_obj.surface = radeon_get_pixmap_surface(pSrc);
> -
> - src_obj.pitch = exaGetPixmapPitch(pSrc) /
> (pSrc->drawable.bitsPerPixel / 8);
> dst_obj.pitch = exaGetPixmapPitch(pDst) /
> (pDst->drawable.bitsPerPixel / 8);
> -
> - src_obj.width = pSrc->drawable.width;
> - src_obj.height = pSrc->drawable.height;
> - src_obj.bpp = pSrc->drawable.bitsPerPixel;
> - src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
> -
> dst_obj.width = pDst->drawable.width;
> dst_obj.height = pDst->drawable.height;
> dst_obj.bpp = pDst->drawable.bitsPerPixel;
> @@ -1211,34 +1335,17 @@ static Bool R600PrepareComposite(int op,
> PicturePtr pSrcPicture,
> dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
>
> if (pMaskPicture) {
> - if (!pMask) {
> - pMask = RADEONSolidPixmap(pScreen,
> pMaskPicture->pSourcePict->solidFill.color);
> - if (!pMask) {
> - if (!pSrcPicture->pDrawable)
> - pScreen->DestroyPixmap(pSrc);
> - RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
> - }
> + if (pMask) {
> + mask_obj.bo = radeon_get_pixmap_bo(pMask);
> + mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
> + mask_obj.surface = radeon_get_pixmap_surface(pMask);
> + mask_obj.pitch = exaGetPixmapPitch(pMask) /
> (pMask->drawable.bitsPerPixel / 8);
> + mask_obj.width = pMask->drawable.width;
> + mask_obj.height = pMask->drawable.height;
> + mask_obj.bpp = pMask->drawable.bitsPerPixel;
> + mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
> }
>
> - mask_obj.bo = radeon_get_pixmap_bo(pMask);
> - mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
> - mask_obj.surface = radeon_get_pixmap_surface(pMask);
> -
> - mask_obj.pitch = exaGetPixmapPitch(pMask) /
> (pMask->drawable.bitsPerPixel / 8);
> -
> - mask_obj.width = pMask->drawable.width;
> - mask_obj.height = pMask->drawable.height;
> - mask_obj.bpp = pMask->drawable.bitsPerPixel;
> - mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
> -
> - if (!R600SetAccelState(pScrn,
> - &src_obj,
> - &mask_obj,
> - &dst_obj,
> - accel_state->comp_vs_offset, accel_state->comp_ps_offset,
> - 3, 0xffffffff))
> - return FALSE;
> -
> accel_state->msk_pic = pMaskPicture;
> if (pMaskPicture->componentAlpha) {
> accel_state->component_alpha = TRUE;
> @@ -1251,19 +1358,19 @@ static Bool R600PrepareComposite(int op,
> PicturePtr pSrcPicture,
> accel_state->src_alpha = FALSE;
> }
> } else {
> - if (!R600SetAccelState(pScrn,
> - &src_obj,
> - NULL,
> - &dst_obj,
> - accel_state->comp_vs_offset, accel_state->comp_ps_offset,
> - 3, 0xffffffff))
> - return FALSE;
> -
> accel_state->msk_pic = NULL;
> accel_state->component_alpha = FALSE;
> accel_state->src_alpha = FALSE;
> }
>
> + if (!R600SetAccelState(pScrn,
> + pSrc ? &src_obj : NULL,
> + (pMaskPicture && pMask) ? &mask_obj : NULL,
> + &dst_obj,
> + accel_state->comp_vs_offset, accel_state->comp_ps_offset,
> + 3, 0xffffffff))
> + return FALSE;
> +
> if (!R600GetDestFormat(pDstPicture, &dst_format))
> return FALSE;
>
> @@ -1284,10 +1391,13 @@ static Bool R600PrepareComposite(int op,
> PicturePtr pSrcPicture,
> r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width,
> accel_state->dst_obj.height);
> r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width,
> accel_state->dst_obj.height);
>
> - if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
> - R600IBDiscard(pScrn);
> - return FALSE;
> - }
> + if (pSrc) {
> + if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
> + R600IBDiscard(pScrn);
> + return FALSE;
> + }
> + } else
> + accel_state->is_transform[0] = FALSE;
>
> if (pMask) {
> if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
> @@ -1297,12 +1407,16 @@ static Bool R600PrepareComposite(int op,
> PicturePtr pSrcPicture,
> } else
> accel_state->is_transform[1] = FALSE;
>
> + if (pSrc)
> + ps_bool_consts |= (1 << 0);
> + if (pMask)
> + ps_bool_consts |= (1 << 1);
> + r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts);
> +
> if (pMask) {
> r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
> - r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
> } else {
> r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
> - r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
> }
>
> /* Shader */
> @@ -1315,7 +1429,7 @@ static Bool R600PrepareComposite(int op,
> PicturePtr pSrcPicture,
>
> ps_conf.shader_addr = accel_state->ps_mc_addr;
> ps_conf.shader_size = accel_state->ps_size;
> - ps_conf.num_gprs = 3;
> + ps_conf.num_gprs = 2;
> ps_conf.stack_size = 1;
> ps_conf.uncached_first_inst = 1;
> ps_conf.clamp_consts = 0;
> @@ -1381,6 +1495,27 @@ static Bool R600PrepareComposite(int op,
> PicturePtr pSrcPicture,
> else
> r600_set_spi(pScrn, (1 - 1), 1);
>
> + if (!pSrc) {
> + /* solid src color */
> + R600SetSolidConsts(pScrn, &ps_alu_consts[0], pSrcPicture->format,
> + pSrcPicture->pSourcePict->solidFill.color, 0);
> + }
> +
> + if (!pMaskPicture) {
> + /* use identity constant if there is no mask */
> + ps_alu_consts[4] = 1.0;
> + ps_alu_consts[5] = 1.0;
> + ps_alu_consts[6] = 1.0;
> + ps_alu_consts[7] = 1.0;
> + } else if (!pMask) {
> + /* solid mask color */
> + R600SetSolidConsts(pScrn, &ps_alu_consts[4], pMaskPicture->format,
> + pMaskPicture->pSourcePict->solidFill.color, 1);
> + }
> +
> + r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps,
> + sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
> +
> if (accel_state->vsync)
> RADEONVlineHelperClear(pScrn);
>
> @@ -1405,7 +1540,7 @@ static void R600FinishComposite(ScrnInfoPtr
> pScrn, PixmapPtr pDst,
> accel_state->vline_y1,
> accel_state->vline_y2);
>
> - vtx_size = accel_state->msk_pic ? 24 : 16;
> + vtx_size = accel_state->msk_pix ? 24 : 16;
>
> r600_finish_op(pScrn, vtx_size);
> }
> @@ -1418,12 +1553,6 @@ static void R600DoneComposite(PixmapPtr pDst)
> struct radeon_accel_state *accel_state = info->accel_state;
>
> R600FinishComposite(pScrn, pDst, accel_state);
> -
> - if (!accel_state->src_pic->pDrawable)
> - pScreen->DestroyPixmap(accel_state->src_pix);
> -
> - if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
> - pScreen->DestroyPixmap(accel_state->msk_pix);
> }
>
> static void R600Composite(PixmapPtr pDst,
> @@ -1455,7 +1584,7 @@ static void R600Composite(PixmapPtr pDst,
> if (accel_state->vsync)
> RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
>
> - if (accel_state->msk_pic) {
> + if (accel_state->msk_pix) {
>
> vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
>
> diff --git a/src/r600_shader.c b/src/r600_shader.c
> index 4cb2fc8..4b857ab 100644
> --- a/src/r600_shader.c
> +++ b/src/r600_shader.c
> @@ -2318,9 +2318,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> int i = 0;
>
> /* 0 */
> - shader[i++] = CF_DWORD0(ADDR(3));
> + /* call fetch-mask if boolean1 == true */
> + shader[i++] = CF_DWORD0(ADDR(10));
> shader[i++] = CF_DWORD1(POP_COUNT(0),
> - CF_CONST(0),
> + CF_CONST(1),
> COND(SQ_CF_COND_BOOL),
> I_COUNT(0),
> CALL_COUNT(0),
> @@ -2330,9 +2331,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> WHOLE_QUAD_MODE(0),
> BARRIER(0));
> /* 1 */
> - shader[i++] = CF_DWORD0(ADDR(7));
> + /* call read-constant-mask if boolean1 == false */
> + shader[i++] = CF_DWORD0(ADDR(12));
> shader[i++] = CF_DWORD1(POP_COUNT(0),
> - CF_CONST(0),
> + CF_CONST(1),
> COND(SQ_CF_COND_NOT_BOOL),
> I_COUNT(0),
> CALL_COUNT(0),
> @@ -2342,33 +2344,36 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> WHOLE_QUAD_MODE(0),
> BARRIER(0));
> /* 2 */
> - shader[i++] = CF_DWORD0(ADDR(0));
> + /* call fetch-src if boolean0 == true */
> + shader[i++] = CF_DWORD0(ADDR(6));
> shader[i++] = CF_DWORD1(POP_COUNT(0),
> CF_CONST(0),
> - COND(SQ_CF_COND_ACTIVE),
> + COND(SQ_CF_COND_BOOL),
> I_COUNT(0),
> CALL_COUNT(0),
> - END_OF_PROGRAM(1),
> + END_OF_PROGRAM(0),
> VALID_PIXEL_MODE(0),
> - CF_INST(SQ_CF_INST_NOP),
> + CF_INST(SQ_CF_INST_CALL),
> WHOLE_QUAD_MODE(0),
> - BARRIER(1));
> + BARRIER(0));
>
> - /* 3 - mask sub */
> - shader[i++] = CF_DWORD0(ADDR(14));
> + /* 3 */
> + /* call read-constant-src if boolean0 == false */
> + shader[i++] = CF_DWORD0(ADDR(8));
> shader[i++] = CF_DWORD1(POP_COUNT(0),
> - CF_CONST(0),
> - COND(SQ_CF_COND_ACTIVE),
> - I_COUNT(2),
> - CALL_COUNT(0),
> - END_OF_PROGRAM(0),
> - VALID_PIXEL_MODE(0),
> - CF_INST(SQ_CF_INST_TEX),
> - WHOLE_QUAD_MODE(0),
> - BARRIER(1));
> + CF_CONST(0),
> + COND(SQ_CF_COND_NOT_BOOL),
> + I_COUNT(0),
> + CALL_COUNT(0),
> + END_OF_PROGRAM(0),
> + VALID_PIXEL_MODE(0),
> + CF_INST(SQ_CF_INST_CALL),
> + WHOLE_QUAD_MODE(0),
> + BARRIER(0));
>
> /* 4 */
> - shader[i++] = CF_ALU_DWORD0(ADDR(10),
> + /* src IN mask (GPR0 := GPR1 .* GPR0) */
> + shader[i++] = CF_ALU_DWORD0(ADDR(14),
> KCACHE_BANK0(0),
> KCACHE_BANK1(0),
> KCACHE_MODE0(SQ_CF_KCACHE_NOP));
> @@ -2382,9 +2387,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> BARRIER(1));
>
> /* 5 */
> + /* export pixel data */
> shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
> TYPE(SQ_EXPORT_PIXEL),
> - RW_GPR(2),
> + RW_GPR(0),
> RW_REL(ABSOLUTE),
> INDEX_GPR(0),
> ELEM_SIZE(1));
> @@ -2394,55 +2400,57 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> SRC_SEL_W(SQ_SEL_W),
> R6xx_ELEM_LOOP(0),
> BURST_COUNT(1),
> - END_OF_PROGRAM(0),
> + END_OF_PROGRAM(1),
> VALID_PIXEL_MODE(0),
> CF_INST(SQ_CF_INST_EXPORT_DONE),
> WHOLE_QUAD_MODE(0),
> BARRIER(1));
> + /* subroutine fetch src */
> /* 6 */
> - shader[i++] = CF_DWORD0(ADDR(0));
> + /* fetch src into GPR0*/
> + shader[i++] = CF_DWORD0(ADDR(26));
> shader[i++] = CF_DWORD1(POP_COUNT(0),
> CF_CONST(0),
> COND(SQ_CF_COND_ACTIVE),
> - I_COUNT(0),
> + I_COUNT(1),
> CALL_COUNT(0),
> END_OF_PROGRAM(0),
> VALID_PIXEL_MODE(0),
> - CF_INST(SQ_CF_INST_RETURN),
> + CF_INST(SQ_CF_INST_TEX),
> WHOLE_QUAD_MODE(0),
> BARRIER(1));
>
> - /* 7 non-mask sub */
> - shader[i++] = CF_DWORD0(ADDR(18));
> + /* 7 */
> + /* return */
> + shader[i++] = CF_DWORD0(ADDR(0));
> shader[i++] = CF_DWORD1(POP_COUNT(0),
> CF_CONST(0),
> COND(SQ_CF_COND_ACTIVE),
> - I_COUNT(1),
> + I_COUNT(0),
> CALL_COUNT(0),
> END_OF_PROGRAM(0),
> VALID_PIXEL_MODE(0),
> - CF_INST(SQ_CF_INST_TEX),
> + CF_INST(SQ_CF_INST_RETURN),
> WHOLE_QUAD_MODE(0),
> BARRIER(1));
> +
> + /* subroutine read-constant-src*/
> /* 8 */
> - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
> - TYPE(SQ_EXPORT_PIXEL),
> - RW_GPR(0),
> - RW_REL(ABSOLUTE),
> - INDEX_GPR(0),
> - ELEM_SIZE(1));
> - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
> - SRC_SEL_Y(SQ_SEL_Y),
> - SRC_SEL_Z(SQ_SEL_Z),
> - SRC_SEL_W(SQ_SEL_W),
> - R6xx_ELEM_LOOP(0),
> - BURST_COUNT(1),
> - END_OF_PROGRAM(0),
> - VALID_PIXEL_MODE(0),
> - CF_INST(SQ_CF_INST_EXPORT_DONE),
> - WHOLE_QUAD_MODE(0),
> - BARRIER(1));
> + /* read constants into GPR0 */
> + shader[i++] = CF_ALU_DWORD0(ADDR(18),
> + KCACHE_BANK0(0),
> + KCACHE_BANK1(0),
> + KCACHE_MODE0(SQ_CF_KCACHE_NOP));
> + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
> + KCACHE_ADDR0(0),
> + KCACHE_ADDR1(0),
> + I_COUNT(4),
> + USES_WATERFALL(0),
> + CF_INST(SQ_CF_INST_ALU),
> + WHOLE_QUAD_MODE(0),
> + BARRIER(1));
> /* 9 */
> + /* return */
> shader[i++] = CF_DWORD0(ADDR(0));
> shader[i++] = CF_DWORD1(POP_COUNT(0),
> CF_CONST(0),
> @@ -2455,8 +2463,67 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> WHOLE_QUAD_MODE(0),
> BARRIER(1));
>
> - /* 10 - alu 0 */
> - /* MUL gpr[2].x gpr[1].x gpr[0].x */
> + /* subroutine fetch mask */
> + /* 10 */
> + /* fetch mask into GPR1*/
> + shader[i++] = CF_DWORD0(ADDR(28));
> + shader[i++] = CF_DWORD1(POP_COUNT(0),
> + CF_CONST(0),
> + COND(SQ_CF_COND_ACTIVE),
> + I_COUNT(1),
> + CALL_COUNT(0),
> + END_OF_PROGRAM(0),
> + VALID_PIXEL_MODE(0),
> + CF_INST(SQ_CF_INST_TEX),
> + WHOLE_QUAD_MODE(0),
> + BARRIER(1));
> +
> + /* 11 */
> + /* return */
> + shader[i++] = CF_DWORD0(ADDR(0));
> + shader[i++] = CF_DWORD1(POP_COUNT(0),
> + CF_CONST(0),
> + COND(SQ_CF_COND_ACTIVE),
> + I_COUNT(0),
> + CALL_COUNT(0),
> + END_OF_PROGRAM(0),
> + VALID_PIXEL_MODE(0),
> + CF_INST(SQ_CF_INST_RETURN),
> + WHOLE_QUAD_MODE(0),
> + BARRIER(1));
> +
> + /* subroutine read-constant-mask*/
> + /* 12 */
> + /* read constants into GPR1 */
> + shader[i++] = CF_ALU_DWORD0(ADDR(22),
> + KCACHE_BANK0(0),
> + KCACHE_BANK1(0),
> + KCACHE_MODE0(SQ_CF_KCACHE_NOP));
> + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
> + KCACHE_ADDR0(0),
> + KCACHE_ADDR1(0),
> + I_COUNT(4),
> + USES_WATERFALL(0),
> + CF_INST(SQ_CF_INST_ALU),
> + WHOLE_QUAD_MODE(0),
> + BARRIER(1));
> + /* 13 */
> + /* return */
> + shader[i++] = CF_DWORD0(ADDR(0));
> + shader[i++] = CF_DWORD1(POP_COUNT(0),
> + CF_CONST(0),
> + COND(SQ_CF_COND_ACTIVE),
> + I_COUNT(0),
> + CALL_COUNT(0),
> + END_OF_PROGRAM(0),
> + VALID_PIXEL_MODE(0),
> + CF_INST(SQ_CF_INST_RETURN),
> + WHOLE_QUAD_MODE(0),
> + BARRIER(1));
> + /* ALU clauses */
> +
> + /* 14 - alu 0 */
> + /* MUL gpr[0].x gpr[1].x gpr[0].x */
> shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
> SRC0_REL(ABSOLUTE),
> SRC0_ELEM(ELEM_X),
> @@ -2478,12 +2545,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> OMOD(SQ_ALU_OMOD_OFF),
> ALU_INST(SQ_OP2_INST_MUL),
> BANK_SWIZZLE(SQ_ALU_VEC_012),
> - DST_GPR(2),
> + DST_GPR(0),
> DST_REL(ABSOLUTE),
> DST_ELEM(ELEM_X),
> CLAMP(1));
> - /* 11 - alu 1 */
> - /* MUL gpr[2].y gpr[1].y gpr[0].y */
> + /* 15 - alu 1 */
> + /* MUL gpr[0].y gpr[1].y gpr[0].y */
> shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
> SRC0_REL(ABSOLUTE),
> SRC0_ELEM(ELEM_Y),
> @@ -2505,12 +2572,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> OMOD(SQ_ALU_OMOD_OFF),
> ALU_INST(SQ_OP2_INST_MUL),
> BANK_SWIZZLE(SQ_ALU_VEC_012),
> - DST_GPR(2),
> + DST_GPR(0),
> DST_REL(ABSOLUTE),
> DST_ELEM(ELEM_Y),
> CLAMP(1));
> - /* 12 - alu 2 */
> - /* MUL gpr[2].z gpr[1].z gpr[0].z */
> + /* 16 - alu 2 */
> + /* MUL gpr[0].z gpr[1].z gpr[0].z */
> shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
> SRC0_REL(ABSOLUTE),
> SRC0_ELEM(ELEM_Z),
> @@ -2532,12 +2599,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> OMOD(SQ_ALU_OMOD_OFF),
> ALU_INST(SQ_OP2_INST_MUL),
> BANK_SWIZZLE(SQ_ALU_VEC_012),
> - DST_GPR(2),
> + DST_GPR(0),
> DST_REL(ABSOLUTE),
> DST_ELEM(ELEM_Z),
> CLAMP(1));
> - /* 13 - alu 3 */
> - /* MUL gpr[2].w gpr[1].w gpr[0].w */
> + /* 17 - alu 3 */
> + /* MUL gpr[0].w gpr[1].w gpr[0].w */
> shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
> SRC0_REL(ABSOLUTE),
> SRC0_ELEM(ELEM_W),
> @@ -2559,12 +2626,222 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> OMOD(SQ_ALU_OMOD_OFF),
> ALU_INST(SQ_OP2_INST_MUL),
> BANK_SWIZZLE(SQ_ALU_VEC_012),
> - DST_GPR(2),
> + DST_GPR(0),
> DST_REL(ABSOLUTE),
> DST_ELEM(ELEM_W),
> CLAMP(1));
>
> - /* 14/15 - src - mask */
> + /* 18 */
> + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
> + SRC0_REL(ABSOLUTE),
> + SRC0_ELEM(ELEM_X),
> + SRC0_NEG(0),
> + SRC1_SEL(ALU_SRC_GPR_BASE + 0),
> + SRC1_REL(ABSOLUTE),
> + SRC1_ELEM(ELEM_X),
> + SRC1_NEG(0),
> + INDEX_MODE(SQ_INDEX_AR_X),
> + PRED_SEL(SQ_PRED_SEL_OFF),
> + LAST(0));
> + shader[i++] = ALU_DWORD1_OP2(ChipSet,
> + SRC0_ABS(0),
> + SRC1_ABS(0),
> + UPDATE_EXECUTE_MASK(0),
> + UPDATE_PRED(0),
> + WRITE_MASK(1),
> + FOG_MERGE(0),
> + OMOD(SQ_ALU_OMOD_OFF),
> + ALU_INST(SQ_OP2_INST_MOV),
> + BANK_SWIZZLE(SQ_ALU_VEC_012),
> + DST_GPR(0),
> + DST_REL(ABSOLUTE),
> + DST_ELEM(ELEM_X),
> + CLAMP(1));
> + /* 19 */
> + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
> + SRC0_REL(ABSOLUTE),
> + SRC0_ELEM(ELEM_Y),
> + SRC0_NEG(0),
> + SRC1_SEL(ALU_SRC_GPR_BASE + 0),
> + SRC1_REL(ABSOLUTE),
> + SRC1_ELEM(ELEM_Y),
> + SRC1_NEG(0),
> + INDEX_MODE(SQ_INDEX_AR_X),
> + PRED_SEL(SQ_PRED_SEL_OFF),
> + LAST(0));
> + shader[i++] = ALU_DWORD1_OP2(ChipSet,
> + SRC0_ABS(0),
> + SRC1_ABS(0),
> + UPDATE_EXECUTE_MASK(0),
> + UPDATE_PRED(0),
> + WRITE_MASK(1),
> + FOG_MERGE(0),
> + OMOD(SQ_ALU_OMOD_OFF),
> + ALU_INST(SQ_OP2_INST_MOV),
> + BANK_SWIZZLE(SQ_ALU_VEC_012),
> + DST_GPR(0),
> + DST_REL(ABSOLUTE),
> + DST_ELEM(ELEM_Y),
> + CLAMP(1));
> + /* 20 */
> + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
> + SRC0_REL(ABSOLUTE),
> + SRC0_ELEM(ELEM_Z),
> + SRC0_NEG(0),
> + SRC1_SEL(ALU_SRC_GPR_BASE + 0),
> + SRC1_REL(ABSOLUTE),
> + SRC1_ELEM(ELEM_Z),
> + SRC1_NEG(0),
> + INDEX_MODE(SQ_INDEX_AR_X),
> + PRED_SEL(SQ_PRED_SEL_OFF),
> + LAST(0));
> + shader[i++] = ALU_DWORD1_OP2(ChipSet,
> + SRC0_ABS(0),
> + SRC1_ABS(0),
> + UPDATE_EXECUTE_MASK(0),
> + UPDATE_PRED(0),
> + WRITE_MASK(1),
> + FOG_MERGE(0),
> + OMOD(SQ_ALU_OMOD_OFF),
> + ALU_INST(SQ_OP2_INST_MOV),
> + BANK_SWIZZLE(SQ_ALU_VEC_012),
> + DST_GPR(0),
> + DST_REL(ABSOLUTE),
> + DST_ELEM(ELEM_Z),
> + CLAMP(1));
> + /* 21 */
> + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
> + SRC0_REL(ABSOLUTE),
> + SRC0_ELEM(ELEM_W),
> + SRC0_NEG(0),
> + SRC1_SEL(ALU_SRC_GPR_BASE + 0),
> + SRC1_REL(ABSOLUTE),
> + SRC1_ELEM(ELEM_W),
> + SRC1_NEG(0),
> + INDEX_MODE(SQ_INDEX_AR_X),
> + PRED_SEL(SQ_PRED_SEL_OFF),
> + LAST(1));
> + shader[i++] = ALU_DWORD1_OP2(ChipSet,
> + SRC0_ABS(0),
> + SRC1_ABS(0),
> + UPDATE_EXECUTE_MASK(0),
> + UPDATE_PRED(0),
> + WRITE_MASK(1),
> + FOG_MERGE(0),
> + OMOD(SQ_ALU_OMOD_OFF),
> + ALU_INST(SQ_OP2_INST_MOV),
> + BANK_SWIZZLE(SQ_ALU_VEC_012),
> + DST_GPR(0),
> + DST_REL(ABSOLUTE),
> + DST_ELEM(ELEM_W),
> + CLAMP(1));
> +
> + /* 22 */
> + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
> + SRC0_REL(ABSOLUTE),
> + SRC0_ELEM(ELEM_X),
> + SRC0_NEG(0),
> + SRC1_SEL(ALU_SRC_GPR_BASE + 0),
> + SRC1_REL(ABSOLUTE),
> + SRC1_ELEM(ELEM_X),
> + SRC1_NEG(0),
> + INDEX_MODE(SQ_INDEX_AR_X),
> + PRED_SEL(SQ_PRED_SEL_OFF),
> + LAST(0));
> + shader[i++] = ALU_DWORD1_OP2(ChipSet,
> + SRC0_ABS(0),
> + SRC1_ABS(0),
> + UPDATE_EXECUTE_MASK(0),
> + UPDATE_PRED(0),
> + WRITE_MASK(1),
> + FOG_MERGE(0),
> + OMOD(SQ_ALU_OMOD_OFF),
> + ALU_INST(SQ_OP2_INST_MOV),
> + BANK_SWIZZLE(SQ_ALU_VEC_012),
> + DST_GPR(1),
> + DST_REL(ABSOLUTE),
> + DST_ELEM(ELEM_X),
> + CLAMP(1));
> + /* 23 */
> + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
> + SRC0_REL(ABSOLUTE),
> + SRC0_ELEM(ELEM_Y),
> + SRC0_NEG(0),
> + SRC1_SEL(ALU_SRC_GPR_BASE + 0),
> + SRC1_REL(ABSOLUTE),
> + SRC1_ELEM(ELEM_Y),
> + SRC1_NEG(0),
> + INDEX_MODE(SQ_INDEX_AR_X),
> + PRED_SEL(SQ_PRED_SEL_OFF),
> + LAST(0));
> + shader[i++] = ALU_DWORD1_OP2(ChipSet,
> + SRC0_ABS(0),
> + SRC1_ABS(0),
> + UPDATE_EXECUTE_MASK(0),
> + UPDATE_PRED(0),
> + WRITE_MASK(1),
> + FOG_MERGE(0),
> + OMOD(SQ_ALU_OMOD_OFF),
> + ALU_INST(SQ_OP2_INST_MOV),
> + BANK_SWIZZLE(SQ_ALU_VEC_012),
> + DST_GPR(1),
> + DST_REL(ABSOLUTE),
> + DST_ELEM(ELEM_Y),
> + CLAMP(1));
> + /* 24 */
> + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
> + SRC0_REL(ABSOLUTE),
> + SRC0_ELEM(ELEM_Z),
> + SRC0_NEG(0),
> + SRC1_SEL(ALU_SRC_GPR_BASE + 0),
> + SRC1_REL(ABSOLUTE),
> + SRC1_ELEM(ELEM_Z),
> + SRC1_NEG(0),
> + INDEX_MODE(SQ_INDEX_AR_X),
> + PRED_SEL(SQ_PRED_SEL_OFF),
> + LAST(0));
> + shader[i++] = ALU_DWORD1_OP2(ChipSet,
> + SRC0_ABS(0),
> + SRC1_ABS(0),
> + UPDATE_EXECUTE_MASK(0),
> + UPDATE_PRED(0),
> + WRITE_MASK(1),
> + FOG_MERGE(0),
> + OMOD(SQ_ALU_OMOD_OFF),
> + ALU_INST(SQ_OP2_INST_MOV),
> + BANK_SWIZZLE(SQ_ALU_VEC_012),
> + DST_GPR(1),
> + DST_REL(ABSOLUTE),
> + DST_ELEM(ELEM_Z),
> + CLAMP(1));
> + /* 25 */
> + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
> + SRC0_REL(ABSOLUTE),
> + SRC0_ELEM(ELEM_W),
> + SRC0_NEG(0),
> + SRC1_SEL(ALU_SRC_GPR_BASE + 0),
> + SRC1_REL(ABSOLUTE),
> + SRC1_ELEM(ELEM_W),
> + SRC1_NEG(0),
> + INDEX_MODE(SQ_INDEX_AR_X),
> + PRED_SEL(SQ_PRED_SEL_OFF),
> + LAST(1));
> + shader[i++] = ALU_DWORD1_OP2(ChipSet,
> + SRC0_ABS(0),
> + SRC1_ABS(0),
> + UPDATE_EXECUTE_MASK(0),
> + UPDATE_PRED(0),
> + WRITE_MASK(1),
> + FOG_MERGE(0),
> + OMOD(SQ_ALU_OMOD_OFF),
> + ALU_INST(SQ_OP2_INST_MOV),
> + BANK_SWIZZLE(SQ_ALU_VEC_012),
> + DST_GPR(1),
> + DST_REL(ABSOLUTE),
> + DST_ELEM(ELEM_W),
> + CLAMP(1));
> +
> + /* 26/27 - src */
> shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
> BC_FRAC_MODE(0),
> FETCH_WHOLE_QUAD(0),
> @@ -2592,7 +2869,7 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> SRC_SEL_Z(SQ_SEL_0),
> SRC_SEL_W(SQ_SEL_1));
> shader[i++] = TEX_DWORD_PAD;
> - /* 16/17 - mask */
> + /* 28/29 - mask */
> shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
> BC_FRAC_MODE(0),
> FETCH_WHOLE_QUAD(0),
> @@ -2621,34 +2898,5 @@ int R600_comp_ps(RADEONChipFamily ChipSet,
> uint32_t* shader)
> SRC_SEL_W(SQ_SEL_1));
> shader[i++] = TEX_DWORD_PAD;
>
> - /* 18/19 - src - non-mask */
> - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
> - BC_FRAC_MODE(0),
> - FETCH_WHOLE_QUAD(0),
> - RESOURCE_ID(0),
> - SRC_GPR(0),
> - SRC_REL(ABSOLUTE),
> - R7xx_ALT_CONST(0));
> - shader[i++] = TEX_DWORD1(DST_GPR(0),
> - DST_REL(ABSOLUTE),
> - DST_SEL_X(SQ_SEL_X),
> - DST_SEL_Y(SQ_SEL_Y),
> - DST_SEL_Z(SQ_SEL_Z),
> - DST_SEL_W(SQ_SEL_W),
> - LOD_BIAS(0),
> - COORD_TYPE_X(TEX_NORMALIZED),
> - COORD_TYPE_Y(TEX_NORMALIZED),
> - COORD_TYPE_Z(TEX_NORMALIZED),
> - COORD_TYPE_W(TEX_NORMALIZED));
> - shader[i++] = TEX_DWORD2(OFFSET_X(0),
> - OFFSET_Y(0),
> - OFFSET_Z(0),
> - SAMPLER_ID(0),
> - SRC_SEL_X(SQ_SEL_X),
> - SRC_SEL_Y(SQ_SEL_Y),
> - SRC_SEL_Z(SQ_SEL_0),
> - SRC_SEL_W(SQ_SEL_1));
> - shader[i++] = TEX_DWORD_PAD;
> -
> return i;
> }
More information about the xorg-driver-ati
mailing list