xf86-video-ati: Branch 'master' - 3 commits
Alex Deucher
agd5f at kemper.freedesktop.org
Mon Nov 29 15:11:16 PST 2010
src/evergreen_exa.c | 2
src/evergreen_shader.c | 622 ++++++++++++++++++++++++++++++---------
src/r600_exa.c | 2
src/r600_shader.c | 766 ++++++++++++++++++++++++++++++++++++-------------
src/r600_shader.h | 4
5 files changed, 1054 insertions(+), 342 deletions(-)
New commits:
commit 90f831361844f1b80b3f6bb718ff5ac584d73d48
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Mon Nov 29 18:09:05 2010 -0500
evergreen: use dot4 for transforms
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 1c02752..89afaff 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -1318,7 +1318,7 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
/* Shader */
vs_conf.shader_addr = accel_state->vs_mc_addr;
vs_conf.shader_size = accel_state->vs_size;
- vs_conf.num_gprs = 3;
+ vs_conf.num_gprs = 5;
vs_conf.stack_size = 1;
vs_conf.bo = accel_state->shaders_bo;
evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c
index 42cea7a..ef56d2d 100644
--- a/src/evergreen_shader.c
+++ b/src/evergreen_shader.c
@@ -1410,7 +1410,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
/* 3 - mask sub */
- shader[i++] = CF_DWORD0(ADDR(32),
+ shader[i++] = CF_DWORD0(ADDR(44),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1430,7 +1430,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(12),
+ I_COUNT(20),
ALT_CONST(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
@@ -1500,7 +1500,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
/* 9 - non-mask sub */
- shader[i++] = CF_DWORD0(ADDR(38),
+ shader[i++] = CF_DWORD0(ADDR(50),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1513,14 +1513,14 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
BARRIER(1));
/* 10 - ALU */
- shader[i++] = CF_ALU_DWORD0(ADDR(26),
+ shader[i++] = CF_ALU_DWORD0(ADDR(34),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(6),
+ I_COUNT(10),
ALT_CONST(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
@@ -1573,189 +1573,408 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* mask alu - 14 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ /* 14 srcX.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 15 srcX.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 16 srcX.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 15 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+
+ /* 17 srcX.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(3),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 16 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ /* 18 srcY.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(3),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 17 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+
+ /* 19 srcY.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 20 srcY.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 21 srcY.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 22 maskX.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
+ DST_ELEM(ELEM_X),
CLAMP(0));
- /* 18 maskX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ /* 23 maskX.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 2),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 24 maskX.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 19 maskY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ /* 25 maskX.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 3),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 20 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ /* 26 maskY.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 21 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3),
+
+ /* 27 maskY.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 22 srcX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ /* 28 maskY.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 29 maskY.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 30 srcX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
@@ -1779,8 +1998,8 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 23 srcY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ /* 31 srcY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
@@ -1804,8 +2023,8 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 24 maskX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ /* 32 maskX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
@@ -1829,8 +2048,8 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 25 maskY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ /* 33 maskY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
@@ -1854,98 +2073,209 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* no mask alu - 26 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ /* 34 srcX.x DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 35 srcX.y DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 36 srcX.z DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 27 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+
+ /* 37 srcX.w DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 28 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ /* 38 srcY.x DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 29 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+
+ /* 39 srcY.y DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 30 srcX / w */
+
+ /* 40 srcY.z DOT4 - non-mask */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 41 srcY.w DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 42 srcX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
+ SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
@@ -1968,8 +2298,8 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 31 srcY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ /* 43 srcY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
@@ -1993,7 +2323,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* mask vfetch - 32/33 - dst */
+ /* mask vfetch - 44/45 - dst */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2020,7 +2350,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ALT_CONST(0),
BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
shader[i++] = VTX_DWORD_PAD;
- /* 34/35 - src */
+ /* 46/47 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2047,7 +2377,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ALT_CONST(0),
BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
shader[i++] = VTX_DWORD_PAD;
- /* 36/37 - mask */
+ /* 48/49 - mask */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2075,7 +2405,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
shader[i++] = VTX_DWORD_PAD;
- /* no mask vfetch - 38/39 - dst */
+ /* no mask vfetch - 50/51 - dst */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2102,7 +2432,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ALT_CONST(0),
BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
shader[i++] = VTX_DWORD_PAD;
- /* 40/41 - src */
+ /* 52/53 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
commit 3cae361d0448b6e231c80f53d64bdbbdd74dc4cf
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Mon Nov 29 17:44:47 2010 -0500
6xx/7xx: clean up gpr/const handling in shaders
diff --git a/src/r600_shader.c b/src/r600_shader.c
index b42690c..7dceffe 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -157,11 +157,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
BARRIER(1));
/* 2 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -183,11 +183,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(1));
/* 3 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -209,11 +209,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(1));
/* 4 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -235,11 +235,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Z),
CLAMP(1));
/* 5 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -522,11 +522,11 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
/* 4 texX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -549,11 +549,11 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 5 texY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -690,18 +690,18 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
BARRIER(1));
/* 4,5,6,7 */
/* r2.x = MAD(c0.w, r1.x, c0.x) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
SRC2_REL(ABSOLUTE),
SRC2_ELEM(ELEM_X),
SRC2_NEG(0),
@@ -712,18 +712,18 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
/* r2.y = MAD(c0.w, r1.x, c0.y) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
SRC2_REL(ABSOLUTE),
SRC2_ELEM(ELEM_Y),
SRC2_NEG(0),
@@ -734,18 +734,18 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
/* r2.z = MAD(c0.w, r1.x, c0.z) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
SRC2_REL(ABSOLUTE),
SRC2_ELEM(ELEM_Z),
SRC2_NEG(0),
@@ -780,11 +780,11 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
/* 8,9,10,11 */
/* r2.x = MAD(c1.x, r1.y, pv.x) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -802,11 +802,11 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
/* r2.y = MAD(c1.y, r1.y, pv.y) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -824,11 +824,11 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
/* r2.z = MAD(c1.z, r1.y, pv.z) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -869,11 +869,11 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 12,13,14,15 */
/* r2.x = MAD(c2.x, r1.z, pv.x) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(258),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -891,11 +891,11 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(1));
/* r2.y = MAD(c2.y, r1.z, pv.y) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(258),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -913,11 +913,11 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(1));
/* r2.z = MAD(c2.z, r1.z, pv.z) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(258),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -1361,11 +1361,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
/* 14 srcX.x DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -1388,11 +1388,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 15 srcX.y DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -1415,11 +1415,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 16 srcX.z DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -1442,11 +1442,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 17 srcX.w DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -1469,11 +1469,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 18 srcY.x DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -1496,11 +1496,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 19 srcY.y DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -1523,11 +1523,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 20 srcY.z DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -1550,11 +1550,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 21 srcY.w DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -1577,11 +1577,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 22 maskX.x DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -1604,11 +1604,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 23 maskX.y DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -1631,11 +1631,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 24 maskX.z DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -1658,11 +1658,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 25 maskX.w DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -1685,11 +1685,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 26 maskY.x DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(259),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -1712,11 +1712,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 27 maskY.y DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(259),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -1739,11 +1739,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 28 maskY.z DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(259),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -1766,11 +1766,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 29 maskY.w DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(259),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -1793,11 +1793,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 30 srcX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(3),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -1820,11 +1820,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 31 srcY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(3),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -1847,11 +1847,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 32 maskX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(4),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -1874,11 +1874,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 33 maskY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(4),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(259),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -1901,11 +1901,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 34 srcX.x DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -1928,11 +1928,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 35 srcX.y DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -1955,11 +1955,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 36 srcX.z DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -1982,11 +1982,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 37 srcX.w DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -2009,11 +2009,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 38 srcY.x DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2036,11 +2036,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 39 srcY.y DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -2063,11 +2063,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 40 srcY.z DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -2090,11 +2090,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 41 srcY.w DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -2117,11 +2117,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 42 srcX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(2),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -2144,11 +2144,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(0));
/* 43 srcY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(2),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -2445,11 +2445,11 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
/* 10 - alu 0 */
/* MUL gpr[2].x gpr[1].x gpr[0].x */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2472,11 +2472,11 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(1));
/* 11 - alu 1 */
/* MUL gpr[2].y gpr[1].y gpr[0].y */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -2499,11 +2499,11 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(1));
/* 12 - alu 2 */
/* MUL gpr[2].z gpr[1].z gpr[0].z */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -2526,11 +2526,11 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CLAMP(1));
/* 13 - alu 3 */
/* MUL gpr[2].w gpr[1].w gpr[0].w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
diff --git a/src/r600_shader.h b/src/r600_shader.h
index a68d6c2..3d5acc7 100644
--- a/src/r600_shader.h
+++ b/src/r600_shader.h
@@ -193,6 +193,10 @@
// 128-159 kcache constants bank 0
// 160-191 kcache constants bank 1
// 248-255 special SQ_ALU_SRC_* (0, 1, etc.)
+#define ALU_SRC_GPR_BASE 0
+#define ALU_SRC_KCACHE0_BASE 128
+#define ALU_SRC_KCACHE1_BASE 160
+#define ALU_SRC_CFILE_BASE 256
#define SRC0_REL(x) (x)
#define SRC1_REL(x) (x)
commit d9bcac516f2a810acb300b29169e56a2df0b47ac
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Mon Nov 29 17:23:30 2010 -0500
r6xx/r7xx use dot4 for transforms
diff --git a/src/r600_exa.c b/src/r600_exa.c
index a04d66a..f6cde1d 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1358,7 +1358,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
/* Shader */
vs_conf.shader_addr = accel_state->vs_mc_addr;
vs_conf.shader_size = accel_state->vs_size;
- vs_conf.num_gprs = 3;
+ vs_conf.num_gprs = 5;
vs_conf.stack_size = 1;
vs_conf.bo = accel_state->shaders_bo;
r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
diff --git a/src/r600_shader.c b/src/r600_shader.c
index e2a4163..b42690c 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1191,7 +1191,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
/* 3 - mask sub */
- shader[i++] = CF_DWORD0(ADDR(32));
+ shader[i++] = CF_DWORD0(ADDR(44));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1211,7 +1211,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(12),
+ I_COUNT(20),
USES_WATERFALL(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
@@ -1284,7 +1284,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
/* 9 - non-mask sub */
- shader[i++] = CF_DWORD0(ADDR(38));
+ shader[i++] = CF_DWORD0(ADDR(50));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1297,14 +1297,14 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
BARRIER(1));
/* 10 - ALU */
- shader[i++] = CF_ALU_DWORD0(ADDR(26),
+ shader[i++] = CF_ALU_DWORD0(ADDR(34),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(6),
+ I_COUNT(10),
USES_WATERFALL(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
@@ -1360,189 +1360,440 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
BARRIER(1));
- /* 14 srcX MAD - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ /* 14 srcX.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 15 srcX.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 16 srcX.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 15 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+
+ /* 17 srcX.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(3),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 16 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ /* 18 srcY.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(3),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 17 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+
+ /* 19 srcY.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 20 srcY.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 21 srcY.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 22 maskX.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(1),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
+ DST_ELEM(ELEM_X),
CLAMP(0));
- /* 18 maskX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(258),
+ /* 23 maskX.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(258),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 24 maskX.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 19 maskY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(259),
+ /* 25 maskX.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 20 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(258),
+ /* 26 maskY.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(259),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 21 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(259),
+
+ /* 27 maskY.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(259),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 22 srcX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* 28 maskY.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(259),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 29 maskY.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(259),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 30 srcX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(3),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
@@ -1568,8 +1819,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 23 srcY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* 31 srcY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(3),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
@@ -1595,8 +1846,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 24 maskX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ /* 32 maskX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
@@ -1622,8 +1873,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 25 maskY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ /* 33 maskY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
@@ -1649,98 +1900,225 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 26 srcX MAD - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ /* 34 srcX.x DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 35 srcX.y DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 36 srcX.z DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 27 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+
+ /* 37 srcX.w DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 28 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ /* 38 srcY.x DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 29 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+
+ /* 39 srcY.y DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 30 srcX / w */
+
+ /* 40 srcY.z DOT4 - non-mask */
shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 41 srcY.w DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 42 srcX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
SRC1_SEL(256),
@@ -1765,8 +2143,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 31 srcY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ /* 43 srcY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(2),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
@@ -1792,7 +2170,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 32/33 - dst - mask */
+ /* 44/45 - dst - mask */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1817,7 +2195,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
- /* 34/35 - src */
+ /* 46/47 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1842,7 +2220,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0));
shader[i++] = VTX_DWORD_PAD;
- /* 36/37 - mask */
+ /* 48/49 - mask */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1868,7 +2246,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
MEGA_FETCH(0));
shader[i++] = VTX_DWORD_PAD;
- /* 38/39 - dst - non-mask */
+ /* 50/51 - dst - non-mask */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1893,7 +2271,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
- /* 40/41 - src */
+ /* 52/53 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
More information about the xorg-commit
mailing list