xf86-video-ati: Branch 'master' - 16 commits
Alex Deucher
agd5f at kemper.freedesktop.org
Fri Apr 17 07:36:57 PDT 2009
man/radeon.man | 4
src/r600_shader.c | 440 ---
src/r600_textured_videofuncs.c | 106
src/radeon.h | 5
src/radeon_accelfuncs.c | 5
src/radeon_commonfuncs.c | 5
src/radeon_exa_funcs.c | 5
src/radeon_textured_video.c | 208 +
src/radeon_textured_videofuncs.c | 5547 +++++++++++++++++++++++++--------------
src/radeon_video.c | 19
src/radeon_video.h | 20
11 files changed, 4014 insertions(+), 2350 deletions(-)
New commits:
commit db177c70ce88af19c8e05eb916a15f3e124876eb
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Fri Apr 17 01:05:15 2009 -0400
Update Xv info in man page
diff --git a/man/radeon.man b/man/radeon.man
index 7ca65e9..9b0d292 100644
--- a/man/radeon.man
+++ b/man/radeon.man
@@ -594,7 +594,9 @@ XV_BICUBIC is used to control whether textured adapter should apply
a bicubic filter to smooth the output. It has three values: 'off'(0), 'on'(1)
and 'auto'(2). 'off' means never apply the filter, 'on' means always apply
the filter and 'auto' means apply the filter only if the X and Y
-sizes are scaled to more than double, this to avoid blurred output.
+sizes are scaled to more than double to avoid blurred output. Bicubic
+filtering is not currently compatible with other Xv attributes like hue,
+contrast, and brightness, and must be disabled to use those attributes.
The default is
.B 'auto'(2).
commit 00266177bed2dc8693df497ca3ec19f2dc4adc05
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Thu Apr 16 20:33:28 2009 -0400
R3xx/R5xx: only apply Xv attributes if bicubic is disabled
Provides consistent output
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index 2318a62..5c46712 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -356,8 +356,10 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
/* Bicubic filter setup */
pPriv->bicubic_enabled = (pPriv->bicubic_state != BICUBIC_OFF);
- if (!(IS_R300_3D || IS_R500_3D))
+ if (!(IS_R300_3D || IS_R500_3D)) {
pPriv->bicubic_enabled = FALSE;
+ pPriv->bicubic_state = BICUBIC_OFF;
+ }
if (pPriv->bicubic_enabled && (pPriv->bicubic_state == BICUBIC_AUTO)) {
/*
* Applying the bicubic filter with a scale of less than 200%
@@ -372,7 +374,7 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
case FOURCC_I420:
srcPitch = (width + 3) & ~3;
srcPitch2 = ((width >> 1) + 3) & ~3;
- if (pPriv->bicubic_enabled) {
+ if (pPriv->bicubic_state != BICUBIC_OFF) {
dstPitch = ((dst_width << 1) + 15) & ~15;
dstPitch = (dstPitch + 63) & ~63;
} else {
@@ -502,7 +504,7 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
srcPitch, srcPitch2, pPriv->src_pitch,
width, height);
}
- } else if (pPriv->bicubic_enabled) {
+ } else if (pPriv->bicubic_state != BICUBIC_OFF) {
top &= ~1;
nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
s2offset = srcPitch * height;
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 0ac247a..caf8dce 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -1051,9 +1051,9 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
colorpitch |= R300_COLORTILE;
- if (!pPriv->bicubic_enabled && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ if (((pPriv->bicubic_state == BICUBIC_OFF)) &&
+ (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12))
isplanar = TRUE;
- }
if (isplanar) {
txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
@@ -1064,7 +1064,7 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
else
txformat1 = R300_TX_FORMAT_VYUY422;
- if (pPriv->bicubic_enabled)
+ if (pPriv->bicubic_state != BICUBIC_OFF)
txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
/* pitch is in pixels */
@@ -1250,661 +1250,422 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
FINISH_ACCEL();
/* setup pixel shader */
- if (pPriv->bicubic_enabled) {
- BEGIN_ACCEL(79);
-
- /* 4 components: 2 for tex0 and 2 for tex1 */
- OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
-
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
-
- /* Pixel stack frame size. */
- OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
-
- /* Indirection levels */
- OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) |
- R300_FIRST_TEX));
-
- /* Set nodes. */
- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(14) |
- R300_TEX_CODE_OFFSET(0) |
- R300_TEX_CODE_SIZE(6)));
-
- /* Nodes are allocated highest first, but executed lowest first */
- OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0);
- OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) |
- R300_ALU_SIZE(0) |
- R300_TEX_START(0) |
- R300_TEX_SIZE(0)));
- OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) |
- R300_ALU_SIZE(9) |
- R300_TEX_START(1) |
- R300_TEX_SIZE(0)));
- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) |
- R300_ALU_SIZE(2) |
- R300_TEX_START(2) |
- R300_TEX_SIZE(3) |
- R300_RGBA_OUT));
-
- /* ** BICUBIC FP ** */
-
- /* texcoord0 => temp0
- * texcoord1 => temp1 */
-
- // first node
- /* TEX temp2, temp1.rrr0, tex1, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(1) |
- R300_TEX_SRC_ADDR(1) |
- R300_TEX_DST_ADDR(2)));
-
- /* MOV temp1.r, temp1.ggg0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDRD(1) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
-
-
- // second node
- /* TEX temp1, temp1, tex1, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(1) |
- R300_TEX_SRC_ADDR(1) |
- R300_TEX_DST_ADDR(1)));
-
- /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
-
-
- /* MUL temp2.rg, temp2.rrr0, const0.rgb */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
- R300_ALU_RGB_ADDRD(2) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
-
- /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(3) |
- R300_ALU_RGB_ADDRD(4) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
-
- /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(2) |
- R300_ALU_RGB_ADDRD(5) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
-
- /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(3) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
-
- /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(2) |
- R300_ALU_RGB_ADDRD(1) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
-
- /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(1) |
- R300_ALU_RGB_ADDRD(1) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
-
- /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(3) |
- R300_ALU_RGB_ADDRD(2) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
-
- /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(5) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
-
- /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(4) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
-
-
- // third node
- /* TEX temp4, temp1.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(1) |
- R300_TEX_DST_ADDR(4)));
-
- /* TEX temp3, temp3.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(3) |
- R300_TEX_DST_ADDR(3)));
-
- /* TEX temp5, temp2.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(2) |
- R300_TEX_DST_ADDR(5)));
-
- /* TEX temp0, temp0.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(0)));
-
- /* LRP temp3, temp1.bbbb, temp4, temp3 ->
- * - PRESUB temps, temp4 - temp3
- * - MAD temp3, temp1.bbbb, temps, temp3 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) |
- R300_ALU_RGB_ADDR1(4) |
- R300_ALU_RGB_ADDR2(1) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) |
- R300_ALU_ALPHA_ADDR1(4) |
- R300_ALU_ALPHA_ADDR2(1) |
- R300_ALU_ALPHA_ADDRD(3) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
-
- /* LRP temp0, temp1.bbbb, temp5, temp0 ->
- * - PRESUB temps, temp5 - temp0
- * - MAD temp0, temp1.bbbb, temps, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) |
- R300_ALU_RGB_INSERT_NOP));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(5) |
- R300_ALU_RGB_ADDR2(1) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) |
- R300_ALU_ALPHA_ADDR1(5) |
- R300_ALU_ALPHA_ADDR2(1) |
- R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
-
- /* LRP output, temp2.bbbb, temp3, temp0 ->
- * - PRESUB temps, temp3 - temp0
- * - MAD output, temp2.bbbb, temps, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(3) |
- R300_ALU_RGB_ADDR2(2) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) |
- R300_ALU_ALPHA_ADDR1(3) |
- R300_ALU_ALPHA_ADDR2(2) |
- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A)));
-
- /* Shader constants. */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0);
-
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0);
-
- FINISH_ACCEL();
- } else if (isplanar) {
- /*
- * y' = y - .0625
- * u' = u - .5
- * v' = v - .5;
- *
- * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
- * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
- * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
- *
- * DP3 might look like the straightforward solution
- * but we'd need to move the texture yuv values in
- * the same reg for this to work. Therefore use MADs.
- * Brightness just adds to the off constant.
- * Contrast is multiplication of luminance.
- * Saturation and hue change the u and v coeffs.
- * Default values (before adjustments - depend on colorspace):
- * yco = 1.1643
- * uco = 0, -0.39173, 2.017
- * vco = 1.5958, -0.8129, 0
- * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
- * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
- * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
- *
- * temp = MAD(yco, yuv.yyyy, off)
- * temp = MAD(uco, yuv.uuuu, temp)
- * result = MAD(vco, yuv.vvvv, temp)
- */
- /* TODO: don't recalc consts always */
- const float Loff = -0.0627;
- const float Coff = -0.502;
- float uvcosf, uvsinf;
- float yco;
- float uco[3], vco[3], off[3];
- float bright, cont, gamma;
- int ref = pPriv->transform_index;
- Bool needgamma = FALSE;
+ if (pPriv->bicubic_state != BICUBIC_OFF) {
+ if (pPriv->bicubic_enabled) {
+ BEGIN_ACCEL(79);
+
+ /* 4 components: 2 for tex0 and 2 for tex1 */
+ OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ /* Set nodes. */
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(14) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(6)));
+
+ /* Nodes are allocated highest first, but executed lowest first */
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0);
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) |
+ R300_ALU_SIZE(0) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(0)));
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) |
+ R300_ALU_SIZE(9) |
+ R300_TEX_START(1) |
+ R300_TEX_SIZE(0)));
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) |
+ R300_ALU_SIZE(2) |
+ R300_TEX_START(2) |
+ R300_TEX_SIZE(3) |
+ R300_RGBA_OUT));
+
+ /* ** BICUBIC FP ** */
+
+ /* texcoord0 => temp0
+ * texcoord1 => temp1 */
+
+ // first node
+ /* TEX temp2, temp1.rrr0, tex1, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(1) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(2)));
+
+ /* MOV temp1.r, temp1.ggg0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- cont = RTFContrast(pPriv->contrast);
- bright = RTFBrightness(pPriv->brightness);
- gamma = (float)pPriv->gamma / 1000.0;
- uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
- uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
- /* overlay video also does pre-gamma contrast/sat adjust, should we? */
- yco = trans[ref].RefLuma * cont;
- uco[0] = -trans[ref].RefRCr * uvsinf;
- uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
- uco[2] = trans[ref].RefBCb * uvcosf;
- vco[0] = trans[ref].RefRCr * uvcosf;
- vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
- vco[2] = trans[ref].RefBCb * uvsinf;
- off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
- off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
- off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
+ // second node
+ /* TEX temp1, temp1, tex1, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(1) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(1)));
+
+ /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- if (gamma != 1.0) {
- needgamma = TRUE;
- /* note: gamma correction is out = in ^ gamma;
- gpu can only do LG2/EX2 therefore we transform into
- in ^ gamma = 2 ^ (log2(in) * gamma).
- Lots of scalar ops, unfortunately (better solution?) -
- without gamma that's 3 inst, with gamma it's 10...
- could use different gamma factors per channel,
- if that's of any use. */
- }
- BEGIN_ACCEL(needgamma ? 28 + 33 : 33);
- /* 2 components: same 2 for tex0/1/2 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-
- OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
-
- /* Indirection levels */
- OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
- R300_FIRST_TEX));
-
- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
- R300_TEX_CODE_OFFSET(0) |
- R300_TEX_CODE_SIZE(3)));
-
- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
- R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
- R300_TEX_START(0) |
- R300_TEX_SIZE(2) |
- R300_RGBA_OUT));
-
- /* tex inst */
- OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(2) |
- R300_TEX_ID(0) |
- R300_TEX_INST(R300_TEX_INST_LD)));
- OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(1) |
- R300_TEX_ID(1) |
- R300_TEX_INST(R300_TEX_INST_LD)));
- OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(0) |
- R300_TEX_ID(2) |
- R300_TEX_INST(R300_TEX_INST_LD)));
-
- /* ALU inst */
- /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
- R300_ALU_RGB_ADDR1(2) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(2) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop, but need to set up alpha source for rgb usage */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
- R300_ALU_ALPHA_ADDR1(2) |
- R300_ALU_ALPHA_ADDR2(0) |
- R300_ALU_ALPHA_ADDRD(2) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR1(1) |
- R300_ALU_RGB_ADDR2(2) |
- R300_ALU_RGB_ADDRD(2) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(2) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
- (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
- R300_ALU_RGB_CLAMP));
- /* write alpha 1 */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
- R300_ALU_ALPHA_TARGET_A));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
-
- if (needgamma) {
- /* rgb temp0.r = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.r */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ /* MUL temp2.rg, temp2.rrr0, const0.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDRD(2) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* rgb temp0.g = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.g */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+
+ /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(4) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* rgb temp0.b = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.b */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+
+ /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_ADDRD(5) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- /* MUL const1, temp1, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop, but set up const1 */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
+ /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- /* rgb out0.r = op_sop, set up src0 reg */
+ /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.r */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- /* rgb out0.g = op_sop, set up src0 reg */
+ /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.g */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(2) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- /* rgb out0.b = op_sop, set up src0 reg */
+ /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.b */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_RGB_ADDR2(5) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- }
-
- /* Shader constants. */
- /* constant 0: off, yco */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
- /* constant 1: uco */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma));
- /* constant 2: vco */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
-
- FINISH_ACCEL();
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(4) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+
+ // third node
+ /* TEX temp4, temp1.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(4)));
+
+ /* TEX temp3, temp3.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(3) |
+ R300_TEX_DST_ADDR(3)));
+
+ /* TEX temp5, temp2.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(2) |
+ R300_TEX_DST_ADDR(5)));
+
+ /* TEX temp0, temp0.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0)));
+
+ /* LRP temp3, temp1.bbbb, temp4, temp3 ->
+ * - PRESUB temps, temp4 - temp3
+ * - MAD temp3, temp1.bbbb, temps, temp3 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) |
+ R300_ALU_RGB_ADDR1(4) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) |
+ R300_ALU_ALPHA_ADDR1(4) |
+ R300_ALU_ALPHA_ADDR2(1) |
+ R300_ALU_ALPHA_ADDRD(3) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
+
+ /* LRP temp0, temp1.bbbb, temp5, temp0 ->
+ * - PRESUB temps, temp5 - temp0
+ * - MAD temp0, temp1.bbbb, temps, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) |
+ R300_ALU_RGB_INSERT_NOP));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(5) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) |
+ R300_ALU_ALPHA_ADDR1(5) |
+ R300_ALU_ALPHA_ADDR2(1) |
+ R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
+
+ /* LRP output, temp2.bbbb, temp3, temp0 ->
+ * - PRESUB temps, temp3 - temp0
+ * - MAD output, temp2.bbbb, temps, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(3) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) |
+ R300_ALU_ALPHA_ADDR1(3) |
+ R300_ALU_ALPHA_ADDR2(2) |
+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A)));
+
+ /* Shader constants. */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0);
+
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0);
+
+ FINISH_ACCEL();
+ } else {
+ BEGIN_ACCEL(11);
+ /* 2 components: 2 for tex0 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(1) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(1)));
+
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+ R300_ALU_SIZE(0) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(0) |
+ R300_RGBA_OUT));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0) |
+ R300_TEX_ID(0) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+
+ /* ALU inst */
+ /* RGB */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G |
+ R300_ALU_RGB_MASK_B)) |
+ R300_ALU_RGB_TARGET_A));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+ R300_ALU_RGB_CLAMP));
+ /* Alpha */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) |
+ R300_ALU_ALPHA_ADDR1(0) |
+ R300_ALU_ALPHA_ADDR2(0) |
+ R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+ R300_ALU_ALPHA_TARGET_A |
+ R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) |
+ R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) |
+ R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
+ R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
+ R300_ALU_ALPHA_CLAMP));
+ FINISH_ACCEL();
+ }
} else {
/*
* y' = y - .0625
@@ -1972,220 +1733,446 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
if that's of any use. */
}
- BEGIN_ACCEL(needgamma ? 28 + 33 : 33);
- /* 2 components */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-
- OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
-
- /* Indirection levels */
- OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
- R300_FIRST_TEX));
-
- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
- R300_TEX_CODE_OFFSET(0) |
- R300_TEX_CODE_SIZE(1)));
-
- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
- R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
- R300_TEX_START(0) |
- R300_TEX_SIZE(0) |
- R300_RGBA_OUT));
-
- /* tex inst */
- OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(0) |
- R300_TEX_ID(0) |
- R300_TEX_INST(R300_TEX_INST_LD)));
-
- /* ALU inst */
- /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(1) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop, but need to set up alpha source for rgb usage */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
- R300_ALU_ALPHA_ADDR1(0) |
- R300_ALU_ALPHA_ADDR2(0) |
- R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(1) |
- R300_ALU_RGB_ADDRD(1) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(1) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
- (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
- R300_ALU_RGB_CLAMP));
- /* write alpha 1 */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
- R300_ALU_ALPHA_TARGET_A));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
-
- if (needgamma) {
- /* rgb temp0.r = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.r */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
+ if (isplanar) {
+ BEGIN_ACCEL(needgamma ? 28 + 33 : 33);
+ /* 2 components: same 2 for tex0/1/2 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(3)));
+
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+ R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(2) |
+ R300_RGBA_OUT));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(2) |
+ R300_TEX_ID(0) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+ OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(1) |
+ R300_TEX_ID(1) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+ OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0) |
+ R300_TEX_ID(2) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+
+ /* ALU inst */
+ /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDR1(2) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(2) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but need to set up alpha source for rgb usage */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
+ R300_ALU_ALPHA_ADDR1(2) |
+ R300_ALU_ALPHA_ADDR2(0) |
+ R300_ALU_ALPHA_ADDRD(2) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- /* rgb temp0.g = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.g */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+ /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR1(1) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_ADDRD(2) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- /* rgb temp0.b = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.b */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
+ (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+ R300_ALU_RGB_CLAMP));
+ /* write alpha 1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+ R300_ALU_ALPHA_TARGET_A));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* MUL const1, temp1, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
+
+ if (needgamma) {
+ /* rgb temp0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MUL const1, temp1, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but set up const1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ }
+ } else {
+ BEGIN_ACCEL(needgamma ? 28 + 33 : 33);
+ /* 2 components */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(1)));
+
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+ R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(0) |
+ R300_RGBA_OUT));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0) |
+ R300_TEX_ID(0) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+
+ /* ALU inst */
+ /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
R300_ALU_RGB_ADDR1(0) |
R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_ADDRD(1) |
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) |
R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop, but set up const1 */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
+ /* alpha nop, but need to set up alpha source for rgb usage */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
+ R300_ALU_ALPHA_ADDR1(0) |
+ R300_ALU_ALPHA_ADDR2(0) |
+ R300_ALU_ALPHA_ADDRD(0) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- /* rgb out0.r = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.r */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* rgb out0.g = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.g */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
+ /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- /* rgb out0.b = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.b */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
+ (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+ R300_ALU_RGB_CLAMP));
+ /* write alpha 1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+ R300_ALU_ALPHA_TARGET_A));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
+
+ if (needgamma) {
+ /* rgb temp0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MUL const1, temp1, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but set up const1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ }
}
/* Shader constants. */
@@ -2491,9 +2478,9 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
if (RADEONTilingEnabled(pScrn, pPixmap))
colorpitch |= R300_COLORTILE;
- if (!pPriv->bicubic_enabled && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ if (((pPriv->bicubic_state == BICUBIC_OFF)) &&
+ (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12))
isplanar = TRUE;
- }
if (isplanar) {
txformat1 = R300_TX_FORMAT_X8;
@@ -2504,7 +2491,7 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
else
txformat1 = R300_TX_FORMAT_VYUY422;
- if (pPriv->bicubic_enabled)
+ if (pPriv->bicubic_state != BICUBIC_OFF)
txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
/* pitch is in pixels */
@@ -2694,805 +2681,574 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
FINISH_ACCEL();
/* setup pixel shader */
- if (pPriv->bicubic_enabled) {
- BEGIN_ACCEL(7);
-
- /* 4 components: 2 for tex0 and 2 for tex1 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
-
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
-
- /* Pixel stack frame size. */
- OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
-
- /* FP length. */
- OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
- R500_US_CODE_END_ADDR(13)));
- OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
- R500_US_CODE_RANGE_SIZE(13)));
-
- /* Prepare for FP emission. */
- OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
- FINISH_ACCEL();
-
- BEGIN_ACCEL(89);
- /* Pixel shader.
- * I've gone ahead and annotated each instruction, since this
- * thing is MASSIVE. :3
- * Note: In order to avoid buggies with temps and multiple
- * inputs, all temps are offset by 2. temp0 -> register2. */
-
- /* TEX temp2, input1.xxxx, tex1, 1D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
- R500_TEX_INST_LD |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_R |
- R500_TEX_SRC_R_SWIZ_R |
- R500_TEX_SRC_Q_SWIZ_R |
- R500_TEX_DST_ADDR(2) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* TEX temp5, input1.yyyy, tex1, 1D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
- R500_TEX_SRC_S_SWIZ_G |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_SRC_R_SWIZ_G |
- R500_TEX_SRC_Q_SWIZ_G |
- R500_TEX_DST_ADDR(5) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* MUL temp4, const0.x0x0, temp2.yyxx */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_0 |
- R500_ALU_RGB_B_SWIZ_A_R |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_G |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC0 |
- R500_ALPHA_SWIZ_A_0 |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 |
- R500_ALU_RGBA_B_SWIZ_0 |
- R500_ALU_RGBA_A_SWIZ_0));
-
- /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(5) |
- R500_RGB_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(5) |
- R500_ALPHA_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_0 |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_0 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC0 |
- R500_ALPHA_SWIZ_A_G |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* ADD temp3, temp3, input0.xyxy */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) |
- R500_RGB_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) |
- R500_ALPHA_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
- R500_ALU_RGB_G_SWIZ_A_1 |
- R500_ALU_RGB_B_SWIZ_A_1 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SWIZ_A_1 |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_R |
- R500_ALU_RGBA_A_SWIZ_G));
-
- /* TEX temp1, temp3.zwxy, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
- R500_TEX_SRC_S_SWIZ_B |
- R500_TEX_SRC_T_SWIZ_A |
- R500_TEX_SRC_R_SWIZ_R |
- R500_TEX_SRC_Q_SWIZ_G |
- R500_TEX_DST_ADDR(1) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* TEX temp3, temp3.xyzw, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_SRC_R_SWIZ_B |
- R500_TEX_SRC_Q_SWIZ_A |
- R500_TEX_DST_ADDR(3) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(5) |
- R500_RGB_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(5) |
- R500_ALPHA_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_0 |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_0 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_G |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_G));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC0 |
- R500_ALPHA_SWIZ_A_G |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_G));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* ADD temp0, temp4, input0.xyxy */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) |
- R500_RGB_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) |
- R500_ALPHA_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
- R500_ALU_RGB_G_SWIZ_A_1 |
- R500_ALU_RGB_B_SWIZ_A_1 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SWIZ_A_1 |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_R |
- R500_ALU_RGBA_A_SWIZ_G));
-
- /* TEX temp4, temp0.zwzw, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_B |
- R500_TEX_SRC_T_SWIZ_A |
- R500_TEX_SRC_R_SWIZ_B |
- R500_TEX_SRC_Q_SWIZ_A |
- R500_TEX_DST_ADDR(4) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* TEX temp0, temp0.xyzw, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_SRC_R_SWIZ_B |
- R500_TEX_SRC_Q_SWIZ_A |
- R500_TEX_DST_ADDR(0) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* LRP temp3, temp2.zzzz, temp1, temp3 ->
- * - PRESUB temps, temp1 - temp3
- * - MAD temp2.zzzz, temps, temp3 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) |
- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
- R500_RGB_ADDR1(1) |
- R500_RGB_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) |
- R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
- R500_ALPHA_ADDR1(1) |
- R500_ALPHA_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
- R500_ALU_RGB_R_SWIZ_A_B |
- R500_ALU_RGB_G_SWIZ_A_B |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRCP |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC2 |
- R500_ALPHA_SWIZ_A_B |
- R500_ALPHA_SEL_B_SRCP |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC0 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* LRP temp0, temp2.zzzz, temp4, temp0 ->
- * - PRESUB temps, temp4 - temp1
- * - MAD temp2.zzzz, temps, temp0 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
- R500_RGB_ADDR1(4) |
- R500_RGB_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
- R500_ALPHA_ADDR1(4) |
- R500_ALPHA_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
- R500_ALU_RGB_R_SWIZ_A_B |
- R500_ALU_RGB_G_SWIZ_A_B |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRCP |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC2 |
- R500_ALPHA_SWIZ_A_B |
- R500_ALPHA_SEL_B_SRCP |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC0 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* LRP output, temp5.zzzz, temp3, temp0 ->
- * - PRESUB temps, temp3 - temp0
- * - MAD temp5.zzzz, temps, temp0 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
- R500_INST_LAST |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK |
- R500_INST_RGB_OMASK_R |
- R500_INST_RGB_OMASK_G |
- R500_INST_RGB_OMASK_B |
- R500_INST_ALPHA_OMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
- R500_RGB_ADDR1(3) |
- R500_RGB_ADDR2(5)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
- R500_ALPHA_ADDR1(3) |
- R500_ALPHA_ADDR2(5)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
- R500_ALU_RGB_R_SWIZ_A_B |
- R500_ALU_RGB_G_SWIZ_A_B |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRCP |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC2 |
- R500_ALPHA_SWIZ_A_B |
- R500_ALPHA_SEL_B_SRCP |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC0 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* Shader constants. */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
-
- /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w));
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h));
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
-
- FINISH_ACCEL();
-
- } else if (isplanar) {
- /*
- * y' = y - .0625
- * u' = u - .5
- * v' = v - .5;
- *
- * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
- * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
- * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
- *
- * DP3 might look like the straightforward solution
- * but we'd need to move the texture yuv values in
- * the same reg for this to work. Therefore use MADs.
- * Brightness just adds to the off constant.
- * Contrast is multiplication of luminance.
- * Saturation and hue change the u and v coeffs.
- * Default values (before adjustments - depend on colorspace):
- * yco = 1.1643
- * uco = 0, -0.39173, 2.017
- * vco = 1.5958, -0.8129, 0
- * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
- * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
- * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
- *
- * temp = MAD(yco, yuv.yyyy, off)
- * temp = MAD(uco, yuv.uuuu, temp)
- * result = MAD(vco, yuv.vvvv, temp)
- */
- /* TODO: don't recalc consts always */
- const float Loff = -0.0627;
- const float Coff = -0.502;
- float uvcosf, uvsinf;
- float yco;
- float uco[3], vco[3], off[3];
- float bright, cont, gamma;
- int ref = pPriv->transform_index;
- Bool needgamma = FALSE;
-
- cont = RTFContrast(pPriv->contrast);
- bright = RTFBrightness(pPriv->brightness);
- gamma = (float)pPriv->gamma / 1000.0;
- uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
- uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
- /* overlay video also does pre-gamma contrast/sat adjust, should we? */
-
- yco = trans[ref].RefLuma * cont;
- uco[0] = -trans[ref].RefRCr * uvsinf;
- uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
- uco[2] = trans[ref].RefBCb * uvcosf;
- vco[0] = trans[ref].RefRCr * uvcosf;
- vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
- vco[2] = trans[ref].RefBCb * uvsinf;
- off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
- off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
- off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
-
- //XXX gamma
+ if (pPriv->bicubic_state != BICUBIC_OFF) {
+ if (pPriv->bicubic_enabled) {
+ BEGIN_ACCEL(7);
- if (gamma != 1.0) {
- needgamma = TRUE;
- /* note: gamma correction is out = in ^ gamma;
- gpu can only do LG2/EX2 therefore we transform into
- in ^ gamma = 2 ^ (log2(in) * gamma).
- Lots of scalar ops, unfortunately (better solution?) -
- without gamma that's 3 inst, with gamma it's 10...
- could use different gamma factors per channel,
- if that's of any use. */
+ /* 4 components: 2 for tex0 and 2 for tex1 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
+
+ /* FP length. */
+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
+ R500_US_CODE_END_ADDR(13)));
+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
+ R500_US_CODE_RANGE_SIZE(13)));
+
+ /* Prepare for FP emission. */
+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
+ FINISH_ACCEL();
+
+ BEGIN_ACCEL(89);
+ /* Pixel shader.
+ * I've gone ahead and annotated each instruction, since this
+ * thing is MASSIVE. :3
+ * Note: In order to avoid buggies with temps and multiple
+ * inputs, all temps are offset by 2. temp0 -> register2. */
+
+ /* TEX temp2, input1.xxxx, tex1, 1D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_R |
+ R500_TEX_SRC_R_SWIZ_R |
+ R500_TEX_SRC_Q_SWIZ_R |
+ R500_TEX_DST_ADDR(2) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* TEX temp5, input1.yyyy, tex1, 1D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
+ R500_TEX_SRC_S_SWIZ_G |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_SRC_R_SWIZ_G |
+ R500_TEX_SRC_Q_SWIZ_G |
+ R500_TEX_DST_ADDR(5) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* MUL temp4, const0.x0x0, temp2.yyxx */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_0 |
+ R500_ALU_RGB_B_SWIZ_A_R |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC0 |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_R_SWIZ_0 |
+ R500_ALU_RGBA_G_SWIZ_0 |
+ R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+
+ /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(5) |
+ R500_RGB_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(5) |
+ R500_ALPHA_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_0 |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_0 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC0 |
+ R500_ALPHA_SWIZ_A_G |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* ADD temp3, temp3, input0.xyxy */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) |
+ R500_RGB_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) |
+ R500_ALPHA_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
+ R500_ALU_RGB_G_SWIZ_A_1 |
+ R500_ALU_RGB_B_SWIZ_A_1 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SWIZ_A_1 |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_R |
+ R500_ALU_RGBA_A_SWIZ_G));
+
+ /* TEX temp1, temp3.zwxy, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
+ R500_TEX_SRC_S_SWIZ_B |
+ R500_TEX_SRC_T_SWIZ_A |
+ R500_TEX_SRC_R_SWIZ_R |
+ R500_TEX_SRC_Q_SWIZ_G |
+ R500_TEX_DST_ADDR(1) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* TEX temp3, temp3.xyzw, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_SRC_R_SWIZ_B |
+ R500_TEX_SRC_Q_SWIZ_A |
+ R500_TEX_DST_ADDR(3) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(5) |
+ R500_RGB_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(5) |
+ R500_ALPHA_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_0 |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_0 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_G));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC0 |
+ R500_ALPHA_SWIZ_A_G |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_G));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* ADD temp0, temp4, input0.xyxy */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) |
+ R500_RGB_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) |
+ R500_ALPHA_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
+ R500_ALU_RGB_G_SWIZ_A_1 |
+ R500_ALU_RGB_B_SWIZ_A_1 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SWIZ_A_1 |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_R |
+ R500_ALU_RGBA_A_SWIZ_G));
+
+ /* TEX temp4, temp0.zwzw, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_B |
+ R500_TEX_SRC_T_SWIZ_A |
+ R500_TEX_SRC_R_SWIZ_B |
+ R500_TEX_SRC_Q_SWIZ_A |
+ R500_TEX_DST_ADDR(4) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* TEX temp0, temp0.xyzw, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_SRC_R_SWIZ_B |
+ R500_TEX_SRC_Q_SWIZ_A |
+ R500_TEX_DST_ADDR(0) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* LRP temp3, temp2.zzzz, temp1, temp3 ->
+ * - PRESUB temps, temp1 - temp3
+ * - MAD temp2.zzzz, temps, temp3 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) |
+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
+ R500_RGB_ADDR1(1) |
+ R500_RGB_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) |
+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
+ R500_ALPHA_ADDR1(1) |
+ R500_ALPHA_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
+ R500_ALU_RGB_R_SWIZ_A_B |
+ R500_ALU_RGB_G_SWIZ_A_B |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRCP |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC2 |
+ R500_ALPHA_SWIZ_A_B |
+ R500_ALPHA_SEL_B_SRCP |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* LRP temp0, temp2.zzzz, temp4, temp0 ->
+ * - PRESUB temps, temp4 - temp1
+ * - MAD temp2.zzzz, temps, temp0 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
+ R500_RGB_ADDR1(4) |
+ R500_RGB_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
+ R500_ALPHA_ADDR1(4) |
+ R500_ALPHA_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
+ R500_ALU_RGB_R_SWIZ_A_B |
+ R500_ALU_RGB_G_SWIZ_A_B |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRCP |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC2 |
+ R500_ALPHA_SWIZ_A_B |
+ R500_ALPHA_SEL_B_SRCP |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* LRP output, temp5.zzzz, temp3, temp0 ->
+ * - PRESUB temps, temp3 - temp0
+ * - MAD temp5.zzzz, temps, temp0 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
+ R500_INST_LAST |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_OMASK_R |
+ R500_INST_RGB_OMASK_G |
+ R500_INST_RGB_OMASK_B |
+ R500_INST_ALPHA_OMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
+ R500_RGB_ADDR1(3) |
+ R500_RGB_ADDR2(5)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
+ R500_ALPHA_ADDR1(3) |
+ R500_ALPHA_ADDR2(5)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
+ R500_ALU_RGB_R_SWIZ_A_B |
+ R500_ALU_RGB_G_SWIZ_A_B |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRCP |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC2 |
+ R500_ALPHA_SWIZ_A_B |
+ R500_ALPHA_SEL_B_SRCP |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* Shader constants. */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
+
+ /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w));
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h));
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
+
+ FINISH_ACCEL();
+ } else {
+ BEGIN_ACCEL(19);
+ /* 2 components: 2 for tex0 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
+
+ /* FP length. */
+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
+ R500_US_CODE_END_ADDR(1)));
+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
+ R500_US_CODE_RANGE_SIZE(1)));
+
+ /* Prepare for FP emission. */
+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_DST_ADDR(0) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
+ R500_DX_S_SWIZ_R |
+ R500_DX_T_SWIZ_R |
+ R500_DX_R_SWIZ_R |
+ R500_DX_Q_SWIZ_R |
+ R500_DY_ADDR(0) |
+ R500_DY_S_SWIZ_R |
+ R500_DY_T_SWIZ_R |
+ R500_DY_R_SWIZ_R |
+ R500_DY_Q_SWIZ_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* ALU inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_LAST |
+ R500_INST_RGB_OMASK_R |
+ R500_INST_RGB_OMASK_G |
+ R500_INST_RGB_OMASK_B |
+ R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR1_CONST |
+ R500_RGB_ADDR2(0) |
+ R500_RGB_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR1_CONST |
+ R500_ALPHA_ADDR2(0) |
+ R500_ALPHA_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC0 |
+ R500_ALU_RGB_R_SWIZ_B_1 |
+ R500_ALU_RGB_B_SWIZ_B_1 |
+ R500_ALU_RGB_G_SWIZ_B_1));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_SWIZ_A_A |
+ R500_ALPHA_SWIZ_B_1));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_R_SWIZ_0 |
+ R500_ALU_RGBA_G_SWIZ_0 |
+ R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+ FINISH_ACCEL();
}
-
- BEGIN_ACCEL(56);
- /* 2 components: 2 for tex0 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
-
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-
- /* Pixel stack frame size. */
- OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
-
- /* FP length. */
- OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
- R500_US_CODE_END_ADDR(5)));
- OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
- R500_US_CODE_RANGE_SIZE(5)));
-
- /* Prepare for FP emission. */
- OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
-
- /* tex inst */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_DST_ADDR(2) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
- R500_DX_S_SWIZ_R |
- R500_DX_T_SWIZ_R |
- R500_DX_R_SWIZ_R |
- R500_DX_Q_SWIZ_R |
- R500_DY_ADDR(0) |
- R500_DY_S_SWIZ_R |
- R500_DY_T_SWIZ_R |
- R500_DY_R_SWIZ_R |
- R500_DY_Q_SWIZ_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* tex inst */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
- R500_TEX_INST_LD |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_DST_ADDR(1) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
- R500_DX_S_SWIZ_R |
- R500_DX_T_SWIZ_R |
- R500_DX_R_SWIZ_R |
- R500_DX_Q_SWIZ_R |
- R500_DY_ADDR(0) |
- R500_DY_S_SWIZ_R |
- R500_DY_T_SWIZ_R |
- R500_DY_R_SWIZ_R |
- R500_DY_Q_SWIZ_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* tex inst */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_DST_ADDR(0) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
- R500_DX_S_SWIZ_R |
- R500_DX_T_SWIZ_R |
- R500_DX_R_SWIZ_R |
- R500_DX_Q_SWIZ_R |
- R500_DY_ADDR(0) |
- R500_DY_S_SWIZ_R |
- R500_DY_T_SWIZ_R |
- R500_DY_R_SWIZ_R |
- R500_DY_Q_SWIZ_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* ALU inst */
- /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(2) |
- R500_RGB_ADDR2(0) |
- R500_RGB_ADDR2_CONST));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(2) |
- R500_ALPHA_ADDR2(0) |
- R500_ALPHA_ADDR2_CONST));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_A |
- R500_ALU_RGB_G_SWIZ_A_A |
- R500_ALU_RGB_B_SWIZ_A_A |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_G |
- R500_ALU_RGB_G_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
- R500_ALPHA_ADDRD(2) |
- R500_ALPHA_SWIZ_A_0 |
- R500_ALPHA_SWIZ_B_0));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_ADDRD(2) |
- R500_ALU_RGBA_SEL_C_SRC0 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
- R500_ALU_RGBA_A_SWIZ_0));
-
- /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(1) |
- R500_RGB_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(1) |
- R500_ALPHA_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_G |
- R500_ALU_RGB_G_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
- R500_ALPHA_ADDRD(2) |
- R500_ALPHA_SWIZ_A_0 |
- R500_ALPHA_SWIZ_B_0));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_ADDRD(2) |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
- R500_ALU_RGBA_A_SWIZ_0));
-
- /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_LAST |
- R500_INST_RGB_OMASK_R |
- R500_INST_RGB_OMASK_G |
- R500_INST_RGB_OMASK_B |
- R500_INST_ALPHA_OMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(0) |
- R500_RGB_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(0) |
- R500_ALPHA_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_G |
- R500_ALU_RGB_G_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
- R500_ALPHA_ADDRD(0) |
- R500_ALPHA_SWIZ_A_0 |
- R500_ALPHA_SWIZ_B_0));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_ADDRD(0) |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
- R500_ALU_RGBA_A_SWIZ_1));
-
- /* Shader constants. */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
-
- /* constant 0: off, yco */
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco);
- /* constant 1: uco */
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma);
- /* constant 2: vco */
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0);
-
- FINISH_ACCEL();
-
} else {
/*
* y' = y - .0625
@@ -3562,175 +3318,414 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
if that's of any use. */
}
- BEGIN_ACCEL(44);
- /* 2 components: 2 for tex0/1/2 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
-
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-
- /* Pixel stack frame size. */
- OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
-
- /* FP length. */
- OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
- R500_US_CODE_END_ADDR(3)));
- OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
- R500_US_CODE_RANGE_SIZE(3)));
-
- /* Prepare for FP emission. */
- OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
-
- /* tex inst */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_DST_ADDR(0) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
- R500_DX_S_SWIZ_R |
- R500_DX_T_SWIZ_R |
- R500_DX_R_SWIZ_R |
- R500_DX_Q_SWIZ_R |
- R500_DY_ADDR(0) |
- R500_DY_S_SWIZ_R |
- R500_DY_T_SWIZ_R |
- R500_DY_R_SWIZ_R |
- R500_DY_Q_SWIZ_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* ALU inst */
- /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(0) |
- R500_RGB_ADDR2(0) |
- R500_RGB_ADDR2_CONST));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(0) |
- R500_ALPHA_ADDR2(0) |
- R500_ALPHA_ADDR2_CONST));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_A |
- R500_ALU_RGB_G_SWIZ_A_A |
- R500_ALU_RGB_B_SWIZ_A_A |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_G |
- R500_ALU_RGB_G_SWIZ_B_G));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
- R500_ALPHA_ADDRD(1) |
- R500_ALPHA_SWIZ_A_0 |
- R500_ALPHA_SWIZ_B_0));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_ADDRD(1) |
- R500_ALU_RGBA_SEL_C_SRC0 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
- R500_ALU_RGBA_A_SWIZ_0));
-
- /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(0) |
- R500_RGB_ADDR2(1)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(0) |
- R500_ALPHA_ADDR2(1)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_B |
- R500_ALU_RGB_B_SWIZ_B_B |
- R500_ALU_RGB_G_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
- R500_ALPHA_ADDRD(1) |
- R500_ALPHA_SWIZ_A_0 |
- R500_ALPHA_SWIZ_B_0));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_ADDRD(1) |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
- R500_ALU_RGBA_A_SWIZ_0));
-
- /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_LAST |
- R500_INST_RGB_OMASK_R |
- R500_INST_RGB_OMASK_G |
- R500_INST_RGB_OMASK_B |
- R500_INST_ALPHA_OMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(0) |
- R500_RGB_ADDR2(1)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(0) |
- R500_ALPHA_ADDR2(1)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
- R500_ALPHA_ADDRD(1) |
- R500_ALPHA_SWIZ_A_0 |
- R500_ALPHA_SWIZ_B_0));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_ADDRD(1) |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
- R500_ALU_RGBA_A_SWIZ_1));
+ if (isplanar) {
+ BEGIN_ACCEL(56);
+ /* 2 components: 2 for tex0 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
+
+ /* FP length. */
+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
+ R500_US_CODE_END_ADDR(5)));
+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
+ R500_US_CODE_RANGE_SIZE(5)));
+
+ /* Prepare for FP emission. */
+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_DST_ADDR(2) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
+ R500_DX_S_SWIZ_R |
+ R500_DX_T_SWIZ_R |
+ R500_DX_R_SWIZ_R |
+ R500_DX_Q_SWIZ_R |
+ R500_DY_ADDR(0) |
+ R500_DY_S_SWIZ_R |
+ R500_DY_T_SWIZ_R |
+ R500_DY_R_SWIZ_R |
+ R500_DY_Q_SWIZ_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* tex inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_DST_ADDR(1) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
+ R500_DX_S_SWIZ_R |
+ R500_DX_T_SWIZ_R |
+ R500_DX_R_SWIZ_R |
+ R500_DX_Q_SWIZ_R |
+ R500_DY_ADDR(0) |
+ R500_DY_S_SWIZ_R |
+ R500_DY_T_SWIZ_R |
+ R500_DY_R_SWIZ_R |
+ R500_DY_Q_SWIZ_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* tex inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_DST_ADDR(0) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
+ R500_DX_S_SWIZ_R |
+ R500_DX_T_SWIZ_R |
+ R500_DX_R_SWIZ_R |
+ R500_DX_Q_SWIZ_R |
+ R500_DY_ADDR(0) |
+ R500_DY_S_SWIZ_R |
+ R500_DY_T_SWIZ_R |
+ R500_DY_R_SWIZ_R |
+ R500_DY_Q_SWIZ_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* ALU inst */
+ /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(2) |
+ R500_RGB_ADDR2(0) |
+ R500_RGB_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(2) |
+ R500_ALPHA_ADDR2(0) |
+ R500_ALPHA_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_A |
+ R500_ALU_RGB_G_SWIZ_A_A |
+ R500_ALU_RGB_B_SWIZ_A_A |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_ADDRD(2) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_ADDRD(2) |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+
+ /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(1) |
+ R500_RGB_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(1) |
+ R500_ALPHA_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_ADDRD(2) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_ADDRD(2) |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+
+ /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_LAST |
+ R500_INST_RGB_OMASK_R |
+ R500_INST_RGB_OMASK_G |
+ R500_INST_RGB_OMASK_B |
+ R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_ADDRD(0) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_ADDRD(0) |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
+ R500_ALU_RGBA_A_SWIZ_1));
+
+ } else {
+ BEGIN_ACCEL(44);
+ /* 2 components: 2 for tex0/1/2 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
+
+ /* FP length. */
+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
+ R500_US_CODE_END_ADDR(3)));
+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
+ R500_US_CODE_RANGE_SIZE(3)));
+
+ /* Prepare for FP emission. */
+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_DST_ADDR(0) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
+ R500_DX_S_SWIZ_R |
+ R500_DX_T_SWIZ_R |
+ R500_DX_R_SWIZ_R |
+ R500_DX_Q_SWIZ_R |
+ R500_DY_ADDR(0) |
+ R500_DY_S_SWIZ_R |
+ R500_DY_T_SWIZ_R |
+ R500_DY_R_SWIZ_R |
+ R500_DY_Q_SWIZ_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* ALU inst */
+ /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR2(0) |
+ R500_RGB_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR2(0) |
+ R500_ALPHA_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_A |
+ R500_ALU_RGB_G_SWIZ_A_A |
+ R500_ALU_RGB_B_SWIZ_A_A |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_G));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_ADDRD(1) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_ADDRD(1) |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+
+ /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR2(1)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR2(1)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_B |
+ R500_ALU_RGB_B_SWIZ_B_B |
+ R500_ALU_RGB_G_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_ADDRD(1) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_ADDRD(1) |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+
+ /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_LAST |
+ R500_INST_RGB_OMASK_R |
+ R500_INST_RGB_OMASK_G |
+ R500_INST_RGB_OMASK_B |
+ R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR2(1)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR2(1)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_ADDRD(1) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_ADDRD(1) |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
+ R500_ALU_RGBA_A_SWIZ_1));
+ }
/* Shader constants. */
OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
commit 85323a7f84381fef7fad20c7f7ec601637af9aa7
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Thu Apr 16 11:10:51 2009 -0400
R3xx/R4xx: set tex caching for Y texture when doing planar rendering
Doesn't affect performance, but docs indicate its the right
thing to do.
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index e449204..0ac247a 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -1091,7 +1091,10 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
- OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
+ if (isplanar)
+ OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1 | R300_TX_FORMAT_CACHE_HALF_REGION_0);
+ else
+ OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
OUT_ACCEL_REG(R300_TX_OFFSET_0, txoffset);
FINISH_ACCEL();
commit 5ea5df22c038fc8f00984acc760e9d8c962bf902
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Wed Apr 15 20:26:34 2009 -0400
Tex vid: remove remnants of XV_HWPLANAR
no longer needed as bicubic is the only thing that uses
the old csc code.
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index 532b600..2318a62 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -367,26 +367,19 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
pPriv->bicubic_enabled = FALSE;
}
- pPriv->planar_hw = pPriv->planar_state;
- if (pPriv->bicubic_enabled || IS_R600_3D)
- pPriv->planar_hw = 0;
-
- if (info->ChipFamily < CHIP_FAMILY_R300)
- pPriv->planar_hw = 1;
-
switch(id) {
case FOURCC_YV12:
case FOURCC_I420:
srcPitch = (width + 3) & ~3;
srcPitch2 = ((width >> 1) + 3) & ~3;
- if (pPriv->planar_hw) {
+ if (pPriv->bicubic_enabled) {
+ dstPitch = ((dst_width << 1) + 15) & ~15;
+ dstPitch = (dstPitch + 63) & ~63;
+ } else {
dstPitch = (dst_width + 15) & ~15;
dstPitch = (dstPitch + 63) & ~63;
dstPitch2 = ((dst_width >> 1) + 15) & ~15;
dstPitch2 = (dstPitch2 + 63) & ~63;
- } else {
- dstPitch = ((dst_width << 1) + 15) & ~15;
- dstPitch = (dstPitch + 63) & ~63;
}
break;
case FOURCC_UYVY:
@@ -509,8 +502,24 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
srcPitch, srcPitch2, pPriv->src_pitch,
width, height);
}
- }
- else if (pPriv->planar_hw) {
+ } else if (pPriv->bicubic_enabled) {
+ top &= ~1;
+ nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
+ s2offset = srcPitch * height;
+ s3offset = (srcPitch2 * (height >> 1)) + s2offset;
+ pPriv->src_addr += left << 1;
+ tmp = ((top >> 1) * srcPitch2) + (left >> 1);
+ s2offset += tmp;
+ s3offset += tmp;
+ if (id == FOURCC_I420) {
+ tmp = s2offset;
+ s2offset = s3offset;
+ s3offset = tmp;
+ }
+ RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left,
+ buf + s2offset, buf + s3offset, pPriv->src_addr,
+ srcPitch, srcPitch2, dstPitch, nlines, npixels);
+ } else {
top &= ~1;
s2offset = srcPitch * ((height + 1) & ~1);
s3offset = s2offset + srcPitch2 * ((height + 1) >> 1);
@@ -532,23 +541,6 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
srcPitch2, dstPitch2, (nlines + 1) >> 1, npixels >> 1, 1);
RADEONCopyData(pScrn, buf + s3offset, pPriv->src_addr + d3line + (left >> 1),
srcPitch2, dstPitch2, (nlines + 1) >> 1, npixels >> 1, 1);
- } else {
- top &= ~1;
- nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
- s2offset = srcPitch * height;
- s3offset = (srcPitch2 * (height >> 1)) + s2offset;
- pPriv->src_addr += left << 1;
- tmp = ((top >> 1) * srcPitch2) + (left >> 1);
- s2offset += tmp;
- s3offset += tmp;
- if (id == FOURCC_I420) {
- tmp = s2offset;
- s2offset = s3offset;
- s3offset = tmp;
- }
- RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left,
- buf + s2offset, buf + s3offset, pPriv->src_addr,
- srcPitch, srcPitch2, dstPitch, nlines, npixels);
}
break;
case FOURCC_UYVY:
@@ -681,13 +673,12 @@ static XF86AttributeRec Attributes_r200[NUM_ATTRIBUTES_R200+1] =
{0, 0, 0, NULL}
};
-#define NUM_ATTRIBUTES_R300 9
+#define NUM_ATTRIBUTES_R300 8
static XF86AttributeRec Attributes_r300[NUM_ATTRIBUTES_R300+1] =
{
{XvSettable | XvGettable, 0, 2, "XV_BICUBIC"},
{XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
- {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},
{XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"},
{XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"},
{XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"},
@@ -697,13 +688,12 @@ static XF86AttributeRec Attributes_r300[NUM_ATTRIBUTES_R300+1] =
{0, 0, 0, NULL}
};
-#define NUM_ATTRIBUTES_R500 8
+#define NUM_ATTRIBUTES_R500 7
static XF86AttributeRec Attributes_r500[NUM_ATTRIBUTES_R500+1] =
{
{XvSettable | XvGettable, 0, 2, "XV_BICUBIC"},
{XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
- {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},
{XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"},
{XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"},
{XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"},
@@ -727,7 +717,6 @@ static XF86AttributeRec Attributes_r600[NUM_ATTRIBUTES_R600+1] =
static Atom xvBicubic;
static Atom xvVSync;
-static Atom xvHWPlanar;
static Atom xvBrightness, xvContrast, xvSaturation, xvHue;
static Atom xvGamma, xvColorspace;
@@ -756,8 +745,6 @@ RADEONGetTexPortAttribute(ScrnInfoPtr pScrn,
*value = pPriv->bicubic_state;
else if (attribute == xvVSync)
*value = pPriv->vsync;
- else if (attribute == xvHWPlanar)
- *value = pPriv->planar_state;
else if (attribute == xvBrightness)
*value = pPriv->brightness;
else if (attribute == xvContrast)
@@ -791,10 +778,6 @@ RADEONSetTexPortAttribute(ScrnInfoPtr pScrn,
pPriv->bicubic_state = ClipValue (value, 0, 2);
else if (attribute == xvVSync)
pPriv->vsync = ClipValue (value, 0, 1);
- else if (attribute == xvHWPlanar)
- pPriv->planar_state = ClipValue (value, 0, 1);
- else if (attribute == xvHWPlanar)
- pPriv->planar_state = ClipValue (value, 0, 1);
else if (attribute == xvBrightness)
pPriv->brightness = ClipValue (value, -1000, 1000);
else if (attribute == xvContrast)
@@ -830,7 +813,6 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
xvBicubic = MAKE_ATOM("XV_BICUBIC");
xvVSync = MAKE_ATOM("XV_VSYNC");
- xvHWPlanar = MAKE_ATOM("XV_HWPLANAR");
xvBrightness = MAKE_ATOM("XV_BRIGHTNESS");
xvContrast = MAKE_ATOM("XV_CONTRAST");
xvSaturation = MAKE_ATOM("XV_SATURATION");
@@ -899,7 +881,6 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
pPriv->doubleBuffer = 0;
pPriv->bicubic_state = BICUBIC_AUTO;
pPriv->vsync = TRUE;
- pPriv->planar_state = 1;
pPriv->brightness = 0;
pPriv->contrast = 0;
pPriv->saturation = 0;
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index db943e3..e449204 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -1050,7 +1050,8 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
if (RADEONTilingEnabled(pScrn, pPixmap))
colorpitch |= R300_COLORTILE;
- if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+
+ if (!pPriv->bicubic_enabled && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
isplanar = TRUE;
}
@@ -2487,7 +2488,7 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
if (RADEONTilingEnabled(pScrn, pPixmap))
colorpitch |= R300_COLORTILE;
- if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ if (!pPriv->bicubic_enabled && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
isplanar = TRUE;
}
diff --git a/src/radeon_video.h b/src/radeon_video.h
index 0f8342a..3f8f5e0 100644
--- a/src/radeon_video.h
+++ b/src/radeon_video.h
@@ -90,8 +90,6 @@ typedef struct {
void *video_memory;
int video_offset;
- Bool planar_hw;
- Bool planar_state;
int planeu_offset;
int planev_offset;
commit 9091b3f5f13dbea83ffd89679dac600e9f280bb2
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Wed Apr 15 20:07:42 2009 -0400
R3xx/R4xx: fix up planar shader
We were overwriting the coord fetch address with the first
tex fetch. Seemed to work however, luck I guess. Reorder
the fetches to write to temp0 last.
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index b74763f..db943e3 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -1685,7 +1685,7 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
/* tex inst */
OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(0) |
+ R300_TEX_DST_ADDR(2) |
R300_TEX_ID(0) |
R300_TEX_INST(R300_TEX_INST_LD)));
OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
@@ -1693,16 +1693,16 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
R300_TEX_ID(1) |
R300_TEX_INST(R300_TEX_INST_LD)));
OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(2) |
+ R300_TEX_DST_ADDR(0) |
R300_TEX_ID(2) |
R300_TEX_INST(R300_TEX_INST_LD)));
/* ALU inst */
- /* MAD temp0.rgb, const0.aaa, temp0.rgb, const0.rgb */
+ /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
- R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR1(2) |
R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_ADDRD(2) |
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
@@ -1714,20 +1714,20 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
/* alpha nop, but need to set up alpha source for rgb usage */
OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
- R300_ALU_ALPHA_ADDR1(0) |
+ R300_ALU_ALPHA_ADDR1(2) |
R300_ALU_ALPHA_ADDR2(0) |
- R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_ADDRD(2) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- /* MAD temp0.rgb, const1.rgb, temp1.rgb, temp0.rgb */
+ /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
R300_ALU_RGB_ADDR1(1) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_ADDRD(2) |
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
@@ -1738,17 +1738,17 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
/* alpha nop */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- /* MAD result.rgb, const2.rgb, temp2.rgb, temp0.rgb */
+ /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
- R300_ALU_RGB_ADDR1(2) |
- R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(2) |
R300_ALU_RGB_ADDRD(0) |
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
(needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
commit 14c13faeb9f9b7717a25fcc1ca97d46cc6ee0031
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Wed Apr 15 19:53:12 2009 -0400
R5xx: add shader-based csc
- native planar support
- Xv attributes
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index 3626e8d..532b600 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -368,7 +368,7 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
}
pPriv->planar_hw = pPriv->planar_state;
- if (pPriv->bicubic_enabled || (IS_R600_3D || IS_R500_3D))
+ if (pPriv->bicubic_enabled || IS_R600_3D)
pPriv->planar_hw = 0;
if (info->ChipFamily < CHIP_FAMILY_R300)
@@ -697,12 +697,18 @@ static XF86AttributeRec Attributes_r300[NUM_ATTRIBUTES_R300+1] =
{0, 0, 0, NULL}
};
-#define NUM_ATTRIBUTES_R500 2
+#define NUM_ATTRIBUTES_R500 8
static XF86AttributeRec Attributes_r500[NUM_ATTRIBUTES_R500+1] =
{
{XvSettable | XvGettable, 0, 2, "XV_BICUBIC"},
{XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
+ {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_HUE"},
+ {XvSettable | XvGettable, 0, 1, "XV_COLORSPACE"},
{0, 0, 0, NULL}
};
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 41ed7fa..b74763f 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -2492,7 +2492,7 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
}
if (isplanar) {
- txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
+ txformat1 = R300_TX_FORMAT_X8;
txpitch = pPriv->src_pitch;
} else {
if (pPriv->id == FOURCC_UYVY)
@@ -2500,7 +2500,8 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
else
txformat1 = R300_TX_FORMAT_VYUY422;
- txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+ if (pPriv->bicubic_enabled)
+ txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
/* pitch is in pixels */
txpitch = pPriv->src_pitch / 2;
@@ -2555,13 +2556,13 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
- OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8);
OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
- OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8);
OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
FINISH_ACCEL();
@@ -3162,8 +3163,76 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
FINISH_ACCEL();
- } else {
- BEGIN_ACCEL(19);
+ } else if (isplanar) {
+ /*
+ * y' = y - .0625
+ * u' = u - .5
+ * v' = v - .5;
+ *
+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
+ *
+ * DP3 might look like the straightforward solution
+ * but we'd need to move the texture yuv values in
+ * the same reg for this to work. Therefore use MADs.
+ * Brightness just adds to the off constant.
+ * Contrast is multiplication of luminance.
+ * Saturation and hue change the u and v coeffs.
+ * Default values (before adjustments - depend on colorspace):
+ * yco = 1.1643
+ * uco = 0, -0.39173, 2.017
+ * vco = 1.5958, -0.8129, 0
+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
+ *
+ * temp = MAD(yco, yuv.yyyy, off)
+ * temp = MAD(uco, yuv.uuuu, temp)
+ * result = MAD(vco, yuv.vvvv, temp)
+ */
+ /* TODO: don't recalc consts always */
+ const float Loff = -0.0627;
+ const float Coff = -0.502;
+ float uvcosf, uvsinf;
+ float yco;
+ float uco[3], vco[3], off[3];
+ float bright, cont, gamma;
+ int ref = pPriv->transform_index;
+ Bool needgamma = FALSE;
+
+ cont = RTFContrast(pPriv->contrast);
+ bright = RTFBrightness(pPriv->brightness);
+ gamma = (float)pPriv->gamma / 1000.0;
+ uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
+ uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
+ /* overlay video also does pre-gamma contrast/sat adjust, should we? */
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
+ off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
+ off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
+
+ //XXX gamma
+
+ if (gamma != 1.0) {
+ needgamma = TRUE;
+ /* note: gamma correction is out = in ^ gamma;
+ gpu can only do LG2/EX2 therefore we transform into
+ in ^ gamma = 2 ^ (log2(in) * gamma).
+ Lots of scalar ops, unfortunately (better solution?) -
+ without gamma that's 3 inst, with gamma it's 10...
+ could use different gamma factors per channel,
+ if that's of any use. */
+ }
+
+ BEGIN_ACCEL(56);
/* 2 components: 2 for tex0 */
OUT_ACCEL_REG(R300_RS_COUNT,
((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
@@ -3173,13 +3242,13 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
/* Pixel stack frame size. */
- OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
/* FP length. */
OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
- R500_US_CODE_END_ADDR(1)));
+ R500_US_CODE_END_ADDR(5)));
OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
- R500_US_CODE_RANGE_SIZE(1)));
+ R500_US_CODE_RANGE_SIZE(5)));
/* Prepare for FP emission. */
OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
@@ -3196,6 +3265,72 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
R500_INST_ALPHA_CLAMP));
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_DST_ADDR(2) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
+ R500_DX_S_SWIZ_R |
+ R500_DX_T_SWIZ_R |
+ R500_DX_R_SWIZ_R |
+ R500_DX_Q_SWIZ_R |
+ R500_DY_ADDR(0) |
+ R500_DY_S_SWIZ_R |
+ R500_DY_T_SWIZ_R |
+ R500_DY_R_SWIZ_R |
+ R500_DY_Q_SWIZ_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* tex inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_DST_ADDR(1) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
+ R500_DX_S_SWIZ_R |
+ R500_DX_T_SWIZ_R |
+ R500_DX_R_SWIZ_R |
+ R500_DX_Q_SWIZ_R |
+ R500_DY_ADDR(0) |
+ R500_DY_S_SWIZ_R |
+ R500_DY_T_SWIZ_R |
+ R500_DY_R_SWIZ_R |
+ R500_DY_Q_SWIZ_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* tex inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) |
+ R500_TEX_INST_LD |
R500_TEX_SEM_ACQUIRE |
R500_TEX_IGNORE_UNCOVERED));
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
@@ -3220,6 +3355,81 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
/* ALU inst */
+ /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(2) |
+ R500_RGB_ADDR2(0) |
+ R500_RGB_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(2) |
+ R500_ALPHA_ADDR2(0) |
+ R500_ALPHA_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_A |
+ R500_ALU_RGB_G_SWIZ_A_A |
+ R500_ALU_RGB_B_SWIZ_A_A |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_ADDRD(2) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_ADDRD(2) |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+
+ /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(1) |
+ R500_RGB_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(1) |
+ R500_ALPHA_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_ADDRD(2) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_ADDRD(2) |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+
+ /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
R500_INST_TEX_SEM_WAIT |
R500_INST_LAST |
@@ -3229,32 +3439,314 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
R500_INST_ALPHA_OMASK |
R500_INST_RGB_CLAMP |
R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_ADDRD(0) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_ADDRD(0) |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
+ R500_ALU_RGBA_A_SWIZ_1));
+
+ /* Shader constants. */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
+
+ /* constant 0: off, yco */
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco);
+ /* constant 1: uco */
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma);
+ /* constant 2: vco */
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0);
+
+ FINISH_ACCEL();
+
+ } else {
+ /*
+ * y' = y - .0625
+ * u' = u - .5
+ * v' = v - .5;
+ *
+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
+ *
+ * DP3 might look like the straightforward solution
+ * but we'd need to move the texture yuv values in
+ * the same reg for this to work. Therefore use MADs.
+ * Brightness just adds to the off constant.
+ * Contrast is multiplication of luminance.
+ * Saturation and hue change the u and v coeffs.
+ * Default values (before adjustments - depend on colorspace):
+ * yco = 1.1643
+ * uco = 0, -0.39173, 2.017
+ * vco = 1.5958, -0.8129, 0
+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
+ *
+ * temp = MAD(yco, yuv.yyyy, off)
+ * temp = MAD(uco, yuv.uuuu, temp)
+ * result = MAD(vco, yuv.vvvv, temp)
+ */
+ /* TODO: don't recalc consts always */
+ const float Loff = -0.0627;
+ const float Coff = -0.502;
+ float uvcosf, uvsinf;
+ float yco;
+ float uco[3], vco[3], off[3];
+ float bright, cont, gamma;
+ int ref = pPriv->transform_index;
+ Bool needgamma = FALSE;
+
+ cont = RTFContrast(pPriv->contrast);
+ bright = RTFBrightness(pPriv->brightness);
+ gamma = (float)pPriv->gamma / 1000.0;
+ uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
+ uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
+ /* overlay video also does pre-gamma contrast/sat adjust, should we? */
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
+ off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
+ off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
+
+ //XXX gamma
+
+ if (gamma != 1.0) {
+ needgamma = TRUE;
+ /* note: gamma correction is out = in ^ gamma;
+ gpu can only do LG2/EX2 therefore we transform into
+ in ^ gamma = 2 ^ (log2(in) * gamma).
+ Lots of scalar ops, unfortunately (better solution?) -
+ without gamma that's 3 inst, with gamma it's 10...
+ could use different gamma factors per channel,
+ if that's of any use. */
+ }
+
+ BEGIN_ACCEL(44);
+ /* 2 components: 2 for tex0/1/2 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
+
+ /* FP length. */
+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
+ R500_US_CODE_END_ADDR(3)));
+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
+ R500_US_CODE_RANGE_SIZE(3)));
+
+ /* Prepare for FP emission. */
+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_DST_ADDR(0) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
+ R500_DX_S_SWIZ_R |
+ R500_DX_T_SWIZ_R |
+ R500_DX_R_SWIZ_R |
+ R500_DX_Q_SWIZ_R |
+ R500_DY_ADDR(0) |
+ R500_DY_S_SWIZ_R |
+ R500_DY_T_SWIZ_R |
+ R500_DY_R_SWIZ_R |
+ R500_DY_Q_SWIZ_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* ALU inst */
+ /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
R500_RGB_ADDR1(0) |
- R500_RGB_ADDR1_CONST |
R500_RGB_ADDR2(0) |
R500_RGB_ADDR2_CONST));
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
R500_ALPHA_ADDR1(0) |
- R500_ALPHA_ADDR1_CONST |
R500_ALPHA_ADDR2(0) |
R500_ALPHA_ADDR2_CONST));
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_A |
+ R500_ALU_RGB_G_SWIZ_A_A |
+ R500_ALU_RGB_B_SWIZ_A_A |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_G));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_ADDRD(1) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_ADDRD(1) |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+
+ /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR2(1)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR2(1)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
R500_ALU_RGB_R_SWIZ_A_R |
R500_ALU_RGB_G_SWIZ_A_G |
R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC0 |
- R500_ALU_RGB_R_SWIZ_B_1 |
- R500_ALU_RGB_B_SWIZ_B_1 |
- R500_ALU_RGB_G_SWIZ_B_1));
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_B |
+ R500_ALU_RGB_B_SWIZ_B_B |
+ R500_ALU_RGB_G_SWIZ_B_B));
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
- R500_ALPHA_SWIZ_A_A |
- R500_ALPHA_SWIZ_B_1));
+ R500_ALPHA_ADDRD(1) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 |
- R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_ADDRD(1) |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
R500_ALU_RGBA_A_SWIZ_0));
+
+ /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_LAST |
+ R500_INST_RGB_OMASK_R |
+ R500_INST_RGB_OMASK_G |
+ R500_INST_RGB_OMASK_B |
+ R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR2(1)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR2(1)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_ADDRD(1) |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SWIZ_B_0));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_ADDRD(1) |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
+ R500_ALU_RGBA_A_SWIZ_1));
+
+ /* Shader constants. */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
+
+ /* constant 0: off, yco */
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco);
+ /* constant 1: uco */
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma);
+ /* constant 2: vco */
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0);
+
FINISH_ACCEL();
}
commit 832efc7b90f5eb2da99512fcb902ab4838d2dcd1
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Tue Apr 14 17:56:49 2009 -0400
R3xx/R4xx: Implement shader-based csc for packed formats
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 4ce34e3..41ed7fa 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -1063,7 +1063,8 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
else
txformat1 = R300_TX_FORMAT_VYUY422;
- txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+ if (pPriv->bicubic_enabled)
+ txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
/* pitch is in pixels */
txpitch = pPriv->src_pitch / 2;
@@ -1697,7 +1698,7 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
R300_TEX_INST(R300_TEX_INST_LD)));
/* ALU inst */
- /* MAD temp0, const0.a, temp0, const0.rgb */
+ /* MAD temp0.rgb, const0.aaa, temp0.rgb, const0.rgb */
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
R300_ALU_RGB_ADDR1(0) |
R300_ALU_RGB_ADDR2(0) |
@@ -1722,7 +1723,7 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- /* MAD const1, temp1, temp0 */
+ /* MAD temp0.rgb, const1.rgb, temp1.rgb, temp0.rgb */
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
R300_ALU_RGB_ADDR1(1) |
R300_ALU_RGB_ADDR2(0) |
@@ -1744,7 +1745,7 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- /* MAD result, const2, temp2, temp0 */
+ /* MAD result.rgb, const2.rgb, temp2.rgb, temp0.rgb */
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
R300_ALU_RGB_ADDR1(2) |
R300_ALU_RGB_ADDR2(0) |
@@ -1901,27 +1902,93 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
FINISH_ACCEL();
} else {
- BEGIN_ACCEL(11);
- /* 2 components: 2 for tex0 */
+ /*
+ * y' = y - .0625
+ * u' = u - .5
+ * v' = v - .5;
+ *
+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
+ *
+ * DP3 might look like the straightforward solution
+ * but we'd need to move the texture yuv values in
+ * the same reg for this to work. Therefore use MADs.
+ * Brightness just adds to the off constant.
+ * Contrast is multiplication of luminance.
+ * Saturation and hue change the u and v coeffs.
+ * Default values (before adjustments - depend on colorspace):
+ * yco = 1.1643
+ * uco = 0, -0.39173, 2.017
+ * vco = 1.5958, -0.8129, 0
+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
+ *
+ * temp = MAD(yco, yuv.yyyy, off)
+ * temp = MAD(uco, yuv.uuuu, temp)
+ * result = MAD(vco, yuv.vvvv, temp)
+ */
+ /* TODO: don't recalc consts always */
+ const float Loff = -0.0627;
+ const float Coff = -0.502;
+ float uvcosf, uvsinf;
+ float yco;
+ float uco[3], vco[3], off[3];
+ float bright, cont, gamma;
+ int ref = pPriv->transform_index;
+ Bool needgamma = FALSE;
+
+ cont = RTFContrast(pPriv->contrast);
+ bright = RTFBrightness(pPriv->brightness);
+ gamma = (float)pPriv->gamma / 1000.0;
+ uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
+ uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
+ /* overlay video also does pre-gamma contrast/sat adjust, should we? */
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
+ off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
+ off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
+
+ if (gamma != 1.0) {
+ needgamma = TRUE;
+ /* note: gamma correction is out = in ^ gamma;
+ gpu can only do LG2/EX2 therefore we transform into
+ in ^ gamma = 2 ^ (log2(in) * gamma).
+ Lots of scalar ops, unfortunately (better solution?) -
+ without gamma that's 3 inst, with gamma it's 10...
+ could use different gamma factors per channel,
+ if that's of any use. */
+ }
+
+ BEGIN_ACCEL(needgamma ? 28 + 33 : 33);
+ /* 2 components */
OUT_ACCEL_REG(R300_RS_COUNT,
((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
R300_RS_COUNT_HIRES_EN));
/* R300_INST_COUNT_RS - highest RS instruction used */
OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
- OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
/* Indirection levels */
OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
R300_FIRST_TEX));
OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(1) |
+ R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
R300_TEX_CODE_OFFSET(0) |
R300_TEX_CODE_SIZE(1)));
OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
- R300_ALU_SIZE(0) |
+ R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
R300_TEX_START(0) |
R300_TEX_SIZE(0) |
R300_RGBA_OUT));
@@ -1933,41 +2000,207 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
R300_TEX_INST(R300_TEX_INST_LD)));
/* ALU inst */
- /* RGB */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
- R300_ALU_RGB_MASK_G |
- R300_ALU_RGB_MASK_B)) |
- R300_ALU_RGB_TARGET_A));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
- R300_ALU_RGB_CLAMP));
- /* Alpha */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) |
- R300_ALU_ALPHA_ADDR1(0) |
- R300_ALU_ALPHA_ADDR2(0) |
- R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
- R300_ALU_ALPHA_TARGET_A |
- R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) |
- R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) |
- R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
- R300_ALU_ALPHA_CLAMP));
+ /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but need to set up alpha source for rgb usage */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
+ R300_ALU_ALPHA_ADDR1(0) |
+ R300_ALU_ALPHA_ADDR2(0) |
+ R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
+ (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+ R300_ALU_RGB_CLAMP));
+ /* write alpha 1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+ R300_ALU_ALPHA_TARGET_A));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
+
+ if (needgamma) {
+ /* rgb temp0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MUL const1, temp1, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but set up const1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ }
+
+ /* Shader constants. */
+ /* constant 0: off, yco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
+ /* constant 1: uco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma));
+ /* constant 2: vco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
+
FINISH_ACCEL();
}
commit 32625118c27041265d25811c00d25ab7e82fb340
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Tue Apr 14 16:38:40 2009 -0400
tex vid: fix attribute setup typo for XV_COLORSPACE
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index 542fc2a..3626e8d 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -677,7 +677,7 @@ static XF86AttributeRec Attributes_r200[NUM_ATTRIBUTES_R200+1] =
{XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"},
{XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"},
{XvSettable | XvGettable, -1000, 1000, "XV_HUE"},
- {XvSettable | XvGettable, 100, 10000, "XV_COLORSPACE"},
+ {XvSettable | XvGettable, 0, 1, "XV_COLORSPACE"},
{0, 0, 0, NULL}
};
@@ -715,7 +715,7 @@ static XF86AttributeRec Attributes_r600[NUM_ATTRIBUTES_R600+1] =
{XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"},
{XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"},
{XvSettable | XvGettable, -1000, 1000, "XV_HUE"},
- {XvSettable | XvGettable, 100, 10000, "XV_COLORSPACE"},
+ {XvSettable | XvGettable, 0, 1, "XV_COLORSPACE"},
{0, 0, 0, NULL}
};
commit adf0912006b4f1597784dbfcc563d5c6d1c5667d
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Tue Apr 14 16:16:33 2009 -0400
R6xx/R7xx: implement Xv attributes
- brightness, contrast, hue, etc.
- TODO: implement gamma
diff --git a/src/r600_shader.c b/src/r600_shader.c
index addba36..0a820cf 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -560,41 +560,12 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
return i;
}
-/*
- * ; xv ps planar
- * 00 TEX: ADDR(20) CNT(3) NO_BARRIER
- * 0 SAMPLE R1.x__1, R0.xy01, t0, s0
- * 1 SAMPLE R1.__x_, R0.xy01, t1, s1
- * 2 SAMPLE R1._x__, R0.xy01, t2, s2
- * 01 TEX: ADDR(28) CNT(2) NO_BARRIER
- * 0 SAMPLE R1.x__1, R0.xy01, t0, s0
- * 1 SAMPLE R1._xy_, R0.xy01, t1, s1
- * 02 ALU: ADDR(4) CNT(16)
- * 3 x: MULADD R1.x, R1.x, C3.x, C3.y CLAMP
- * y: MULADD R1.y, R1.y, C3.z, C3.w
- * z: MULADD R1.z, R1.z, C3.z, C3.w
- * w: MOV R1.w, 0.0f
- * 4 x: DOT4 R2.x, R1.x, C0.x CLAMP VEC_102
- * y: DOT4 ____, R1.y, C0.y CLAMP VEC_102
- * z: DOT4 ____, R1.z, C0.z CLAMP VEC_102
- * w: DOT4 ____, R1.w, C0.w CLAMP VEC_021
- * 5 x: DOT4 ____, R1.x, C1.x CLAMP VEC_102
- * y: DOT4 R2.y, R1.y, C1.y CLAMP VEC_102
- * z: DOT4 ____, R1.z, C1.z CLAMP VEC_102
- * w: DOT4 ____, R1.w, C1.w CLAMP VEC_021
- * 6 x: DOT4 ____, R1.x, C2.x CLAMP VEC_102
- * y: DOT4 ____, R1.y, C2.y CLAMP VEC_102
- * z: DOT4 R2.z, R1.z, C2.z CLAMP VEC_102
- * w: DOT4 ____, R1.w, C2.w CLAMP VEC_021
- * 03 EXP_DONE: PIX0, R2
- * END_OF_PROGRAM
- */
int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
{
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(20));
+ shader[i++] = CF_DWORD0(ADDR(16));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_BOOL),
@@ -606,7 +577,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(0));
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(28));
+ shader[i++] = CF_DWORD0(ADDR(24));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_NOT_BOOL),
@@ -625,7 +596,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(16),
+ I_COUNT(12),
USES_WATERFALL(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
@@ -648,73 +619,74 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 4 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* 4,5,6,7 */
+ /* r2.x = MAD(c0.w, r1.x, c0.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(259),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Y),
+ SRC2_ELEM(ELEM_X),
SRC2_NEG(0),
ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 5 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ CLAMP(0));
+ /* r2.y = MAD(c0.w, r1.x, c0.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(259),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
+ SRC2_ELEM(ELEM_Y),
SRC2_NEG(0),
ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 6 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* r2.z = MAD(c0.w, r1.x, c0.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(259),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
+ SRC2_ELEM(ELEM_Z),
SRC2_NEG(0),
ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 7 */
+ /* r2.w = MAD(0, 0, 1) */
shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -726,334 +698,198 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MOV),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 8 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+
+ /* 8,9,10,11 */
+ /* r2.x = MAD(c1.x, r1.y, pv.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 9 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ CLAMP(0));
+ /* r2.y = MAD(c1.y, r1.y, pv.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
- CLAMP(1));
- /* 10 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ CLAMP(0));
+ /* r2.z = MAD(c1.z, r1.y, pv.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(256),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(1));
- /* 11 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(256),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_021),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(1));
- /* 12 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(257),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 13 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
DST_GPR(2),
DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(1));
- /* 14 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(257),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
- CLAMP(1));
- /* 15 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ CLAMP(0));
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
+ SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(SQ_ALU_SRC_0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_021),
- DST_GPR(0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
- CLAMP(1));
- /* 16 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ CLAMP(0));
+ /* 12,13,14,15 */
+ /* r2.x = MAD(c2.x, r1.z, pv.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(258),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* 17 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* r2.y = MAD(c2.y, r1.z, pv.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(258),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(1));
- /* 18 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* r2.z = MAD(c2.z, r1.z, pv.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(258),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(1));
- /* 19 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
+ SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(SQ_ALU_SRC_0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_021),
- DST_GPR(0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(1));
- /* 20 */
- shader[i++] = CF_DWORD0(ADDR(22));
+
+ /* 16 */
+ shader[i++] = CF_DWORD0(ADDR(18));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1064,7 +900,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_TEX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 21 */
+ /* 17 */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1076,7 +912,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 22/23 */
+ /* 18/19 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1104,7 +940,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 24/25 */
+ /* 20/21 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1132,7 +968,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 26/27 */
+ /* 22/23 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1160,8 +996,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 28 */
- shader[i++] = CF_DWORD0(ADDR(30));
+ /* 24 */
+ shader[i++] = CF_DWORD0(ADDR(26));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1172,7 +1008,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_TEX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 29 */
+ /* 25 */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1184,7 +1020,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 30/31 */
+ /* 26/27 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1212,7 +1048,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 32/33 */
+ /* 28/29 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 88745d5..600262b 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -45,6 +45,15 @@
#include "damage.h"
+/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
+ note the difference to the parameters used in overlay are due
+ to 10bit vs. float calcs */
+static REF_TRANSFORM trans[2] =
+{
+ {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
+ {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */
+};
+
static void
R600DoneTexturedVideo(ScrnInfoPtr pScrn)
{
@@ -115,18 +124,91 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_sampler_t tex_samp;
shader_config_t vs_conf, ps_conf;
int uv_offset;
- static float ps_alu_consts[] = {
- 1.0, 0.0, 1.4020, 0, /* r - c[0] */
- 1.0, -0.34414, -0.71414, 0, /* g - c[1] */
- 1.0, 1.7720, 0.0, 0, /* b - c[2] */
- /* Constants for undoing Y'CbCr scaling
- * - Y' is scaled from 16:235
- * - Cb/Cr are scaled from 16:240
- * Unscaled value N' = N * N_mul + N_shift (N' in range [-0.5, 0.5])
- * Vector is [Y_mul, Y_shfit, C_mul, C_shift]
- */
- 256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0,
- };
+ /*
+ * y' = y - .0625
+ * u' = u - .5
+ * v' = v - .5;
+ *
+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
+ *
+ * DP3 might look like the straightforward solution
+ * but we'd need to move the texture yuv values in
+ * the same reg for this to work. Therefore use MADs.
+ * Brightness just adds to the off constant.
+ * Contrast is multiplication of luminance.
+ * Saturation and hue change the u and v coeffs.
+ * Default values (before adjustments - depend on colorspace):
+ * yco = 1.1643
+ * uco = 0, -0.39173, 2.017
+ * vco = 1.5958, -0.8129, 0
+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
+ *
+ * temp = MAD(yco, yuv.yyyy, off)
+ * temp = MAD(uco, yuv.uuuu, temp)
+ * result = MAD(vco, yuv.vvvv, temp)
+ */
+ /* TODO: calc consts in the shader */
+ const float Loff = -0.0627;
+ const float Coff = -0.502;
+ float uvcosf, uvsinf;
+ float yco;
+ float uco[3], vco[3], off[3];
+ float bright, cont, gamma;
+ int ref = pPriv->transform_index;
+ Bool needgamma = FALSE;
+ float ps_alu_consts[12];
+
+ cont = RTFContrast(pPriv->contrast);
+ bright = RTFBrightness(pPriv->brightness);
+ gamma = (float)pPriv->gamma / 1000.0;
+ uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
+ uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
+ /* overlay video also does pre-gamma contrast/sat adjust, should we? */
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
+ off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
+ off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
+
+ // XXX
+ gamma = 1.0;
+
+ if (gamma != 1.0) {
+ needgamma = TRUE;
+ /* note: gamma correction is out = in ^ gamma;
+ gpu can only do LG2/EX2 therefore we transform into
+ in ^ gamma = 2 ^ (log2(in) * gamma).
+ Lots of scalar ops, unfortunately (better solution?) -
+ without gamma that's 3 inst, with gamma it's 10...
+ could use different gamma factors per channel,
+ if that's of any use. */
+ }
+
+ /* setup the ps consts */
+ ps_alu_consts[0] = off[0];
+ ps_alu_consts[1] = off[1];
+ ps_alu_consts[2] = off[2];
+ ps_alu_consts[3] = yco;
+
+ ps_alu_consts[4] = uco[0];
+ ps_alu_consts[5] = uco[1];
+ ps_alu_consts[6] = uco[2];
+ ps_alu_consts[7] = gamma;
+
+ ps_alu_consts[8] = vco[0];
+ ps_alu_consts[9] = vco[1];
+ ps_alu_consts[10] = vco[2];
+ ps_alu_consts[11] = 0.0;
CLEAR (cb_conf);
CLEAR (tex_res);
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index 82675d9..542fc2a 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -137,12 +137,6 @@ static REF_TRANSFORM trans[2] =
{1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */
};
-
-#define RTFSaturation(a) (1.0 + ((a)*1.0)/1000.0)
-#define RTFBrightness(a) (((a)*1.0)/2000.0)
-#define RTFContrast(a) (1.0 + ((a)*1.0)/1000.0)
-#define RTFHue(a) (((a)*3.1416)/1000.0)
-
#define ACCEL_MMIO
#define ACCEL_PREAMBLE() unsigned char *RADEONMMIO = info->MMIO
#define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n))
@@ -712,11 +706,16 @@ static XF86AttributeRec Attributes_r500[NUM_ATTRIBUTES_R500+1] =
{0, 0, 0, NULL}
};
-#define NUM_ATTRIBUTES_R600 1
+#define NUM_ATTRIBUTES_R600 6
static XF86AttributeRec Attributes_r600[NUM_ATTRIBUTES_R600+1] =
{
{XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_HUE"},
+ {XvSettable | XvGettable, 100, 10000, "XV_COLORSPACE"},
{0, 0, 0, NULL}
};
diff --git a/src/radeon_video.c b/src/radeon_video.c
index 8479160..a14f44c 100644
--- a/src/radeon_video.c
+++ b/src/radeon_video.c
@@ -1704,12 +1704,6 @@ RADEONSetPortAttribute(ScrnInfoPtr pScrn,
RADEON_SYNC(info, pScrn);
-#define RTFSaturation(a) (1.0 + ((a)*1.0)/1000.0)
-#define RTFBrightness(a) (((a)*1.0)/2000.0)
-#define RTFIntensity(a) (((a)*1.0)/2000.0)
-#define RTFContrast(a) (1.0 + ((a)*1.0)/1000.0)
-#define RTFHue(a) (((a)*3.1416)/1000.0)
-
if(attribute == xvAutopaintColorkey)
{
pPriv->autopaint_colorkey = ClipValue (value, 0, 1);
diff --git a/src/radeon_video.h b/src/radeon_video.h
index be33871..0f8342a 100644
--- a/src/radeon_video.h
+++ b/src/radeon_video.h
@@ -135,6 +135,12 @@ typedef struct tagREF_TRANSFORM
float RefBCr;
} REF_TRANSFORM;
+#define RTFSaturation(a) (1.0 + ((a)*1.0)/1000.0)
+#define RTFBrightness(a) (((a)*1.0)/2000.0)
+#define RTFIntensity(a) (((a)*1.0)/2000.0)
+#define RTFContrast(a) (1.0 + ((a)*1.0)/1000.0)
+#define RTFHue(a) (((a)*3.1416)/1000.0)
+
xf86CrtcPtr
radeon_xv_pick_best_crtc(ScrnInfoPtr pScrn,
int x1, int x2, int y1, int y2);
commit 8810fe92b5aed08888584c6914482586b59f71ab
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Tue Apr 14 11:26:35 2009 -0400
R200: clean code, always use shader based csc
- consolidate common r2xx csc shader code
- always use shader based csc for both packed and planar
formats
- always use native planar csc on r1xx
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index f657536..82675d9 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -362,7 +362,7 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
/* Bicubic filter setup */
pPriv->bicubic_enabled = (pPriv->bicubic_state != BICUBIC_OFF);
- if (!(IS_R300_3D || IS_R500_3D || IS_R600_3D))
+ if (!(IS_R300_3D || IS_R500_3D))
pPriv->bicubic_enabled = FALSE;
if (pPriv->bicubic_enabled && (pPriv->bicubic_state == BICUBIC_AUTO)) {
/*
@@ -377,6 +377,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
if (pPriv->bicubic_enabled || (IS_R600_3D || IS_R500_3D))
pPriv->planar_hw = 0;
+ if (info->ChipFamily < CHIP_FAMILY_R300)
+ pPriv->planar_hw = 1;
+
switch(id) {
case FOURCC_YV12:
case FOURCC_I420:
@@ -663,21 +666,19 @@ static XF86VideoFormatRec Formats[NUM_FORMATS] =
{15, TrueColor}, {16, TrueColor}, {24, TrueColor}
};
-#define NUM_ATTRIBUTES 2
+#define NUM_ATTRIBUTES 1
static XF86AttributeRec Attributes[NUM_ATTRIBUTES+1] =
{
{XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
- {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},
{0, 0, 0, NULL}
};
-#define NUM_ATTRIBUTES_R200 7
+#define NUM_ATTRIBUTES_R200 6
static XF86AttributeRec Attributes_r200[NUM_ATTRIBUTES_R200+1] =
{
{XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
- {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},
{XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"},
{XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"},
{XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"},
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 76c8456..4ce34e3 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -109,11 +109,11 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
dst_pitch = exaGetPixmapPitch(pPixmap);
} else
#endif
- {
- dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
- info->fbLocation + pScrn->fbOffset;
- dst_pitch = pPixmap->devKind;
- }
+ {
+ dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
+ info->fbLocation + pScrn->fbOffset;
+ dst_pitch = pPixmap->devKind;
+ }
#ifdef COMPOSITE
dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
@@ -158,11 +158,8 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
return;
}
- if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) {
isplanar = TRUE;
- }
-
- if (isplanar) {
txformat = RADEON_TXFORMAT_Y8;
} else {
if (pPriv->id == FOURCC_UYVY)
@@ -444,13 +441,24 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
RADEONInfoPtr info = RADEONPTR(pScrn);
PixmapPtr pPixmap = pPriv->pPixmap;
uint32_t txformat;
- uint32_t txfilter, txformat0, txpitch;
+ uint32_t txfilter, txsize, txpitch;
uint32_t dst_offset, dst_pitch, dst_format;
uint32_t colorpitch;
Bool isplanar = FALSE;
int dstxoff, dstyoff, pixel_shift, vtx_count;
BoxPtr pBox = REGION_RECTS(&pPriv->clip);
int nBox = REGION_NUM_RECTS(&pPriv->clip);
+
+ /* note: in contrast to r300, use input biasing on uv components */
+ const float Loff = -0.0627;
+ float uvcosf, uvsinf;
+ float yco, yoff;
+ float uco[3], vco[3];
+ float bright, cont, sat;
+ int ref = pPriv->transform_index;
+ float ucscale = 0.25, vcscale = 0.25;
+ Bool needux8 = FALSE, needvx8 = FALSE;
+
ACCEL_PREAMBLE();
pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
@@ -495,8 +503,6 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
RADEONInit3DEngine(pScrn);
}
- vtx_count = 4;
-
/* Same for R100/R200 */
switch (pPixmap->drawable.bitsPerPixel) {
case 16:
@@ -512,11 +518,8 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
return;
}
- if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) {
isplanar = TRUE;
- }
-
- if (isplanar) {
txformat = RADEON_TXFORMAT_I8;
} else {
if (pPriv->id == FOURCC_UYVY)
@@ -546,61 +549,52 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
info->accel_state->texW[0] = pPriv->w;
info->accel_state->texH[0] = pPriv->h;
- if (isplanar) {
- /* note: in contrast to r300, use input biasing on uv components */
- const float Loff = -0.0627;
- float uvcosf, uvsinf;
- float yco, yoff;
- float uco[3], vco[3];
- float bright, cont, sat;
- int ref = pPriv->transform_index;
- float ucscale = 0.25, vcscale = 0.25;
- Bool needux8 = FALSE, needvx8 = FALSE;
-
- /* contrast can cause constant overflow, clamp */
- cont = RTFContrast(pPriv->contrast);
- if (cont * trans[ref].RefLuma > 2.0)
- cont = 2.0 / trans[ref].RefLuma;
- /* brightness is only from -0.5 to 0.5 should be safe */
- bright = RTFBrightness(pPriv->brightness);
- /* saturation can also cause overflow, clamp */
- sat = RTFSaturation(pPriv->saturation);
- if (sat * trans[ref].RefBCb > 4.0)
- sat = 4.0 / trans[ref].RefBCb;
- uvcosf = sat * cos(RTFHue(pPriv->hue));
- uvsinf = sat * sin(RTFHue(pPriv->hue));
-
- yco = trans[ref].RefLuma * cont;
- uco[0] = -trans[ref].RefRCr * uvsinf;
- uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
- uco[2] = trans[ref].RefBCb * uvcosf;
- vco[0] = trans[ref].RefRCr * uvcosf;
- vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
- vco[2] = trans[ref].RefBCb * uvsinf;
- yoff = Loff * yco + bright;
-
- if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
- needux8 = TRUE;
- ucscale = 0.125;
- }
- if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
- needvx8 = TRUE;
- vcscale = 0.125;
- }
+ txfilter = R200_MAG_FILTER_LINEAR |
+ R200_MIN_FILTER_LINEAR |
+ R200_CLAMP_S_CLAMP_LAST |
+ R200_CLAMP_T_CLAMP_LAST;
+
+ /* contrast can cause constant overflow, clamp */
+ cont = RTFContrast(pPriv->contrast);
+ if (cont * trans[ref].RefLuma > 2.0)
+ cont = 2.0 / trans[ref].RefLuma;
+ /* brightness is only from -0.5 to 0.5 should be safe */
+ bright = RTFBrightness(pPriv->brightness);
+ /* saturation can also cause overflow, clamp */
+ sat = RTFSaturation(pPriv->saturation);
+ if (sat * trans[ref].RefBCb > 4.0)
+ sat = 4.0 / trans[ref].RefBCb;
+ uvcosf = sat * cos(RTFHue(pPriv->hue));
+ uvsinf = sat * sin(RTFHue(pPriv->hue));
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ yoff = Loff * yco + bright;
+
+ if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
+ needux8 = TRUE;
+ ucscale = 0.125;
+ }
+ if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
+ needvx8 = TRUE;
+ vcscale = 0.125;
+ }
+ if (isplanar) {
/* need 2 texcoord sets (even though they are identical) due
to denormalization! hw apparently can't premultiply
same coord set by different texture size */
vtx_count = 6;
- txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
- (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
+ txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
+ (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
txpitch -= 32;
- txfilter = R200_MAG_FILTER_LINEAR |
- R200_MIN_FILTER_LINEAR |
- R200_CLAMP_S_CLAMP_LAST |
- R200_CLAMP_T_CLAMP_LAST;
BEGIN_ACCEL(36);
@@ -627,14 +621,14 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_1, txformat0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_1, txsize);
OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch);
OUT_ACCEL_REG(R200_PP_TXOFFSET_1, pPriv->src_offset + pPriv->planeu_offset);
OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter);
OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_2, txformat0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_2, txsize);
OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch);
OUT_ACCEL_REG(R200_PP_TXOFFSET_2, pPriv->src_offset + pPriv->planev_offset);
@@ -757,58 +751,8 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
0.0));
FINISH_ACCEL();
- } else if (info->ChipFamily == CHIP_FAMILY_RV250) {
- /* fix up broken packed yuv - shader same as above except
- yuv components are all in same reg */
- /* note: in contrast to r300, use input biasing on uv components */
- const float Loff = -0.0627;
- float uvcosf, uvsinf;
- float yco, yoff;
- float uco[3], vco[3];
- float bright, cont, sat;
- int ref = pPriv->transform_index;
- float ucscale = 0.25, vcscale = 0.25;
- Bool needux8 = FALSE, needvx8 = FALSE;
-
- /* contrast can cause constant overflow, clamp */
- cont = RTFContrast(pPriv->contrast);
- if (cont * trans[ref].RefLuma > 2.0)
- cont = 2.0 / trans[ref].RefLuma;
- /* brightness is only from -0.5 to 0.5 should be safe */
- bright = RTFBrightness(pPriv->brightness);
- /* saturation can also cause overflow, clamp */
- sat = RTFSaturation(pPriv->saturation);
- if (sat * trans[ref].RefBCb > 4.0)
- sat = 4.0 / trans[ref].RefBCb;
- uvcosf = sat * cos(RTFHue(pPriv->hue));
- uvsinf = sat * sin(RTFHue(pPriv->hue));
-
- yco = trans[ref].RefLuma * cont;
- uco[0] = -trans[ref].RefRCr * uvsinf;
- uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
- uco[2] = trans[ref].RefBCb * uvcosf;
- vco[0] = trans[ref].RefRCr * uvcosf;
- vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
- vco[2] = trans[ref].RefBCb * uvsinf;
- yoff = Loff * yco + bright;
-
- if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
- needux8 = TRUE;
- ucscale = 0.125;
- }
- if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
- needvx8 = TRUE;
- vcscale = 0.125;
- }
-
- txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
- (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
- txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
- txpitch -= 32;
- txfilter = R200_MAG_FILTER_LINEAR |
- R200_MIN_FILTER_LINEAR |
- R200_CLAMP_S_CLAMP_LAST |
- R200_CLAMP_T_CLAMP_LAST;
+ } else {
+ vtx_count = 4;
BEGIN_ACCEL(24);
@@ -911,45 +855,6 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
0.0));
FINISH_ACCEL();
- } else {
- BEGIN_ACCEL(13);
- OUT_ACCEL_REG(RADEON_PP_CNTL,
- RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
-
- OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
- OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
-
- OUT_ACCEL_REG(R200_PP_TXFILTER_0,
- R200_MAG_FILTER_LINEAR |
- R200_MIN_FILTER_LINEAR |
- R200_CLAMP_S_CLAMP_LAST |
- R200_CLAMP_T_CLAMP_LAST |
- R200_YUV_TO_RGB);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
-
- OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
-
- OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
- R200_TXC_ARG_A_ZERO |
- R200_TXC_ARG_B_ZERO |
- R200_TXC_ARG_C_R0_COLOR |
- R200_TXC_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
- R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
- OUT_ACCEL_REG(R200_PP_TXABLEND_0,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_R0_ALPHA |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
- R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
- FINISH_ACCEL();
}
if (pPriv->vsync) {
@@ -1021,7 +926,6 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
if (isplanar) {
/*
* Just render a rect (using three coords).
- * Filter is a bit a misnomer, it's just texcoords...
*/
VTX_OUT_6((float)dstX, (float)(dstY + dsth),
(float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
commit 17685fefba68d188c7c0fe7a079180ec0722c046
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Mon Apr 13 20:06:11 2009 -0400
Tex vid: general cleanup
- convert macros to more meaningful VTX_OUT_4 and VTX_OUT_6
names to reflect that they actually do
- fix indenting
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 83aa101..76c8456 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -47,8 +47,8 @@
#ifdef ACCEL_CP
-#define VTX_OUT_FILTER(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \
-do { \
+#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \
+do { \
OUT_RING_F(_dstX); \
OUT_RING_F(_dstY); \
OUT_RING_F(_srcX); \
@@ -57,7 +57,7 @@ do { \
OUT_RING_F(_maskY); \
} while (0)
-#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \
+#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \
do { \
OUT_RING_F(_dstX); \
OUT_RING_F(_dstY); \
@@ -67,7 +67,7 @@ do { \
#else /* ACCEL_CP */
-#define VTX_OUT_FILTER(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \
+#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \
do { \
OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \
OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \
@@ -77,7 +77,7 @@ do { \
OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY); \
} while (0)
-#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \
+#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \
do { \
OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \
OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \
@@ -404,25 +404,25 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
* Just render a rect (using three coords).
* Filter is a bit a misnomer, it's just texcoords...
*/
- VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth),
- (float)srcX, (float)(srcY + srch),
- (float)srcX, (float)(srcY + (srch / 2)));
- VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth),
- (float)(srcX + srcw), (float)(srcY + srch),
- (float)(srcX + (srcw / 2)), (float)(srcY + (srch / 2)));
- VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY,
- (float)(srcX + srcw), (float)srcY,
- (float)(srcX + (srcw / 2)), (float)srcY);
+ VTX_OUT_6((float)dstX, (float)(dstY + dsth),
+ (float)srcX, (float)(srcY + srch),
+ (float)srcX, (float)(srcY + (srch / 2)));
+ VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw), (float)(srcY + srch),
+ (float)(srcX + (srcw / 2)), (float)(srcY + (srch / 2)));
+ VTX_OUT_6((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw), (float)srcY,
+ (float)(srcX + (srcw / 2)), (float)srcY);
} else {
/*
* Just render a rect (using three coords).
*/
- VTX_OUT((float)dstX, (float)(dstY + dsth),
- (float)srcX, (float)(srcY + srch));
- VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
- (float)(srcX + srcw), (float)(srcY + srch));
- VTX_OUT((float)(dstX + dstw), (float)dstY,
- (float)(srcX + srcw), (float)srcY);
+ VTX_OUT_4((float)dstX, (float)(dstY + dsth),
+ (float)srcX, (float)(srcY + srch));
+ VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw), (float)(srcY + srch));
+ VTX_OUT_4((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw), (float)srcY);
}
pBox++;
@@ -480,20 +480,20 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
RADEON_SWITCH_TO_3D();
} else
#endif
- {
- BEGIN_ACCEL(2);
- OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
- /* We must wait for 3d to idle, in case source was just written as a dest. */
- OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
- RADEON_WAIT_HOST_IDLECLEAN |
- RADEON_WAIT_2D_IDLECLEAN |
- RADEON_WAIT_3D_IDLECLEAN |
- RADEON_WAIT_DMA_GUI_IDLE);
- FINISH_ACCEL();
-
- if (!info->accel_state->XInited3D)
- RADEONInit3DEngine(pScrn);
- }
+ {
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
+ /* We must wait for 3d to idle, in case source was just written as a dest. */
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_ACCEL();
+
+ if (!info->accel_state->XInited3D)
+ RADEONInit3DEngine(pScrn);
+ }
vtx_count = 4;
@@ -534,10 +534,8 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
BEGIN_ACCEL(4);
- OUT_ACCEL_REG(RADEON_RB3D_CNTL,
- dst_format /*| RADEON_ALPHA_BLEND_ENABLE*/);
+ OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format);
OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
-
OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
@@ -990,14 +988,14 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
*/
#ifdef ACCEL_CP
- BEGIN_RING(nBox * 3 * vtx_count + 2);
+ BEGIN_RING(nBox * 3 * vtx_count + 4);
OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
nBox * 3 * vtx_count));
OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
RADEON_CP_VC_CNTL_PRIM_WALK_RING |
((nBox * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT));
#else /* ACCEL_CP */
- BEGIN_ACCEL(nBox * 3 * vtx_count + 1);
+ BEGIN_ACCEL(nBox * 3 * vtx_count + 2);
OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
RADEON_VF_PRIM_WALK_DATA |
((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT)));
@@ -1025,40 +1023,38 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
* Just render a rect (using three coords).
* Filter is a bit a misnomer, it's just texcoords...
*/
- VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth),
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth),
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY,
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT_6((float)dstX, (float)(dstY + dsth),
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT_6((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
} else {
/*
* Just render a rect (using three coords).
*/
- VTX_OUT((float)dstX, (float)(dstY + dsth),
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw), (float)dstY,
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT_4((float)dstX, (float)(dstY + dsth),
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT_4((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
}
pBox++;
}
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+
#ifdef ACCEL_CP
ADVANCE_RING();
#else
FINISH_ACCEL();
#endif /* !ACCEL_CP */
- BEGIN_ACCEL(1);
- OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
- FINISH_ACCEL();
-
DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
}
@@ -1104,20 +1100,20 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
RADEON_SWITCH_TO_3D();
} else
#endif
- {
- BEGIN_ACCEL(2);
- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
- /* We must wait for 3d to idle, in case source was just written as a dest. */
- OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
- RADEON_WAIT_HOST_IDLECLEAN |
- RADEON_WAIT_2D_IDLECLEAN |
- RADEON_WAIT_3D_IDLECLEAN |
- RADEON_WAIT_DMA_GUI_IDLE);
- FINISH_ACCEL();
-
- if (!info->accel_state->XInited3D)
- RADEONInit3DEngine(pScrn);
- }
+ {
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+ /* We must wait for 3d to idle, in case source was just written as a dest. */
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_ACCEL();
+
+ if (!info->accel_state->XInited3D)
+ RADEONInit3DEngine(pScrn);
+ }
if (pPriv->bicubic_enabled)
vtx_count = 6;
@@ -2191,54 +2187,57 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
* have to deal with the legacy handling.
*/
if (use_quad) {
- VTX_OUT_FILTER((float)dstX, (float)dstY,
- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
- (float)srcX + 0.5, (float)srcY + 0.5);
- VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth),
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
- (float)srcX + 0.5, (float)(srcY + srch) + 0.5);
- VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth),
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
- (float)(srcX + srcw) + 0.5, (float)(srcY + srch) + 0.5);
- VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY,
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
- (float)(srcX + srcw) + 0.5, (float)srcY + 0.5);
+ VTX_OUT_6((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
+ (float)srcX + 0.5, (float)srcY + 0.5);
+ VTX_OUT_6((float)dstX, (float)(dstY + dsth),
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
+ (float)srcX + 0.5, (float)(srcY + srch) + 0.5);
+ VTX_OUT_6((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
+ (float)(srcX + srcw) + 0.5, (float)(srcY + srch) + 0.5);
+ VTX_OUT_6((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
+ (float)(srcX + srcw) + 0.5, (float)srcY + 0.5);
} else {
- VTX_OUT_FILTER((float)dstX, (float)dstY,
- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
- (float)srcX + 0.5, (float)srcY + 0.5);
- VTX_OUT_FILTER((float)dstX, (float)(dstY + dstw + dsth),
- (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0],
- (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5);
- VTX_OUT_FILTER((float)(dstX + dstw + dsth), (float)dstY,
- ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
- (float)srcY / info->accel_state->texH[0],
- (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5,
- (float)srcY + 0.5);
+ VTX_OUT_6((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
+ (float)srcX + 0.5, (float)srcY + 0.5);
+ VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth),
+ (float)srcX / info->accel_state->texW[0],
+ ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0],
+ (float)srcX + 0.5,
+ (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5);
+ VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY,
+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
+ (float)srcY / info->accel_state->texH[0],
+ (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5,
+ (float)srcY + 0.5);
}
} else {
if (use_quad) {
- VTX_OUT((float)dstX, (float)dstY,
- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
- VTX_OUT((float)dstX, (float)(dstY + dsth),
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw), (float)dstY,
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT_4((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT_4((float)dstX, (float)(dstY + dsth),
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT_4((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT_4((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
} else {
/*
* Render a big, scissored triangle. This means
* increasing the triangle size and adjusting
* texture coordinates.
*/
- VTX_OUT((float)dstX, (float)dstY,
- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
- VTX_OUT((float)dstX, (float)(dstY + dsth + dstw),
- (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw + dsth), (float)dstY,
- ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
- (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT_4((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw),
+ (float)srcX / info->accel_state->texW[0],
+ ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]);
+ VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY,
+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
+ (float)srcY / info->accel_state->texH[0]);
}
}
@@ -2305,20 +2304,20 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
RADEON_SWITCH_TO_3D();
} else
#endif
- {
- BEGIN_ACCEL(2);
- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
- /* We must wait for 3d to idle, in case source was just written as a dest. */
- OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
- RADEON_WAIT_HOST_IDLECLEAN |
- RADEON_WAIT_2D_IDLECLEAN |
- RADEON_WAIT_3D_IDLECLEAN |
- RADEON_WAIT_DMA_GUI_IDLE);
- FINISH_ACCEL();
-
- if (!info->accel_state->XInited3D)
- RADEONInit3DEngine(pScrn);
- }
+ {
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+ /* We must wait for 3d to idle, in case source was just written as a dest. */
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_ACCEL();
+
+ if (!info->accel_state->XInited3D)
+ RADEONInit3DEngine(pScrn);
+ }
if (pPriv->bicubic_enabled)
vtx_count = 6;
@@ -3207,30 +3206,30 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
(3 << RADEON_VF_NUM_VERTICES_SHIFT)));
#endif
if (pPriv->bicubic_enabled) {
- VTX_OUT_FILTER((float)dstX, (float)dstY,
- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
- (float)srcX + 0.5, (float)srcY + 0.5);
- VTX_OUT_FILTER((float)dstX, (float)(dstY + dstw + dsth),
- (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0],
- (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5);
- VTX_OUT_FILTER((float)(dstX + dstw + dsth), (float)dstY,
- ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
- (float)srcY / info->accel_state->texH[0],
- (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5,
- (float)srcY + 0.5);
+ VTX_OUT_6((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
+ (float)srcX + 0.5, (float)srcY + 0.5);
+ VTX_OUT_6((float)dstX, (float)(dstY + dstw + dsth),
+ (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0],
+ (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5);
+ VTX_OUT_6((float)(dstX + dstw + dsth), (float)dstY,
+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
+ (float)srcY / info->accel_state->texH[0],
+ (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5,
+ (float)srcY + 0.5);
} else {
/*
* Render a big, scissored triangle. This means
* increasing the triangle size and adjusting
* texture coordinates.
*/
- VTX_OUT((float)dstX, (float)dstY,
- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
- VTX_OUT((float)dstX, (float)(dstY + dsth + dstw),
- (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw + dsth), (float)dstY,
- ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
- (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT_4((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT_4((float)dstX, (float)(dstY + dsth + dstw),
+ (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]);
+ VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY,
+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
+ (float)srcY / info->accel_state->texH[0]);
}
/* flushing is pipelined, free/finish is not */
@@ -3254,6 +3253,6 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
}
-#undef VTX_OUT
-#undef VTX_OUT_FILTER
+#undef VTX_OUT_4
+#undef VTX_OUT_6
#undef FUNC_NAME
commit 093ab4c9a33b0b396b78c061c3321dc044bdccdc
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Mon Apr 13 19:48:35 2009 -0400
R1xx: add support for native planar textured Xv
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index f64da02..f657536 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -374,7 +374,7 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
}
pPriv->planar_hw = pPriv->planar_state;
- if (pPriv->bicubic_enabled || !( IS_R300_3D || IS_R200_3D ))
+ if (pPriv->bicubic_enabled || (IS_R600_3D || IS_R500_3D))
pPriv->planar_hw = 0;
switch(id) {
@@ -663,11 +663,12 @@ static XF86VideoFormatRec Formats[NUM_FORMATS] =
{15, TrueColor}, {16, TrueColor}, {24, TrueColor}
};
-#define NUM_ATTRIBUTES 1
+#define NUM_ATTRIBUTES 2
static XF86AttributeRec Attributes[NUM_ATTRIBUTES+1] =
{
{XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
+ {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},
{0, 0, 0, NULL}
};
@@ -710,6 +711,14 @@ static XF86AttributeRec Attributes_r500[NUM_ATTRIBUTES_R500+1] =
{0, 0, 0, NULL}
};
+#define NUM_ATTRIBUTES_R600 1
+
+static XF86AttributeRec Attributes_r600[NUM_ATTRIBUTES_R600+1] =
+{
+ {XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
+ {0, 0, 0, NULL}
+};
+
static Atom xvBicubic;
static Atom xvVSync;
static Atom xvHWPlanar;
@@ -841,14 +850,18 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
pPortPriv =
(RADEONPortPrivPtr)(&adapt->pPortPrivates[num_texture_ports]);
- if (IS_R300_3D) {
- adapt->pAttributes = Attributes_r300;
- adapt->nAttributes = NUM_ATTRIBUTES_R300;
+ if (IS_R600_3D) {
+ adapt->pAttributes = Attributes_r600;
+ adapt->nAttributes = NUM_ATTRIBUTES_R600;
}
else if (IS_R500_3D) {
adapt->pAttributes = Attributes_r500;
adapt->nAttributes = NUM_ATTRIBUTES_R500;
}
+ else if (IS_R300_3D) {
+ adapt->pAttributes = Attributes_r300;
+ adapt->nAttributes = NUM_ATTRIBUTES_R300;
+ }
else if (IS_R200_3D) {
adapt->pAttributes = Attributes_r200;
adapt->nAttributes = NUM_ATTRIBUTES_R200;
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 360532e..83aa101 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -92,7 +92,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
{
RADEONInfoPtr info = RADEONPTR(pScrn);
PixmapPtr pPixmap = pPriv->pPixmap;
- uint32_t txformat;
+ uint32_t txformat, txsize, txpitch;
uint32_t dst_offset, dst_pitch, dst_format;
uint32_t colorpitch;
Bool isplanar = FALSE;
@@ -128,22 +128,20 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
RADEON_SWITCH_TO_3D();
} else
#endif
- {
- BEGIN_ACCEL(2);
- OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
- /* We must wait for 3d to idle, in case source was just written as a dest. */
- OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
- RADEON_WAIT_HOST_IDLECLEAN |
- RADEON_WAIT_2D_IDLECLEAN |
- RADEON_WAIT_3D_IDLECLEAN |
- RADEON_WAIT_DMA_GUI_IDLE);
- FINISH_ACCEL();
-
- if (!info->accel_state->XInited3D)
- RADEONInit3DEngine(pScrn);
- }
+ {
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
+ /* We must wait for 3d to idle, in case source was just written as a dest. */
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_ACCEL();
- vtx_count = 4;
+ if (!info->accel_state->XInited3D)
+ RADEONInit3DEngine(pScrn);
+ }
/* Same for R100/R200 */
switch (pPixmap->drawable.bitsPerPixel) {
@@ -165,7 +163,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
}
if (isplanar) {
- txformat = RADEON_TXFORMAT_I8;
+ txformat = RADEON_TXFORMAT_Y8;
} else {
if (pPriv->id == FOURCC_UYVY)
txformat = RADEON_TXFORMAT_YVYU422;
@@ -182,56 +180,149 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
BEGIN_ACCEL(4);
- OUT_ACCEL_REG(RADEON_RB3D_CNTL,
- dst_format /*| RADEON_ALPHA_BLEND_ENABLE*/);
+ OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format);
OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
-
OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
-
OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
FINISH_ACCEL();
+ if (isplanar) {
+ /* need 2 texcoord sets (even though they are identical) due
+ to denormalization! hw apparently can't premultiply
+ same coord set by different texture size */
+ vtx_count = 6;
- info->accel_state->texW[0] = 1;
- info->accel_state->texH[0] = 1;
-
- BEGIN_ACCEL(9);
-
- OUT_ACCEL_REG(RADEON_PP_CNTL,
- RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
-
- OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
- RADEON_SE_VTX_FMT_ST0));
-
- OUT_ACCEL_REG(RADEON_PP_TXFILTER_0,
- RADEON_MAG_FILTER_LINEAR |
- RADEON_MIN_FILTER_LINEAR |
- RADEON_CLAMP_S_CLAMP_LAST |
- RADEON_CLAMP_T_CLAMP_LAST |
- RADEON_YUV_TO_RGB);
- OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
- OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset);
- OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
- RADEON_COLOR_ARG_A_ZERO |
- RADEON_COLOR_ARG_B_ZERO |
- RADEON_COLOR_ARG_C_T0_COLOR |
- RADEON_BLEND_CTL_ADD |
- RADEON_CLAMP_TX);
- OUT_ACCEL_REG(RADEON_PP_TXABLEND_0,
- RADEON_ALPHA_ARG_A_ZERO |
- RADEON_ALPHA_ARG_B_ZERO |
- RADEON_ALPHA_ARG_C_T0_ALPHA |
- RADEON_BLEND_CTL_ADD |
- RADEON_CLAMP_TX);
-
- OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0,
- pPriv->src_pitch - 32);
- FINISH_ACCEL();
+ txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
+ (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+ txpitch -= 32;
+
+ BEGIN_ACCEL(23);
+
+ OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
+ RADEON_SE_VTX_FMT_ST0 |
+ RADEON_SE_VTX_FMT_ST1));
+
+ OUT_ACCEL_REG(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE |
+ RADEON_TEX_1_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
+ RADEON_TEX_2_ENABLE | RADEON_TEX_BLEND_2_ENABLE |
+ RADEON_PLANAR_YUV_ENABLE));
+
+ /* Y */
+ OUT_ACCEL_REG(RADEON_PP_TXFILTER_0,
+ RADEON_MAG_FILTER_LINEAR |
+ RADEON_MIN_FILTER_LINEAR |
+ RADEON_CLAMP_S_CLAMP_LAST |
+ RADEON_CLAMP_T_CLAMP_LAST |
+ RADEON_YUV_TO_RGB);
+ OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0);
+ OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset);
+ OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
+ RADEON_COLOR_ARG_A_ZERO |
+ RADEON_COLOR_ARG_B_ZERO |
+ RADEON_COLOR_ARG_C_T0_COLOR |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+ OUT_ACCEL_REG(RADEON_PP_TXABLEND_0,
+ RADEON_ALPHA_ARG_A_ZERO |
+ RADEON_ALPHA_ARG_B_ZERO |
+ RADEON_ALPHA_ARG_C_T0_ALPHA |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+
+ OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
+ (pPriv->w - 1) |
+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+ OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0,
+ pPriv->src_pitch - 32);
+
+ /* U */
+ OUT_ACCEL_REG(RADEON_PP_TXFILTER_1,
+ RADEON_MAG_FILTER_LINEAR |
+ RADEON_MIN_FILTER_LINEAR |
+ RADEON_CLAMP_S_CLAMP_LAST |
+ RADEON_CLAMP_T_CLAMP_LAST);
+ OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1);
+ OUT_ACCEL_REG(RADEON_PP_TXOFFSET_1, pPriv->src_offset + pPriv->planeu_offset);
+ OUT_ACCEL_REG(RADEON_PP_TXCBLEND_1,
+ RADEON_COLOR_ARG_A_ZERO |
+ RADEON_COLOR_ARG_B_ZERO |
+ RADEON_COLOR_ARG_C_T0_COLOR |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+ OUT_ACCEL_REG(RADEON_PP_TXABLEND_1,
+ RADEON_ALPHA_ARG_A_ZERO |
+ RADEON_ALPHA_ARG_B_ZERO |
+ RADEON_ALPHA_ARG_C_T0_ALPHA |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+
+ OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1, txsize);
+ OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch);
+
+ /* V */
+ OUT_ACCEL_REG(RADEON_PP_TXFILTER_2,
+ RADEON_MAG_FILTER_LINEAR |
+ RADEON_MIN_FILTER_LINEAR |
+ RADEON_CLAMP_S_CLAMP_LAST |
+ RADEON_CLAMP_T_CLAMP_LAST);
+ OUT_ACCEL_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1);
+ OUT_ACCEL_REG(RADEON_PP_TXOFFSET_2, pPriv->src_offset + pPriv->planev_offset);
+ OUT_ACCEL_REG(RADEON_PP_TXCBLEND_2,
+ RADEON_COLOR_ARG_A_ZERO |
+ RADEON_COLOR_ARG_B_ZERO |
+ RADEON_COLOR_ARG_C_T0_COLOR |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+ OUT_ACCEL_REG(RADEON_PP_TXABLEND_2,
+ RADEON_ALPHA_ARG_A_ZERO |
+ RADEON_ALPHA_ARG_B_ZERO |
+ RADEON_ALPHA_ARG_C_T0_ALPHA |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+
+ OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_2, txsize);
+ OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_2, txpitch);
+ FINISH_ACCEL();
+ } else {
+ vtx_count = 4;
+ BEGIN_ACCEL(9);
+
+ OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
+ RADEON_SE_VTX_FMT_ST0));
+
+ OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+
+ OUT_ACCEL_REG(RADEON_PP_TXFILTER_0,
+ RADEON_MAG_FILTER_LINEAR |
+ RADEON_MIN_FILTER_LINEAR |
+ RADEON_CLAMP_S_CLAMP_LAST |
+ RADEON_CLAMP_T_CLAMP_LAST |
+ RADEON_YUV_TO_RGB);
+ OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0);
+ OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset);
+ OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
+ RADEON_COLOR_ARG_A_ZERO |
+ RADEON_COLOR_ARG_B_ZERO |
+ RADEON_COLOR_ARG_C_T0_COLOR |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+ OUT_ACCEL_REG(RADEON_PP_TXABLEND_0,
+ RADEON_ALPHA_ARG_A_ZERO |
+ RADEON_ALPHA_ARG_B_ZERO |
+ RADEON_ALPHA_ARG_C_T0_ALPHA |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+
+ OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
+ (pPriv->w - 1) |
+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+ OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0,
+ pPriv->src_pitch - 32);
+ FINISH_ACCEL();
+ }
if (pPriv->vsync) {
xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
@@ -269,18 +360,23 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
*/
#ifdef ACCEL_CP
- BEGIN_RING(nBox * 3 * vtx_count + 3);
+ BEGIN_RING(nBox * 3 * vtx_count + 5);
OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
nBox * 3 * vtx_count + 1));
- OUT_RING(RADEON_CP_VC_FRMT_XY |
- RADEON_CP_VC_FRMT_ST0);
+ if (isplanar)
+ OUT_RING(RADEON_CP_VC_FRMT_XY |
+ RADEON_CP_VC_FRMT_ST0 |
+ RADEON_CP_VC_FRMT_ST1);
+ else
+ OUT_RING(RADEON_CP_VC_FRMT_XY |
+ RADEON_CP_VC_FRMT_ST0);
OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
RADEON_CP_VC_CNTL_PRIM_WALK_RING |
RADEON_CP_VC_CNTL_MAOS_ENABLE |
RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
((nBox * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT));
#else /* ACCEL_CP */
- BEGIN_ACCEL(nBox * vtx_count * 3 + 1);
+ BEGIN_ACCEL(nBox * vtx_count * 3 + 2);
OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
RADEON_VF_PRIM_WALK_DATA |
RADEON_VF_RADEON_MODE |
@@ -303,29 +399,42 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
srch = (pPriv->src_h * dsth) / pPriv->dst_h;
- /*
- * Just render a rect (using three coords).
- */
- VTX_OUT((float)dstX, (float)(dstY + dsth),
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw), (float)dstY,
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ if (isplanar) {
+ /*
+ * Just render a rect (using three coords).
+ * Filter is a bit a misnomer, it's just texcoords...
+ */
+ VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth),
+ (float)srcX, (float)(srcY + srch),
+ (float)srcX, (float)(srcY + (srch / 2)));
+ VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw), (float)(srcY + srch),
+ (float)(srcX + (srcw / 2)), (float)(srcY + (srch / 2)));
+ VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw), (float)srcY,
+ (float)(srcX + (srcw / 2)), (float)srcY);
+ } else {
+ /*
+ * Just render a rect (using three coords).
+ */
+ VTX_OUT((float)dstX, (float)(dstY + dsth),
+ (float)srcX, (float)(srcY + srch));
+ VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw), (float)(srcY + srch));
+ VTX_OUT((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw), (float)srcY);
+ }
pBox++;
}
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
#ifdef ACCEL_CP
ADVANCE_RING();
#else
FINISH_ACCEL();
#endif /* !ACCEL_CP */
- BEGIN_ACCEL(1);
- OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
- FINISH_ACCEL();
-
DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
}
commit ec0cb51df81c6c9a1de640d227fa9c9c33161083
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Mon Apr 13 17:21:20 2009 -0400
R2xx tex vid: append verts for clip boxes
rather than sending a new draw packet for each rect
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index d5cf47c..360532e 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -880,6 +880,21 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
* render as a quad.
*/
+#ifdef ACCEL_CP
+ BEGIN_RING(nBox * 3 * vtx_count + 2);
+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+ nBox * 3 * vtx_count));
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ ((nBox * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT));
+#else /* ACCEL_CP */
+ BEGIN_ACCEL(nBox * 3 * vtx_count + 1);
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
+ RADEON_VF_PRIM_WALK_DATA |
+ ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT)));
+
+#endif
+
while (nBox--) {
int srcX, srcY, srcw, srch;
int dstX, dstY, dstw, dsth;
@@ -896,20 +911,6 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
srch = (pPriv->src_h * dsth) / pPriv->dst_h;
-#ifdef ACCEL_CP
- BEGIN_RING(3 * vtx_count + 2);
- OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
- 3 * vtx_count));
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
- RADEON_CP_VC_CNTL_PRIM_WALK_RING |
- (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
-#else /* ACCEL_CP */
- BEGIN_ACCEL(1 + vtx_count * 3);
- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
- RADEON_VF_PRIM_WALK_DATA |
- (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
-
-#endif
if (isplanar) {
/*
* Just render a rect (using three coords).
@@ -936,15 +937,15 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
(float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
}
+ pBox++;
+ }
+
#ifdef ACCEL_CP
ADVANCE_RING();
#else
FINISH_ACCEL();
#endif /* !ACCEL_CP */
- pBox++;
- }
-
BEGIN_ACCEL(1);
OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
FINISH_ACCEL();
commit fde075a30a8ee2c333aa1bbe8fbd177258b085ba
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Mon Apr 13 17:13:51 2009 -0400
R1xx tex vid: append verts for clip boxes
rather than sending a new draw packet for each rect
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 9361f07..d5cf47c 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -268,6 +268,25 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
* render as a quad.
*/
+#ifdef ACCEL_CP
+ BEGIN_RING(nBox * 3 * vtx_count + 3);
+ OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
+ nBox * 3 * vtx_count + 1));
+ OUT_RING(RADEON_CP_VC_FRMT_XY |
+ RADEON_CP_VC_FRMT_ST0);
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ RADEON_CP_VC_CNTL_MAOS_ENABLE |
+ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+ ((nBox * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT));
+#else /* ACCEL_CP */
+ BEGIN_ACCEL(nBox * vtx_count * 3 + 1);
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
+ RADEON_VF_PRIM_WALK_DATA |
+ RADEON_VF_RADEON_MODE |
+ ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT)));
+#endif
+
while (nBox--) {
int srcX, srcY, srcw, srch;
int dstX, dstY, dstw, dsth;
@@ -284,24 +303,6 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
srch = (pPriv->src_h * dsth) / pPriv->dst_h;
-#ifdef ACCEL_CP
- BEGIN_RING(3 * vtx_count + 3);
- OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
- 3 * vtx_count + 1));
- OUT_RING(RADEON_CP_VC_FRMT_XY |
- RADEON_CP_VC_FRMT_ST0);
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
- RADEON_CP_VC_CNTL_PRIM_WALK_RING |
- RADEON_CP_VC_CNTL_MAOS_ENABLE |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
- (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
-#else /* ACCEL_CP */
- BEGIN_ACCEL(1 + vtx_count * 3);
- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
- RADEON_VF_PRIM_WALK_DATA |
- RADEON_VF_RADEON_MODE |
- (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
-#endif
/*
* Just render a rect (using three coords).
*/
@@ -312,15 +313,15 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
VTX_OUT((float)(dstX + dstw), (float)dstY,
(float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ pBox++;
+ }
+
#ifdef ACCEL_CP
ADVANCE_RING();
#else
FINISH_ACCEL();
#endif /* !ACCEL_CP */
- pBox++;
- }
-
BEGIN_ACCEL(1);
OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
FINISH_ACCEL();
commit 12839fc17a2cca4ac14b9757bdaa63ba4679f96f
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Mon Apr 13 17:04:31 2009 -0400
Tex vid: split by family
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index bf8a276..f64da02 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -598,13 +598,29 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
pPriv->h = height;
#ifdef XF86DRI
- if (IS_R600_3D)
- R600DisplayTexturedVideo(pScrn, pPriv);
- else if (info->directRenderingEnabled)
- RADEONDisplayTexturedVideoCP(pScrn, pPriv);
- else
+ if (info->directRenderingEnabled) {
+ if (IS_R600_3D)
+ R600DisplayTexturedVideo(pScrn, pPriv);
+ else if (IS_R500_3D)
+ R500DisplayTexturedVideoCP(pScrn, pPriv);
+ else if (IS_R300_3D)
+ R300DisplayTexturedVideoCP(pScrn, pPriv);
+ else if (IS_R200_3D)
+ R200DisplayTexturedVideoCP(pScrn, pPriv);
+ else
+ RADEONDisplayTexturedVideoCP(pScrn, pPriv);
+ } else
#endif
- RADEONDisplayTexturedVideoMMIO(pScrn, pPriv);
+ {
+ if (IS_R500_3D)
+ R500DisplayTexturedVideoMMIO(pScrn, pPriv);
+ else if (IS_R300_3D)
+ R300DisplayTexturedVideoMMIO(pScrn, pPriv);
+ else if (IS_R200_3D)
+ R200DisplayTexturedVideoMMIO(pScrn, pPriv);
+ else
+ RADEONDisplayTexturedVideoMMIO(pScrn, pPriv);
+ }
return Success;
}
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 3c4289f..9361f07 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -93,10 +93,8 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
RADEONInfoPtr info = RADEONPTR(pScrn);
PixmapPtr pPixmap = pPriv->pPixmap;
uint32_t txformat;
- uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
uint32_t dst_offset, dst_pitch, dst_format;
- uint32_t txenable, colorpitch;
- uint32_t blendcntl;
+ uint32_t colorpitch;
Bool isplanar = FALSE;
int dstxoff, dstyoff, pixel_shift, vtx_count;
BoxPtr pBox = REGION_RECTS(&pPriv->clip);
@@ -132,10 +130,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
#endif
{
BEGIN_ACCEL(2);
- if (IS_R300_3D || IS_R500_3D)
- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
- else
- OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
+ OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
/* We must wait for 3d to idle, in case source was just written as a dest. */
OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
RADEON_WAIT_HOST_IDLECLEAN |
@@ -148,2057 +143,1835 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
RADEONInit3DEngine(pScrn);
}
- if (pPriv->bicubic_enabled)
- vtx_count = 6;
- else
- vtx_count = 4;
+ vtx_count = 4;
- if (IS_R300_3D || IS_R500_3D) {
- uint32_t output_fmt;
-
- switch (pPixmap->drawable.bitsPerPixel) {
- case 16:
- if (pPixmap->drawable.depth == 15)
- dst_format = R300_COLORFORMAT_ARGB1555;
- else
- dst_format = R300_COLORFORMAT_RGB565;
- break;
- case 32:
- dst_format = R300_COLORFORMAT_ARGB8888;
- break;
- default:
- return;
+ /* Same for R100/R200 */
+ switch (pPixmap->drawable.bitsPerPixel) {
+ case 16:
+ if (pPixmap->drawable.depth == 15)
+ dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+ else
+ dst_format = RADEON_COLOR_FORMAT_RGB565;
+ break;
+ case 32:
+ dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+ break;
+ default:
+ return;
+ }
+
+ if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ isplanar = TRUE;
+ }
+
+ if (isplanar) {
+ txformat = RADEON_TXFORMAT_I8;
+ } else {
+ if (pPriv->id == FOURCC_UYVY)
+ txformat = RADEON_TXFORMAT_YVYU422;
+ else
+ txformat = RADEON_TXFORMAT_VYUY422;
+ }
+
+ txformat |= RADEON_TXFORMAT_NON_POWER2;
+
+ colorpitch = dst_pitch >> pixel_shift;
+
+ if (RADEONTilingEnabled(pScrn, pPixmap))
+ colorpitch |= RADEON_COLOR_TILE_ENABLE;
+
+ BEGIN_ACCEL(4);
+
+ OUT_ACCEL_REG(RADEON_RB3D_CNTL,
+ dst_format /*| RADEON_ALPHA_BLEND_ENABLE*/);
+ OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
+
+ OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+
+ OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
+ RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+
+ FINISH_ACCEL();
+
+
+ info->accel_state->texW[0] = 1;
+ info->accel_state->texH[0] = 1;
+
+ BEGIN_ACCEL(9);
+
+ OUT_ACCEL_REG(RADEON_PP_CNTL,
+ RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+
+ OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
+ RADEON_SE_VTX_FMT_ST0));
+
+ OUT_ACCEL_REG(RADEON_PP_TXFILTER_0,
+ RADEON_MAG_FILTER_LINEAR |
+ RADEON_MIN_FILTER_LINEAR |
+ RADEON_CLAMP_S_CLAMP_LAST |
+ RADEON_CLAMP_T_CLAMP_LAST |
+ RADEON_YUV_TO_RGB);
+ OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
+ OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset);
+ OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
+ RADEON_COLOR_ARG_A_ZERO |
+ RADEON_COLOR_ARG_B_ZERO |
+ RADEON_COLOR_ARG_C_T0_COLOR |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+ OUT_ACCEL_REG(RADEON_PP_TXABLEND_0,
+ RADEON_ALPHA_ARG_A_ZERO |
+ RADEON_ALPHA_ARG_B_ZERO |
+ RADEON_ALPHA_ARG_C_T0_ALPHA |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+
+ OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
+ (pPriv->w - 1) |
+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+ OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0,
+ pPriv->src_pitch - 32);
+ FINISH_ACCEL();
+
+ if (pPriv->vsync) {
+ xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
+ pPriv->drw_x,
+ pPriv->drw_x + pPriv->dst_w,
+ pPriv->drw_y,
+ pPriv->drw_y + pPriv->dst_h);
+ if (crtc) {
+ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
+
+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap,
+ radeon_crtc->crtc_id,
+ pPriv->drw_y - crtc->y,
+ (pPriv->drw_y - crtc->y) + pPriv->dst_h);
}
+ }
+ /*
+ * Rendering of the actual polygon is done in two different
+ * ways depending on chip generation:
+ *
+ * < R300:
+ *
+ * These chips can render a rectangle in one pass, so
+ * handling is pretty straight-forward.
+ *
+ * >= R300:
+ *
+ * These chips can accept a quad, but will render it as
+ * two triangles which results in a diagonal tear. Instead
+ * We render a single, large triangle and use the scissor
+ * functionality to restrict it to the desired rectangle.
+ * Due to guardband limits on r3xx/r4xx, we can only use
+ * the single triangle up to 2880 pixels; above that we
+ * render as a quad.
+ */
+
+ while (nBox--) {
+ int srcX, srcY, srcw, srch;
+ int dstX, dstY, dstw, dsth;
+ dstX = pBox->x1 + dstxoff;
+ dstY = pBox->y1 + dstyoff;
+ dstw = pBox->x2 - pBox->x1;
+ dsth = pBox->y2 - pBox->y1;
+
+ srcX = ((pBox->x1 - pPriv->drw_x) *
+ pPriv->src_w) / pPriv->dst_w;
+ srcY = ((pBox->y1 - pPriv->drw_y) *
+ pPriv->src_h) / pPriv->dst_h;
+
+ srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
+ srch = (pPriv->src_h * dsth) / pPriv->dst_h;
+
+#ifdef ACCEL_CP
+ BEGIN_RING(3 * vtx_count + 3);
+ OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
+ 3 * vtx_count + 1));
+ OUT_RING(RADEON_CP_VC_FRMT_XY |
+ RADEON_CP_VC_FRMT_ST0);
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ RADEON_CP_VC_CNTL_MAOS_ENABLE |
+ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+ (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+#else /* ACCEL_CP */
+ BEGIN_ACCEL(1 + vtx_count * 3);
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
+ RADEON_VF_PRIM_WALK_DATA |
+ RADEON_VF_RADEON_MODE |
+ (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
+#endif
+ /*
+ * Just render a rect (using three coords).
+ */
+ VTX_OUT((float)dstX, (float)(dstY + dsth),
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+
+#ifdef ACCEL_CP
+ ADVANCE_RING();
+#else
+ FINISH_ACCEL();
+#endif /* !ACCEL_CP */
+
+ pBox++;
+ }
- output_fmt = (R300_OUT_FMT_C4_8 |
- R300_OUT_FMT_C0_SEL_BLUE |
- R300_OUT_FMT_C1_SEL_GREEN |
- R300_OUT_FMT_C2_SEL_RED |
- R300_OUT_FMT_C3_SEL_ALPHA);
+ BEGIN_ACCEL(1);
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+ FINISH_ACCEL();
- colorpitch = dst_pitch >> pixel_shift;
- colorpitch |= dst_format;
+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
+}
- if (RADEONTilingEnabled(pScrn, pPixmap))
- colorpitch |= R300_COLORTILE;
+static void
+FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ PixmapPtr pPixmap = pPriv->pPixmap;
+ uint32_t txformat;
+ uint32_t txfilter, txformat0, txpitch;
+ uint32_t dst_offset, dst_pitch, dst_format;
+ uint32_t colorpitch;
+ Bool isplanar = FALSE;
+ int dstxoff, dstyoff, pixel_shift, vtx_count;
+ BoxPtr pBox = REGION_RECTS(&pPriv->clip);
+ int nBox = REGION_NUM_RECTS(&pPriv->clip);
+ ACCEL_PREAMBLE();
- if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
- isplanar = TRUE;
+ pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
+
+#ifdef USE_EXA
+ if (info->useEXA) {
+ dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
+ dst_pitch = exaGetPixmapPitch(pPixmap);
+ } else
+#endif
+ {
+ dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
+ info->fbLocation + pScrn->fbOffset;
+ dst_pitch = pPixmap->devKind;
}
- if (isplanar) {
- txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
- txpitch = pPriv->src_pitch;
- } else {
- if (pPriv->id == FOURCC_UYVY)
- txformat1 = R300_TX_FORMAT_YVYU422;
- else
- txformat1 = R300_TX_FORMAT_VYUY422;
+#ifdef COMPOSITE
+ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
+ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
+#else
+ dstxoff = 0;
+ dstyoff = 0;
+#endif
- txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+#ifdef USE_EXA
+ if (info->useEXA) {
+ RADEON_SWITCH_TO_3D();
+ } else
+#endif
+ {
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
+ /* We must wait for 3d to idle, in case source was just written as a dest. */
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_ACCEL();
- /* pitch is in pixels */
- txpitch = pPriv->src_pitch / 2;
+ if (!info->accel_state->XInited3D)
+ RADEONInit3DEngine(pScrn);
}
- txpitch -= 1;
- txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
- (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
- R300_TXPITCH_EN);
+ vtx_count = 4;
- info->accel_state->texW[0] = pPriv->w;
- info->accel_state->texH[0] = pPriv->h;
+ /* Same for R100/R200 */
+ switch (pPixmap->drawable.bitsPerPixel) {
+ case 16:
+ if (pPixmap->drawable.depth == 15)
+ dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+ else
+ dst_format = RADEON_COLOR_FORMAT_RGB565;
+ break;
+ case 32:
+ dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+ break;
+ default:
+ return;
+ }
- txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
- R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
- R300_TX_MAG_FILTER_LINEAR |
- R300_TX_MIN_FILTER_LINEAR |
- (0 << R300_TX_ID_SHIFT));
+ if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ isplanar = TRUE;
+ }
+ if (isplanar) {
+ txformat = RADEON_TXFORMAT_I8;
+ } else {
+ if (pPriv->id == FOURCC_UYVY)
+ txformat = RADEON_TXFORMAT_YVYU422;
+ else
+ txformat = RADEON_TXFORMAT_VYUY422;
+ }
- if (IS_R500_3D && ((pPriv->w - 1) & 0x800))
- txpitch |= R500_TXWIDTH_11;
+ txformat |= RADEON_TXFORMAT_NON_POWER2;
- if (IS_R500_3D && ((pPriv->h - 1) & 0x800))
- txpitch |= R500_TXHEIGHT_11;
+ colorpitch = dst_pitch >> pixel_shift;
- txoffset = pPriv->src_offset;
+ if (RADEONTilingEnabled(pScrn, pPixmap))
+ colorpitch |= RADEON_COLOR_TILE_ENABLE;
- BEGIN_ACCEL(6);
- OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
- OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
- OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
- OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
- OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
- OUT_ACCEL_REG(R300_TX_OFFSET_0, txoffset);
- FINISH_ACCEL();
+ BEGIN_ACCEL(4);
- txenable = R300_TEX_0_ENABLE;
+ OUT_ACCEL_REG(RADEON_RB3D_CNTL,
+ dst_format /*| RADEON_ALPHA_BLEND_ENABLE*/);
+ OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
- if (isplanar) {
- txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
- (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
- R300_TXPITCH_EN);
- txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
- txpitch -= 1;
- txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
- R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
- R300_TX_MIN_FILTER_LINEAR |
- R300_TX_MAG_FILTER_LINEAR);
-
- BEGIN_ACCEL(12);
- OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
- OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
- OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
- OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
- OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
- OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
- OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
- OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
- OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
- OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
- OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
- OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
- FINISH_ACCEL();
- txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
+ OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+
+ OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
+ RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+
+ FINISH_ACCEL();
+
+ info->accel_state->texW[0] = pPriv->w;
+ info->accel_state->texH[0] = pPriv->h;
+
+ if (isplanar) {
+ /* note: in contrast to r300, use input biasing on uv components */
+ const float Loff = -0.0627;
+ float uvcosf, uvsinf;
+ float yco, yoff;
+ float uco[3], vco[3];
+ float bright, cont, sat;
+ int ref = pPriv->transform_index;
+ float ucscale = 0.25, vcscale = 0.25;
+ Bool needux8 = FALSE, needvx8 = FALSE;
+
+ /* contrast can cause constant overflow, clamp */
+ cont = RTFContrast(pPriv->contrast);
+ if (cont * trans[ref].RefLuma > 2.0)
+ cont = 2.0 / trans[ref].RefLuma;
+ /* brightness is only from -0.5 to 0.5 should be safe */
+ bright = RTFBrightness(pPriv->brightness);
+ /* saturation can also cause overflow, clamp */
+ sat = RTFSaturation(pPriv->saturation);
+ if (sat * trans[ref].RefBCb > 4.0)
+ sat = 4.0 / trans[ref].RefBCb;
+ uvcosf = sat * cos(RTFHue(pPriv->hue));
+ uvsinf = sat * sin(RTFHue(pPriv->hue));
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ yoff = Loff * yco + bright;
+
+ if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
+ needux8 = TRUE;
+ ucscale = 0.125;
+ }
+ if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
+ needvx8 = TRUE;
+ vcscale = 0.125;
}
- if (pPriv->bicubic_enabled) {
- /* Size is 128x1 */
- txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
- (0x0 << R300_TXHEIGHT_SHIFT) |
- R300_TXPITCH_EN);
- /* Format is 32-bit floats, 4bpp */
- txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
- /* Pitch is 127 (128-1) */
- txpitch = 0x7f;
- /* Tex filter */
- txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
- R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) |
- R300_TX_MIN_FILTER_NEAREST |
- R300_TX_MAG_FILTER_NEAREST |
- (1 << R300_TX_ID_SHIFT));
-
- BEGIN_ACCEL(6);
- OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter);
- OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
- OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
- OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1);
- OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
- OUT_ACCEL_REG(R300_TX_OFFSET_1, pPriv->bicubic_src_offset);
- FINISH_ACCEL();
-
- /* Enable tex 1 */
- txenable |= R300_TEX_1_ENABLE;
+ /* need 2 texcoord sets (even though they are identical) due
+ to denormalization! hw apparently can't premultiply
+ same coord set by different texture size */
+ vtx_count = 6;
+
+ txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
+ (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+ txpitch -= 32;
+ txfilter = R200_MAG_FILTER_LINEAR |
+ R200_MIN_FILTER_LINEAR |
+ R200_CLAMP_S_CLAMP_LAST |
+ R200_CLAMP_T_CLAMP_LAST;
+
+ BEGIN_ACCEL(36);
+
+ OUT_ACCEL_REG(RADEON_PP_CNTL,
+ RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE |
+ RADEON_TEX_BLEND_0_ENABLE |
+ RADEON_TEX_BLEND_1_ENABLE |
+ RADEON_TEX_BLEND_2_ENABLE);
+
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+ (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
+ (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
+
+ OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_0,
+ (pPriv->w - 1) |
+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+ OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
+ OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+
+ OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_1, txformat0);
+ OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch);
+ OUT_ACCEL_REG(R200_PP_TXOFFSET_1, pPriv->src_offset + pPriv->planeu_offset);
+
+ OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_2, txformat0);
+ OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch);
+ OUT_ACCEL_REG(R200_PP_TXOFFSET_2, pPriv->src_offset + pPriv->planev_offset);
+
+ /* similar to r300 code. Note the big problem is that hardware constants
+ * are 8 bits only, representing 0.0-1.0. We can get that up (using bias
+ * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually
+ * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but
+ * the constants not. To get larger range can use output scale, but for
+ * that 2.018 value we need a total scale by 8, which means the constants
+ * really have no accuracy whatsoever (5 fractional bits only).
+ * The only direct way to get high precision "constants" into the fragment
+ * pipe I know of is to use the texcoord interpolator (not color, this one
+ * is 8 bit only too), which seems a bit expensive. We're lucky though it
+ * seems the values we need seem to fit better than worst case (get about
+ * 6 fractional bits for this instead of 5, at least when not correcting for
+ * hue/saturation/contrast/brightness, which is the same as for vco - yco and
+ * yoff get 8 fractional bits). Try to preserve as much accuracy as possible
+ * even with non-default saturation/hue/contrast/brightness adjustments,
+ * it gets a little crazy and ultimately precision might still be lacking.
+ *
+ * A higher precision (8 fractional bits) version might just put uco into
+ * a texcoord, and calculate a new vcoconst in the shader, like so:
+ * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable
+ * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0}
+ * vcocalc = ADD temp, bias/scale(cohelper), vco
+ * would in total use 4 tex units, 4 instructions which seems fairly
+ * balanced for this architecture (instead of 3 + 3 for the solution here)
+ *
+ * temp = MAD(yco, yuv.yyyy, yoff)
+ * temp = MAD(uco, yuv.uuuu, temp)
+ * result = MAD(vco, yuv.vvvv, temp)
+ *
+ * note first mad produces actually scalar, hence we transform
+ * it into a dp2a to get 8 bit precision of yco instead of 7 -
+ * That's assuming hw correctly expands consts to internal precision.
+ * (y * 1 + y * (yco - 1) + yoff)
+ * temp = DP2A / 2 (yco, yuv.yyyy, yoff)
+ * temp = MAD (uco / 4, yuv.uuuu * 2, temp)
+ * result = MAD x2 (vco / 2, yuv.vvvv, temp)
+ *
+ * vco, uco need bias (and hence scale too)
+ *
+ */
+
+ /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_ARG_B_R0_COLOR |
+ R200_TXC_ARG_C_TFACTOR_COLOR |
+ (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
+ R200_TXC_OP_DOT2_ADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
+ (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ R200_TXC_SCALE_INV2 |
+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_0,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
+ R200_TXA_OUTPUT_REG_NONE);
+
+ /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_BIAS_ARG_A |
+ R200_TXC_SCALE_ARG_A |
+ R200_TXC_ARG_B_R1_COLOR |
+ R200_TXC_BIAS_ARG_B |
+ (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
+ R200_TXC_ARG_C_R0_COLOR |
+ R200_TXC_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
+ (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_1,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
+ R200_TXA_OUTPUT_REG_NONE);
+
+ /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_BIAS_ARG_A |
+ R200_TXC_SCALE_ARG_A |
+ R200_TXC_ARG_B_R2_COLOR |
+ R200_TXC_BIAS_ARG_B |
+ (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
+ R200_TXC_ARG_C_R0_COLOR |
+ R200_TXC_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
+ (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ R200_TXC_SCALE_2X |
+ R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_2,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_COMP_ARG_C |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
+ R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+
+ /* shader constants */
+ OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
+ yco > 1.0 ? yco - 1.0: yco,
+ yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
+ 0.0));
+ OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
+ uco[1] * ucscale + 0.5, /* or [-2, 2] */
+ uco[2] * ucscale + 0.5,
+ 0.0));
+ OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
+ vco[1] * vcscale + 0.5, /* or [-4, 4] */
+ vco[2] * vcscale + 0.5,
+ 0.0));
+
+ FINISH_ACCEL();
+ } else if (info->ChipFamily == CHIP_FAMILY_RV250) {
+ /* fix up broken packed yuv - shader same as above except
+ yuv components are all in same reg */
+ /* note: in contrast to r300, use input biasing on uv components */
+ const float Loff = -0.0627;
+ float uvcosf, uvsinf;
+ float yco, yoff;
+ float uco[3], vco[3];
+ float bright, cont, sat;
+ int ref = pPriv->transform_index;
+ float ucscale = 0.25, vcscale = 0.25;
+ Bool needux8 = FALSE, needvx8 = FALSE;
+
+ /* contrast can cause constant overflow, clamp */
+ cont = RTFContrast(pPriv->contrast);
+ if (cont * trans[ref].RefLuma > 2.0)
+ cont = 2.0 / trans[ref].RefLuma;
+ /* brightness is only from -0.5 to 0.5 should be safe */
+ bright = RTFBrightness(pPriv->brightness);
+ /* saturation can also cause overflow, clamp */
+ sat = RTFSaturation(pPriv->saturation);
+ if (sat * trans[ref].RefBCb > 4.0)
+ sat = 4.0 / trans[ref].RefBCb;
+ uvcosf = sat * cos(RTFHue(pPriv->hue));
+ uvsinf = sat * sin(RTFHue(pPriv->hue));
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ yoff = Loff * yco + bright;
+
+ if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
+ needux8 = TRUE;
+ ucscale = 0.125;
+ }
+ if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
+ needvx8 = TRUE;
+ vcscale = 0.125;
}
- /* setup the VAP */
- if (info->accel_state->has_tcl) {
- if (pPriv->bicubic_enabled)
- BEGIN_ACCEL(7);
- else
- BEGIN_ACCEL(6);
- } else {
- if (pPriv->bicubic_enabled)
- BEGIN_ACCEL(5);
- else
- BEGIN_ACCEL(4);
+ txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
+ (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+ txpitch -= 32;
+ txfilter = R200_MAG_FILTER_LINEAR |
+ R200_MIN_FILTER_LINEAR |
+ R200_CLAMP_S_CLAMP_LAST |
+ R200_CLAMP_T_CLAMP_LAST;
+
+ BEGIN_ACCEL(24);
+
+ OUT_ACCEL_REG(RADEON_PP_CNTL,
+ RADEON_TEX_0_ENABLE |
+ RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
+ RADEON_TEX_BLEND_2_ENABLE);
+
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+ (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
+
+ OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_0,
+ (pPriv->w - 1) |
+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+ OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
+ OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+
+ /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_ARG_B_R0_COLOR |
+ R200_TXC_ARG_C_TFACTOR_COLOR |
+ (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
+ R200_TXC_OP_DOT2_ADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
+ (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ R200_TXC_SCALE_INV2 |
+ (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) |
+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_0,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
+ R200_TXA_OUTPUT_REG_NONE);
+
+ /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_BIAS_ARG_A |
+ R200_TXC_SCALE_ARG_A |
+ R200_TXC_ARG_B_R0_COLOR |
+ R200_TXC_BIAS_ARG_B |
+ (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
+ R200_TXC_ARG_C_R1_COLOR |
+ R200_TXC_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
+ (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) |
+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_1,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
+ R200_TXA_OUTPUT_REG_NONE);
+
+ /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_BIAS_ARG_A |
+ R200_TXC_SCALE_ARG_A |
+ R200_TXC_ARG_B_R0_COLOR |
+ R200_TXC_BIAS_ARG_B |
+ (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
+ R200_TXC_ARG_C_R1_COLOR |
+ R200_TXC_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
+ (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ R200_TXC_SCALE_2X |
+ (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) |
+ R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_2,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_COMP_ARG_C |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
+ R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+
+ /* shader constants */
+ OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
+ yco > 1.0 ? yco - 1.0: yco,
+ yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
+ 0.0));
+ OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
+ uco[1] * ucscale + 0.5, /* or [-2, 2] */
+ uco[2] * ucscale + 0.5,
+ 0.0));
+ OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
+ vco[1] * vcscale + 0.5, /* or [-4, 4] */
+ vco[2] * vcscale + 0.5,
+ 0.0));
+
+ FINISH_ACCEL();
+ } else {
+ BEGIN_ACCEL(13);
+ OUT_ACCEL_REG(RADEON_PP_CNTL,
+ RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+ (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
+
+ OUT_ACCEL_REG(R200_PP_TXFILTER_0,
+ R200_MAG_FILTER_LINEAR |
+ R200_MIN_FILTER_LINEAR |
+ R200_CLAMP_S_CLAMP_LAST |
+ R200_CLAMP_T_CLAMP_LAST |
+ R200_YUV_TO_RGB);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_0,
+ (pPriv->w - 1) |
+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+ OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
+
+ OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
+ R200_TXC_ARG_A_ZERO |
+ R200_TXC_ARG_B_ZERO |
+ R200_TXC_ARG_C_R0_COLOR |
+ R200_TXC_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
+ R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_0,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_R0_ALPHA |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
+ R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+ FINISH_ACCEL();
+ }
+
+ if (pPriv->vsync) {
+ xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
+ pPriv->drw_x,
+ pPriv->drw_x + pPriv->dst_w,
+ pPriv->drw_y,
+ pPriv->drw_y + pPriv->dst_h);
+ if (crtc) {
+ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
+
+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap,
+ radeon_crtc->crtc_id,
+ pPriv->drw_y - crtc->y,
+ (pPriv->drw_y - crtc->y) + pPriv->dst_h);
}
+ }
+ /*
+ * Rendering of the actual polygon is done in two different
+ * ways depending on chip generation:
+ *
+ * < R300:
+ *
+ * These chips can render a rectangle in one pass, so
+ * handling is pretty straight-forward.
+ *
+ * >= R300:
+ *
+ * These chips can accept a quad, but will render it as
+ * two triangles which results in a diagonal tear. Instead
+ * We render a single, large triangle and use the scissor
+ * functionality to restrict it to the desired rectangle.
+ * Due to guardband limits on r3xx/r4xx, we can only use
+ * the single triangle up to 2880 pixels; above that we
+ * render as a quad.
+ */
- /* These registers define the number, type, and location of data submitted
- * to the PVS unit of GA input (when PVS is disabled)
- * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
- * enabled. This memory provides the imputs to the vertex shader program
- * and ordering is not important. When PVS/TCL is disabled, this field maps
- * directly to the GA input memory and the order is signifigant. In
- * PVS_BYPASS mode the order is as follows:
- * Position
- * Point Size
- * Color 0-3
- * Textures 0-7
- * Fog
- */
- if (pPriv->bicubic_enabled) {
- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
- (0 << R300_SKIP_DWORDS_0_SHIFT) |
- (0 << R300_DST_VEC_LOC_0_SHIFT) |
- R300_SIGNED_0 |
- (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
- (0 << R300_SKIP_DWORDS_1_SHIFT) |
- (6 << R300_DST_VEC_LOC_1_SHIFT) |
- R300_SIGNED_1));
- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
- (0 << R300_SKIP_DWORDS_2_SHIFT) |
- (7 << R300_DST_VEC_LOC_2_SHIFT) |
- R300_LAST_VEC_2 |
- R300_SIGNED_2));
+ while (nBox--) {
+ int srcX, srcY, srcw, srch;
+ int dstX, dstY, dstw, dsth;
+ dstX = pBox->x1 + dstxoff;
+ dstY = pBox->y1 + dstyoff;
+ dstw = pBox->x2 - pBox->x1;
+ dsth = pBox->y2 - pBox->y1;
+
+ srcX = ((pBox->x1 - pPriv->drw_x) *
+ pPriv->src_w) / pPriv->dst_w;
+ srcY = ((pBox->y1 - pPriv->drw_y) *
+ pPriv->src_h) / pPriv->dst_h;
+
+ srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
+ srch = (pPriv->src_h * dsth) / pPriv->dst_h;
+
+#ifdef ACCEL_CP
+ BEGIN_RING(3 * vtx_count + 2);
+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+ 3 * vtx_count));
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+#else /* ACCEL_CP */
+ BEGIN_ACCEL(1 + vtx_count * 3);
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
+ RADEON_VF_PRIM_WALK_DATA |
+ (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
+
+#endif
+ if (isplanar) {
+ /*
+ * Just render a rect (using three coords).
+ * Filter is a bit a misnomer, it's just texcoords...
+ */
+ VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth),
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
} else {
- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
- (0 << R300_SKIP_DWORDS_0_SHIFT) |
- (0 << R300_DST_VEC_LOC_0_SHIFT) |
- R300_SIGNED_0 |
- (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
- (0 << R300_SKIP_DWORDS_1_SHIFT) |
- (6 << R300_DST_VEC_LOC_1_SHIFT) |
- R300_LAST_VEC_1 |
- R300_SIGNED_1));
+ /*
+ * Just render a rect (using three coords).
+ */
+ VTX_OUT((float)dstX, (float)(dstY + dsth),
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
}
- /* load the vertex shader
- * We pre-load vertex programs in RADEONInit3DEngine():
- * - exa mask/Xv bicubic
- * - exa no mask
- * - Xv
- * Here we select the offset of the vertex program we want to use
- */
- if (info->accel_state->has_tcl) {
- if (pPriv->bicubic_enabled) {
- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
- ((0 << R300_PVS_FIRST_INST_SHIFT) |
- (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (2 << R300_PVS_LAST_INST_SHIFT)));
- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
- (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
- } else {
- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
- ((5 << R300_PVS_FIRST_INST_SHIFT) |
- (6 << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (6 << R300_PVS_LAST_INST_SHIFT)));
- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
- (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
- }
+#ifdef ACCEL_CP
+ ADVANCE_RING();
+#else
+ FINISH_ACCEL();
+#endif /* !ACCEL_CP */
+
+ pBox++;
+ }
+
+ BEGIN_ACCEL(1);
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+ FINISH_ACCEL();
+
+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
+}
+
+static void
+FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ PixmapPtr pPixmap = pPriv->pPixmap;
+ uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
+ uint32_t dst_offset, dst_pitch, dst_format;
+ uint32_t txenable, colorpitch;
+ uint32_t output_fmt;
+ Bool isplanar = FALSE;
+ int dstxoff, dstyoff, pixel_shift, vtx_count;
+ BoxPtr pBox = REGION_RECTS(&pPriv->clip);
+ int nBox = REGION_NUM_RECTS(&pPriv->clip);
+ ACCEL_PREAMBLE();
+
+ pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
+
+#ifdef USE_EXA
+ if (info->useEXA) {
+ dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
+ dst_pitch = exaGetPixmapPitch(pPixmap);
+ } else
+#endif
+ {
+ dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
+ info->fbLocation + pScrn->fbOffset;
+ dst_pitch = pPixmap->devKind;
}
- /* Position and one set of 2 texture coordinates */
- OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
- if (pPriv->bicubic_enabled)
- OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
- (2 << R300_TEX_1_COMP_CNT_SHIFT)));
+#ifdef COMPOSITE
+ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
+ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
+#else
+ dstxoff = 0;
+ dstyoff = 0;
+#endif
+
+#ifdef USE_EXA
+ if (info->useEXA) {
+ RADEON_SWITCH_TO_3D();
+ } else
+#endif
+ {
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+ /* We must wait for 3d to idle, in case source was just written as a dest. */
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_ACCEL();
+
+ if (!info->accel_state->XInited3D)
+ RADEONInit3DEngine(pScrn);
+ }
+
+ if (pPriv->bicubic_enabled)
+ vtx_count = 6;
+ else
+ vtx_count = 4;
+
+ switch (pPixmap->drawable.bitsPerPixel) {
+ case 16:
+ if (pPixmap->drawable.depth == 15)
+ dst_format = R300_COLORFORMAT_ARGB1555;
else
- OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
+ dst_format = R300_COLORFORMAT_RGB565;
+ break;
+ case 32:
+ dst_format = R300_COLORFORMAT_ARGB8888;
+ break;
+ default:
+ return;
+ }
- OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
- FINISH_ACCEL();
+ output_fmt = (R300_OUT_FMT_C4_8 |
+ R300_OUT_FMT_C0_SEL_BLUE |
+ R300_OUT_FMT_C1_SEL_GREEN |
+ R300_OUT_FMT_C2_SEL_RED |
+ R300_OUT_FMT_C3_SEL_ALPHA);
- /* setup pixel shader */
- if (IS_R300_3D) {
- if (pPriv->bicubic_enabled) {
- BEGIN_ACCEL(79);
-
- /* 4 components: 2 for tex0 and 2 for tex1 */
- OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
-
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
-
- /* Pixel stack frame size. */
- OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
-
- /* Indirection levels */
- OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) |
- R300_FIRST_TEX));
-
- /* Set nodes. */
- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(14) |
- R300_TEX_CODE_OFFSET(0) |
- R300_TEX_CODE_SIZE(6)));
-
- /* Nodes are allocated highest first, but executed lowest first */
- OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0);
- OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) |
- R300_ALU_SIZE(0) |
- R300_TEX_START(0) |
- R300_TEX_SIZE(0)));
- OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) |
- R300_ALU_SIZE(9) |
- R300_TEX_START(1) |
- R300_TEX_SIZE(0)));
- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) |
- R300_ALU_SIZE(2) |
- R300_TEX_START(2) |
- R300_TEX_SIZE(3) |
- R300_RGBA_OUT));
-
- /* ** BICUBIC FP ** */
-
- /* texcoord0 => temp0
- * texcoord1 => temp1 */
-
- // first node
- /* TEX temp2, temp1.rrr0, tex1, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(1) |
- R300_TEX_SRC_ADDR(1) |
- R300_TEX_DST_ADDR(2)));
-
- /* MOV temp1.r, temp1.ggg0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDRD(1) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ colorpitch = dst_pitch >> pixel_shift;
+ colorpitch |= dst_format;
+ if (RADEONTilingEnabled(pScrn, pPixmap))
+ colorpitch |= R300_COLORTILE;
- // second node
- /* TEX temp1, temp1, tex1, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(1) |
- R300_TEX_SRC_ADDR(1) |
- R300_TEX_DST_ADDR(1)));
-
- /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ isplanar = TRUE;
+ }
+ if (isplanar) {
+ txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
+ txpitch = pPriv->src_pitch;
+ } else {
+ if (pPriv->id == FOURCC_UYVY)
+ txformat1 = R300_TX_FORMAT_YVYU422;
+ else
+ txformat1 = R300_TX_FORMAT_VYUY422;
- /* MUL temp2.rg, temp2.rrr0, const0.rgb */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
- R300_ALU_RGB_ADDRD(2) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
- /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(3) |
- R300_ALU_RGB_ADDRD(4) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ /* pitch is in pixels */
+ txpitch = pPriv->src_pitch / 2;
+ }
+ txpitch -= 1;
+
+ txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+ (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+
+ info->accel_state->texW[0] = pPriv->w;
+ info->accel_state->texH[0] = pPriv->h;
+
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_MAG_FILTER_LINEAR |
+ R300_TX_MIN_FILTER_LINEAR |
+ (0 << R300_TX_ID_SHIFT));
+
+ txoffset = pPriv->src_offset;
+
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
+ OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_0, txoffset);
+ FINISH_ACCEL();
- /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(2) |
- R300_ALU_RGB_ADDRD(5) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ txenable = R300_TEX_0_ENABLE;
- /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(3) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ if (isplanar) {
+ txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+ (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+ txpitch -= 1;
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_MIN_FILTER_LINEAR |
+ R300_TX_MAG_FILTER_LINEAR);
+
+ BEGIN_ACCEL(12);
+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
+ OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
+ FINISH_ACCEL();
+ txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
+ }
- /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(2) |
- R300_ALU_RGB_ADDRD(1) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ if (pPriv->bicubic_enabled) {
+ /* Size is 128x1 */
+ txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
+ (0x0 << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+ /* Format is 32-bit floats, 4bpp */
+ txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
+ /* Pitch is 127 (128-1) */
+ txpitch = 0x7f;
+ /* Tex filter */
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) |
+ R300_TX_MIN_FILTER_NEAREST |
+ R300_TX_MAG_FILTER_NEAREST |
+ (1 << R300_TX_ID_SHIFT));
- /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(1) |
- R300_ALU_RGB_ADDRD(1) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter);
+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_1, pPriv->bicubic_src_offset);
+ FINISH_ACCEL();
- /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(3) |
- R300_ALU_RGB_ADDRD(2) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ /* Enable tex 1 */
+ txenable |= R300_TEX_1_ENABLE;
+ }
- /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(5) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ /* setup the VAP */
+ if (info->accel_state->has_tcl) {
+ if (pPriv->bicubic_enabled)
+ BEGIN_ACCEL(7);
+ else
+ BEGIN_ACCEL(6);
+ } else {
+ if (pPriv->bicubic_enabled)
+ BEGIN_ACCEL(5);
+ else
+ BEGIN_ACCEL(4);
+ }
- /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(4) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ /* These registers define the number, type, and location of data submitted
+ * to the PVS unit of GA input (when PVS is disabled)
+ * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
+ * enabled. This memory provides the imputs to the vertex shader program
+ * and ordering is not important. When PVS/TCL is disabled, this field maps
+ * directly to the GA input memory and the order is signifigant. In
+ * PVS_BYPASS mode the order is as follows:
+ * Position
+ * Point Size
+ * Color 0-3
+ * Textures 0-7
+ * Fog
+ */
+ if (pPriv->bicubic_enabled) {
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+ (0 << R300_SKIP_DWORDS_0_SHIFT) |
+ (0 << R300_DST_VEC_LOC_0_SHIFT) |
+ R300_SIGNED_0 |
+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+ (0 << R300_SKIP_DWORDS_1_SHIFT) |
+ (6 << R300_DST_VEC_LOC_1_SHIFT) |
+ R300_SIGNED_1));
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
+ (0 << R300_SKIP_DWORDS_2_SHIFT) |
+ (7 << R300_DST_VEC_LOC_2_SHIFT) |
+ R300_LAST_VEC_2 |
+ R300_SIGNED_2));
+ } else {
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+ (0 << R300_SKIP_DWORDS_0_SHIFT) |
+ (0 << R300_DST_VEC_LOC_0_SHIFT) |
+ R300_SIGNED_0 |
+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+ (0 << R300_SKIP_DWORDS_1_SHIFT) |
+ (6 << R300_DST_VEC_LOC_1_SHIFT) |
+ R300_LAST_VEC_1 |
+ R300_SIGNED_1));
+ }
+
+ /* load the vertex shader
+ * We pre-load vertex programs in RADEONInit3DEngine():
+ * - exa mask/Xv bicubic
+ * - exa no mask
+ * - Xv
+ * Here we select the offset of the vertex program we want to use
+ */
+ if (info->accel_state->has_tcl) {
+ if (pPriv->bicubic_enabled) {
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
+ ((0 << R300_PVS_FIRST_INST_SHIFT) |
+ (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (2 << R300_PVS_LAST_INST_SHIFT)));
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
+ (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ } else {
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
+ ((5 << R300_PVS_FIRST_INST_SHIFT) |
+ (6 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (6 << R300_PVS_LAST_INST_SHIFT)));
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
+ (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ }
+ }
+
+ /* Position and one set of 2 texture coordinates */
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
+ if (pPriv->bicubic_enabled)
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
+ (2 << R300_TEX_1_COMP_CNT_SHIFT)));
+ else
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
+
+ OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
+ FINISH_ACCEL();
+
+ /* setup pixel shader */
+ if (pPriv->bicubic_enabled) {
+ BEGIN_ACCEL(79);
+
+ /* 4 components: 2 for tex0 and 2 for tex1 */
+ OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ /* Set nodes. */
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(14) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(6)));
+
+ /* Nodes are allocated highest first, but executed lowest first */
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0);
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) |
+ R300_ALU_SIZE(0) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(0)));
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) |
+ R300_ALU_SIZE(9) |
+ R300_TEX_START(1) |
+ R300_TEX_SIZE(0)));
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) |
+ R300_ALU_SIZE(2) |
+ R300_TEX_START(2) |
+ R300_TEX_SIZE(3) |
+ R300_RGBA_OUT));
+
+ /* ** BICUBIC FP ** */
+
+ /* texcoord0 => temp0
+ * texcoord1 => temp1 */
+
+ // first node
+ /* TEX temp2, temp1.rrr0, tex1, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(1) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(2)));
+
+ /* MOV temp1.r, temp1.ggg0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+
+ // second node
+ /* TEX temp1, temp1, tex1, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(1) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(1)));
+
+ /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+
+ /* MUL temp2.rg, temp2.rrr0, const0.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDRD(2) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(4) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_ADDRD(5) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(2) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(5) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(4) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- // third node
- /* TEX temp4, temp1.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(1) |
- R300_TEX_DST_ADDR(4)));
-
- /* TEX temp3, temp3.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(3) |
- R300_TEX_DST_ADDR(3)));
-
- /* TEX temp5, temp2.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(2) |
- R300_TEX_DST_ADDR(5)));
-
- /* TEX temp0, temp0.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(0)));
-
- /* LRP temp3, temp1.bbbb, temp4, temp3 ->
- * - PRESUB temps, temp4 - temp3
- * - MAD temp3, temp1.bbbb, temps, temp3 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) |
- R300_ALU_RGB_ADDR1(4) |
- R300_ALU_RGB_ADDR2(1) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ // third node
+ /* TEX temp4, temp1.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(4)));
+
+ /* TEX temp3, temp3.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(3) |
+ R300_TEX_DST_ADDR(3)));
+
+ /* TEX temp5, temp2.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(2) |
+ R300_TEX_DST_ADDR(5)));
+
+ /* TEX temp0, temp0.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0)));
+
+ /* LRP temp3, temp1.bbbb, temp4, temp3 ->
+ * - PRESUB temps, temp4 - temp3
+ * - MAD temp3, temp1.bbbb, temps, temp3 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) |
+ R300_ALU_RGB_ADDR1(4) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) |
R300_ALU_ALPHA_ADDR1(4) |
R300_ALU_ALPHA_ADDR2(1) |
R300_ALU_ALPHA_ADDRD(3) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
- /* LRP temp0, temp1.bbbb, temp5, temp0 ->
- * - PRESUB temps, temp5 - temp0
- * - MAD temp0, temp1.bbbb, temps, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) |
- R300_ALU_RGB_INSERT_NOP));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(5) |
- R300_ALU_RGB_ADDR2(1) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ /* LRP temp0, temp1.bbbb, temp5, temp0 ->
+ * - PRESUB temps, temp5 - temp0
+ * - MAD temp0, temp1.bbbb, temps, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) |
+ R300_ALU_RGB_INSERT_NOP));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(5) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) |
R300_ALU_ALPHA_ADDR1(5) |
R300_ALU_ALPHA_ADDR2(1) |
R300_ALU_ALPHA_ADDRD(0) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
- /* LRP output, temp2.bbbb, temp3, temp0 ->
- * - PRESUB temps, temp3 - temp0
- * - MAD output, temp2.bbbb, temps, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(3) |
- R300_ALU_RGB_ADDR2(2) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ /* LRP output, temp2.bbbb, temp3, temp0 ->
+ * - PRESUB temps, temp3 - temp0
+ * - MAD output, temp2.bbbb, temps, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(3) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) |
R300_ALU_ALPHA_ADDR1(3) |
R300_ALU_ALPHA_ADDR2(2) |
R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A)));
- /* Shader constants. */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0);
-
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0);
-
- FINISH_ACCEL();
- } else if (isplanar) {
- /*
- * y' = y - .0625
- * u' = u - .5
- * v' = v - .5;
- *
- * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
- * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
- * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
- *
- * DP3 might look like the straightforward solution
- * but we'd need to move the texture yuv values in
- * the same reg for this to work. Therefore use MADs.
- * Brightness just adds to the off constant.
- * Contrast is multiplication of luminance.
- * Saturation and hue change the u and v coeffs.
- * Default values (before adjustments - depend on colorspace):
- * yco = 1.1643
- * uco = 0, -0.39173, 2.017
- * vco = 1.5958, -0.8129, 0
- * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
- * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
- * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
- *
- * temp = MAD(yco, yuv.yyyy, off)
- * temp = MAD(uco, yuv.uuuu, temp)
- * result = MAD(vco, yuv.vvvv, temp)
- */
- /* TODO: don't recalc consts always */
- const float Loff = -0.0627;
- const float Coff = -0.502;
- float uvcosf, uvsinf;
- float yco;
- float uco[3], vco[3], off[3];
- float bright, cont, gamma;
- int ref = pPriv->transform_index;
- Bool needgamma = FALSE;
-
- cont = RTFContrast(pPriv->contrast);
- bright = RTFBrightness(pPriv->brightness);
- gamma = (float)pPriv->gamma / 1000.0;
- uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
- uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
- /* overlay video also does pre-gamma contrast/sat adjust, should we? */
-
- yco = trans[ref].RefLuma * cont;
- uco[0] = -trans[ref].RefRCr * uvsinf;
- uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
- uco[2] = trans[ref].RefBCb * uvcosf;
- vco[0] = trans[ref].RefRCr * uvcosf;
- vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
- vco[2] = trans[ref].RefBCb * uvsinf;
- off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
- off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
- off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
-
- if (gamma != 1.0) {
- needgamma = TRUE;
- /* note: gamma correction is out = in ^ gamma;
- gpu can only do LG2/EX2 therefore we transform into
- in ^ gamma = 2 ^ (log2(in) * gamma).
- Lots of scalar ops, unfortunately (better solution?) -
- without gamma that's 3 inst, with gamma it's 10...
- could use different gamma factors per channel,
- if that's of any use. */
- }
-
- BEGIN_ACCEL(needgamma ? 28 + 33 : 33);
- /* 2 components: same 2 for tex0/1/2 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-
- OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
-
- /* Indirection levels */
- OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
- R300_FIRST_TEX));
-
- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
- R300_TEX_CODE_OFFSET(0) |
- R300_TEX_CODE_SIZE(3)));
-
- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
- R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
- R300_TEX_START(0) |
- R300_TEX_SIZE(2) |
- R300_RGBA_OUT));
-
- /* tex inst */
- OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(0) |
- R300_TEX_ID(0) |
- R300_TEX_INST(R300_TEX_INST_LD)));
- OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(1) |
- R300_TEX_ID(1) |
- R300_TEX_INST(R300_TEX_INST_LD)));
- OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(2) |
- R300_TEX_ID(2) |
- R300_TEX_INST(R300_TEX_INST_LD)));
-
- /* ALU inst */
- /* MAD temp0, const0.a, temp0, const0.rgb */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop, but need to set up alpha source for rgb usage */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
- R300_ALU_ALPHA_ADDR1(0) |
- R300_ALU_ALPHA_ADDR2(0) |
- R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* MAD const1, temp1, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR1(1) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* MAD result, const2, temp2, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
- R300_ALU_RGB_ADDR1(2) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
- (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
- R300_ALU_RGB_CLAMP));
- /* write alpha 1 */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
- R300_ALU_ALPHA_TARGET_A));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
-
- if (needgamma) {
- /* rgb temp0.r = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.r */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* rgb temp0.g = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.g */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ /* Shader constants. */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0);
- /* rgb temp0.b = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.b */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0);
- /* MUL const1, temp1, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop, but set up const1 */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* rgb out0.r = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.r */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* rgb out0.g = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.g */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* rgb out0.b = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.b */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- }
-
- /* Shader constants. */
- /* constant 0: off, yco */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
- /* constant 1: uco */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma));
- /* constant 2: vco */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
-
- FINISH_ACCEL();
-
- } else {
- BEGIN_ACCEL(11);
- /* 2 components: 2 for tex0 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-
- OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
-
- /* Indirection levels */
- OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
- R300_FIRST_TEX));
-
- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(1) |
- R300_TEX_CODE_OFFSET(0) |
- R300_TEX_CODE_SIZE(1)));
-
- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
- R300_ALU_SIZE(0) |
- R300_TEX_START(0) |
- R300_TEX_SIZE(0) |
- R300_RGBA_OUT));
-
- /* tex inst */
- OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(0) |
- R300_TEX_ID(0) |
- R300_TEX_INST(R300_TEX_INST_LD)));
-
- /* ALU inst */
- /* RGB */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
- R300_ALU_RGB_MASK_G |
- R300_ALU_RGB_MASK_B)) |
- R300_ALU_RGB_TARGET_A));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
- R300_ALU_RGB_CLAMP));
- /* Alpha */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) |
- R300_ALU_ALPHA_ADDR1(0) |
- R300_ALU_ALPHA_ADDR2(0) |
- R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
- R300_ALU_ALPHA_TARGET_A |
- R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) |
- R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) |
- R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
- R300_ALU_ALPHA_CLAMP));
- FINISH_ACCEL();
- }
- } else {
- if (pPriv->bicubic_enabled) {
- BEGIN_ACCEL(7);
-
- /* 4 components: 2 for tex0 and 2 for tex1 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
-
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
-
- /* Pixel stack frame size. */
- OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
-
- /* FP length. */
- OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
- R500_US_CODE_END_ADDR(13)));
- OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
- R500_US_CODE_RANGE_SIZE(13)));
-
- /* Prepare for FP emission. */
- OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
- FINISH_ACCEL();
-
- BEGIN_ACCEL(89);
- /* Pixel shader.
- * I've gone ahead and annotated each instruction, since this
- * thing is MASSIVE. :3
- * Note: In order to avoid buggies with temps and multiple
- * inputs, all temps are offset by 2. temp0 -> register2. */
-
- /* TEX temp2, input1.xxxx, tex1, 1D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
- R500_TEX_INST_LD |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_R |
- R500_TEX_SRC_R_SWIZ_R |
- R500_TEX_SRC_Q_SWIZ_R |
- R500_TEX_DST_ADDR(2) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* TEX temp5, input1.yyyy, tex1, 1D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
- R500_TEX_SRC_S_SWIZ_G |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_SRC_R_SWIZ_G |
- R500_TEX_SRC_Q_SWIZ_G |
- R500_TEX_DST_ADDR(5) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* MUL temp4, const0.x0x0, temp2.yyxx */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_0 |
- R500_ALU_RGB_B_SWIZ_A_R |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_G |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC0 |
- R500_ALPHA_SWIZ_A_0 |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 |
- R500_ALU_RGBA_B_SWIZ_0 |
- R500_ALU_RGBA_A_SWIZ_0));
-
- /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(5) |
- R500_RGB_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(5) |
- R500_ALPHA_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_0 |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_0 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC0 |
- R500_ALPHA_SWIZ_A_G |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* ADD temp3, temp3, input0.xyxy */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) |
- R500_RGB_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) |
- R500_ALPHA_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
- R500_ALU_RGB_G_SWIZ_A_1 |
- R500_ALU_RGB_B_SWIZ_A_1 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SWIZ_A_1 |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_R |
- R500_ALU_RGBA_A_SWIZ_G));
-
- /* TEX temp1, temp3.zwxy, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
- R500_TEX_SRC_S_SWIZ_B |
- R500_TEX_SRC_T_SWIZ_A |
- R500_TEX_SRC_R_SWIZ_R |
- R500_TEX_SRC_Q_SWIZ_G |
- R500_TEX_DST_ADDR(1) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* TEX temp3, temp3.xyzw, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_SRC_R_SWIZ_B |
- R500_TEX_SRC_Q_SWIZ_A |
- R500_TEX_DST_ADDR(3) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(5) |
- R500_RGB_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(5) |
- R500_ALPHA_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_0 |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_0 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_G |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_G));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC0 |
- R500_ALPHA_SWIZ_A_G |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_G));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* ADD temp0, temp4, input0.xyxy */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) |
- R500_RGB_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) |
- R500_ALPHA_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
- R500_ALU_RGB_G_SWIZ_A_1 |
- R500_ALU_RGB_B_SWIZ_A_1 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SWIZ_A_1 |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_R |
- R500_ALU_RGBA_A_SWIZ_G));
-
- /* TEX temp4, temp0.zwzw, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_B |
- R500_TEX_SRC_T_SWIZ_A |
- R500_TEX_SRC_R_SWIZ_B |
- R500_TEX_SRC_Q_SWIZ_A |
- R500_TEX_DST_ADDR(4) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* TEX temp0, temp0.xyzw, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_SRC_R_SWIZ_B |
- R500_TEX_SRC_Q_SWIZ_A |
- R500_TEX_DST_ADDR(0) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* LRP temp3, temp2.zzzz, temp1, temp3 ->
- * - PRESUB temps, temp1 - temp3
- * - MAD temp2.zzzz, temps, temp3 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) |
- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
- R500_RGB_ADDR1(1) |
- R500_RGB_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) |
- R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
- R500_ALPHA_ADDR1(1) |
- R500_ALPHA_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
- R500_ALU_RGB_R_SWIZ_A_B |
- R500_ALU_RGB_G_SWIZ_A_B |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRCP |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC2 |
- R500_ALPHA_SWIZ_A_B |
- R500_ALPHA_SEL_B_SRCP |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC0 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* LRP temp0, temp2.zzzz, temp4, temp0 ->
- * - PRESUB temps, temp4 - temp1
- * - MAD temp2.zzzz, temps, temp0 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
- R500_RGB_ADDR1(4) |
- R500_RGB_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
- R500_ALPHA_ADDR1(4) |
- R500_ALPHA_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
- R500_ALU_RGB_R_SWIZ_A_B |
- R500_ALU_RGB_G_SWIZ_A_B |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRCP |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC2 |
- R500_ALPHA_SWIZ_A_B |
- R500_ALPHA_SEL_B_SRCP |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC0 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* LRP output, temp5.zzzz, temp3, temp0 ->
- * - PRESUB temps, temp3 - temp0
- * - MAD temp5.zzzz, temps, temp0 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
- R500_INST_LAST |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK |
- R500_INST_RGB_OMASK_R |
- R500_INST_RGB_OMASK_G |
- R500_INST_RGB_OMASK_B |
- R500_INST_ALPHA_OMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
- R500_RGB_ADDR1(3) |
- R500_RGB_ADDR2(5)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
- R500_ALPHA_ADDR1(3) |
- R500_ALPHA_ADDR2(5)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
- R500_ALU_RGB_R_SWIZ_A_B |
- R500_ALU_RGB_G_SWIZ_A_B |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRCP |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC2 |
- R500_ALPHA_SWIZ_A_B |
- R500_ALPHA_SEL_B_SRCP |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC0 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* Shader constants. */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
-
- /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w));
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h));
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
-
- FINISH_ACCEL();
-
- } else {
- BEGIN_ACCEL(19);
- /* 2 components: 2 for tex0 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
-
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-
- /* Pixel stack frame size. */
- OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
-
- /* FP length. */
- OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
- R500_US_CODE_END_ADDR(1)));
- OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
- R500_US_CODE_RANGE_SIZE(1)));
-
- /* Prepare for FP emission. */
- OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
-
- /* tex inst */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_DST_ADDR(0) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
- R500_DX_S_SWIZ_R |
- R500_DX_T_SWIZ_R |
- R500_DX_R_SWIZ_R |
- R500_DX_Q_SWIZ_R |
- R500_DY_ADDR(0) |
- R500_DY_S_SWIZ_R |
- R500_DY_T_SWIZ_R |
- R500_DY_R_SWIZ_R |
- R500_DY_Q_SWIZ_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* ALU inst */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_LAST |
- R500_INST_RGB_OMASK_R |
- R500_INST_RGB_OMASK_G |
- R500_INST_RGB_OMASK_B |
- R500_INST_ALPHA_OMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR1(0) |
- R500_RGB_ADDR1_CONST |
- R500_RGB_ADDR2(0) |
- R500_RGB_ADDR2_CONST));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR1(0) |
- R500_ALPHA_ADDR1_CONST |
- R500_ALPHA_ADDR2(0) |
- R500_ALPHA_ADDR2_CONST));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC0 |
- R500_ALU_RGB_R_SWIZ_B_1 |
- R500_ALU_RGB_B_SWIZ_B_1 |
- R500_ALU_RGB_G_SWIZ_B_1));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
- R500_ALPHA_SWIZ_A_A |
- R500_ALPHA_SWIZ_B_1));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 |
- R500_ALU_RGBA_B_SWIZ_0 |
- R500_ALU_RGBA_A_SWIZ_0));
- FINISH_ACCEL();
- }
- }
-
- BEGIN_ACCEL(6);
- OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
- OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
-
- OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
- OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
-
- blendcntl = RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO;
- /* no need to enable blending */
- OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl);
-
- OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count);
FINISH_ACCEL();
-
- } else {
-
- /* Same for R100/R200 */
- switch (pPixmap->drawable.bitsPerPixel) {
- case 16:
- if (pPixmap->drawable.depth == 15)
- dst_format = RADEON_COLOR_FORMAT_ARGB1555;
- else
- dst_format = RADEON_COLOR_FORMAT_RGB565;
- break;
- case 32:
- dst_format = RADEON_COLOR_FORMAT_ARGB8888;
- break;
- default:
- return;
- }
-
- if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
- isplanar = TRUE;
+ } else if (isplanar) {
+ /*
+ * y' = y - .0625
+ * u' = u - .5
+ * v' = v - .5;
+ *
+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
+ *
+ * DP3 might look like the straightforward solution
+ * but we'd need to move the texture yuv values in
+ * the same reg for this to work. Therefore use MADs.
+ * Brightness just adds to the off constant.
+ * Contrast is multiplication of luminance.
+ * Saturation and hue change the u and v coeffs.
+ * Default values (before adjustments - depend on colorspace):
+ * yco = 1.1643
+ * uco = 0, -0.39173, 2.017
+ * vco = 1.5958, -0.8129, 0
+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
+ *
+ * temp = MAD(yco, yuv.yyyy, off)
+ * temp = MAD(uco, yuv.uuuu, temp)
+ * result = MAD(vco, yuv.vvvv, temp)
+ */
+ /* TODO: don't recalc consts always */
+ const float Loff = -0.0627;
+ const float Coff = -0.502;
+ float uvcosf, uvsinf;
+ float yco;
+ float uco[3], vco[3], off[3];
+ float bright, cont, gamma;
+ int ref = pPriv->transform_index;
+ Bool needgamma = FALSE;
+
+ cont = RTFContrast(pPriv->contrast);
+ bright = RTFBrightness(pPriv->brightness);
+ gamma = (float)pPriv->gamma / 1000.0;
+ uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
+ uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
+ /* overlay video also does pre-gamma contrast/sat adjust, should we? */
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
+ off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
+ off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
+
+ if (gamma != 1.0) {
+ needgamma = TRUE;
+ /* note: gamma correction is out = in ^ gamma;
+ gpu can only do LG2/EX2 therefore we transform into
+ in ^ gamma = 2 ^ (log2(in) * gamma).
+ Lots of scalar ops, unfortunately (better solution?) -
+ without gamma that's 3 inst, with gamma it's 10...
+ could use different gamma factors per channel,
+ if that's of any use. */
}
- if (isplanar) {
- txformat = RADEON_TXFORMAT_I8;
- } else {
- if (pPriv->id == FOURCC_UYVY)
- txformat = RADEON_TXFORMAT_YVYU422;
- else
- txformat = RADEON_TXFORMAT_VYUY422;
+ BEGIN_ACCEL(needgamma ? 28 + 33 : 33);
+ /* 2 components: same 2 for tex0/1/2 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(3)));
+
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+ R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(2) |
+ R300_RGBA_OUT));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0) |
+ R300_TEX_ID(0) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+ OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(1) |
+ R300_TEX_ID(1) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+ OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(2) |
+ R300_TEX_ID(2) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+
+ /* ALU inst */
+ /* MAD temp0, const0.a, temp0, const0.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but need to set up alpha source for rgb usage */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
+ R300_ALU_ALPHA_ADDR1(0) |
+ R300_ALU_ALPHA_ADDR2(0) |
+ R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MAD const1, temp1, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR1(1) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MAD result, const2, temp2, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
+ R300_ALU_RGB_ADDR1(2) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
+ (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+ R300_ALU_RGB_CLAMP));
+ /* write alpha 1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+ R300_ALU_ALPHA_TARGET_A));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
+
+ if (needgamma) {
+ /* rgb temp0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MUL const1, temp1, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but set up const1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
}
- txformat |= RADEON_TXFORMAT_NON_POWER2;
-
- colorpitch = dst_pitch >> pixel_shift;
+ /* Shader constants. */
+ /* constant 0: off, yco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
+ /* constant 1: uco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma));
+ /* constant 2: vco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
- if (RADEONTilingEnabled(pScrn, pPixmap))
- colorpitch |= RADEON_COLOR_TILE_ENABLE;
-
- BEGIN_ACCEL(4);
-
- OUT_ACCEL_REG(RADEON_RB3D_CNTL,
- dst_format /*| RADEON_ALPHA_BLEND_ENABLE*/);
- OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
-
- OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
-
- OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
- RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+ FINISH_ACCEL();
+ } else {
+ BEGIN_ACCEL(11);
+ /* 2 components: 2 for tex0 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(1) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(1)));
+
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+ R300_ALU_SIZE(0) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(0) |
+ R300_RGBA_OUT));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0) |
+ R300_TEX_ID(0) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+
+ /* ALU inst */
+ /* RGB */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G |
+ R300_ALU_RGB_MASK_B)) |
+ R300_ALU_RGB_TARGET_A));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+ R300_ALU_RGB_CLAMP));
+ /* Alpha */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) |
+ R300_ALU_ALPHA_ADDR1(0) |
+ R300_ALU_ALPHA_ADDR2(0) |
+ R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+ R300_ALU_ALPHA_TARGET_A |
+ R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) |
+ R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) |
+ R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
+ R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
+ R300_ALU_ALPHA_CLAMP));
FINISH_ACCEL();
+ }
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
+ OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
- if (IS_R200_3D) {
-
- info->accel_state->texW[0] = pPriv->w;
- info->accel_state->texH[0] = pPriv->h;
-
- if (isplanar) {
- /* note: in contrast to r300, use input biasing on uv components */
- const float Loff = -0.0627;
- float uvcosf, uvsinf;
- float yco, yoff;
- float uco[3], vco[3];
- float bright, cont, sat;
- int ref = pPriv->transform_index;
- float ucscale = 0.25, vcscale = 0.25;
- Bool needux8 = FALSE, needvx8 = FALSE;
-
- /* contrast can cause constant overflow, clamp */
- cont = RTFContrast(pPriv->contrast);
- if (cont * trans[ref].RefLuma > 2.0)
- cont = 2.0 / trans[ref].RefLuma;
- /* brightness is only from -0.5 to 0.5 should be safe */
- bright = RTFBrightness(pPriv->brightness);
- /* saturation can also cause overflow, clamp */
- sat = RTFSaturation(pPriv->saturation);
- if (sat * trans[ref].RefBCb > 4.0)
- sat = 4.0 / trans[ref].RefBCb;
- uvcosf = sat * cos(RTFHue(pPriv->hue));
- uvsinf = sat * sin(RTFHue(pPriv->hue));
-
- yco = trans[ref].RefLuma * cont;
- uco[0] = -trans[ref].RefRCr * uvsinf;
- uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
- uco[2] = trans[ref].RefBCb * uvcosf;
- vco[0] = trans[ref].RefRCr * uvcosf;
- vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
- vco[2] = trans[ref].RefBCb * uvsinf;
- yoff = Loff * yco + bright;
-
- if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
- needux8 = TRUE;
- ucscale = 0.125;
- }
- if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
- needvx8 = TRUE;
- vcscale = 0.125;
- }
-
- /* need 2 texcoord sets (even though they are identical) due
- to denormalization! hw apparently can't premultiply
- same coord set by different texture size */
- vtx_count = 6;
-
- txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
- (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
- txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
- txpitch -= 32;
- txfilter = R200_MAG_FILTER_LINEAR |
- R200_MIN_FILTER_LINEAR |
- R200_CLAMP_S_CLAMP_LAST |
- R200_CLAMP_T_CLAMP_LAST;
-
- BEGIN_ACCEL(36);
-
- OUT_ACCEL_REG(RADEON_PP_CNTL,
- RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE |
- RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
- RADEON_TEX_BLEND_2_ENABLE);
-
- OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
- OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
- (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
-
- OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
- OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
-
- OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_1, txformat0);
- OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch);
- OUT_ACCEL_REG(R200_PP_TXOFFSET_1, pPriv->src_offset + pPriv->planeu_offset);
-
- OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_2, txformat0);
- OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch);
- OUT_ACCEL_REG(R200_PP_TXOFFSET_2, pPriv->src_offset + pPriv->planev_offset);
-
- /* similar to r300 code. Note the big problem is that hardware constants
- * are 8 bits only, representing 0.0-1.0. We can get that up (using bias
- * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually
- * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but
- * the constants not. To get larger range can use output scale, but for
- * that 2.018 value we need a total scale by 8, which means the constants
- * really have no accuracy whatsoever (5 fractional bits only).
- * The only direct way to get high precision "constants" into the fragment
- * pipe I know of is to use the texcoord interpolator (not color, this one
- * is 8 bit only too), which seems a bit expensive. We're lucky though it
- * seems the values we need seem to fit better than worst case (get about
- * 6 fractional bits for this instead of 5, at least when not correcting for
- * hue/saturation/contrast/brightness, which is the same as for vco - yco and
- * yoff get 8 fractional bits). Try to preserve as much accuracy as possible
- * even with non-default saturation/hue/contrast/brightness adjustments,
- * it gets a little crazy and ultimately precision might still be lacking.
- *
- * A higher precision (8 fractional bits) version might just put uco into
- * a texcoord, and calculate a new vcoconst in the shader, like so:
- * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable
- * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0}
- * vcocalc = ADD temp, bias/scale(cohelper), vco
- * would in total use 4 tex units, 4 instructions which seems fairly
- * balanced for this architecture (instead of 3 + 3 for the solution here)
- *
- * temp = MAD(yco, yuv.yyyy, yoff)
- * temp = MAD(uco, yuv.uuuu, temp)
- * result = MAD(vco, yuv.vvvv, temp)
- *
- * note first mad produces actually scalar, hence we transform
- * it into a dp2a to get 8 bit precision of yco instead of 7 -
- * That's assuming hw correctly expands consts to internal precision.
- * (y * 1 + y * (yco - 1) + yoff)
- * temp = DP2A / 2 (yco, yuv.yyyy, yoff)
- * temp = MAD (uco / 4, yuv.uuuu * 2, temp)
- * result = MAD x2 (vco / 2, yuv.vvvv, temp)
- *
- * vco, uco need bias (and hence scale too)
- *
- */
+ OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
+ OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
- /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_ARG_B_R0_COLOR |
- R200_TXC_ARG_C_TFACTOR_COLOR |
- (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
- R200_TXC_OP_DOT2_ADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
- (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
- R200_TXC_SCALE_INV2 |
- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
- OUT_ACCEL_REG(R200_PP_TXABLEND_0,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
- R200_TXA_OUTPUT_REG_NONE);
-
- /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_BIAS_ARG_A |
- R200_TXC_SCALE_ARG_A |
- R200_TXC_ARG_B_R1_COLOR |
- R200_TXC_BIAS_ARG_B |
- (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
- R200_TXC_ARG_C_R0_COLOR |
- R200_TXC_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
- (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
- OUT_ACCEL_REG(R200_PP_TXABLEND_1,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
- R200_TXA_OUTPUT_REG_NONE);
-
- /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_BIAS_ARG_A |
- R200_TXC_SCALE_ARG_A |
- R200_TXC_ARG_B_R2_COLOR |
- R200_TXC_BIAS_ARG_B |
- (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
- R200_TXC_ARG_C_R0_COLOR |
- R200_TXC_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
- (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
- R200_TXC_SCALE_2X |
- R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
- OUT_ACCEL_REG(R200_PP_TXABLEND_2,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_COMP_ARG_C |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
- R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
-
- /* shader constants */
- OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
- yco > 1.0 ? yco - 1.0: yco,
- yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
- 0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
- uco[1] * ucscale + 0.5, /* or [-2, 2] */
- uco[2] * ucscale + 0.5,
- 0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
- vco[1] * vcscale + 0.5, /* or [-4, 4] */
- vco[2] * vcscale + 0.5,
- 0.0));
-
- FINISH_ACCEL();
- }
- else if (info->ChipFamily == CHIP_FAMILY_RV250) {
- /* fix up broken packed yuv - shader same as above except
- yuv components are all in same reg */
- /* note: in contrast to r300, use input biasing on uv components */
- const float Loff = -0.0627;
- float uvcosf, uvsinf;
- float yco, yoff;
- float uco[3], vco[3];
- float bright, cont, sat;
- int ref = pPriv->transform_index;
- float ucscale = 0.25, vcscale = 0.25;
- Bool needux8 = FALSE, needvx8 = FALSE;
-
- /* contrast can cause constant overflow, clamp */
- cont = RTFContrast(pPriv->contrast);
- if (cont * trans[ref].RefLuma > 2.0)
- cont = 2.0 / trans[ref].RefLuma;
- /* brightness is only from -0.5 to 0.5 should be safe */
- bright = RTFBrightness(pPriv->brightness);
- /* saturation can also cause overflow, clamp */
- sat = RTFSaturation(pPriv->saturation);
- if (sat * trans[ref].RefBCb > 4.0)
- sat = 4.0 / trans[ref].RefBCb;
- uvcosf = sat * cos(RTFHue(pPriv->hue));
- uvsinf = sat * sin(RTFHue(pPriv->hue));
-
- yco = trans[ref].RefLuma * cont;
- uco[0] = -trans[ref].RefRCr * uvsinf;
- uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
- uco[2] = trans[ref].RefBCb * uvcosf;
- vco[0] = trans[ref].RefRCr * uvcosf;
- vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
- vco[2] = trans[ref].RefBCb * uvsinf;
- yoff = Loff * yco + bright;
-
- if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
- needux8 = TRUE;
- ucscale = 0.125;
- }
- if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
- needvx8 = TRUE;
- vcscale = 0.125;
- }
-
- txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
- (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
- txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
- txpitch -= 32;
- txfilter = R200_MAG_FILTER_LINEAR |
- R200_MIN_FILTER_LINEAR |
- R200_CLAMP_S_CLAMP_LAST |
- R200_CLAMP_T_CLAMP_LAST;
-
- BEGIN_ACCEL(24);
-
- OUT_ACCEL_REG(RADEON_PP_CNTL,
- RADEON_TEX_0_ENABLE |
- RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
- RADEON_TEX_BLEND_2_ENABLE);
-
- OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
- OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
-
- OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
- OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
-
- /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_ARG_B_R0_COLOR |
- R200_TXC_ARG_C_TFACTOR_COLOR |
- (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
- R200_TXC_OP_DOT2_ADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
- (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
- R200_TXC_SCALE_INV2 |
- (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) |
- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
- OUT_ACCEL_REG(R200_PP_TXABLEND_0,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
- R200_TXA_OUTPUT_REG_NONE);
-
- /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_BIAS_ARG_A |
- R200_TXC_SCALE_ARG_A |
- R200_TXC_ARG_B_R0_COLOR |
- R200_TXC_BIAS_ARG_B |
- (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
- R200_TXC_ARG_C_R1_COLOR |
- R200_TXC_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
- (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
- (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) |
- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
- OUT_ACCEL_REG(R200_PP_TXABLEND_1,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
- R200_TXA_OUTPUT_REG_NONE);
-
- /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_BIAS_ARG_A |
- R200_TXC_SCALE_ARG_A |
- R200_TXC_ARG_B_R0_COLOR |
- R200_TXC_BIAS_ARG_B |
- (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
- R200_TXC_ARG_C_R1_COLOR |
- R200_TXC_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
- (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
- R200_TXC_SCALE_2X |
- (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) |
- R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
- OUT_ACCEL_REG(R200_PP_TXABLEND_2,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_COMP_ARG_C |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
- R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
-
- /* shader constants */
- OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
- yco > 1.0 ? yco - 1.0: yco,
- yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
- 0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
- uco[1] * ucscale + 0.5, /* or [-2, 2] */
- uco[2] * ucscale + 0.5,
- 0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
- vco[1] * vcscale + 0.5, /* or [-4, 4] */
- vco[2] * vcscale + 0.5,
- 0.0));
-
- FINISH_ACCEL();
- }
- else {
- BEGIN_ACCEL(13);
- OUT_ACCEL_REG(RADEON_PP_CNTL,
- RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
-
- OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
- OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
-
- OUT_ACCEL_REG(R200_PP_TXFILTER_0,
- R200_MAG_FILTER_LINEAR |
- R200_MIN_FILTER_LINEAR |
- R200_CLAMP_S_CLAMP_LAST |
- R200_CLAMP_T_CLAMP_LAST |
- R200_YUV_TO_RGB);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
-
- OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
-
- OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
- R200_TXC_ARG_A_ZERO |
- R200_TXC_ARG_B_ZERO |
- R200_TXC_ARG_C_R0_COLOR |
- R200_TXC_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
- R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
- OUT_ACCEL_REG(R200_PP_TXABLEND_0,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_R0_ALPHA |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
- R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
- FINISH_ACCEL();
- }
- } else {
+ /* no need to enable blending */
+ OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
- info->accel_state->texW[0] = 1;
- info->accel_state->texH[0] = 1;
-
- BEGIN_ACCEL(9);
-
- OUT_ACCEL_REG(RADEON_PP_CNTL,
- RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
-
- OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
- RADEON_SE_VTX_FMT_ST0));
-
- OUT_ACCEL_REG(RADEON_PP_TXFILTER_0,
- RADEON_MAG_FILTER_LINEAR |
- RADEON_MIN_FILTER_LINEAR |
- RADEON_CLAMP_S_CLAMP_LAST |
- RADEON_CLAMP_T_CLAMP_LAST |
- RADEON_YUV_TO_RGB);
- OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
- OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset);
- OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
- RADEON_COLOR_ARG_A_ZERO |
- RADEON_COLOR_ARG_B_ZERO |
- RADEON_COLOR_ARG_C_T0_COLOR |
- RADEON_BLEND_CTL_ADD |
- RADEON_CLAMP_TX);
- OUT_ACCEL_REG(RADEON_PP_TXABLEND_0,
- RADEON_ALPHA_ARG_A_ZERO |
- RADEON_ALPHA_ARG_B_ZERO |
- RADEON_ALPHA_ARG_C_T0_ALPHA |
- RADEON_BLEND_CTL_ADD |
- RADEON_CLAMP_TX);
-
- OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0,
- pPriv->src_pitch - 32);
- FINISH_ACCEL();
- }
- }
+ OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count);
+ FINISH_ACCEL();
if (pPriv->vsync) {
xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
@@ -2257,92 +2030,49 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
ErrorF("src: %d, %d, %d, %d\n", srcX, srcY, srcw, srch);
#endif
- if (IS_R300_3D || IS_R500_3D) {
- if (IS_R300_3D && ((dstw+dsth) > 2880))
- use_quad = TRUE;
- /*
- * Set up the scissor area to that of the output size.
- */
- BEGIN_ACCEL(2);
- if (IS_R300_3D) {
- /* R300 has an offset */
- OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX + 1088) << R300_SCISSOR_X_SHIFT) |
- ((dstY + 1088) << R300_SCISSOR_Y_SHIFT)));
- OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1088 - 1) << R300_SCISSOR_X_SHIFT) |
- ((dstY + dsth + 1088 - 1) << R300_SCISSOR_Y_SHIFT)));
- } else {
- OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) |
- ((dstY) << R300_SCISSOR_Y_SHIFT)));
- OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) |
- ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT)));
- }
- FINISH_ACCEL();
- }
+ if (IS_R300_3D && ((dstw+dsth) > 2880))
+ use_quad = TRUE;
+ /*
+ * Set up the scissor area to that of the output size.
+ */
+ BEGIN_ACCEL(2);
+ /* R300 has an offset */
+ OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX + 1088) << R300_SCISSOR_X_SHIFT) |
+ ((dstY + 1088) << R300_SCISSOR_Y_SHIFT)));
+ OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1088 - 1) << R300_SCISSOR_X_SHIFT) |
+ ((dstY + dsth + 1088 - 1) << R300_SCISSOR_Y_SHIFT)));
+ FINISH_ACCEL();
#ifdef ACCEL_CP
- if (info->ChipFamily < CHIP_FAMILY_R200) {
- BEGIN_RING(3 * vtx_count + 3);
- OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
- 3 * vtx_count + 1));
- OUT_RING(RADEON_CP_VC_FRMT_XY |
- RADEON_CP_VC_FRMT_ST0);
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ if (use_quad) {
+ BEGIN_RING(4 * vtx_count + 4);
+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+ 4 * vtx_count));
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
RADEON_CP_VC_CNTL_PRIM_WALK_RING |
- RADEON_CP_VC_CNTL_MAOS_ENABLE |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
- (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
- } else if (IS_R300_3D || IS_R500_3D) {
- if (use_quad) {
- BEGIN_RING(4 * vtx_count + 4);
- OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
- 4 * vtx_count));
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
- RADEON_CP_VC_CNTL_PRIM_WALK_RING |
- (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
- } else {
- BEGIN_RING(3 * vtx_count + 4);
- OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
- 3 * vtx_count));
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST |
- RADEON_CP_VC_CNTL_PRIM_WALK_RING |
- (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
- }
+ (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
} else {
- BEGIN_RING(3 * vtx_count + 2);
+ BEGIN_RING(3 * vtx_count + 4);
OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
3 * vtx_count));
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST |
RADEON_CP_VC_CNTL_PRIM_WALK_RING |
(3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
}
#else /* ACCEL_CP */
- if (IS_R300_3D || IS_R500_3D) {
- if (use_quad)
- BEGIN_ACCEL(2 + vtx_count * 4);
- else
- BEGIN_ACCEL(2 + vtx_count * 3);
- } else
- BEGIN_ACCEL(1 + vtx_count * 3);
-
- if (info->ChipFamily < CHIP_FAMILY_R200)
- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
+ if (use_quad)
+ BEGIN_ACCEL(2 + vtx_count * 4);
+ else
+ BEGIN_ACCEL(2 + vtx_count * 3);
+
+ if (use_quad)
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
RADEON_VF_PRIM_WALK_DATA |
- RADEON_VF_RADEON_MODE |
- (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
- else if (IS_R300_3D || IS_R500_3D) {
- if (use_quad)
- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
- RADEON_VF_PRIM_WALK_DATA |
- (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
- else
- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST |
- RADEON_VF_PRIM_WALK_DATA |
- (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
- } else
- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
+ (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
+ else
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST |
RADEON_VF_PRIM_WALK_DATA |
(3 << RADEON_VF_NUM_VERTICES_SHIFT)));
-
#endif
if (pPriv->bicubic_enabled) {
/*
@@ -2376,61 +2106,33 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
(float)srcY + 0.5);
}
} else {
- if (IS_R300_3D || IS_R500_3D) {
- if (use_quad) {
- VTX_OUT((float)dstX, (float)dstY,
- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
- VTX_OUT((float)dstX, (float)(dstY + dsth),
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw), (float)dstY,
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
- } else {
- /*
- * Render a big, scissored triangle. This means
- * increasing the triangle size and adjusting
- * texture coordinates.
- */
- VTX_OUT((float)dstX, (float)dstY,
- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
- VTX_OUT((float)dstX, (float)(dstY + dsth + dstw),
- (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]);
-
- VTX_OUT((float)(dstX + dstw + dsth), (float)dstY,
- ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
- (float)srcY / info->accel_state->texH[0]);
- }
- } else if (isplanar) {
- /*
- * Just render a rect (using three coords).
- * Filter is a bit a misnomer, it's just texcoords...
- */
- VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth),
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth),
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY,
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
- } else {
- /*
- * Just render a rect (using three coords).
- */
+ if (use_quad) {
+ VTX_OUT((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
VTX_OUT((float)dstX, (float)(dstY + dsth),
(float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
(float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
VTX_OUT((float)(dstX + dstw), (float)dstY,
(float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ } else {
+ /*
+ * Render a big, scissored triangle. This means
+ * increasing the triangle size and adjusting
+ * texture coordinates.
+ */
+ VTX_OUT((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT((float)dstX, (float)(dstY + dsth + dstw),
+ (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw + dsth), (float)dstY,
+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
+ (float)srcY / info->accel_state->texH[0]);
}
}
- if (IS_R300_3D || IS_R500_3D)
- /* flushing is pipelined, free/finish is not */
- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+ /* flushing is pipelined, free/finish is not */
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
#ifdef ACCEL_CP
ADVANCE_RING();
@@ -2441,12 +2143,1000 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
pBox++;
}
- if (IS_R300_3D || IS_R500_3D) {
- BEGIN_ACCEL(3);
- OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
+ BEGIN_ACCEL(3);
+ OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+ FINISH_ACCEL();
+
+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
+}
+
+static void
+FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ PixmapPtr pPixmap = pPriv->pPixmap;
+ uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
+ uint32_t dst_offset, dst_pitch, dst_format;
+ uint32_t txenable, colorpitch;
+ uint32_t output_fmt;
+ Bool isplanar = FALSE;
+ int dstxoff, dstyoff, pixel_shift, vtx_count;
+ BoxPtr pBox = REGION_RECTS(&pPriv->clip);
+ int nBox = REGION_NUM_RECTS(&pPriv->clip);
+ ACCEL_PREAMBLE();
+
+ pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
+
+#ifdef USE_EXA
+ if (info->useEXA) {
+ dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
+ dst_pitch = exaGetPixmapPitch(pPixmap);
} else
- BEGIN_ACCEL(1);
+#endif
+ {
+ dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
+ info->fbLocation + pScrn->fbOffset;
+ dst_pitch = pPixmap->devKind;
+ }
+
+#ifdef COMPOSITE
+ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
+ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
+#else
+ dstxoff = 0;
+ dstyoff = 0;
+#endif
+
+#ifdef USE_EXA
+ if (info->useEXA) {
+ RADEON_SWITCH_TO_3D();
+ } else
+#endif
+ {
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+ /* We must wait for 3d to idle, in case source was just written as a dest. */
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_ACCEL();
+
+ if (!info->accel_state->XInited3D)
+ RADEONInit3DEngine(pScrn);
+ }
+
+ if (pPriv->bicubic_enabled)
+ vtx_count = 6;
+ else
+ vtx_count = 4;
+
+ switch (pPixmap->drawable.bitsPerPixel) {
+ case 16:
+ if (pPixmap->drawable.depth == 15)
+ dst_format = R300_COLORFORMAT_ARGB1555;
+ else
+ dst_format = R300_COLORFORMAT_RGB565;
+ break;
+ case 32:
+ dst_format = R300_COLORFORMAT_ARGB8888;
+ break;
+ default:
+ return;
+ }
+
+ output_fmt = (R300_OUT_FMT_C4_8 |
+ R300_OUT_FMT_C0_SEL_BLUE |
+ R300_OUT_FMT_C1_SEL_GREEN |
+ R300_OUT_FMT_C2_SEL_RED |
+ R300_OUT_FMT_C3_SEL_ALPHA);
+
+ colorpitch = dst_pitch >> pixel_shift;
+ colorpitch |= dst_format;
+
+ if (RADEONTilingEnabled(pScrn, pPixmap))
+ colorpitch |= R300_COLORTILE;
+
+ if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ isplanar = TRUE;
+ }
+
+ if (isplanar) {
+ txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
+ txpitch = pPriv->src_pitch;
+ } else {
+ if (pPriv->id == FOURCC_UYVY)
+ txformat1 = R300_TX_FORMAT_YVYU422;
+ else
+ txformat1 = R300_TX_FORMAT_VYUY422;
+
+ txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+
+ /* pitch is in pixels */
+ txpitch = pPriv->src_pitch / 2;
+ }
+ txpitch -= 1;
+
+ txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+ (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+
+ info->accel_state->texW[0] = pPriv->w;
+ info->accel_state->texH[0] = pPriv->h;
+
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_MAG_FILTER_LINEAR |
+ R300_TX_MIN_FILTER_LINEAR |
+ (0 << R300_TX_ID_SHIFT));
+
+
+ if ((pPriv->w - 1) & 0x800)
+ txpitch |= R500_TXWIDTH_11;
+
+ if ((pPriv->h - 1) & 0x800)
+ txpitch |= R500_TXHEIGHT_11;
+
+ txoffset = pPriv->src_offset;
+
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
+ OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_0, txoffset);
+ FINISH_ACCEL();
+
+ txenable = R300_TEX_0_ENABLE;
+
+ if (isplanar) {
+ txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+ (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+ txpitch -= 1;
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_MIN_FILTER_LINEAR |
+ R300_TX_MAG_FILTER_LINEAR);
+
+ BEGIN_ACCEL(12);
+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
+ OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
+ FINISH_ACCEL();
+ txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
+ }
+
+ if (pPriv->bicubic_enabled) {
+ /* Size is 128x1 */
+ txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
+ (0x0 << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+ /* Format is 32-bit floats, 4bpp */
+ txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
+ /* Pitch is 127 (128-1) */
+ txpitch = 0x7f;
+ /* Tex filter */
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) |
+ R300_TX_MIN_FILTER_NEAREST |
+ R300_TX_MAG_FILTER_NEAREST |
+ (1 << R300_TX_ID_SHIFT));
+
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter);
+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_1, pPriv->bicubic_src_offset);
+ FINISH_ACCEL();
+
+ /* Enable tex 1 */
+ txenable |= R300_TEX_1_ENABLE;
+ }
+
+ /* setup the VAP */
+ if (info->accel_state->has_tcl) {
+ if (pPriv->bicubic_enabled)
+ BEGIN_ACCEL(7);
+ else
+ BEGIN_ACCEL(6);
+ } else {
+ if (pPriv->bicubic_enabled)
+ BEGIN_ACCEL(5);
+ else
+ BEGIN_ACCEL(4);
+ }
+
+ /* These registers define the number, type, and location of data submitted
+ * to the PVS unit of GA input (when PVS is disabled)
+ * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
+ * enabled. This memory provides the imputs to the vertex shader program
+ * and ordering is not important. When PVS/TCL is disabled, this field maps
+ * directly to the GA input memory and the order is signifigant. In
+ * PVS_BYPASS mode the order is as follows:
+ * Position
+ * Point Size
+ * Color 0-3
+ * Textures 0-7
+ * Fog
+ */
+ if (pPriv->bicubic_enabled) {
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+ (0 << R300_SKIP_DWORDS_0_SHIFT) |
+ (0 << R300_DST_VEC_LOC_0_SHIFT) |
+ R300_SIGNED_0 |
+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+ (0 << R300_SKIP_DWORDS_1_SHIFT) |
+ (6 << R300_DST_VEC_LOC_1_SHIFT) |
+ R300_SIGNED_1));
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
+ (0 << R300_SKIP_DWORDS_2_SHIFT) |
+ (7 << R300_DST_VEC_LOC_2_SHIFT) |
+ R300_LAST_VEC_2 |
+ R300_SIGNED_2));
+ } else {
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+ (0 << R300_SKIP_DWORDS_0_SHIFT) |
+ (0 << R300_DST_VEC_LOC_0_SHIFT) |
+ R300_SIGNED_0 |
+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+ (0 << R300_SKIP_DWORDS_1_SHIFT) |
+ (6 << R300_DST_VEC_LOC_1_SHIFT) |
+ R300_LAST_VEC_1 |
+ R300_SIGNED_1));
+ }
+
+ /* load the vertex shader
+ * We pre-load vertex programs in RADEONInit3DEngine():
+ * - exa mask/Xv bicubic
+ * - exa no mask
+ * - Xv
+ * Here we select the offset of the vertex program we want to use
+ */
+ if (info->accel_state->has_tcl) {
+ if (pPriv->bicubic_enabled) {
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
+ ((0 << R300_PVS_FIRST_INST_SHIFT) |
+ (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (2 << R300_PVS_LAST_INST_SHIFT)));
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
+ (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ } else {
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
+ ((5 << R300_PVS_FIRST_INST_SHIFT) |
+ (6 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (6 << R300_PVS_LAST_INST_SHIFT)));
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
+ (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ }
+ }
+
+ /* Position and one set of 2 texture coordinates */
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
+ if (pPriv->bicubic_enabled)
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
+ (2 << R300_TEX_1_COMP_CNT_SHIFT)));
+ else
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
+
+ OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
+ FINISH_ACCEL();
+
+ /* setup pixel shader */
+ if (pPriv->bicubic_enabled) {
+ BEGIN_ACCEL(7);
+
+ /* 4 components: 2 for tex0 and 2 for tex1 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
+
+ /* FP length. */
+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
+ R500_US_CODE_END_ADDR(13)));
+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
+ R500_US_CODE_RANGE_SIZE(13)));
+
+ /* Prepare for FP emission. */
+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
+ FINISH_ACCEL();
+
+ BEGIN_ACCEL(89);
+ /* Pixel shader.
+ * I've gone ahead and annotated each instruction, since this
+ * thing is MASSIVE. :3
+ * Note: In order to avoid buggies with temps and multiple
+ * inputs, all temps are offset by 2. temp0 -> register2. */
+
+ /* TEX temp2, input1.xxxx, tex1, 1D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_R |
+ R500_TEX_SRC_R_SWIZ_R |
+ R500_TEX_SRC_Q_SWIZ_R |
+ R500_TEX_DST_ADDR(2) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* TEX temp5, input1.yyyy, tex1, 1D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
+ R500_TEX_SRC_S_SWIZ_G |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_SRC_R_SWIZ_G |
+ R500_TEX_SRC_Q_SWIZ_G |
+ R500_TEX_DST_ADDR(5) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* MUL temp4, const0.x0x0, temp2.yyxx */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_0 |
+ R500_ALU_RGB_B_SWIZ_A_R |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC0 |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_R_SWIZ_0 |
+ R500_ALU_RGBA_G_SWIZ_0 |
+ R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+
+ /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(5) |
+ R500_RGB_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(5) |
+ R500_ALPHA_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_0 |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_0 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC0 |
+ R500_ALPHA_SWIZ_A_G |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* ADD temp3, temp3, input0.xyxy */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) |
+ R500_RGB_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) |
+ R500_ALPHA_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
+ R500_ALU_RGB_G_SWIZ_A_1 |
+ R500_ALU_RGB_B_SWIZ_A_1 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SWIZ_A_1 |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_R |
+ R500_ALU_RGBA_A_SWIZ_G));
+
+ /* TEX temp1, temp3.zwxy, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
+ R500_TEX_SRC_S_SWIZ_B |
+ R500_TEX_SRC_T_SWIZ_A |
+ R500_TEX_SRC_R_SWIZ_R |
+ R500_TEX_SRC_Q_SWIZ_G |
+ R500_TEX_DST_ADDR(1) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* TEX temp3, temp3.xyzw, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_SRC_R_SWIZ_B |
+ R500_TEX_SRC_Q_SWIZ_A |
+ R500_TEX_DST_ADDR(3) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(5) |
+ R500_RGB_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(5) |
+ R500_ALPHA_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_0 |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_0 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_G));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC0 |
+ R500_ALPHA_SWIZ_A_G |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_G));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* ADD temp0, temp4, input0.xyxy */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) |
+ R500_RGB_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) |
+ R500_ALPHA_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
+ R500_ALU_RGB_G_SWIZ_A_1 |
+ R500_ALU_RGB_B_SWIZ_A_1 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SWIZ_A_1 |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_R |
+ R500_ALU_RGBA_A_SWIZ_G));
+
+ /* TEX temp4, temp0.zwzw, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_B |
+ R500_TEX_SRC_T_SWIZ_A |
+ R500_TEX_SRC_R_SWIZ_B |
+ R500_TEX_SRC_Q_SWIZ_A |
+ R500_TEX_DST_ADDR(4) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* TEX temp0, temp0.xyzw, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_SRC_R_SWIZ_B |
+ R500_TEX_SRC_Q_SWIZ_A |
+ R500_TEX_DST_ADDR(0) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* LRP temp3, temp2.zzzz, temp1, temp3 ->
+ * - PRESUB temps, temp1 - temp3
+ * - MAD temp2.zzzz, temps, temp3 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) |
+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
+ R500_RGB_ADDR1(1) |
+ R500_RGB_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) |
+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
+ R500_ALPHA_ADDR1(1) |
+ R500_ALPHA_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
+ R500_ALU_RGB_R_SWIZ_A_B |
+ R500_ALU_RGB_G_SWIZ_A_B |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRCP |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC2 |
+ R500_ALPHA_SWIZ_A_B |
+ R500_ALPHA_SEL_B_SRCP |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* LRP temp0, temp2.zzzz, temp4, temp0 ->
+ * - PRESUB temps, temp4 - temp1
+ * - MAD temp2.zzzz, temps, temp0 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
+ R500_RGB_ADDR1(4) |
+ R500_RGB_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
+ R500_ALPHA_ADDR1(4) |
+ R500_ALPHA_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
+ R500_ALU_RGB_R_SWIZ_A_B |
+ R500_ALU_RGB_G_SWIZ_A_B |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRCP |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC2 |
+ R500_ALPHA_SWIZ_A_B |
+ R500_ALPHA_SEL_B_SRCP |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* LRP output, temp5.zzzz, temp3, temp0 ->
+ * - PRESUB temps, temp3 - temp0
+ * - MAD temp5.zzzz, temps, temp0 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
+ R500_INST_LAST |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_OMASK_R |
+ R500_INST_RGB_OMASK_G |
+ R500_INST_RGB_OMASK_B |
+ R500_INST_ALPHA_OMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
+ R500_RGB_ADDR1(3) |
+ R500_RGB_ADDR2(5)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
+ R500_ALPHA_ADDR1(3) |
+ R500_ALPHA_ADDR2(5)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
+ R500_ALU_RGB_R_SWIZ_A_B |
+ R500_ALU_RGB_G_SWIZ_A_B |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRCP |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC2 |
+ R500_ALPHA_SWIZ_A_B |
+ R500_ALPHA_SEL_B_SRCP |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* Shader constants. */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
+
+ /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w));
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h));
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
+
+ FINISH_ACCEL();
+
+ } else {
+ BEGIN_ACCEL(19);
+ /* 2 components: 2 for tex0 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
+
+ /* FP length. */
+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
+ R500_US_CODE_END_ADDR(1)));
+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
+ R500_US_CODE_RANGE_SIZE(1)));
+
+ /* Prepare for FP emission. */
+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_DST_ADDR(0) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
+ R500_DX_S_SWIZ_R |
+ R500_DX_T_SWIZ_R |
+ R500_DX_R_SWIZ_R |
+ R500_DX_Q_SWIZ_R |
+ R500_DY_ADDR(0) |
+ R500_DY_S_SWIZ_R |
+ R500_DY_T_SWIZ_R |
+ R500_DY_R_SWIZ_R |
+ R500_DY_Q_SWIZ_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* ALU inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_LAST |
+ R500_INST_RGB_OMASK_R |
+ R500_INST_RGB_OMASK_G |
+ R500_INST_RGB_OMASK_B |
+ R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR1_CONST |
+ R500_RGB_ADDR2(0) |
+ R500_RGB_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR1_CONST |
+ R500_ALPHA_ADDR2(0) |
+ R500_ALPHA_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC0 |
+ R500_ALU_RGB_R_SWIZ_B_1 |
+ R500_ALU_RGB_B_SWIZ_B_1 |
+ R500_ALU_RGB_G_SWIZ_B_1));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_SWIZ_A_A |
+ R500_ALPHA_SWIZ_B_1));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_R_SWIZ_0 |
+ R500_ALU_RGBA_G_SWIZ_0 |
+ R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+ FINISH_ACCEL();
+ }
+
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
+ OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
+
+ OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
+ OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
+
+ /* no need to enable blending */
+ OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+
+ OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count);
+ FINISH_ACCEL();
+
+ if (pPriv->vsync) {
+ xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
+ pPriv->drw_x,
+ pPriv->drw_x + pPriv->dst_w,
+ pPriv->drw_y,
+ pPriv->drw_y + pPriv->dst_h);
+ if (crtc) {
+ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
+
+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap,
+ radeon_crtc->crtc_id,
+ pPriv->drw_y - crtc->y,
+ (pPriv->drw_y - crtc->y) + pPriv->dst_h);
+ }
+ }
+ /*
+ * Rendering of the actual polygon is done in two different
+ * ways depending on chip generation:
+ *
+ * < R300:
+ *
+ * These chips can render a rectangle in one pass, so
+ * handling is pretty straight-forward.
+ *
+ * >= R300:
+ *
+ * These chips can accept a quad, but will render it as
+ * two triangles which results in a diagonal tear. Instead
+ * We render a single, large triangle and use the scissor
+ * functionality to restrict it to the desired rectangle.
+ * Due to guardband limits on r3xx/r4xx, we can only use
+ * the single triangle up to 2880 pixels; above that we
+ * render as a quad.
+ */
+
+ while (nBox--) {
+ int srcX, srcY, srcw, srch;
+ int dstX, dstY, dstw, dsth;
+ dstX = pBox->x1 + dstxoff;
+ dstY = pBox->y1 + dstyoff;
+ dstw = pBox->x2 - pBox->x1;
+ dsth = pBox->y2 - pBox->y1;
+
+ srcX = ((pBox->x1 - pPriv->drw_x) *
+ pPriv->src_w) / pPriv->dst_w;
+ srcY = ((pBox->y1 - pPriv->drw_y) *
+ pPriv->src_h) / pPriv->dst_h;
+
+ srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
+ srch = (pPriv->src_h * dsth) / pPriv->dst_h;
+
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) |
+ ((dstY) << R300_SCISSOR_Y_SHIFT)));
+ OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) |
+ ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT)));
+ FINISH_ACCEL();
+
+#ifdef ACCEL_CP
+ BEGIN_RING(3 * vtx_count + 4);
+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+ 3 * vtx_count));
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST |
+ RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+#else /* ACCEL_CP */
+ BEGIN_ACCEL(2 + vtx_count * 3);
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST |
+ RADEON_VF_PRIM_WALK_DATA |
+ (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
+#endif
+ if (pPriv->bicubic_enabled) {
+ VTX_OUT_FILTER((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
+ (float)srcX + 0.5, (float)srcY + 0.5);
+ VTX_OUT_FILTER((float)dstX, (float)(dstY + dstw + dsth),
+ (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0],
+ (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5);
+ VTX_OUT_FILTER((float)(dstX + dstw + dsth), (float)dstY,
+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
+ (float)srcY / info->accel_state->texH[0],
+ (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5,
+ (float)srcY + 0.5);
+ } else {
+ /*
+ * Render a big, scissored triangle. This means
+ * increasing the triangle size and adjusting
+ * texture coordinates.
+ */
+ VTX_OUT((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT((float)dstX, (float)(dstY + dsth + dstw),
+ (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw + dsth), (float)dstY,
+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
+ (float)srcY / info->accel_state->texH[0]);
+ }
+
+ /* flushing is pipelined, free/finish is not */
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+
+#ifdef ACCEL_CP
+ ADVANCE_RING();
+#else
+ FINISH_ACCEL();
+#endif /* !ACCEL_CP */
+
+ pBox++;
+ }
+
+ BEGIN_ACCEL(3);
+ OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
FINISH_ACCEL();
commit a30737b337edb31528174b483c9094941a5d41bb
Author: Roland Scheidegger <sroland at tungstengraphics.com>
Date: Mon Apr 13 15:36:07 2009 -0400
r200/r300: implement brightness/contrast/hue/saturation/gamma controls for textured video
This implements
contrast/brightness/hue/saturation controls for r200/r300 plus gamma (same
gamma value for all channels used though separate values would be trivial)
control for r300.
Some issues left:
- only r200/r300
- still can't be combined with bicubic
- controls will silently cease to work if the format used is packed and not
planar (except for rv250)
- gamma range is from 100 to 10000 corresponding to 0.1 and 10.0 like used in
overlay. However, usable range is far smaller. Over 2.0 picture gets dark
pretty quickly, and below 0.6 or so black seems to turn into purple (I've
verified that even with gamma 1.0 black actually often seems to be RGB 1/0/1 so
this explains this since that gets amplified by low gamma values - not sure if
this is a rounding problem somewhere, bogus reference values or is somehow
expected).
- gamma adds a bit too many instructions for my taste (7) though the
alternative (3 texture lookups + some swizzling instructions) doesn't seem any
better.
diff --git a/src/radeon.h b/src/radeon.h
index 7e84aeb..174352d 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -378,6 +378,11 @@ typedef enum {
(info->ChipFamily == CHIP_FAMILY_RS400) || \
(info->ChipFamily == CHIP_FAMILY_RS480))
+#define IS_R200_3D ((info->ChipFamily == CHIP_FAMILY_RV250) || \
+ (info->ChipFamily == CHIP_FAMILY_RV280) || \
+ (info->ChipFamily == CHIP_FAMILY_RS300) || \
+ (info->ChipFamily == CHIP_FAMILY_R200))
+
/*
* Errata workarounds
*/
diff --git a/src/radeon_accelfuncs.c b/src/radeon_accelfuncs.c
index 45eb6d5..2d6fe01 100644
--- a/src/radeon_accelfuncs.c
+++ b/src/radeon_accelfuncs.c
@@ -1345,10 +1345,7 @@ FUNC_NAME(RADEONAccelInit)(ScreenPtr pScreen, XAAInfoRecPtr a)
xf86DrvMsg(pScrn->scrnIndex, X_INFO, "XAA Render acceleration "
"unsupported on Radeon 9500/9700 and newer. "
"Please use EXA instead.\n");
- } else if ((info->ChipFamily == CHIP_FAMILY_RV250) ||
- (info->ChipFamily == CHIP_FAMILY_RV280) ||
- (info->ChipFamily == CHIP_FAMILY_RS300) ||
- (info->ChipFamily == CHIP_FAMILY_R200)) {
+ } else if (IS_R200_3D) {
a->SetupForCPUToScreenAlphaTexture2 =
FUNC_NAME(R200SetupForCPUToScreenAlphaTexture);
a->SubsequentCPUToScreenAlphaTexture =
diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c
index a9bc7d2..3dbe617 100644
--- a/src/radeon_commonfuncs.c
+++ b/src/radeon_commonfuncs.c
@@ -565,10 +565,7 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
OUT_ACCEL_REG(R300_SC_SCREENDOOR, 0xffffff);
FINISH_ACCEL();
- } else if ((info->ChipFamily == CHIP_FAMILY_RV250) ||
- (info->ChipFamily == CHIP_FAMILY_RV280) ||
- (info->ChipFamily == CHIP_FAMILY_RS300) ||
- (info->ChipFamily == CHIP_FAMILY_R200)) {
+ } else if (IS_R200_3D) {
BEGIN_ACCEL(6);
if (info->ChipFamily == CHIP_FAMILY_RS300) {
diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index 59cb46f..6a2b25c 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -505,10 +505,7 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen)
info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
} else
xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Composite requires CP on R5xx/IGP\n");
- } else if ((info->ChipFamily == CHIP_FAMILY_RV250) ||
- (info->ChipFamily == CHIP_FAMILY_RV280) ||
- (info->ChipFamily == CHIP_FAMILY_RS300) ||
- (info->ChipFamily == CHIP_FAMILY_R200)) {
+ } else if (IS_R200_3D) {
xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
"enabled for R200 type cards.\n");
info->accel_state->exa->CheckComposite = R200CheckComposite;
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index 79671c0..bf8a276 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -128,6 +128,21 @@ static __inline__ uint32_t float4touint(float fr, float fg, float fb, float fa)
return (ua << 24) | (ur << 16) | (ug << 8) | ub;
}
+/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
+ note the difference to the parameters used in overlay are due
+ to 10bit vs. float calcs */
+static REF_TRANSFORM trans[2] =
+{
+ {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
+ {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */
+};
+
+
+#define RTFSaturation(a) (1.0 + ((a)*1.0)/1000.0)
+#define RTFBrightness(a) (((a)*1.0)/2000.0)
+#define RTFContrast(a) (1.0 + ((a)*1.0)/1000.0)
+#define RTFHue(a) (((a)*3.1416)/1000.0)
+
#define ACCEL_MMIO
#define ACCEL_PREAMBLE() unsigned char *RADEONMMIO = info->MMIO
#define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n))
@@ -359,12 +374,8 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
}
pPriv->planar_hw = pPriv->planar_state;
- if (pPriv->bicubic_enabled || !( IS_R300_3D ||
- (info->ChipFamily == CHIP_FAMILY_RV250) ||
- (info->ChipFamily == CHIP_FAMILY_RV280) ||
- (info->ChipFamily == CHIP_FAMILY_RS300) ||
- (info->ChipFamily == CHIP_FAMILY_R200) ))
- pPriv->planar_hw = 0;
+ if (pPriv->bicubic_enabled || !( IS_R300_3D || IS_R200_3D ))
+ pPriv->planar_hw = 0;
switch(id) {
case FOURCC_YV12:
@@ -636,28 +647,58 @@ static XF86VideoFormatRec Formats[NUM_FORMATS] =
{15, TrueColor}, {16, TrueColor}, {24, TrueColor}
};
-#define NUM_ATTRIBUTES 2
+#define NUM_ATTRIBUTES 1
static XF86AttributeRec Attributes[NUM_ATTRIBUTES+1] =
{
{XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
+ {0, 0, 0, NULL}
+};
+
+#define NUM_ATTRIBUTES_R200 7
+
+static XF86AttributeRec Attributes_r200[NUM_ATTRIBUTES_R200+1] =
+{
+ {XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
{XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_HUE"},
+ {XvSettable | XvGettable, 100, 10000, "XV_COLORSPACE"},
{0, 0, 0, NULL}
};
-#define NUM_ATTRIBUTES_R300 3
+#define NUM_ATTRIBUTES_R300 9
static XF86AttributeRec Attributes_r300[NUM_ATTRIBUTES_R300+1] =
{
{XvSettable | XvGettable, 0, 2, "XV_BICUBIC"},
{XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
{XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"},
+ {XvSettable | XvGettable, -1000, 1000, "XV_HUE"},
+ {XvSettable | XvGettable, 100, 10000, "XV_GAMMA"},
+ {XvSettable | XvGettable, 0, 1, "XV_COLORSPACE"},
+ {0, 0, 0, NULL}
+};
+
+#define NUM_ATTRIBUTES_R500 2
+
+static XF86AttributeRec Attributes_r500[NUM_ATTRIBUTES_R500+1] =
+{
+ {XvSettable | XvGettable, 0, 2, "XV_BICUBIC"},
+ {XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
{0, 0, 0, NULL}
};
static Atom xvBicubic;
static Atom xvVSync;
static Atom xvHWPlanar;
+static Atom xvBrightness, xvContrast, xvSaturation, xvHue;
+static Atom xvGamma, xvColorspace;
#define NUM_IMAGES 4
@@ -686,6 +727,18 @@ RADEONGetTexPortAttribute(ScrnInfoPtr pScrn,
*value = pPriv->vsync;
else if (attribute == xvHWPlanar)
*value = pPriv->planar_state;
+ else if (attribute == xvBrightness)
+ *value = pPriv->brightness;
+ else if (attribute == xvContrast)
+ *value = pPriv->contrast;
+ else if (attribute == xvSaturation)
+ *value = pPriv->saturation;
+ else if (attribute == xvHue)
+ *value = pPriv->hue;
+ else if (attribute == xvGamma)
+ *value = pPriv->gamma;
+ else if(attribute == xvColorspace)
+ *value = pPriv->transform_index;
else
return BadMatch;
@@ -709,6 +762,20 @@ RADEONSetTexPortAttribute(ScrnInfoPtr pScrn,
pPriv->vsync = ClipValue (value, 0, 1);
else if (attribute == xvHWPlanar)
pPriv->planar_state = ClipValue (value, 0, 1);
+ else if (attribute == xvHWPlanar)
+ pPriv->planar_state = ClipValue (value, 0, 1);
+ else if (attribute == xvBrightness)
+ pPriv->brightness = ClipValue (value, -1000, 1000);
+ else if (attribute == xvContrast)
+ pPriv->contrast = ClipValue (value, -1000, 1000);
+ else if (attribute == xvSaturation)
+ pPriv->saturation = ClipValue (value, -1000, 1000);
+ else if (attribute == xvHue)
+ pPriv->hue = ClipValue (value, -1000, 1000);
+ else if (attribute == xvGamma)
+ pPriv->gamma = ClipValue (value, 100, 10000);
+ else if(attribute == xvColorspace)
+ pPriv->transform_index = ClipValue (value, 0, 1);
else
return BadMatch;
@@ -733,6 +800,12 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
xvBicubic = MAKE_ATOM("XV_BICUBIC");
xvVSync = MAKE_ATOM("XV_VSYNC");
xvHWPlanar = MAKE_ATOM("XV_HWPLANAR");
+ xvBrightness = MAKE_ATOM("XV_BRIGHTNESS");
+ xvContrast = MAKE_ATOM("XV_CONTRAST");
+ xvSaturation = MAKE_ATOM("XV_SATURATION");
+ xvHue = MAKE_ATOM("XV_HUE");
+ xvGamma = MAKE_ATOM("XV_GAMMA");
+ xvColorspace = MAKE_ATOM("XV_COLORSPACE");
adapt->type = XvWindowMask | XvInputMask | XvImageMask;
adapt->flags = 0;
@@ -752,10 +825,19 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
pPortPriv =
(RADEONPortPrivPtr)(&adapt->pPortPrivates[num_texture_ports]);
- if (IS_R300_3D || IS_R500_3D) {
+ if (IS_R300_3D) {
adapt->pAttributes = Attributes_r300;
adapt->nAttributes = NUM_ATTRIBUTES_R300;
- } else {
+ }
+ else if (IS_R500_3D) {
+ adapt->pAttributes = Attributes_r500;
+ adapt->nAttributes = NUM_ATTRIBUTES_R500;
+ }
+ else if (IS_R200_3D) {
+ adapt->pAttributes = Attributes_r200;
+ adapt->nAttributes = NUM_ATTRIBUTES_R200;
+ }
+ else {
adapt->pAttributes = Attributes;
adapt->nAttributes = NUM_ATTRIBUTES;
}
@@ -783,6 +865,12 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
pPriv->bicubic_state = BICUBIC_AUTO;
pPriv->vsync = TRUE;
pPriv->planar_state = 1;
+ pPriv->brightness = 0;
+ pPriv->contrast = 0;
+ pPriv->saturation = 0;
+ pPriv->hue = 0;
+ pPriv->gamma = 1000;
+ pPriv->transform_index = 0;
/* gotta uninit this someplace, XXX: shouldn't be necessary for textured */
REGION_NULL(pScreen, &pPriv->clip);
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 6cb2870..3c4289f 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -743,9 +743,10 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
* DP3 might look like the straightforward solution
* but we'd need to move the texture yuv values in
* the same reg for this to work. Therefore use MADs.
- * Without changing the shader at all (only the constants)
- * could also provide hue/saturation/brightness/contrast control.
- *
+ * Brightness just adds to the off constant.
+ * Contrast is multiplication of luminance.
+ * Saturation and hue change the u and v coeffs.
+ * Default values (before adjustments - depend on colorspace):
* yco = 1.1643
* uco = 0, -0.39173, 2.017
* vco = 1.5958, -0.8129, 0
@@ -757,14 +758,46 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
* temp = MAD(uco, yuv.uuuu, temp)
* result = MAD(vco, yuv.vvvv, temp)
*/
- float yco = 1.1643;
- float uco[3] = {0.0, -0.39173, 2.018};
- float vco[3] = {1.5958, -0.8129, 0.0};
- float off[3] = {-0.0625 * yco + -0.5 * uco[0] + -0.5 * vco[0],
- -0.0625 * yco + -0.5 * uco[1] + -0.5 * vco[1],
- -0.0625 * yco + -0.5 * uco[2] + -0.5 * vco[2]};
-
- BEGIN_ACCEL(33);
+ /* TODO: don't recalc consts always */
+ const float Loff = -0.0627;
+ const float Coff = -0.502;
+ float uvcosf, uvsinf;
+ float yco;
+ float uco[3], vco[3], off[3];
+ float bright, cont, gamma;
+ int ref = pPriv->transform_index;
+ Bool needgamma = FALSE;
+
+ cont = RTFContrast(pPriv->contrast);
+ bright = RTFBrightness(pPriv->brightness);
+ gamma = (float)pPriv->gamma / 1000.0;
+ uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
+ uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
+ /* overlay video also does pre-gamma contrast/sat adjust, should we? */
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
+ off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
+ off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
+
+ if (gamma != 1.0) {
+ needgamma = TRUE;
+ /* note: gamma correction is out = in ^ gamma;
+ gpu can only do LG2/EX2 therefore we transform into
+ in ^ gamma = 2 ^ (log2(in) * gamma).
+ Lots of scalar ops, unfortunately (better solution?) -
+ without gamma that's 3 inst, with gamma it's 10...
+ could use different gamma factors per channel,
+ if that's of any use. */
+ }
+
+ BEGIN_ACCEL(needgamma ? 28 + 33 : 33);
/* 2 components: same 2 for tex0/1/2 */
OUT_ACCEL_REG(R300_RS_COUNT,
((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
@@ -779,12 +812,12 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R300_FIRST_TEX));
OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(3) |
+ R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
R300_TEX_CODE_OFFSET(0) |
R300_TEX_CODE_SIZE(3)));
OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
- R300_ALU_SIZE(2) |
+ R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
R300_TEX_START(0) |
R300_TEX_SIZE(2) |
R300_RGBA_OUT));
@@ -857,7 +890,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R300_ALU_RGB_ADDR2(0) |
R300_ALU_RGB_ADDRD(0) |
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
+ (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
@@ -868,14 +901,126 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
R300_ALU_RGB_CLAMP));
/* write alpha 1 */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
R300_ALU_ALPHA_TARGET_A));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
+ if (needgamma) {
+ /* rgb temp0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MUL const1, temp1, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but set up const1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ }
+
/* Shader constants. */
/* constant 0: off, yco */
OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
@@ -886,7 +1031,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(0.0));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma));
/* constant 2: vco */
OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
@@ -1601,20 +1746,52 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
FINISH_ACCEL();
- if ((info->ChipFamily == CHIP_FAMILY_RV250) ||
- (info->ChipFamily == CHIP_FAMILY_RV280) ||
- (info->ChipFamily == CHIP_FAMILY_RS300) ||
- (info->ChipFamily == CHIP_FAMILY_R200)) {
+ if (IS_R200_3D) {
info->accel_state->texW[0] = pPriv->w;
info->accel_state->texH[0] = pPriv->h;
if (isplanar) {
/* note: in contrast to r300, use input biasing on uv components */
- float yco = 1.1643;
- float yoff = -0.0625 * yco;
- float uco[3] = {0.0, -0.39173, 2.018};
- float vco[3] = {1.5958, -0.8129, 0.0};
+ const float Loff = -0.0627;
+ float uvcosf, uvsinf;
+ float yco, yoff;
+ float uco[3], vco[3];
+ float bright, cont, sat;
+ int ref = pPriv->transform_index;
+ float ucscale = 0.25, vcscale = 0.25;
+ Bool needux8 = FALSE, needvx8 = FALSE;
+
+ /* contrast can cause constant overflow, clamp */
+ cont = RTFContrast(pPriv->contrast);
+ if (cont * trans[ref].RefLuma > 2.0)
+ cont = 2.0 / trans[ref].RefLuma;
+ /* brightness is only from -0.5 to 0.5 should be safe */
+ bright = RTFBrightness(pPriv->brightness);
+ /* saturation can also cause overflow, clamp */
+ sat = RTFSaturation(pPriv->saturation);
+ if (sat * trans[ref].RefBCb > 4.0)
+ sat = 4.0 / trans[ref].RefBCb;
+ uvcosf = sat * cos(RTFHue(pPriv->hue));
+ uvsinf = sat * sin(RTFHue(pPriv->hue));
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ yoff = Loff * yco + bright;
+
+ if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
+ needux8 = TRUE;
+ ucscale = 0.125;
+ }
+ if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
+ needvx8 = TRUE;
+ vcscale = 0.125;
+ }
/* need 2 texcoord sets (even though they are identical) due
to denormalization! hw apparently can't premultiply
@@ -1678,7 +1855,9 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
* seems the values we need seem to fit better than worst case (get about
* 6 fractional bits for this instead of 5, at least when not correcting for
* hue/saturation/contrast/brightness, which is the same as for vco - yco and
- * yoff get 8 fractional bits).
+ * yoff get 8 fractional bits). Try to preserve as much accuracy as possible
+ * even with non-default saturation/hue/contrast/brightness adjustments,
+ * it gets a little crazy and ultimately precision might still be lacking.
*
* A higher precision (8 fractional bits) version might just put uco into
* a texcoord, and calculate a new vcoconst in the shader, like so:
@@ -1709,7 +1888,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R200_TXC_ARG_A_TFACTOR_COLOR |
R200_TXC_ARG_B_R0_COLOR |
R200_TXC_ARG_C_TFACTOR_COLOR |
- R200_TXC_NEG_ARG_C |
+ (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
R200_TXC_OP_DOT2_ADD);
OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
(0 << R200_TXC_TFACTOR_SEL_SHIFT) |
@@ -1730,7 +1909,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R200_TXC_SCALE_ARG_A |
R200_TXC_ARG_B_R1_COLOR |
R200_TXC_BIAS_ARG_B |
- R200_TXC_SCALE_ARG_B |
+ (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
R200_TXC_ARG_C_R0_COLOR |
R200_TXC_OP_MADD);
OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
@@ -1751,6 +1930,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R200_TXC_SCALE_ARG_A |
R200_TXC_ARG_B_R2_COLOR |
R200_TXC_BIAS_ARG_B |
+ (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
R200_TXC_ARG_C_R0_COLOR |
R200_TXC_OP_MADD);
OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
@@ -1767,28 +1947,64 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
/* shader constants */
- OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(1.0, /* src range [1, 2] */
- yco - 1.0,
- -yoff, /* range [-1, 0] */
+ OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
+ yco > 1.0 ? yco - 1.0: yco,
+ yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * 0.125 + 0.5, /* range [-4, 4] */
- uco[1] * 0.125 + 0.5,
- uco[2] * 0.125 + 0.5,
+ OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
+ uco[1] * ucscale + 0.5, /* or [-2, 2] */
+ uco[2] * ucscale + 0.5,
0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * 0.25 + 0.5, /* range [-2, 2] */
- vco[1] * 0.25 + 0.5,
- vco[2] * 0.25 + 0.5,
+ OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
+ vco[1] * vcscale + 0.5, /* or [-4, 4] */
+ vco[2] * vcscale + 0.5,
0.0));
FINISH_ACCEL();
}
else if (info->ChipFamily == CHIP_FAMILY_RV250) {
/* fix up broken packed yuv - shader same as above except
- yuv compoents are all in same reg */
- float yco = 1.1643;
- float yoff = -0.0625 * yco;
- float uco[3] = {0.0, -0.39173, 2.018};
- float vco[3] = {1.5958, -0.8129, 0.0};
+ yuv components are all in same reg */
+ /* note: in contrast to r300, use input biasing on uv components */
+ const float Loff = -0.0627;
+ float uvcosf, uvsinf;
+ float yco, yoff;
+ float uco[3], vco[3];
+ float bright, cont, sat;
+ int ref = pPriv->transform_index;
+ float ucscale = 0.25, vcscale = 0.25;
+ Bool needux8 = FALSE, needvx8 = FALSE;
+
+ /* contrast can cause constant overflow, clamp */
+ cont = RTFContrast(pPriv->contrast);
+ if (cont * trans[ref].RefLuma > 2.0)
+ cont = 2.0 / trans[ref].RefLuma;
+ /* brightness is only from -0.5 to 0.5 should be safe */
+ bright = RTFBrightness(pPriv->brightness);
+ /* saturation can also cause overflow, clamp */
+ sat = RTFSaturation(pPriv->saturation);
+ if (sat * trans[ref].RefBCb > 4.0)
+ sat = 4.0 / trans[ref].RefBCb;
+ uvcosf = sat * cos(RTFHue(pPriv->hue));
+ uvsinf = sat * sin(RTFHue(pPriv->hue));
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ yoff = Loff * yco + bright;
+
+ if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
+ needux8 = TRUE;
+ ucscale = 0.125;
+ }
+ if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
+ needvx8 = TRUE;
+ vcscale = 0.125;
+ }
txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
(((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
@@ -1824,7 +2040,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R200_TXC_ARG_A_TFACTOR_COLOR |
R200_TXC_ARG_B_R0_COLOR |
R200_TXC_ARG_C_TFACTOR_COLOR |
- R200_TXC_NEG_ARG_C |
+ (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
R200_TXC_OP_DOT2_ADD);
OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
(0 << R200_TXC_TFACTOR_SEL_SHIFT) |
@@ -1846,7 +2062,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R200_TXC_SCALE_ARG_A |
R200_TXC_ARG_B_R0_COLOR |
R200_TXC_BIAS_ARG_B |
- R200_TXC_SCALE_ARG_B |
+ (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
R200_TXC_ARG_C_R1_COLOR |
R200_TXC_OP_MADD);
OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
@@ -1868,6 +2084,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R200_TXC_SCALE_ARG_A |
R200_TXC_ARG_B_R0_COLOR |
R200_TXC_BIAS_ARG_B |
+ (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
R200_TXC_ARG_C_R1_COLOR |
R200_TXC_OP_MADD);
OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
@@ -1885,17 +2102,17 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
/* shader constants */
- OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(1.0, /* src range [1, 2] */
- yco - 1.0,
- -yoff, /* range [-1, 0] */
+ OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
+ yco > 1.0 ? yco - 1.0: yco,
+ yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * 0.125 + 0.5, /* range [-4, 4] */
- uco[1] * 0.125 + 0.5,
- uco[2] * 0.125 + 0.5,
+ OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
+ uco[1] * ucscale + 0.5, /* or [-2, 2] */
+ uco[2] * ucscale + 0.5,
0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * 0.25 + 0.5, /* range [-2, 2] */
- vco[1] * 0.25 + 0.5,
- vco[2] * 0.25 + 0.5,
+ OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
+ vco[1] * vcscale + 0.5, /* or [-4, 4] */
+ vco[2] * vcscale + 0.5,
0.0));
FINISH_ACCEL();
diff --git a/src/radeon_video.c b/src/radeon_video.c
index 6314eb1..8479160 100644
--- a/src/radeon_video.c
+++ b/src/radeon_video.c
@@ -541,18 +541,6 @@ static XF86ImageRec Images[NUM_IMAGES] =
#endif
-/* Reference color space transform data */
-typedef struct tagREF_TRANSFORM
-{
- float RefLuma;
- float RefRCb;
- float RefRCr;
- float RefGCb;
- float RefGCr;
- float RefBCb;
- float RefBCr;
-} REF_TRANSFORM;
-
/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces */
static REF_TRANSFORM trans[2] =
{
@@ -560,7 +548,6 @@ static REF_TRANSFORM trans[2] =
{1.1678, 0.0, 1.7980, -0.2139, -0.5345, 2.1186, 0.0} /* BT.709 */
};
-
/* Gamma curve definition for preset gammas */
typedef struct tagGAMMA_CURVE_R100
{
diff --git a/src/radeon_video.h b/src/radeon_video.h
index 4498002..be33871 100644
--- a/src/radeon_video.h
+++ b/src/radeon_video.h
@@ -123,6 +123,18 @@ typedef struct {
int vsync;
} RADEONPortPrivRec, *RADEONPortPrivPtr;
+/* Reference color space transform data */
+typedef struct tagREF_TRANSFORM
+{
+ float RefLuma;
+ float RefRCb;
+ float RefRCr;
+ float RefGCb;
+ float RefGCr;
+ float RefBCb;
+ float RefBCr;
+} REF_TRANSFORM;
+
xf86CrtcPtr
radeon_xv_pick_best_crtc(ScrnInfoPtr pScrn,
int x1, int x2, int y1, int y2);
More information about the xorg-commit
mailing list