xf86-video-ati: Branch 'master' - 4 commits

Alex Deucher agd5f at kemper.freedesktop.org
Fri Feb 11 14:23:06 PST 2011


 src/drmmode_display.c               |   10 ++--
 src/evergreen_accel.c               |    7 +++
 src/evergreen_exa.c                 |   31 +++++++++++++
 src/evergreen_reg.h                 |    3 +
 src/evergreen_shader.c              |   60 ++++++++++++++++++++++-----
 src/evergreen_shader.h              |   66 ++++++++++++++---------------
 src/evergreen_textured_videofuncs.c |    6 ++
 src/r600_exa.c                      |   31 +++++++++++++
 src/r600_reg.h                      |   21 +++++----
 src/r600_shader.c                   |   60 ++++++++++++++++++++++-----
 src/r600_shader.h                   |   80 ++++++++++++++++++------------------
 src/r600_textured_videofuncs.c      |    6 ++
 src/r6xx_accel.c                    |   11 ++++
 src/radeon_atombios.c               |   28 ++++++------
 src/radeon_kms.c                    |    5 --
 15 files changed, 299 insertions(+), 126 deletions(-)

New commits:
commit f1dc419c989addc4737aed06ec8b8acdb4d40063
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Fri Feb 11 17:21:10 2011 -0500

    kms: evergreen/ni big endian accel support
    
    Based on 6xx/7xx patches from Cédric Cano.
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c
index ef24e18..d41ce72 100644
--- a/src/evergreen_accel.c
+++ b/src/evergreen_accel.c
@@ -1188,7 +1188,11 @@ evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
     BEGIN_BATCH(10);
     EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
     PACK3(IT_INDEX_TYPE, 1);
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    E32(IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
+#else
     E32(draw_conf->index_type);
+#endif
     PACK3(IT_NUM_INSTANCES, 1);
     E32(draw_conf->num_instances);
     PACK3(IT_DRAW_INDEX_AUTO, 2);
@@ -1227,6 +1231,9 @@ void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
     vtx_res.dst_sel_y       = SQ_SEL_Y;
     vtx_res.dst_sel_z       = SQ_SEL_Z;
     vtx_res.dst_sel_w       = SQ_SEL_W;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    vtx_res.endian          = SQ_ENDIAN_8IN32;
+#endif
     evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
 
     /* Draw */
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 7e1e464..d451154 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -127,9 +127,15 @@ EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     } else if (accel_state->dst_obj.bpp == 16) {
 	cb_conf.format = COLOR_5_6_5;
 	cb_conf.comp_swap = 2; /* RGB */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	cb_conf.endian = ENDIAN_8IN16;
+#endif
     } else {
 	cb_conf.format = COLOR_8_8_8_8;
 	cb_conf.comp_swap = 1; /* ARGB */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	cb_conf.endian = ENDIAN_8IN32;
+#endif
     }
     cb_conf.source_format = EXPORT_4C_16BPC;
     cb_conf.blend_clamp = 1;
@@ -795,6 +801,19 @@ static Bool EVERGREENTextureSetup(PicturePtr pPict, PixmapPtr pPix,
     tex_res.bo                  = accel_state->src_obj[unit].bo;
     tex_res.mip_bo              = accel_state->src_obj[unit].bo;
 
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    switch (accel_state->src_obj[unit].bpp) {
+    case 16:
+	tex_res.endian = SQ_ENDIAN_8IN16;
+	break;
+    case 32:
+	tex_res.endian = SQ_ENDIAN_8IN32;
+	break;
+    default :
+	break;
+    }
+#endif
+
     /* component swizzles */
     switch (pPict->format) {
     case PICT_a1r5g5b5:
@@ -1224,6 +1243,18 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
     cb_conf.pmask = 0xf;
     if (accel_state->dst_obj.tiling_flags == 0)
 	cb_conf.array_mode = 1;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    switch (dst_obj.bpp) {
+    case 16:
+	cb_conf.endian = ENDIAN_8IN16;
+	break;
+    case 32:
+	cb_conf.endian = ENDIAN_8IN32;
+	break;
+    default:
+	break;
+    }
+#endif
     evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
 
     if (pMask)
diff --git a/src/evergreen_reg.h b/src/evergreen_reg.h
index ef9ac87..b08dbf9 100644
--- a/src/evergreen_reg.h
+++ b/src/evergreen_reg.h
@@ -110,6 +110,9 @@ enum {
 
 #define IT_WAIT_ADDR(x)         ((x) >> 2)
 
+/* IT_INDEX_TYPE */
+#define IT_INDEX_TYPE_SWAP_MODE(x) ((x) << 2)
+
 enum {
 
     SQ_LDS_ALLOC_PS                                       = 0x288ec,
diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c
index ef56d2d..bbdd7a7 100644
--- a/src/evergreen_shader.c
+++ b/src/evergreen_shader.c
@@ -110,7 +110,11 @@ int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(0),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(1),
 			     ALT_CONST(0),
@@ -331,7 +335,11 @@ int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(0),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(1),
 			     ALT_CONST(0),
@@ -358,7 +366,11 @@ int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(8),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(0),
 			     ALT_CONST(0),
@@ -689,7 +701,11 @@ int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                  SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(0),
-                             ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
                              CONST_BUF_NO_STRIDE(0),
                              MEGA_FETCH(1),
 			     ALT_CONST(0),
@@ -716,7 +732,11 @@ int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                  SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(8),
-                             ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
                              CONST_BUF_NO_STRIDE(0),
                              MEGA_FETCH(0),
 			     ALT_CONST(0),
@@ -2344,7 +2364,11 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(0),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(1),
 			     ALT_CONST(0),
@@ -2371,7 +2395,11 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(8),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(0),
 			     ALT_CONST(0),
@@ -2398,7 +2426,11 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(16),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(0),
 			     ALT_CONST(0),
@@ -2426,7 +2458,11 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(0),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(1),
 			     ALT_CONST(0),
@@ -2453,7 +2489,11 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(8),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(0),
                              ALT_CONST(0),
diff --git a/src/evergreen_shader.h b/src/evergreen_shader.h
index 4106619..8c7136b 100644
--- a/src/evergreen_shader.h
+++ b/src/evergreen_shader.h
@@ -104,23 +104,23 @@
 #define SRC_SEL_Z(x)    (x)
 #define SRC_SEL_W(x)    (x)
 
-#define CF_DWORD0(addr, jmptbl) ((addr) | ((jmptbl) << 24))
+#define CF_DWORD0(addr, jmptbl) cpu_to_le32(((addr) | ((jmptbl) << 24)))
 #define CF_DWORD1(pc, cf_const, cond, count, vpm, eop, cf_inst, wqm, b) \
-        (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | ((count) << 10) | \
-         ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | ((wqm) << 30) | ((b) << 31))
+    cpu_to_le32((((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | ((count) << 10) | \
+		 ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | ((wqm) << 30) | ((b) << 31)))
 
-#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30))
+#define CF_ALU_DWORD0(addr, kb0, kb1, km0) cpu_to_le32((((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30)))
 #define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, alt_const, cf_inst, wqm, b) \
-        (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
-	 ((count) << 18) | ((alt_const) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31))
+    cpu_to_le32((((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
+		 ((count) << 18) | ((alt_const) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31)))
 
 #define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
-	 (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | \
-	  ((index_gpr) << 23) | ((es) << 30))
+    cpu_to_le32((((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | \
+		 ((index_gpr) << 23) | ((es) << 30)))
 #define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, bc, vpm, eop, cf_inst, m, b) \
-        (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | \
-	 ((bc) << 16) | ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | \
-	 ((m) << 30) | ((b) << 31))
+    cpu_to_le32((((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | \
+		 ((bc) << 16) | ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | \
+		 ((m) << 30) | ((b) << 31)))
 
 // ALU clause insts
 #define SRC0_SEL(x)        (x)
@@ -185,19 +185,19 @@
 #define CLAMP(x)          (x)
 
 #define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
-        (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
-         ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
-	 ((im) << 26) | ((ps) << 29) | ((last) << 31))
+    cpu_to_le32((((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
+		 ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
+		 ((im) << 26) | ((ps) << 29) | ((last) << 31)))
 
 #define ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
-        (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
-         ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \
-	 ((dr) << 28) | ((de) << 29) | ((clamp) << 31))
+    cpu_to_le32((((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
+		 ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \
+		 ((dr) << 28) | ((de) << 29) | ((clamp) << 31)))
 
 #define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
-        (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
-         ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
-	 ((de) << 29) | ((clamp) << 31))
+    cpu_to_le32((((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
+		 ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
+		 ((de) << 29) | ((clamp) << 31)))
 
 // VTX clause insts
 // vxt insts
@@ -235,14 +235,14 @@
 #define BUFFER_INDEX_MODE(x) (x)
 
 #define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
-        (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
-	 ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26))
+    cpu_to_le32((((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
+		 ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26)))
 #define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
-        (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
-	 ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))
+    cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+		 ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)))
 #define VTX_DWORD2(offset, es, cbns, mf, alt_const, bim)			\
-	(((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19) | ((alt_const) << 20) | ((bim) << 21))
-#define VTX_DWORD_PAD 0x00000000
+    cpu_to_le32((((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19) | ((alt_const) << 20) | ((bim) << 21)))
+#define VTX_DWORD_PAD cpu_to_le32(0x00000000)
 
 // TEX clause insts
 // tex insts
@@ -267,15 +267,15 @@
 #define SAMPLER_ID(x)     (x)
 
 #define TEX_DWORD0(tex_inst, im, fwq, resource_id, src_gpr, sr, ac, rim, sim) \
-	 (((tex_inst) << 0) | ((im) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
-	  ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24) | ((rim) << 25) | ((sim) << 27))
+    cpu_to_le32((((tex_inst) << 0) | ((im) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
+		 ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24) | ((rim) << 25) | ((sim) << 27)))
 #define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
-        (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
-	 ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31))
+    cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+		 ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31)))
 #define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
-        (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \
-	 ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))
-#define TEX_DWORD_PAD 0x00000000
+    cpu_to_le32((((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \
+		 ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29)))
+#define TEX_DWORD_PAD cpu_to_le32(0x00000000)
 
 extern int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
 extern int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
index b184956..315e2ba 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -422,10 +422,16 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	    cb_conf.format = COLOR_5_6_5;
 	    cb_conf.comp_swap = 2; /* RGB */
 	}
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	cb_conf.endian = ENDIAN_8IN16;
+#endif
 	break;
     case 32:
 	cb_conf.format = COLOR_8_8_8_8;
 	cb_conf.comp_swap = 1; /* ARGB */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	cb_conf.endian = ENDIAN_8IN32;
+#endif
 	break;
     default:
 	return;
commit e8dc728a549323f1babe337b9d42ad504af1ca39
Author: Cédric Cano <ccano at interfaceconcept.com>
Date:   Fri Feb 11 17:06:47 2011 -0500

    ums: atombios endian fixes
    
    agd5f: fix a few more cases.
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/radeon_atombios.c b/src/radeon_atombios.c
index 3da6068..207783b 100644
--- a/src/radeon_atombios.c
+++ b/src/radeon_atombios.c
@@ -782,15 +782,15 @@ rhdAtomDTDTimings(atomBiosHandlePtr handle, ATOM_DTD_FORMAT *dtd)
 
     mode->CrtcHDisplay = mode->HDisplay = le16_to_cpu(dtd->usHActive);
     mode->CrtcVDisplay = mode->VDisplay = le16_to_cpu(dtd->usVActive);
-    mode->CrtcHBlankStart = dtd->usHActive + dtd->ucHBorder;
+    mode->CrtcHBlankStart = le16_to_cpu(dtd->usHActive) + dtd->ucHBorder;
     mode->CrtcHBlankEnd = mode->CrtcHBlankStart + le16_to_cpu(dtd->usHBlanking_Time);
     mode->CrtcHTotal = mode->HTotal = mode->CrtcHBlankEnd + dtd->ucHBorder;
-    mode->CrtcVBlankStart = dtd->usVActive + dtd->ucVBorder;
+    mode->CrtcVBlankStart = le16_to_cpu(dtd->usVActive) + dtd->ucVBorder;
     mode->CrtcVBlankEnd = mode->CrtcVBlankStart + le16_to_cpu(dtd->usVBlanking_Time);
     mode->CrtcVTotal = mode->VTotal = mode->CrtcVBlankEnd + dtd->ucVBorder;
-    mode->CrtcHSyncStart = mode->HSyncStart = dtd->usHActive + le16_to_cpu(dtd->usHSyncOffset);
+    mode->CrtcHSyncStart = mode->HSyncStart = le16_to_cpu(dtd->usHActive) + le16_to_cpu(dtd->usHSyncOffset);
     mode->CrtcHSyncEnd = mode->HSyncEnd = mode->HSyncStart + le16_to_cpu(dtd->usHSyncWidth);
-    mode->CrtcVSyncStart = mode->VSyncStart = dtd->usVActive + le16_to_cpu(dtd->usVSyncOffset);
+    mode->CrtcVSyncStart = mode->VSyncStart = le16_to_cpu(dtd->usVActive) + le16_to_cpu(dtd->usVSyncOffset);
     mode->CrtcVSyncEnd = mode->VSyncEnd = mode->VSyncStart + le16_to_cpu(dtd->usVSyncWidth);
 
     mode->SynthClock = mode->Clock = le16_to_cpu(dtd->usPixClk) * 10;
@@ -1540,7 +1540,7 @@ RADEONLookupGPIOLineForDDC(ScrnInfoPtr pScrn, uint8_t id)
 
 	    if (IS_DCE4_VARIANT) {
 	        if ((i == 7) &&
-		    (gpio->usClkMaskRegisterIndex == 0x1936) &&
+		    (le16_to_cpu(gpio->usClkMaskRegisterIndex) == 0x1936) &&
 		    (gpio->sucI2cId.ucAccess == 0)) {
 		    gpio->sucI2cId.ucAccess = 0x97;
 		    gpio->ucDataMaskShift = 8;
@@ -1579,14 +1579,14 @@ RADEONLookupGPIOLineForDDC(ScrnInfoPtr pScrn, uint8_t id)
     ErrorF("hw capable: %d\n", gpio->sucI2cId.sbfAccess.bfHW_Capable);
     ErrorF("hw engine id: %d\n", gpio->sucI2cId.sbfAccess.bfHW_EngineID);
     ErrorF("line mux %d\n", gpio->sucI2cId.sbfAccess.bfI2C_LineMux);
-    ErrorF("mask_clk_reg: 0x%x\n", gpio->usClkMaskRegisterIndex * 4);
-    ErrorF("mask_data_reg: 0x%x\n", gpio->usDataMaskRegisterIndex * 4);
-    ErrorF("put_clk_reg: 0x%x\n", gpio->usClkEnRegisterIndex * 4);
-    ErrorF("put_data_reg: 0x%x\n", gpio->usDataEnRegisterIndex * 4);
-    ErrorF("get_clk_reg: 0x%x\n", gpio->usClkY_RegisterIndex * 4);
-    ErrorF("get_data_reg: 0x%x\n", gpio->usDataY_RegisterIndex * 4);
-    ErrorF("a_clk_reg: 0x%x\n", gpio->usClkA_RegisterIndex * 4);
-    ErrorF("a_data_reg: 0x%x\n", gpio->usDataA_RegisterIndex * 4);
+    ErrorF("mask_clk_reg: 0x%x\n", le16_to_cpu(gpio->usClkMaskRegisterIndex) * 4);
+    ErrorF("mask_data_reg: 0x%x\n", le16_to_cpu(gpio->usDataMaskRegisterIndex) * 4);
+    ErrorF("put_clk_reg: 0x%x\n", le16_to_cpu(gpio->usClkEnRegisterIndex) * 4);
+    ErrorF("put_data_reg: 0x%x\n", le16_to_cpu(gpio->usDataEnRegisterIndex) * 4);
+    ErrorF("get_clk_reg: 0x%x\n", le16_to_cpu(gpio->usClkY_RegisterIndex) * 4);
+    ErrorF("get_data_reg: 0x%x\n", le16_to_cpu(gpio->usDataY_RegisterIndex) * 4);
+    ErrorF("a_clk_reg: 0x%x\n", le16_to_cpu(gpio->usClkA_RegisterIndex) * 4);
+    ErrorF("a_data_reg: 0x%x\n", le16_to_cpu(gpio->usDataA_RegisterIndex) * 4);
     ErrorF("mask_clk_mask: %d\n", gpio->ucClkMaskShift);
     ErrorF("mask_data_mask: %d\n", gpio->ucDataMaskShift);
     ErrorF("put_clk_mask: %d\n", gpio->ucClkEnShift);
@@ -1645,7 +1645,7 @@ radeon_lookup_hpd_id(ScrnInfoPtr pScrn, ATOM_HPD_INT_RECORD *record)
     for (i = 0; i < num_indices; i++) {
 	pin = &gpio_info->asGPIO_Pin[i];
 	if (record->ucHPDIntGPIOID == pin->ucGPIO_ID) {
-	    if ((pin->usGpioPin_AIndex * 4) == reg) {
+	    if ((le16_to_cpu(pin->usGpioPin_AIndex) * 4) == reg) {
 		switch (pin->ucGpioPinBitShift) {
 		case 0:
 		default:
commit 48ffad957f1dbca909515ffa00629f4caa68706b
Author: Cédric Cano <ccano at interfaceconcept.com>
Date:   Fri Feb 11 17:00:31 2011 -0500

    kms: 6xx/7xx big endian accel support
    
    agd5f: minor cleanups
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 8057d56..ea58482 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -274,9 +274,15 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     } else if (accel_state->dst_obj.bpp == 16) {
 	cb_conf.format = COLOR_5_6_5;
 	cb_conf.comp_swap = 2; /* RGB */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	cb_conf.endian = ENDIAN_8IN16;
+#endif
     } else {
 	cb_conf.format = COLOR_8_8_8_8;
 	cb_conf.comp_swap = 1; /* ARGB */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	cb_conf.endian = ENDIAN_8IN32;
+#endif
     }
     cb_conf.source_format = 1;
     cb_conf.blend_clamp = 1;
@@ -941,6 +947,19 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
     tex_res.mip_bo              = accel_state->src_obj[unit].bo;
     tex_res.request_size        = 1;
 
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    switch (accel_state->src_obj[unit].bpp) {
+    case 16:
+	tex_res.endian = SQ_ENDIAN_8IN16;
+	break;
+    case 32:
+	tex_res.endian = SQ_ENDIAN_8IN32;
+	break;
+    default :
+	break;
+    }
+#endif
+
     /* component swizzles */
     switch (pPict->format) {
     case PICT_a1r5g5b5:
@@ -1409,6 +1428,18 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     cb_conf.rop = 3;
     if (accel_state->dst_obj.tiling_flags == 0)
 	cb_conf.array_mode = 1;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    switch (dst_obj.bpp) {
+    case 16:
+	cb_conf.endian = ENDIAN_8IN16;
+	break;
+    case 32:
+	cb_conf.endian = ENDIAN_8IN32;
+	break;
+    default:
+	break;
+    }
+#endif
     r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
 
     if (pMask)
diff --git a/src/r600_reg.h b/src/r600_reg.h
index 937926b..95c924f 100644
--- a/src/r600_reg.h
+++ b/src/r600_reg.h
@@ -117,16 +117,19 @@ enum {
 
 /* IT_WAIT_REG_MEM operation encoding */
 
-#define IT_WAIT_ALWAYS          (0<<0)
-#define IT_WAIT_LT              (1<<0)
-#define IT_WAIT_LE              (2<<0)
-#define IT_WAIT_EQ              (3<<0)
-#define IT_WAIT_NE              (4<<0)
-#define IT_WAIT_GE              (5<<0)
-#define IT_WAIT_GT              (6<<0)
-#define IT_WAIT_REG             (0<<4)
-#define IT_WAIT_MEM             (1<<4)
+#define IT_WAIT_ALWAYS          (0 << 0)
+#define IT_WAIT_LT              (1 << 0)
+#define IT_WAIT_LE              (2 << 0)
+#define IT_WAIT_EQ              (3 << 0)
+#define IT_WAIT_NE              (4 << 0)
+#define IT_WAIT_GE              (5 << 0)
+#define IT_WAIT_GT              (6 << 0)
+#define IT_WAIT_REG             (0 << 4)
+#define IT_WAIT_MEM             (1 << 4)
 
 #define IT_WAIT_ADDR(x)         ((x) >> 2)
 
+/* IT_INDEX_TYPE */
+#define IT_INDEX_TYPE_SWAP_MODE(x) ((x) << 2)
+
 #endif
diff --git a/src/r600_shader.c b/src/r600_shader.c
index 7dceffe..ab2f485 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -111,7 +111,11 @@ int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(0),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(1));
     shader[i++] = VTX_DWORD_PAD;
@@ -341,7 +345,11 @@ int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(0),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(1));
     shader[i++] = VTX_DWORD_PAD;
@@ -366,7 +374,11 @@ int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(8),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(0));
     shader[i++] = VTX_DWORD_PAD;
@@ -596,7 +608,11 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                  SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(0),
-                             ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
                              CONST_BUF_NO_STRIDE(0),
                              MEGA_FETCH(1));
     shader[i++] = VTX_DWORD_PAD;
@@ -621,7 +637,11 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                  SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(8),
-                             ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
                              CONST_BUF_NO_STRIDE(0),
                              MEGA_FETCH(0));
     shader[i++] = VTX_DWORD_PAD;
@@ -2191,7 +2211,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(0),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(1));
     shader[i++] = VTX_DWORD_PAD;
@@ -2216,7 +2240,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(8),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(0));
     shader[i++] = VTX_DWORD_PAD;
@@ -2241,7 +2269,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(16),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(0));
     shader[i++] = VTX_DWORD_PAD;
@@ -2267,7 +2299,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(0),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(1));
     shader[i++] = VTX_DWORD_PAD;
@@ -2292,7 +2328,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     shader[i++] = VTX_DWORD2(OFFSET(8),
-			     ENDIAN_SWAP(ENDIAN_NONE),
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
+#else
+                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
+#endif
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(0));
     shader[i++] = VTX_DWORD_PAD;
diff --git a/src/r600_shader.h b/src/r600_shader.h
index 3d5acc7..d79ed40 100644
--- a/src/r600_shader.h
+++ b/src/r600_shader.h
@@ -157,32 +157,32 @@
 #define SRC_SEL_Z(x)    (x)
 #define SRC_SEL_W(x)    (x)
 
-#define CF_DWORD0(addr) (addr)
+#define CF_DWORD0(addr) cpu_to_le32((addr))
 // R7xx has another entry (COUNT3), but that is only used for adding a bit to count.
 // We allow one more bit for count in the argument of the macro on R7xx instead.
 // R6xx: [0,7]  R7xx: [1,16]
 #define CF_DWORD1(pc, cf_const, cond, count, call_count, eop, vpm, cf_inst, wqm, b) \
-        (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | (((count) & 7) << 10) | (((count) >> 3) << 19) | \
-         ((call_count) << 13) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31))
+    cpu_to_le32((((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | (((count) & 7) << 10) | (((count) >> 3) << 19) | \
+		 ((call_count) << 13) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31)))
 
-#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30))
+#define CF_ALU_DWORD0(addr, kb0, kb1, km0) cpu_to_le32((((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30)))
 #define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, uw, cf_inst, wqm, b) \
-        (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
-	 ((count) << 18) | ((uw) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31))
+    cpu_to_le32((((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
+		 ((count) << 18) | ((uw) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31)))
 
 #define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
-	 (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | ((index_gpr) << 23) | \
-          ((es) << 30))
+    cpu_to_le32((((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | ((index_gpr) << 23) | \
+		 ((es) << 30)))
 // R7xx apparently doesn't have the ELEM_LOOP entry any more
 // We still expose it, but ELEM_LOOP is explicitely R6xx now.
 // TODO: is this just forgotten in the docs, or really not available any more?
 #define CF_ALLOC_IMP_EXP_DWORD1_BUF(array_size, comp_mask, el, bc, eop, vpm, cf_inst, wqm, b) \
-        (((array_size) << 0) | ((comp_mask) << 12) | ((el) << 16) | ((bc) << 17) | \
-	 ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31))
+    cpu_to_le32((((array_size) << 0) | ((comp_mask) << 12) | ((el) << 16) | ((bc) << 17) | \
+		 ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31)))
 #define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, el, bc, eop, vpm, cf_inst, wqm, b) \
-        (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | ((el) << 16) | \
-	 ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \
-	 ((wqm) << 30) | ((b) << 31))
+    cpu_to_le32((((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | ((el) << 16) | \
+		 ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \
+		 ((wqm) << 30) | ((b) << 31)))
 
 // ALU clause insts
 #define SRC0_SEL(x)        (x)
@@ -242,18 +242,18 @@
 #define CLAMP(x)          (x)
 
 #define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
-        (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
-         ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
-	 ((im) << 26) | ((ps) << 29) | ((last) << 31))
+    cpu_to_le32((((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
+		 ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
+		 ((im) << 26) | ((ps) << 29) | ((last) << 31)))
 // R7xx has alu_inst at a different slot, and no fog merge any more (no fix function fog any more)
 #define R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
-        (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
-         ((fm) << 5) | ((omod) << 6) | ((alu_inst) << 8) | ((bs) << 18) | ((dst_gpr) << 21) | \
-	 ((dr) << 28) | ((de) << 29) | ((clamp) << 31))
+    cpu_to_le32((((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
+		 ((fm) << 5) | ((omod) << 6) | ((alu_inst) << 8) | ((bs) << 18) | ((dst_gpr) << 21) | \
+		 ((dr) << 28) | ((de) << 29) | ((clamp) << 31)))
 #define R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
-        (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
-         ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \
-	 ((dr) << 28) | ((de) << 29) | ((clamp) << 31))
+    cpu_to_le32((((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
+		 ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \
+		 ((dr) << 28) | ((de) << 29) | ((clamp) << 31)))
 // This is a general chipset macro, but due to selection by chipid typically not usable in static arrays
 // Fog is NOT USED on R7xx, even if specified.
 #define ALU_DWORD1_OP2(chipfamily, s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
@@ -261,9 +261,9 @@
      R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) : \
      R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp))
 #define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
-        (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
-         ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
-	 ((de) << 29) | ((clamp) << 31))
+    cpu_to_le32((((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
+		 ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
+		 ((de) << 29) | ((clamp) << 31)))
 
 // VTX clause insts
 // vxt insts
@@ -301,17 +301,17 @@
 #define MEGA_FETCH(x)     (x)
 
 #define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
-        (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
-	 ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26))
+    cpu_to_le32((((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
+		 ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26)))
 #define VTX_DWORD1_SEM(semantic_id, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
-        (((semantic_id) << 0) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
-	 ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))
+    cpu_to_le32((((semantic_id) << 0) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+		 ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)))
 #define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
-        (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
-	 ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))
+    cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+		 ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)))
 #define VTX_DWORD2(offset, es, cbns, mf) \
-	 (((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19))
-#define VTX_DWORD_PAD 0x00000000
+    cpu_to_le32((((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19)))
+#define VTX_DWORD_PAD cpu_to_le32(0x00000000)
 
 // TEX clause insts
 // tex insts
@@ -337,15 +337,15 @@
 
 // R7xx has an additional parameter ALT_CONST. We always expose it, but ALT_CONST is R7xx only
 #define TEX_DWORD0(tex_inst, bfm, fwq, resource_id, src_gpr, sr, ac) \
-	 (((tex_inst) << 0) | ((bfm) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
-          ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24))
+    cpu_to_le32((((tex_inst) << 0) | ((bfm) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
+		 ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24)))
 #define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
-        (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
-	 ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31))
+    cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+		 ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31)))
 #define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
-        (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \
-	 ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))
-#define TEX_DWORD_PAD 0x00000000
+    cpu_to_le32((((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \
+		 ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29)))
+#define TEX_DWORD_PAD cpu_to_le32(0x00000000)
 
 extern int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
 extern int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index fdf76b2..4ff0833 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -439,10 +439,16 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	    cb_conf.format = COLOR_5_6_5;
 	    cb_conf.comp_swap = 2; /* RGB */
 	}
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	cb_conf.endian = ENDIAN_8IN16;
+#endif
 	break;
     case 32:
 	cb_conf.format = COLOR_8_8_8_8;
 	cb_conf.comp_swap = 1; /* ARGB */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	cb_conf.endian = ENDIAN_8IN32;
+#endif
 	break;
     default:
 	return;
diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c
index a9d1cb4..b5acf85 100644
--- a/src/r6xx_accel.c
+++ b/src/r6xx_accel.c
@@ -1184,7 +1184,11 @@ r600_draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32
     BEGIN_BATCH(8 + count);
     EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
     PACK3(ib, IT_INDEX_TYPE, 1);
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    E32(ib, IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
+#else
     E32(ib, draw_conf->index_type);
+#endif
     PACK3(ib, IT_NUM_INSTANCES, 1);
     E32(ib, draw_conf->num_instances);
 
@@ -1214,7 +1218,11 @@ r600_draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
     BEGIN_BATCH(10);
     EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
     PACK3(ib, IT_INDEX_TYPE, 1);
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    E32(ib, IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type);
+#else
     E32(ib, draw_conf->index_type);
+#endif
     PACK3(ib, IT_NUM_INSTANCES, 1);
     E32(ib, draw_conf->num_instances);
     PACK3(ib, IT_DRAW_INDEX_AUTO, 2);
@@ -1249,6 +1257,9 @@ void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
     vtx_res.mem_req_size    = 1;
     vtx_res.vb_addr         = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
     vtx_res.bo              = accel_state->vbo.vb_bo;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    vtx_res.endian          = SQ_ENDIAN_8IN32;
+#endif
     r600_set_vtx_resource(pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);
 
     /* Draw */
commit 151b22bd7c3b1002a7261538611fb2b468815c86
Author: Cédric Cano <ccano at interfaceconcept.com>
Date:   Fri Feb 11 16:33:16 2011 -0500

    kms/6xx+: endian swap cursor uploads
    
    agd5f: remove kms surface for cursor since
    we now do the swap in the cursor load to avoid
    breaking pre-6xx cards with BE.
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 06cfd95..705a41a 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -411,14 +411,14 @@ static void
 drmmode_load_cursor_argb (xf86CrtcPtr crtc, CARD32 *image)
 {
 	drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
-	void *ptr;
+	int i;
+	uint32_t *ptr;
 
 	/* cursor should be mapped already */
-	ptr = drmmode_crtc->cursor_bo->ptr;
-
-	memcpy (ptr, image, 64 * 64 * 4);
+	ptr = (uint32_t *)(drmmode_crtc->cursor_bo->ptr);
 
-	return;
+	for (i = 0; i < 64 * 64; i++)
+		ptr[i] = cpu_to_le32(image[i]);
 }
 
 
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index 65d62c2..82258d9 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -1148,11 +1148,6 @@ static Bool radeon_setup_kernel_mem(ScreenPtr pScreen)
                     return FALSE;
                 }
 
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-		radeon_bo_set_tiling(info->cursor_bo[c], RADEON_TILING_SWAP_32BIT |
-				     RADEON_TILING_SURFACE, CURSOR_WIDTH);
-#endif
-
                 if (radeon_bo_map(info->cursor_bo[c], 1)) {
                     ErrorF("Failed to map cursor buffer memory\n");
                 }


More information about the xorg-commit mailing list