xf86-video-intel: 7 commits - README.sgml src/i810_reg.h src/i830_cursor.c src/i830_dri.c src/i830_driver.c src/i830.h src/i830_rotate.c src/i830_video.c src/i830_video.h src/i915_video.c src/Makefile.am

Eric Anholt anholt at kemper.freedesktop.org
Thu Aug 10 02:14:46 EEST 2006


 README.sgml       |    4 
 src/Makefile.am   |    4 
 src/i810_reg.h    |  617 ------------------------------------------------------
 src/i830.h        |    1 
 src/i830_cursor.c |    1 
 src/i830_dri.c    |    2 
 src/i830_driver.c |   61 +++--
 src/i830_rotate.c |    5 
 src/i830_video.c  |  611 -----------------------------------------------------
 src/i830_video.h  |   76 ++++++
 src/i915_video.c  |  467 ++++++++++++++++++++++++++++++++++++++++
 11 files changed, 592 insertions(+), 1257 deletions(-)

New commits:
diff-tree caa3b35cd772fc75d65a7ff791f00addbb39a764 (from 2a7426cf138e518a5eafb40f478359160a7ec98b)
Author: Eric Anholt <eric at anholt.net>
Date:   Wed Aug 9 16:11:30 2006 -0700

    Correct typos in README source.

diff --git a/README.sgml b/README.sgml
index 2f9abb7..fd52608 100644
--- a/README.sgml
+++ b/README.sgml
@@ -5,7 +5,7 @@
 <article>
 <title>Information for Intel graphics driver users
 <author>Eric Anholt
-<date>206-08-04
+<date>2006-08-04
 <toc>
 
 <sect>Introduction
@@ -84,7 +84,7 @@ for information on configuration options
   <item>Many systems with Intel graphics have issues with setting video modes
 at larger than some small maximum resolution.  This is not fixed in the current
 release, but is being actively worked on in the modesetting branch.
-  <item>Bug #5774: Some systems have issues with VT switching.  This should
+  <item>Bug #5795: Some systems have issues with VT switching.  This should
 be fixed with the modesetting brach integration.
   <item>Bug #5817: Hotkey switching from LVDS to CRT breaks CRT output.  This
 is a known issue, but will not be fixed in the current release.
diff-tree 2a7426cf138e518a5eafb40f478359160a7ec98b (from 24e59a0daa20b7c3e5028c9ca7972052801d02a1)
Author: Eric Anholt <eric at anholt.net>
Date:   Wed Aug 9 15:47:09 2006 -0700

    Clean up warnings.

diff --git a/src/i830.h b/src/i830.h
index ced8c75..38a880f 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -447,6 +447,7 @@ extern int I830WaitLpRing(ScrnInfoPtr pS
 extern void I830SetPIOAccess(I830Ptr pI830);
 extern void I830SetMMIOAccess(I830Ptr pI830);
 extern void I830PrintErrorState(ScrnInfoPtr pScrn);
+extern void I965PrintErrorState(ScrnInfoPtr pScrn);
 extern void I830Sync(ScrnInfoPtr pScrn);
 extern void I830InitHWCursor(ScrnInfoPtr pScrn);
 extern Bool I830CursorInit(ScreenPtr pScreen);
diff --git a/src/i830_cursor.c b/src/i830_cursor.c
index dd3071e..c9e04e9 100644
--- a/src/i830_cursor.c
+++ b/src/i830_cursor.c
@@ -475,7 +475,6 @@ I830SetCursorPosition(ScrnInfoPtr pScrn,
 {
    I830Ptr pI830 = I830PTR(pScrn);
    CARD32 temp = 0;
-   static Bool outsideViewport = FALSE;
    Bool hide = FALSE, show = FALSE;
    int oldx = x, oldy = y;
    int hotspotx = 0, hotspoty = 0;
diff --git a/src/i830_dri.c b/src/i830_dri.c
index 445bbec..c9b52c4 100644
--- a/src/i830_dri.c
+++ b/src/i830_dri.c
@@ -102,8 +102,6 @@ static void I830DRIInitBuffers(WindowPtr
 static void I830DRIMoveBuffers(WindowPtr pParent, DDXPointRec ptOldOrg,
 			       RegionPtr prgnSrc, CARD32 index);
 
-static Bool I830DRICloseFullScreen(ScreenPtr pScreen);
-static Bool I830DRIOpenFullScreen(ScreenPtr pScreen);
 static void I830DRITransitionTo2d(ScreenPtr pScreen);
 static void I830DRITransitionTo3d(ScreenPtr pScreen);
 static void I830DRITransitionMultiToSingle3d(ScreenPtr pScreen);
diff --git a/src/i830_driver.c b/src/i830_driver.c
index 62a62ab..5d5fb72 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -6614,39 +6614,52 @@ I965PrintErrorState(ScrnInfoPtr pScrn)
 	  INREG(LP_RING + RING_LEN), INREG(LP_RING + RING_START));
 
    ErrorF("Err ID (eir): %x Err Status (esr): %x Err Mask (emr): %x\n",
-	  INREG(EIR), INREG(ESR), INREG(EMR));
+	  (int)INREG(EIR), (int)INREG(ESR), (int)INREG(EMR));
 
-   ErrorF("instdone: %x instdone_1: %x\n", INREG(INST_DONE_I965), INREG(INST_DONE_1));
-   ErrorF("instpm: %x\n", INREG(INST_PM));
+   ErrorF("instdone: %x instdone_1: %x\n", (int)INREG(INST_DONE_I965),
+	  (int)INREG(INST_DONE_1));
+   ErrorF("instpm: %x\n", (int)INREG(INST_PM));
 
    ErrorF("memmode: %lx instps: %lx\n", INREG(MEMMODE), INREG(INST_PS_I965));
 
    ErrorF("HW Status mask (hwstam): %x\nIRQ enable (ier): %x imr: %x iir: %x\n",
-	  INREG(HWSTAM), INREG(IER), INREG(IMR), INREG(IIR));
+	  (int)INREG(HWSTAM), (int)INREG(IER), (int)INREG(IMR),
+	  (int)INREG(IIR));
 
    ErrorF("acthd: %lx dma_fadd_p: %lx\n", INREG(ACTHD), INREG(DMA_FADD_P));
    ErrorF("ecoskpd: %lx excc: %lx\n", INREG(ECOSKPD), INREG(EXCC));
 
-   ErrorF("cache_mode: %x/%x\n", INREG(CACHE_MODE_0), INREG(CACHE_MODE_1));
-   ErrorF("mi_arb_state: %x\n", INREG(MI_ARB_STATE));
-
-   ErrorF("IA_VERTICES_COUNT_QW %x/%x\n", INREG(IA_VERTICES_COUNT_QW), INREG(IA_VERTICES_COUNT_QW+4));
-   ErrorF("IA_PRIMITIVES_COUNT_QW %x/%x\n", INREG(IA_PRIMITIVES_COUNT_QW), INREG(IA_PRIMITIVES_COUNT_QW+4));
-
-   ErrorF("VS_INVOCATION_COUNT_QW %x/%x\n", INREG(VS_INVOCATION_COUNT_QW), INREG(VS_INVOCATION_COUNT_QW+4));
-
-   ErrorF("GS_INVOCATION_COUNT_QW %x/%x\n", INREG(GS_INVOCATION_COUNT_QW), INREG(GS_INVOCATION_COUNT_QW+4));
-   ErrorF("GS_PRIMITIVES_COUNT_QW %x/%x\n", INREG(GS_PRIMITIVES_COUNT_QW), INREG(GS_PRIMITIVES_COUNT_QW+4));
-
-   ErrorF("CL_INVOCATION_COUNT_QW %x/%x\n", INREG(CL_INVOCATION_COUNT_QW), INREG(CL_INVOCATION_COUNT_QW+4));
-   ErrorF("CL_PRIMITIVES_COUNT_QW %x/%x\n", INREG(CL_PRIMITIVES_COUNT_QW), INREG(CL_PRIMITIVES_COUNT_QW+4));
-
-   ErrorF("PS_INVOCATION_COUNT_QW %x/%x\n", INREG(PS_INVOCATION_COUNT_QW), INREG(PS_INVOCATION_COUNT_QW+4));
-   ErrorF("PS_DEPTH_COUNT_QW %x/%x\n", INREG(PS_DEPTH_COUNT_QW), INREG(PS_DEPTH_COUNT_QW+4));
-
-   ErrorF("WIZ_CTL %x\n", INREG(WIZ_CTL));
-   ErrorF("TS_CTL %x  TS_DEBUG_DATA %x\n", INREG(TS_CTL), INREG(TS_DEBUG_DATA));
-   ErrorF("TD_CTL %x / %x\n", INREG(TD_CTL), INREG(TD_CTL2));
+   ErrorF("cache_mode: %x/%x\n", (int)INREG(CACHE_MODE_0),
+	  (int)INREG(CACHE_MODE_1));
+   ErrorF("mi_arb_state: %x\n", (int)INREG(MI_ARB_STATE));
+
+   ErrorF("IA_VERTICES_COUNT_QW %x/%x\n", (int)INREG(IA_VERTICES_COUNT_QW),
+	  (int)INREG(IA_VERTICES_COUNT_QW+4));
+   ErrorF("IA_PRIMITIVES_COUNT_QW %x/%x\n", (int)INREG(IA_PRIMITIVES_COUNT_QW),
+	  (int)INREG(IA_PRIMITIVES_COUNT_QW+4));
+
+   ErrorF("VS_INVOCATION_COUNT_QW %x/%x\n", (int)INREG(VS_INVOCATION_COUNT_QW),
+	  (int)INREG(VS_INVOCATION_COUNT_QW+4));
+
+   ErrorF("GS_INVOCATION_COUNT_QW %x/%x\n", (int)INREG(GS_INVOCATION_COUNT_QW),
+	  (int)INREG(GS_INVOCATION_COUNT_QW+4));
+   ErrorF("GS_PRIMITIVES_COUNT_QW %x/%x\n", (int)INREG(GS_PRIMITIVES_COUNT_QW),
+	  (int)INREG(GS_PRIMITIVES_COUNT_QW+4));
+
+   ErrorF("CL_INVOCATION_COUNT_QW %x/%x\n", (int)INREG(CL_INVOCATION_COUNT_QW),
+	  (int)INREG(CL_INVOCATION_COUNT_QW+4));
+   ErrorF("CL_PRIMITIVES_COUNT_QW %x/%x\n", (int)INREG(CL_PRIMITIVES_COUNT_QW),
+	  (int)INREG(CL_PRIMITIVES_COUNT_QW+4));
+
+   ErrorF("PS_INVOCATION_COUNT_QW %x/%x\n", (int)INREG(PS_INVOCATION_COUNT_QW),
+	  (int)INREG(PS_INVOCATION_COUNT_QW+4));
+   ErrorF("PS_DEPTH_COUNT_QW %x/%x\n", (int)INREG(PS_DEPTH_COUNT_QW),
+	  (int)INREG(PS_DEPTH_COUNT_QW+4));
+
+   ErrorF("WIZ_CTL %x\n", (int)INREG(WIZ_CTL));
+   ErrorF("TS_CTL %x  TS_DEBUG_DATA %x\n", (int)INREG(TS_CTL),
+	  (int)INREG(TS_DEBUG_DATA));
+   ErrorF("TD_CTL %x / %x\n", (int)INREG(TD_CTL), (int)INREG(TD_CTL2));
 
    
 }
diff --git a/src/i830_rotate.c b/src/i830_rotate.c
index aee04da..020d7e6 100644
--- a/src/i830_rotate.c
+++ b/src/i830_rotate.c
@@ -217,7 +217,6 @@ I915UpdateRotate (ScreenPtr      pScreen
    drm_context_t myContext = 0;
 #endif
    Bool didLock = FALSE;
-   CARD32 format;
 
    if (I830IsPrimary(pScrn)) {
       pI8301 = pI830;
diff --git a/src/i830_video.c b/src/i830_video.c
index 9a66f7f..693c703 100644
--- a/src/i830_video.c
+++ b/src/i830_video.c
@@ -2191,6 +2191,7 @@ static CARD32 float_to_uint (float f) {
    return x.i;
 }
 
+#if 0
 static struct {
    CARD32   svg_ctl;
    char	    *name;
@@ -2219,6 +2220,7 @@ brw_debug (ScrnInfoPtr pScrn, char *when
       ErrorF("\t%34.34s: 0x%08x\n", svg_ctl_bits[i].name, v);
    }
 }
+#endif
 
 #define WATCH_SF 0
 #define WATCH_WIZ 0
@@ -2234,10 +2236,8 @@ BroadwaterDisplayVideoTextured(ScrnInfoP
 			       DrawablePtr pDraw)
 {
    I830Ptr pI830 = I830PTR(pScrn);
-   CARD32 format, ms3, s2;
    BoxPtr pbox;
    int nbox, dxo, dyo;
-   Bool planar;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
@@ -2811,9 +2811,6 @@ BroadwaterDisplayVideoTextured(ScrnInfoP
       OUT_RING(0); /* index buffer offset, ignored */
       ADVANCE_LP_RING();
 
-      int   j, k;
-      CARD32	  ctl = 0, rdata;
-      
 #if 0
       for (j = 0; j < 100000; j++) {
 	ctl = INREG(BRW_VF_CTL);
diff-tree 24e59a0daa20b7c3e5028c9ca7972052801d02a1 (from parents)
Merge: bb6080735efc40e103e92b65d0c2f1f729156632 32f1199937e92b9100aba52cbbb97157014e3182
Author: Eric Anholt <eric at anholt.net>
Date:   Wed Aug 9 14:19:06 2006 -0700

    Merge branch 'textured-video', bringing in fixed-up i915 textured video.
    
    Conflicts:
    
    	src/i830_video.c

diff --cc src/i810_reg.h
index 9774a4c,2c5e271..05710c4
@@@ -1607,244 -918,6 +992,244 @@@
  #define ENABLE_FOG_CONST	(1<<24)
  #define ENABLE_FOG_DENSITY	(1<<23)
  
 +/*
 + * New regs for broadwater -- we need to split this file up sensibly somehow.
 + */
- #define BRW_3D(Pipeline,Opcode,Subopcode) (CMD_3D | \
++#define BRW_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
 +					   ((Pipeline) << 27) | \
 +					   ((Opcode) << 24) | \
 +					   ((Subopcode) << 16))
 +
 +#define BRW_URB_FENCE				BRW_3D(0, 0, 0)
 +#define BRW_CS_URB_STATE			BRW_3D(0, 0, 1)
 +#define BRW_CONSTANT_BUFFER			BRW_3D(0, 0, 2)
 +#define BRW_STATE_PREFETCH			BRW_3D(0, 0, 3)
 +
 +#define BRW_STATE_BASE_ADDRESS			BRW_3D(0, 1, 1)
 +#define BRW_STATE_SIP				BRW_3D(0, 1, 2)
 +#define BRW_PIPELINE_SELECT			BRW_3D(0, 1, 4)
 +
 +#define BRW_MEDIA_STATE_POINTERS		BRW_3D(2, 0, 0)
 +#define BRW_MEDIA_OBJECT			BRW_3D(2, 1, 0)
 +
 +#define BRW_3DSTATE_PIPELINED_POINTERS		BRW_3D(3, 0, 0)
 +#define BRW_3DSTATE_BINDING_TABLE_POINTERS	BRW_3D(3, 0, 1)
 +#define BRW_3DSTATE_VERTEX_BUFFERS		BRW_3D(3, 0, 8)
 +#define BRW_3DSTATE_VERTEX_ELEMENTS		BRW_3D(3, 0, 9)
 +#define BRW_3DSTATE_INDEX_BUFFER		BRW_3D(3, 0, 0xa)
 +#define BRW_3DSTATE_VF_STATISTICS		BRW_3D(3, 0, 0xb)
 +
 +#define BRW_3DSTATE_DRAWING_RECTANGLE		BRW_3D(3, 1, 0)
 +#define BRW_3DSTATE_CONSTANT_COLOR		BRW_3D(3, 1, 1)
 +#define BRW_3DSTATE_SAMPLER_PALETTE_LOAD	BRW_3D(3, 1, 2)
 +#define BRW_3DSTATE_CHROMA_KEY			BRW_3D(3, 1, 4)
 +#define BRW_3DSTATE_DEPTH_BUFFER		BRW_3D(3, 1, 5)
 +#define BRW_3DSTATE_POLY_STIPPLE_OFFSET		BRW_3D(3, 1, 6)
 +#define BRW_3DSTATE_POLY_STIPPLE_PATTERN	BRW_3D(3, 1, 7)
 +#define BRW_3DSTATE_LINE_STIPPLE		BRW_3D(3, 1, 8)
 +#define BRW_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP	BRW_3D(3, 1, 9)
 +/* These two are BLC and CTG only, not BW or CL */
 +#define BRW_3DSTATE_AA_LINE_PARAMS		BRW_3D(3, 1, 0xa)
 +#define BRW_3DSTATE_GS_SVB_INDEX		BRW_3D(3, 1, 0xb)
 +
 +#define BRW_PIPE_CONTROL			BRW_3D(3, 2, 0)
 +
 +#define BRW_3DPRIMITIVE				BRW_3D(3, 3, 0)
 +
 +#define PIPELINE_SELECT_3D		0
 +#define PIPELINE_SELECT_MEDIA		1
 +
 +#define UF0_CS_REALLOC			(1 << 13)
 +#define UF0_VFE_REALLOC			(1 << 12)
 +#define UF0_SF_REALLOC			(1 << 11)
 +#define UF0_CLIP_REALLOC		(1 << 10)
 +#define UF0_GS_REALLOC			(1 << 9)
 +#define UF0_VS_REALLOC			(1 << 8)
 +#define UF1_CLIP_FENCE_SHIFT		20
 +#define UF1_GS_FENCE_SHIFT		10
 +#define UF1_VS_FENCE_SHIFT		0
 +#define UF2_CS_FENCE_SHIFT		20
 +#define UF2_VFE_FENCE_SHIFT		10
 +#define UF2_SF_FENCE_SHIFT		0
 +
 +/* for BRW_STATE_BASE_ADDRESS */
 +#define BASE_ADDRESS_MODIFY		(1 << 0)
 +
 +/* for BRW_3DSTATE_PIPELINED_POINTERS */
 +#define BRW_GS_DISABLE		       0
 +#define BRW_GS_ENABLE		       1
 +#define BRW_CLIP_DISABLE	       0
 +#define BRW_CLIP_ENABLE		       1
 +
 +/* for BRW_PIPE_CONTROL */
 +#define BRW_PIPE_CONTROL_NOWRITE       (0 << 14)
 +#define BRW_PIPE_CONTROL_WRITE_QWORD   (1 << 14)
 +#define BRW_PIPE_CONTROL_WRITE_DEPTH   (2 << 14)
 +#define BRW_PIPE_CONTROL_WRITE_TIME    (3 << 14)
 +#define BRW_PIPE_CONTROL_DEPTH_STALL   (1 << 13)
 +#define BRW_PIPE_CONTROL_WC_FLUSH      (1 << 12)
 +#define BRW_PIPE_CONTROL_IS_FLUSH      (1 << 11)
 +#define BRW_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
 +#define BRW_PIPE_CONTROL_GLOBAL_GTT    (1 << 2)
 +#define BRW_PIPE_CONTROL_LOCAL_PGTT    (0 << 2)
 +
 +/* VERTEX_BUFFER_STATE Structure */
 +#define VB0_BUFFER_INDEX_SHIFT		27
 +#define VB0_VERTEXDATA			(0 << 26)
 +#define VB0_INSTANCEDATA		(1 << 26)
 +#define VB0_BUFFER_PITCH_SHIFT		0
 +
 +/* VERTEX_ELEMENT_STATE Structure */
 +#define VE0_VERTEX_BUFFER_INDEX_SHIFT	27
 +#define VE0_VALID			(1 << 26)
 +#define VE0_FORMAT_SHIFT		16
 +#define VE0_OFFSET_SHIFT		0
 +#define VE1_VFCOMPONENT_0_SHIFT		28
 +#define VE1_VFCOMPONENT_1_SHIFT		24
 +#define VE1_VFCOMPONENT_2_SHIFT		20
 +#define VE1_VFCOMPONENT_3_SHIFT		16
 +#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT	0
 +
 +/* 3DPRIMITIVE bits */
 +#define BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
 +#define BRW_3DPRIMITIVE_VERTEX_RANDOM	  (1 << 15)
 +/* Primitive types are in brw_defines.h */
 +#define BRW_3DPRIMITIVE_TOPOLOGY_SHIFT	  10
 +
 +#define BRW_SVG_CTL		       0x7400
 +
 +#define BRW_SVG_CTL_GS_BA	       (0 << 8)
 +#define BRW_SVG_CTL_SS_BA	       (1 << 8)
 +#define BRW_SVG_CTL_IO_BA	       (2 << 8)
 +#define BRW_SVG_CTL_GS_AUB	       (3 << 8)
 +#define BRW_SVG_CTL_IO_AUB	       (4 << 8)
 +#define BRW_SVG_CTL_SIP		       (5 << 8)
 +
 +#define BRW_SVG_RDATA		       0x7404
 +#define BRW_SVG_WORK_CTL	       0x7408
 +
 +#define BRW_VF_CTL		       0x7500
 +
 +#define BRW_VF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
 +#define BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID	   (0 << 8)
 +#define BRW_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG	   (1 << 8)
 +#define BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE   (0 << 4)
 +#define BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX	   (1 << 4)
 +#define BRW_VF_CTL_SKIP_INITIAL_PRIMITIVES	   (1 << 3)
 +#define BRW_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE	   (1 << 2)
 +#define BRW_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE	   (1 << 1)
 +#define BRW_VF_CTL_SNAPSHOT_ENABLE	     	   (1 << 0)
 +
 +#define BRW_VF_STRG_VAL		       0x7504
 +#define BRW_VF_STR_VL_OVR	       0x7508
 +#define BRW_VF_VC_OVR		       0x750c
 +#define BRW_VF_STR_PSKIP	       0x7510
 +#define BRW_VF_MAX_PRIM		       0x7514
 +#define BRW_VF_RDATA		       0x7518
 +
 +#define BRW_VS_CTL		       0x7600
 +#define BRW_VS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
 +#define BRW_VS_CTL_SNAPSHOT_MUX_VERTEX_0	   (0 << 8)
 +#define BRW_VS_CTL_SNAPSHOT_MUX_VERTEX_1	   (1 << 8)
 +#define BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT	   (2 << 8)
 +#define BRW_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER  (3 << 8)
 +#define BRW_VS_CTL_SNAPSHOT_ALL_THREADS		   (1 << 2)
 +#define BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE	   (1 << 1)
 +#define BRW_VS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
 +
 +#define BRW_VS_STRG_VAL		       0x7604
 +#define BRW_VS_RDATA		       0x7608
 +
 +#define BRW_SF_CTL		       0x7b00
 +#define BRW_SF_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
 +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID	   (0 << 8)
 +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
 +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID	   (2 << 8)
 +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
 +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID	   (4 << 8)
 +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
 +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT	   (6 << 8)
 +#define BRW_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER  (7 << 8)
 +#define BRW_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE  (1 << 4)
 +#define BRW_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE	   (1 << 3)
 +#define BRW_SF_CTL_SNAPSHOT_ALL_THREADS		   (1 << 2)
 +#define BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE	   (1 << 1)
 +#define BRW_SF_CTL_SNAPSHOT_ENABLE		   (1 << 0)
 +
 +#define BRW_SF_STRG_VAL		       0x7b04
 +#define BRW_SF_RDATA		       0x7b18
 +
 +#define BRW_WIZ_CTL		       0x7c00
 +#define BRW_WIZ_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
 +#define BRW_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT	   16
 +#define BRW_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER   (0 << 8)
 +#define BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE     (1 << 8)
 +#define BRW_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE   (2 << 8)
 +#define BRW_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH	      (1 << 6)
 +#define BRW_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS    (1 << 5)
 +#define BRW_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE   (1 << 4)
 +#define BRW_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG	      (1 << 3)
 +#define BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS	      (1 << 2)
 +#define BRW_WIZ_CTL_THREAD_SNAPSHOT_ENABLE	      (1 << 1)
 +#define BRW_WIZ_CTL_SNAPSHOT_ENABLE		      (1 << 0)
 +
 +#define BRW_WIZ_STRG_VAL			      0x7c04
 +#define BRW_WIZ_RDATA				      0x7c18
 +
 +#define BRW_TS_CTL		       0x7e00
 +#define BRW_TS_CTL_SNAPSHOT_COMPLETE		   (1 << 31)
 +#define BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR	   (0 << 8)
 +#define BRW_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR   (3 << 8)
 +#define BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS	   (1 << 2)
 +#define BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS  	   (1 << 1)
 +#define BRW_TS_CTL_SNAPSHOT_ENABLE		   (1 << 0)
 +
 +#define BRW_TS_STRG_VAL		       0x7e04
 +#define BRW_TS_RDATA		       0x7e08
 +
 +#define BRW_TD_CTL		       0x8000
 +#define BRW_TD_CTL_MUX_SHIFT	       8
 +#define BRW_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH	   (1 << 7)
 +#define BRW_TD_CTL_FORCE_EXTERNAL_HALT		   (1 << 6)
 +#define BRW_TD_CTL_EXCEPTION_MASK_OVERRIDE	   (1 << 5)
 +#define BRW_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE  (1 << 4)
 +#define BRW_TD_CTL_BREAKPOINT_ENABLE		   (1 << 2)
 +#define BRW_TD_CTL2		       0x8004
 +#define BRW_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
 +#define BRW_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE      (1 << 26)
 +#define BRW_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE	      (1 << 25)
 +#define BRW_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT	      16
 +#define BRW_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE	      (1 << 8)
 +#define BRW_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
 +#define BRW_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE	      (1 << 6)
 +#define BRW_TD_CTL2_SF_EXECUTION_MASK_ENABLE	      (1 << 5)
 +#define BRW_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE     (1 << 4)
 +#define BRW_TD_CTL2_GS_EXECUTION_MASK_ENABLE	      (1 << 3)
 +#define BRW_TD_CTL2_VS_EXECUTION_MASK_ENABLE	      (1 << 0)
 +#define BRW_TD_VF_VS_EMSK	       0x8008
 +#define BRW_TD_GS_EMSK		       0x800c
 +#define BRW_TD_CLIP_EMSK	       0x8010
 +#define BRW_TD_SF_EMSK		       0x8014
 +#define BRW_TD_WIZ_EMSK		       0x8018
 +#define BRW_TD_0_6_EHTRG_VAL	       0x801c
 +#define BRW_TD_0_7_EHTRG_VAL	       0x8020
 +#define BRW_TD_0_6_EHTRG_MSK           0x8024
 +#define BRW_TD_0_7_EHTRG_MSK	       0x8028
 +#define BRW_TD_RDATA		       0x802c
 +#define BRW_TD_TS_EMSK		       0x8030
 +
 +#define BRW_EU_CTL		       0x8800
 +#define BRW_EU_CTL_SELECT_SHIFT	       16
 +#define BRW_EU_CTL_DATA_MUX_SHIFT      8
 +#define BRW_EU_ATT_0		       0x8810
 +#define BRW_EU_ATT_1		       0x8814
 +#define BRW_EU_ATT_DATA_0	       0x8820
 +#define BRW_EU_ATT_DATA_1	       0x8824
 +#define BRW_EU_ATT_CLR_0	       0x8830
 +#define BRW_EU_ATT_CLR_1	       0x8834
 +#define BRW_EU_RDATA		       0x8840
 +
 +/* End regs for broadwater */
  
  #define MAX_DISPLAY_PIPES	2
  
diff --cc src/i830_rotate.c
index 425eeef,716f425..aee04da
@@@ -576,15 -575,15 +576,15 @@@
        OUT_RING(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE);
        OUT_RING(0x00000000);
        /* draw rect */
-       OUT_RING(STATE3D_DRAWING_RECTANGLE);
 -      OUT_RING(0x7d800003);
 -      OUT_RING(0x00000000);
 -      OUT_RING(0x00000000);
 -      OUT_RING((pScrn->virtualX - 1) | (pScrn->virtualY - 1) << 16);
 -      OUT_RING(0x00000000);
 -      OUT_RING(0x00000000);
++      OUT_RING(_3DSTATE_DRAW_RECT_CMD);
 +      OUT_RING(0x00000000);	/* flags */
 +      OUT_RING(0x00000000);	/* ymin, xmin */
 +      OUT_RING((pScrn->virtualX - 1) | (pScrn->virtualY - 1) << 16); /* ymax, xmax */
 +      OUT_RING(0x00000000);	/* yorigin, xorigin */
 +      OUT_RING(MI_NOOP);
  
        /* front buffer */
-       OUT_RING(STATE3D_BUFFER_INFO);
 -      OUT_RING(0x7d8e0001);
++      OUT_RING(_3DSTATE_BUF_INFO_CMD);
        OUT_RING(0x03800000 | (((pI830->displayWidth * pI830->cpp) / 4) << 2));
        if (I830IsPrimary(pScrn))
  	 OUT_RING(pI830->FrontBuffer.Start);
diff --cc src/i830_video.c
index a056fdf,044d6c1..9a66f7f
@@@ -2134,1384 -1986,6 +2095,821 @@@
     OVERLAY_UPDATE;
  }
  
- /* Doesn't matter on the order for our purposes */
- typedef struct {
-    unsigned char red, green, blue, alpha;
- } intel_color_t;
- 
- /* Vertex format */
- typedef union {
-    struct {
-       float x, y, z, w;
-       intel_color_t color;
-       intel_color_t specular;
-       float u0, v0;
-       float u1, v1;
-       float u2, v2;
-       float u3, v3;
-    } v;
-    float f[24];
-    unsigned int  ui[24];
-    unsigned char ub4[24][4];
- } intelVertex, *intelVertexPtr;
- 
- static void draw_poly(CARD32 *vb,
-                       float verts[][2],
-                       float texcoords[][2],
- 		      float texcoords2[][2])
- {
-    int vertex_size;
-    intelVertex tmp;
-    int i, k;
- 
-    if (texcoords2 != NULL)
-       vertex_size = 10;
-    else
-       vertex_size = 8;
-    
-    /* initial constant vertex fields */
-    tmp.v.z = 1.0;
-    tmp.v.w = 1.0; 
-    tmp.v.color.red = 255;
-    tmp.v.color.green = 255;
-    tmp.v.color.blue = 255;
-    tmp.v.color.alpha = 255;
-    tmp.v.specular.red = 0;
-    tmp.v.specular.green = 0;
-    tmp.v.specular.blue = 0;
-    tmp.v.specular.alpha = 0;
- 
-    for (k = 0; k < 4; k++) {
-       tmp.v.x = verts[k][0];
-       tmp.v.y = verts[k][1];
-       tmp.v.u0 = texcoords[k][0];
-       tmp.v.v0 = texcoords[k][1];
-       if (texcoords2 != NULL) {
- 	 tmp.v.u1 = texcoords2[k][0];
- 	 tmp.v.v1 = texcoords2[k][1];
-       }
- 
-       for (i = 0 ; i < vertex_size ; i++)
-          vb[i] = tmp.ui[i];
- 
-       vb += vertex_size;
-    }
- }
- 
- union intfloat {
-    CARD32 ui;
-    float f;
- };
- 
- #define OUT_RING_F(x) do {						\
-    union intfloat _tmp;							\
-    _tmp.f = x;								\
-    OUT_RING(_tmp.ui);							\
- } while (0)
- 
- #define OUT_DCL(type, nr) do {						\
-    CARD32 chans = 0;							\
-    if (REG_TYPE_##type == REG_TYPE_T)					\
-       chans = D0_CHANNEL_ALL;						\
-    else if (REG_TYPE_##type != REG_TYPE_S)				\
-       FatalError("wrong reg type %d to declare\n", REG_TYPE_##type);	\
-    OUT_RING(D0_DCL |							\
- 	    (REG_TYPE_##type << D0_TYPE_SHIFT) | (nr << D0_NR_SHIFT) |	\
- 	    chans);							\
-    OUT_RING(0x00000000);						\
-    OUT_RING(0x00000000);						\
- } while (0)
- 
- #define OUT_TEXLD(dest_type, dest_nr, sampler_nr, addr_type, addr_nr)	\
- do {									\
-       OUT_RING(T0_TEXLD |						\
- 	       (REG_TYPE_##dest_type << T0_DEST_TYPE_SHIFT) |		\
- 	       (dest_nr << T0_DEST_NR_SHIFT) |				\
- 	       (sampler_nr << T0_SAMPLER_NR_SHIFT));			\
-       OUT_RING((REG_TYPE_##addr_type << T1_ADDRESS_REG_TYPE_SHIFT) |	\
- 	       (addr_nr << T1_ADDRESS_REG_NR_SHIFT));			\
-       OUT_RING(0x00000000);						\
- } while (0)
- 
- /* Move the dest_chan from src0 to dest, leaving the other channels alone */
- #define OUT_MOV_TO_CHANNEL(dest_type, dest_nr, src0_type, src0_nr,	\
- 			   dest_chan)					\
- do {									\
-    OUT_RING(A0_MOV | A0_DEST_CHANNEL_##dest_chan |			\
- 	    (REG_TYPE_##dest_type << A0_DEST_TYPE_SHIFT) |		\
- 	    (dest_nr << A0_DEST_NR_SHIFT) |				\
- 	    (REG_TYPE_##src0_type << A0_SRC0_TYPE_SHIFT) |		\
- 	    (src0_nr << A0_SRC0_NR_SHIFT));				\
-    OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |			\
- 	    (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |			\
- 	    (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |			\
- 	    (SRC_W << A1_SRC0_CHANNEL_W_SHIFT));			\
-    OUT_RING(0);								\
- } while (0)
- 
- /* Dot3-product src0 and src1, storing the result in dest_chan of the dest.
-  * Saturates, in case we have out-of-range YUV values.
-  */
- #define OUT_DP3_TO_CHANNEL(dest_type, dest_nr, src0_type, src0_nr,	\
- 			   src1_type, src1_nr, dest_chan)		\
- do {									\
-    OUT_RING(A0_DP3 | A0_DEST_CHANNEL_##dest_chan | A0_DEST_SATURATE |	\
- 	    (REG_TYPE_##dest_type << A0_DEST_TYPE_SHIFT) |		\
- 	    (dest_nr << A0_DEST_NR_SHIFT) |				\
- 	    (REG_TYPE_##src0_type << A0_SRC0_TYPE_SHIFT) |		\
- 	    (src0_nr << A0_SRC0_NR_SHIFT));				\
-    OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |			\
- 	    (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |			\
- 	    (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |			\
- 	    (SRC_W << A1_SRC0_CHANNEL_W_SHIFT) |			\
- 	    (REG_TYPE_##src1_type << A1_SRC1_TYPE_SHIFT) |		\
- 	    (src1_nr << A1_SRC1_TYPE_SHIFT) |				\
- 	    (SRC_X << A1_SRC1_CHANNEL_X_SHIFT) |			\
- 	    (SRC_Y << A1_SRC1_CHANNEL_Y_SHIFT));			\
-    OUT_RING((SRC_Z << A2_SRC1_CHANNEL_Z_SHIFT) |			\
- 	    (SRC_W << A2_SRC1_CHANNEL_W_SHIFT));			\
- } while (0)
- 
- static void
- I915DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
- 			 RegionPtr dstRegion,
- 			 short width, short height, int video_pitch,
- 			 int x1, int y1, int x2, int y2,
- 			 short src_w, short src_h, short drw_w, short drw_h,
- 			 DrawablePtr pDraw)
- {
-    I830Ptr pI830 = I830PTR(pScrn);
-    CARD32 format, ms3, s2;
-    BoxPtr pbox;
-    int nbox, dxo, dyo;
-    Bool planar;
- 
-    ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height,
- 	  video_pitch);
- 
-    switch (id) {
-    case FOURCC_UYVY:
-    case FOURCC_YUY2:
-       planar = FALSE;
-       break;
-    case FOURCC_YV12:
-    case FOURCC_I420:
-       planar = TRUE;
-       break;
-    default:
-       ErrorF("Unknown format 0x%x\n", id);
-       planar = FALSE;
-       break;
-    }
- 
-    /* Tell the rotation code that we have stomped its invariant state by
-     * setting a high bit.  We don't use any invariant 3D state for video, so we
-     * don't have to worry about it ourselves.
-     */
-    *pI830->used3D |= 1 << 30;
- 
-    BEGIN_LP_RING(44);
- 
-    /* invarient state */
-    OUT_RING(MI_NOOP);
-    OUT_RING(STATE3D_ANTI_ALIASING |
- 	    LINE_CAP_WIDTH_MODIFY | LINE_CAP_WIDTH_1_0 |
- 	    LINE_WIDTH_MODIFY | LINE_WIDTH_1_0);
- 
-    OUT_RING(STATE3D_DFLT_DIFFUSE_CMD);
-    OUT_RING(0x00000000);
- 
-    OUT_RING(STATE3D_DFLT_SPEC_CMD);
-    OUT_RING(0x00000000);
- 
-    OUT_RING(STATE3D_DFLT_Z_CMD);
-    OUT_RING(0x00000000);
- 
-    OUT_RING(STATE3D_COORD_SET_BINDINGS | CSB_TCB(0, 0) | CSB_TCB(1, 1) |
- 	    CSB_TCB(2,2) | CSB_TCB(3,3) | CSB_TCB(4,4) | CSB_TCB(5,5) |
- 	    CSB_TCB(6,6) | CSB_TCB(7,7));
- 
-    OUT_RING(STATE3D_RASTERIZATION_RULES |
- 	    ENABLE_TRI_FAN_PROVOKE_VRTX | TRI_FAN_PROVOKE_VRTX(2) |
- 	    ENABLE_LINE_STRIP_PROVOKE_VRTX | LINE_STRIP_PROVOKE_VRTX(1) |
- 	    ENABLE_TEXKILL_3D_4D | TEXKILL_4D |
- 	    ENABLE_POINT_RASTER_RULE | OGL_POINT_RASTER_RULE);
- 
-    OUT_RING(STATE3D_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | 1);
-    OUT_RING(0x00000000); /* texture coordinate wrap */
- 
-    /* flush map & render cache */
-    OUT_RING(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE);
-    OUT_RING(0x00000000);
- 
-    /* draw rect -- just clipping */
-    OUT_RING(STATE3D_DRAWING_RECTANGLE);
-    OUT_RING(0x00000000);	/* flags */
-    OUT_RING(0x00000000);	/* ymin, xmin */
-    OUT_RING((pScrn->virtualX - 1) | (pScrn->virtualY - 1) << 16); /* ymax, xmax */
-    OUT_RING(0x00000000);	/* yorigin, xorigin */
-    OUT_RING(MI_NOOP);
- 
-    /* scissor */
-    OUT_RING(STATE3D_SCISSOR_ENABLE | DISABLE_SCISSOR_RECT);
-    OUT_RING(STATE3D_SCISSOR_RECTANGLE);
-    OUT_RING(0x00000000);	/* ymin, xmin */
-    OUT_RING(0x00000000);	/* ymax, xmax */
- 
-    OUT_RING(0x7c000003);	/* unknown command */
-    OUT_RING(0x7d070000);
-    OUT_RING(0x00000000);
-    OUT_RING(0x68000002);
- 
-    /* context setup */
-    OUT_RING(STATE3D_MODES_4 |
- 	    ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
- 	    ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
- 	    ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
- 
-    OUT_RING(STATE3D_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) |
- 	    I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 4);
-    s2 = S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D);
-    if (planar)
-       s2 |= S2_TEXCOORD_FMT(1, TEXCOORDFMT_2D);
-    else
-       s2 |= S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT);
-    s2 |= S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT);
-    OUT_RING(s2);
-    OUT_RING((1 << S4_POINT_WIDTH_SHIFT) | S4_LINE_WIDTH_ONE |
- 	    S4_CULLMODE_NONE | S4_VFMT_SPEC_FOG | S4_VFMT_COLOR | S4_VFMT_XYZW);
-    OUT_RING(0x00000000); /* S5 - enable bits */
-    OUT_RING((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
- 	    (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
- 	    (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) | S6_COLOR_WRITE_ENABLE |
- 	    (2 << S6_TRISTRIP_PV_SHIFT));
- 
-    OUT_RING(STATE3D_INDEPENDENT_ALPHA_BLEND |
- 	    IAB_MODIFY_ENABLE |
- 	    IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
- 	    IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT) |
- 	    IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT));
- 
-    OUT_RING(STATE3D_CONST_BLEND_COLOR);
-    OUT_RING(0x00000000);
- 
-    OUT_RING(STATE3D_DEST_BUFFER_VARIABLES);
-    if (pI830->cpp == 2)
-       format = COLR_BUF_RGB565;
-    else
-       format = COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER;
- 
-    OUT_RING(LOD_PRECLAMP_OGL |
-      DSTORG_HORIZ_BIAS(0x80) | DSTORG_VERT_BIAS(0x80) | format);
- 
-    OUT_RING(STATE3D_STIPPLE);
-    OUT_RING(0x00000000);
- 
-    /* front buffer, pitch, offset */
-    OUT_RING(STATE3D_BUFFER_INFO);
-    OUT_RING(BUFFERID_COLOR_BACK | BUFFER_USE_FENCES |
- 	    (((pI830->displayWidth * pI830->cpp) / 4) << 2));
-    OUT_RING(pI830->bufferOffset);
-    ADVANCE_LP_RING();
- 
-    if (!planar) {
-       BEGIN_LP_RING(20);
-       /* fragment program - texture blend replace. */
-       OUT_RING(STATE3D_PIXEL_SHADER_PROGRAM | 8);
-       OUT_DCL(S, 0);
-       OUT_DCL(T, 0);
-       OUT_TEXLD(OC, 0, 0, T, 0);
-       /* End fragment program */
- 
-       OUT_RING(STATE3D_SAMPLER_STATE | 3);
-       OUT_RING(0x00000001);
-       OUT_RING(SS2_COLORSPACE_CONVERSION |
- 	       (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
- 	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
-       OUT_RING((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
- 	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT));
-       OUT_RING(0x00000000);
- 
-       OUT_RING(STATE3D_MAP_STATE | 3);
-       OUT_RING(0x00000001);	/* texture map #1 */
-       OUT_RING(pPriv->YBuf0offset);
-       ms3 = MAPSURF_422;
-       switch (id) {
-       case FOURCC_YUY2:
- 	 ms3 |= MT_422_YCRCB_NORMAL;
- 	 break;
-       case FOURCC_UYVY:
- 	 ms3 |= MT_422_YCRCB_SWAPY;
- 	 break;
-       }
-       ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
-       ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
-       if (!pI830->disableTiling)
- 	 ms3 |= MS3_USE_FENCE_REGS;
-       OUT_RING(ms3);
-       OUT_RING(((video_pitch / 4) - 1) << 21);
-       ADVANCE_LP_RING();
-    } else {
-       BEGIN_LP_RING(1 + 18 + (1 + 3*16) + 11 + 11);
-       OUT_RING(MI_NOOP);
-       /* For the planar formats, we set up three samplers -- one for each plane,
-        * in a Y8 format.  Because I couldn't get the special PLANAR_TO_PACKED
-        * shader setup to work, I did the manual pixel shader:
-        *
-        * y' = y - .0625
-        * u' = u - .5
-        * v' = v - .5;
-        *
-        * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
-        * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
-        * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
-        *
-        * register assignment:
-        * r0 = (y',u',v',0)
-        * r1 = (y,y,y,y)
-        * r2 = (u,u,u,u)
-        * r3 = (v,v,v,v)
-        * OC = (r,g,b,1)
-        */
-       OUT_RING(STATE3D_PIXEL_SHADER_CONSTANTS | 16);
-       OUT_RING(0x000000f);	/* constants 0-3 */
-       /* constant 0: normalization offsets */
-       OUT_RING_F(-0.0625);
-       OUT_RING_F(-0.5);
-       OUT_RING_F(-0.5);
-       OUT_RING_F(0.0);
-       /* constant 1: r coefficients*/
-       OUT_RING_F(1.1643);
-       OUT_RING_F(0.0);
-       OUT_RING_F(1.5958);
-       OUT_RING_F(0.0);
-       /* constant 2: g coefficients */
-       OUT_RING_F(1.1643);
-       OUT_RING_F(-0.39173);
-       OUT_RING_F(-0.81290);
-       OUT_RING_F(0.0);
-       /* constant 3: b coefficients */
-       OUT_RING_F(1.1643);
-       OUT_RING_F(2.017);
-       OUT_RING_F(0.0);
-       OUT_RING_F(0.0);
- 
-       OUT_RING(STATE3D_PIXEL_SHADER_PROGRAM | (3 * 16 - 1));
-       /* Declare samplers */
-       OUT_DCL(S, 0);
-       OUT_DCL(S, 1);
-       OUT_DCL(S, 2);
-       OUT_DCL(T, 0);
-       OUT_DCL(T, 1);
- 
-       /* Load samplers to temporaries.  Y (sampler 0) gets the un-halved coords
-        * from t1.
-        */
-       OUT_TEXLD(R, 1, 0, T, 1);
-       OUT_TEXLD(R, 2, 1, T, 0);
-       OUT_TEXLD(R, 3, 2, T, 0);
- 
-       /* Move the sampled YUV data in R[123] to the first 3 channels of R0. */
-       OUT_MOV_TO_CHANNEL(R, 0, R, 1, X);
-       OUT_MOV_TO_CHANNEL(R, 0, R, 2, Y);
-       OUT_MOV_TO_CHANNEL(R, 0, R, 3, Z);
- 
-       /* Normalize the YUV data */
-       OUT_RING(A0_ADD | A0_DEST_CHANNEL_ALL |
- 	       (REG_TYPE_R << A0_DEST_TYPE_SHIFT) | (0 << A0_DEST_NR_SHIFT) |				\
- 	       (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT));
-       OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |
- 	       (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |
- 	       (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |
- 	       (SRC_W << A1_SRC0_CHANNEL_W_SHIFT) |
- 	       (REG_TYPE_CONST << A1_SRC1_TYPE_SHIFT) | (0 << A1_SRC1_NR_SHIFT) |
- 	       (SRC_X << A1_SRC1_CHANNEL_X_SHIFT) |
- 	       (SRC_Y << A1_SRC1_CHANNEL_Y_SHIFT));
-       OUT_RING((SRC_Z << A2_SRC1_CHANNEL_Z_SHIFT) |
- 	       (SRC_W << A2_SRC1_CHANNEL_W_SHIFT));
- 
-       /* dot-product the YUV data in R0 by the vectors of coefficients for
-        * calculating R, G, and B, storing the results in the R, G, or B channels
-        * of the output color.
-        */
-       OUT_DP3_TO_CHANNEL(OC, 0, R, 0, CONST, 1, X);
-       OUT_DP3_TO_CHANNEL(OC, 0, R, 0, CONST, 2, Y);
-       OUT_DP3_TO_CHANNEL(OC, 0, R, 0, CONST, 3, Z);
- 
-       /* Set alpha of the output to 1.0, by wiring W to 1 and not actually using
-        * the source.
-        */
-       OUT_RING(A0_MOV | A0_DEST_CHANNEL_W |
- 	       (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | (0 << A0_DEST_NR_SHIFT) |
- 	       (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT));
-       OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |
- 	       (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |
- 	       (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |
- 	       (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT));
-       OUT_RING(0);
-       /* End fragment program */
- 
-       OUT_RING(STATE3D_SAMPLER_STATE | 9);
-       OUT_RING(0x00000007);
-       /* sampler 0 */
-       OUT_RING(0x00000000);
-       OUT_RING((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
- 	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
-       OUT_RING((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
- 	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT));
-       /* sampler 1 */
-       OUT_RING(0x00000000);
-       OUT_RING((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
- 	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
-       OUT_RING((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
- 	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT));
-       /* sampler 2 */
-       OUT_RING(0x00000000);
-       OUT_RING((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
- 	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
-       OUT_RING((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
- 	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT));
- 
-       OUT_RING(STATE3D_MAP_STATE | 9);
-       OUT_RING(0x00000007);
- 
-       OUT_RING(pPriv->YBuf0offset);
-       ms3 = MAPSURF_8BIT | MT_8BIT_I8;
-       ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
-       ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
-       OUT_RING(ms3);
-       OUT_RING(((video_pitch * 2 / 4) - 1) << 21);
- 
-       OUT_RING(pPriv->UBuf0offset);
-       ms3 = MAPSURF_8BIT | MT_8BIT_I8;
-       ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
-       ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
-       OUT_RING(ms3);
-       OUT_RING(((video_pitch / 4) - 1) << 21);
- 
-       OUT_RING(pPriv->VBuf0offset);
-       ms3 = MAPSURF_8BIT | MT_8BIT_I8;
-       ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
-       ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
-       OUT_RING(ms3);
-       OUT_RING(((video_pitch / 4) - 1) << 21);
-       ADVANCE_LP_RING();
-    }
-    
-    {
-       BEGIN_LP_RING(2);
-       OUT_RING(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE);
-       OUT_RING(0x00000000);
-       ADVANCE_LP_RING();
-    }
- 
-    dxo = dstRegion->extents.x1;
-    dyo = dstRegion->extents.y1;
- 
-    pbox = REGION_RECTS(dstRegion);
-    nbox = REGION_NUM_RECTS(dstRegion);
-    while (nbox--)
-    {
-       int box_x1 = pbox->x1;
-       int box_y1 = pbox->y1;
-       int box_x2 = pbox->x2;
-       int box_y2 = pbox->y2;
-       int j;
-       float src_scale_x, src_scale_y;
-       CARD32 vb[40];
-       float verts[4][2], tex[4][2], tex2[4][2];
-       int vert_data_count;
- 
-       pbox++;
- 
-       src_scale_x = (float)src_w / (float)drw_w;
-       src_scale_y  = (float)src_h / (float)drw_h;
- 
-       if (!planar)
- 	 vert_data_count = 32;
-       else
- 	 vert_data_count = 40;
- 
-       BEGIN_LP_RING(vert_data_count + 8);
-       OUT_RING(MI_NOOP);
-       OUT_RING(MI_NOOP);
-       OUT_RING(MI_NOOP);
-       OUT_RING(MI_NOOP);
-       OUT_RING(MI_NOOP);
-       OUT_RING(MI_NOOP);
-       OUT_RING(MI_NOOP);
- 
-       /* vertex data */
-       OUT_RING(PRIMITIVE3D | PRIM3D_INLINE | PRIM3D_TRIFAN |
- 	       (vert_data_count - 1));
-       verts[0][0] = box_x1; verts[0][1] = box_y1;
-       verts[1][0] = box_x2; verts[1][1] = box_y1;
-       verts[2][0] = box_x2; verts[2][1] = box_y2;
-       verts[3][0] = box_x1; verts[3][1] = box_y2;
- 
-       if (!planar) {
- 	 tex[0][0] = (box_x1 - dxo) * src_scale_x;
- 	 tex[0][1] = (box_y1 - dyo) * src_scale_y;
- 	 tex[1][0] = (box_x2 - dxo) * src_scale_x;
- 	 tex[1][1] = (box_y1 - dyo) * src_scale_y;
- 	 tex[2][0] = (box_x2 - dxo) * src_scale_x;
- 	 tex[2][1] = (box_y2 - dyo) * src_scale_y;
- 	 tex[3][0] = (box_x1 - dxo) * src_scale_x;
- 	 tex[3][1] = (box_y2 - dyo) * src_scale_y;
- 	 /* emit vertex buffer */
- 	 draw_poly(vb, verts, tex, NULL);
- 	 for (j = 0; j < vert_data_count; j++)
- 	    OUT_RING(vb[j]);
-       } else {
- 	 tex[0][0] = (box_x1 - dxo) * src_scale_x / 2.0;
- 	 tex[0][1] = (box_y1 - dyo) * src_scale_y / 2.0;
- 	 tex[1][0] = (box_x2 - dxo) * src_scale_x / 2.0;
- 	 tex[1][1] = (box_y1 - dyo) * src_scale_y / 2.0;
- 	 tex[2][0] = (box_x2 - dxo) * src_scale_x / 2.0;
- 	 tex[2][1] = (box_y2 - dyo) * src_scale_y / 2.0;
- 	 tex[3][0] = (box_x1 - dxo) * src_scale_x / 2.0;
- 	 tex[3][1] = (box_y2 - dyo) * src_scale_y / 2.0;
- 	 tex2[0][0] = (box_x1 - dxo) * src_scale_x;
- 	 tex2[0][1] = (box_y1 - dyo) * src_scale_y;
- 	 tex2[1][0] = (box_x2 - dxo) * src_scale_x;
- 	 tex2[1][1] = (box_y1 - dyo) * src_scale_y;
- 	 tex2[2][0] = (box_x2 - dxo) * src_scale_x;
- 	 tex2[2][1] = (box_y2 - dyo) * src_scale_y;
- 	 tex2[3][0] = (box_x1 - dxo) * src_scale_x;
- 	 tex2[3][1] = (box_y2 - dyo) * src_scale_y;
- 	 /* emit vertex buffer */
- 	 draw_poly(vb, verts, tex, tex2);
- 	 for (j = 0; j < vert_data_count; j++)
- 	    OUT_RING(vb[j]);
-       }
- 
-       ADVANCE_LP_RING();
-    }
- 
-    if (pI830->AccelInfoRec)
-       pI830->AccelInfoRec->NeedToSync = TRUE;
- }
- 
 +static const CARD32 sip_kernel_static[][4] = {	/* system (SIP) kernel image: one wait, then nop padding; one 4-dword row per instruction */
 +/*    wait (1) a0<1>UW a145<0,1,0>UW { align1 +  } */
 +    { 0x00000030, 0x20000108, 0x00001220, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +};
 +   
 +/*
 + * SF kernel: this program computes dA/dx and dA/dy for the texture
 + * coordinates along with the base texture coordinate.  It was extracted
 + * from the Mesa driver, and uses about 10 GRF registers.
 + */
 +
 +#define SF_KERNEL_NUM_GRF  16	/* fed to BRW_GRF_BLOCKS() for sf_state->thread0.grf_reg_count */
 +#define SF_MAX_THREADS	   1	/* sf_state->thread4.max_threads is programmed as this minus one */
 +
 +static const CARD32 sf_kernel_static[][4] = {	/* one 4-dword row per instruction; memcpy'd into the state area */
 +/*    send   0 (1) g6<1>F g1.12<0,1,0>F math mlen 1 rlen 1 { align1 +  } */
 +   { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
 +/*    send   0 (1) g6.4<1>F g1.20<0,1,0>F math mlen 1 rlen 1 { align1 +  } */
 +   { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
 +/*    add (8) g7<1>F g4<8,8,1>F g3<8,8,1>F { align1 +  } */
 +   { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
 +/*    mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 +  } */
 +   { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
 +/*    mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 +  } */
 +   { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
 +/*    mov (8) m1<1>F g7<0,1,0>F { align1 +  } */
 +   { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
 +/*    mov (8) m2<1>F g7.4<0,1,0>F { align1 +  } */
 +   { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
 +/*    mov (8) m3<1>F g3<8,8,1>F { align1 +  } */
 +   { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
 +/*    send   0 (8) a0<1>F g0<8,8,1>F urb mlen 4 rlen 0 write +0 transpose used complete EOT{ align1 +  } */
 +   { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +   { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +   { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +   { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +   { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +   { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +   { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +   { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +/*    nop (4) g0<1>UD { align1 +  } */
 +   { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
 +};
 +
 +/*
 + * Ok, this kernel picks up the required data flow values in g0 and g1
 + * and passes those along in m0 and m1. In m2-m9, it sticks constant
 + * values (bright pink).
 + */
 +
 +/* Our PS kernel uses less than 32 GRF registers (about 20) */
 +#define PS_KERNEL_NUM_GRF   32
 +#define PS_MAX_THREADS	   32
 +
 +#define BRW_GRF_BLOCKS(nreg)	(((nreg) + 15) / 16 - 1)	/* nreg rounded up to whole 16-register blocks, minus one; argument parenthesized so any expression is safe */
 +
 +static const CARD32 ps_kernel_static[][4] = {	/* PS/WM kernel image, one 4-dword row per instruction */
 +#include "wm_prog.h"
 +};	/* rows come entirely from the included file; copied to ps_kernel_offset at draw time */
 +
 +#define ALIGN(i,m)    (((i) + (m) - 1) & ~((m) - 1))
 +#define MIN(a,b) ((a) < (b) ? (a) : (b))
 +
 +#define WM_BINDING_TABLE_ENTRIES    2
 +
 +static CARD32 float_to_uint (float f) {	/* return the raw bit pattern of f; no numeric conversion */
 +   union {float as_float; CARD32 as_bits;} pun;	/* two views of the same storage */
 +   pun.as_float = f;
 +   return pun.as_bits;
 +}
 +
 +static struct {	/* debug table walked by brw_debug(): a value to write to BRW_SVG_CTL plus its log label */
 +   CARD32   svg_ctl;
 +   char	    *name;
 +} svg_ctl_bits[] = {
 +   { BRW_SVG_CTL_GS_BA, "General State Base Address" },
 +   { BRW_SVG_CTL_SS_BA, "Surface State Base Address" },
 +   { BRW_SVG_CTL_IO_BA, "Indirect Object Base Address" },
 +   { BRW_SVG_CTL_GS_AUB, "General State Access Upper Bound" },	/* GS = General State, as in GS_BA above */
 +   { BRW_SVG_CTL_IO_AUB, "Indirect Object Access Upper Bound" },
 +   { BRW_SVG_CTL_SIP, "System Instruction Pointer" },
 +   { 0, 0 },	/* sentinel: iteration stops at the NULL name */
 +};
 +
 +static void
 +brw_debug (ScrnInfoPtr pScrn, char *when)	/* log the readback for every svg_ctl_bits entry, labelled with "when" */
 +{
 +   I830Ptr pI830 = I830PTR(pScrn);	/* NOTE(review): presumably referenced by the OUTREG/INREG macros -- confirm */
 +   int	    i;
 +   CARD32   v;
 +   
 +   I830Sync (pScrn);	/* sync first, before any register access below */
 +   ErrorF("brw_debug: %s\n", when);
 +   for (i = 0; svg_ctl_bits[i].name; i++) {	/* a NULL name terminates the table */
 +      OUTREG(BRW_SVG_CTL, svg_ctl_bits[i].svg_ctl);	/* presumably selects what BRW_SVG_RDATA returns -- confirm */
 +      v = INREG(BRW_SVG_RDATA);
 +      ErrorF("\t%34.34s: 0x%08x\n", svg_ctl_bits[i].name, v);	/* label is padded/truncated to 34 columns */
 +   }
 +}
 +
 +#define WATCH_SF 0
 +#define WATCH_WIZ 0
 +#define WATCH_STATS 0
 +
 +static void
 +BroadwaterDisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 +			       RegionPtr dstRegion,
 +			       short width, short height, int video_pitch,
 +			       int x1, int y1, int x2, int y2,
 +			       short src_w, short src_h,
 +			       short drw_w, short drw_h,
 +			       DrawablePtr pDraw)
 +{
 +   I830Ptr pI830 = I830PTR(pScrn);
 +   CARD32 format, ms3, s2;
 +   BoxPtr pbox;
 +   int nbox, dxo, dyo;
 +   Bool planar;
 +   int urb_vs_start, urb_vs_size;
 +   int urb_gs_start, urb_gs_size;
 +   int urb_clip_start, urb_clip_size;
 +   int urb_sf_start, urb_sf_size;
 +   int urb_cs_start, urb_cs_size;
 +   struct brw_surface_state *dest_surf_state;
 +   struct brw_surface_state *src_surf_state;
 +   struct brw_sampler_state *src_sampler_state;
 +   struct brw_vs_unit_state *vs_state;
 +   struct brw_sf_unit_state *sf_state;
 +   struct brw_wm_unit_state *wm_state;
 +   struct brw_cc_unit_state *cc_state;
 +   struct brw_cc_viewport *cc_viewport;
 +   struct brw_instruction *sf_kernel;
 +   struct brw_instruction *ps_kernel;
 +   struct brw_instruction *sip_kernel;
 +   float *vb;
 +    CARD32 *binding_table;
 +   Bool first_output = TRUE;
 +   int dest_surf_offset, src_surf_offset, src_sampler_offset, vs_offset;
 +   int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
 +   int wm_scratch_offset;
 +   int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
 +   int binding_table_offset;
 +   int next_offset, total_state_size;
 +   int vb_size = (4 * 4) * 4; /* 4 DWORDS per vertex */
 +   char *state_base;
 +   int state_base_offset;
 +
 +#if 0
 +   ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width, height,
 +	  video_pitch);
 +#endif
 +
 +   /* enable debug */
 +   OUTREG (INST_PM,
 +	   (1 << (16 + 4)) |
 +	   (1 << 4));
 +#if 0
 +   ErrorF ("INST_PM 0x%08x\n", INREG(INST_PM));
 +#endif
 +   
 +   assert((id == FOURCC_UYVY) || (id == FOURCC_YUY2));
 +
 +   /* Tell the rotation code that we have stomped its invariant state by
 +    * setting a high bit.  We don't use any invariant 3D state for video, so we
 +    * don't have to worry about it ourselves.
 +    */
 +   *pI830->used3D |= 1 << 30;
 +
 +   next_offset = 0;
 +
 +   /* Set up our layout of state in framebuffer.  First the general state: */
 +   vs_offset = ALIGN(next_offset, 64);
 +   next_offset = vs_offset + sizeof(*vs_state);
 +   sf_offset = ALIGN(next_offset, 32);
 +   next_offset = sf_offset + sizeof(*sf_state);
 +   wm_offset = ALIGN(next_offset, 32);
 +   next_offset = wm_offset + sizeof(*wm_state);
 +   wm_scratch_offset = ALIGN(next_offset, 1024);
 +   next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS;
 +   cc_offset = ALIGN(next_offset, 32);
 +   next_offset = cc_offset + sizeof(*cc_state);
 +
 +   sf_kernel_offset = ALIGN(next_offset, 64);
 +   next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
 +   ps_kernel_offset = ALIGN(next_offset, 64);
 +   next_offset = ps_kernel_offset + sizeof (ps_kernel_static);
 +   sip_kernel_offset = ALIGN(next_offset, 64);
 +   next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
 +   cc_viewport_offset = ALIGN(next_offset, 32);
 +   next_offset = cc_viewport_offset + sizeof(*cc_viewport);
 +
 +   src_sampler_offset = ALIGN(next_offset, 32);
 +   next_offset = src_sampler_offset + sizeof(*src_sampler_state);
 +
 +   /* Align VB to native size of elements, for safety */
 +   vb_offset = ALIGN(next_offset, 8);
 +   next_offset = vb_offset + vb_size;
 +
 +   /* And then the surface state: */
 +   dest_surf_offset = ALIGN(next_offset, 32);
 +   next_offset = dest_surf_offset + sizeof(*dest_surf_state);
 +   src_surf_offset = ALIGN(next_offset, 32);
 +   next_offset = src_surf_offset + sizeof(*src_surf_state);
 +   binding_table_offset = ALIGN(next_offset, 32);
 +   next_offset = binding_table_offset + (WM_BINDING_TABLE_ENTRIES * 4);
 +
 +   /* Allocate an area in framebuffer for our state layout we just set up */
 +   total_state_size = next_offset;
 +   assert (total_state_size < BRW_LINEAR_EXTRA);
 +
 +   /*
 +    * Use the extra space allocated at the end of the Xv buffer
 +    */
 +   state_base_offset = (pPriv->YBuf0offset + 
 +			pPriv->linear->size * pI830->cpp -
 +			BRW_LINEAR_EXTRA);
 +   state_base_offset = ALIGN(state_base_offset, 64);
 +
 +   state_base = (char *)(pI830->FbBase + state_base_offset);
 +   /* Set up our pointers to state structures in framebuffer.  It would probably
 +    * be a good idea to fill these structures out in system memory and then dump
 +    * them there, instead.
 +    */
 +   vs_state = (void *)(state_base + vs_offset);
 +   sf_state = (void *)(state_base + sf_offset);
 +   wm_state = (void *)(state_base + wm_offset);
 +   cc_state = (void *)(state_base + cc_offset);
 +   sf_kernel = (void *)(state_base + sf_kernel_offset);
 +   ps_kernel = (void *)(state_base + ps_kernel_offset);
 +   sip_kernel = (void *)(state_base + sip_kernel_offset);
 +   
 +   cc_viewport = (void *)(state_base + cc_viewport_offset);
 +   dest_surf_state = (void *)(state_base + dest_surf_offset);
 +   src_surf_state = (void *)(state_base + src_surf_offset);
 +   src_sampler_state = (void *)(state_base + src_sampler_offset);
 +   binding_table = (void *)(state_base + binding_table_offset);
 +   vb = (void *)(state_base + vb_offset);
 +
 +   /* For 3D, the VS must have 8, 12, 16, 24, or 32 VUEs allocated to it.
 +    * A VUE consists of a 256-bit vertex header followed by the vertex data,
 +    * which in our case is 4 floats (128 bits), thus a single 512-bit URB
 +    * entry.
 +    */
 +#define URB_VS_ENTRIES	      8
 +#define URB_VS_ENTRY_SIZE     1
 +   
 +#define URB_GS_ENTRIES	      0
 +#define URB_GS_ENTRY_SIZE     0
 +   
 +#define URB_CLIP_ENTRIES      0
 +#define URB_CLIP_ENTRY_SIZE   0
 +   
 +   /* The SF kernel we use outputs only 4 256-bit registers, leading to an
 +    * entry size of 2 512-bit URBs.  We don't need to have many entries to
 +    * output as we're generally working on large rectangles and don't care
 +    * about having WM threads running on different rectangles simultaneously.
 +    */
 +#define URB_SF_ENTRIES	      1
 +#define URB_SF_ENTRY_SIZE     2
 +
 +#define URB_CS_ENTRIES	      0
 +#define URB_CS_ENTRY_SIZE     0
 +   
 +   urb_vs_start = 0;
 +   urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
 +   urb_gs_start = urb_vs_start + urb_vs_size;
 +   urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
 +   urb_clip_start = urb_gs_start + urb_gs_size;
 +   urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
 +   urb_sf_start = urb_clip_start + urb_clip_size;
 +   urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
 +   urb_cs_start = urb_sf_start + urb_sf_size;
 +   urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
 +
 +   /* We'll be poking the state buffers that could be in use by the 3d hardware
 +    * here, but we should have synced the 3D engine already in I830PutImage.
 +    */
 +
 +   memset (cc_viewport, 0, sizeof (*cc_viewport));
 +   cc_viewport->min_depth = -1.e35;
 +   cc_viewport->max_depth = 1.e35;
 +
 +   /* Color calculator state */
 +   memset(cc_state, 0, sizeof(*cc_state));
 +   cc_state->cc0.stencil_enable = 0;   /* disable stencil */
 +   cc_state->cc2.depth_test = 0;       /* disable depth test */
 +   cc_state->cc2.logicop_enable = 1;   /* enable logic op */
 +   cc_state->cc3.ia_blend_enable = 1;  /* blend alpha just like colors */
 +   cc_state->cc3.blend_enable = 0;     /* disable color blend */
 +   cc_state->cc3.alpha_test = 0;       /* disable alpha test */
 +   cc_state->cc4.cc_viewport_state_offset = (state_base_offset + cc_viewport_offset) >> 5;
 +   cc_state->cc5.dither_enable = 0;    /* disable dither */
 +   cc_state->cc5.logicop_func = 0xc;   /* COPY */
 +   cc_state->cc5.statistics_enable = 1;
 +   cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
 +   cc_state->cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE;
 +   cc_state->cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE;
 +
 +   /* Upload system kernel */
 +   memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static));
 +   
 +   /* Set up the state buffer for the destination surface */
 +   memset(dest_surf_state, 0, sizeof(*dest_surf_state));
 +   dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
 +   dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
 +   if (pI830->cpp == 2) {
 +      dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
 +   } else {
 +      dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
 +   }
 +   dest_surf_state->ss0.writedisable_alpha = 0;
 +   dest_surf_state->ss0.writedisable_red = 0;
 +   dest_surf_state->ss0.writedisable_green = 0;
 +   dest_surf_state->ss0.writedisable_blue = 0;
 +   dest_surf_state->ss0.color_blend = 1;
 +   dest_surf_state->ss0.vert_line_stride = 0;
 +   dest_surf_state->ss0.vert_line_stride_ofs = 0;
 +   dest_surf_state->ss0.mipmap_layout_mode = 0;
 +   dest_surf_state->ss0.render_cache_read_mode = 0;
 +   
 +   dest_surf_state->ss1.base_addr = pI830->FrontBuffer.Start;
 +   dest_surf_state->ss2.height = pScrn->virtualY - 1;
 +   dest_surf_state->ss2.width = pScrn->virtualX - 1;
 +   dest_surf_state->ss2.mip_count = 0;
 +   dest_surf_state->ss2.render_target_rotation = 0;
 +   dest_surf_state->ss3.pitch = (pI830->displayWidth * pI830->cpp) - 1;
 +
 +   /* Set up the source surface state buffer */
 +   memset(src_surf_state, 0, sizeof(*src_surf_state));
 +   src_surf_state->ss0.surface_type = BRW_SURFACE_2D;
 +/*   src_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; */
 +   switch (id) {
 +   case FOURCC_YUY2:
 +      src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_YCRCB_NORMAL;
 +      break;
 +   case FOURCC_UYVY:
 +      src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_YCRCB_SWAPY;
 +      break;
 +   }
 +   src_surf_state->ss0.writedisable_alpha = 0;
 +   src_surf_state->ss0.writedisable_red = 0;
 +   src_surf_state->ss0.writedisable_green = 0;
 +   src_surf_state->ss0.writedisable_blue = 0;
 +   src_surf_state->ss0.color_blend = 1;
 +   src_surf_state->ss0.vert_line_stride = 0;
 +   src_surf_state->ss0.vert_line_stride_ofs = 0;
 +   src_surf_state->ss0.mipmap_layout_mode = 0;
 +   src_surf_state->ss0.render_cache_read_mode = 0;
 +   
 +   src_surf_state->ss1.base_addr = pPriv->YBuf0offset;
 +   src_surf_state->ss2.width = width - 1;
 +   src_surf_state->ss2.height = height - 1;
 +   src_surf_state->ss2.mip_count = 0;
 +   src_surf_state->ss2.render_target_rotation = 0;
 +   src_surf_state->ss3.pitch = video_pitch - 1;
 +
 +   /* Set up a binding table for our two surfaces.  Only the PS will use it */
 +   /* XXX: are these offset from the right place? */
 +   binding_table[0] = state_base_offset + dest_surf_offset;
 +   binding_table[1] = state_base_offset + src_surf_offset;
 +
 +   /* Set up the packed YUV source sampler.  Doesn't do colorspace conversion.
 +    */
 +   memset(src_sampler_state, 0, sizeof(*src_sampler_state));
 +   src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
 +   src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
 +   src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
 +   src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
 +   src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
 +
 +   /* Set up the vertex shader to be disabled (passthrough) */
 +   memset(vs_state, 0, sizeof(*vs_state));
 +   vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
 +   vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
 +   vs_state->vs6.vs_enable = 0;
 +   vs_state->vs6.vert_cache_disable = 1;
 +
 +   /* Set up the SF kernel to do coord interp: for each attribute,
 +    * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
 +    * back to SF which then hands pixels off to WM.
 +    */
 +
 +   memcpy (sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
 +   memset(sf_state, 0, sizeof(*sf_state));
 +#if 0
 +   ErrorF ("sf kernel: 0x%08x\n", state_base_offset + sf_kernel_offset);
 +#endif
 +   sf_state->thread0.kernel_start_pointer = 
 +	       (state_base_offset + sf_kernel_offset) >> 6;
 +   sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
 +   sf_state->sf1.single_program_flow = 1; /* XXX */
 +   sf_state->sf1.binding_table_entry_count = 0;
 +   sf_state->sf1.thread_priority = 0;
 +   sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
 +   sf_state->sf1.illegal_op_exception_enable = 1;
 +   sf_state->sf1.mask_stack_exception_enable = 1;
 +   sf_state->sf1.sw_exception_enable = 1;
 +   sf_state->thread2.per_thread_scratch_space = 0;
 +   sf_state->thread2.scratch_space_base_pointer = 0; /* not used in our kernel */
 +   sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
 +   sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
 +   sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
 +   sf_state->thread3.urb_entry_read_offset = 0;
 +   sf_state->thread3.dispatch_grf_start_reg = 3;
 +   sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
 +   sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
 +   sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
 +   sf_state->thread4.stats_enable = 1;
 +   sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
 +   sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
 +   sf_state->sf6.scissor = 0;
 +   sf_state->sf7.trifan_pv = 2;
 +   sf_state->sf6.dest_org_vbias = 0x8;
 +   sf_state->sf6.dest_org_hbias = 0x8;
 +
 +   memcpy (ps_kernel, ps_kernel_static, sizeof (ps_kernel_static));
 +#if 0
 +   ErrorF ("ps kernel: 0x%08x\n", state_base_offset + ps_kernel_offset);
 +#endif
 +   memset (wm_state, 0, sizeof (*wm_state));
 +   wm_state->thread0.kernel_start_pointer = 
 +	    (state_base_offset + ps_kernel_offset) >> 6;
 +   wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
 +   wm_state->thread1.single_program_flow = 1; /* XXX */
 +   wm_state->thread1.binding_table_entry_count = 2;
 +   /* Though we never use the scratch space in our WM kernel, it has to be
 +    * set, and the minimum allocation is 1024 bytes.
 +    */
 +   wm_state->thread2.scratch_space_base_pointer = (state_base_offset +
 +						   wm_scratch_offset) >> 10;
 +   wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
 +   wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
 +   wm_state->thread3.const_urb_entry_read_length = 0;
 +   wm_state->thread3.const_urb_entry_read_offset = 0;
 +   wm_state->thread3.urb_entry_read_length = 1; /* XXX */
 +   wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
 +   wm_state->wm4.stats_enable = 1;
 +   wm_state->wm4.sampler_state_pointer = (state_base_offset + src_sampler_offset) >> 5;
 +   wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
 +   wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
 +   wm_state->wm5.thread_dispatch_enable = 1;
 +   wm_state->wm5.enable_16_pix = 1;
 +   wm_state->wm5.enable_8_pix = 0;
 +   wm_state->wm5.early_depth_test = 1;
 +
 +   {
 +      BEGIN_LP_RING(2);
 +      OUT_RING(MI_FLUSH | 
 +	       MI_STATE_INSTRUCTION_CACHE_FLUSH |
 +	       BRW_MI_GLOBAL_SNAPSHOT_RESET);
 +      OUT_RING(MI_NOOP);
 +      ADVANCE_LP_RING();
 +   }
 +   
 +/*    brw_debug (pScrn, "before base address modify"); */
 +   { BEGIN_LP_RING(12);
 +   /* Match Mesa driver setup */
 +   OUT_RING(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 +
 +   /* Mesa does this. Who knows... */
 +   OUT_RING(BRW_CS_URB_STATE | 0);
 +   OUT_RING((0 << 4) |	/* URB Entry Allocation Size */
 +	    (0 << 0));	/* Number of URB Entries */
 +   
 +   /* Zero out the two base address registers so all offsets are absolute */
 +   OUT_RING(BRW_STATE_BASE_ADDRESS | 4);
 +   OUT_RING(0 | BASE_ADDRESS_MODIFY);  /* General state base address */
 +   OUT_RING(0 | BASE_ADDRESS_MODIFY);  /* Surface state base address */
 +   OUT_RING(0 | BASE_ADDRESS_MODIFY);  /* media base addr, don't care */
 +   OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY);  /* general state max addr, disabled */
 +   OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY);  /* media object state max addr, disabled */
 +
 +   /* Set system instruction pointer */
 +   OUT_RING(BRW_STATE_SIP | 0);
 +   OUT_RING(state_base_offset + sip_kernel_offset); /* system instruction pointer */
 +      
 +   OUT_RING(MI_NOOP);
 +   ADVANCE_LP_RING(); }
 +   
 +/*   brw_debug (pScrn, "after base address modify"); */
 +
 +   { BEGIN_LP_RING(42);
 +   /* Enable VF statistics */
 +   OUT_RING(BRW_3DSTATE_VF_STATISTICS | 1);
 +   
 +   /* Pipe control */
 +   OUT_RING(BRW_PIPE_CONTROL |
 +	    BRW_PIPE_CONTROL_NOWRITE |
 +	    BRW_PIPE_CONTROL_IS_FLUSH |
 +	    2);
 +   OUT_RING(0);			       /* Destination address */
 +   OUT_RING(0);			       /* Immediate data low DW */
 +   OUT_RING(0);			       /* Immediate data high DW */
 +
 +   /* Binding table pointers */
 +   OUT_RING(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
 +   OUT_RING(0); /* vs */
 +   OUT_RING(0); /* gs */
 +   OUT_RING(0); /* clip */
 +   OUT_RING(0); /* sf */
 +   /* Only the PS uses the binding table */
 +   OUT_RING(state_base_offset + binding_table_offset); /* ps */
 +   
 +   /* Blend constant color (magenta is fun) */
 +   OUT_RING(BRW_3DSTATE_CONSTANT_COLOR | 3);
 +   OUT_RING(float_to_uint (1.0));
 +   OUT_RING(float_to_uint (0.0));
 +   OUT_RING(float_to_uint (1.0));
 +   OUT_RING(float_to_uint (1.0));
 +   
 +   /* The drawing rectangle clipping is always on.  Set it to values that
 +    * shouldn't do any clipping.
 +    */
 +   OUT_RING(BRW_3DSTATE_DRAWING_RECTANGLE | 2);	/* XXX 3 for BLC or CTG */
 +   OUT_RING(0x00000000);	/* ymin, xmin */
 +   OUT_RING((pScrn->virtualX - 1) |
 +	    (pScrn->virtualY - 1) << 16); /* ymax, xmax */
 +   OUT_RING(0x00000000);	/* yorigin, xorigin */
 +
 +   /* skip the depth buffer */
 +   /* skip the polygon stipple */
 +   /* skip the polygon stipple offset */
 +   /* skip the line stipple */
 +   
 +   /* Set the pointers to the 3d pipeline state */
 +   OUT_RING(BRW_3DSTATE_PIPELINED_POINTERS | 5);
 +   OUT_RING(state_base_offset + vs_offset);  /* 32 byte aligned */
 +   OUT_RING(BRW_GS_DISABLE);		     /* disable GS, resulting in passthrough */
 +   OUT_RING(BRW_CLIP_DISABLE);		     /* disable CLIP, resulting in passthrough */
 +   OUT_RING(state_base_offset + sf_offset);  /* 32 byte aligned */
 +   OUT_RING(state_base_offset + wm_offset);  /* 32 byte aligned */
 +   OUT_RING(state_base_offset + cc_offset);  /* 64 byte aligned */
 +
 +   /* URB fence */
 +   OUT_RING(BRW_URB_FENCE |
 +	    UF0_CS_REALLOC |
 +	    UF0_SF_REALLOC |
 +	    UF0_CLIP_REALLOC |
 +	    UF0_GS_REALLOC |
 +	    UF0_VS_REALLOC |
 +	    1);
 +   OUT_RING(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
 +	    ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
 +	    ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
 +   OUT_RING(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
 +	    ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
 +
 +   /* Constant buffer state */
 +   OUT_RING(BRW_CS_URB_STATE | 0);
 +   OUT_RING(((URB_CS_ENTRY_SIZE - 1) << 4) | /* URB Entry Allocation Size */
 +	    (URB_CS_ENTRIES << 0));	     /* Number of URB Entries */
 +   
 +   /* Set up the pointer to our vertex buffer */
 +   OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 2);
 +   OUT_RING((0 << VB0_BUFFER_INDEX_SHIFT) |
 +	    VB0_VERTEXDATA |
 +	    ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); /* four 32-bit floats per vertex */
 +   OUT_RING(state_base_offset + vb_offset);
 +   OUT_RING(3); /* four corners to our rectangle */
 +
 +   /* Set up our vertex elements, sourced from the single vertex buffer. */
 +   OUT_RING(BRW_3DSTATE_VERTEX_ELEMENTS | 3);
 +   /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
 +   OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
 +	    VE0_VALID |
 +	    (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
 +	    (0 << VE0_OFFSET_SHIFT));
 +   OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
 +	    (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
 +	    (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
 +	    (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
 +	    (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
 +   /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
 +   OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
 +	    VE0_VALID |
 +	    (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
 +	    (8 << VE0_OFFSET_SHIFT));
 +   OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
 +	    (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
 +	    (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
 +	    (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
 +	    (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
 +
 +   OUT_RING(MI_NOOP);			/* pad to quadword */
 +   ADVANCE_LP_RING(); }
 +
 +   dxo = dstRegion->extents.x1;
 +   dyo = dstRegion->extents.y1;
 +
 +   pbox = REGION_RECTS(dstRegion);
 +   nbox = REGION_NUM_RECTS(dstRegion);
 +   while (nbox--)
 +   {
 +      int box_x1 = pbox->x1;
 +      int box_y1 = pbox->y1;
 +      int box_x2 = pbox->x2;
 +      int box_y2 = pbox->y2;
 +      int i;
 +      float src_scale_x, src_scale_y;
 +
 +      if (!first_output) {
 +	 /* Since we use the same little vertex buffer over and over, sync for
 +	  * subsequent rectangles.
 +	  */
 +	 if (pI830->AccelInfoRec && pI830->AccelInfoRec->NeedToSync) {
 +	    (*pI830->AccelInfoRec->Sync)(pScrn);
 +	    pI830->AccelInfoRec->NeedToSync = FALSE;
 +	 }
 +      }
 +
 +      pbox++;
 +
 +      /* Use normalized texture coordinates */
 +      src_scale_x = (float)1.0 / (float)drw_w;
 +      src_scale_y  = (float)1.0 / (float)drw_h;
 +
 +      i = 0;
 +      vb[i++] = (box_x2 - dxo) * src_scale_x;
 +      vb[i++] = (box_y2 - dyo) * src_scale_y;
 +      vb[i++] = (float) box_x2;
 +      vb[i++] = (float) box_y2;
 +
 +      vb[i++] = (box_x1 - dxo) * src_scale_x;
 +      vb[i++] = (box_y2 - dyo) * src_scale_y;
 +      vb[i++] = (float) box_x1;
 +      vb[i++] = (float) box_y2;
 +
 +      vb[i++] = (box_x1 - dxo) * src_scale_x;
 +      vb[i++] = (box_y1 - dyo) * src_scale_y;
 +      vb[i++] = (float) box_x1;
 +      vb[i++] = (float) box_y1;
 +
 +#if 0
 +      ErrorF ("before EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n",
 +	      INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0),
 +	      INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0));
 +
 +      OUTREG(BRW_VF_CTL,
 +	     BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID |
 +	     BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX |
 +	     BRW_VF_CTL_SNAPSHOT_ENABLE);
 +      OUTREG(BRW_VF_STRG_VAL, 0);
 +#endif
 +      
 +#if 0
 +      OUTREG(BRW_VS_CTL,
 +	     BRW_VS_CTL_SNAPSHOT_ALL_THREADS |
 +	     BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT |
 +	     BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE);
 +      
 +      OUTREG(BRW_VS_STRG_VAL, 0);
 +#endif
 +      
 +#if WATCH_SF
 +      OUTREG(BRW_SF_CTL,
 +	     BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT |
 +	     BRW_SF_CTL_SNAPSHOT_ALL_THREADS |
 +	     BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE);
 +      OUTREG(BRW_SF_STRG_VAL, 0);
 +#endif
 +
 +#if WATCH_WIZ
 +      OUTREG(BRW_WIZ_CTL,
 +	     BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE |
 +	     BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS |
 +	     BRW_WIZ_CTL_SNAPSHOT_ENABLE);
 +      OUTREG(BRW_WIZ_STRG_VAL,
 +	     (box_x1) | (box_y1 << 16));
 +#endif
 +      
 +#if 0
 +      OUTREG(BRW_TS_CTL,
 +	     BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR |
 +	     BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS |
 +	     BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS |
 +	     BRW_TS_CTL_SNAPSHOT_ENABLE);
 +#endif
 +
 +      BEGIN_LP_RING(6);
 +      OUT_RING(BRW_3DPRIMITIVE | 
 +	       BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
 +	       (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | 
 +	       (0 << 9) |  /* CTG - indirect vertex count */
 +	       4);
 +      OUT_RING(3); /* vertex count per instance */
 +      OUT_RING(0); /* start vertex offset */
 +      OUT_RING(1); /* single instance */
 +      OUT_RING(0); /* start instance location */
 +      OUT_RING(0); /* index buffer offset, ignored */
 +      ADVANCE_LP_RING();
 +
 +      int   j, k;
 +      CARD32	  ctl = 0, rdata;
 +      
 +#if 0
 +      for (j = 0; j < 100000; j++) {
 +	ctl = INREG(BRW_VF_CTL);
 +	 if (ctl & BRW_VF_CTL_SNAPSHOT_COMPLETE)
 +	    break;
 +      }
 +      
 +      rdata = INREG(BRW_VF_RDATA);
 +      OUTREG(BRW_VF_CTL, 0);
 +      ErrorF ("VF_CTL: 0x%08x VF_RDATA: 0x%08x\n", ctl, rdata);
 +#endif
 +
 +#if 0
 +      for (j = 0; j < 1000000; j++) {
 +	ctl = INREG(BRW_VS_CTL);
 +	 if (ctl & BRW_VS_CTL_SNAPSHOT_COMPLETE)
 +	    break;
 +      }
 +
 +      rdata = INREG(BRW_VS_RDATA);
 +      for (k = 0; k <= 3; k++) {
 +	 OUTREG(BRW_VS_CTL,
 +		BRW_VS_CTL_SNAPSHOT_COMPLETE |
 +		(k << 8));
 +	 rdata = INREG(BRW_VS_RDATA);
 +	 ErrorF ("VS_CTL: 0x%08x VS_RDATA(%d): 0x%08x\n", ctl, k, rdata);
 +      }
 +      
 +      OUTREG(BRW_VS_CTL, 0);
 +#endif
 +
 +#if WATCH_SF
 +      for (j = 0; j < 1000000; j++) {
 +	ctl = INREG(BRW_SF_CTL);
 +	 if (ctl & BRW_SF_CTL_SNAPSHOT_COMPLETE)
 +	    break;
 +      }
 +
 +      for (k = 0; k <= 7; k++) {
 +	 OUTREG(BRW_SF_CTL,
 +		BRW_SF_CTL_SNAPSHOT_COMPLETE |
 +		(k << 8));
 +	 rdata = INREG(BRW_SF_RDATA);
 +	 ErrorF ("SF_CTL: 0x%08x SF_RDATA(%d): 0x%08x\n", ctl, k, rdata);
 +      }
 +      
 +      OUTREG(BRW_SF_CTL, 0);
 +#endif
 +
 +#if WATCH_WIZ
 +      for (j = 0; j < 100000; j++) {
 +	ctl = INREG(BRW_WIZ_CTL);
 +	 if (ctl & BRW_WIZ_CTL_SNAPSHOT_COMPLETE)
 +	    break;
 +      }
 +      
 +      rdata = INREG(BRW_WIZ_RDATA);
 +      OUTREG(BRW_WIZ_CTL, 0);
 +      ErrorF ("WIZ_CTL: 0x%08x WIZ_RDATA: 0x%08x\n", ctl, rdata);
 +#endif
 +      
 +#if 0
 +      for (j = 0; j < 100000; j++) {
 +	ctl = INREG(BRW_TS_CTL);
 +	 if (ctl & BRW_TS_CTL_SNAPSHOT_COMPLETE)
 +	    break;
 +      }
 +      
 +      rdata = INREG(BRW_TS_RDATA);
 +      OUTREG(BRW_TS_CTL, 0);
 +      ErrorF ("TS_CTL: 0x%08x TS_RDATA: 0x%08x\n", ctl, rdata);
 +      
 +      ErrorF ("after EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n",
 +	      INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0),
 +	      INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0));
 +#endif
 +
 +#if 0
 +      for (j = 0; j < 256; j++) {
 +	 OUTREG(BRW_TD_CTL, j << BRW_TD_CTL_MUX_SHIFT);
 +	 rdata = INREG(BRW_TD_RDATA);
 +	 ErrorF ("TD_RDATA(%d): 0x%08x\n", j, rdata);
 +      }
 +#endif
 +      first_output = FALSE;
 +      if (pI830->AccelInfoRec)
 +	 pI830->AccelInfoRec->NeedToSync = TRUE;
 +   }
 +
 +   if (pI830->AccelInfoRec)
 +      (*pI830->AccelInfoRec->Sync)(pScrn);
 +#if WATCH_STATS
 +   I830PrintErrorState (pScrn);
 +#endif
 +}
 +
  static FBLinearPtr
  I830AllocateMemory(ScrnInfoPtr pScrn, FBLinearPtr linear, int size)
  {
diff-tree 32f1199937e92b9100aba52cbbb97157014e3182 (from baf65ce98abcdd21dff2531a43bb9c5044732c28)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date:   Mon Jul 24 15:42:15 2006 +0800

    remove an extra '-'

diff --git a/src/i915_video.c b/src/i915_video.c
index e05ad72..0833d50 100644
--- a/src/i915_video.c
+++ b/src/i915_video.c
@@ -359,7 +359,7 @@ I915DisplayVideoTextured(ScrnInfoPtr pSc
        */
       i915_fs_dp3_masked(FS_OC, MASK_X | MASK_SATURATE,
                         i915_fs_operand_reg(FS_R0),
--                        i915_fs_operand_reg(FS_C1));
+                        i915_fs_operand_reg(FS_C1));
       i915_fs_dp3_masked(FS_OC, MASK_Y | MASK_SATURATE,
                         i915_fs_operand_reg(FS_R0),
                         i915_fs_operand_reg(FS_C2));
diff-tree baf65ce98abcdd21dff2531a43bb9c5044732c28 (from bb81e8d6c777a5e16b8193c07667fbee8e21203e)
Author: Eric Anholt <anholt at FreeBSD.org>
Date:   Tue Jul 18 19:42:37 2006 -0400

    Re-convert i915 video to new fragment shader API.
    
    Although in the history of this branch it had happened before, this time it's
    for real.

diff --git a/src/i915_video.c b/src/i915_video.c
index 8d687a1..e05ad72 100644
--- a/src/i915_video.c
+++ b/src/i915_video.c
@@ -37,6 +37,7 @@
 #include "i830.h"
 #include "i830_video.h"
 #include "i915_reg.h"
+#include "i915_3d.h"
 
 union intfloat {
    CARD32 ui;
@@ -49,68 +50,6 @@ union intfloat {
    OUT_RING(_tmp.ui);							\
 } while (0)
 
-#define OUT_DCL(type, nr) do {						\
-   CARD32 chans = 0;							\
-   if (REG_TYPE_##type == REG_TYPE_T)					\
-      chans = D0_CHANNEL_ALL;						\
-   else if (REG_TYPE_##type != REG_TYPE_S)				\
-      FatalError("wrong reg type %d to declare\n", REG_TYPE_##type);	\
-   OUT_RING(D0_DCL |							\
-	    (REG_TYPE_##type << D0_TYPE_SHIFT) | (nr << D0_NR_SHIFT) |	\
-	    chans);							\
-   OUT_RING(0x00000000);						\
-   OUT_RING(0x00000000);						\
-} while (0)
-
-#define OUT_TEXLD(dest_type, dest_nr, sampler_nr, addr_type, addr_nr)	\
-do {									\
-      OUT_RING(T0_TEXLD |						\
-	       (REG_TYPE_##dest_type << T0_DEST_TYPE_SHIFT) |		\
-	       (dest_nr << T0_DEST_NR_SHIFT) |				\
-	       (sampler_nr << T0_SAMPLER_NR_SHIFT));			\
-      OUT_RING((REG_TYPE_##addr_type << T1_ADDRESS_REG_TYPE_SHIFT) |	\
-	       (addr_nr << T1_ADDRESS_REG_NR_SHIFT));			\
-      OUT_RING(0x00000000);						\
-} while (0)
-
-/* Move the dest_chan from src0 to dest, leaving the other channels alone */
-#define OUT_MOV_TO_CHANNEL(dest_type, dest_nr, src0_type, src0_nr,	\
-			   dest_chan)					\
-do {									\
-   OUT_RING(A0_MOV | A0_DEST_CHANNEL_##dest_chan |			\
-	    (REG_TYPE_##dest_type << A0_DEST_TYPE_SHIFT) |		\
-	    (dest_nr << A0_DEST_NR_SHIFT) |				\
-	    (REG_TYPE_##src0_type << A0_SRC0_TYPE_SHIFT) |		\
-	    (src0_nr << A0_SRC0_NR_SHIFT));				\
-   OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |			\
-	    (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |			\
-	    (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |			\
-	    (SRC_W << A1_SRC0_CHANNEL_W_SHIFT));			\
-   OUT_RING(0);								\
-} while (0)
-
-/* Dot3-product src0 and src1, storing the result in dest_chan of the dest.
- * Saturates, in case we have out-of-range YUV values.
- */
-#define OUT_DP3_TO_CHANNEL(dest_type, dest_nr, src0_type, src0_nr,	\
-			   src1_type, src1_nr, dest_chan)		\
-do {									\
-   OUT_RING(A0_DP3 | A0_DEST_CHANNEL_##dest_chan | A0_DEST_SATURATE |	\
-	    (REG_TYPE_##dest_type << A0_DEST_TYPE_SHIFT) |		\
-	    (dest_nr << A0_DEST_NR_SHIFT) |				\
-	    (REG_TYPE_##src0_type << A0_SRC0_TYPE_SHIFT) |		\
-	    (src0_nr << A0_SRC0_NR_SHIFT));				\
-   OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |			\
-	    (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |			\
-	    (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |			\
-	    (SRC_W << A1_SRC0_CHANNEL_W_SHIFT) |			\
-	    (REG_TYPE_##src1_type << A1_SRC1_TYPE_SHIFT) |		\
-	    (src1_nr << A1_SRC1_TYPE_SHIFT) |				\
-	    (SRC_X << A1_SRC1_CHANNEL_X_SHIFT) |			\
-	    (SRC_Y << A1_SRC1_CHANNEL_Y_SHIFT));			\
-   OUT_RING((SRC_Z << A2_SRC1_CHANNEL_Z_SHIFT) |			\
-	    (SRC_W << A2_SRC1_CHANNEL_W_SHIFT));			\
-} while (0)
 
 void
 I915DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
@@ -261,14 +200,9 @@ I915DisplayVideoTextured(ScrnInfoPtr pSc
    ADVANCE_LP_RING();
 
    if (!planar) {
-      BEGIN_LP_RING(20);
-      /* fragment program - texture blend replace. */
-      OUT_RING(_3DSTATE_PIXEL_SHADER_PROGRAM | 8);
-      OUT_DCL(S, 0);
-      OUT_DCL(T, 0);
-      OUT_TEXLD(OC, 0, 0, T, 0);
-      /* End fragment program */
+      FS_LOCALS(3);
 
+      BEGIN_LP_RING(10);
       OUT_RING(_3DSTATE_SAMPLER_STATE | 3);
       OUT_RING(0x00000001);
       OUT_RING(SS2_COLORSPACE_CONVERSION |
@@ -297,8 +231,16 @@ I915DisplayVideoTextured(ScrnInfoPtr pSc
       OUT_RING(ms3);
       OUT_RING(((video_pitch / 4) - 1) << 21);
       ADVANCE_LP_RING();
+
+      FS_BEGIN();
+      i915_fs_dcl(FS_S0);
+      i915_fs_dcl(FS_T0);
+      i915_fs_texld(FS_OC, FS_S0, FS_T0);
+      FS_END();
    } else {
-      BEGIN_LP_RING(1 + 18 + (1 + 3*16) + 11 + 11);
+      FS_LOCALS(16);
+
+      BEGIN_LP_RING(1 + 18 + 11 + 11);
       OUT_RING(MI_NOOP);
       /* For the planar formats, we set up three samplers -- one for each plane,
        * in a Y8 format.  Because I couldn't get the special PLANAR_TO_PACKED
@@ -342,61 +284,6 @@ I915DisplayVideoTextured(ScrnInfoPtr pSc
       OUT_RING_F(0.0);
       OUT_RING_F(0.0);
 
-      OUT_RING(_3DSTATE_PIXEL_SHADER_PROGRAM | (3 * 16 - 1));
-      /* Declare samplers */
-      OUT_DCL(S, 0);
-      OUT_DCL(S, 1);
-      OUT_DCL(S, 2);
-      OUT_DCL(T, 0);
-      OUT_DCL(T, 1);
-
-      /* Load samplers to temporaries.  Y (sampler 0) gets the un-halved coords
-       * from t1.
-       */
-      OUT_TEXLD(R, 1, 0, T, 1);
-      OUT_TEXLD(R, 2, 1, T, 0);
-      OUT_TEXLD(R, 3, 2, T, 0);
-
-      /* Move the sampled YUV data in R[123] to the first 3 channels of R0. */
-      OUT_MOV_TO_CHANNEL(R, 0, R, 1, X);
-      OUT_MOV_TO_CHANNEL(R, 0, R, 2, Y);
-      OUT_MOV_TO_CHANNEL(R, 0, R, 3, Z);
-
-      /* Normalize the YUV data */
-      OUT_RING(A0_ADD | A0_DEST_CHANNEL_ALL |
-	       (REG_TYPE_R << A0_DEST_TYPE_SHIFT) | (0 << A0_DEST_NR_SHIFT) |				\
-	       (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT));
-      OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |
-	       (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |
-	       (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |
-	       (SRC_W << A1_SRC0_CHANNEL_W_SHIFT) |
-	       (REG_TYPE_CONST << A1_SRC1_TYPE_SHIFT) | (0 << A1_SRC1_NR_SHIFT) |
-	       (SRC_X << A1_SRC1_CHANNEL_X_SHIFT) |
-	       (SRC_Y << A1_SRC1_CHANNEL_Y_SHIFT));
-      OUT_RING((SRC_Z << A2_SRC1_CHANNEL_Z_SHIFT) |
-	       (SRC_W << A2_SRC1_CHANNEL_W_SHIFT));
-
-      /* dot-product the YUV data in R0 by the vectors of coefficients for
-       * calculating R, G, and B, storing the results in the R, G, or B channels
-       * of the output color.
-       */
-      OUT_DP3_TO_CHANNEL(OC, 0, R, 0, CONST, 1, X);
-      OUT_DP3_TO_CHANNEL(OC, 0, R, 0, CONST, 2, Y);
-      OUT_DP3_TO_CHANNEL(OC, 0, R, 0, CONST, 3, Z);
-
-      /* Set alpha of the output to 1.0, by wiring W to 1 and not actually using
-       * the source.
-       */
-      OUT_RING(A0_MOV | A0_DEST_CHANNEL_W |
-	       (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | (0 << A0_DEST_NR_SHIFT) |
-	       (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT));
-      OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |
-	       (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |
-	       (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |
-	       (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT));
-      OUT_RING(0);
-      /* End fragment program */
-
       OUT_RING(_3DSTATE_SAMPLER_STATE | 9);
       OUT_RING(0x00000007);
       /* sampler 0 */
@@ -442,6 +329,48 @@ I915DisplayVideoTextured(ScrnInfoPtr pSc
       OUT_RING(ms3);
       OUT_RING(((video_pitch / 4) - 1) << 21);
       ADVANCE_LP_RING();
+
+      FS_BEGIN();
+      /* Declare samplers */
+      i915_fs_dcl(FS_S0);
+      i915_fs_dcl(FS_S1);
+      i915_fs_dcl(FS_S2);
+      i915_fs_dcl(FS_T0);
+      i915_fs_dcl(FS_T1);
+
+      /* Load samplers to temporaries.  Y (sampler 0) gets the un-halved coords-
+       * from t1.
+       */
+      i915_fs_texld(FS_R1, FS_S0, FS_T1);
+      i915_fs_texld(FS_R2, FS_S1, FS_T0);
+      i915_fs_texld(FS_R3, FS_S2, FS_T0);
+
+      /* Move the sampled YUV data in R[123] to the first 3 channels of R0. */
+      i915_fs_mov_masked(FS_R0, MASK_X, i915_fs_operand_reg(FS_R1));
+      i915_fs_mov_masked(FS_R0, MASK_Y, i915_fs_operand_reg(FS_R2));
+      i915_fs_mov_masked(FS_R0, MASK_Z, i915_fs_operand_reg(FS_R3));
+
+      /* Normalize the YUV data */
+      i915_fs_add(FS_R0, i915_fs_operand_reg(FS_R0),
+                 i915_fs_operand_reg(FS_C0));
+      /* dot-product the YUV data in R0 by the vectors of coefficients for
+       * calculating R, G, and B, storing the results in the R, G, or B channels
+       * of the output color.
+       */
+      i915_fs_dp3_masked(FS_OC, MASK_X | MASK_SATURATE,
+                        i915_fs_operand_reg(FS_R0),
+-                        i915_fs_operand_reg(FS_C1));
+      i915_fs_dp3_masked(FS_OC, MASK_Y | MASK_SATURATE,
+                        i915_fs_operand_reg(FS_R0),
+                        i915_fs_operand_reg(FS_C2));
+      i915_fs_dp3_masked(FS_OC, MASK_Z | MASK_SATURATE,
+                        i915_fs_operand_reg(FS_R0),
+                        i915_fs_operand_reg(FS_C3));
+      /* Set alpha of the output to 1.0, by wiring W to 1 and not actually using
+       * the source.
+       */
+      i915_fs_mov_masked(FS_OC, MASK_W, i915_fs_operand_one());
+      FS_END();
    }
    
    {
diff-tree bb81e8d6c777a5e16b8193c07667fbee8e21203e (from parents)
Merge: 2a1b3cfccb7de53f7ce8f9e4816e4278afb1fcab 84805167ab8a422966355b9753bfcb4dad802413
Author: Eric Anholt <anholt at FreeBSD.org>
Date:   Tue Jul 18 19:23:21 2006 -0400

    Merge branch 'master' into textured-video
    
    This moves the i915 textured video implementation into i915_video.c to avoid
    conflicts in register definitions with i830_reg.h when we use i915_reg.h.
    This also means that i810_reg.h's i915 3D regs definitions are removed and
    replaced with i915_reg.h usage.
    
    Conflicts:
    
    	src/i830_rotate.c

diff --cc src/Makefile.am
index c64c203,f97dc52..bf4ded4
@@@ -53,8 -53,13 +53,15 @@@
           i830_memory.c \
           i830_modes.c \
           i830_video.c \
++         i830_video.h \
           i830_rotate.c \
- 	 i830_randr.c
+ 	 i830_randr.c \
+ 	 i830_3d.c \
+ 	 i830_reg.h \
+ 	 i915_3d.c \
+ 	 i915_3d.h \
 -	 i915_reg.h
++	 i915_reg.h \
++	 i915_video.c
  
  if DRI
  i810_drv_la_SOURCES += \
diff --cc src/i810_reg.h
index 0ed7ff6,e52375f..2c5e271
@@@ -869,624 -866,15 +866,12 @@@
  #define XY_MONO_SRC_BLT_WRITE_ALPHA	(1<<21)
  #define XY_MONO_SRC_BLT_WRITE_RGB	(1<<20)
  
--/* 3d state */
- #define STATE3D_ANTI_ALIASING		(CMD_3D | (0x06<<24))
- #define LINE_CAP_WIDTH_MODIFY		(1 << 16)
- #define LINE_CAP_WIDTH_1_0		(0x1 << 14)
- #define LINE_WIDTH_MODIFY		(1 << 8)
- #define LINE_WIDTH_1_0			(0x1 << 6)
- 
- #define STATE3D_RASTERIZATION_RULES	(CMD_3D | (0x07<<24))
- #define ENABLE_POINT_RASTER_RULE	(1<<15)
- #define OGL_POINT_RASTER_RULE		(1<<13)
- #define ENABLE_TEXKILL_3D_4D            (1<<10)
- #define TEXKILL_3D                      (0<<9)
- #define TEXKILL_4D                      (1<<9)
- #define ENABLE_LINE_STRIP_PROVOKE_VRTX	(1<<8)
- #define ENABLE_TRI_FAN_PROVOKE_VRTX	(1<<5)
- #define LINE_STRIP_PROVOKE_VRTX(x)	((x)<<6)
- #define TRI_FAN_PROVOKE_VRTX(x) 	((x)<<3)
- 
- #define STATE3D_INDEPENDENT_ALPHA_BLEND	(CMD_3D | (0x0b<<24))
- #define IAB_MODIFY_ENABLE	        (1<<23)
- #define IAB_ENABLE       	        (1<<22)
- #define IAB_MODIFY_FUNC         	(1<<21)
- #define IAB_FUNC_SHIFT          	16
- #define IAB_MODIFY_SRC_FACTOR   	(1<<11)
- #define IAB_SRC_FACTOR_SHIFT		6
- #define IAB_SRC_FACTOR_MASK		(BLENDFACT_MASK<<6)
- #define IAB_MODIFY_DST_FACTOR	        (1<<5)
- #define IAB_DST_FACTOR_SHIFT		0
- #define IAB_DST_FACTOR_MASK		(BLENDFACT_MASK<<0)
- 
- #define BLENDFUNC_ADD			0x0
- #define BLENDFUNC_SUBTRACT		0x1
- #define BLENDFUNC_REVERSE_SUBTRACT	0x2
- #define BLENDFUNC_MIN			0x3
- #define BLENDFUNC_MAX			0x4
- #define BLENDFUNC_MASK			0x7
- 
- #define BLENDFACT_ZERO			0x01
- #define BLENDFACT_ONE			0x02
- #define BLENDFACT_SRC_COLR		0x03
- #define BLENDFACT_INV_SRC_COLR 		0x04
- #define BLENDFACT_SRC_ALPHA		0x05
- #define BLENDFACT_INV_SRC_ALPHA 	0x06
- #define BLENDFACT_DST_ALPHA		0x07
- #define BLENDFACT_INV_DST_ALPHA 	0x08
- #define BLENDFACT_DST_COLR		0x09
- #define BLENDFACT_INV_DST_COLR		0x0a
- #define BLENDFACT_SRC_ALPHA_SATURATE	0x0b
- #define BLENDFACT_CONST_COLOR		0x0c
- #define BLENDFACT_INV_CONST_COLOR	0x0d
- #define BLENDFACT_CONST_ALPHA		0x0e
- #define BLENDFACT_INV_CONST_ALPHA	0x0f
- #define BLENDFACT_MASK          	0x0f
- 
- #define STATE3D_MODES_4			(CMD_3D | (0x0d<<24))
- #define ENABLE_LOGIC_OP_FUNC		(1<<23)
- #define LOGIC_OP_FUNC(x)		((x)<<18)
- #define LOGICOP_MASK			(0xf<<18)
- #define MODE4_ENABLE_STENCIL_TEST_MASK	((1<<17)|(0xff00))
- #define ENABLE_STENCIL_TEST_MASK	(1<<17)
- #define STENCIL_TEST_MASK(x)		((x)<<8)
- #define MODE4_ENABLE_STENCIL_WRITE_MASK	((1<<16)|(0x00ff))
- #define ENABLE_STENCIL_WRITE_MASK	(1<<16)
- #define STENCIL_WRITE_MASK(x)		((x)&0xff)
- 
- #define LOGICOP_CLEAR			0
- #define LOGICOP_NOR			0x1
- #define LOGICOP_AND_INV 		0x2
- #define LOGICOP_COPY_INV		0x3
- #define LOGICOP_AND_RVRSE		0x4
- #define LOGICOP_INV			0x5
- #define LOGICOP_XOR			0x6
- #define LOGICOP_NAND			0x7
- #define LOGICOP_AND			0x8
- #define LOGICOP_EQUIV			0x9
- #define LOGICOP_NOOP			0xa
- #define LOGICOP_OR_INV			0xb
- #define LOGICOP_COPY			0xc
- #define LOGICOP_OR_RVRSE		0xd
- #define LOGICOP_OR			0xe
- #define LOGICOP_SET			0xf
- 
- #define STATE3D_COORD_SET_BINDINGS	(CMD_3D | (0x16<<24))
- #define CSB_TCB(iunit,eunit)		((eunit) << ((iunit) * 3))
- 
- #define STATE3D_SCISSOR_ENABLE		(CMD_3D | (0x1c<<24)|(0x10<<19))
- #define ENABLE_SCISSOR_RECT		((1<<1) | 1)
- #define DISABLE_SCISSOR_RECT		((1<<1) | 0)
- 
- #define STATE3D_MAP_STATE		(CMD_3D | (0x1d<<24)|(0x00<<16))
- 
- #define MS1_MAPMASK_SHIFT               0
- #define MS1_MAPMASK_MASK                (0x8fff<<0)
- 
- #define MS2_UNTRUSTED_SURFACE           (1<<31)
- #define MS2_ADDRESS_MASK                0xfffffffc
- #define MS2_VERTICAL_LINE_STRIDE        (1<<1)
- #define MS2_VERTICAL_OFFSET             (1<<1)
- 
- #define MS3_HEIGHT_SHIFT              21
- #define MS3_WIDTH_SHIFT               10
- #define MS3_PALETTE_SELECT            (1<<9)
- #define MS3_MAPSURF_FORMAT_SHIFT      7
- #define MS3_MAPSURF_FORMAT_MASK       (0x7<<7)
- #define    MAPSURF_8BIT		 	   (1<<7)
- #define    MAPSURF_16BIT		   (2<<7)
- #define    MAPSURF_32BIT		   (3<<7)
- #define    MAPSURF_422			   (5<<7)
- #define    MAPSURF_COMPRESSED		   (6<<7)
- #define    MAPSURF_4BIT_INDEXED		   (7<<7)
- #define MS3_MT_FORMAT_MASK         (0x7 << 3)
- #define MS3_MT_FORMAT_SHIFT        3
- #define    MT_4BIT_IDX_ARGB8888	           (7<<3) /* SURFACE_4BIT_INDEXED */
- #define    MT_8BIT_I8		           (0<<3) /* SURFACE_8BIT */
- #define    MT_8BIT_L8		           (1<<3)
- #define    MT_8BIT_A8		           (4<<3)
- #define    MT_8BIT_MONO8	           (5<<3)
- #define    MT_16BIT_RGB565 		   (0<<3) /* SURFACE_16BIT */
- #define    MT_16BIT_ARGB1555		   (1<<3)
- #define    MT_16BIT_ARGB4444		   (2<<3)
- #define    MT_16BIT_AY88		   (3<<3)
- #define    MT_16BIT_88DVDU	           (5<<3)
- #define    MT_16BIT_BUMP_655LDVDU	   (6<<3)
- #define    MT_16BIT_I16	                   (7<<3)
- #define    MT_16BIT_L16	                   (8<<3)
- #define    MT_16BIT_A16	                   (9<<3)
- #define    MT_32BIT_ARGB8888		   (0<<3) /* SURFACE_32BIT */
- #define    MT_32BIT_ABGR8888		   (1<<3)
- #define    MT_32BIT_XRGB8888		   (2<<3)
- #define    MT_32BIT_XBGR8888		   (3<<3)
- #define    MT_32BIT_QWVU8888		   (4<<3)
- #define    MT_32BIT_AXVU8888		   (5<<3)
- #define    MT_32BIT_LXVU8888	           (6<<3)
- #define    MT_32BIT_XLVU8888	           (7<<3)
- #define    MT_32BIT_ARGB2101010	           (8<<3)
- #define    MT_32BIT_ABGR2101010	           (9<<3)
- #define    MT_32BIT_AWVU2101010	           (0xA<<3)
- #define    MT_32BIT_GR1616	           (0xB<<3)
- #define    MT_32BIT_VU1616	           (0xC<<3)
- #define    MT_32BIT_xI824	           (0xD<<3)
- #define    MT_32BIT_xA824	           (0xE<<3)
- #define    MT_32BIT_xL824	           (0xF<<3)
- #define    MT_422_YCRCB_SWAPY	           (0<<3) /* SURFACE_422 */
- #define    MT_422_YCRCB_NORMAL	           (1<<3)
- #define    MT_422_YCRCB_SWAPUV	           (2<<3)
- #define    MT_422_YCRCB_SWAPUVY	           (3<<3)
- #define    MT_COMPRESS_DXT1		   (0<<3) /* SURFACE_COMPRESSED */
- #define    MT_COMPRESS_DXT2_3	           (1<<3)
- #define    MT_COMPRESS_DXT4_5	           (2<<3)
- #define    MT_COMPRESS_FXT1		   (3<<3)
- #define    MT_COMPRESS_DXT1_RGB		   (4<<3)
- #define MS3_USE_FENCE_REGS              (1<<2)
- #define MS3_TILED_SURFACE             (1<<1)
- #define MS3_TILE_WALK                 (1<<0)
- 
- #define MS4_PITCH_SHIFT                 21
- #define MS4_CUBE_FACE_ENA_NEGX          (1<<20)
- #define MS4_CUBE_FACE_ENA_POSX          (1<<19)
- #define MS4_CUBE_FACE_ENA_NEGY          (1<<18)
- #define MS4_CUBE_FACE_ENA_POSY          (1<<17)
- #define MS4_CUBE_FACE_ENA_NEGZ          (1<<16)
- #define MS4_CUBE_FACE_ENA_POSZ          (1<<15)
- #define MS4_CUBE_FACE_ENA_MASK          (0x3f<<15)
- #define MS4_MAX_LOD_SHIFT		9
- #define MS4_MAX_LOD_MASK		(0x3f<<9)
- #define MS4_MIP_LAYOUT_LEGACY           (0<<8)
- #define MS4_MIP_LAYOUT_BELOW_LPT        (0<<8)
- #define MS4_MIP_LAYOUT_RIGHT_LPT        (1<<8)
- #define MS4_VOLUME_DEPTH_SHIFT          0    
- #define MS4_VOLUME_DEPTH_MASK           (0xff<<0)
- 
- #define STATE3D_SAMPLER_STATE		(CMD_3D | (0x1d<<24)|(0x01<<16))
- 
- #define SS1_MAPMASK_SHIFT               0
- #define SS1_MAPMASK_MASK                (0x8fff<<0)
- 
- #define SS2_REVERSE_GAMMA_ENABLE        (1<<31)
- #define SS2_PLANAR_TO_PACKED_ENABLE     (1<<30)
- #define SS2_COLORSPACE_CONVERSION       (1<<29)
- #define SS2_CHROMAKEY_SHIFT             27
- #define SS2_BASE_MIP_LEVEL_SHIFT        22
- #define SS2_BASE_MIP_LEVEL_MASK         (0x1f<<22)
- #define SS2_MIP_FILTER_SHIFT            20
- #define SS2_MIP_FILTER_MASK             (0x3<<20)
- #define   MIPFILTER_NONE       	0
- #define   MIPFILTER_NEAREST	1
- #define   MIPFILTER_LINEAR	3
- #define SS2_MAG_FILTER_SHIFT          17
- #define SS2_MAG_FILTER_MASK           (0x7<<17)
- #define   FILTER_NEAREST	0
- #define   FILTER_LINEAR		1
- #define   FILTER_ANISOTROPIC	2
- #define   FILTER_4X4_1    	3
- #define   FILTER_4X4_2    	4
- #define   FILTER_4X4_FLAT 	5
- #define   FILTER_6X5_MONO   	6 /* XXX - check */
- #define SS2_MIN_FILTER_SHIFT          14
- #define SS2_MIN_FILTER_MASK           (0x7<<14)
- #define SS2_LOD_BIAS_SHIFT            5
- #define SS2_LOD_BIAS_ONE              (0x10<<5)
- #define SS2_LOD_BIAS_MASK             (0x1ff<<5)
- /* Shadow requires:
-  *  MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format
-  *  FILTER_4X4_x  MIN and MAG filters
-  */
- #define SS2_SHADOW_ENABLE             (1<<4)
- #define SS2_MAX_ANISO_MASK            (1<<3)
- #define SS2_MAX_ANISO_2               (0<<3)
- #define SS2_MAX_ANISO_4               (1<<3)
- #define SS2_SHADOW_FUNC_SHIFT         0
- #define SS2_SHADOW_FUNC_MASK          (0x7<<0)
- /* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */
- 
- #define SS3_MIN_LOD_SHIFT            24
- #define SS3_MIN_LOD_ONE              (0x10<<24)
- #define SS3_MIN_LOD_MASK             (0xff<<24)
- #define SS3_KILL_PIXEL_ENABLE        (1<<17)
- #define SS3_TCX_ADDR_MODE_SHIFT      12
- #define SS3_TCX_ADDR_MODE_MASK       (0x7<<12)
- #define   TEXCOORDMODE_WRAP		0
- #define   TEXCOORDMODE_MIRROR		1
- #define   TEXCOORDMODE_CLAMP_EDGE	2
- #define   TEXCOORDMODE_CUBE       	3
- #define   TEXCOORDMODE_CLAMP_BORDER	4
- #define   TEXCOORDMODE_MIRROR_ONCE      5
- #define SS3_TCY_ADDR_MODE_SHIFT      9
- #define SS3_TCY_ADDR_MODE_MASK       (0x7<<9)
- #define SS3_TCZ_ADDR_MODE_SHIFT      6
- #define SS3_TCZ_ADDR_MODE_MASK       (0x7<<6)
- #define SS3_NORMALIZED_COORDS        (1<<5)
- #define SS3_TEXTUREMAP_INDEX_SHIFT   1
- #define SS3_TEXTUREMAP_INDEX_MASK    (0xf<<1)
- #define SS3_DEINTERLACER_ENABLE      (1<<0)
- 
- #define SS4_BORDER_COLOR_MASK        (~0)
- 
- #define STATE3D_LOAD_STATE_IMMEDIATE_1	(CMD_3D | (0x1d<<24)|(0x04<<16))
- #define I1_LOAD_S(n)				(1 << (4 + n))
- 
- #define S0_VB_OFFSET_MASK              0xffffffc
- #define S0_AUTO_CACHE_INV_DISABLE      (1<<0)
- 
- #define S1_VERTEX_WIDTH_SHIFT          24
- #define S1_VERTEX_WIDTH_MASK           (0x3f<<24)
- #define S1_VERTEX_PITCH_SHIFT          16
- #define S1_VERTEX_PITCH_MASK           (0x3f<<16)
- 
- #define TEXCOORDFMT_2D                 0x0
- #define TEXCOORDFMT_3D                 0x1
- #define TEXCOORDFMT_4D                 0x2
- #define TEXCOORDFMT_1D                 0x3
- #define TEXCOORDFMT_2D_16              0x4
- #define TEXCOORDFMT_4D_16              0x5
- #define TEXCOORDFMT_NOT_PRESENT        0xf
- #define S2_TEXCOORD_FMT0_MASK            0xf
- #define S2_TEXCOORD_FMT1_SHIFT           4
- #define S2_TEXCOORD_FMT(unit, type)    ((type)<<(unit*4))
- #define S2_TEXCOORD_NONE               (~0)
- 
- /* S3 not interesting */
- 
- #define S4_POINT_WIDTH_SHIFT           23
- #define S4_POINT_WIDTH_MASK            (0x1ff<<23)
- #define S4_LINE_WIDTH_SHIFT            19
- #define S4_LINE_WIDTH_ONE              (0x2<<19)
- #define S4_LINE_WIDTH_MASK             (0xf<<19)
- #define S4_FLATSHADE_ALPHA             (1<<18)
- #define S4_FLATSHADE_FOG               (1<<17)
- #define S4_FLATSHADE_SPECULAR          (1<<16)
- #define S4_FLATSHADE_COLOR             (1<<15)
- #define S4_CULLMODE_BOTH	       (0<<13)
- #define S4_CULLMODE_NONE	       (1<<13)
- #define S4_CULLMODE_CW		       (2<<13)
- #define S4_CULLMODE_CCW		       (3<<13)
- #define S4_CULLMODE_MASK	       (3<<13)
- #define S4_VFMT_POINT_WIDTH            (1<<12)
- #define S4_VFMT_SPEC_FOG               (1<<11)
- #define S4_VFMT_COLOR                  (1<<10)
- #define S4_VFMT_DEPTH_OFFSET           (1<<9)
- #define S4_VFMT_XYZ     	       (1<<6)
- #define S4_VFMT_XYZW     	       (2<<6)
- #define S4_VFMT_XY     		       (3<<6)
- #define S4_VFMT_XYW     	       (4<<6)
- #define S4_VFMT_XYZW_MASK              (7<<6)
- #define S4_FORCE_DEFAULT_DIFFUSE       (1<<5)
- #define S4_FORCE_DEFAULT_SPECULAR      (1<<4)
- #define S4_LOCAL_DEPTH_OFFSET_ENABLE   (1<<3)
- #define S4_VFMT_FOG_PARAM              (1<<2)
- #define S4_SPRITE_POINT_ENABLE         (1<<1)
- #define S4_LINE_ANTIALIAS_ENABLE       (1<<0)
- 
- #define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH   | 	\
- 		      S4_VFMT_SPEC_FOG      |	\
- 		      S4_VFMT_COLOR         |	\
- 		      S4_VFMT_DEPTH_OFFSET  |	\
- 		      S4_VFMT_XYZW_MASK     |	\
- 		      S4_VFMT_FOG_PARAM)
- 
- 
- #define S5_WRITEDISABLE_ALPHA          (1<<31)
- #define S5_WRITEDISABLE_RED            (1<<30)
- #define S5_WRITEDISABLE_GREEN          (1<<29)
- #define S5_WRITEDISABLE_BLUE           (1<<28)
- #define S5_WRITEDISABLE_MASK           (0xf<<28)
- #define S5_FORCE_DEFAULT_POINT_SIZE    (1<<27)
- #define S5_LAST_PIXEL_ENABLE           (1<<26)
- #define S5_GLOBAL_DEPTH_OFFSET_ENABLE  (1<<25)
- #define S5_FOG_ENABLE                  (1<<24)
- #define S5_STENCIL_REF_SHIFT           16
- #define S5_STENCIL_REF_MASK            (0xff<<16)
- #define S5_STENCIL_TEST_FUNC_SHIFT     13
- #define S5_STENCIL_TEST_FUNC_MASK      (0x7<<13)
- #define S5_STENCIL_FAIL_SHIFT          10
- #define S5_STENCIL_FAIL_MASK           (0x7<<10)
- #define S5_STENCIL_PASS_Z_FAIL_SHIFT   7
- #define S5_STENCIL_PASS_Z_FAIL_MASK    (0x7<<7)
- #define S5_STENCIL_PASS_Z_PASS_SHIFT   4
- #define S5_STENCIL_PASS_Z_PASS_MASK    (0x7<<4)
- #define S5_STENCIL_WRITE_ENABLE        (1<<3)
- #define S5_STENCIL_TEST_ENABLE         (1<<2)
- #define S5_COLOR_DITHER_ENABLE         (1<<1)
- #define S5_LOGICOP_ENABLE              (1<<0)
- 
- 
- #define S6_ALPHA_TEST_ENABLE           (1<<31)
- #define S6_ALPHA_TEST_FUNC_SHIFT       28
- #define S6_ALPHA_TEST_FUNC_MASK        (0x7<<28)
- #define S6_ALPHA_REF_SHIFT             20
- #define S6_ALPHA_REF_MASK              (0xff<<20)
- #define S6_DEPTH_TEST_ENABLE           (1<<19)
- #define S6_DEPTH_TEST_FUNC_SHIFT       16
- #define S6_DEPTH_TEST_FUNC_MASK        (0x7<<16)
- #define S6_CBUF_BLEND_ENABLE           (1<<15)
- #define S6_CBUF_BLEND_FUNC_SHIFT       12
- #define S6_CBUF_BLEND_FUNC_MASK        (0x7<<12)
- #define S6_CBUF_SRC_BLEND_FACT_SHIFT   8
- #define S6_CBUF_SRC_BLEND_FACT_MASK    (0xf<<8)
- #define S6_CBUF_DST_BLEND_FACT_SHIFT   4
- #define S6_CBUF_DST_BLEND_FACT_MASK    (0xf<<4)
- #define S6_DEPTH_WRITE_ENABLE          (1<<3)
- #define S6_COLOR_WRITE_ENABLE          (1<<2)
- #define S6_TRISTRIP_PV_SHIFT           0
- #define S6_TRISTRIP_PV_MASK            (0x3<<0)
- 
- #define S7_DEPTH_OFFSET_CONST_MASK     ~0
- 
- #define STATE3D_PIXEL_SHADER_PROGRAM	(CMD_3D | (0x1d<<24)|(0x05<<16))
- 
- #define REG_TYPE_R                 0 /* temporary regs, no need to
- 				      * dcl, must be written before
- 				      * read -- Preserved between
- 				      * phases. 
- 				      */
- #define REG_TYPE_T                 1 /* Interpolated values, must be
- 				      * dcl'ed before use.
- 				      *
- 				      * 0..7: texture coord,
- 				      * 8: diffuse spec,
- 				      * 9: specular color,
- 				      * 10: fog parameter in w.
- 				      */
- #define REG_TYPE_CONST             2 /* Restriction: only one const
- 				      * can be referenced per
- 				      * instruction, though it may be
- 				      * selected for multiple inputs.
- 				      * Constants not initialized
- 				      * default to zero.
- 				      */
- #define REG_TYPE_S                 3 /* sampler */
- #define REG_TYPE_OC                4 /* output color (rgba) */
- #define REG_TYPE_OD                5 /* output depth (w), xyz are
- 				      * temporaries.  If not written,
- 				      * interpolated depth is used?
- 				      */
- #define REG_TYPE_U                 6 /* unpreserved temporaries */
- #define REG_TYPE_MASK              0x7
- #define REG_NR_MASK                0xf
- 
- 
- /* REG_TYPE_T:
-  */
- #define T_TEX0     0
- #define T_TEX1     1
- #define T_TEX2     2
- #define T_TEX3     3
- #define T_TEX4     4
- #define T_TEX5     5
- #define T_TEX6     6
- #define T_TEX7     7
- #define T_DIFFUSE  8
- #define T_SPECULAR 9
- #define T_FOG_W    10		/* interpolated fog is in W coord */
- 
- /* Arithmetic instructions */
- 
- /* .replicate_swizzle == selection and replication of a particular
-  * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww 
-  */
- #define A0_NOP    (0x0<<24)		/* no operation */
- #define A0_ADD    (0x1<<24)		/* dst = src0 + src1 */
- #define A0_MOV    (0x2<<24)		/* dst = src0 */
- #define A0_MUL    (0x3<<24)		/* dst = src0 * src1 */
- #define A0_MAD    (0x4<<24)		/* dst = src0 * src1 + src2 */
- #define A0_DP2ADD (0x5<<24)		/* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
- #define A0_DP3    (0x6<<24)		/* dst.xyzw = src0.xyz dot src1.xyz */
- #define A0_DP4    (0x7<<24)		/* dst.xyzw = src0.xyzw dot src1.xyzw */
- #define A0_FRC    (0x8<<24)		/* dst = src0 - floor(src0) */
- #define A0_RCP    (0x9<<24)		/* dst.xyzw = 1/(src0.replicate_swizzle) */
- #define A0_RSQ    (0xa<<24)		/* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
- #define A0_EXP    (0xb<<24)		/* dst.xyzw = exp2(src0.replicate_swizzle) */
- #define A0_LOG    (0xc<<24)		/* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
- #define A0_CMP    (0xd<<24)		/* dst = (src0 >= 0.0) ? src1 : src2 */
- #define A0_MIN    (0xe<<24)		/* dst = (src0 < src1) ? src0 : src1 */
- #define A0_MAX    (0xf<<24)		/* dst = (src0 >= src1) ? src0 : src1 */
- #define A0_FLR    (0x10<<24)		/* dst = floor(src0) */
- #define A0_MOD    (0x11<<24)		/* dst = src0 fmod 1.0 */
- #define A0_TRC    (0x12<<24)		/* dst = int(src0) */
- #define A0_SGE    (0x13<<24)		/* dst = src0 >= src1 ? 1.0 : 0.0 */
- #define A0_SLT    (0x14<<24)		/* dst = src0 < src1 ? 1.0 : 0.0 */
- #define A0_DEST_SATURATE                 (1<<22)
- #define A0_DEST_TYPE_SHIFT                19
- /* Allow: R, OC, OD, U */
- #define A0_DEST_NR_SHIFT                 14
- /* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
- #define A0_DEST_CHANNEL_X                (1<<10)
- #define A0_DEST_CHANNEL_Y                (2<<10)
- #define A0_DEST_CHANNEL_Z                (4<<10)
- #define A0_DEST_CHANNEL_W                (8<<10)
- #define A0_DEST_CHANNEL_ALL              (0xf<<10)
- #define A0_DEST_CHANNEL_SHIFT            10
- #define A0_SRC0_TYPE_SHIFT               7
- #define A0_SRC0_NR_SHIFT                 2
- 
- #define A0_DEST_CHANNEL_XY              (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
- #define A0_DEST_CHANNEL_XYZ             (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
- 
- 
- #define SRC_X        0
- #define SRC_Y        1
- #define SRC_Z        2
- #define SRC_W        3
- #define SRC_ZERO     4
- #define SRC_ONE      5
- 
- #define A1_SRC0_CHANNEL_X_NEGATE         (1<<31)
- #define A1_SRC0_CHANNEL_X_SHIFT          28
- #define A1_SRC0_CHANNEL_Y_NEGATE         (1<<27)
- #define A1_SRC0_CHANNEL_Y_SHIFT          24
- #define A1_SRC0_CHANNEL_Z_NEGATE         (1<<23)
- #define A1_SRC0_CHANNEL_Z_SHIFT          20
- #define A1_SRC0_CHANNEL_W_NEGATE         (1<<19)
- #define A1_SRC0_CHANNEL_W_SHIFT          16
- #define A1_SRC1_TYPE_SHIFT               13
- #define A1_SRC1_NR_SHIFT                 8
- #define A1_SRC1_CHANNEL_X_NEGATE         (1<<7)
- #define A1_SRC1_CHANNEL_X_SHIFT          4
- #define A1_SRC1_CHANNEL_Y_NEGATE         (1<<3)
- #define A1_SRC1_CHANNEL_Y_SHIFT          0
- 
- #define A2_SRC1_CHANNEL_Z_NEGATE         (1<<31)
- #define A2_SRC1_CHANNEL_Z_SHIFT          28
- #define A2_SRC1_CHANNEL_W_NEGATE         (1<<27)
- #define A2_SRC1_CHANNEL_W_SHIFT          24
- #define A2_SRC2_TYPE_SHIFT               21
- #define A2_SRC2_NR_SHIFT                 16
- #define A2_SRC2_CHANNEL_X_NEGATE         (1<<15)
- #define A2_SRC2_CHANNEL_X_SHIFT          12
- #define A2_SRC2_CHANNEL_Y_NEGATE         (1<<11)
- #define A2_SRC2_CHANNEL_Y_SHIFT          8
- #define A2_SRC2_CHANNEL_Z_NEGATE         (1<<7)
- #define A2_SRC2_CHANNEL_Z_SHIFT          4
- #define A2_SRC2_CHANNEL_W_NEGATE         (1<<3)
- #define A2_SRC2_CHANNEL_W_SHIFT          0
- 
- 
- 
- /* Texture instructions */
- #define T0_TEXLD     (0x15<<24)	/* Sample texture using predeclared
- 				 * sampler and address, and output
- 				 * filtered texel data to destination
- 				 * register */
- #define T0_TEXLDP    (0x16<<24)	/* Same as texld but performs a
- 				 * perspective divide of the texture
- 				 * coordinate .xyz values by .w before
- 				 * sampling. */
- #define T0_TEXLDB    (0x17<<24)	/* Same as texld but biases the
- 				 * computed LOD by w.  Only S4.6 two's
- 				 * comp is used.  This implies that a
- 				 * float to fixed conversion is
- 				 * done. */
- #define T0_TEXKILL   (0x18<<24)	/* Does not perform a sampling
- 				 * operation.  Simply kills the pixel
- 				 * if any channel of the address
- 				 * register is < 0.0. */
- #define T0_DEST_TYPE_SHIFT                19
- /* Allow: R, OC, OD, U */
- /* Note: U (unpreserved) regs do not retain their values between
-  * phases (cannot be used for feedback) 
-  *
-  * Note: oC and OD registers can only be used as the destination of a
-  * texture instruction once per phase (this is an implementation
-  * restriction). 
-  */
- #define T0_DEST_NR_SHIFT                 14
- /* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
- #define T0_SAMPLER_NR_SHIFT              0 /* This field ignored for TEXKILL */
- #define T0_SAMPLER_NR_MASK               (0xf<<0)
- 
- #define T1_ADDRESS_REG_TYPE_SHIFT        24 /* Reg to use as texture coord */
- /* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
- #define T1_ADDRESS_REG_NR_SHIFT          17
- #define T2_MBZ                           0
- 
- /* Declaration instructions */
- #define D0_DCL       (0x19<<24)	/* Declare a t (interpolated attrib)
- 				 * register or an s (sampler)
- 				 * register. */
- #define D0_SAMPLE_TYPE_SHIFT              22
- #define D0_SAMPLE_TYPE_2D                 (0x0<<22)
- #define D0_SAMPLE_TYPE_CUBE               (0x1<<22)
- #define D0_SAMPLE_TYPE_VOLUME             (0x2<<22)
- #define D0_SAMPLE_TYPE_MASK               (0x3<<22)
- 
- #define D0_TYPE_SHIFT                19
- /* Allow: T, S */
- #define D0_NR_SHIFT                  14
- /* Allow T: 0..10, S: 0..15 */
- #define D0_CHANNEL_X                (1<<10)
- #define D0_CHANNEL_Y                (2<<10)
- #define D0_CHANNEL_Z                (4<<10)
- #define D0_CHANNEL_W                (8<<10)
- #define D0_CHANNEL_ALL              (0xf<<10)
- #define D0_CHANNEL_NONE             (0<<10)
- 
- #define D0_CHANNEL_XY               (D0_CHANNEL_X|D0_CHANNEL_Y)
- #define D0_CHANNEL_XYZ              (D0_CHANNEL_XY|D0_CHANNEL_Z)
- /* End description of STATE3D_PIXEL_SHADER_PROGRAM */
- 
- #define STATE3D_PIXEL_SHADER_CONSTANTS	(CMD_3D | (0x1d<<24)|(0x06<<16))
- 
- #define STATE3D_DRAWING_RECTANGLE	(CMD_3D | (0x1d<<24)|(0x80<<16)|3)
- 
- #define STATE3D_SCISSOR_RECTANGLE	(CMD_3D | (0x1d<<24)|(0x81<<16)|1)
- 
- #define STATE3D_STIPPLE			(CMD_3D | (0x1d<<24)|(0x83<<16))
- #define ST1_ENABLE               (1<<16)
- #define ST1_MASK                 (0xffff)
- 
- #define STATE3D_DEST_BUFFER_VARIABLES	(CMD_3D | (0x1d<<24)|(0x85<<16))
- #define TEX_DEFAULT_COLOR_OGL           (0<<30)
- #define TEX_DEFAULT_COLOR_D3D           (1<<30)
- #define ZR_EARLY_DEPTH                  (1<<29)
- #define LOD_PRECLAMP_OGL                (1<<28)
- #define LOD_PRECLAMP_D3D                (0<<28)
- #define DITHER_FULL_ALWAYS              (0<<26)
- #define DITHER_FULL_ON_FB_BLEND         (1<<26)
- #define DITHER_CLAMPED_ALWAYS           (2<<26)
- #define LINEAR_GAMMA_BLEND_32BPP        (1<<25)
- #define DEBUG_DISABLE_ENH_DITHER        (1<<24)
- #define DSTORG_HORIZ_BIAS(x)		((x)<<20)
- #define DSTORG_VERT_BIAS(x)		((x)<<16)
- #define COLOR_4_2_2_CHNL_WRT_ALL	0
- #define COLOR_4_2_2_CHNL_WRT_Y		(1<<12)
- #define COLOR_4_2_2_CHNL_WRT_CR		(2<<12)
- #define COLOR_4_2_2_CHNL_WRT_CB		(3<<12)
- #define COLOR_4_2_2_CHNL_WRT_CRCB	(4<<12)
- #define COLR_BUF_8BIT			0
- #define COLR_BUF_RGB555 		(1<<8)
- #define COLR_BUF_RGB565 		(2<<8)
- #define COLR_BUF_ARGB8888		(3<<8)
- #define DEPTH_FRMT_16_FIXED		0
- #define DEPTH_FRMT_16_FLOAT		(1<<2)
- #define DEPTH_FRMT_24_FIXED_8_OTHER	(2<<2)
- #define VERT_LINE_STRIDE_1		(1<<1)
- #define VERT_LINE_STRIDE_0		(0<<1)
- #define VERT_LINE_STRIDE_OFS_1		1
- #define VERT_LINE_STRIDE_OFS_0		0
- 
- #define STATE3D_CONST_BLEND_COLOR	(CMD_3D | (0x1d<<24)|(0x88<<16))
- 
  #define STATE3D_FOG_MODE		((3<<29)|(0x1d<<24)|(0x89<<16)|2)
  #define FOG_MODE_VERTEX 		(1<<31)
--#define STATE3D_MAP_COORD_TRANSFORM	((3<<29)|(0x1d<<24)|(0x8c<<16))
- 
- #define STATE3D_BUFFER_INFO		(CMD_3D | (0x1d<<24)|(0x8e<<16)|1)
- #define BUFFERID_COLOR_BACK		(3 << 24)
- #define BUFFERID_COLOR_AUX		(4 << 24)
- #define BUFFERID_MC_INTRA_CORR		(5 << 24)
- #define BUFFERID_DEPTH			(7 << 24)
- #define BUFFER_USE_FENCES		(1 << 23)
- 
- #define STATE3D_DFLT_Z_CMD		(CMD_3D | (0x1d<<24)|(0x98<<16))
- 
- #define STATE3D_DFLT_DIFFUSE_CMD	(CMD_3D | (0x1d<<24)|(0x99<<16))
- 
- #define STATE3D_DFLT_SPEC_CMD		(CMD_3D | (0x1d<<24)|(0x9a<<16))
- 
- #define PRIMITIVE3D			(CMD_3D | (0x1f<<24))
- #define PRIM3D_INLINE		(0<<23)
- #define PRIM3D_INDIRECT		(1<<23)
- #define PRIM3D_TRILIST		(0x0<<18)
- #define PRIM3D_TRISTRIP 	(0x1<<18)
- #define PRIM3D_TRISTRIP_RVRSE	(0x2<<18)
- #define PRIM3D_TRIFAN		(0x3<<18)
- #define PRIM3D_POLY		(0x4<<18)
- #define PRIM3D_LINELIST 	(0x5<<18)
- #define PRIM3D_LINESTRIP	(0x6<<18)
- #define PRIM3D_RECTLIST 	(0x7<<18)
- #define PRIM3D_POINTLIST	(0x8<<18)
- #define PRIM3D_DIB		(0x9<<18)
- #define PRIM3D_CLEAR_RECT	(0xa<<18)
- #define PRIM3D_ZONE_INIT	(0xd<<18)
- #define PRIM3D_MASK		(0x1f<<18)
- 
 +
  #define DISABLE_TEX_TRANSFORM		(1<<28)
  #define TEXTURE_SET(x)			(x<<29)
 -#define STATE3D_RASTERIZATION_RULES	((3<<29)|(0x07<<24))
 -#define POINT_RASTER_ENABLE		(1<<15)
 -#define POINT_RASTER_OGL		(1<<13)
 +
  #define STATE3D_VERTEX_TRANSFORM	((3<<29)|(0x1d<<24)|(0x8b<<16))
  #define DISABLE_VIEWPORT_TRANSFORM	(1<<31)
  #define DISABLE_PERSPECTIVE_DIVIDE	(1<<29)
diff --cc src/i830_video.c
index 37dcaa7,a608a7e..044d6c1
@@@ -77,6 -77,6 +77,7 @@@
  #include "regionstr.h"
  #include "randrstr.h"
  #include "i830.h"
++#include "i830_video.h"
  #include "xf86xv.h"
  #include <X11/extensions/Xv.h>
  #include "xaa.h"
@@@ -369,46 -360,45 +370,6 @@@
     CARD16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
  } I830OverlayRegRec, *I830OverlayRegPtr;
  
--typedef struct {
--   CARD32 YBuf0offset;
--   CARD32 UBuf0offset;
--   CARD32 VBuf0offset;
--
--   CARD32 YBuf1offset;
--   CARD32 UBuf1offset;
--   CARD32 VBuf1offset;
--
--   unsigned char currentBuf;
--
--   int brightness;
--   int contrast;
--   int pipe;
--   int doubleBuffer;
--
--   RegionRec clip;
--   CARD32 colorKey;
--
--   CARD32 gamma0;
--   CARD32 gamma1;
--   CARD32 gamma2;
--   CARD32 gamma3;
--   CARD32 gamma4;
--   CARD32 gamma5;
--
--   CARD32 videoStatus;
--   Time offTime;
--   Time freeTime;
--   FBLinearPtr linear;
--
--   Bool overlayOK;
--   int oneLineMode;
--   int scaleRatio;
-    Bool textured;
--} I830PortPrivRec, *I830PortPrivPtr;
--
--#define GET_PORT_PRIVATE(pScrn) \
--   (I830PortPrivPtr)((I830PTR(pScrn))->adaptor->pPortPrivates[0].ptr)
--
  #if VIDEO_DEBUG
  static void
  CompareOverlay(I830Ptr pI830, CARD32 * overlay, int size)
diff --cc src/i830_video.h
index 0000000,0000000..9e11641
new file mode 100644
@@@ -1,0 -1,0 +1,76 @@@
++/***************************************************************************
++ 
++Copyright 2000 Intel Corporation.  All Rights Reserved. 
++
++Permission is hereby granted, free of charge, to any person obtaining a 
++copy of this software and associated documentation files (the 
++"Software"), to deal in the Software without restriction, including 
++without limitation the rights to use, copy, modify, merge, publish, 
++distribute, sub license, and/or sell copies of the Software, and to 
++permit persons to whom the Software is furnished to do so, subject to 
++the following conditions: 
++
++The above copyright notice and this permission notice (including the 
++next paragraph) shall be included in all copies or substantial portions 
++of the Software. 
++
++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
++OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
++IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 
++DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
++OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
++THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++
++**************************************************************************/
++
++#include "xf86.h"
++#include "xf86_OSproc.h"
++
++typedef struct {
++   CARD32 YBuf0offset;
++   CARD32 UBuf0offset;
++   CARD32 VBuf0offset;
++
++   CARD32 YBuf1offset;
++   CARD32 UBuf1offset;
++   CARD32 VBuf1offset;
++
++   unsigned char currentBuf;
++
++   int brightness;
++   int contrast;
++   int pipe;
++   int doubleBuffer;
++
++   RegionRec clip;
++   CARD32 colorKey;
++
++   CARD32 gamma0;
++   CARD32 gamma1;
++   CARD32 gamma2;
++   CARD32 gamma3;
++   CARD32 gamma4;
++   CARD32 gamma5;
++
++   CARD32 videoStatus;
++   Time offTime;
++   Time freeTime;
++   FBLinearPtr linear;
++
++   Bool overlayOK;
++   int oneLineMode;
++   int scaleRatio;
++   Bool textured;
++} I830PortPrivRec, *I830PortPrivPtr;
++
++#define GET_PORT_PRIVATE(pScrn) \
++   (I830PortPrivPtr)((I830PTR(pScrn))->adaptor->pPortPrivates[0].ptr)
++
++void I915DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv,
++			      int id, RegionPtr dstRegion, short width,
++			      short height, int video_pitch,
++			      int x1, int y1, int x2, int y2,
++			      short src_w, short src_h,
++			      short drw_w, short drw_h,
++			      DrawablePtr pDraw);
diff --cc src/i915_video.c
index 0000000,0000000..8d687a1
new file mode 100644
@@@ -1,0 -1,0 +1,538 @@@
++/*
++ * Copyright © 2006 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ *
++ * Authors:
++ *    Eric Anholt <eric at anholt.net>
++ *
++ */
++
++#ifdef HAVE_CONFIG_H
++#include "config.h"
++#endif
++
++#include "xf86.h"
++#include "xf86_OSproc.h"
++#include "xf86xv.h"
++#include "fourcc.h"
++
++#include "i830.h"
++#include "i830_video.h"
++#include "i915_reg.h"
++
++union intfloat {
++   CARD32 ui;
++   float f;
++};
++
++#define OUT_RING_F(x) do { /* emit the bit pattern of float x as one dword */ \
++   union intfloat _tmp;							\
++   _tmp.f = (x);							\
++   OUT_RING(_tmp.ui);							\
++} while (0)
++
++#define OUT_DCL(type, nr) do { /* 3-dword DCL: T regs get all channels */ \
++   CARD32 chans = 0;							\
++   if (REG_TYPE_##type == REG_TYPE_T)					\
++      chans = D0_CHANNEL_ALL;						\
++   else if (REG_TYPE_##type != REG_TYPE_S)				\
++      FatalError("wrong reg type %d to declare\n", REG_TYPE_##type);	\
++   OUT_RING(D0_DCL |							\
++	    (REG_TYPE_##type << D0_TYPE_SHIFT) | ((nr) << D0_NR_SHIFT) |	\
++	    chans);							\
++   OUT_RING(0x00000000);						\
++   OUT_RING(0x00000000);						\
++} while (0)
++
++#define OUT_TEXLD(dest_type, dest_nr, sampler_nr, addr_type, addr_nr)	\
++do { /* 3-dword TEXLD: dest reg, sampler number, address (coord) reg */	\
++      OUT_RING(T0_TEXLD |						\
++	       (REG_TYPE_##dest_type << T0_DEST_TYPE_SHIFT) |		\
++	       ((dest_nr) << T0_DEST_NR_SHIFT) |			\
++	       ((sampler_nr) << T0_SAMPLER_NR_SHIFT));			\
++      OUT_RING((REG_TYPE_##addr_type << T1_ADDRESS_REG_TYPE_SHIFT) |	\
++	       ((addr_nr) << T1_ADDRESS_REG_NR_SHIFT));			\
++      OUT_RING(0x00000000);						\
++} while (0)
++
++/* Move dest_chan of src0 (identity swizzle) to dest; other channels kept. */
++#define OUT_MOV_TO_CHANNEL(dest_type, dest_nr, src0_type, src0_nr,	\
++			   dest_chan)					\
++do {									\
++   OUT_RING(A0_MOV | A0_DEST_CHANNEL_##dest_chan |			\
++	    (REG_TYPE_##dest_type << A0_DEST_TYPE_SHIFT) |		\
++	    ((dest_nr) << A0_DEST_NR_SHIFT) |				\
++	    (REG_TYPE_##src0_type << A0_SRC0_TYPE_SHIFT) |		\
++	    ((src0_nr) << A0_SRC0_NR_SHIFT));				\
++   OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |			\
++	    (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |			\
++	    (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |			\
++	    (SRC_W << A1_SRC0_CHANNEL_W_SHIFT));			\
++   OUT_RING(0);								\
++} while (0)
++
++/* Dot3-product src0 and src1 (register type + number each), storing the
++ * result in dest_chan of the dest.  Saturates, in case of out-of-range YUV.
++ */
++#define OUT_DP3_TO_CHANNEL(dest_type, dest_nr, src0_type, src0_nr,	\
++			   src1_type, src1_nr, dest_chan)		\
++do {									\
++   OUT_RING(A0_DP3 | A0_DEST_CHANNEL_##dest_chan | A0_DEST_SATURATE |	\
++	    (REG_TYPE_##dest_type << A0_DEST_TYPE_SHIFT) |		\
++	    ((dest_nr) << A0_DEST_NR_SHIFT) |				\
++	    (REG_TYPE_##src0_type << A0_SRC0_TYPE_SHIFT) |		\
++	    ((src0_nr) << A0_SRC0_NR_SHIFT));				\
++   OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |			\
++	    (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |			\
++	    (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |			\
++	    (SRC_W << A1_SRC0_CHANNEL_W_SHIFT) |			\
++	    (REG_TYPE_##src1_type << A1_SRC1_TYPE_SHIFT) |		\
++	    ((src1_nr) << A1_SRC1_NR_SHIFT) | /* NR field, not TYPE */	\
++	    (SRC_X << A1_SRC1_CHANNEL_X_SHIFT) |			\
++	    (SRC_Y << A1_SRC1_CHANNEL_Y_SHIFT));			\
++   OUT_RING((SRC_Z << A2_SRC1_CHANNEL_Z_SHIFT) |			\
++	    (SRC_W << A2_SRC1_CHANNEL_W_SHIFT));			\
++} while (0)
++
++void
++I915DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
++			 RegionPtr dstRegion,
++			 short width, short height, int video_pitch,
++			 int x1, int y1, int x2, int y2,
++			 short src_w, short src_h, short drw_w, short drw_h,
++			 DrawablePtr pDraw)
++{
++   I830Ptr pI830 = I830PTR(pScrn);
++   CARD32 format, ms3, s2;
++   BoxPtr pbox;
++   int nbox, dxo, dyo;
++   Bool planar;
++
++   ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height,
++	  video_pitch);
++
++   switch (id) {
++   case FOURCC_UYVY:
++   case FOURCC_YUY2:
++      planar = FALSE;
++      break;
++   case FOURCC_YV12:
++   case FOURCC_I420:
++      planar = TRUE;
++      break;
++   default:
++      ErrorF("Unknown format 0x%x\n", id);
++      planar = FALSE;
++      break;
++   }
++
++   /* Tell the rotation code that we have stomped its invariant state by
++    * setting a high bit.  We don't use any invariant 3D state for video, so we
++    * don't have to worry about it ourselves.
++    */
++   *pI830->used3D |= 1 << 30;
++
++   BEGIN_LP_RING(44);
++
++   /* invariant state */
++   OUT_RING(MI_NOOP);
++   OUT_RING(_3DSTATE_AA_CMD |
++	    AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 |
++	    AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
++
++   OUT_RING(_3DSTATE_DFLT_DIFFUSE_CMD);
++   OUT_RING(0x00000000);
++
++   OUT_RING(_3DSTATE_DFLT_SPEC_CMD);
++   OUT_RING(0x00000000);
++
++   OUT_RING(_3DSTATE_DFLT_Z_CMD);
++   OUT_RING(0x00000000);
++
++   OUT_RING(_3DSTATE_COORD_SET_BINDINGS | CSB_TCB(0, 0) | CSB_TCB(1, 1) |
++	    CSB_TCB(2,2) | CSB_TCB(3,3) | CSB_TCB(4,4) | CSB_TCB(5,5) |
++	    CSB_TCB(6,6) | CSB_TCB(7,7));
++
++   OUT_RING(_3DSTATE_RASTER_RULES_CMD |
++	    ENABLE_TRI_FAN_PROVOKE_VRTX | TRI_FAN_PROVOKE_VRTX(2) |
++	    ENABLE_LINE_STRIP_PROVOKE_VRTX | LINE_STRIP_PROVOKE_VRTX(1) |
++	    ENABLE_TEXKILL_3D_4D | TEXKILL_4D |
++	    ENABLE_POINT_RASTER_RULE | OGL_POINT_RASTER_RULE);
++
++   OUT_RING(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | 1);
++   OUT_RING(0x00000000); /* texture coordinate wrap */
++
++   /* flush map & render cache */
++   OUT_RING(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE);
++   OUT_RING(0x00000000);
++
++   /* draw rect -- just clipping */
++   OUT_RING(_3DSTATE_DRAW_RECT_CMD);
++   OUT_RING(0x00000000);	/* flags */
++   OUT_RING(0x00000000);	/* ymin, xmin */
++   OUT_RING((pScrn->virtualX - 1) |
++	    (pScrn->virtualY - 1) << 16); /* ymax, xmax */
++   OUT_RING(0x00000000);	/* yorigin, xorigin */
++   OUT_RING(MI_NOOP);
++
++   /* scissor */
++   OUT_RING(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
++   OUT_RING(_3DSTATE_SCISSOR_RECT_0_CMD);
++   OUT_RING(0x00000000);	/* ymin, xmin */
++   OUT_RING(0x00000000);	/* ymax, xmax */
++
++   OUT_RING(0x7c000003);	/* unknown command */
++   OUT_RING(0x7d070000);
++   OUT_RING(0x00000000);
++   OUT_RING(0x68000002);
++
++   /* context setup */
++   OUT_RING(_3DSTATE_MODES_4_CMD |
++	    ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
++	    ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
++	    ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
++
++   OUT_RING(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) |
++	    I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 4);
++   s2 = S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D);
++   if (planar)
++      s2 |= S2_TEXCOORD_FMT(1, TEXCOORDFMT_2D);
++   else
++      s2 |= S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT);
++   s2 |= S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
++      S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
++      S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
++      S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
++      S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
++      S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT);
++   OUT_RING(s2);
++   OUT_RING((1 << S4_POINT_WIDTH_SHIFT) | S4_LINE_WIDTH_ONE |
++	    S4_CULLMODE_NONE | S4_VFMT_XY);
++   OUT_RING(0x00000000); /* S5 - enable bits */
++   OUT_RING((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
++	    (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
++	    (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) | S6_COLOR_WRITE_ENABLE |
++	    (2 << S6_TRISTRIP_PV_SHIFT));
++
++   OUT_RING(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
++	    IAB_MODIFY_ENABLE |
++	    IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) |
++	    IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT) |
++	    IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT));
++
++   OUT_RING(_3DSTATE_CONST_BLEND_COLOR_CMD);
++   OUT_RING(0x00000000);
++
++   OUT_RING(_3DSTATE_DST_BUF_VARS_CMD);
++   if (pI830->cpp == 2)
++      format = COLR_BUF_RGB565;
++   else
++      format = COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER;
++
++   OUT_RING(LOD_PRECLAMP_OGL |
++     DSTORG_HORT_BIAS(0x80) | DSTORG_VERT_BIAS(0x80) | format);
++
++   OUT_RING(_3DSTATE_STIPPLE);
++   OUT_RING(0x00000000);
++
++   /* front buffer, pitch, offset */
++   OUT_RING(_3DSTATE_BUF_INFO_CMD);
++   OUT_RING(BUF_3D_ID_COLOR_BACK | BUF_3D_USE_FENCE |
++	    (((pI830->displayWidth * pI830->cpp) / 4) << 2));
++   OUT_RING(pI830->bufferOffset);
++   ADVANCE_LP_RING();
++
++   if (!planar) {
++      BEGIN_LP_RING(20);
++      /* fragment program - texture blend replace. */
++      OUT_RING(_3DSTATE_PIXEL_SHADER_PROGRAM | 8);
++      OUT_DCL(S, 0);
++      OUT_DCL(T, 0);
++      OUT_TEXLD(OC, 0, 0, T, 0);
++      /* End fragment program */
++
++      OUT_RING(_3DSTATE_SAMPLER_STATE | 3);
++      OUT_RING(0x00000001);
++      OUT_RING(SS2_COLORSPACE_CONVERSION |
++	       (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
++	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
++      OUT_RING((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
++	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT));
++      OUT_RING(0x00000000);
++
++      OUT_RING(_3DSTATE_MAP_STATE | 3);
++      OUT_RING(0x00000001);	/* texture map #1 */
++      OUT_RING(pPriv->YBuf0offset);
++      ms3 = MAPSURF_422;
++      switch (id) {
++      case FOURCC_YUY2:
++	 ms3 |= MT_422_YCRCB_NORMAL;
++	 break;
++      case FOURCC_UYVY:
++	 ms3 |= MT_422_YCRCB_SWAPY;
++	 break;
++      }
++      ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
++      ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
++      if (!pI830->disableTiling)
++	 ms3 |= MS3_USE_FENCE_REGS;
++      OUT_RING(ms3);
++      OUT_RING(((video_pitch / 4) - 1) << 21);
++      ADVANCE_LP_RING();
++   } else {
++      BEGIN_LP_RING(1 + 18 + (1 + 3*16) + 11 + 11);
++      OUT_RING(MI_NOOP);
++      /* For the planar formats, we set up three samplers -- one for each plane,
++       * in a Y8 format.  Because I couldn't get the special PLANAR_TO_PACKED
++       * shader setup to work, I did the manual pixel shader:
++       *
++       * y' = y - .0625
++       * u' = u - .5
++       * v' = v - .5;
++       *
++       * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
++       * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
++       * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
++       *
++       * register assignment:
++       * r0 = (y',u',v',0)
++       * r1 = (y,y,y,y)
++       * r2 = (u,u,u,u)
++       * r3 = (v,v,v,v)
++       * OC = (r,g,b,1)
++       */
++      OUT_RING(_3DSTATE_PIXEL_SHADER_CONSTANTS | 16);
++      OUT_RING(0x000000f);	/* constants 0-3 */
++      /* constant 0: normalization offsets */
++      OUT_RING_F(-0.0625);
++      OUT_RING_F(-0.5);
++      OUT_RING_F(-0.5);
++      OUT_RING_F(0.0);
++      /* constant 1: r coefficients */
++      OUT_RING_F(1.1643);
++      OUT_RING_F(0.0);
++      OUT_RING_F(1.5958);
++      OUT_RING_F(0.0);
++      /* constant 2: g coefficients */
++      OUT_RING_F(1.1643);
++      OUT_RING_F(-0.39173);
++      OUT_RING_F(-0.81290);
++      OUT_RING_F(0.0);
++      /* constant 3: b coefficients */
++      OUT_RING_F(1.1643);
++      OUT_RING_F(2.017);
++      OUT_RING_F(0.0);
++      OUT_RING_F(0.0);
++
++      OUT_RING(_3DSTATE_PIXEL_SHADER_PROGRAM | (3 * 16 - 1));
++      /* Declare samplers */
++      OUT_DCL(S, 0);
++      OUT_DCL(S, 1);
++      OUT_DCL(S, 2);
++      OUT_DCL(T, 0);
++      OUT_DCL(T, 1);
++
++      /* Load samplers to temporaries.  Y (sampler 0) gets the un-halved coords
++       * from t1.
++       */
++      OUT_TEXLD(R, 1, 0, T, 1);
++      OUT_TEXLD(R, 2, 1, T, 0);
++      OUT_TEXLD(R, 3, 2, T, 0);
++
++      /* Move the sampled YUV data in R[123] to the first 3 channels of R0. */
++      OUT_MOV_TO_CHANNEL(R, 0, R, 1, X);
++      OUT_MOV_TO_CHANNEL(R, 0, R, 2, Y);
++      OUT_MOV_TO_CHANNEL(R, 0, R, 3, Z);
++
++      /* Normalize the YUV data */
++      OUT_RING(A0_ADD | A0_DEST_CHANNEL_ALL |
++	       (REG_TYPE_R << A0_DEST_TYPE_SHIFT) | (0 << A0_DEST_NR_SHIFT) |				\
++	       (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT));
++      OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |
++	       (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |
++	       (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |
++	       (SRC_W << A1_SRC0_CHANNEL_W_SHIFT) |
++	       (REG_TYPE_CONST << A1_SRC1_TYPE_SHIFT) | (0 << A1_SRC1_NR_SHIFT) |
++	       (SRC_X << A1_SRC1_CHANNEL_X_SHIFT) |
++	       (SRC_Y << A1_SRC1_CHANNEL_Y_SHIFT));
++      OUT_RING((SRC_Z << A2_SRC1_CHANNEL_Z_SHIFT) |
++	       (SRC_W << A2_SRC1_CHANNEL_W_SHIFT));
++
++      /* dot-product the YUV data in R0 by the vectors of coefficients for
++       * calculating R, G, and B, storing the results in the R, G, or B channels
++       * of the output color.
++       */
++      OUT_DP3_TO_CHANNEL(OC, 0, R, 0, CONST, 1, X);
++      OUT_DP3_TO_CHANNEL(OC, 0, R, 0, CONST, 2, Y);
++      OUT_DP3_TO_CHANNEL(OC, 0, R, 0, CONST, 3, Z);
++
++      /* Set alpha of the output to 1.0, by wiring W to 1 and not actually using
++       * the source.
++       */
++      OUT_RING(A0_MOV | A0_DEST_CHANNEL_W |
++	       (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | (0 << A0_DEST_NR_SHIFT) |
++	       (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT));
++      OUT_RING((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |
++	       (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |
++	       (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |
++	       (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT));
++      OUT_RING(0);
++      /* End fragment program */
++
++      OUT_RING(_3DSTATE_SAMPLER_STATE | 9);
++      OUT_RING(0x00000007);
++      /* sampler 0 */
++      OUT_RING(0x00000000);
++      OUT_RING((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
++	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
++      OUT_RING((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
++	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT));
++      /* sampler 1 */
++      OUT_RING(0x00000000);
++      OUT_RING((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
++	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
++      OUT_RING((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
++	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT));
++      /* sampler 2 */
++      OUT_RING(0x00000000);
++      OUT_RING((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
++	       (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
++      OUT_RING((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
++	       (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT));
++
++      OUT_RING(_3DSTATE_MAP_STATE | 9);
++      OUT_RING(0x00000007);
++
++      OUT_RING(pPriv->YBuf0offset);
++      ms3 = MAPSURF_8BIT | MT_8BIT_I8;
++      ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
++      ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
++      OUT_RING(ms3);
++      OUT_RING(((video_pitch * 2 / 4) - 1) << 21);
++
++      OUT_RING(pPriv->UBuf0offset);
++      ms3 = MAPSURF_8BIT | MT_8BIT_I8;
++      ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
++      ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
++      OUT_RING(ms3);
++      OUT_RING(((video_pitch / 4) - 1) << 21);
++
++      OUT_RING(pPriv->VBuf0offset);
++      ms3 = MAPSURF_8BIT | MT_8BIT_I8;
++      ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
++      ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
++      OUT_RING(ms3);
++      OUT_RING(((video_pitch / 4) - 1) << 21);
++      ADVANCE_LP_RING();
++   }
++   
++   {
++      BEGIN_LP_RING(2);
++      OUT_RING(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE);
++      OUT_RING(0x00000000);
++      ADVANCE_LP_RING();
++   }
++
++   dxo = dstRegion->extents.x1;
++   dyo = dstRegion->extents.y1;
++
++   pbox = REGION_RECTS(dstRegion);
++   nbox = REGION_NUM_RECTS(dstRegion);
++   while (nbox--)
++   {
++      int box_x1 = pbox->x1;
++      int box_y1 = pbox->y1;
++      int box_x2 = pbox->x2;
++      int box_y2 = pbox->y2;
++      float src_scale_x, src_scale_y;
++      int vert_data_count;
++
++      pbox++;
++
++      src_scale_x = (float)src_w / (float)drw_w;
++      src_scale_y  = (float)src_h / (float)drw_h;
++
++      if (!planar)
++	 vert_data_count = 12;
++      else
++	 vert_data_count = 18;
++
++      BEGIN_LP_RING(vert_data_count + 8);
++      OUT_RING(MI_NOOP);
++      OUT_RING(MI_NOOP);
++      OUT_RING(MI_NOOP);
++      OUT_RING(MI_NOOP);
++      OUT_RING(MI_NOOP);
++      OUT_RING(MI_NOOP);
++      OUT_RING(MI_NOOP);
++
++      /* vertex data - rect list consists of bottom right, bottom left, and top
++       * left vertices.
++       */
++      OUT_RING(PRIM3D_INLINE | PRIM3D_RECTLIST |
++	       (vert_data_count - 1));
++
++      /* bottom right */
++      OUT_RING_F(box_x2);
++      OUT_RING_F(box_y2);
++      if (!planar) {
++	 OUT_RING_F((box_x2 - dxo) * src_scale_x);
++	 OUT_RING_F((box_y2 - dyo) * src_scale_y);
++      } else {
++	 OUT_RING_F((box_x2 - dxo) * src_scale_x / 2.0);
++	 OUT_RING_F((box_y2 - dyo) * src_scale_y / 2.0);
++	 OUT_RING_F((box_x2 - dxo) * src_scale_x);
++	 OUT_RING_F((box_y2 - dyo) * src_scale_y);
++      }
++
++      /* bottom left */
++      OUT_RING_F(box_x1);
++      OUT_RING_F(box_y2);
++      if (!planar) {
++	 OUT_RING_F((box_x1 - dxo) * src_scale_x);
++	 OUT_RING_F((box_y2 - dyo) * src_scale_y);
++      } else {
++	 OUT_RING_F((box_x1 - dxo) * src_scale_x / 2.0);
++	 OUT_RING_F((box_y2 - dyo) * src_scale_y / 2.0);
++	 OUT_RING_F((box_x1 - dxo) * src_scale_x);
++	 OUT_RING_F((box_y2 - dyo) * src_scale_y);
++      }
++
++      /* top left */
++      OUT_RING_F(box_x1);
++      OUT_RING_F(box_y1);
++      if (!planar) {
++	 OUT_RING_F((box_x1 - dxo) * src_scale_x);
++	 OUT_RING_F((box_y1 - dyo) * src_scale_y);
++      } else {
++	 OUT_RING_F((box_x1 - dxo) * src_scale_x / 2.0);
++	 OUT_RING_F((box_y1 - dyo) * src_scale_y / 2.0);
++	 OUT_RING_F((box_x1 - dxo) * src_scale_x);
++	 OUT_RING_F((box_y1 - dyo) * src_scale_y);
++      }
++
++      ADVANCE_LP_RING();
++   }
++
++   if (pI830->AccelInfoRec)
++      pI830->AccelInfoRec->NeedToSync = TRUE;
++}
++
diff-tree 2a1b3cfccb7de53f7ce8f9e4816e4278afb1fcab (from c2cd10e1fba0e75c0ed3db5d17211bddf7ab1e33)
Author: Eric Anholt <anholt at FreeBSD.org>
Date:   Mon May 22 10:32:13 2006 -0700

    Use RECTLIST instead of TRIFAN for video so we get horizontal shearing instead
    of diagonal.  Also remove the unnecessary vertex elements that were being
    emitted.

diff --git a/src/i830_video.c b/src/i830_video.c
index a41db5c..37dcaa7 100644
--- a/src/i830_video.c
+++ b/src/i830_video.c
@@ -2025,70 +2025,6 @@ I830DisplayVideo(ScrnInfoPtr pScrn, int 
    OVERLAY_UPDATE;
 }
 
-/* Doesn't matter on the order for our purposes */
-typedef struct {
-   unsigned char red, green, blue, alpha;
-} intel_color_t;
-
-/* Vertex format */
-typedef union {
-   struct {
-      float x, y, z, w;
-      intel_color_t color;
-      intel_color_t specular;
-      float u0, v0;
-      float u1, v1;
-      float u2, v2;
-      float u3, v3;
-   } v;
-   float f[24];
-   unsigned int  ui[24];
-   unsigned char ub4[24][4];
-} intelVertex, *intelVertexPtr;
-
-static void draw_poly(CARD32 *vb,
-                      float verts[][2],
-                      float texcoords[][2],
-		      float texcoords2[][2])
-{
-   int vertex_size;
-   intelVertex tmp;
-   int i, k;
-
-   if (texcoords2 != NULL)
-      vertex_size = 10;
-   else
-      vertex_size = 8;
-   
-   /* initial constant vertex fields */
-   tmp.v.z = 1.0;
-   tmp.v.w = 1.0; 
-   tmp.v.color.red = 255;
-   tmp.v.color.green = 255;
-   tmp.v.color.blue = 255;
-   tmp.v.color.alpha = 255;
-   tmp.v.specular.red = 0;
-   tmp.v.specular.green = 0;
-   tmp.v.specular.blue = 0;
-   tmp.v.specular.alpha = 0;
-
-   for (k = 0; k < 4; k++) {
-      tmp.v.x = verts[k][0];
-      tmp.v.y = verts[k][1];
-      tmp.v.u0 = texcoords[k][0];
-      tmp.v.v0 = texcoords[k][1];
-      if (texcoords2 != NULL) {
-	 tmp.v.u1 = texcoords2[k][0];
-	 tmp.v.v1 = texcoords2[k][1];
-      }
-
-      for (i = 0 ; i < vertex_size ; i++)
-         vb[i] = tmp.ui[i];
-
-      vb += vertex_size;
-   }
-}
-
 union intfloat {
    CARD32 ui;
    float f;
@@ -2275,7 +2211,7 @@ I915DisplayVideoTextured(ScrnInfoPtr pSc
       S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT);
    OUT_RING(s2);
    OUT_RING((1 << S4_POINT_WIDTH_SHIFT) | S4_LINE_WIDTH_ONE |
-	    S4_CULLMODE_NONE | S4_VFMT_SPEC_FOG | S4_VFMT_COLOR | S4_VFMT_XYZW);
+	    S4_CULLMODE_NONE | S4_VFMT_XY);
    OUT_RING(0x00000000); /* S5 - enable bits */
    OUT_RING((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
 	    (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
@@ -2512,10 +2448,7 @@ I915DisplayVideoTextured(ScrnInfoPtr pSc
       int box_y1 = pbox->y1;
       int box_x2 = pbox->x2;
       int box_y2 = pbox->y2;
-      int j;
       float src_scale_x, src_scale_y;
-      CARD32 vb[40];
-      float verts[4][2], tex[4][2], tex2[4][2];
       int vert_data_count;
 
       pbox++;
@@ -2524,9 +2457,9 @@ I915DisplayVideoTextured(ScrnInfoPtr pSc
       src_scale_y  = (float)src_h / (float)drw_h;
 
       if (!planar)
-	 vert_data_count = 32;
+	 vert_data_count = 12;
       else
-	 vert_data_count = 40;
+	 vert_data_count = 18;
 
       BEGIN_LP_RING(vert_data_count + 8);
       OUT_RING(MI_NOOP);
@@ -2537,48 +2470,49 @@ I915DisplayVideoTextured(ScrnInfoPtr pSc
       OUT_RING(MI_NOOP);
       OUT_RING(MI_NOOP);
 
-      /* vertex data */
-      OUT_RING(PRIMITIVE3D | PRIM3D_INLINE | PRIM3D_TRIFAN |
+      /* vertex data - rect list consists of bottom right, bottom left, and top
+       * left vertices.
+       */
+      OUT_RING(PRIMITIVE3D | PRIM3D_INLINE | PRIM3D_RECTLIST |
 	       (vert_data_count - 1));
-      verts[0][0] = box_x1; verts[0][1] = box_y1;
-      verts[1][0] = box_x2; verts[1][1] = box_y1;
-      verts[2][0] = box_x2; verts[2][1] = box_y2;
-      verts[3][0] = box_x1; verts[3][1] = box_y2;
 
+      /* bottom right */
+      OUT_RING_F(box_x2);
+      OUT_RING_F(box_y2);
+      if (!planar) {
+	 OUT_RING_F((box_x2 - dxo) * src_scale_x);
+	 OUT_RING_F((box_y2 - dyo) * src_scale_y);
+      } else {
+	 OUT_RING_F((box_x2 - dxo) * src_scale_x / 2.0);
+	 OUT_RING_F((box_y2 - dyo) * src_scale_y / 2.0);
+	 OUT_RING_F((box_x2 - dxo) * src_scale_x);
+	 OUT_RING_F((box_y2 - dyo) * src_scale_y);
+      }
+
+      /* bottom left */
+      OUT_RING_F(box_x1);
+      OUT_RING_F(box_y2);
+      if (!planar) {
+	 OUT_RING_F((box_x1 - dxo) * src_scale_x);
+	 OUT_RING_F((box_y2 - dyo) * src_scale_y);
+      } else {
+	 OUT_RING_F((box_x1 - dxo) * src_scale_x / 2.0);
+	 OUT_RING_F((box_y2 - dyo) * src_scale_y / 2.0);
+	 OUT_RING_F((box_x1 - dxo) * src_scale_x);
+	 OUT_RING_F((box_y2 - dyo) * src_scale_y);
+      }
+
+      /* top left */
+      OUT_RING_F(box_x1);
+      OUT_RING_F(box_y1);
       if (!planar) {
-	 tex[0][0] = (box_x1 - dxo) * src_scale_x;
-	 tex[0][1] = (box_y1 - dyo) * src_scale_y;
-	 tex[1][0] = (box_x2 - dxo) * src_scale_x;
-	 tex[1][1] = (box_y1 - dyo) * src_scale_y;
-	 tex[2][0] = (box_x2 - dxo) * src_scale_x;
-	 tex[2][1] = (box_y2 - dyo) * src_scale_y;
-	 tex[3][0] = (box_x1 - dxo) * src_scale_x;
-	 tex[3][1] = (box_y2 - dyo) * src_scale_y;
-	 /* emit vertex buffer */
-	 draw_poly(vb, verts, tex, NULL);
-	 for (j = 0; j < vert_data_count; j++)
-	    OUT_RING(vb[j]);
+	 OUT_RING_F((box_x1 - dxo) * src_scale_x);
+	 OUT_RING_F((box_y1 - dyo) * src_scale_y);
       } else {
-	 tex[0][0] = (box_x1 - dxo) * src_scale_x / 2.0;
-	 tex[0][1] = (box_y1 - dyo) * src_scale_y / 2.0;
-	 tex[1][0] = (box_x2 - dxo) * src_scale_x / 2.0;
-	 tex[1][1] = (box_y1 - dyo) * src_scale_y / 2.0;
-	 tex[2][0] = (box_x2 - dxo) * src_scale_x / 2.0;
-	 tex[2][1] = (box_y2 - dyo) * src_scale_y / 2.0;
-	 tex[3][0] = (box_x1 - dxo) * src_scale_x / 2.0;
-	 tex[3][1] = (box_y2 - dyo) * src_scale_y / 2.0;
-	 tex2[0][0] = (box_x1 - dxo) * src_scale_x;
-	 tex2[0][1] = (box_y1 - dyo) * src_scale_y;
-	 tex2[1][0] = (box_x2 - dxo) * src_scale_x;
-	 tex2[1][1] = (box_y1 - dyo) * src_scale_y;
-	 tex2[2][0] = (box_x2 - dxo) * src_scale_x;
-	 tex2[2][1] = (box_y2 - dyo) * src_scale_y;
-	 tex2[3][0] = (box_x1 - dxo) * src_scale_x;
-	 tex2[3][1] = (box_y2 - dyo) * src_scale_y;
-	 /* emit vertex buffer */
-	 draw_poly(vb, verts, tex, tex2);
-	 for (j = 0; j < vert_data_count; j++)
-	    OUT_RING(vb[j]);
+	 OUT_RING_F((box_x1 - dxo) * src_scale_x / 2.0);
+	 OUT_RING_F((box_y1 - dyo) * src_scale_y / 2.0);
+	 OUT_RING_F((box_x1 - dxo) * src_scale_x);
+	 OUT_RING_F((box_y1 - dyo) * src_scale_y);
       }
 
       ADVANCE_LP_RING();



More information about the xorg-commit mailing list