xf86-video-intel: Branch 'xf86-video-intel-2.6-branch' - 19 commits - src/drmmode_display.c src/i830_dri.c src/i830_driver.c src/i830_exa.c src/i830.h src/i830_tv.c src/i830_video.c src/i830_video.h src/i965_video.c uxa/uxa.c uxa/uxa-priv.h uxa/uxa-render.c

Zhenyu Wang zhen at kemper.freedesktop.org
Sun Dec 7 18:42:04 PST 2008


 src/drmmode_display.c |    3 
 src/i830.h            |   11 
 src/i830_dri.c        |    3 
 src/i830_driver.c     |   16 
 src/i830_exa.c        |    6 
 src/i830_tv.c         |    2 
 src/i830_video.c      |   11 
 src/i830_video.h      |    7 
 src/i965_video.c      | 1073 +++++++++++++++++++++++++++++---------------------
 uxa/uxa-priv.h        |    5 
 uxa/uxa-render.c      |    2 
 uxa/uxa.c             |   22 +
 12 files changed, 684 insertions(+), 477 deletions(-)

New commits:
commit 6ca0d7e6ff05bff2bb88bfae64c2d79ac115bd38
Author: Zhenyu Wang <zhenyu.z.wang at intel.com>
Date:   Mon Dec 8 10:33:13 2008 +0800

    Fix DRI2 compiler warning
    (cherry picked from commit 768f317cf0da4cd6682af2e71e71c3e130e05182)

diff --git a/src/i830_dri.c b/src/i830_dri.c
index daa3ff0..0fe0eca 100644
--- a/src/i830_dri.c
+++ b/src/i830_dri.c
@@ -1930,7 +1930,7 @@ I830DRI2DestroyBuffers(DrawablePtr pDraw, DRI2BufferPtr buffers, int count)
     }
 }
 
-static unsigned int
+static void
 I830DRI2CopyRegion(DrawablePtr pDraw, RegionPtr pRegion,
 		   DRI2BufferPtr pDestBuffer, DRI2BufferPtr pSrcBuffer)
 {
@@ -1966,7 +1966,6 @@ I830DRI2CopyRegion(DrawablePtr pDraw, RegionPtr pRegion,
 #endif
     drmCommandNone(pI830->drmSubFD, DRM_I915_GEM_THROTTLE);
 
-    return 1;
 }
 
 Bool I830DRI2ScreenInit(ScreenPtr pScreen)
commit e0149c6189e422d74453144260317cdb30c86dcc
Author: Zhenyu Wang <zhenyu.z.wang at intel.com>
Date:   Mon Dec 8 10:30:12 2008 +0800

    Fix TV compiler warning
    (cherry picked from commit 95596f51503bb468364719aec9083d59999e34b7)

diff --git a/src/i830_tv.c b/src/i830_tv.c
index d507a0c..72d2bd8 100644
--- a/src/i830_tv.c
+++ b/src/i830_tv.c
@@ -1691,7 +1691,7 @@ i830_tv_init(ScrnInfoPtr pScrn)
     I830OutputPrivatePtr    intel_output;
     struct i830_tv_priv	    *dev_priv;
     uint32_t		    tv_dac_on, tv_dac_off, save_tv_dac;
-    char                    *mon_option_lst = NULL;
+    XF86OptionPtr	    mon_option_lst = NULL;
     char		    *tv_format = NULL;
 
     if (pI830->quirk_flag & QUIRK_IGNORE_TV)
commit f98ca2a0bdbe49f06b367a3f9c2bc01cdb7ce90a
Author: Kristian Høgsberg <krh at redhat.com>
Date:   Sat Dec 6 21:20:52 2008 -0500

    Make sure DRI/DRI2 can initialize properly with KMS.
    (cherry picked from commit c47b6d1def917fad5ad2b5a3e4167edfd354f2c8)

diff --git a/src/i830_driver.c b/src/i830_driver.c
index 42ceb7c..3327fbf 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -1652,7 +1652,7 @@ I830DrmModeInit(ScrnInfoPtr pScrn)
     pI830->drmSubFD = pI830->drmmode.fd;
     xfree(bus_id);
 
-    pI830->directRenderingType = DRI_DRI2;
+    pI830->directRenderingType = DRI_NONE;
     pI830->allocate_classic_textures = FALSE;
 
     i830_init_bufmgr(pScrn);
commit f41c3d7ad99d9ca8a5ec52d074c0dcc3d9d31acd
Author: Kristian Høgsberg <krh at redhat.com>
Date:   Sat Dec 6 19:19:21 2008 -0500

    Simplify crtc preinit a bit.
    (cherry picked from commit 70af658d4e94cc372f9e9c831611f70b3c1cecab)

diff --git a/src/i830_driver.c b/src/i830_driver.c
index 4031222..42ceb7c 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -1475,9 +1475,6 @@ I830PreInitCrtcConfig(ScrnInfoPtr pScrn)
     I830Ptr pI830 = I830PTR(pScrn);
     int max_width, max_height;
 
-    if (pI830->use_drm_mode)
-	return;
-
     /* check quirks */
     i830_fixup_devices(pScrn);
 
@@ -1869,14 +1866,13 @@ I830PreInit(ScrnInfoPtr pScrn, int flags)
        xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
 		  "VBIOS initialization failed.\n");
 
-   I830PreInitCrtcConfig(pScrn);
-
    if (pI830->use_drm_mode) {
        if (!I830DrmModeInit(pScrn))
 	   return FALSE;
    } else {
-       if (!I830AccelMethodInit(pScrn))
-	   return FALSE;
+      I830PreInitCrtcConfig(pScrn);
+      if (!I830AccelMethodInit(pScrn))
+         return FALSE;
    }
 
    I830XvInit(pScrn);
commit f05a0c9628a2696c8719a41cfcb7663db3383019
Author: Kristian Høgsberg <krh at redhat.com>
Date:   Sat Dec 6 19:17:39 2008 -0500

    Fix KMS compilation.
    (cherry picked from commit 3ad9c9a82d7b359b9b711070628e6ff07a2aa9f7)

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 680071a..186ff2d 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -146,6 +146,7 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
 	crtc->x = x;
 	crtc->y = y;
 	crtc->rotation = rotation;
+	crtc->transformPresent = FALSE;
 
 	output_ids = xcalloc(sizeof(uint32_t), xf86_config->num_output);
 	if (!output_ids) {
@@ -166,7 +167,7 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
 		output_count++;
 	}
 
-	if (!xf86CrtcRotate(crtc, mode, rotation)) {
+	if (!xf86CrtcRotate(crtc)) {
 		goto done;
 	}
 
commit 1f74f7491508973240a649e9b480d435b013d847
Author: Eric Anholt <eric at anholt.net>
Date:   Fri Dec 5 12:21:53 2008 -0800

    uxa: Reject solid/copy to under-8bpp destinations.
    
    EXA wouldn't create pixmaps for under-8bpp, but UXA does.  Fixes
    mis-rendering in xfwm, evolution message compose, firefox link
    drag'n'drop, and I'm sure more.  Big thanks to Pierre Willenbrock for
    debugging the issue!
    
    Bug #18050
    (cherry picked from commit 2e3c098c5ed9a8451713dc754a5f086992249336)

diff --git a/src/i830_exa.c b/src/i830_exa.c
index 0be0614..3e3487e 100644
--- a/src/i830_exa.c
+++ b/src/i830_exa.c
@@ -185,6 +185,9 @@ I830EXAPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
     if (pPixmap->drawable.bitsPerPixel == 24)
 	I830FALLBACK("solid 24bpp unsupported!\n");
 
+    if (pPixmap->drawable.bitsPerPixel < 8)
+	I830FALLBACK("under 8bpp pixmaps unsupported\n");
+
     i830_exa_check_pitch_2d(pPixmap);
 
     pitch = i830_pixmap_pitch(pPixmap);
@@ -273,6 +276,9 @@ I830EXAPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir,
     if (!EXA_PM_IS_SOLID(&pSrcPixmap->drawable, planemask))
 	I830FALLBACK("planemask is not solid");
 
+    if (pDstPixmap->drawable.bitsPerPixel < 8)
+	I830FALLBACK("under 8bpp pixmaps unsupported\n");
+
     i830_exa_check_pitch_2d(pSrcPixmap);
     i830_exa_check_pitch_2d(pDstPixmap);
 
commit b9ca7dcd0d6521c33d4b3beb09d9841411406657
Author: Eric Anholt <eric at anholt.net>
Date:   Fri Dec 5 12:13:26 2008 -0800

    uxa: Add in EnableDisableFBAccess handling like examodule.c did.
    
    This fixes assertion failures when rendering text while VT switched.
    (cherry picked from commit 261c20a479f6ec1e94c2ba801323072227cc3ade)

diff --git a/uxa/uxa-priv.h b/uxa/uxa-priv.h
index 1353587..0f9cfbf 100644
--- a/uxa/uxa-priv.h
+++ b/uxa/uxa-priv.h
@@ -31,6 +31,7 @@
 #else
 #include <xorg-server.h>
 #endif
+#include "xf86.h"
 
 #include "uxa.h"
 
@@ -117,6 +118,7 @@ typedef struct {
 
 #define UXA_NUM_GLYPH_CACHES 4
 
+typedef void (*EnableDisableFBAccessProcPtr)(int, Bool);
 typedef struct {
     uxa_driver_t		*info;
     CreateGCProcPtr 		 SavedCreateGC;
@@ -135,7 +137,8 @@ typedef struct {
     TrapezoidsProcPtr            SavedTrapezoids;
     AddTrapsProcPtr		 SavedAddTraps;
 #endif
-  
+    EnableDisableFBAccessProcPtr SavedEnableDisableFBAccess;
+
     Bool			 swappedOut;
     unsigned			 disableFbCount;
     unsigned			 offScreenCounter;
diff --git a/uxa/uxa-render.c b/uxa/uxa-render.c
index 94e18a5..b2d3297 100644
--- a/uxa/uxa-render.c
+++ b/uxa/uxa-render.c
@@ -323,7 +323,7 @@ uxa_try_driver_composite_rects(CARD8		    op,
     int src_off_x, src_off_y, dst_off_x, dst_off_y;
     PixmapPtr pSrcPix, pDstPix;
 
-    if (!uxa_screen->info->prepare_composite)
+    if (!uxa_screen->info->prepare_composite || uxa_screen->swappedOut)
 	return -1;
 
     if (uxa_screen->info->check_composite &&
diff --git a/uxa/uxa.c b/uxa/uxa.c
index 8658406..5b6f537 100644
--- a/uxa/uxa.c
+++ b/uxa/uxa.c
@@ -310,6 +310,22 @@ uxa_bitmap_to_region(PixmapPtr pPix)
   return ret;
 }
 
+static void
+uxa_xorg_enable_disable_fb_access (int index, Bool enable)
+{
+    ScreenPtr screen = screenInfo.screens[index];
+    uxa_screen_t *uxa_screen = uxa_get_screen(screen);
+
+    if (!enable && uxa_screen->disableFbCount++ == 0)
+	uxa_screen->swappedOut = TRUE;
+
+    if (enable && --uxa_screen->disableFbCount == 0)
+	uxa_screen->swappedOut = FALSE;
+
+    if (uxa_screen->SavedEnableDisableFBAccess)
+       uxa_screen->SavedEnableDisableFBAccess(index, enable);
+}
+
 /**
  * uxa_close_screen() unwraps its wrapped screen functions and tears down UXA's
  * screen private, before calling down to the next CloseSccreen.
@@ -318,6 +334,7 @@ static Bool
 uxa_close_screen(int i, ScreenPtr pScreen)
 {
     uxa_screen_t	*uxa_screen = uxa_get_screen(pScreen);
+    ScrnInfoPtr scrn = xf86Screens[pScreen->myNum];
 #ifdef RENDER
     PictureScreenPtr	ps = GetPictureScreenIfSet(pScreen);
 #endif
@@ -333,6 +350,7 @@ uxa_close_screen(int i, ScreenPtr pScreen)
     pScreen->CopyWindow = uxa_screen->SavedCopyWindow;
     pScreen->ChangeWindowAttributes = uxa_screen->SavedChangeWindowAttributes;
     pScreen->BitmapToRegion = uxa_screen->SavedBitmapToRegion;
+    scrn->EnableDisableFBAccess = uxa_screen->SavedEnableDisableFBAccess;
 #ifdef RENDER
     if (ps) {
 	ps->Composite = uxa_screen->SavedComposite;
@@ -378,6 +396,7 @@ Bool
 uxa_driver_init(ScreenPtr screen, uxa_driver_t *uxa_driver)
 {
     uxa_screen_t	*uxa_screen;
+    ScrnInfoPtr scrn = xf86Screens[screen->myNum];
 #ifdef RENDER
     PictureScreenPtr	ps;
 #endif
@@ -450,6 +469,9 @@ uxa_driver_init(ScreenPtr screen, uxa_driver_t *uxa_driver)
     uxa_screen->SavedBitmapToRegion = screen->BitmapToRegion;
     screen->BitmapToRegion = uxa_bitmap_to_region;
 
+    uxa_screen->SavedEnableDisableFBAccess = scrn->EnableDisableFBAccess;
+    scrn->EnableDisableFBAccess = uxa_xorg_enable_disable_fb_access;
+
 #ifdef RENDER
     if (ps) {
         uxa_screen->SavedComposite = ps->Composite;
commit 93485f521b330e98d40b181139a54a5cd00e2b91
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 15:18:08 2008 -0800

    Remove the extra memory allocation for 965 video state now that it's all in BOs.
    (cherry picked from commit caecd6031e416705b1f0a7051535211feaebdedd)

diff --git a/src/i830_video.c b/src/i830_video.c
index a483031..87fa020 100644
--- a/src/i830_video.c
+++ b/src/i830_video.c
@@ -2223,7 +2223,7 @@ I830PutImage(ScrnInfoPtr pScrn,
     int top, left, npixels, nlines, size;
     BoxRec dstBox;
     int pitchAlignMask;
-    int alloc_size, extraLinear;
+    int alloc_size;
     xf86CrtcPtr	crtc;
 
     if (pPriv->textured)
@@ -2370,15 +2370,9 @@ I830PutImage(ScrnInfoPtr pScrn,
     ErrorF("srcPitch: %d, dstPitch: %d, size: %d\n", srcPitch, dstPitch, size);
 #endif
 
-    if (IS_I965G(pI830))
-	extraLinear = BRW_LINEAR_EXTRA;
-    else
-	extraLinear = 0;
-
     alloc_size = size;
     if (pPriv->doubleBuffer)
 	alloc_size *= 2;
-    alloc_size += extraLinear;
 
     if (pPriv->buf) {
 	/* Wait for any previous acceleration to the buffer to have completed.
@@ -2402,9 +2396,6 @@ I830PutImage(ScrnInfoPtr pScrn,
     if (pPriv->buf == NULL)
 	return BadAlloc;
 
-    pPriv->extra_offset = pPriv->buf->offset +
-    (pPriv->doubleBuffer ? size * 2 : size);
-
     /* fixup pointers */
 #ifdef INTEL_XVMC
     if (id == FOURCC_XVMC && IS_I915(pI830)) {
diff --git a/src/i830_video.h b/src/i830_video.h
index 8545fb8..3c2fa4c 100644
--- a/src/i830_video.h
+++ b/src/i830_video.h
@@ -59,7 +59,6 @@ typedef struct {
    Time offTime;
    Time freeTime;
    i830_memory *buf; /** YUV data buffer */
-   unsigned int extra_offset;
 
    Bool overlayOK;
    int oneLineMode;
@@ -71,11 +70,6 @@ typedef struct {
 #define GET_PORT_PRIVATE(pScrn) \
    (I830PortPrivPtr)((I830PTR(pScrn))->adaptor->pPortPrivates[0].ptr)
 
-/*
- * Broadwater requires a bit of extra video memory for state information
- */
-#define BRW_LINEAR_EXTRA	(36*1024)
-
 void I915DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv,
 			      int id, RegionPtr dstRegion, short width,
 			      short height, int video_pitch, int video_pitch2,
commit 0972c5eeb5c2f2a31c92296b6de009adba1f861f
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 15:10:49 2008 -0800

    Emit proper relocations to pixmaps in BOs in i965 video.
    (cherry picked from commit 48c113ade26d5ad64999cd5cc288495c10e02fc4)

diff --git a/src/i965_video.c b/src/i965_video.c
index 68a12ec..7e84ae0 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -349,6 +349,7 @@ i965_create_dst_surface_state(ScrnInfoPtr scrn,
 {
     I830Ptr pI830 = I830PTR(scrn);
     struct brw_surface_state *dest_surf_state;
+    drm_intel_bo *pixmap_bo = i830_get_pixmap_bo(pixmap);
     drm_intel_bo *surf_bo;
 
     surf_bo = drm_intel_bo_alloc(pI830->bufmgr,
@@ -375,7 +376,14 @@ i965_create_dst_surface_state(ScrnInfoPtr scrn,
     dest_surf_state->ss0.mipmap_layout_mode = 0;
     dest_surf_state->ss0.render_cache_read_mode = 0;
 
-    dest_surf_state->ss1.base_addr = intel_get_pixmap_offset(pixmap);
+    if (pixmap_bo != NULL)
+	dest_surf_state->ss1.base_addr =
+	    intel_emit_reloc(surf_bo, offsetof(struct brw_surface_state, ss1),
+			     pixmap_bo, 0,
+			     I915_GEM_DOMAIN_SAMPLER, 0);
+    else
+	dest_surf_state->ss1.base_addr = intel_get_pixmap_offset(pixmap);
+
     dest_surf_state->ss2.height = scrn->virtualY - 1;
     dest_surf_state->ss2.width = scrn->virtualX - 1;
     dest_surf_state->ss2.mip_count = 0;
commit 87da69ba18c6535fa5faa959a49a0d47c91c7dfe
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 15:02:12 2008 -0800

    Move i965 video surface state and binding table to BOs.
    (cherry picked from commit ae2cd8b75e41393e072e27064ecefecf1aa1bc6c)

diff --git a/src/i965_video.c b/src/i965_video.c
index 3bc2f6e..68a12ec 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -343,12 +343,19 @@ intel_emit_reloc(drm_intel_bo *bo, uint32_t offset,
     return target_bo->offset + target_offset;
 }
 
-static void
-i965_set_dst_surface_state(ScrnInfoPtr scrn,
-			   struct brw_surface_state *dest_surf_state,
-			   PixmapPtr pixmap)
+static drm_intel_bo *
+i965_create_dst_surface_state(ScrnInfoPtr scrn,
+			      PixmapPtr pixmap)
 {
     I830Ptr pI830 = I830PTR(scrn);
+    struct brw_surface_state *dest_surf_state;
+    drm_intel_bo *surf_bo;
+
+    surf_bo = drm_intel_bo_alloc(pI830->bufmgr,
+				    "textured video surface state",
+				    4096, 4096);
+    drm_intel_bo_map(surf_bo, TRUE);
+    dest_surf_state = surf_bo->virtual;
 
     memset(dest_surf_state, 0, sizeof(*dest_surf_state));
     dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
@@ -376,17 +383,29 @@ i965_set_dst_surface_state(ScrnInfoPtr scrn,
     dest_surf_state->ss3.pitch = intel_get_pixmap_pitch(pixmap) - 1;
     dest_surf_state->ss3.tiled_surface = i830_pixmap_tiled(pixmap);
     dest_surf_state->ss3.tile_walk = 0; /* TileX */
+
+    drm_intel_bo_unmap(surf_bo);
+    return surf_bo;
 }
 
-static void
-i965_set_src_surface_state(ScrnInfoPtr scrn,
-			      struct brw_surface_state *src_surf_state,
+static drm_intel_bo *
+i965_create_src_surface_state(ScrnInfoPtr scrn,
 			      uint32_t src_offset,
 			      int src_width,
 			      int src_height,
 			      int src_pitch,
 			      uint32_t src_surf_format)
 {
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *surface_bo;
+    struct brw_surface_state *src_surf_state;
+
+    surface_bo = drm_intel_bo_alloc(pI830->bufmgr,
+				    "textured video surface state",
+				    4096, 4096);
+    drm_intel_bo_map(surface_bo, TRUE);
+    src_surf_state = surface_bo->virtual;
+
     /* Set up the source surface state buffer */
     memset(src_surf_state, 0, sizeof(struct brw_surface_state));
     src_surf_state->ss0.surface_type = BRW_SURFACE_2D;
@@ -407,6 +426,34 @@ i965_set_src_surface_state(ScrnInfoPtr scrn,
     src_surf_state->ss2.mip_count = 0;
     src_surf_state->ss2.render_target_rotation = 0;
     src_surf_state->ss3.pitch = src_pitch - 1;
+
+    drm_intel_bo_unmap(surface_bo);
+    return surface_bo;
+}
+
+static drm_intel_bo *
+i965_create_binding_table(ScrnInfoPtr scrn, drm_intel_bo **surf_bos, int n_surf)
+{
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *bind_bo;
+    uint32_t *binding_table;
+    int i;
+
+    /* Set up a binding table for our surfaces.  Only the PS will use it */
+
+    bind_bo = drm_intel_bo_alloc(pI830->bufmgr,
+				 "textured video binding table",
+				 4096, 4096);
+    drm_intel_bo_map(bind_bo, TRUE);
+    binding_table = bind_bo->virtual;
+
+    for (i = 0; i < n_surf; i++)
+	binding_table[i] = intel_emit_reloc(bind_bo, i * sizeof(uint32_t),
+					    surf_bos[i], 0,
+					    I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+    drm_intel_bo_unmap(bind_bo);
+    return bind_bo;
 }
 
 static drm_intel_bo *
@@ -662,20 +709,14 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     int urb_sf_start, urb_sf_size;
     int urb_cs_start, urb_cs_size;
     float src_scale_x, src_scale_y;
-    uint32_t *binding_table;
-    int dest_surf_offset, src_surf_offset[6];
-    int binding_table_offset;
-    int next_offset, total_state_size;
-    char *state_base;
-    int state_base_offset;
-    int src_surf;
+    int src_surf, i;
     int n_src_surf;
     uint32_t	src_surf_format;
     uint32_t	src_surf_base[6];
     int		src_width[6];
     int		src_height[6];
     int		src_pitch[6];
-    int wm_binding_table_entries;
+    drm_intel_bo *bind_bo, *surf_bos[7];
 
 #if 0
     ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width, height,
@@ -733,44 +774,13 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     default:
 	return;
     }    
-    wm_binding_table_entries = 1 + n_src_surf;
 
     IntelEmitInvarientState(pScrn);
     *pI830->last_3d = LAST_3D_VIDEO;
 
-    next_offset = 0;
-
-    /* Set up our layout of state in framebuffer: */
-    /* And then the general state: */
-    dest_surf_offset = ALIGN(next_offset, 32);
-    next_offset = dest_surf_offset + sizeof(struct brw_surface_state);
-    
-    for (src_surf = 0; src_surf < n_src_surf; src_surf++) {
-	src_surf_offset[src_surf] = ALIGN(next_offset, 32);
-	next_offset = src_surf_offset[src_surf] + sizeof(struct brw_surface_state);
-    }
-    
-    binding_table_offset = ALIGN(next_offset, 32);
-    next_offset = binding_table_offset + (wm_binding_table_entries * 4);
-
-    /* Allocate an area in framebuffer for our state layout we just set up */
-    total_state_size = next_offset;
-    assert (total_state_size < BRW_LINEAR_EXTRA);
-
-    /*
-     * Use the extra space allocated at the end of the Xv buffer
-     */
-    state_base_offset = pPriv->extra_offset;
-    state_base_offset = ALIGN(state_base_offset, 64);
-
-    state_base = (char *)(pI830->FbBase + state_base_offset);
-
-    binding_table = (void *)(state_base + binding_table_offset);
-
 #if 0
     ErrorF("dst surf:      0x%08x\n", state_base_offset + dest_surf_offset);
     ErrorF("src surf:      0x%08x\n", state_base_offset + src_surf_offset);
-    ErrorF("binding table: 0x%08x\n", state_base_offset + binding_table_offset);
 #endif
 
     urb_vs_start = 0;
@@ -790,24 +800,22 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
      */
 
     /* Upload kernels */
-    i965_set_dst_surface_state(pScrn, (void *)(state_base +
-					       dest_surf_offset),
-			       pPixmap);
-
-    for (src_surf = 0; src_surf < n_src_surf; src_surf++)
-	i965_set_src_surface_state(pScrn,
-				   (void *)(state_base +
-					    src_surf_offset[src_surf]),
-				   src_surf_base[src_surf],
-				   src_width[src_surf],
-				   src_height[src_surf],
-				   src_pitch[src_surf],
-				   src_surf_format);
+    surf_bos[0] = i965_create_dst_surface_state(pScrn, pPixmap);
 
-    /* Set up a binding table for our surfaces.  Only the PS will use it */
-    binding_table[0] = state_base_offset + dest_surf_offset;
-    for (src_surf = 0; src_surf < n_src_surf; src_surf++)
-	binding_table[1 + src_surf] = state_base_offset + src_surf_offset[src_surf];
+    for (src_surf = 0; src_surf < n_src_surf; src_surf++) {
+	surf_bos[src_surf + 1] =
+	    i965_create_src_surface_state(pScrn,
+					  src_surf_base[src_surf],
+					  src_width[src_surf],
+					  src_height[src_surf],
+					  src_pitch[src_surf],
+					  src_surf_format);
+    }
+    bind_bo = i965_create_binding_table(pScrn, surf_bos, n_src_surf + 1);
+    for (i = 0; i < n_src_surf + 1; i++) {
+	drm_intel_bo_unreference(surf_bos[i]);
+	surf_bos[i] = NULL;
+    }
 
     if (pI830->video.gen4_sampler_bo == NULL)
 	pI830->video.gen4_sampler_bo = i965_create_sampler_state(pScrn);
@@ -900,7 +908,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
        OUT_BATCH(0); /* clip */
        OUT_BATCH(0); /* sf */
        /* Only the PS uses the binding table */
-       OUT_BATCH(state_base_offset + binding_table_offset); /* ps */
+       OUT_RELOC(bind_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+       drm_intel_bo_unreference(bind_bo);
 
        /* Blend constant color (magenta is fun) */
        OUT_BATCH(BRW_3DSTATE_CONSTANT_COLOR | 3);
commit 213ec0b1bdcdc37db73b67eab7f30188a66c62c1
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 14:30:56 2008 -0800

    Move i965 video vertex data to BOs.
    
    This eliminates extra syncing when clipping is involved.
    (cherry picked from commit 1b3c3c9d79305abe1785fdaef26a950dafa8890f)

diff --git a/src/i965_video.c b/src/i965_video.c
index 805a362..3bc2f6e 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -663,12 +663,9 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     int urb_cs_start, urb_cs_size;
     float src_scale_x, src_scale_y;
     uint32_t *binding_table;
-    Bool first_output = TRUE;
     int dest_surf_offset, src_surf_offset[6];
-    int vb_offset;
     int binding_table_offset;
     int next_offset, total_state_size;
-    int vb_size = (4 * 4) * 4; /* 4 DWORDS per vertex */
     char *state_base;
     int state_base_offset;
     int src_surf;
@@ -744,10 +741,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     next_offset = 0;
 
     /* Set up our layout of state in framebuffer: */
-    /* Align VB to native size of elements, for safety */
-    vb_offset = ALIGN(next_offset, 8);
-    next_offset = vb_offset + vb_size;
-
     /* And then the general state: */
     dest_surf_offset = ALIGN(next_offset, 32);
     next_offset = dest_surf_offset + sizeof(struct brw_surface_state);
@@ -775,7 +768,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     binding_table = (void *)(state_base + binding_table_offset);
 
 #if 0
-    ErrorF("vb:            0x%08x\n", state_base_offset + vb_offset);
     ErrorF("dst surf:      0x%08x\n", state_base_offset + dest_surf_offset);
     ErrorF("src surf:      0x%08x\n", state_base_offset + src_surf_offset);
     ErrorF("binding table: 0x%08x\n", state_base_offset + binding_table_offset);
@@ -888,7 +880,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     /* brw_debug (pScrn, "after base address modify"); */
 
     {
-       BEGIN_BATCH(42);
+       BEGIN_BATCH(38);
        /* Enable VF statistics */
        OUT_BATCH(BRW_3DSTATE_VF_STATISTICS | 1);
 
@@ -966,15 +958,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
        OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) |
 		 (URB_CS_ENTRIES << 0));
 
-       /* Set up the pointer to our vertex buffer */
-       OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 2);
-       /* four 32-bit floats per vertex */
-       OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
-		 VB0_VERTEXDATA |
-		 ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
-       OUT_BATCH(state_base_offset + vb_offset);
-       OUT_BATCH(3); /* four corners to our rectangle */
-
        /* Set up our vertex elements, sourced from the single vertex buffer. */
        OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
@@ -1028,18 +1011,16 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	int box_x2 = pbox->x2;
 	int box_y2 = pbox->y2;
 	int i;
+	drm_intel_bo *vb_bo;
 	float *vb;
 
-	if (!first_output) {
-	    /* Since we use the same little vertex buffer over and over, sync
-	     * for subsequent rectangles.
-	     */
-	    i830WaitSync(pScrn);
-	}
-
 	pbox++;
 
-	vb = (void *)(state_base + vb_offset);
+	vb_bo = drm_intel_bo_alloc(pI830->bufmgr, "textured video vb",
+				   4096, 4096);
+	drm_intel_bo_map(vb_bo, TRUE);
+
+	vb = vb_bo->virtual;
 	i = 0;
 	vb[i++] = (box_x2 - dxo) * src_scale_x;
 	vb[i++] = (box_y2 - dyo) * src_scale_y;
@@ -1056,9 +1037,20 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	vb[i++] = (float) box_x1 + pix_xoff;
 	vb[i++] = (float) box_y1 + pix_yoff;
 
+	drm_intel_bo_unmap(vb_bo);
+
 	i965_pre_draw_debug(pScrn);
 
-	BEGIN_BATCH(6);
+	BEGIN_BATCH(10);
+	/* Set up the pointer to our vertex buffer */
+	OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 2);
+	/* four 32-bit floats per vertex */
+	OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
+		  VB0_VERTEXDATA |
+		  ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
+	OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
+	OUT_BATCH(3); /* four corners to our rectangle */
+
 	OUT_BATCH(BRW_3DPRIMITIVE |
 		  BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
 		  (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
@@ -1071,12 +1063,12 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	OUT_BATCH(0); /* index buffer offset, ignored */
 	ADVANCE_BATCH();
 
-	i965_post_draw_debug(pScrn);
+	drm_intel_bo_unreference(vb_bo);
 
-	first_output = FALSE;
-	i830MarkSync(pScrn);
+	i965_post_draw_debug(pScrn);
     }
 
+    i830MarkSync(pScrn);
 #if WATCH_STATS
     i830_dump_error_state(pScrn);
 #endif
commit 733297f2ed8dd60c2077aae81a1c051524a3af04
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 14:26:09 2008 -0800

    Move remaining i965 video programs to BOs.
    (cherry picked from commit 84825972597042f0aa6784594dace96be96a0234)

diff --git a/src/i830.h b/src/i830.h
index 1af750d..c2e043a 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -577,6 +577,7 @@ typedef struct _I830Rec {
       drm_intel_bo *gen4_cc_bo;
       drm_intel_bo *gen4_cc_vp_bo;
       drm_intel_bo *gen4_sampler_bo;
+      drm_intel_bo *gen4_sip_kernel_bo;
    } video;
 #endif
 
diff --git a/src/i965_video.c b/src/i965_video.c
index 19f2f06..805a362 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -472,12 +472,15 @@ i965_create_program(ScrnInfoPtr scrn, const uint32_t *program,
 }
 
 static drm_intel_bo *
-i965_create_sf_state(ScrnInfoPtr scrn, uint32_t sf_kernel_offset)
+i965_create_sf_state(ScrnInfoPtr scrn)
 {
     I830Ptr pI830 = I830PTR(scrn);
-    drm_intel_bo *sf_bo;
+    drm_intel_bo *sf_bo, *kernel_bo;
     struct brw_sf_unit_state *sf_state;
 
+    kernel_bo = i965_create_program(scrn, &sf_kernel_static[0][0],
+				    sizeof(sf_kernel_static));
+
     sf_bo = drm_intel_bo_alloc(pI830->bufmgr, "textured video sf state",
 			       4096, 4096);
     drm_intel_bo_map(sf_bo, TRUE);
@@ -488,8 +491,11 @@ i965_create_sf_state(ScrnInfoPtr scrn, uint32_t sf_kernel_offset)
      * back to SF which then hands pixels off to WM.
      */
     memset(sf_state, 0, sizeof(*sf_state));
-    sf_state->thread0.kernel_start_pointer = sf_kernel_offset >> 6;
     sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
+    sf_state->thread0.kernel_start_pointer =
+	intel_emit_reloc(sf_bo, offsetof(struct brw_sf_unit_state, thread0),
+			 kernel_bo, sf_state->thread0.grf_reg_count << 1,
+			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
     sf_state->sf1.single_program_flow = 1; /* XXX */
     sf_state->sf1.binding_table_entry_count = 0;
     sf_state->sf1.thread_priority = 0;
@@ -660,7 +666,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     Bool first_output = TRUE;
     int dest_surf_offset, src_surf_offset[6];
     int vb_offset;
-    int sf_kernel_offset, sip_kernel_offset;
     int binding_table_offset;
     int next_offset, total_state_size;
     int vb_size = (4 * 4) * 4; /* 4 DWORDS per vertex */
@@ -738,12 +743,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 
     next_offset = 0;
 
-    /* Set up our layout of state in framebuffer.  First the general state: */
-    sf_kernel_offset = ALIGN(next_offset, 64);
-    next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
-    sip_kernel_offset = ALIGN(next_offset, 64);
-    next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
-
+    /* Set up our layout of state in framebuffer: */
     /* Align VB to native size of elements, for safety */
     vb_offset = ALIGN(next_offset, 8);
     next_offset = vb_offset + vb_size;
@@ -775,8 +775,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     binding_table = (void *)(state_base + binding_table_offset);
 
 #if 0
-    ErrorF("sf kernel:     0x%08x\n", state_base_offset + sf_kernel_offset);
-    ErrorF("sip kernel:    0x%08x\n", state_base_offset + sip_kernel_offset);
     ErrorF("vb:            0x%08x\n", state_base_offset + vb_offset);
     ErrorF("dst surf:      0x%08x\n", state_base_offset + dest_surf_offset);
     ErrorF("src surf:      0x%08x\n", state_base_offset + src_surf_offset);
@@ -800,11 +798,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
      */
 
     /* Upload kernels */
-    memcpy(state_base + sip_kernel_offset, sip_kernel_static,
-	   sizeof(sip_kernel_static));
-    memcpy(state_base + sf_kernel_offset, sf_kernel_static,
-	   sizeof(sf_kernel_static));
-
     i965_set_dst_surface_state(pScrn, (void *)(state_base +
 					       dest_surf_offset),
 			       pPixmap);
@@ -826,13 +819,15 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 
     if (pI830->video.gen4_sampler_bo == NULL)
 	pI830->video.gen4_sampler_bo = i965_create_sampler_state(pScrn);
+    if (pI830->video.gen4_sip_kernel_bo == NULL)
+	pI830->video.gen4_sip_kernel_bo =
+	    i965_create_program(pScrn, &sip_kernel_static[0][0],
+				sizeof(sip_kernel_static));
 
     if (pI830->video.gen4_vs_bo == NULL)
 	pI830->video.gen4_vs_bo = i965_create_vs_state(pScrn);
     if (pI830->video.gen4_sf_bo == NULL)
-	pI830->video.gen4_sf_bo = i965_create_sf_state(pScrn,
-						       state_base_offset +
-						       sf_kernel_offset);
+	pI830->video.gen4_sf_bo = i965_create_sf_state(pScrn);
     if (pI830->video.gen4_wm_packed_bo == NULL) {
 	pI830->video.gen4_wm_packed_bo =
 	    i965_create_wm_state(pScrn, pI830->video.gen4_sampler_bo, TRUE);
@@ -883,7 +878,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	/* Set system instruction pointer */
 	OUT_BATCH(BRW_STATE_SIP | 0);
 	/* system instruction pointer */
-	OUT_BATCH(state_base_offset + sip_kernel_offset);
+	OUT_RELOC(pI830->video.gen4_sip_kernel_bo,
+		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
 
 	OUT_BATCH(MI_NOOP);
 	ADVANCE_BATCH();
@@ -1105,4 +1101,6 @@ i965_free_video(ScrnInfoPtr scrn)
     pI830->video.gen4_cc_vp_bo = NULL;
     drm_intel_bo_unreference(pI830->video.gen4_sampler_bo);
     pI830->video.gen4_sampler_bo = NULL;
+    drm_intel_bo_unreference(pI830->video.gen4_sip_kernel_bo);
+    pI830->video.gen4_sip_kernel_bo = NULL;
 }
commit 17d0249f984601d433ac7c5def2374b989eca166
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 14:20:21 2008 -0800

    Move i965 video wm and sampler state to BOs.
    (cherry picked from commit 40671132cb3732728703c6444f4577467fa9223f)

diff --git a/src/i830.h b/src/i830.h
index 024c72b..1af750d 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -572,8 +572,11 @@ typedef struct _I830Rec {
    struct {
       drm_intel_bo *gen4_vs_bo;
       drm_intel_bo *gen4_sf_bo;
+      drm_intel_bo *gen4_wm_packed_bo;
+      drm_intel_bo *gen4_wm_planar_bo;
       drm_intel_bo *gen4_cc_bo;
       drm_intel_bo *gen4_cc_vp_bo;
+      drm_intel_bo *gen4_sampler_bo;
    } video;
 #endif
 
diff --git a/src/i965_video.c b/src/i965_video.c
index 32ff330..19f2f06 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -409,10 +409,19 @@ i965_set_src_surface_state(ScrnInfoPtr scrn,
     src_surf_state->ss3.pitch = src_pitch - 1;
 }
 
-static void
-i965_set_sampler_state(ScrnInfoPtr scrn,
-		       struct brw_sampler_state *sampler_state)
+static drm_intel_bo *
+i965_create_sampler_state(ScrnInfoPtr scrn)
 {
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *sampler_bo;
+    struct brw_sampler_state *sampler_state;
+
+    sampler_bo = drm_intel_bo_alloc(pI830->bufmgr,
+				    "textured video sampler state",
+				    4096, 4096);
+    drm_intel_bo_map(sampler_bo, TRUE);
+    sampler_state = sampler_bo->virtual;
+
     memset(sampler_state, 0, sizeof(struct brw_sampler_state));
 
     sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@@ -420,6 +429,9 @@ i965_set_sampler_state(ScrnInfoPtr scrn,
     sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
     sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
     sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+
+    drm_intel_bo_unmap(sampler_bo);
+    return sampler_bo;
 }
 
 static drm_intel_bo *
@@ -446,6 +458,20 @@ i965_create_vs_state(ScrnInfoPtr scrn)
 }
 
 static drm_intel_bo *
+i965_create_program(ScrnInfoPtr scrn, const uint32_t *program,
+		    unsigned int program_size)
+{
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *prog_bo;
+
+    prog_bo = drm_intel_bo_alloc(pI830->bufmgr, "textured video program",
+				 program_size, 4096);
+    drm_intel_bo_subdata(prog_bo, 0, program_size, program);
+
+    return prog_bo;
+}
+
+static drm_intel_bo *
 i965_create_sf_state(ScrnInfoPtr scrn, uint32_t sf_kernel_offset)
 {
     I830Ptr pI830 = I830PTR(scrn);
@@ -494,16 +520,37 @@ i965_create_sf_state(ScrnInfoPtr scrn, uint32_t sf_kernel_offset)
     return sf_bo;
 }
 
-static void
-i965_set_wm_state(ScrnInfoPtr scrn, struct brw_wm_unit_state *wm_state,
-		  uint32_t ps_kernel_offset,
-		  uint32_t sampler_offset, int n_src_surf)
+static drm_intel_bo *
+i965_create_wm_state(ScrnInfoPtr scrn, drm_intel_bo *sampler_bo, Bool is_packed)
 {
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *wm_bo, *kernel_bo;
+    struct brw_wm_unit_state *wm_state;
+
+    if (is_packed) {
+	kernel_bo = i965_create_program(scrn, &ps_kernel_packed_static[0][0],
+					sizeof(ps_kernel_packed_static));
+    } else {
+	kernel_bo = i965_create_program(scrn, &ps_kernel_planar_static[0][0],
+					sizeof(ps_kernel_planar_static));
+    }
+
+    wm_bo = drm_intel_bo_alloc(pI830->bufmgr, "textured video wm state",
+			       4096, 4096);
+    drm_intel_bo_map(wm_bo, TRUE);
+    wm_state = wm_bo->virtual;
+
     memset(wm_state, 0, sizeof (*wm_state));
-    wm_state->thread0.kernel_start_pointer = ps_kernel_offset >> 6;
     wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
+    wm_state->thread0.kernel_start_pointer =
+	intel_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, thread0),
+			 kernel_bo, wm_state->thread0.grf_reg_count << 1,
+			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
     wm_state->thread1.single_program_flow = 1; /* XXX */
-    wm_state->thread1.binding_table_entry_count = 1 + n_src_surf;
+    if (is_packed)
+	wm_state->thread1.binding_table_entry_count = 2;
+    else
+	wm_state->thread1.binding_table_entry_count = 7;
     /* Though we never use the scratch space in our WM kernel, it has to be
      * set, and the minimum allocation is 1024 bytes.
      */
@@ -515,13 +562,21 @@ i965_set_wm_state(ScrnInfoPtr scrn, struct brw_wm_unit_state *wm_state,
     wm_state->thread3.urb_entry_read_length = 1; /* XXX */
     wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
     wm_state->wm4.stats_enable = 1;
-    wm_state->wm4.sampler_state_pointer = sampler_offset >> 5;
+    wm_state->wm4.sampler_state_pointer =
+	intel_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, wm4),
+			 sampler_bo, 0,
+			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
     wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
     wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
     wm_state->wm5.thread_dispatch_enable = 1;
     wm_state->wm5.enable_16_pix = 1;
     wm_state->wm5.enable_8_pix = 0;
     wm_state->wm5.early_depth_test = 1;
+
+    drm_intel_bo_unreference(kernel_bo);
+
+    drm_intel_bo_unmap(wm_bo);
+    return wm_bo;
 }
 
 static drm_intel_bo *
@@ -603,9 +658,9 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     float src_scale_x, src_scale_y;
     uint32_t *binding_table;
     Bool first_output = TRUE;
-    int dest_surf_offset, src_surf_offset[6], sampler_offset[6];
-    int wm_offset, vb_offset;
-    int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
+    int dest_surf_offset, src_surf_offset[6];
+    int vb_offset;
+    int sf_kernel_offset, sip_kernel_offset;
     int binding_table_offset;
     int next_offset, total_state_size;
     int vb_size = (4 * 4) * 4; /* 4 DWORDS per vertex */
@@ -619,8 +674,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     int		src_height[6];
     int		src_pitch[6];
     int wm_binding_table_entries;
-    const uint32_t	*ps_kernel_static;
-    int		ps_kernel_static_size;
 
 #if 0
     ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width, height,
@@ -650,16 +703,12 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     case FOURCC_UYVY:
 	src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY;
 	n_src_surf = 1;
-	ps_kernel_static = &ps_kernel_packed_static[0][0];
-	ps_kernel_static_size = sizeof (ps_kernel_packed_static);
 	src_width[0] = width;
 	src_height[0] = height;
 	src_pitch[0] = video_pitch;
 	break;
     case FOURCC_YUY2:
 	src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL;
-	ps_kernel_static = &ps_kernel_packed_static[0][0];
-	ps_kernel_static_size = sizeof (ps_kernel_packed_static);
 	src_width[0] = width;
 	src_height[0] = height;
 	src_pitch[0] = video_pitch;
@@ -671,8 +720,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     case FOURCC_I420:
     case FOURCC_YV12:
 	src_surf_format = BRW_SURFACEFORMAT_R8_UNORM;
-	ps_kernel_static = &ps_kernel_planar_static[0][0];
-	ps_kernel_static_size = sizeof (ps_kernel_planar_static);
 	src_width[1] = src_width[0] = width;
 	src_height[1] = src_height[0] = height;
 	src_pitch[1] = src_pitch[0] = video_pitch * 2;
@@ -692,21 +739,11 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     next_offset = 0;
 
     /* Set up our layout of state in framebuffer.  First the general state: */
-    wm_offset = ALIGN(next_offset, 32);
-    next_offset = wm_offset + sizeof(struct brw_wm_unit_state);
-
     sf_kernel_offset = ALIGN(next_offset, 64);
     next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
-    ps_kernel_offset = ALIGN(next_offset, 64);
-    next_offset = ps_kernel_offset + ps_kernel_static_size;
     sip_kernel_offset = ALIGN(next_offset, 64);
     next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
 
-    for (src_surf = 0; src_surf < n_src_surf; src_surf++) {    
-	sampler_offset[src_surf] = ALIGN(next_offset, 32);
-	next_offset = sampler_offset[src_surf] + sizeof(struct brw_sampler_state);
-    }
-    
     /* Align VB to native size of elements, for safety */
     vb_offset = ALIGN(next_offset, 8);
     next_offset = vb_offset + vb_size;
@@ -738,11 +775,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     binding_table = (void *)(state_base + binding_table_offset);
 
 #if 0
-    ErrorF("wm:            0x%08x\n", state_base_offset + wm_offset);
     ErrorF("sf kernel:     0x%08x\n", state_base_offset + sf_kernel_offset);
-    ErrorF("ps kernel:     0x%08x\n", state_base_offset + ps_kernel_offset);
     ErrorF("sip kernel:    0x%08x\n", state_base_offset + sip_kernel_offset);
-    ErrorF("src sampler:   0x%08x\n", state_base_offset + sampler_offset);
     ErrorF("vb:            0x%08x\n", state_base_offset + vb_offset);
     ErrorF("dst surf:      0x%08x\n", state_base_offset + dest_surf_offset);
     ErrorF("src surf:      0x%08x\n", state_base_offset + src_surf_offset);
@@ -770,8 +804,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	   sizeof(sip_kernel_static));
     memcpy(state_base + sf_kernel_offset, sf_kernel_static,
 	   sizeof(sf_kernel_static));
-    memcpy(state_base + ps_kernel_offset, ps_kernel_static,
-	   ps_kernel_static_size);
 
     i965_set_dst_surface_state(pScrn, (void *)(state_base +
 					       dest_surf_offset),
@@ -787,25 +819,28 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 				   src_pitch[src_surf],
 				   src_surf_format);
 
-    for (src_surf = 0; src_surf < n_src_surf; src_surf++)
-	i965_set_sampler_state(pScrn, (void *)(state_base +
-					       sampler_offset[src_surf]));
-
     /* Set up a binding table for our surfaces.  Only the PS will use it */
     binding_table[0] = state_base_offset + dest_surf_offset;
     for (src_surf = 0; src_surf < n_src_surf; src_surf++)
 	binding_table[1 + src_surf] = state_base_offset + src_surf_offset[src_surf];
 
+    if (pI830->video.gen4_sampler_bo == NULL)
+	pI830->video.gen4_sampler_bo = i965_create_sampler_state(pScrn);
+
     if (pI830->video.gen4_vs_bo == NULL)
 	pI830->video.gen4_vs_bo = i965_create_vs_state(pScrn);
     if (pI830->video.gen4_sf_bo == NULL)
 	pI830->video.gen4_sf_bo = i965_create_sf_state(pScrn,
 						       state_base_offset +
 						       sf_kernel_offset);
-    i965_set_wm_state(pScrn, (void *)(state_base + wm_offset),
-		      state_base_offset + ps_kernel_offset,
-		      state_base_offset + sampler_offset[0],
-		      n_src_surf);
+    if (pI830->video.gen4_wm_packed_bo == NULL) {
+	pI830->video.gen4_wm_packed_bo =
+	    i965_create_wm_state(pScrn, pI830->video.gen4_sampler_bo, TRUE);
+    }
+    if (pI830->video.gen4_wm_planar_bo == NULL) {
+	pI830->video.gen4_wm_planar_bo =
+	    i965_create_wm_state(pScrn, pI830->video.gen4_sampler_bo, FALSE);
+    }
 
     if (pI830->video.gen4_cc_bo == NULL)
 	pI830->video.gen4_cc_bo = i965_create_cc_state(pScrn);
@@ -908,7 +943,12 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
        /* disable CLIP, resulting in passthrough */
        OUT_BATCH(BRW_CLIP_DISABLE);
        OUT_RELOC(pI830->video.gen4_sf_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-       OUT_BATCH(state_base_offset + wm_offset);  /* 32 byte aligned */
+       if (n_src_surf == 1)
+	   OUT_RELOC(pI830->video.gen4_wm_packed_bo,
+		     I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+       else
+	   OUT_RELOC(pI830->video.gen4_wm_planar_bo,
+		     I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
        OUT_RELOC(pI830->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
 
        /* URB fence */
@@ -1057,6 +1097,12 @@ i965_free_video(ScrnInfoPtr scrn)
     pI830->video.gen4_sf_bo = NULL;
     drm_intel_bo_unreference(pI830->video.gen4_cc_bo);
     pI830->video.gen4_cc_bo = NULL;
+    drm_intel_bo_unreference(pI830->video.gen4_wm_packed_bo);
+    pI830->video.gen4_wm_packed_bo = NULL;
+    drm_intel_bo_unreference(pI830->video.gen4_wm_planar_bo);
+    pI830->video.gen4_wm_planar_bo = NULL;
     drm_intel_bo_unreference(pI830->video.gen4_cc_vp_bo);
     pI830->video.gen4_cc_vp_bo = NULL;
+    drm_intel_bo_unreference(pI830->video.gen4_sampler_bo);
+    pI830->video.gen4_sampler_bo = NULL;
 }
commit ba608105842e1089a74ca822f284ce00932aae04
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 13:22:12 2008 -0800

    Stop allocating unused scratch space for i965 video.
    (cherry picked from commit 48803eb7463ad14f3109f67fcf4ccff4362baaa2)

diff --git a/src/i965_video.c b/src/i965_video.c
index 8f4cd09..32ff330 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -496,7 +496,7 @@ i965_create_sf_state(ScrnInfoPtr scrn, uint32_t sf_kernel_offset)
 
 static void
 i965_set_wm_state(ScrnInfoPtr scrn, struct brw_wm_unit_state *wm_state,
-		  uint32_t ps_kernel_offset, uint32_t wm_scratch_offset,
+		  uint32_t ps_kernel_offset,
 		  uint32_t sampler_offset, int n_src_surf)
 {
     memset(wm_state, 0, sizeof (*wm_state));
@@ -507,7 +507,7 @@ i965_set_wm_state(ScrnInfoPtr scrn, struct brw_wm_unit_state *wm_state,
     /* Though we never use the scratch space in our WM kernel, it has to be
      * set, and the minimum allocation is 1024 bytes.
      */
-    wm_state->thread2.scratch_space_base_pointer = wm_scratch_offset >> 10;
+    wm_state->thread2.scratch_space_base_pointer = 0;
     wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
     wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
     wm_state->thread3.const_urb_entry_read_length = 0;
@@ -605,7 +605,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     Bool first_output = TRUE;
     int dest_surf_offset, src_surf_offset[6], sampler_offset[6];
     int wm_offset, vb_offset;
-    int wm_scratch_offset;
     int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
     int binding_table_offset;
     int next_offset, total_state_size;
@@ -695,8 +694,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     /* Set up our layout of state in framebuffer.  First the general state: */
     wm_offset = ALIGN(next_offset, 32);
     next_offset = wm_offset + sizeof(struct brw_wm_unit_state);
-    wm_scratch_offset = ALIGN(next_offset, 1024);
-    next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS;
 
     sf_kernel_offset = ALIGN(next_offset, 64);
     next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
@@ -807,7 +804,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 						       sf_kernel_offset);
     i965_set_wm_state(pScrn, (void *)(state_base + wm_offset),
 		      state_base_offset + ps_kernel_offset,
-		      state_base_offset + wm_scratch_offset,
 		      state_base_offset + sampler_offset[0],
 		      n_src_surf);
 
commit b8ce83013241dfad286f0e61bcaa7bc9b9b16828
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 13:18:06 2008 -0800

    Move i965 video vs/sf state to BOs.
    (cherry picked from commit f3fe46b557a4dd7e212c2790fb47142d375c1c75)

diff --git a/src/i830.h b/src/i830.h
index 7b47531..024c72b 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -570,6 +570,8 @@ typedef struct _I830Rec {
    Bool *overlayOn;
 
    struct {
+      drm_intel_bo *gen4_vs_bo;
+      drm_intel_bo *gen4_sf_bo;
       drm_intel_bo *gen4_cc_bo;
       drm_intel_bo *gen4_cc_vp_bo;
    } video;
diff --git a/src/i965_video.c b/src/i965_video.c
index 2514d3f..8f4cd09 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -422,21 +422,41 @@ i965_set_sampler_state(ScrnInfoPtr scrn,
     sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
 }
 
-static void
-i965_set_vs_state(ScrnInfoPtr scrn, struct brw_vs_unit_state *vs_state)
+static drm_intel_bo *
+i965_create_vs_state(ScrnInfoPtr scrn)
 {
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *vs_bo;
+    struct brw_vs_unit_state *vs_state;
+
+    vs_bo = drm_intel_bo_alloc(pI830->bufmgr, "textured video vs state",
+			       4096, 4096);
+    drm_intel_bo_map(vs_bo, TRUE);
+    vs_state = vs_bo->virtual;
+
     /* Set up the vertex shader to be disabled (passthrough) */
     memset(vs_state, 0, sizeof(*vs_state));
     vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
     vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
     vs_state->vs6.vs_enable = 0;
     vs_state->vs6.vert_cache_disable = 1;
+
+    drm_intel_bo_unmap(vs_bo);
+    return vs_bo;
 }
 
-static void
-i965_set_sf_state(ScrnInfoPtr scrn, struct brw_sf_unit_state *sf_state,
-		  uint32_t sf_kernel_offset)
+static drm_intel_bo *
+i965_create_sf_state(ScrnInfoPtr scrn, uint32_t sf_kernel_offset)
 {
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *sf_bo;
+    struct brw_sf_unit_state *sf_state;
+
+    sf_bo = drm_intel_bo_alloc(pI830->bufmgr, "textured video sf state",
+			       4096, 4096);
+    drm_intel_bo_map(sf_bo, TRUE);
+    sf_state = sf_bo->virtual;
+
     /* Set up the SF kernel to do coord interp: for each attribute,
      * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
      * back to SF which then hands pixels off to WM.
@@ -469,6 +489,9 @@ i965_set_sf_state(ScrnInfoPtr scrn, struct brw_sf_unit_state *sf_state,
     sf_state->sf7.trifan_pv = 2;
     sf_state->sf6.dest_org_vbias = 0x8;
     sf_state->sf6.dest_org_hbias = 0x8;
+
+    drm_intel_bo_unmap(sf_bo);
+    return sf_bo;
 }
 
 static void
@@ -580,8 +603,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     float src_scale_x, src_scale_y;
     uint32_t *binding_table;
     Bool first_output = TRUE;
-    int dest_surf_offset, src_surf_offset[6], sampler_offset[6], vs_offset;
-    int sf_offset, wm_offset, vb_offset;
+    int dest_surf_offset, src_surf_offset[6], sampler_offset[6];
+    int wm_offset, vb_offset;
     int wm_scratch_offset;
     int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
     int binding_table_offset;
@@ -670,10 +693,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     next_offset = 0;
 
     /* Set up our layout of state in framebuffer.  First the general state: */
-    vs_offset = ALIGN(next_offset, 64);
-    next_offset = vs_offset + sizeof(struct brw_vs_unit_state);
-    sf_offset = ALIGN(next_offset, 32);
-    next_offset = sf_offset + sizeof(struct brw_sf_unit_state);
     wm_offset = ALIGN(next_offset, 32);
     next_offset = wm_offset + sizeof(struct brw_wm_unit_state);
     wm_scratch_offset = ALIGN(next_offset, 1024);
@@ -722,9 +741,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     binding_table = (void *)(state_base + binding_table_offset);
 
 #if 0
-    ErrorF("vs:            0x%08x\n", state_base_offset + vs_offset);
     ErrorF("wm:            0x%08x\n", state_base_offset + wm_offset);
-    ErrorF("sf:            0x%08x\n", state_base_offset + sf_offset);
     ErrorF("sf kernel:     0x%08x\n", state_base_offset + sf_kernel_offset);
     ErrorF("ps kernel:     0x%08x\n", state_base_offset + ps_kernel_offset);
     ErrorF("sip kernel:    0x%08x\n", state_base_offset + sip_kernel_offset);
@@ -782,9 +799,12 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     for (src_surf = 0; src_surf < n_src_surf; src_surf++)
 	binding_table[1 + src_surf] = state_base_offset + src_surf_offset[src_surf];
 
-    i965_set_vs_state(pScrn, (void *)(state_base + vs_offset));
-    i965_set_sf_state(pScrn, (void *)(state_base + sf_offset),
-		      state_base_offset + sf_kernel_offset);
+    if (pI830->video.gen4_vs_bo == NULL)
+	pI830->video.gen4_vs_bo = i965_create_vs_state(pScrn);
+    if (pI830->video.gen4_sf_bo == NULL)
+	pI830->video.gen4_sf_bo = i965_create_sf_state(pScrn,
+						       state_base_offset +
+						       sf_kernel_offset);
     i965_set_wm_state(pScrn, (void *)(state_base + wm_offset),
 		      state_base_offset + ps_kernel_offset,
 		      state_base_offset + wm_scratch_offset,
@@ -886,12 +906,12 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 
        /* Set the pointers to the 3d pipeline state */
        OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
-       OUT_BATCH(state_base_offset + vs_offset);  /* 32 byte aligned */
+       OUT_RELOC(pI830->video.gen4_vs_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
        /* disable GS, resulting in passthrough */
        OUT_BATCH(BRW_GS_DISABLE);
        /* disable CLIP, resulting in passthrough */
        OUT_BATCH(BRW_CLIP_DISABLE);
-       OUT_BATCH(state_base_offset + sf_offset);  /* 32 byte aligned */
+       OUT_RELOC(pI830->video.gen4_sf_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
        OUT_BATCH(state_base_offset + wm_offset);  /* 32 byte aligned */
        OUT_RELOC(pI830->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
 
@@ -1035,6 +1055,10 @@ i965_free_video(ScrnInfoPtr scrn)
 {
     I830Ptr pI830 = I830PTR(scrn);
 
+    drm_intel_bo_unreference(pI830->video.gen4_vs_bo);
+    pI830->video.gen4_vs_bo = NULL;
+    drm_intel_bo_unreference(pI830->video.gen4_sf_bo);
+    pI830->video.gen4_sf_bo = NULL;
     drm_intel_bo_unreference(pI830->video.gen4_cc_bo);
     pI830->video.gen4_cc_bo = NULL;
     drm_intel_bo_unreference(pI830->video.gen4_cc_vp_bo);
commit 0883f374c707f12a3c9ae8fcda3ff4f53b8e8db8
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 13:11:13 2008 -0800

    Move i965 video cc state to BOs.
    (cherry picked from commit 4b9d3eac57a972c055c4acd7a10dfe8aa918131c)

diff --git a/src/i830.h b/src/i830.h
index 68d0311..7b47531 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -568,6 +568,11 @@ typedef struct _I830Rec {
    XF86VideoAdaptorPtr adaptor;
    ScreenBlockHandlerProcPtr BlockHandler;
    Bool *overlayOn;
+
+   struct {
+      drm_intel_bo *gen4_cc_bo;
+      drm_intel_bo *gen4_cc_vp_bo;
+   } video;
 #endif
 
    /* EXA render state */
diff --git a/src/i830_driver.c b/src/i830_driver.c
index 5775763..4031222 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -3818,6 +3818,10 @@ I830CloseScreen(int scrnIndex, ScreenPtr pScreen)
 
    i830_allocator_fini(pScrn);
 
+#ifdef I830_XV
+   i965_free_video(pScrn);
+#endif
+
    dri_bufmgr_destroy(pI830->bufmgr);
    pI830->bufmgr = NULL;
 
diff --git a/src/i830_video.h b/src/i830_video.h
index 91f767f..8545fb8 100644
--- a/src/i830_video.h
+++ b/src/i830_video.h
@@ -94,3 +94,4 @@ void I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv,
 
 void I830VideoBlockHandler(int i, pointer blockData, pointer pTimeout,
 			   pointer pReadmask);
+void i965_free_video(ScrnInfoPtr scrn);
diff --git a/src/i965_video.c b/src/i965_video.c
index 648347a..2514d3f 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -326,6 +326,23 @@ i965_post_draw_debug(ScrnInfoPtr scrn)
 #define URB_CS_ENTRIES	      0
 #define URB_CS_ENTRY_SIZE     0
 
+/**
+ * Little wrapper around drm_intel_bo_reloc to return the initial value you
+ * should stuff into the relocation entry.
+ *
+ * If only we'd done this before settling on the library API.
+ */
+static uint32_t
+intel_emit_reloc(drm_intel_bo *bo, uint32_t offset,
+		 drm_intel_bo *target_bo, uint32_t target_offset,
+		 uint32_t read_domains, uint32_t write_domain)
+{
+    drm_intel_bo_emit_reloc(bo, offset, target_bo, target_offset,
+			    read_domains, write_domain);
+
+    return target_bo->offset + target_offset;
+}
+
 static void
 i965_set_dst_surface_state(ScrnInfoPtr scrn,
 			   struct brw_surface_state *dest_surf_state,
@@ -484,18 +501,40 @@ i965_set_wm_state(ScrnInfoPtr scrn, struct brw_wm_unit_state *wm_state,
     wm_state->wm5.early_depth_test = 1;
 }
 
-static void
-i965_set_cc_vp_state(ScrnInfoPtr scrn, struct brw_cc_viewport *cc_viewport)
+static drm_intel_bo *
+i965_create_cc_vp_state(ScrnInfoPtr scrn)
 {
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *cc_vp_bo;
+    struct brw_cc_viewport *cc_viewport;
+
+    cc_vp_bo = drm_intel_bo_alloc(pI830->bufmgr, "textured video cc viewport",
+				  4096, 4096);
+    drm_intel_bo_map(cc_vp_bo, TRUE);
+    cc_viewport = cc_vp_bo->virtual;
+
     memset (cc_viewport, 0, sizeof (*cc_viewport));
     cc_viewport->min_depth = -1.e35;
     cc_viewport->max_depth = 1.e35;
+
+    drm_intel_bo_unmap(cc_vp_bo);
+    return cc_vp_bo;
 }
 
-static void
-i965_set_cc_state(ScrnInfoPtr scnr, struct brw_cc_unit_state *cc_state,
-		  uint32_t cc_viewport_offset)
+static drm_intel_bo *
+i965_create_cc_state(ScrnInfoPtr scrn)
 {
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *cc_bo, *cc_vp_bo;
+    struct brw_cc_unit_state *cc_state;
+
+    cc_vp_bo = i965_create_cc_vp_state(scrn);
+
+    cc_bo = drm_intel_bo_alloc(pI830->bufmgr, "textured video cc state",
+			       4096, 4096);
+    drm_intel_bo_map(cc_bo, TRUE);
+    cc_state = cc_bo->virtual;
+
     /* Color calculator state */
     memset(cc_state, 0, sizeof(*cc_state));
     cc_state->cc0.stencil_enable = 0;   /* disable stencil */
@@ -504,13 +543,21 @@ i965_set_cc_state(ScrnInfoPtr scnr, struct brw_cc_unit_state *cc_state,
     cc_state->cc3.ia_blend_enable = 1;  /* blend alpha just like colors */
     cc_state->cc3.blend_enable = 0;     /* disable color blend */
     cc_state->cc3.alpha_test = 0;       /* disable alpha test */
-    cc_state->cc4.cc_viewport_state_offset = cc_viewport_offset >> 5;
+    cc_state->cc4.cc_viewport_state_offset =
+	intel_emit_reloc(cc_bo, offsetof(struct brw_cc_unit_state, cc4),
+			 cc_vp_bo, 0,
+			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
     cc_state->cc5.dither_enable = 0;    /* disable dither */
     cc_state->cc5.logicop_func = 0xc;   /* WHITE */
     cc_state->cc5.statistics_enable = 1;
     cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
     cc_state->cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE;
     cc_state->cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE;
+
+    drm_intel_bo_unmap(cc_bo);
+
+    drm_intel_bo_unreference(cc_vp_bo);
+    return cc_bo;
 }
 
 void
@@ -534,7 +581,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     uint32_t *binding_table;
     Bool first_output = TRUE;
     int dest_surf_offset, src_surf_offset[6], sampler_offset[6], vs_offset;
-    int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
+    int sf_offset, wm_offset, vb_offset;
     int wm_scratch_offset;
     int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
     int binding_table_offset;
@@ -631,8 +678,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     next_offset = wm_offset + sizeof(struct brw_wm_unit_state);
     wm_scratch_offset = ALIGN(next_offset, 1024);
     next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS;
-    cc_offset = ALIGN(next_offset, 32);
-    next_offset = cc_offset + sizeof(struct brw_cc_unit_state);
 
     sf_kernel_offset = ALIGN(next_offset, 64);
     next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
@@ -640,8 +685,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     next_offset = ps_kernel_offset + ps_kernel_static_size;
     sip_kernel_offset = ALIGN(next_offset, 64);
     next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
-    cc_viewport_offset = ALIGN(next_offset, 32);
-    next_offset = cc_viewport_offset + sizeof(struct brw_cc_viewport);
 
     for (src_surf = 0; src_surf < n_src_surf; src_surf++) {    
 	sampler_offset[src_surf] = ALIGN(next_offset, 32);
@@ -682,11 +725,9 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     ErrorF("vs:            0x%08x\n", state_base_offset + vs_offset);
     ErrorF("wm:            0x%08x\n", state_base_offset + wm_offset);
     ErrorF("sf:            0x%08x\n", state_base_offset + sf_offset);
-    ErrorF("cc:            0x%08x\n", state_base_offset + cc_offset);
     ErrorF("sf kernel:     0x%08x\n", state_base_offset + sf_kernel_offset);
     ErrorF("ps kernel:     0x%08x\n", state_base_offset + ps_kernel_offset);
     ErrorF("sip kernel:    0x%08x\n", state_base_offset + sip_kernel_offset);
-    ErrorF("cc_vp:         0x%08x\n", state_base_offset + cc_viewport_offset);
     ErrorF("src sampler:   0x%08x\n", state_base_offset + sampler_offset);
     ErrorF("vb:            0x%08x\n", state_base_offset + vb_offset);
     ErrorF("dst surf:      0x%08x\n", state_base_offset + dest_surf_offset);
@@ -749,9 +790,9 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 		      state_base_offset + wm_scratch_offset,
 		      state_base_offset + sampler_offset[0],
 		      n_src_surf);
-    i965_set_cc_vp_state(pScrn, (void *)(state_base + cc_viewport_offset));
-    i965_set_cc_state(pScrn, (void *)(state_base + cc_offset),
-		      state_base_offset + cc_viewport_offset);
+
+    if (pI830->video.gen4_cc_bo == NULL)
+	pI830->video.gen4_cc_bo = i965_create_cc_state(pScrn);
 
     {
 	BEGIN_BATCH(2);
@@ -852,7 +893,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
        OUT_BATCH(BRW_CLIP_DISABLE);
        OUT_BATCH(state_base_offset + sf_offset);  /* 32 byte aligned */
        OUT_BATCH(state_base_offset + wm_offset);  /* 32 byte aligned */
-       OUT_BATCH(state_base_offset + cc_offset);  /* 64 byte aligned */
+       OUT_RELOC(pI830->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
 
        /* URB fence */
        OUT_BATCH(BRW_URB_FENCE |
@@ -988,3 +1029,14 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     i830_dump_error_state(pScrn);
 #endif
 }
+
+void
+i965_free_video(ScrnInfoPtr scrn)
+{
+    I830Ptr pI830 = I830PTR(scrn);
+
+    drm_intel_bo_unreference(pI830->video.gen4_cc_bo);
+    pI830->video.gen4_cc_bo = NULL;
+    drm_intel_bo_unreference(pI830->video.gen4_cc_vp_bo);
+    pI830->video.gen4_cc_vp_bo = NULL;
+}
commit fadb1555b3dc088cadb6efb51c1bccade313d4ea
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 12:53:37 2008 -0800

    Move I965DisplayVideoTextured unit state setup to separate functions.
    (cherry picked from commit 176e92d89fdfb199780014722feab6ac25836dcc)

diff --git a/src/i965_video.c b/src/i965_video.c
index f9a9ff0..648347a 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -301,6 +301,31 @@ i965_post_draw_debug(ScrnInfoPtr scrn)
 #endif
 }
 
+/* For 3D, the VS must have 8, 12, 16, 24, or 32 VUEs allocated to it.
+ * A VUE consists of a 256-bit vertex header followed by the vertex data,
+ * which in our case is 4 floats (128 bits), thus a single 512-bit URB
+ * entry.
+ */
+#define URB_VS_ENTRIES	      8
+#define URB_VS_ENTRY_SIZE     1
+
+#define URB_GS_ENTRIES	      0
+#define URB_GS_ENTRY_SIZE     0
+
+#define URB_CLIP_ENTRIES      0
+#define URB_CLIP_ENTRY_SIZE   0
+
+/* The SF kernel we use outputs only 4 256-bit registers, leading to an
+ * entry size of 2 512-bit URBs.  We don't need to have many entries to
+ * output as we're generally working on large rectangles and don't care
+ * about having WM threads running on different rectangles simultaneously.
+ */
+#define URB_SF_ENTRIES	      1
+#define URB_SF_ENTRY_SIZE     2
+
+#define URB_CS_ENTRIES	      0
+#define URB_CS_ENTRY_SIZE     0
+
 static void
 i965_set_dst_surface_state(ScrnInfoPtr scrn,
 			   struct brw_surface_state *dest_surf_state,
@@ -380,6 +405,114 @@ i965_set_sampler_state(ScrnInfoPtr scrn,
     sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
 }
 
+static void
+i965_set_vs_state(ScrnInfoPtr scrn, struct brw_vs_unit_state *vs_state)
+{
+    /* Set up the vertex shader to be disabled (passthrough) */
+    memset(vs_state, 0, sizeof(*vs_state));
+    vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
+    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
+    vs_state->vs6.vs_enable = 0;
+    vs_state->vs6.vert_cache_disable = 1;
+}
+
+static void
+i965_set_sf_state(ScrnInfoPtr scrn, struct brw_sf_unit_state *sf_state,
+		  uint32_t sf_kernel_offset)
+{
+    /* Set up the SF kernel to do coord interp: for each attribute,
+     * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
+     * back to SF which then hands pixels off to WM.
+     */
+    memset(sf_state, 0, sizeof(*sf_state));
+    sf_state->thread0.kernel_start_pointer = sf_kernel_offset >> 6;
+    sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
+    sf_state->sf1.single_program_flow = 1; /* XXX */
+    sf_state->sf1.binding_table_entry_count = 0;
+    sf_state->sf1.thread_priority = 0;
+    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
+    sf_state->sf1.illegal_op_exception_enable = 1;
+    sf_state->sf1.mask_stack_exception_enable = 1;
+    sf_state->sf1.sw_exception_enable = 1;
+    sf_state->thread2.per_thread_scratch_space = 0;
+    /* scratch space is not used in our kernel */
+    sf_state->thread2.scratch_space_base_pointer = 0;
+    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
+    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
+    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
+    sf_state->thread3.urb_entry_read_offset = 0;
+    sf_state->thread3.dispatch_grf_start_reg = 3;
+    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
+    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
+    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
+    sf_state->thread4.stats_enable = 1;
+    sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
+    sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
+    sf_state->sf6.scissor = 0;
+    sf_state->sf7.trifan_pv = 2;
+    sf_state->sf6.dest_org_vbias = 0x8;
+    sf_state->sf6.dest_org_hbias = 0x8;
+}
+
+static void
+i965_set_wm_state(ScrnInfoPtr scrn, struct brw_wm_unit_state *wm_state,
+		  uint32_t ps_kernel_offset, uint32_t wm_scratch_offset,
+		  uint32_t sampler_offset, int n_src_surf)
+{
+    memset(wm_state, 0, sizeof (*wm_state));
+    wm_state->thread0.kernel_start_pointer = ps_kernel_offset >> 6;
+    wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
+    wm_state->thread1.single_program_flow = 1; /* XXX */
+    wm_state->thread1.binding_table_entry_count = 1 + n_src_surf;
+    /* Though we never use the scratch space in our WM kernel, it has to be
+     * set, and the minimum allocation is 1024 bytes.
+     */
+    wm_state->thread2.scratch_space_base_pointer = wm_scratch_offset >> 10;
+    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
+    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
+    wm_state->thread3.const_urb_entry_read_length = 0;
+    wm_state->thread3.const_urb_entry_read_offset = 0;
+    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
+    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
+    wm_state->wm4.stats_enable = 1;
+    wm_state->wm4.sampler_state_pointer = sampler_offset >> 5;
+    wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
+    wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
+    wm_state->wm5.thread_dispatch_enable = 1;
+    wm_state->wm5.enable_16_pix = 1;
+    wm_state->wm5.enable_8_pix = 0;
+    wm_state->wm5.early_depth_test = 1;
+}
+
+static void
+i965_set_cc_vp_state(ScrnInfoPtr scrn, struct brw_cc_viewport *cc_viewport)
+{
+    memset (cc_viewport, 0, sizeof (*cc_viewport));
+    cc_viewport->min_depth = -1.e35;
+    cc_viewport->max_depth = 1.e35;
+}
+
+static void
+i965_set_cc_state(ScrnInfoPtr scnr, struct brw_cc_unit_state *cc_state,
+		  uint32_t cc_viewport_offset)
+{
+    /* Color calculator state */
+    memset(cc_state, 0, sizeof(*cc_state));
+    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
+    cc_state->cc2.depth_test = 0;       /* disable depth test */
+    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
+    cc_state->cc3.ia_blend_enable = 1;  /* blend alpha just like colors */
+    cc_state->cc3.blend_enable = 0;     /* disable color blend */
+    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
+    cc_state->cc4.cc_viewport_state_offset = cc_viewport_offset >> 5;
+    cc_state->cc5.dither_enable = 0;    /* disable dither */
+    cc_state->cc5.logicop_func = 0xc;   /* COPY */
+    cc_state->cc5.statistics_enable = 1;
+    cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
+    cc_state->cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE;
+    cc_state->cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE;
+}
+
 void
 I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 			 RegionPtr dstRegion,
@@ -397,19 +530,10 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     int urb_clip_start, urb_clip_size;
     int urb_sf_start, urb_sf_size;
     int urb_cs_start, urb_cs_size;
-    struct brw_vs_unit_state *vs_state;
-    struct brw_sf_unit_state *sf_state;
-    struct brw_wm_unit_state *wm_state;
-    struct brw_cc_unit_state *cc_state;
-    struct brw_cc_viewport *cc_viewport;
-    struct brw_instruction *sf_kernel;
-    struct brw_instruction *ps_kernel;
-    struct brw_instruction *sip_kernel;
-    float *vb;
     float src_scale_x, src_scale_y;
     uint32_t *binding_table;
     Bool first_output = TRUE;
-    int dest_surf_offset, src_surf_offset[6], src_sampler_offset[6], vs_offset;
+    int dest_surf_offset, src_surf_offset[6], sampler_offset[6], vs_offset;
     int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
     int wm_scratch_offset;
     int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
@@ -500,15 +624,15 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 
     /* Set up our layout of state in framebuffer.  First the general state: */
     vs_offset = ALIGN(next_offset, 64);
-    next_offset = vs_offset + sizeof(*vs_state);
+    next_offset = vs_offset + sizeof(struct brw_vs_unit_state);
     sf_offset = ALIGN(next_offset, 32);
-    next_offset = sf_offset + sizeof(*sf_state);
+    next_offset = sf_offset + sizeof(struct brw_sf_unit_state);
     wm_offset = ALIGN(next_offset, 32);
-    next_offset = wm_offset + sizeof(*wm_state);
+    next_offset = wm_offset + sizeof(struct brw_wm_unit_state);
     wm_scratch_offset = ALIGN(next_offset, 1024);
     next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS;
     cc_offset = ALIGN(next_offset, 32);
-    next_offset = cc_offset + sizeof(*cc_state);
+    next_offset = cc_offset + sizeof(struct brw_cc_unit_state);
 
     sf_kernel_offset = ALIGN(next_offset, 64);
     next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
@@ -517,11 +641,11 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     sip_kernel_offset = ALIGN(next_offset, 64);
     next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
     cc_viewport_offset = ALIGN(next_offset, 32);
-    next_offset = cc_viewport_offset + sizeof(*cc_viewport);
+    next_offset = cc_viewport_offset + sizeof(struct brw_cc_viewport);
 
     for (src_surf = 0; src_surf < n_src_surf; src_surf++) {    
-	src_sampler_offset[src_surf] = ALIGN(next_offset, 32);
-	next_offset = src_sampler_offset[src_surf] + sizeof(struct brw_sampler_state);
+	sampler_offset[src_surf] = ALIGN(next_offset, 32);
+	next_offset = sampler_offset[src_surf] + sizeof(struct brw_sampler_state);
     }
     
     /* Align VB to native size of elements, for safety */
@@ -551,21 +675,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     state_base_offset = ALIGN(state_base_offset, 64);
 
     state_base = (char *)(pI830->FbBase + state_base_offset);
-    /* Set up our pointers to state structures in framebuffer.  It would
-     * probably be a good idea to fill these structures out in system memory
-     * and then dump them there, instead.
-     */
-    vs_state = (void *)(state_base + vs_offset);
-    sf_state = (void *)(state_base + sf_offset);
-    wm_state = (void *)(state_base + wm_offset);
-    cc_state = (void *)(state_base + cc_offset);
-    sf_kernel = (void *)(state_base + sf_kernel_offset);
-    ps_kernel = (void *)(state_base + ps_kernel_offset);
-    sip_kernel = (void *)(state_base + sip_kernel_offset);
-
-    cc_viewport = (void *)(state_base + cc_viewport_offset);
+
     binding_table = (void *)(state_base + binding_table_offset);
-    vb = (void *)(state_base + vb_offset);
 
 #if 0
     ErrorF("vs:            0x%08x\n", state_base_offset + vs_offset);
@@ -576,38 +687,13 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     ErrorF("ps kernel:     0x%08x\n", state_base_offset + ps_kernel_offset);
     ErrorF("sip kernel:    0x%08x\n", state_base_offset + sip_kernel_offset);
     ErrorF("cc_vp:         0x%08x\n", state_base_offset + cc_viewport_offset);
-    ErrorF("src sampler:   0x%08x\n", state_base_offset + src_sampler_offset);
+    ErrorF("src sampler:   0x%08x\n", state_base_offset + sampler_offset);
     ErrorF("vb:            0x%08x\n", state_base_offset + vb_offset);
     ErrorF("dst surf:      0x%08x\n", state_base_offset + dest_surf_offset);
     ErrorF("src surf:      0x%08x\n", state_base_offset + src_surf_offset);
     ErrorF("binding table: 0x%08x\n", state_base_offset + binding_table_offset);
 #endif
 
-    /* For 3D, the VS must have 8, 12, 16, 24, or 32 VUEs allocated to it.
-     * A VUE consists of a 256-bit vertex header followed by the vertex data,
-     * which in our case is 4 floats (128 bits), thus a single 512-bit URB
-     * entry.
-     */
-#define URB_VS_ENTRIES	      8
-#define URB_VS_ENTRY_SIZE     1
-
-#define URB_GS_ENTRIES	      0
-#define URB_GS_ENTRY_SIZE     0
-
-#define URB_CLIP_ENTRIES      0
-#define URB_CLIP_ENTRY_SIZE   0
-
-    /* The SF kernel we use outputs only 4 256-bit registers, leading to an
-     * entry size of 2 512-bit URBs.  We don't need to have many entries to
-     * output as we're generally working on large rectangles and don't care
-     * about having WM threads running on different rectangles simultaneously.
-     */
-#define URB_SF_ENTRIES	      1
-#define URB_SF_ENTRY_SIZE     2
-
-#define URB_CS_ENTRIES	      0
-#define URB_CS_ENTRY_SIZE     0
-
     urb_vs_start = 0;
     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
     urb_gs_start = urb_vs_start + urb_vs_size;
@@ -624,31 +710,16 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
      * I830PutImage.
      */
 
-    memset (cc_viewport, 0, sizeof (*cc_viewport));
-    cc_viewport->min_depth = -1.e35;
-    cc_viewport->max_depth = 1.e35;
-
-    /* Color calculator state */
-    memset(cc_state, 0, sizeof(*cc_state));
-    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
-    cc_state->cc2.depth_test = 0;       /* disable depth test */
-    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
-    cc_state->cc3.ia_blend_enable = 1;  /* blend alpha just like colors */
-    cc_state->cc3.blend_enable = 0;     /* disable color blend */
-    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
-    cc_state->cc4.cc_viewport_state_offset = (state_base_offset +
-					      cc_viewport_offset) >> 5;
-    cc_state->cc5.dither_enable = 0;    /* disable dither */
-    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
-    cc_state->cc5.statistics_enable = 1;
-    cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
-    cc_state->cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE;
-    cc_state->cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE;
-
-    /* Upload system kernel */
-    memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static));
+    /* Upload kernels */
+    memcpy(state_base + sip_kernel_offset, sip_kernel_static,
+	   sizeof(sip_kernel_static));
+    memcpy(state_base + sf_kernel_offset, sf_kernel_static,
+	   sizeof(sf_kernel_static));
+    memcpy(state_base + ps_kernel_offset, ps_kernel_static,
+	   ps_kernel_static_size);
 
-    i965_set_dst_surface_state(pScrn, (void *)(state_base + dest_surf_offset),
+    i965_set_dst_surface_state(pScrn, (void *)(state_base +
+					       dest_surf_offset),
 			       pPixmap);
 
     for (src_surf = 0; src_surf < n_src_surf; src_surf++)
@@ -661,87 +732,26 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 				   src_pitch[src_surf],
 				   src_surf_format);
 
-    /* Set up a binding table for our two surfaces.  Only the PS will use it */
-    /* XXX: are these offset from the right place? */
-    binding_table[0] = state_base_offset + dest_surf_offset;
-    
-    for (src_surf = 0; src_surf < n_src_surf; src_surf++)
-	binding_table[1 + src_surf] = state_base_offset + src_surf_offset[src_surf];
-
     for (src_surf = 0; src_surf < n_src_surf; src_surf++)
 	i965_set_sampler_state(pScrn, (void *)(state_base +
-					       src_sampler_offset[src_surf]));
+					       sampler_offset[src_surf]));
 
-    /* Set up the vertex shader to be disabled (passthrough) */
-    memset(vs_state, 0, sizeof(*vs_state));
-    vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
-    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
-    vs_state->vs6.vs_enable = 0;
-    vs_state->vs6.vert_cache_disable = 1;
-
-    /* Set up the SF kernel to do coord interp: for each attribute,
-     * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
-     * back to SF which then hands pixels off to WM.
-     */
-
-    memcpy (sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
-    memset(sf_state, 0, sizeof(*sf_state));
-    sf_state->thread0.kernel_start_pointer =
-	(state_base_offset + sf_kernel_offset) >> 6;
-    sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
-    sf_state->sf1.single_program_flow = 1; /* XXX */
-    sf_state->sf1.binding_table_entry_count = 0;
-    sf_state->sf1.thread_priority = 0;
-    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
-    sf_state->sf1.illegal_op_exception_enable = 1;
-    sf_state->sf1.mask_stack_exception_enable = 1;
-    sf_state->sf1.sw_exception_enable = 1;
-    sf_state->thread2.per_thread_scratch_space = 0;
-    /* scratch space is not used in our kernel */
-    sf_state->thread2.scratch_space_base_pointer = 0;
-    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
-    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
-    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
-    sf_state->thread3.urb_entry_read_offset = 0;
-    sf_state->thread3.dispatch_grf_start_reg = 3;
-    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
-    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
-    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
-    sf_state->thread4.stats_enable = 1;
-    sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
-    sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
-    sf_state->sf6.scissor = 0;
-    sf_state->sf7.trifan_pv = 2;
-    sf_state->sf6.dest_org_vbias = 0x8;
-    sf_state->sf6.dest_org_hbias = 0x8;
+    /* Set up a binding table for our surfaces.  Only the PS will use it */
+    binding_table[0] = state_base_offset + dest_surf_offset;
+    for (src_surf = 0; src_surf < n_src_surf; src_surf++)
+	binding_table[1 + src_surf] = state_base_offset + src_surf_offset[src_surf];
 
-    memcpy (ps_kernel, ps_kernel_static, ps_kernel_static_size);
-    memset (wm_state, 0, sizeof (*wm_state));
-    wm_state->thread0.kernel_start_pointer =
-	(state_base_offset + ps_kernel_offset) >> 6;
-    wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
-    wm_state->thread1.single_program_flow = 1; /* XXX */
-    wm_state->thread1.binding_table_entry_count = 1 + n_src_surf;
-    /* Though we never use the scratch space in our WM kernel, it has to be
-     * set, and the minimum allocation is 1024 bytes.
-     */
-    wm_state->thread2.scratch_space_base_pointer = (state_base_offset +
-						    wm_scratch_offset) >> 10;
-    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
-    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
-    wm_state->thread3.const_urb_entry_read_length = 0;
-    wm_state->thread3.const_urb_entry_read_offset = 0;
-    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
-    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
-    wm_state->wm4.stats_enable = 1;
-    wm_state->wm4.sampler_state_pointer = (state_base_offset +
-					   src_sampler_offset[0]) >> 5;
-    wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
-    wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
-    wm_state->wm5.thread_dispatch_enable = 1;
-    wm_state->wm5.enable_16_pix = 1;
-    wm_state->wm5.enable_8_pix = 0;
-    wm_state->wm5.early_depth_test = 1;
+    i965_set_vs_state(pScrn, (void *)(state_base + vs_offset));
+    i965_set_sf_state(pScrn, (void *)(state_base + sf_offset),
+		      state_base_offset + sf_kernel_offset);
+    i965_set_wm_state(pScrn, (void *)(state_base + wm_offset),
+		      state_base_offset + ps_kernel_offset,
+		      state_base_offset + wm_scratch_offset,
+		      state_base_offset + sampler_offset[0],
+		      n_src_surf);
+    i965_set_cc_vp_state(pScrn, (void *)(state_base + cc_viewport_offset));
+    i965_set_cc_state(pScrn, (void *)(state_base + cc_offset),
+		      state_base_offset + cc_viewport_offset);
 
     {
 	BEGIN_BATCH(2);
@@ -925,6 +935,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	int box_x2 = pbox->x2;
 	int box_y2 = pbox->y2;
 	int i;
+	float *vb;
 
 	if (!first_output) {
 	    /* Since we use the same little vertex buffer over and over, sync
@@ -935,6 +946,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 
 	pbox++;
 
+	vb = (void *)(state_base + vb_offset);
 	i = 0;
 	vb[i++] = (box_x2 - dxo) * src_scale_x;
 	vb[i++] = (box_y2 - dyo) * src_scale_y;
commit 0354ccaf9dccc7ee8cb92fb3ffb983481e45117e
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 11:44:35 2008 -0800

    Move I965DisplayVideoTextured surface/sampler setup to separate functions.
    (cherry picked from commit 61929f4c641e2ecb145ad2b22f7092d40e31ae6d)

diff --git a/src/i965_video.c b/src/i965_video.c
index 1900f2a..f9a9ff0 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -301,6 +301,85 @@ i965_post_draw_debug(ScrnInfoPtr scrn)
 #endif
 }
 
+static void
+i965_set_dst_surface_state(ScrnInfoPtr scrn,
+			   struct brw_surface_state *dest_surf_state,
+			   PixmapPtr pixmap)
+{
+    I830Ptr pI830 = I830PTR(scrn);
+
+    memset(dest_surf_state, 0, sizeof(*dest_surf_state));
+    dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
+    dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
+    if (pI830->cpp == 2) {
+	dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+    } else {
+	dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+    }
+    dest_surf_state->ss0.writedisable_alpha = 0;
+    dest_surf_state->ss0.writedisable_red = 0;
+    dest_surf_state->ss0.writedisable_green = 0;
+    dest_surf_state->ss0.writedisable_blue = 0;
+    dest_surf_state->ss0.color_blend = 1;
+    dest_surf_state->ss0.vert_line_stride = 0;
+    dest_surf_state->ss0.vert_line_stride_ofs = 0;
+    dest_surf_state->ss0.mipmap_layout_mode = 0;
+    dest_surf_state->ss0.render_cache_read_mode = 0;
+
+    dest_surf_state->ss1.base_addr = intel_get_pixmap_offset(pixmap);
+    dest_surf_state->ss2.height = scrn->virtualY - 1;
+    dest_surf_state->ss2.width = scrn->virtualX - 1;
+    dest_surf_state->ss2.mip_count = 0;
+    dest_surf_state->ss2.render_target_rotation = 0;
+    dest_surf_state->ss3.pitch = intel_get_pixmap_pitch(pixmap) - 1;
+    dest_surf_state->ss3.tiled_surface = i830_pixmap_tiled(pixmap);
+    dest_surf_state->ss3.tile_walk = 0; /* TileX */
+}
+
+static void
+i965_set_src_surface_state(ScrnInfoPtr scrn,
+			      struct brw_surface_state *src_surf_state,
+			      uint32_t src_offset,
+			      int src_width,
+			      int src_height,
+			      int src_pitch,
+			      uint32_t src_surf_format)
+{
+    /* Set up the source surface state buffer */
+    memset(src_surf_state, 0, sizeof(struct brw_surface_state));
+    src_surf_state->ss0.surface_type = BRW_SURFACE_2D;
+    src_surf_state->ss0.surface_format = src_surf_format;
+    src_surf_state->ss0.writedisable_alpha = 0;
+    src_surf_state->ss0.writedisable_red = 0;
+    src_surf_state->ss0.writedisable_green = 0;
+    src_surf_state->ss0.writedisable_blue = 0;
+    src_surf_state->ss0.color_blend = 1;
+    src_surf_state->ss0.vert_line_stride = 0;
+    src_surf_state->ss0.vert_line_stride_ofs = 0;
+    src_surf_state->ss0.mipmap_layout_mode = 0;
+    src_surf_state->ss0.render_cache_read_mode = 0;
+
+    src_surf_state->ss1.base_addr = src_offset;
+    src_surf_state->ss2.width = src_width - 1;
+    src_surf_state->ss2.height = src_height - 1;
+    src_surf_state->ss2.mip_count = 0;
+    src_surf_state->ss2.render_target_rotation = 0;
+    src_surf_state->ss3.pitch = src_pitch - 1;
+}
+
+static void
+i965_set_sampler_state(ScrnInfoPtr scrn,
+		       struct brw_sampler_state *sampler_state)
+{
+    memset(sampler_state, 0, sizeof(struct brw_sampler_state));
+
+    sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+    sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+    sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+    sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+    sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+}
+
 void
 I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 			 RegionPtr dstRegion,
@@ -318,9 +397,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     int urb_clip_start, urb_clip_size;
     int urb_sf_start, urb_sf_size;
     int urb_cs_start, urb_cs_size;
-    struct brw_surface_state *dest_surf_state;
-    struct brw_surface_state *src_surf_state[6];
-    struct brw_sampler_state *src_sampler_state[6];
     struct brw_vs_unit_state *vs_state;
     struct brw_sf_unit_state *sf_state;
     struct brw_wm_unit_state *wm_state;
@@ -454,7 +530,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 
     /* And then the general state: */
     dest_surf_offset = ALIGN(next_offset, 32);
-    next_offset = dest_surf_offset + sizeof(*dest_surf_state);
+    next_offset = dest_surf_offset + sizeof(struct brw_surface_state);
     
     for (src_surf = 0; src_surf < n_src_surf; src_surf++) {
 	src_surf_offset[src_surf] = ALIGN(next_offset, 32);
@@ -488,13 +564,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     sip_kernel = (void *)(state_base + sip_kernel_offset);
 
     cc_viewport = (void *)(state_base + cc_viewport_offset);
-    dest_surf_state = (void *)(state_base + dest_surf_offset);
-    
-    for (src_surf = 0; src_surf < n_src_surf; src_surf++) 
-    {
-	src_surf_state[src_surf] = (void *)(state_base + src_surf_offset[src_surf]);
-	src_sampler_state[src_surf] = (void *)(state_base + src_sampler_offset[src_surf]);
-    }
     binding_table = (void *)(state_base + binding_table_offset);
     vb = (void *)(state_base + vb_offset);
 
@@ -579,58 +648,18 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     /* Upload system kernel */
     memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static));
 
-    /* Set up the state buffer for the destination surface */
-    memset(dest_surf_state, 0, sizeof(*dest_surf_state));
-    dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
-    dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
-    if (pI830->cpp == 2) {
-	dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
-    } else {
-	dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-    }
-    dest_surf_state->ss0.writedisable_alpha = 0;
-    dest_surf_state->ss0.writedisable_red = 0;
-    dest_surf_state->ss0.writedisable_green = 0;
-    dest_surf_state->ss0.writedisable_blue = 0;
-    dest_surf_state->ss0.color_blend = 1;
-    dest_surf_state->ss0.vert_line_stride = 0;
-    dest_surf_state->ss0.vert_line_stride_ofs = 0;
-    dest_surf_state->ss0.mipmap_layout_mode = 0;
-    dest_surf_state->ss0.render_cache_read_mode = 0;
-
-    dest_surf_state->ss1.base_addr = intel_get_pixmap_offset(pPixmap);
-    dest_surf_state->ss2.height = pScrn->virtualY - 1;
-    dest_surf_state->ss2.width = pScrn->virtualX - 1;
-    dest_surf_state->ss2.mip_count = 0;
-    dest_surf_state->ss2.render_target_rotation = 0;
-    dest_surf_state->ss3.pitch = intel_get_pixmap_pitch(pPixmap) - 1;
-    dest_surf_state->ss3.tiled_surface = i830_pixmap_tiled(pPixmap);
-    dest_surf_state->ss3.tile_walk = 0; /* TileX */
+    i965_set_dst_surface_state(pScrn, (void *)(state_base + dest_surf_offset),
+			       pPixmap);
 
     for (src_surf = 0; src_surf < n_src_surf; src_surf++)
-    {
-	/* Set up the source surface state buffer */
-	memset(src_surf_state[src_surf], 0, sizeof(struct brw_surface_state));
-	src_surf_state[src_surf]->ss0.surface_type = BRW_SURFACE_2D;
-	src_surf_state[src_surf]->ss0.surface_format = src_surf_format;
-	src_surf_state[src_surf]->ss0.writedisable_alpha = 0;
-	src_surf_state[src_surf]->ss0.writedisable_red = 0;
-	src_surf_state[src_surf]->ss0.writedisable_green = 0;
-	src_surf_state[src_surf]->ss0.writedisable_blue = 0;
-	src_surf_state[src_surf]->ss0.color_blend = 1;
-	src_surf_state[src_surf]->ss0.vert_line_stride = 0;
-	src_surf_state[src_surf]->ss0.vert_line_stride_ofs = 0;
-	src_surf_state[src_surf]->ss0.mipmap_layout_mode = 0;
-	src_surf_state[src_surf]->ss0.render_cache_read_mode = 0;
-    
-	src_surf_state[src_surf]->ss1.base_addr = src_surf_base[src_surf];
-	src_surf_state[src_surf]->ss2.width = src_width[src_surf] - 1;
-	src_surf_state[src_surf]->ss2.height = src_height[src_surf] - 1;
-	src_surf_state[src_surf]->ss2.mip_count = 0;
-	src_surf_state[src_surf]->ss2.render_target_rotation = 0;
-	src_surf_state[src_surf]->ss3.pitch = src_pitch[src_surf] - 1;
-    }
-    /* FIXME: account for tiling if we ever do it */
+	i965_set_src_surface_state(pScrn,
+				   (void *)(state_base +
+					    src_surf_offset[src_surf]),
+				   src_surf_base[src_surf],
+				   src_width[src_surf],
+				   src_height[src_surf],
+				   src_pitch[src_surf],
+				   src_surf_format);
 
     /* Set up a binding table for our two surfaces.  Only the PS will use it */
     /* XXX: are these offset from the right place? */
@@ -639,17 +668,9 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     for (src_surf = 0; src_surf < n_src_surf; src_surf++)
 	binding_table[1 + src_surf] = state_base_offset + src_surf_offset[src_surf];
 
-    /* Set up the packed YUV source sampler.  Doesn't do colorspace conversion.
-     */
     for (src_surf = 0; src_surf < n_src_surf; src_surf++)
-    {
-	memset(src_sampler_state[src_surf], 0, sizeof(struct brw_sampler_state));
-	src_sampler_state[src_surf]->ss0.min_filter = BRW_MAPFILTER_LINEAR;
-	src_sampler_state[src_surf]->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
-	src_sampler_state[src_surf]->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
-	src_sampler_state[src_surf]->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
-	src_sampler_state[src_surf]->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
-    }
+	i965_set_sampler_state(pScrn, (void *)(state_base +
+					       src_sampler_offset[src_surf]));
 
     /* Set up the vertex shader to be disabled (passthrough) */
     memset(vs_state, 0, sizeof(*vs_state));
commit 46ddcb4e7399bd94c86fab38b2d0c8d823e97e5d
Author: Eric Anholt <eric at anholt.net>
Date:   Thu Dec 4 11:37:38 2008 -0800

    Move debug code for I965DisplayVideoTextured to separate functions.
    (cherry picked from commit 73d03cb93ca761ee555b87558882e26bd2f10d91)

diff --git a/src/i965_video.c b/src/i965_video.c
index 6c295a6..1900f2a 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -154,6 +154,153 @@ brw_debug (ScrnInfoPtr pScrn, char *when)
 #define WATCH_WIZ 0
 #define WATCH_STATS 0
 
+static void
+i965_pre_draw_debug(ScrnInfoPtr scrn)
+{
+#if 0
+    I830Ptr pI830 = I830PTR(scrn);
+#endif
+
+#if 0
+    ErrorF ("before EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n",
+	    INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0),
+	    INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0));
+
+    OUTREG(BRW_VF_CTL,
+	   BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID |
+	   BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX |
+	   BRW_VF_CTL_SNAPSHOT_ENABLE);
+    OUTREG(BRW_VF_STRG_VAL, 0);
+#endif
+
+#if 0
+    OUTREG(BRW_VS_CTL,
+	   BRW_VS_CTL_SNAPSHOT_ALL_THREADS |
+	   BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT |
+	   BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE);
+
+    OUTREG(BRW_VS_STRG_VAL, 0);
+#endif
+
+#if WATCH_SF
+    OUTREG(BRW_SF_CTL,
+	   BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT |
+	   BRW_SF_CTL_SNAPSHOT_ALL_THREADS |
+	   BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE);
+    OUTREG(BRW_SF_STRG_VAL, 0);
+#endif
+
+#if WATCH_WIZ
+    OUTREG(BRW_WIZ_CTL,
+	   BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE |
+	   BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS |
+	   BRW_WIZ_CTL_SNAPSHOT_ENABLE);
+    OUTREG(BRW_WIZ_STRG_VAL,
+	   (box_x1) | (box_y1 << 16));
+#endif
+
+#if 0
+    OUTREG(BRW_TS_CTL,
+	   BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR |
+	   BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS |
+	   BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS |
+	   BRW_TS_CTL_SNAPSHOT_ENABLE);
+#endif
+}
+
+static void
+i965_post_draw_debug(ScrnInfoPtr scrn)
+{
+#if 0
+    I830Ptr pI830 = I830PTR(scrn);
+#endif
+
+#if 0
+    for (j = 0; j < 100000; j++) {
+	ctl = INREG(BRW_VF_CTL);
+	if (ctl & BRW_VF_CTL_SNAPSHOT_COMPLETE)
+	    break;
+    }
+
+    rdata = INREG(BRW_VF_RDATA);
+    OUTREG(BRW_VF_CTL, 0);
+    ErrorF ("VF_CTL: 0x%08x VF_RDATA: 0x%08x\n", ctl, rdata);
+#endif
+
+#if 0
+    for (j = 0; j < 1000000; j++) {
+	ctl = INREG(BRW_VS_CTL);
+	if (ctl & BRW_VS_CTL_SNAPSHOT_COMPLETE)
+	    break;
+    }
+
+    rdata = INREG(BRW_VS_RDATA);
+    for (k = 0; k <= 3; k++) {
+	OUTREG(BRW_VS_CTL,
+	       BRW_VS_CTL_SNAPSHOT_COMPLETE |
+	       (k << 8));
+	rdata = INREG(BRW_VS_RDATA);
+	ErrorF ("VS_CTL: 0x%08x VS_RDATA(%d): 0x%08x\n", ctl, k, rdata);
+    }
+
+    OUTREG(BRW_VS_CTL, 0);
+#endif
+
+#if WATCH_SF
+    for (j = 0; j < 1000000; j++) {
+	ctl = INREG(BRW_SF_CTL);
+	if (ctl & BRW_SF_CTL_SNAPSHOT_COMPLETE)
+	    break;
+    }
+
+    for (k = 0; k <= 7; k++) {
+	OUTREG(BRW_SF_CTL,
+	       BRW_SF_CTL_SNAPSHOT_COMPLETE |
+	       (k << 8));
+	rdata = INREG(BRW_SF_RDATA);
+	ErrorF("SF_CTL: 0x%08x SF_RDATA(%d): 0x%08x\n", ctl, k, rdata);
+    }
+
+    OUTREG(BRW_SF_CTL, 0);
+#endif
+
+#if WATCH_WIZ
+    for (j = 0; j < 100000; j++) {
+	ctl = INREG(BRW_WIZ_CTL);
+	if (ctl & BRW_WIZ_CTL_SNAPSHOT_COMPLETE)
+	    break;
+    }
+
+    rdata = INREG(BRW_WIZ_RDATA);
+    OUTREG(BRW_WIZ_CTL, 0);
+    ErrorF("WIZ_CTL: 0x%08x WIZ_RDATA: 0x%08x\n", ctl, rdata);
+#endif
+
+#if 0
+    for (j = 0; j < 100000; j++) {
+	ctl = INREG(BRW_TS_CTL);
+	if (ctl & BRW_TS_CTL_SNAPSHOT_COMPLETE)
+	    break;
+    }
+
+    rdata = INREG(BRW_TS_RDATA);
+    OUTREG(BRW_TS_CTL, 0);
+    ErrorF("TS_CTL: 0x%08x TS_RDATA: 0x%08x\n", ctl, rdata);
+
+    ErrorF("after EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n",
+	   INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0),
+	   INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0));
+#endif
+
+#if 0
+    for (j = 0; j < 256; j++) {
+	OUTREG(BRW_TD_CTL, j << BRW_TD_CTL_MUX_SHIFT);
+	rdata = INREG(BRW_TD_RDATA);
+	ErrorF ("TD_RDATA(%d): 0x%08x\n", j, rdata);
+    }
+#endif
+}
+
 void
 I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 			 RegionPtr dstRegion,
@@ -783,51 +930,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	vb[i++] = (float) box_x1 + pix_xoff;
 	vb[i++] = (float) box_y1 + pix_yoff;
 
-#if 0
-	ErrorF ("before EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n",
-		INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0),
-		INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0));
-
-	OUTREG(BRW_VF_CTL,
-	       BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID |
-	       BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX |
-	       BRW_VF_CTL_SNAPSHOT_ENABLE);
-	OUTREG(BRW_VF_STRG_VAL, 0);
-#endif
-
-#if 0
-	OUTREG(BRW_VS_CTL,
-	       BRW_VS_CTL_SNAPSHOT_ALL_THREADS |
-	       BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT |
-	       BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE);
-
-	OUTREG(BRW_VS_STRG_VAL, 0);
-#endif
-
-#if WATCH_SF
-	OUTREG(BRW_SF_CTL,
-	       BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT |
-	       BRW_SF_CTL_SNAPSHOT_ALL_THREADS |
-	       BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE);
-	OUTREG(BRW_SF_STRG_VAL, 0);
-#endif
-
-#if WATCH_WIZ
-	OUTREG(BRW_WIZ_CTL,
-	       BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE |
-	       BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS |
-	       BRW_WIZ_CTL_SNAPSHOT_ENABLE);
-	OUTREG(BRW_WIZ_STRG_VAL,
-	       (box_x1) | (box_y1 << 16));
-#endif
-
-#if 0
-	OUTREG(BRW_TS_CTL,
-	       BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR |
-	       BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS |
-	       BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS |
-	       BRW_TS_CTL_SNAPSHOT_ENABLE);
-#endif
+	i965_pre_draw_debug(pScrn);
 
 	BEGIN_BATCH(6);
 	OUT_BATCH(BRW_3DPRIMITIVE |
@@ -842,90 +945,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	OUT_BATCH(0); /* index buffer offset, ignored */
 	ADVANCE_BATCH();
 
-#if 0
-	for (j = 0; j < 100000; j++) {
-	    ctl = INREG(BRW_VF_CTL);
-	    if (ctl & BRW_VF_CTL_SNAPSHOT_COMPLETE)
-		break;
-	}
+	i965_post_draw_debug(pScrn);
 
-	rdata = INREG(BRW_VF_RDATA);
-	OUTREG(BRW_VF_CTL, 0);
-	ErrorF ("VF_CTL: 0x%08x VF_RDATA: 0x%08x\n", ctl, rdata);
-#endif
-
-#if 0
-	for (j = 0; j < 1000000; j++) {
-	    ctl = INREG(BRW_VS_CTL);
-	    if (ctl & BRW_VS_CTL_SNAPSHOT_COMPLETE)
-		break;
-	}
-
-	rdata = INREG(BRW_VS_RDATA);
-	for (k = 0; k <= 3; k++) {
-	    OUTREG(BRW_VS_CTL,
-		   BRW_VS_CTL_SNAPSHOT_COMPLETE |
-		   (k << 8));
-	    rdata = INREG(BRW_VS_RDATA);
-	    ErrorF ("VS_CTL: 0x%08x VS_RDATA(%d): 0x%08x\n", ctl, k, rdata);
-	}
-
-	OUTREG(BRW_VS_CTL, 0);
-#endif
-
-#if WATCH_SF
-	for (j = 0; j < 1000000; j++) {
-	    ctl = INREG(BRW_SF_CTL);
-	    if (ctl & BRW_SF_CTL_SNAPSHOT_COMPLETE)
-		break;
-	}
-
-	for (k = 0; k <= 7; k++) {
-	    OUTREG(BRW_SF_CTL,
-		   BRW_SF_CTL_SNAPSHOT_COMPLETE |
-		   (k << 8));
-	    rdata = INREG(BRW_SF_RDATA);
-	    ErrorF("SF_CTL: 0x%08x SF_RDATA(%d): 0x%08x\n", ctl, k, rdata);
-	}
-
-	OUTREG(BRW_SF_CTL, 0);
-#endif
-
-#if WATCH_WIZ
-	for (j = 0; j < 100000; j++) {
-	    ctl = INREG(BRW_WIZ_CTL);
-	    if (ctl & BRW_WIZ_CTL_SNAPSHOT_COMPLETE)
-		break;
-	}
-
-	rdata = INREG(BRW_WIZ_RDATA);
-	OUTREG(BRW_WIZ_CTL, 0);
-	ErrorF("WIZ_CTL: 0x%08x WIZ_RDATA: 0x%08x\n", ctl, rdata);
-#endif
-
-#if 0
-	for (j = 0; j < 100000; j++) {
-	    ctl = INREG(BRW_TS_CTL);
-	    if (ctl & BRW_TS_CTL_SNAPSHOT_COMPLETE)
-		break;
-	}
-
-	rdata = INREG(BRW_TS_RDATA);
-	OUTREG(BRW_TS_CTL, 0);
-	ErrorF("TS_CTL: 0x%08x TS_RDATA: 0x%08x\n", ctl, rdata);
-
-	ErrorF("after EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n",
-	       INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0),
-	       INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0));
-#endif
-
-#if 0
-	for (j = 0; j < 256; j++) {
-	    OUTREG(BRW_TD_CTL, j << BRW_TD_CTL_MUX_SHIFT);
-	    rdata = INREG(BRW_TD_RDATA);
-	    ErrorF ("TD_RDATA(%d): 0x%08x\n", j, rdata);
-	}
-#endif
 	first_output = FALSE;
 	i830MarkSync(pScrn);
     }


More information about the xorg-commit mailing list