xf86-video-intel: 9 commits - src/i830.h src/i965_render.c src/i965_video.c

Eric Anholt anholt at kemper.freedesktop.org
Tue Jan 20 11:34:23 PST 2009


 src/i830.h        |   17 ++
 src/i965_render.c |  402 +++++++++++++++++++++++++-----------------------------
 src/i965_video.c  |  390 ++++++++++++++++++++++++++--------------------------
 3 files changed, 409 insertions(+), 400 deletions(-)

New commits:
commit b6f3ce32e295929f461a7bc37e61f126fb51e4aa
Author: Eric Anholt <eric at anholt.net>
Date:   Tue Jan 20 10:45:23 2009 -0800

    Use drm_intel_bo_subdata to put render vb data in.
    
    This improves performance by avoiding repeated map/unmap cycles, which are
    a bit expensive on my machine with lock debugging on in the kernel.  It could
    do much better if we did more than 18 or so floats at a time.

diff --git a/src/i965_render.c b/src/i965_render.c
index 1d63eb8..0d7d8f3 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1395,12 +1395,6 @@ i965_get_vb_space(ScrnInfoPtr pScrn)
 	render_state->vb_offset = 0;
     }
 
-    /* Map the vertex_buffer buffer object so we can write to it. */
-    if (drm_intel_bo_map(render_state->vertex_buffer_bo, 1) != 0) {
-	ErrorF("i965_get_vb_space(): couldn't map vb\n");
-	return NULL;
-    }
-
     drm_intel_bo_reference(render_state->vertex_buffer_bo);
     return render_state->vertex_buffer_bo;
 }
@@ -1416,7 +1410,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
     int i;
     drm_intel_bo *vb_bo;
-    float *vb;
+    float vb[18];
     Bool is_affine = render_state->composite_op.is_affine;
 
     if (is_affine)
@@ -1492,8 +1486,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     vb_bo = i965_get_vb_space(pScrn);
     if (vb_bo == NULL)
 	return;
-    vb = vb_bo->virtual;
-    i = render_state->vb_offset;
+    i = 0;
     /* rect (x2,y2) */
     vb[i++] = (float)(dstX + w);
     vb[i++] = (float)(dstY + h);
@@ -1536,7 +1529,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	    vb[i++] = mask_w[0];
     }
     assert (i <= VERTEX_BUFFER_SIZE);
-    drm_intel_bo_unmap(vb_bo);
+    drm_intel_bo_subdata(vb_bo, render_state->vb_offset * 4, i * 4, vb);
 
     if (!i965_composite_check_aperture(pScrn))
 	intel_batch_flush(pScrn, FALSE);
@@ -1568,7 +1561,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     OUT_BATCH(0); /* index buffer offset, ignored */
     ADVANCE_BATCH();
 
-    render_state->vb_offset = i;
+    render_state->vb_offset += i;
     drm_intel_bo_unreference(vb_bo);
 
     intel_batch_end_atomic(pScrn);
commit 9a8bbb1951ad0ca0a9407a97348fc7fa03127900
Author: Eric Anholt <eric at anholt.net>
Date:   Mon Jan 19 19:34:50 2009 -0800

    Move i965 render vb setup to the time of use, and decouple state emit from it.
    
    The require_space check had failed since it only accounted for the space
    required by the batch emits in the function itself, but not for those in
    the i965_emit_composite_state() that it called (the very state we were
    concerned about having set up for the 12 dwords, i.e. 12 * 4 bytes, that
    follow!).  This is replaced by intel_batch_start_atomic(), which will catch
    such mistakes in the future.

diff --git a/src/i965_render.c b/src/i965_render.c
index eb5f76a..1d63eb8 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -520,6 +520,8 @@ struct gen4_render_state {
 
     int vb_offset;
     int vertex_size;
+
+    Bool needs_state_emit;
 };
 
 /**
@@ -962,12 +964,7 @@ i965_emit_composite_state(ScrnInfoPtr pScrn)
     uint32_t src_blend, dst_blend;
     dri_bo *binding_table_bo = composite_op->binding_table_bo;
 
-    if (render_state->vertex_buffer_bo == NULL) {
-	render_state->vertex_buffer_bo = dri_bo_alloc (pI830->bufmgr, "vb",
-						       sizeof (gen4_vertex_buffer),
-						       4096);
-	render_state->vb_offset = 0;
-    }
+    render_state->needs_state_emit = FALSE;
 
     IntelEmitInvarientState(pScrn);
     *pI830->last_3d = LAST_3D_RENDER;
@@ -1370,11 +1367,44 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 	    i830_transform_is_affine(pI830->transform[1]);
     }
 
-    i965_emit_composite_state(pScrn);
+    render_state->needs_state_emit = TRUE;
 
     return TRUE;
 }
 
+static drm_intel_bo *
+i965_get_vb_space(ScrnInfoPtr pScrn)
+{
+    I830Ptr pI830 = I830PTR(pScrn);
+    struct gen4_render_state *render_state = pI830->gen4_render_state;
+
+    /* If the vertex buffer is too full, then we free the old and a new one
+     * gets made.
+     */
+    if (render_state->vb_offset + VERTEX_FLOATS_PER_COMPOSITE >
+	VERTEX_BUFFER_SIZE) {
+	drm_intel_bo_unreference(render_state->vertex_buffer_bo);
+	render_state->vertex_buffer_bo = NULL;
+    }
+
+    /* Alloc a new vertex buffer if necessary. */
+    if (render_state->vertex_buffer_bo == NULL) {
+	render_state->vertex_buffer_bo = drm_intel_bo_alloc(pI830->bufmgr, "vb",
+							    sizeof(gen4_vertex_buffer),
+							    4096);
+	render_state->vb_offset = 0;
+    }
+
+    /* Map the vertex_buffer buffer object so we can write to it. */
+    if (drm_intel_bo_map(render_state->vertex_buffer_bo, 1) != 0) {
+	ErrorF("i965_get_vb_space(): couldn't map vb\n");
+	return NULL;
+    }
+
+    drm_intel_bo_reference(render_state->vertex_buffer_bo);
+    return render_state->vertex_buffer_bo;
+}
+
 void
 i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	       int dstX, int dstY, int w, int h)
@@ -1385,6 +1415,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     Bool has_mask;
     float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
     int i;
+    drm_intel_bo *vb_bo;
     float *vb;
     Bool is_affine = render_state->composite_op.is_affine;
 
@@ -1458,30 +1489,10 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	}
     }
 
-    /* We're about to do a BEGIN_BATCH(12) for the vertex setup. And
-     * we first need to ensure that that's not going to cause a flush
-     * since we need to not flush between setting up our vertices in
-     * the VB and emitting them into the batch. */
-    intel_batch_require_space(pScrn, pI830, 12 * 4);
-
-    /* If the vertex buffer is too full, then we flush and re-emit all
-     * necessary state into the batch for the composite operation. */
-    if (render_state->vb_offset + VERTEX_FLOATS_PER_COMPOSITE > VERTEX_BUFFER_SIZE) {
-	dri_bo_unreference (render_state->vertex_buffer_bo);
-	render_state->vertex_buffer_bo = NULL;
-    }
-
-    if (!i965_composite_check_aperture(pScrn))
-	intel_batch_flush(pScrn, FALSE);
-    if (render_state->vertex_buffer_bo == NULL)
-	i965_emit_composite_state(pScrn);
-
-    /* Map the vertex_buffer buffer object so we can write to it. */
-    if (dri_bo_map (render_state->vertex_buffer_bo, 1) != 0)
-	return;		/* XXX what else to do here? */
-
-    vb = render_state->vertex_buffer_bo->virtual;
-
+    vb_bo = i965_get_vb_space(pScrn);
+    if (vb_bo == NULL)
+	return;
+    vb = vb_bo->virtual;
     i = render_state->vb_offset;
     /* rect (x2,y2) */
     vb[i++] = (float)(dstX + w);
@@ -1525,8 +1536,14 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	    vb[i++] = mask_w[0];
     }
     assert (i <= VERTEX_BUFFER_SIZE);
+    drm_intel_bo_unmap(vb_bo);
 
-    dri_bo_unmap (render_state->vertex_buffer_bo);
+    if (!i965_composite_check_aperture(pScrn))
+	intel_batch_flush(pScrn, FALSE);
+
+    intel_batch_start_atomic(pScrn, 200);
+    if (render_state->needs_state_emit)
+	i965_emit_composite_state(pScrn);
 
     BEGIN_BATCH(12);
     OUT_BATCH(MI_FLUSH);
@@ -1535,8 +1552,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
 	      VB0_VERTEXDATA |
 	      (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
-    OUT_RELOC(render_state->vertex_buffer_bo, I915_GEM_DOMAIN_VERTEX, 0,
-	      render_state->vb_offset * 4);
+    OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, render_state->vb_offset * 4);
     OUT_BATCH(3);
     OUT_BATCH(0); // ignore for VERTEXDATA, but still there
 
@@ -1553,6 +1569,9 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     ADVANCE_BATCH();
 
     render_state->vb_offset = i;
+    drm_intel_bo_unreference(vb_bo);
+
+    intel_batch_end_atomic(pScrn);
 
 #ifdef I830DEBUG
     ErrorF("sync after 3dprimitive\n");
@@ -1573,6 +1592,8 @@ i965_batch_flush_notify(ScrnInfoPtr pScrn)
 	dri_bo_unreference (render_state->vertex_buffer_bo);
 	render_state->vertex_buffer_bo = NULL;
     }
+
+    render_state->needs_state_emit = TRUE;
 }
 
 /**
commit 3d739597c4f5817079efd9067ad5db2f4105f765
Author: Eric Anholt <eric at anholt.net>
Date:   Tue Jan 20 10:37:20 2009 -0800

    Move i965 render transform setup from emit_composite_state to prepare_composite.

diff --git a/src/i965_render.c b/src/i965_render.c
index d5eb683..eb5f76a 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -506,6 +506,7 @@ typedef struct gen4_composite_op {
     sampler_state_filter_t mask_filter;
     sampler_state_extend_t src_extend;
     sampler_state_extend_t mask_extend;
+    Bool is_affine;
 } gen4_composite_op;
 
 /** Private data for gen4 render accel implementation. */
@@ -941,10 +942,8 @@ i965_emit_composite_state(ScrnInfoPtr pScrn)
     struct gen4_render_state *render_state= pI830->gen4_render_state;
     gen4_composite_op *composite_op = &render_state->composite_op;
     int op = composite_op->op;
-    PicturePtr pSrcPicture = composite_op->source_picture;
     PicturePtr pMaskPicture = composite_op->mask_picture;
     PicturePtr pDstPicture = composite_op->dest_picture;
-    PixmapPtr pSrc = composite_op->source;
     PixmapPtr pMask = composite_op->mask;
     PixmapPtr pDst = composite_op->dest;
     uint32_t sf_state_offset;
@@ -952,7 +951,7 @@ i965_emit_composite_state(ScrnInfoPtr pScrn)
     sampler_state_filter_t mask_filter = composite_op->mask_filter;
     sampler_state_extend_t src_extend = composite_op->src_extend;
     sampler_state_extend_t mask_extend = composite_op->mask_extend;
-    Bool is_affine_src, is_affine_mask, is_affine;
+    Bool is_affine = composite_op->is_affine;
     int urb_vs_start, urb_vs_size;
     int urb_gs_start, urb_gs_size;
     int urb_clip_start, urb_clip_size;
@@ -973,26 +972,6 @@ i965_emit_composite_state(ScrnInfoPtr pScrn)
     IntelEmitInvarientState(pScrn);
     *pI830->last_3d = LAST_3D_RENDER;
 
-    pI830->scale_units[0][0] = pSrc->drawable.width;
-    pI830->scale_units[0][1] = pSrc->drawable.height;
-
-    pI830->transform[0] = pSrcPicture->transform;
-    is_affine_src = i830_transform_is_affine (pI830->transform[0]);
-
-    if (!pMask) {
-	pI830->transform[1] = NULL;
-	pI830->scale_units[1][0] = -1;
-	pI830->scale_units[1][1] = -1;
-	is_affine_mask = TRUE;
-    } else {
-	pI830->transform[1] = pMaskPicture->transform;
-	pI830->scale_units[1][0] = pMask->drawable.width;
-	pI830->scale_units[1][1] = pMask->drawable.height;
-	is_affine_mask = i830_transform_is_affine (pI830->transform[1]);
-    }
-
-    is_affine = is_affine_src && is_affine_mask;
-
     state_base_offset = pI830->gen4_render_state_mem->offset;
     assert((state_base_offset & 63) == 0);
     state_base = (char *)(pI830->FbBase + state_base_offset);
@@ -1372,6 +1351,25 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 	    I830FALLBACK("Couldn't fit render operation in aperture\n");
     }
 
+    pI830->scale_units[0][0] = pSrc->drawable.width;
+    pI830->scale_units[0][1] = pSrc->drawable.height;
+
+    pI830->transform[0] = pSrcPicture->transform;
+    composite_op->is_affine =
+	i830_transform_is_affine(pI830->transform[0]);
+
+    if (!pMask) {
+	pI830->transform[1] = NULL;
+	pI830->scale_units[1][0] = -1;
+	pI830->scale_units[1][1] = -1;
+    } else {
+	pI830->transform[1] = pMaskPicture->transform;
+	pI830->scale_units[1][0] = pMask->drawable.width;
+	pI830->scale_units[1][1] = pMask->drawable.height;
+	composite_op->is_affine |=
+	    i830_transform_is_affine(pI830->transform[1]);
+    }
+
     i965_emit_composite_state(pScrn);
 
     return TRUE;
@@ -1385,15 +1383,11 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     I830Ptr pI830 = I830PTR(pScrn);
     struct gen4_render_state *render_state = pI830->gen4_render_state;
     Bool has_mask;
-    Bool is_affine_src, is_affine_mask, is_affine;
     float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
     int i;
     float *vb;
+    Bool is_affine = render_state->composite_op.is_affine;
 
-    is_affine_src = i830_transform_is_affine (pI830->transform[0]);
-    is_affine_mask = i830_transform_is_affine (pI830->transform[1]);
-    is_affine = is_affine_src && is_affine_mask;
-    
     if (is_affine)
     {
 	if (!i830_get_transformed_coordinates(srcX, srcY,
commit e20f7278f3abb44a3a151ac91f83c45cf1a2745a
Author: Eric Anholt <eric at anholt.net>
Date:   Mon Jan 19 20:31:31 2009 -0800

    i965: Pull check_aperture out to a separate function and make it do the right thing.
    
    Previously it wouldn't count the pixmaps that were about to be used, which
    is pretty much the only reason to put up with the pain of an aperture check
    in the first place.  This also eliminates the check_twice confusion with
    emit_batch_header_for_composite().

diff --git a/src/i965_render.c b/src/i965_render.c
index f4ad29c..d5eb683 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -934,46 +934,8 @@ i965_set_picture_surface_state(dri_bo *ss_bo, int ss_index,
     }
 }
 
-
-static Bool
-_emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn,
-					   Bool check_twice);
-
-/* Allocate the dynamic state needed for a composite operation,
- * flushing the current batch if needed to create sufficient space.
- *
- * Even after flushing we check again and return FALSE if the
- * operation still can't fit with an empty batch. Otherwise, returns
- * TRUE.
- */
-static Bool
-_emit_batch_header_for_composite_check_twice (ScrnInfoPtr pScrn)
-{
-     return _emit_batch_header_for_composite_internal (pScrn, TRUE);
-}
-
-/* Allocate the dynamic state needed for a composite operation,
- * flushing the current batch if needed to create sufficient space.
- *
- * See _emit_batch_header_for_composite_check_twice for a safer
- * version, (but this version is fine if the safer version has
- * previously been called for the same composite operation).
- */
 static void
-_emit_batch_header_for_composite (ScrnInfoPtr pScrn)
-{
-    _emit_batch_header_for_composite_internal (pScrn, FALSE);
-}
-
-/* Number of buffer object in our call to check_aperture_size:
- *
- *	batch_bo
- *	vertex_buffer_bo
- */
-#define NUM_BO 2
-
-static Bool
-_emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
+i965_emit_composite_state(ScrnInfoPtr pScrn)
 {
     I830Ptr pI830 = I830PTR(pScrn);
     struct gen4_render_state *render_state= pI830->gen4_render_state;
@@ -999,7 +961,6 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
     char *state_base;
     int state_base_offset;
     uint32_t src_blend, dst_blend;
-    dri_bo *bo_table[NUM_BO];
     dri_bo *binding_table_bo = composite_op->binding_table_bo;
 
     if (render_state->vertex_buffer_bo == NULL) {
@@ -1009,23 +970,6 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
 	render_state->vb_offset = 0;
     }
 
-    bo_table[0] = pI830->batch_bo;
-    bo_table[1] = render_state->vertex_buffer_bo;
-
-    /* If this command won't fit in the current batch, flush. */
-    if (dri_bufmgr_check_aperture_space (bo_table, NUM_BO) < 0) {
-	intel_batch_flush (pScrn, FALSE);
-
-	if (check_twice) {
-	    bo_table[0] = pI830->batch_bo; /* get refreshed batch_bo */
-	    /* If the command still won't fit in an empty batch, then it's
-	     * just plain too big for the hardware---fallback to software.
-	     */
-	    if (dri_bufmgr_check_aperture_space (bo_table, 1) < 0)
-		return FALSE;
-	}
-    }
-
     IntelEmitInvarientState(pScrn);
     *pI830->last_3d = LAST_3D_RENDER;
 
@@ -1301,10 +1245,27 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
     ErrorF("try to sync to show any errors...\n");
     I830Sync(pScrn);
 #endif
+}
 
-    return TRUE;
+/**
+ * Returns whether the current set of composite state plus vertex buffer is
+ * expected to fit in the aperture.
+ */
+static Bool
+i965_composite_check_aperture(ScrnInfoPtr pScrn)
+{
+    I830Ptr pI830 = I830PTR(pScrn);
+    struct gen4_render_state *render_state= pI830->gen4_render_state;
+    gen4_composite_op *composite_op = &render_state->composite_op;
+    drm_intel_bo *bo_table[] = {
+	pI830->batch_bo,
+	composite_op->binding_table_bo,
+	render_state->vertex_buffer_bo,
+    };
+
+    return drm_intel_bufmgr_check_aperture_space(bo_table,
+						 ARRAY_SIZE(bo_table)) == 0;
 }
-#undef NUM_BO
 
 Bool
 i965_prepare_composite(int op, PicturePtr pSrcPicture,
@@ -1405,8 +1366,15 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     composite_op->src_filter =
 	sampler_state_filter_from_picture(pSrcPicture->filter);
 
-    /* Fallback if we can't make this operation fit. */
-    return _emit_batch_header_for_composite_check_twice (pScrn);
+    if (!i965_composite_check_aperture(pScrn)) {
+	intel_batch_flush(pScrn, FALSE);
+	if (!i965_composite_check_aperture(pScrn))
+	    I830FALLBACK("Couldn't fit render operation in aperture\n");
+    }
+
+    i965_emit_composite_state(pScrn);
+
+    return TRUE;
 }
 
 void
@@ -1509,8 +1477,10 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	render_state->vertex_buffer_bo = NULL;
     }
 
+    if (!i965_composite_check_aperture(pScrn))
+	intel_batch_flush(pScrn, FALSE);
     if (render_state->vertex_buffer_bo == NULL)
-	_emit_batch_header_for_composite (pScrn);
+	i965_emit_composite_state(pScrn);
 
     /* Map the vertex_buffer buffer object so we can write to it. */
     if (dri_bo_map (render_state->vertex_buffer_bo, 1) != 0)
commit 013e2adfbf955cb21450b610091542ebd54392c2
Author: Eric Anholt <eric at anholt.net>
Date:   Mon Jan 19 20:24:20 2009 -0800

    Move filter computation from emit_batch_header to prepare_composite.

diff --git a/src/i965_render.c b/src/i965_render.c
index 821be40..f4ad29c 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -502,6 +502,10 @@ typedef struct gen4_composite_op {
     PixmapPtr	mask;
     PixmapPtr	dest;
     drm_intel_bo *binding_table_bo;
+    sampler_state_filter_t src_filter;
+    sampler_state_filter_t mask_filter;
+    sampler_state_extend_t src_extend;
+    sampler_state_extend_t mask_extend;
 } gen4_composite_op;
 
 /** Private data for gen4 render accel implementation. */
@@ -982,8 +986,10 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
     PixmapPtr pMask = composite_op->mask;
     PixmapPtr pDst = composite_op->dest;
     uint32_t sf_state_offset;
-    sampler_state_filter_t src_filter, mask_filter;
-    sampler_state_extend_t src_extend, mask_extend;
+    sampler_state_filter_t src_filter = composite_op->src_filter;
+    sampler_state_filter_t mask_filter = composite_op->mask_filter;
+    sampler_state_extend_t src_extend = composite_op->src_extend;
+    sampler_state_extend_t mask_extend = composite_op->mask_extend;
     Bool is_affine_src, is_affine_mask, is_affine;
     int urb_vs_start, urb_vs_size;
     int urb_gs_start, urb_gs_size;
@@ -1061,25 +1067,6 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
     i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format,
 			&src_blend, &dst_blend);
 
-    src_filter = sampler_state_filter_from_picture (pSrcPicture->filter);
-    if (src_filter < 0)
-	I830FALLBACK ("Bad src filter 0x%x\n", pSrcPicture->filter);
-    src_extend = sampler_state_extend_from_picture (pSrcPicture->repeatType);
-    if (src_extend < 0)
-	I830FALLBACK ("Bad src repeat 0x%x\n", pSrcPicture->repeatType);
-
-    if (pMaskPicture) {
-	mask_filter = sampler_state_filter_from_picture (pMaskPicture->filter);
-	if (mask_filter < 0)
-	    I830FALLBACK ("Bad mask filter 0x%x\n", pMaskPicture->filter);
-	mask_extend = sampler_state_extend_from_picture (pMaskPicture->repeatType);
-	if (mask_extend < 0)
-	    I830FALLBACK ("Bad mask repeat 0x%x\n", pMaskPicture->repeatType);
-    } else {
-	mask_filter = SAMPLER_STATE_FILTER_NEAREST;
-	mask_extend = SAMPLER_STATE_EXTEND_NONE;
-    }
-
     /* Begin the long sequence of commands needed to set up the 3D
      * rendering pipe
      */
@@ -1331,6 +1318,27 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     uint32_t *binding_table;
     drm_intel_bo *binding_table_bo, *surface_state_bo;
 
+    if (composite_op->src_filter < 0)
+	I830FALLBACK("Bad src filter 0x%x\n", pSrcPicture->filter);
+    composite_op->src_extend =
+	sampler_state_extend_from_picture(pSrcPicture->repeatType);
+    if (composite_op->src_extend < 0)
+	I830FALLBACK("Bad src repeat 0x%x\n", pSrcPicture->repeatType);
+
+    if (pMaskPicture) {
+	composite_op->mask_filter =
+	    sampler_state_filter_from_picture(pMaskPicture->filter);
+	if (composite_op->mask_filter < 0)
+	    I830FALLBACK("Bad mask filter 0x%x\n", pMaskPicture->filter);
+	composite_op->mask_extend =
+	    sampler_state_extend_from_picture(pMaskPicture->repeatType);
+	if (composite_op->mask_extend < 0)
+	    I830FALLBACK("Bad mask repeat 0x%x\n", pMaskPicture->repeatType);
+    } else {
+	composite_op->mask_filter = SAMPLER_STATE_FILTER_NEAREST;
+	composite_op->mask_extend = SAMPLER_STATE_EXTEND_NONE;
+    }
+
     /* Set up the surface states. */
     surface_state_bo = dri_bo_alloc(pI830->bufmgr, "surface_state",
 				    3 * sizeof (brw_surface_state_padded),
@@ -1394,6 +1402,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     composite_op->dest = pDst;
     drm_intel_bo_unreference(composite_op->binding_table_bo);
     composite_op->binding_table_bo = binding_table_bo;
+    composite_op->src_filter =
+	sampler_state_filter_from_picture(pSrcPicture->filter);
 
     /* Fallback if we can't make this operation fit. */
     return _emit_batch_header_for_composite_check_twice (pScrn);
commit a340fe5e4227ebea5493e658eb6289624b07ab0b
Author: Eric Anholt <eric at anholt.net>
Date:   Mon Jan 19 19:11:41 2009 -0800

    Use intel_emit_reloc from video to prettify 965 render bind_bo setup.

diff --git a/src/i830.h b/src/i830.h
index b726dd6..4ee9c39 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -1023,6 +1023,23 @@ Bool i830_pixmap_tiled(PixmapPtr p);
     if (pitch > KB(8)) I830FALLBACK("pitch exceeds 3d limit 8K\n");\
 } while(0)
 
+/**
+ * Little wrapper around drm_intel_bo_emit_reloc to return the initial value
+ * you should stuff into the relocation entry.
+ *
+ * If only we'd done this before settling on the library API.
+ */
+static inline uint32_t
+intel_emit_reloc(drm_intel_bo *bo, uint32_t offset,
+		 drm_intel_bo *target_bo, uint32_t target_offset,
+		 uint32_t read_domains, uint32_t write_domain)
+{
+    drm_intel_bo_emit_reloc(bo, offset, target_bo, target_offset,
+			    read_domains, write_domain);
+
+    return target_bo->offset + target_offset;
+}
+
 extern const int I830PatternROP[16];
 extern const int I830CopyROP[16];
 
diff --git a/src/i965_render.c b/src/i965_render.c
index 281eb0d..821be40 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1360,24 +1360,24 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     }
 
     binding_table = binding_table_bo->virtual;
-    binding_table[0] = 0 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
-    dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-		       0 * sizeof (brw_surface_state_padded),
-		       0 * sizeof (uint32_t),
-		       surface_state_bo);
-
-    binding_table[1] = 1 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
-    dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-		       1 * sizeof (brw_surface_state_padded),
-		       1 * sizeof (uint32_t),
-		       surface_state_bo);
+    binding_table[0] = intel_emit_reloc(binding_table_bo,
+					0 * sizeof(uint32_t),
+					surface_state_bo,
+					0 * sizeof(brw_surface_state_padded),
+					I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+    binding_table[1] = intel_emit_reloc(binding_table_bo,
+					1 * sizeof(uint32_t),
+					surface_state_bo,
+					1 * sizeof(brw_surface_state_padded),
+					I915_GEM_DOMAIN_INSTRUCTION, 0);
 
     if (pMask) {
-	binding_table[2] = 2 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
-	dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-			   2 * sizeof (brw_surface_state_padded),
-			   2 * sizeof (uint32_t),
-			   surface_state_bo);
+	binding_table[2] = intel_emit_reloc(binding_table_bo,
+					    2 * sizeof(uint32_t),
+					    surface_state_bo,
+					    2 * sizeof(brw_surface_state_padded),
+					    I915_GEM_DOMAIN_INSTRUCTION, 0);
     } else {
 	binding_table[2] = 0;
     }
diff --git a/src/i965_video.c b/src/i965_video.c
index cd726a2..7cd20f3 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -326,23 +326,6 @@ i965_post_draw_debug(ScrnInfoPtr scrn)
 #define URB_CS_ENTRIES	      0
 #define URB_CS_ENTRY_SIZE     0
 
-/**
- * Little wrapper around drm_intel_bo_reloc to return the initial value you
- * should stuff into the relocation entry.
- *
- * If only we'd done this before settling on the library API.
- */
-static uint32_t
-intel_emit_reloc(drm_intel_bo *bo, uint32_t offset,
-		 drm_intel_bo *target_bo, uint32_t target_offset,
-		 uint32_t read_domains, uint32_t write_domain)
-{
-    drm_intel_bo_emit_reloc(bo, offset, target_bo, target_offset,
-			    read_domains, write_domain);
-
-    return target_bo->offset + target_offset;
-}
-
 static int
 intel_alloc_and_map(I830Ptr i830, char *name, int size,
 		    drm_intel_bo **bop, void *virtualp)
commit aefe198ca427a5ad69717f49948eb3ede713bb28
Author: Eric Anholt <eric at anholt.net>
Date:   Mon Jan 19 18:57:01 2009 -0800

    Move i965 render state bo setup back to prepare_composite.
    
    We want the objects to be created once per prepare/done both for efficiency and
    so we can handle aperture checking better.

diff --git a/src/i965_render.c b/src/i965_render.c
index 00cb051..281eb0d 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -501,6 +501,7 @@ typedef struct gen4_composite_op {
     PixmapPtr	source;
     PixmapPtr	mask;
     PixmapPtr	dest;
+    drm_intel_bo *binding_table_bo;
 } gen4_composite_op;
 
 /** Private data for gen4 render accel implementation. */
@@ -992,9 +993,8 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
     char *state_base;
     int state_base_offset;
     uint32_t src_blend, dst_blend;
-    uint32_t *binding_table;
     dri_bo *bo_table[NUM_BO];
-    dri_bo *binding_table_bo, *surface_state_bo;
+    dri_bo *binding_table_bo = composite_op->binding_table_bo;
 
     if (render_state->vertex_buffer_bo == NULL) {
 	render_state->vertex_buffer_bo = dri_bo_alloc (pI830->bufmgr, "vb",
@@ -1061,67 +1061,6 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
     i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format,
 			&src_blend, &dst_blend);
 
-    /* Set up the surface states. */
-    surface_state_bo = dri_bo_alloc (pI830->bufmgr, "surface_state",
-				     3 * sizeof (brw_surface_state_padded),
-				     4096);
-    if (dri_bo_map (surface_state_bo, 1) != 0) {
-	dri_bo_unreference (surface_state_bo);
-	dri_bo_unreference (render_state->vertex_buffer_bo);
-	render_state->vertex_buffer_bo = NULL;
-
-	return FALSE;
-    }
-    /* Set up the state buffer for the destination surface */
-    i965_set_picture_surface_state(surface_state_bo, 0,
-				   pDstPicture, pDst, TRUE);
-    /* Set up the source surface state buffer */
-    i965_set_picture_surface_state(surface_state_bo, 1,
-				   pSrcPicture, pSrc, FALSE);
-    if (pMask) {
-	/* Set up the mask surface state buffer */
-	i965_set_picture_surface_state(surface_state_bo, 2,
-				       pMaskPicture, pMask,
-				       FALSE);
-    }
-    dri_bo_unmap (surface_state_bo);
-
-    /* Set up the binding table of surface indices to surface state. */
-    binding_table_bo = dri_bo_alloc (pI830->bufmgr, "binding_table",
-				     3 * sizeof (uint32_t), 4096);
-    if (dri_bo_map (binding_table_bo, 1) != 0) {
-	dri_bo_unreference(binding_table_bo);
-	dri_bo_unreference(surface_state_bo);
-	dri_bo_unreference (render_state->vertex_buffer_bo);
-	render_state->vertex_buffer_bo = NULL;
-
-	return FALSE;
-    }
-
-    binding_table = binding_table_bo->virtual;
-    binding_table[0] = 0 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
-    dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-		       0 * sizeof (brw_surface_state_padded),
-		       0 * sizeof (uint32_t),
-		       surface_state_bo);
-
-    binding_table[1] = 1 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
-    dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-		       1 * sizeof (brw_surface_state_padded),
-		       1 * sizeof (uint32_t),
-		       surface_state_bo);
-
-    if (pMask) {
-	binding_table[2] = 2 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
-	dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-			   2 * sizeof (brw_surface_state_padded),
-			   2 * sizeof (uint32_t),
-			   surface_state_bo);
-    } else {
-	binding_table[2] = 0;
-    }
-    dri_bo_unmap (binding_table_bo);
-
     src_filter = sampler_state_filter_from_picture (pSrcPicture->filter);
     if (src_filter < 0)
 	I830FALLBACK ("Bad src filter 0x%x\n", pSrcPicture->filter);
@@ -1376,9 +1315,6 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
     I830Sync(pScrn);
 #endif
 
-    dri_bo_unreference (binding_table_bo);
-    dri_bo_unreference (surface_state_bo);
-
     return TRUE;
 }
 #undef NUM_BO
@@ -1392,6 +1328,62 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     I830Ptr pI830 = I830PTR(pScrn);
     struct gen4_render_state *render_state= pI830->gen4_render_state;
     gen4_composite_op *composite_op = &render_state->composite_op;
+    uint32_t *binding_table;
+    drm_intel_bo *binding_table_bo, *surface_state_bo;
+
+    /* Set up the surface states. */
+    surface_state_bo = dri_bo_alloc(pI830->bufmgr, "surface_state",
+				    3 * sizeof (brw_surface_state_padded),
+				    4096);
+    if (dri_bo_map(surface_state_bo, 1) != 0)
+	return FALSE;
+    /* Set up the state buffer for the destination surface */
+    i965_set_picture_surface_state(surface_state_bo, 0,
+				   pDstPicture, pDst, TRUE);
+    /* Set up the source surface state buffer */
+    i965_set_picture_surface_state(surface_state_bo, 1,
+				   pSrcPicture, pSrc, FALSE);
+    if (pMask) {
+	/* Set up the mask surface state buffer */
+	i965_set_picture_surface_state(surface_state_bo, 2,
+				       pMaskPicture, pMask,
+				       FALSE);
+    }
+    dri_bo_unmap(surface_state_bo);
+
+    /* Set up the binding table of surface indices to surface state. */
+    binding_table_bo = dri_bo_alloc(pI830->bufmgr, "binding_table",
+				    3 * sizeof(uint32_t), 4096);
+    if (dri_bo_map (binding_table_bo, 1) != 0) {
+	dri_bo_unreference(surface_state_bo);
+	return FALSE;
+    }
+
+    binding_table = binding_table_bo->virtual;
+    binding_table[0] = 0 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
+    dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		       0 * sizeof (brw_surface_state_padded),
+		       0 * sizeof (uint32_t),
+		       surface_state_bo);
+
+    binding_table[1] = 1 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
+    dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		       1 * sizeof (brw_surface_state_padded),
+		       1 * sizeof (uint32_t),
+		       surface_state_bo);
+
+    if (pMask) {
+	binding_table[2] = 2 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
+	dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+			   2 * sizeof (brw_surface_state_padded),
+			   2 * sizeof (uint32_t),
+			   surface_state_bo);
+    } else {
+	binding_table[2] = 0;
+    }
+    dri_bo_unmap(binding_table_bo);
+    /* All refs to surface_state are now contained in binding_table_bo. */
+    drm_intel_bo_unreference(surface_state_bo);
 
     composite_op->op = op;
     composite_op->source_picture = pSrcPicture;
@@ -1400,6 +1392,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     composite_op->source = pSrc;
     composite_op->mask = pMask;
     composite_op->dest = pDst;
+    drm_intel_bo_unreference(composite_op->binding_table_bo);
+    composite_op->binding_table_bo = binding_table_bo;
 
     /* Fallback if we can't make this operation fit. */
     return _emit_batch_header_for_composite_check_twice (pScrn);
commit 946c7ef8170e74ac178c83b1465242d57fa86f2e
Author: Eric Anholt <eric at anholt.net>
Date:   Mon Jan 19 14:43:20 2009 -0800

    Do check_aperture_space and batch_start_atomic for i965 video.
    
    This increases the overhead for video in the presence of cliprects, but we
    were already doing nasty things in that case and don't seem to care.  This
    could fix potential bad rendering or hangs with video, particularly with
    DRI2.

diff --git a/src/i965_video.c b/src/i965_video.c
index 3c626ca..cd726a2 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1057,8 +1057,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	}
     }
 
-    i965_emit_video_setup(pScrn, bind_bo, n_src_surf);
-
    /* Set up the offset for translating from the given region (in screen
     * coordinates) to the backing pixmap.
     */
@@ -1087,12 +1085,25 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	int i;
 	drm_intel_bo *vb_bo;
 	float *vb;
+	drm_intel_bo *bo_table[] = {
+	    NULL, /* vb_bo */
+	    pI830->batch_bo,
+	    bind_bo,
+	    pI830->video.gen4_sampler_bo,
+	    pI830->video.gen4_sip_kernel_bo,
+	    pI830->video.gen4_vs_bo,
+	    pI830->video.gen4_sf_bo,
+	    pI830->video.gen4_wm_packed_bo,
+	    pI830->video.gen4_wm_planar_bo,
+	    pI830->video.gen4_cc_bo,
+	};
 
 	pbox++;
 
 	if (intel_alloc_and_map(pI830, "textured video vb", 4096,
 				&vb_bo, &vb) != 0)
 	    break;
+	bo_table[0] = vb_bo;
 
 	i = 0;
 	vb[i++] = (box_x2 - dxo) * src_scale_x;
@@ -1114,6 +1125,18 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 
 	i965_pre_draw_debug(pScrn);
 
+	/* If this command won't fit in the current batch, flush.
+	 * Assume that it does after being flushed.
+	 */
+	if (drm_intel_bufmgr_check_aperture_space(bo_table,
+						  ARRAY_SIZE(bo_table)) < 0) {
+	    intel_batch_flush(pScrn, FALSE);
+	}
+
+	intel_batch_start_atomic(pScrn, 100);
+
+	i965_emit_video_setup(pScrn, bind_bo, n_src_surf);
+
 	BEGIN_BATCH(10);
 	/* Set up the pointer to our vertex buffer */
 	OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 2);
@@ -1136,6 +1159,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	OUT_BATCH(0); /* index buffer offset, ignored */
 	ADVANCE_BATCH();
 
+	intel_batch_end_atomic(pScrn);
+
 	drm_intel_bo_unreference(vb_bo);
 
 	i965_post_draw_debug(pScrn);
commit 7be668179a12918918cad863f6936ced4ab78dbf
Author: Eric Anholt <eric at anholt.net>
Date:   Mon Jan 19 14:29:25 2009 -0800

    Move 965 video setup to a separate function so we can move it around.

diff --git a/src/i965_video.c b/src/i965_video.c
index e9f5ced..3c626ca 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -714,6 +714,179 @@ i965_create_cc_state(ScrnInfoPtr scrn)
     return cc_bo;
 }
 
+static void
+i965_emit_video_setup(ScrnInfoPtr pScrn, drm_intel_bo *bind_bo, int n_src_surf)
+{
+    I830Ptr pI830 = I830PTR(pScrn);
+    int urb_vs_start, urb_vs_size;
+    int urb_gs_start, urb_gs_size;
+    int urb_clip_start, urb_clip_size;
+    int urb_sf_start, urb_sf_size;
+    int urb_cs_start, urb_cs_size;
+
+    IntelEmitInvarientState(pScrn);
+    *pI830->last_3d = LAST_3D_VIDEO;
+
+    urb_vs_start = 0;
+    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
+    urb_gs_start = urb_vs_start + urb_vs_size;
+    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
+    urb_clip_start = urb_gs_start + urb_gs_size;
+    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
+    urb_sf_start = urb_clip_start + urb_clip_size;
+    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
+    urb_cs_start = urb_sf_start + urb_sf_size;
+    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
+
+    BEGIN_BATCH(2);
+    OUT_BATCH(MI_FLUSH |
+	      MI_STATE_INSTRUCTION_CACHE_FLUSH |
+	      BRW_MI_GLOBAL_SNAPSHOT_RESET);
+    OUT_BATCH(MI_NOOP);
+    ADVANCE_BATCH();
+
+    /* brw_debug (pScrn, "before base address modify"); */
+    BEGIN_BATCH(12);
+    /* Match Mesa driver setup */
+    if (IS_G4X(pI830))
+	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+    else
+	OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+    /* Mesa does this. Who knows... */
+    OUT_BATCH(BRW_CS_URB_STATE | 0);
+    OUT_BATCH((0 << 4) |	/* URB Entry Allocation Size */
+	      (0 << 0));	/* Number of URB Entries */
+
+    /* Zero out the two base address registers so all offsets are
+     * absolute
+     */
+    OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4);
+    OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* General state base address */
+    OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* Surface state base address */
+    OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* media base addr, don't care */
+    /* general state max addr, disabled */
+    OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY);
+    /* media object state max addr, disabled */
+    OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY);
+
+    /* Set system instruction pointer */
+    OUT_BATCH(BRW_STATE_SIP | 0);
+    /* system instruction pointer */
+    OUT_RELOC(pI830->video.gen4_sip_kernel_bo,
+	      I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+    OUT_BATCH(MI_NOOP);
+    ADVANCE_BATCH();
+
+    /* brw_debug (pScrn, "after base address modify"); */
+
+    BEGIN_BATCH(38);
+    /* Enable VF statistics */
+    OUT_BATCH(BRW_3DSTATE_VF_STATISTICS | 1);
+
+    /* Pipe control */
+    OUT_BATCH(BRW_PIPE_CONTROL |
+	      BRW_PIPE_CONTROL_NOWRITE |
+	      BRW_PIPE_CONTROL_IS_FLUSH |
+	      2);
+    OUT_BATCH(0);			/* Destination address */
+    OUT_BATCH(0);			/* Immediate data low DW */
+    OUT_BATCH(0);			/* Immediate data high DW */
+
+    /* Binding table pointers */
+    OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
+    OUT_BATCH(0); /* vs */
+    OUT_BATCH(0); /* gs */
+    OUT_BATCH(0); /* clip */
+    OUT_BATCH(0); /* sf */
+    /* Only the PS uses the binding table */
+    OUT_RELOC(bind_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    drm_intel_bo_unreference(bind_bo);
+
+    /* Blend constant color (magenta is fun) */
+    OUT_BATCH(BRW_3DSTATE_CONSTANT_COLOR | 3);
+    OUT_BATCH(float_to_uint (1.0));
+    OUT_BATCH(float_to_uint (0.0));
+    OUT_BATCH(float_to_uint (1.0));
+    OUT_BATCH(float_to_uint (1.0));
+
+    /* The drawing rectangle clipping is always on.  Set it to values that
+     * shouldn't do any clipping.
+     */
+    OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */
+    OUT_BATCH(0x00000000);			/* ymin, xmin */
+    OUT_BATCH((pScrn->virtualX - 1) |
+	      (pScrn->virtualY - 1) << 16);	/* ymax, xmax */
+    OUT_BATCH(0x00000000);			/* yorigin, xorigin */
+
+    /* skip the depth buffer */
+    /* skip the polygon stipple */
+    /* skip the polygon stipple offset */
+    /* skip the line stipple */
+
+    /* Set the pointers to the 3d pipeline state */
+    OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
+    OUT_RELOC(pI830->video.gen4_vs_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    /* disable GS, resulting in passthrough */
+    OUT_BATCH(BRW_GS_DISABLE);
+    /* disable CLIP, resulting in passthrough */
+    OUT_BATCH(BRW_CLIP_DISABLE);
+    OUT_RELOC(pI830->video.gen4_sf_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    if (n_src_surf == 1)
+	OUT_RELOC(pI830->video.gen4_wm_packed_bo,
+		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    else
+	OUT_RELOC(pI830->video.gen4_wm_planar_bo,
+		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_RELOC(pI830->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+    /* URB fence */
+    OUT_BATCH(BRW_URB_FENCE |
+	      UF0_CS_REALLOC |
+	      UF0_SF_REALLOC |
+	      UF0_CLIP_REALLOC |
+	      UF0_GS_REALLOC |
+	      UF0_VS_REALLOC |
+	      1);
+    OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
+	      ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
+	      ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
+    OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
+	      ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
+
+    /* Constant buffer state */
+    OUT_BATCH(BRW_CS_URB_STATE | 0);
+    OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) |
+	      (URB_CS_ENTRIES << 0));
+
+    /* Set up our vertex elements, sourced from the single vertex buffer. */
+    OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3);
+    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
+    OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+	      VE0_VALID |
+	      (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+	      (0 << VE0_OFFSET_SHIFT));
+    OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+	      (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+	      (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+	      (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+	      (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
+    OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+	      VE0_VALID |
+	      (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+	      (8 << VE0_OFFSET_SHIFT));
+    OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+	      (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+	      (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+	      (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+	      (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+
+    OUT_BATCH(MI_NOOP);			/* pad to quadword */
+    ADVANCE_BATCH();
+}
+
 void
 I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 			 RegionPtr dstRegion,
@@ -726,11 +899,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     I830Ptr pI830 = I830PTR(pScrn);
     BoxPtr pbox;
     int nbox, dxo, dyo, pix_xoff, pix_yoff;
-    int urb_vs_start, urb_vs_size;
-    int urb_gs_start, urb_gs_size;
-    int urb_clip_start, urb_clip_size;
-    int urb_sf_start, urb_sf_size;
-    int urb_cs_start, urb_cs_size;
     float src_scale_x, src_scale_y;
     int src_surf, i;
     int n_src_surf;
@@ -798,25 +966,11 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	return;
     }    
 
-    IntelEmitInvarientState(pScrn);
-    *pI830->last_3d = LAST_3D_VIDEO;
-
 #if 0
     ErrorF("dst surf:      0x%08x\n", state_base_offset + dest_surf_offset);
     ErrorF("src surf:      0x%08x\n", state_base_offset + src_surf_offset);
 #endif
 
-    urb_vs_start = 0;
-    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
-    urb_gs_start = urb_vs_start + urb_vs_size;
-    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
-    urb_clip_start = urb_gs_start + urb_gs_size;
-    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
-    urb_sf_start = urb_clip_start + urb_clip_size;
-    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
-    urb_cs_start = urb_sf_start + urb_sf_size;
-    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
-
     /* We'll be poking the state buffers that could be in use by the 3d
      * hardware here, but we should have synced the 3D engine already in
      * I830PutImage.
@@ -903,159 +1057,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	}
     }
 
-    {
-	BEGIN_BATCH(2);
-	OUT_BATCH(MI_FLUSH |
-		  MI_STATE_INSTRUCTION_CACHE_FLUSH |
-		  BRW_MI_GLOBAL_SNAPSHOT_RESET);
-	OUT_BATCH(MI_NOOP);
-	ADVANCE_BATCH();
-    }
-
-    /* brw_debug (pScrn, "before base address modify"); */
-    {
-	BEGIN_BATCH(12);
-	/* Match Mesa driver setup */
-	if (IS_G4X(pI830))
-	    OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
-	else
-	    OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
-
-	/* Mesa does this. Who knows... */
-	OUT_BATCH(BRW_CS_URB_STATE | 0);
-	OUT_BATCH((0 << 4) |	/* URB Entry Allocation Size */
-		  (0 << 0));	/* Number of URB Entries */
-
-	/* Zero out the two base address registers so all offsets are
-	 * absolute
-	 */
-	OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* Generate state base address */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* Surface state base address */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* media base addr, don't care */
-	/* general state max addr, disabled */
-	OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY);
-	/* media object state max addr, disabled */
-	OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY);
-
-	/* Set system instruction pointer */
-	OUT_BATCH(BRW_STATE_SIP | 0);
-	/* system instruction pointer */
-	OUT_RELOC(pI830->video.gen4_sip_kernel_bo,
-		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-
-	OUT_BATCH(MI_NOOP);
-	ADVANCE_BATCH();
-    }
-
-    /* brw_debug (pScrn, "after base address modify"); */
-
-    {
-       BEGIN_BATCH(38);
-       /* Enable VF statistics */
-       OUT_BATCH(BRW_3DSTATE_VF_STATISTICS | 1);
-
-       /* Pipe control */
-       OUT_BATCH(BRW_PIPE_CONTROL |
-		 BRW_PIPE_CONTROL_NOWRITE |
-		 BRW_PIPE_CONTROL_IS_FLUSH |
-		 2);
-       OUT_BATCH(0);			/* Destination address */
-       OUT_BATCH(0);			/* Immediate data low DW */
-       OUT_BATCH(0);			/* Immediate data high DW */
-
-       /* Binding table pointers */
-       OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
-       OUT_BATCH(0); /* vs */
-       OUT_BATCH(0); /* gs */
-       OUT_BATCH(0); /* clip */
-       OUT_BATCH(0); /* sf */
-       /* Only the PS uses the binding table */
-       OUT_RELOC(bind_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-       drm_intel_bo_unreference(bind_bo);
-
-       /* Blend constant color (magenta is fun) */
-       OUT_BATCH(BRW_3DSTATE_CONSTANT_COLOR | 3);
-       OUT_BATCH(float_to_uint (1.0));
-       OUT_BATCH(float_to_uint (0.0));
-       OUT_BATCH(float_to_uint (1.0));
-       OUT_BATCH(float_to_uint (1.0));
-
-       /* The drawing rectangle clipping is always on.  Set it to values that
-	* shouldn't do any clipping.
-	*/
-       OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */
-       OUT_BATCH(0x00000000);			/* ymin, xmin */
-       OUT_BATCH((pScrn->virtualX - 1) |
-		 (pScrn->virtualY - 1) << 16);	/* ymax, xmax */
-       OUT_BATCH(0x00000000);			/* yorigin, xorigin */
-
-       /* skip the depth buffer */
-       /* skip the polygon stipple */
-       /* skip the polygon stipple offset */
-       /* skip the line stipple */
-
-       /* Set the pointers to the 3d pipeline state */
-       OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
-       OUT_RELOC(pI830->video.gen4_vs_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-       /* disable GS, resulting in passthrough */
-       OUT_BATCH(BRW_GS_DISABLE);
-       /* disable CLIP, resulting in passthrough */
-       OUT_BATCH(BRW_CLIP_DISABLE);
-       OUT_RELOC(pI830->video.gen4_sf_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-       if (n_src_surf == 1)
-	   OUT_RELOC(pI830->video.gen4_wm_packed_bo,
-		     I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-       else
-	   OUT_RELOC(pI830->video.gen4_wm_planar_bo,
-		     I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-       OUT_RELOC(pI830->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-
-       /* URB fence */
-       OUT_BATCH(BRW_URB_FENCE |
-		 UF0_CS_REALLOC |
-		 UF0_SF_REALLOC |
-		 UF0_CLIP_REALLOC |
-		 UF0_GS_REALLOC |
-		 UF0_VS_REALLOC |
-		 1);
-       OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
-		 ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
-		 ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
-       OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
-		 ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
-
-       /* Constant buffer state */
-       OUT_BATCH(BRW_CS_URB_STATE | 0);
-       OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) |
-		 (URB_CS_ENTRIES << 0));
-
-       /* Set up our vertex elements, sourced from the single vertex buffer. */
-       OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3);
-       /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
-       OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
-		 VE0_VALID |
-		 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
-		 (0 << VE0_OFFSET_SHIFT));
-       OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-		 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-		 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
-		 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
-		 (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
-       /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
-       OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
-		 VE0_VALID |
-		 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
-		 (8 << VE0_OFFSET_SHIFT));
-       OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
-		 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
-		 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
-		 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
-		 (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
-
-       OUT_BATCH(MI_NOOP);			/* pad to quadword */
-       ADVANCE_BATCH();
-    }
+    i965_emit_video_setup(pScrn, bind_bo, n_src_surf);
 
    /* Set up the offset for translating from the given region (in screen
     * coordinates) to the backing pixmap.


More information about the xorg-commit mailing list