xf86-video-intel: 5 commits - src/i965_render.c

Carl Worth cworth at kemper.freedesktop.org
Mon Nov 3 22:47:48 PST 2008


 src/i965_render.c |  259 ++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 186 insertions(+), 73 deletions(-)

New commits:
commit 08914cceda6c57530023cdcdb5ad7e4024f36a6e
Author: Carl Worth <cworth at cworth.org>
Date:   Thu Oct 30 16:46:06 2008 -0700

    Use buffer objects for binding table and surface-state objects.
    
    Instead of having a static array for these and doing an ugly sync
    every time we recycle the array, we now simply allocate short-lived
    buffer objects for this dynamic state. The dri layer, in turn, can
    take care of efficiently reusing objects as necessary.
    
    On a GM965 this change was tested to improve the performance of
    x11perf -aa10text from roughly 120000 to 154000 glyphs/sec.

diff --git a/src/i965_render.c b/src/i965_render.c
index b28b2ce..d39915a 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -453,14 +453,6 @@ typedef struct brw_surface_state_padded {
  *
  * This structure contains static data for all of the combinations of
  * state that we use for Render acceleration.
- *
- * Meanwhile, gen4_render_state_t should contain all dynamic data,
- * but we're still in the process of migrating some data out of
- * gen4_static_state_t to gen4_render_state_t. Things remaining to be
- * migrated include
- *
- *	surface_state
- *	binding_table
  */
 typedef struct _gen4_static_state {
     uint8_t wm_scratch[128 * PS_MAX_THREADS];
@@ -494,10 +486,6 @@ typedef struct _gen4_static_state {
     WM_STATE_DECL (masknoca_affine);
     WM_STATE_DECL (masknoca_projective);
 
-    uint32_t binding_table[128];
-
-    struct brw_surface_state_padded surface_state[32];
-
     /* Index by [src_filter][src_extend][mask_filter][mask_extend].  Two of
      * the structs happen to add to 32 bytes.
      */
@@ -537,8 +525,6 @@ struct gen4_render_state {
 
     gen4_composite_op composite_op;
 
-    int binding_table_index;
-    int surface_state_index;
     int vb_offset;
     int vertex_size;
 };
@@ -883,20 +869,15 @@ sampler_state_extend_from_picture (int repeat_type)
 }
 
 /**
- * Sets up the common fields for a surface state buffer for the given picture
- * in the surface state buffer at index, and returns the offset within the
- * state buffer for this entry.
+ * Sets up the common fields for a surface state buffer for the given
+ * picture in the given surface state buffer.
  */
-static unsigned int
-i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss,
+static void
+i965_set_picture_surface_state(struct brw_surface_state *ss,
 			       PicturePtr pPicture, PixmapPtr pPixmap,
 			       Bool is_dst)
 {
-    I830Ptr pI830 = I830PTR(pScrn);
-    struct gen4_render_state *render_state= pI830->gen4_render_state;
-    gen4_static_state_t *static_state = render_state->static_state;
     struct brw_surface_state local_ss;
-    uint32_t offset;
 
     /* Since ss is a pointer to WC memory, do all of our bit operations
      * into a local temporary first.
@@ -935,11 +916,6 @@ i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss,
     local_ss.ss3.tiled_surface = i830_pixmap_tiled(pPixmap) ? 1 : 0;
 
     memcpy(ss, &local_ss, sizeof(local_ss));
-
-    offset = (char *)ss - (char *)static_state;
-    assert((offset & 31) == 0);
-
-    return offset;
 }
 
 
@@ -985,7 +961,6 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
 {
     I830Ptr pI830 = I830PTR(pScrn);
     struct gen4_render_state *render_state= pI830->gen4_render_state;
-    gen4_static_state_t *static_state = render_state->static_state;
     gen4_composite_op *composite_op = &render_state->composite_op;
     int op = composite_op->op;
     PicturePtr pSrcPicture = composite_op->source_picture;
@@ -1009,6 +984,7 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
     uint32_t src_blend, dst_blend;
     uint32_t *binding_table;
     dri_bo *bo_table[NUM_BO];
+    dri_bo *binding_table_bo, *surface_state_bo;
 
     if (render_state->vertex_buffer_bo == NULL) {
 	render_state->vertex_buffer_bo = dri_bo_alloc (pI830->bufmgr, "vb",
@@ -1076,48 +1052,52 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
     i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format,
 			&src_blend, &dst_blend);
 
-    if ((render_state->binding_table_index + 3 >=
-	 ARRAY_SIZE(static_state->binding_table)) ||
-	(render_state->surface_state_index + 3 >=
-	 ARRAY_SIZE(static_state->surface_state)))
-    {
-	i830WaitSync(pScrn);
-	render_state->binding_table_index = 0;
-	render_state->surface_state_index = 0;
-	render_state->vb_offset = 0;
-    }
+    binding_table_bo = dri_bo_alloc (pI830->bufmgr, "binding_table",
+				     3 * sizeof (uint32_t), 4096);
+    dri_bo_map (binding_table_bo, 1);
+    binding_table = binding_table_bo->virtual;
 
-    binding_table = static_state->binding_table +
-	render_state->binding_table_index;
-    ss = static_state->surface_state + render_state->surface_state_index;
-    /* We only use 2 or 3 entries, but the table has to be 32-byte
-     * aligned.
-     */
-    render_state->binding_table_index += 8;
-    render_state->surface_state_index += (pMask != NULL) ? 3 : 2;
+    surface_state_bo = dri_bo_alloc (pI830->bufmgr, "surface_state",
+				     3 * sizeof (brw_surface_state_padded),
+				     4096);
+    dri_bo_map (surface_state_bo, 1);
+    ss = surface_state_bo->virtual;
 
     /* Set up and bind the state buffer for the destination surface */
-    binding_table[0] = state_base_offset +
-	i965_set_picture_surface_state(pScrn,
-				       &ss[0].state,
-				       pDstPicture, pDst, TRUE);
+    i965_set_picture_surface_state(&ss[0].state,
+				   pDstPicture, pDst, TRUE);
+    binding_table[0] = 0 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
+    dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		       0 * sizeof (brw_surface_state_padded),
+		       0 * sizeof (uint32_t),
+		       surface_state_bo);
 
     /* Set up and bind the source surface state buffer */
-    binding_table[1] = state_base_offset +
-	i965_set_picture_surface_state(pScrn,
-				       &ss[1].state,
-				       pSrcPicture, pSrc, FALSE);
+    i965_set_picture_surface_state(&ss[1].state,
+				   pSrcPicture, pSrc, FALSE);
+    binding_table[1] = 1 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
+    dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+		       1 * sizeof (brw_surface_state_padded),
+		       1 * sizeof (uint32_t),
+		       surface_state_bo);
+
     if (pMask) {
 	/* Set up and bind the mask surface state buffer */
-	binding_table[2] = state_base_offset +
-	    i965_set_picture_surface_state(pScrn,
-					   &ss[2].state,
-					   pMaskPicture, pMask,
-					   FALSE);
+	i965_set_picture_surface_state(&ss[2].state,
+				       pMaskPicture, pMask,
+				       FALSE);
+	binding_table[2] = 2 * sizeof (brw_surface_state_padded) + surface_state_bo->offset;
+	dri_bo_emit_reloc (binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+			   2 * sizeof (brw_surface_state_padded),
+			   2 * sizeof (uint32_t),
+			   surface_state_bo);
     } else {
 	binding_table[2] = 0;
     }
 
+    dri_bo_unmap (binding_table_bo);
+    dri_bo_unmap (surface_state_bo);
+
     src_filter = sampler_state_filter_from_picture (pSrcPicture->filter);
     if (src_filter < 0)
 	I830FALLBACK ("Bad src filter 0x%x\n", pSrcPicture->filter);
@@ -1197,8 +1177,7 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
 	OUT_BATCH(0); /* clip */
 	OUT_BATCH(0); /* sf */
 	/* Only the PS uses the binding table */
-	assert((((unsigned char *)binding_table - pI830->FbBase) & 31) == 0);
-	OUT_BATCH((unsigned char *)binding_table - pI830->FbBase);
+	OUT_RELOC(binding_table_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0);
 
 	/* The drawing rectangle clipping is always on.  Set it to values that
 	 * shouldn't do any clipping.
@@ -1372,6 +1351,10 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
     ErrorF("try to sync to show any errors...\n");
     I830Sync(pScrn);
 #endif
+
+    dri_bo_unreference (binding_table_bo);
+    dri_bo_unreference (surface_state_bo);
+
     return TRUE;
 }
 #undef NUM_BO
commit 47cc3d79da8174ba30ca130b0fb6c7d9c871caed
Author: Carl Worth <cworth at cworth.org>
Date:   Thu Oct 30 16:53:57 2008 -0700

    Unreference the vertex_buffer_bo in gen4_render_state_cleanup
    
    This avoids leaking one buffer object.

diff --git a/src/i965_render.c b/src/i965_render.c
index 3ebd209..b28b2ce 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1617,12 +1617,16 @@ void
 gen4_render_state_cleanup(ScrnInfoPtr pScrn)
 {
     I830Ptr pI830 = I830PTR(pScrn);
+    struct gen4_render_state *render_state= pI830->gen4_render_state;
+
+    if (render_state->vertex_buffer_bo)
+	dri_bo_unreference (render_state->vertex_buffer_bo);
 
     if (pI830->use_drm_mode) {
 	dri_bo_unmap(pI830->gen4_render_state_mem->bo);
 	dri_bo_unreference(pI830->gen4_render_state_mem->bo);
     }
-    pI830->gen4_render_state->static_state = NULL;
+    render_state->static_state = NULL;
 }
 
 unsigned int
commit 88700acf30f9eab8f96c197c7d113ce38c0af6e7
Author: Carl Worth <cworth at cworth.org>
Date:   Mon Oct 27 14:23:02 2008 -0700

    Rename gen4_dynamic_state to gen4_vertex_buffer
    
    We don't actually plan to put any other data in this structure, so it
    doesn't make sense to have a generic name, (since we'll only be using
    it for our vertex buffer).

diff --git a/src/i965_render.c b/src/i965_render.c
index 8f9f2d3..3ebd209 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -454,9 +454,9 @@ typedef struct brw_surface_state_padded {
  * This structure contains static data for all of the combinations of
  * state that we use for Render acceleration.
  *
- * Meanwhile, gen4_dynamic_state_t should contain all dynamic data,
+ * Meanwhile, gen4_render_state_t should contain all dynamic data,
  * but we're still in the process of migrating some data out of
- * gen4_static_state_t to gen4_dynamic_state_t. Things remaining to be
+ * gen4_static_state_t to gen4_render_state_t. Things remaining to be
  * migrated include
  *
  *	surface_state
@@ -516,9 +516,7 @@ typedef struct _gen4_static_state {
     PAD64 (brw_cc_viewport, 0);
 } gen4_static_state_t;
 
-typedef struct gen4_dynamic_state_state {
-    float vb[VERTEX_BUFFER_SIZE];
-} gen4_dynamic_state;
+typedef float gen4_vertex_buffer[VERTEX_BUFFER_SIZE];
 
 typedef struct gen4_composite_op {
     int		op;
@@ -535,7 +533,7 @@ struct gen4_render_state {
     gen4_static_state_t *static_state;
     uint32_t static_state_offset;
 
-    dri_bo* dynamic_state_bo;
+    dri_bo* vertex_buffer_bo;
 
     gen4_composite_op composite_op;
 
@@ -978,7 +976,7 @@ _emit_batch_header_for_composite (ScrnInfoPtr pScrn)
 /* Number of buffer object in our call to check_aperture_size:
  *
  *	batch_bo
- *	dynamic_state_bo
+ *	vertex_buffer_bo
  */
 #define NUM_BO 2
 
@@ -1012,14 +1010,14 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
     uint32_t *binding_table;
     dri_bo *bo_table[NUM_BO];
 
-    if (render_state->dynamic_state_bo == NULL) {
-	render_state->dynamic_state_bo = dri_bo_alloc (pI830->bufmgr, "vb",
-						       sizeof (gen4_dynamic_state),
+    if (render_state->vertex_buffer_bo == NULL) {
+	render_state->vertex_buffer_bo = dri_bo_alloc (pI830->bufmgr, "vb",
+						       sizeof (gen4_vertex_buffer),
 						       4096);
     }
 
     bo_table[0] = pI830->batch_bo;
-    bo_table[1] = render_state->dynamic_state_bo;
+    bo_table[1] = render_state->vertex_buffer_bo;
 
     /* If this command won't fit in the current batch, flush. */
     if (dri_bufmgr_check_aperture_space (bo_table, NUM_BO) < 0) {
@@ -1030,8 +1028,8 @@ _emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
 	     * just plain too big for the hardware---fallback to software.
 	     */
 	    if (dri_bufmgr_check_aperture_space (bo_table, NUM_BO) < 0) {
-		dri_bo_unreference (render_state->dynamic_state_bo);
-		render_state->dynamic_state_bo = NULL;
+		dri_bo_unreference (render_state->vertex_buffer_bo);
+		render_state->vertex_buffer_bo = NULL;
 		return FALSE;
 	    }
 	}
@@ -1407,12 +1405,11 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
     I830Ptr pI830 = I830PTR(pScrn);
     struct gen4_render_state *render_state = pI830->gen4_render_state;
-    gen4_dynamic_state *dynamic_state;
     Bool has_mask;
     Bool is_affine_src, is_affine_mask, is_affine;
     float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
-    float *vb;
     int i;
+    float *vb;
 
     is_affine_src = i830_transform_is_affine (pI830->transform[0]);
     is_affine_mask = i830_transform_is_affine (pI830->transform[1]);
@@ -1491,17 +1488,15 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     /* If the vertex buffer is too full, then we flush and re-emit all
      * necessary state into the batch for the composite operation. */
     if (render_state->vb_offset + VERTEX_FLOATS_PER_COMPOSITE > VERTEX_BUFFER_SIZE) {
-	dri_bo_unreference (render_state->dynamic_state_bo);
-	render_state->dynamic_state_bo = NULL;
+	dri_bo_unreference (render_state->vertex_buffer_bo);
+	render_state->vertex_buffer_bo = NULL;
 	render_state->vb_offset = 0;
 	_emit_batch_header_for_composite (pScrn);
     }
 
-    /* Map the dynamic_state buffer object so we can write to the
-     * vertex buffer within it. */
-    dri_bo_map (render_state->dynamic_state_bo, 1);
-    dynamic_state = render_state->dynamic_state_bo->virtual;
-    vb = dynamic_state->vb;
+    /* Map the vertex_buffer buffer object so we can write to it. */
+    dri_bo_map (render_state->vertex_buffer_bo, 1);
+    vb = render_state->vertex_buffer_bo->virtual;
 
     i = render_state->vb_offset;
     /* rect (x2,y2) */
@@ -1547,7 +1542,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     }
     assert (i <= VERTEX_BUFFER_SIZE);
 
-    dri_bo_unmap (render_state->dynamic_state_bo);
+    dri_bo_unmap (render_state->vertex_buffer_bo);
 
     BEGIN_BATCH(12);
     OUT_BATCH(MI_FLUSH);
@@ -1556,8 +1551,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
 	      VB0_VERTEXDATA |
 	      (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
-    OUT_RELOC(render_state->dynamic_state_bo, I915_GEM_DOMAIN_VERTEX, 0,
-	      offsetof(gen4_dynamic_state, vb) +
+    OUT_RELOC(render_state->vertex_buffer_bo, I915_GEM_DOMAIN_VERTEX, 0,
 	      render_state->vb_offset * 4);
     OUT_BATCH(3);
     OUT_BATCH(0); // ignore for VERTEXDATA, but still there
commit 9e95722763e2379d14a6b46c3750a44713da5135
Author: Carl Worth <cworth at cworth.org>
Date:   Thu Oct 23 15:43:19 2008 -0700

    965: Move composite setup to new _emit_batch_header_for_composite
    
    This function is the new name for _allocate_dynamic_state now that
    it also emits everything to the batch necessary for setting up a
    composite operation. This happens in prepare_composite() every
    time and in composite() whenever our vertex buffer fills up.
    
    It's not yet strictly necessary to be redoing this setup in
    composite() but it will be soon when the setup starts referring
    to buffer objects for surface state and binding table. This
    move prepares for that.

diff --git a/src/i965_render.c b/src/i965_render.c
index 7a3ff7f..8f9f2d3 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -520,6 +520,16 @@ typedef struct gen4_dynamic_state_state {
     float vb[VERTEX_BUFFER_SIZE];
 } gen4_dynamic_state;
 
+typedef struct gen4_composite_op {
+    int		op;
+    PicturePtr	source_picture;
+    PicturePtr	mask_picture;
+    PicturePtr	dest_picture;
+    PixmapPtr	source;
+    PixmapPtr	mask;
+    PixmapPtr	dest;
+} gen4_composite_op;
+
 /** Private data for gen4 render accel implementation. */
 struct gen4_render_state {
     gen4_static_state_t *static_state;
@@ -527,6 +537,8 @@ struct gen4_render_state {
 
     dri_bo* dynamic_state_bo;
 
+    gen4_composite_op composite_op;
+
     int binding_table_index;
     int surface_state_index;
     int vb_offset;
@@ -934,7 +946,8 @@ i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss,
 
 
 static Bool
-_allocate_dynamic_state_internal (ScrnInfoPtr pScrn, Bool check_twice);
+_emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn,
+					   Bool check_twice);
 
 /* Allocate the dynamic state needed for a composite operation,
  * flushing the current batch if needed to create sufficient space.
@@ -943,17 +956,23 @@ _allocate_dynamic_state_internal (ScrnInfoPtr pScrn, Bool check_twice);
  * operation still can't fit with an empty batch. Otherwise, returns
  * TRUE.
  */
-static Bool _allocate_dynamic_state_check_twice (ScrnInfoPtr pScrn) {
-     return _allocate_dynamic_state_internal (pScrn, TRUE);
+static Bool
+_emit_batch_header_for_composite_check_twice (ScrnInfoPtr pScrn)
+{
+     return _emit_batch_header_for_composite_internal (pScrn, TRUE);
 }
 
 /* Allocate the dynamic state needed for a composite operation,
  * flushing the current batch if needed to create sufficient space.
+ *
+ * See _emit_batch_header_for_composite_check_twice for a safer
+ * version, (but this version is fine if the safer version has
+ * previously been called for the same composite operation).
  */
 static void
-_allocate_dynamic_state (ScrnInfoPtr pScrn)
+_emit_batch_header_for_composite (ScrnInfoPtr pScrn)
 {
-    _allocate_dynamic_state_internal (pScrn, FALSE);
+    _emit_batch_header_for_composite_internal (pScrn, FALSE);
 }
 
 /* Number of buffer object in our call to check_aperture_size:
@@ -964,10 +983,33 @@ _allocate_dynamic_state (ScrnInfoPtr pScrn)
 #define NUM_BO 2
 
 static Bool
-_allocate_dynamic_state_internal (ScrnInfoPtr pScrn, Bool check_twice)
+_emit_batch_header_for_composite_internal (ScrnInfoPtr pScrn, Bool check_twice)
 {
     I830Ptr pI830 = I830PTR(pScrn);
     struct gen4_render_state *render_state= pI830->gen4_render_state;
+    gen4_static_state_t *static_state = render_state->static_state;
+    gen4_composite_op *composite_op = &render_state->composite_op;
+    int op = composite_op->op;
+    PicturePtr pSrcPicture = composite_op->source_picture;
+    PicturePtr pMaskPicture = composite_op->mask_picture;
+    PicturePtr pDstPicture = composite_op->dest_picture;
+    PixmapPtr pSrc = composite_op->source;
+    PixmapPtr pMask = composite_op->mask;
+    PixmapPtr pDst = composite_op->dest;
+    struct brw_surface_state_padded *ss;
+    uint32_t sf_state_offset;
+    sampler_state_filter_t src_filter, mask_filter;
+    sampler_state_extend_t src_extend, mask_extend;
+    Bool is_affine_src, is_affine_mask, is_affine;
+    int urb_vs_start, urb_vs_size;
+    int urb_gs_start, urb_gs_size;
+    int urb_clip_start, urb_clip_size;
+    int urb_sf_start, urb_sf_size;
+    int urb_cs_start, urb_cs_size;
+    char *state_base;
+    int state_base_offset;
+    uint32_t src_blend, dst_blend;
+    uint32_t *binding_table;
     dri_bo *bo_table[NUM_BO];
 
     if (render_state->dynamic_state_bo == NULL) {
@@ -995,40 +1037,6 @@ _allocate_dynamic_state_internal (ScrnInfoPtr pScrn, Bool check_twice)
 	}
     }
 
-    return TRUE;
-}
-#undef NUM_BO
-
-Bool
-i965_prepare_composite(int op, PicturePtr pSrcPicture,
-		       PicturePtr pMaskPicture, PicturePtr pDstPicture,
-		       PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
-{
-    ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
-    I830Ptr pI830 = I830PTR(pScrn);
-    struct gen4_render_state *render_state= pI830->gen4_render_state;
-    gen4_static_state_t *static_state = render_state->static_state;
-    struct brw_surface_state_padded *ss;
-    uint32_t sf_state_offset;
-    sampler_state_filter_t src_filter, mask_filter;
-    sampler_state_extend_t src_extend, mask_extend;
-    Bool is_affine_src, is_affine_mask, is_affine;
-    int urb_vs_start, urb_vs_size;
-    int urb_gs_start, urb_gs_size;
-    int urb_clip_start, urb_clip_size;
-    int urb_sf_start, urb_sf_size;
-    int urb_cs_start, urb_cs_size;
-    char *state_base;
-    int state_base_offset;
-    uint32_t src_blend, dst_blend;
-    uint32_t *binding_table;
-    Bool success;
-
-    /* Fallback if we can't make this operation fit. */
-    success = _allocate_dynamic_state_check_twice (pScrn);
-    if (! success)
-	return FALSE;
-
     IntelEmitInvarientState(pScrn);
     *pI830->last_3d = LAST_3D_RENDER;
 
@@ -1368,6 +1376,29 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 #endif
     return TRUE;
 }
+#undef NUM_BO
+
+Bool
+i965_prepare_composite(int op, PicturePtr pSrcPicture,
+		       PicturePtr pMaskPicture, PicturePtr pDstPicture,
+		       PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
+    I830Ptr pI830 = I830PTR(pScrn);
+    struct gen4_render_state *render_state= pI830->gen4_render_state;
+    gen4_composite_op *composite_op = &render_state->composite_op;
+
+    composite_op->op = op;
+    composite_op->source_picture = pSrcPicture;
+    composite_op->mask_picture = pMaskPicture;
+    composite_op->dest_picture = pDstPicture;
+    composite_op->source = pSrc;
+    composite_op->mask = pMask;
+    composite_op->dest = pDst;
+
+    /* Fallback if we can't make this operation fit. */
+    return _emit_batch_header_for_composite_check_twice (pScrn);
+}
 
 void
 i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
@@ -1457,13 +1488,13 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	}
     }
 
-    /* Arrange for a dynamic_state buffer object with sufficient space
-     * for our vertices. */
+    /* If the vertex buffer is too full, then we flush and re-emit all
+     * necessary state into the batch for the composite operation. */
     if (render_state->vb_offset + VERTEX_FLOATS_PER_COMPOSITE > VERTEX_BUFFER_SIZE) {
 	dri_bo_unreference (render_state->dynamic_state_bo);
 	render_state->dynamic_state_bo = NULL;
 	render_state->vb_offset = 0;
-	_allocate_dynamic_state (pScrn);
+	_emit_batch_header_for_composite (pScrn);
     }
 
     /* Map the dynamic_state buffer object so we can write to the
commit fcb2a5a1253c505913e66b08107c0a9f57b07bad
Author: Carl Worth <cworth at cworth.org>
Date:   Wed Oct 22 17:12:47 2008 -0700

    Use buffer object for vertex buffer (in new gen4_dynamic_state)
    
    This begins the process of separating the dynamic data from the
    static data, (still to move are the surface state and binding
    table objects). The new dynamic_state is stored in a buffer
    object, so this patch restores the buffer-object-for-vertex-buffer
    functionality originally in commit 1abf4d3a7a and later reverted
    in 5c9a62a29f.
    
    A notable difference is that this time we actually do use
    check_aperture_space to ensure things will fit, (assuming
    there's a non-empty implementation under that).

diff --git a/src/i965_render.c b/src/i965_render.c
index a9d7f66..7a3ff7f 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -59,8 +59,14 @@ do { 							\
 } while(0)
 #endif
 
-#define MAX_VERTEX_PER_COMPOSITE    24
-#define MAX_VERTEX_BUFFERS	    256
+/* 24 = 4 vertices/composite * 3 texcoords/vertex * 2 floats/texcoord
+ *
+ * This is an upper-bound based on the case of a non-affine
+ * transformation and with a mask, but useful for sizing all cases for
+ * simplicity.
+ */
+#define VERTEX_FLOATS_PER_COMPOSITE	24
+#define VERTEX_BUFFER_SIZE		(256 * VERTEX_FLOATS_PER_COMPOSITE)
 
 struct blendinfo {
     Bool dst_alpha;
@@ -445,11 +451,16 @@ typedef struct brw_surface_state_padded {
 /**
  * Gen4 rendering state buffer structure.
  *
- * Ideally this structure would contain static data for all of the
- * combinations of state that we use for Render acceleration, and
- * another buffer would contain the dynamic surface state, binding
- * table, and vertex data. We'll be moving to that organization soon,
- * so we use that naming already.
+ * This structure contains static data for all of the combinations of
+ * state that we use for Render acceleration.
+ *
+ * Meanwhile, gen4_dynamic_state_t should contain all dynamic data,
+ * but we're still in the process of migrating some data out of
+ * gen4_static_state_t to gen4_dynamic_state_t. Things remaining to be
+ * migrated include
+ *
+ *	surface_state
+ *	binding_table
  */
 typedef struct _gen4_static_state {
     uint8_t wm_scratch[128 * PS_MAX_THREADS];
@@ -503,15 +514,19 @@ typedef struct _gen4_static_state {
 				     [BRW_BLENDFACTOR_COUNT];
     struct brw_cc_viewport cc_viewport;
     PAD64 (brw_cc_viewport, 0);
-
-    float vb[MAX_VERTEX_PER_COMPOSITE * MAX_VERTEX_BUFFERS];
 } gen4_static_state_t;
 
+typedef struct gen4_dynamic_state_state {
+    float vb[VERTEX_BUFFER_SIZE];
+} gen4_dynamic_state;
+
 /** Private data for gen4 render accel implementation. */
 struct gen4_render_state {
     gen4_static_state_t *static_state;
     uint32_t static_state_offset;
 
+    dri_bo* dynamic_state_bo;
+
     int binding_table_index;
     int surface_state_index;
     int vb_offset;
@@ -917,6 +932,73 @@ i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss,
     return offset;
 }
 
+
+static Bool
+_allocate_dynamic_state_internal (ScrnInfoPtr pScrn, Bool check_twice);
+
+/* Allocate the dynamic state needed for a composite operation,
+ * flushing the current batch if needed to create sufficient space.
+ *
+ * Even after flushing we check again and return FALSE if the
+ * operation still can't fit with an empty batch. Otherwise, returns
+ * TRUE.
+ */
+static Bool _allocate_dynamic_state_check_twice (ScrnInfoPtr pScrn) {
+     return _allocate_dynamic_state_internal (pScrn, TRUE);
+}
+
+/* Allocate the dynamic state needed for a composite operation,
+ * flushing the current batch if needed to create sufficient space.
+ */
+static void
+_allocate_dynamic_state (ScrnInfoPtr pScrn)
+{
+    _allocate_dynamic_state_internal (pScrn, FALSE);
+}
+
+/* Number of buffer object in our call to check_aperture_size:
+ *
+ *	batch_bo
+ *	dynamic_state_bo
+ */
+#define NUM_BO 2
+
+static Bool
+_allocate_dynamic_state_internal (ScrnInfoPtr pScrn, Bool check_twice)
+{
+    I830Ptr pI830 = I830PTR(pScrn);
+    struct gen4_render_state *render_state= pI830->gen4_render_state;
+    dri_bo *bo_table[NUM_BO];
+
+    if (render_state->dynamic_state_bo == NULL) {
+	render_state->dynamic_state_bo = dri_bo_alloc (pI830->bufmgr, "vb",
+						       sizeof (gen4_dynamic_state),
+						       4096);
+    }
+
+    bo_table[0] = pI830->batch_bo;
+    bo_table[1] = render_state->dynamic_state_bo;
+
+    /* If this command won't fit in the current batch, flush. */
+    if (dri_bufmgr_check_aperture_space (bo_table, NUM_BO) < 0) {
+	intel_batch_flush (pScrn, FALSE);
+
+	if (check_twice) {
+	    /* If the command still won't fit in an empty batch, then it's
+	     * just plain too big for the hardware---fallback to software.
+	     */
+	    if (dri_bufmgr_check_aperture_space (bo_table, NUM_BO) < 0) {
+		dri_bo_unreference (render_state->dynamic_state_bo);
+		render_state->dynamic_state_bo = NULL;
+		return FALSE;
+	    }
+	}
+    }
+
+    return TRUE;
+}
+#undef NUM_BO
+
 Bool
 i965_prepare_composite(int op, PicturePtr pSrcPicture,
 		       PicturePtr pMaskPicture, PicturePtr pDstPicture,
@@ -940,6 +1022,12 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     int state_base_offset;
     uint32_t src_blend, dst_blend;
     uint32_t *binding_table;
+    Bool success;
+
+    /* Fallback if we can't make this operation fit. */
+    success = _allocate_dynamic_state_check_twice (pScrn);
+    if (! success)
+	return FALSE;
 
     IntelEmitInvarientState(pScrn);
     *pI830->last_3d = LAST_3D_RENDER;
@@ -1288,11 +1376,11 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
     I830Ptr pI830 = I830PTR(pScrn);
     struct gen4_render_state *render_state = pI830->gen4_render_state;
-    gen4_static_state_t *static_state = render_state->static_state;
+    gen4_dynamic_state *dynamic_state;
     Bool has_mask;
     Bool is_affine_src, is_affine_mask, is_affine;
     float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
-    float *vb = static_state->vb;
+    float *vb;
     int i;
 
     is_affine_src = i830_transform_is_affine (pI830->transform[0]);
@@ -1369,11 +1457,21 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	}
     }
 
-    if (render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE >= ARRAY_SIZE(static_state->vb)) {
-	i830WaitSync(pScrn);
+    /* Arrange for a dynamic_state buffer object with sufficient space
+     * for our vertices. */
+    if (render_state->vb_offset + VERTEX_FLOATS_PER_COMPOSITE > VERTEX_BUFFER_SIZE) {
+	dri_bo_unreference (render_state->dynamic_state_bo);
+	render_state->dynamic_state_bo = NULL;
 	render_state->vb_offset = 0;
+	_allocate_dynamic_state (pScrn);
     }
 
+    /* Map the dynamic_state buffer object so we can write to the
+     * vertex buffer within it. */
+    dri_bo_map (render_state->dynamic_state_bo, 1);
+    dynamic_state = render_state->dynamic_state_bo->virtual;
+    vb = dynamic_state->vb;
+
     i = render_state->vb_offset;
     /* rect (x2,y2) */
     vb[i++] = (float)(dstX + w);
@@ -1416,7 +1514,9 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	if (!is_affine)
 	    vb[i++] = mask_w[0];
     }
-    assert (i * 4 <= sizeof(static_state->vb));
+    assert (i <= VERTEX_BUFFER_SIZE);
+
+    dri_bo_unmap (render_state->dynamic_state_bo);
 
     BEGIN_BATCH(12);
     OUT_BATCH(MI_FLUSH);
@@ -1425,7 +1525,8 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
 	      VB0_VERTEXDATA |
 	      (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
-    OUT_BATCH(render_state->static_state_offset + offsetof(gen4_static_state_t, vb) +
+    OUT_RELOC(render_state->dynamic_state_bo, I915_GEM_DOMAIN_VERTEX, 0,
+	      offsetof(gen4_dynamic_state, vb) +
 	      render_state->vb_offset * 4);
     OUT_BATCH(3);
     OUT_BATCH(0); // ignore for VERTEXDATA, but still there


More information about the xorg-commit mailing list