xf86-video-intel: Branch 'drm-gem' - 5 commits - src/i830_batchbuffer.h src/i830_driver.c src/i965_render.c

Carl Worth cworth at kemper.freedesktop.org
Fri Aug 1 15:36:33 PDT 2008


 src/i830_batchbuffer.h |   17 +++++++++++++++
 src/i830_driver.c      |    4 +++
 src/i965_render.c      |   54 ++++++++++++++++++++++---------------------------
 3 files changed, 46 insertions(+), 29 deletions(-)

New commits:
commit 750bd0bde09adf956c17bbb49c5a6020f12e60a4
Author: Carl Worth <cworth at cworth.org>
Date:   Tue Jul 29 15:22:39 2008 -0700

    Call DRM_I915_GEM_THROTTLE from I830BlockHandler
    
    This prevents the CPU from ridiculously outrunning the GPU.

diff --git a/src/i830_driver.c b/src/i830_driver.c
index a453d90..195bc5c 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -2507,6 +2507,9 @@ I830BlockHandler(int i,
        intel_batch_flush(pScrn);
 
        pI830->need_mi_flush = FALSE;
+#ifdef XF86DRI
+       drmCommandNone(pI830->drmSubFD, DRM_I915_GEM_THROTTLE);
+#endif
     }
 
     /*
commit a893f176dda0b64f7dadfda6bf0331240037851e
Author: Carl Worth <cworth at cworth.org>
Date:   Fri Jul 25 15:56:35 2008 -0700

    Add call to intel_bufmgr_gem_enable_reuse
    
    This instructs GEM to reuse buffer objects and improves the
    performance of my favorite 'x11perf -aa10text' from about
    169k to about 188k glyphs/sec.

diff --git a/src/i830_driver.c b/src/i830_driver.c
index dcbaa04..a453d90 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -2760,6 +2760,7 @@ i830_init_bufmgr(ScrnInfoPtr pScrn)
 	 batch_size = 4096;
 
       pI830->bufmgr = intel_bufmgr_gem_init(pI830->drmSubFD, batch_size);
+      intel_bufmgr_gem_enable_reuse(pI830->bufmgr);
    } else {
       pI830->bufmgr = intel_bufmgr_fake_init(pI830->fake_bufmgr_mem->offset,
 					     pI830->FbBase +
commit b3c1a148679a4d943e556f996ef6b9004f549a41
Author: Carl Worth <cworth at cworth.org>
Date:   Fri Jul 25 15:18:28 2008 -0700

    Eliminate unnecessary flush from i965_composite
    
    This improves 'x11perf -aa10text' performance from ~144k to ~169k glyphs/sec.

diff --git a/src/i965_render.c b/src/i965_render.c
index 93e2888..6eafd44 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1447,21 +1447,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     ErrorF("sync after 3dprimitive\n");
     I830Sync(pScrn);
 #endif
-    /* we must be sure that the pipeline is flushed before next exa draw,
-       because that will be new state, binding state and instructions*/
-    {
-	BEGIN_BATCH(4);
-	OUT_BATCH(BRW_PIPE_CONTROL |
-		  BRW_PIPE_CONTROL_NOWRITE |
-		  BRW_PIPE_CONTROL_WC_FLUSH |
-		  BRW_PIPE_CONTROL_IS_FLUSH |
-		  (1 << 10) |  /* XXX texture cache flush for BLC/CTG */
-		  2);
-	OUT_BATCH(0); /* Destination address */
-	OUT_BATCH(0); /* Immediate data low DW */
-	OUT_BATCH(0); /* Immediate data high DW */
-	ADVANCE_BATCH();
-    }
 }
 
 /**
commit 1abf4d3a7a203ff5d6e5ceda29573e7fd69ddf8e
Author: Carl Worth <cworth at cworth.org>
Date:   Fri Jul 25 14:48:45 2008 -0700

    Switch to using a buffer object for the vertex buffer

diff --git a/src/i965_render.c b/src/i965_render.c
index 1cbfe24..93e2888 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -37,6 +37,7 @@
 #include "xf86.h"
 #include "i830.h"
 #include "i915_reg.h"
+#include "i915_drm.h"
 
 /* bring in brw structs */
 #include "brw_defines.h"
@@ -60,7 +61,7 @@ do { 							\
 #endif
 
 #define MAX_VERTEX_PER_COMPOSITE    24
-#define MAX_VERTEX_BUFFERS	    256
+#define VERTEX_BUFFER_SIZE	    (16 * MAX_VERTEX_PER_COMPOSITE)
 
 struct blendinfo {
     Bool dst_alpha;
@@ -502,14 +503,14 @@ typedef struct _gen4_state {
 				     [BRW_BLENDFACTOR_COUNT];
     struct brw_cc_viewport cc_viewport;
     PAD64 (brw_cc_viewport, 0);
-
-    float vb[MAX_VERTEX_PER_COMPOSITE * MAX_VERTEX_BUFFERS];
 } gen4_state_t;
 
 /** Private data for gen4 render accel implementation. */
 struct gen4_render_state {
     gen4_state_t *card_state;
     uint32_t card_state_offset;
+    dri_bo *vb_bo;
+    int vb_bo_busy;
 
     int binding_table_index;
     int surface_state_index;
@@ -1270,12 +1271,11 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 {
     ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
     I830Ptr pI830 = I830PTR(pScrn);
-    gen4_state_t *card_state = pI830->gen4_render_state->card_state;
     struct gen4_render_state *render_state = pI830->gen4_render_state;
     Bool has_mask;
     Bool is_affine_src, is_affine_mask, is_affine;
     float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
-    float *vb = card_state->vb;
+    float *vb;
     int i;
 
     is_affine_src = i830_transform_is_affine (pI830->transform[0]);
@@ -1352,11 +1352,25 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	}
     }
 
-    if (render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE >= ARRAY_SIZE(card_state->vb)) {
-	i830WaitSync(pScrn);
+    /* Arrange for a buffer object with sufficient space for our
+     * vertices, and that isn't "busy", that is, it is not already
+     * referenced by a batch that has been flushed. */
+    if (! render_state->vb_bo || render_state->vb_bo_busy ||
+	render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE > VERTEX_BUFFER_SIZE)
+    {
+	if (render_state->vb_bo)
+	    dri_bo_unreference (render_state->vb_bo);
+
+	render_state->vb_bo = dri_bo_alloc (pI830->bufmgr, "vb",
+					    VERTEX_BUFFER_SIZE * sizeof (float),
+					    4096);
 	render_state->vb_offset = 0;
     }
 
+    /* Map the vertex buffer object so we can write to it. */
+    dri_bo_map (render_state->vb_bo, 1);
+    vb = render_state->vb_bo->virtual;
+
     i = render_state->vb_offset;
     /* rect (x2,y2) */
     vb[i++] = (float)(dstX + w);
@@ -1399,7 +1413,9 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	if (!is_affine)
 	    vb[i++] = mask_w[0];
     }
-    assert (i * 4 <= sizeof(card_state->vb));
+    assert (i <= VERTEX_BUFFER_SIZE);
+
+    dri_bo_unmap (render_state->vb_bo);
 
     BEGIN_BATCH(12);
     OUT_BATCH(MI_FLUSH);
@@ -1408,7 +1424,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
 	      VB0_VERTEXDATA |
 	      (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
-    OUT_BATCH(render_state->card_state_offset + offsetof(gen4_state_t, vb) +
+    OUT_RELOC(render_state->vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
 	      render_state->vb_offset * 4);
     OUT_BATCH(3);
     OUT_BATCH(0); // ignore for VERTEXDATA, but still there
@@ -1446,11 +1462,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	OUT_BATCH(0); /* Immediate data high DW */
 	ADVANCE_BATCH();
     }
-
-    /* Mark sync so we can wait for it before setting up the VB on the next
-     * rectangle.
-     */
-    i830MarkSync(pScrn);
 }
 
 /**
commit e5fab0b7681be06a5a3be4bbd769ba5c435e2128
Author: Carl Worth <cworth at cworth.org>
Date:   Fri Jul 25 13:44:29 2008 -0700

    Add OUT_RELOC macro and backing intel_batch_emit_reloc function

diff --git a/src/i830_batchbuffer.h b/src/i830_batchbuffer.h
index 2b898c2..c9b8421 100644
--- a/src/i830_batchbuffer.h
+++ b/src/i830_batchbuffer.h
@@ -60,6 +60,20 @@ intel_batch_emit_dword(I830Ptr pI830, uint32_t dword)
 }
 
 static inline void
+intel_batch_emit_reloc (I830Ptr  pI830,
+			dri_bo  *bo,
+			uint32_t read_domains,
+			uint32_t write_domains,
+			uint32_t delta)
+{
+    assert(intel_batch_space(pI830) >= 4);
+    *(uint32_t *)(pI830->batch_ptr + pI830->batch_used) = bo->offset + delta;
+    intel_bo_emit_reloc (pI830->batch_bo, read_domains, write_domains, delta,
+			 pI830->batch_used, bo);
+    pI830->batch_used += 4;
+}
+
+static inline void
 intel_batch_emit_reloc_pixmap(I830Ptr pI830, PixmapPtr pPixmap, uint32_t delta)
 {
     assert(pI830->batch_ptr != NULL);
@@ -71,6 +85,9 @@ intel_batch_emit_reloc_pixmap(I830Ptr pI830, PixmapPtr pPixmap, uint32_t delta)
 
 #define OUT_BATCH(dword) intel_batch_emit_dword(pI830, dword)
 
+#define OUT_RELOC(bo, read_domains, write_domains, delta) \
+	intel_batch_emit_reloc (pI830, bo, read_domains, write_domains, delta)
+
 #define OUT_RELOC_PIXMAP(pPixmap, delta)	\
 	intel_batch_emit_reloc_pixmap(pI830, pPixmap, delta)
 


More information about the xorg-commit mailing list