xf86-video-intel: 3 commits - src/i965_render.c
Carl Worth
cworth at kemper.freedesktop.org
Sat Jul 12 08:23:44 PDT 2008
src/i965_render.c | 73 +++++++++++++++++++++++++++++++-----------------------
1 file changed, 43 insertions(+), 30 deletions(-)
New commits:
commit 757c00927a6f5760135136450b8d02d0f999ac1c
Author: Carl Worth <cworth at cworth.org>
Date: Mon Jul 7 15:58:27 2008 -0700
Use up to 256 separate vertex buffers
This allows us to call i830WaitSync only once every 128 calls to composite
rather than on every call. However, we also need to emit MI_FLUSH to
keep the vertex cache from getting in our way (since our "separate" buffers
are all allocated as one contiguous chunk).
diff --git a/src/i965_render.c b/src/i965_render.c
index 3c7379c..1cbfe24 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -60,7 +60,7 @@ do { \
#endif
#define MAX_VERTEX_PER_COMPOSITE 24
-#define MAX_VERTEX_BUFFERS 1
+#define MAX_VERTEX_BUFFERS 256
struct blendinfo {
Bool dst_alpha;
@@ -1401,7 +1401,8 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
}
assert (i * 4 <= sizeof(card_state->vb));
- BEGIN_BATCH(11);
+ BEGIN_BATCH(12);
+ OUT_BATCH(MI_FLUSH);
/* Set up the pointer to our (single) vertex buffer */
OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
commit 0c548cd040d0c5e1812470ccdf6de86b6a2926d7
Author: Carl Worth <cworth at cworth.org>
Date: Mon Jul 7 14:01:15 2008 -0700
Allow for multiple vertex buffers (though only use one for now)
Using more than one (in the future) will allow less frequent calls
to i830WaitSync.
diff --git a/src/i965_render.c b/src/i965_render.c
index a9e3227..3c7379c 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -59,6 +59,9 @@ do { \
} while(0)
#endif
+#define MAX_VERTEX_PER_COMPOSITE 24
+#define MAX_VERTEX_BUFFERS 1
+
struct blendinfo {
Bool dst_alpha;
Bool src_alpha;
@@ -500,7 +503,7 @@ typedef struct _gen4_state {
struct brw_cc_viewport cc_viewport;
PAD64 (brw_cc_viewport, 0);
- float vb[(2 + 3 + 3) * 3]; /* (dst, src, mask) 3 vertices, 4 bytes */
+ float vb[MAX_VERTEX_PER_COMPOSITE * MAX_VERTEX_BUFFERS];
} gen4_state_t;
/** Private data for gen4 render accel implementation. */
@@ -510,6 +513,7 @@ struct gen4_render_state {
int binding_table_index;
int surface_state_index;
+ int vb_offset;
int vertex_size;
};
@@ -969,6 +973,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
i830WaitSync(pScrn);
render_state->binding_table_index = 0;
render_state->surface_state_index = 0;
+ render_state->vb_offset = 0;
}
binding_table = card_state->binding_table +
@@ -1347,12 +1352,12 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
}
}
- /* Wait for any existing composite rectangles to land before we overwrite
- * the VB with the next one.
- */
- i830WaitSync(pScrn);
+ if (render_state->vb_offset + MAX_VERTEX_PER_COMPOSITE >= ARRAY_SIZE(card_state->vb)) {
+ i830WaitSync(pScrn);
+ render_state->vb_offset = 0;
+ }
- i = 0;
+ i = render_state->vb_offset;
/* rect (x2,y2) */
vb[i++] = (float)(dstX + w);
vb[i++] = (float)(dstY + h);
@@ -1402,7 +1407,8 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
VB0_VERTEXDATA |
(render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
- OUT_BATCH(render_state->card_state_offset + offsetof(gen4_state_t, vb));
+ OUT_BATCH(render_state->card_state_offset + offsetof(gen4_state_t, vb) +
+ render_state->vb_offset * 4);
OUT_BATCH(3);
OUT_BATCH(0); // ignore for VERTEXDATA, but still there
@@ -1418,6 +1424,8 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
OUT_BATCH(0); /* index buffer offset, ignored */
ADVANCE_BATCH();
+ render_state->vb_offset = i;
+
#ifdef I830DEBUG
ErrorF("sync after 3dprimitive\n");
I830Sync(pScrn);
commit cc2249333cd462b4d99d110a12c454ca141b2be8
Author: Carl Worth <cworth at cworth.org>
Date: Mon Jul 7 13:25:42 2008 -0700
Move VERTEX_BUFFERS setup from prepare_composite to composite
This is in preparation for having larger (or multiple) vertex buffers
in the future.
diff --git a/src/i965_render.c b/src/i965_render.c
index a13aec2..a9e3227 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -510,6 +510,7 @@ struct gen4_render_state {
int binding_table_index;
int surface_state_index;
+ int vertex_size;
};
/**
@@ -1195,6 +1196,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
int selem = is_affine ? 2 : 3;
uint32_t w_component;
uint32_t src_format;
+
+ render_state->vertex_size = 4 * (2 + nelem * selem);
if (is_affine)
{
@@ -1206,17 +1209,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
w_component = BRW_VFCOMPONENT_STORE_SRC;
}
- BEGIN_BATCH(pMask?12:10);
- /* Set up the pointer to our (single) vertex buffer */
- OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
- OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
- VB0_VERTEXDATA |
- ((4 * (2 + nelem * selem)) << VB0_BUFFER_PITCH_SHIFT));
- OUT_BATCH(state_base_offset + offsetof(gen4_state_t, vb));
- OUT_BATCH(3);
- OUT_BATCH(0); // ignore for VERTEXDATA, but still there
-
+ BEGIN_BATCH(pMask?7:5);
/* Set up our vertex elements, sourced from the single vertex buffer.
+ * that will be set up later.
*/
OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * (1 + nelem)) - 1));
@@ -1271,6 +1266,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
I830Ptr pI830 = I830PTR(pScrn);
gen4_state_t *card_state = pI830->gen4_render_state->card_state;
+ struct gen4_render_state *render_state = pI830->gen4_render_state;
Bool has_mask;
Bool is_affine_src, is_affine_mask, is_affine;
float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
@@ -1400,20 +1396,28 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
}
assert (i * 4 <= sizeof(card_state->vb));
- {
- BEGIN_BATCH(6);
- OUT_BATCH(BRW_3DPRIMITIVE |
- BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
- (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
- (0 << 9) | /* CTG - indirect vertex count */
- 4);
- OUT_BATCH(3); /* vertex count per instance */
- OUT_BATCH(0); /* start vertex offset */
- OUT_BATCH(1); /* single instance */
- OUT_BATCH(0); /* start instance location */
- OUT_BATCH(0); /* index buffer offset, ignored */
- ADVANCE_BATCH();
- }
+ BEGIN_BATCH(11);
+ /* Set up the pointer to our (single) vertex buffer */
+ OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
+ OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
+ VB0_VERTEXDATA |
+ (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
+ OUT_BATCH(render_state->card_state_offset + offsetof(gen4_state_t, vb));
+ OUT_BATCH(3);
+ OUT_BATCH(0); // ignore for VERTEXDATA, but still there
+
+ OUT_BATCH(BRW_3DPRIMITIVE |
+ BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) | /* CTG - indirect vertex count */
+ 4);
+ OUT_BATCH(3); /* vertex count per instance */
+ OUT_BATCH(0); /* start vertex offset */
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+ ADVANCE_BATCH();
+
#ifdef I830DEBUG
ErrorF("sync after 3dprimitive\n");
I830Sync(pScrn);
More information about the xorg-commit mailing list