xf86-video-intel: 6 commits - src/common.h src/i830_accel.c src/i830_batchbuffer.c src/i830_batchbuffer.h src/i830_dri.c src/i830_driver.c src/i830.h src/i830_render.c src/i830_uxa.c src/i915_render.c src/i965_render.c uxa/uxa-glyphs.c
Chris Wilson
ickle at kemper.freedesktop.org
Sun Nov 29 16:59:01 PST 2009
src/common.h | 11 +
src/i830.h | 94 +++++++++-
src/i830_accel.c | 21 --
src/i830_batchbuffer.c | 58 ++++++
src/i830_batchbuffer.h | 35 +++
src/i830_dri.c | 8
src/i830_driver.c | 14 -
src/i830_render.c | 4
src/i830_uxa.c | 434 +++++++++++++++++++++++++++++++++++++------------
src/i915_render.c | 5
src/i965_render.c | 23 +-
uxa/uxa-glyphs.c | 30 +++
12 files changed, 579 insertions(+), 158 deletions(-)
New commits:
commit 3f11bbec420080151406c203af292e55177e77d1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Nov 29 21:39:41 2009 +0000
uxa-glyphs: Enable TILING_X on glyph caches.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/common.h b/src/common.h
index b9269b5..3169cdf 100644
--- a/src/common.h
+++ b/src/common.h
@@ -389,4 +389,15 @@ extern int I810_DEBUG;
struct pci_device *
intel_host_bridge (void);
+/**
+ * Hints to CreatePixmap to tell the driver how the pixmap is going to be
+ * used.
+ *
+ * Compare to CREATE_PIXMAP_USAGE_* in the server.
+ */
+enum {
+ INTEL_CREATE_PIXMAP_TILING_X = 0x10000000,
+ INTEL_CREATE_PIXMAP_TILING_Y,
+};
+
#endif /* _INTEL_COMMON_H_ */
diff --git a/src/i830.h b/src/i830.h
index 610f5ba..9769e22 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -597,17 +597,6 @@ extern const int I830CopyROP[16];
#define ALLOW_SHARING 0x00000010
#define DISABLE_REUSE 0x00000020
-/**
- * Hints to CreatePixmap to tell the driver how the pixmap is going to be
- * used.
- *
- * Compare to CREATE_PIXMAP_USAGE_* in the server.
- */
-enum {
- INTEL_CREATE_PIXMAP_TILING_X = 0x10000000,
- INTEL_CREATE_PIXMAP_TILING_Y,
-};
-
void i830_debug_flush(ScrnInfoPtr scrn);
static inline PixmapPtr get_drawable_pixmap(DrawablePtr drawable)
diff --git a/uxa/uxa-glyphs.c b/uxa/uxa-glyphs.c
index ff16781..5c23321 100644
--- a/uxa/uxa-glyphs.c
+++ b/uxa/uxa-glyphs.c
@@ -47,6 +47,7 @@
#include <stdlib.h>
#include "uxa-priv.h"
+#include "../src/common.h"
#include "mipict.h"
@@ -189,7 +190,8 @@ static Bool uxa_realize_glyph_caches(ScreenPtr pScreen, unsigned int format)
pPixmap = (*pScreen->CreatePixmap) (pScreen,
CACHE_PICTURE_WIDTH,
- height, depth, 0);
+ height, depth,
+ INTEL_CREATE_PIXMAP_TILING_X);
if (!pPixmap)
return FALSE;
commit 19d8c0cf50e98909c533ebfce3a0dd3f72b755c1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Nov 29 21:16:49 2009 +0000
uxa: PutImage acceleration
Avoid waiting on a dirty buffer object by streaming the upload to a fresh,
non-GPU hot buffer and blitting to the destination.
This should help to redress the regression reported in bug 18075:
[UXA] XPutImage performance regression
https://bugs.freedesktop.org/show_bug.cgi?id=18075
Using the particular synthetic benchmark in question on a g45:
Before:
9542.910448 Ops/s; put composition (!); 15x15
5623.271889 Ops/s; put composition (!); 75x75
1685.520362 Ops/s; put composition (!); 250x250
After:
40173.865300 Ops/s; put composition (!); 15x15
28670.280612 Ops/s; put composition (!); 75x75
4794.368601 Ops/s; put composition (!); 250x250
which, while not stellar performance, is at least an improvement. As
anticipated this has little impact on the non-fallback RENDER paths, for
instance the current cairo-xlib backend is unaffected by this change.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index b11f2f7..5f3d505 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -627,6 +627,164 @@ static void i830_uxa_finish_access(PixmapPtr pixmap)
}
}
+static Bool
+i830_uxa_pixmap_swap_bo_with_image(PixmapPtr pixmap,
+ char *src, int src_pitch)
+{
+ ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+ struct intel_pixmap *priv;
+ dri_bo *bo;
+ uint32_t tiling = I915_TILING_X;
+ int stride;
+ int w = pixmap->drawable.width;
+ int h = pixmap->drawable.height;
+
+ priv = i830_get_pixmap_intel(pixmap);
+
+ if (priv->batch_read_domains || drm_intel_bo_busy(priv->bo)) {
+ unsigned int size;
+
+ size = i830_uxa_pixmap_compute_size (pixmap, w, h,
+ &tiling, &stride);
+ if (size > intel->max_gtt_map_size)
+ return FALSE;
+
+ bo = drm_intel_bo_alloc(intel->bufmgr, "pixmap", size, 0);
+ if (bo == NULL)
+ return FALSE;
+
+ if (tiling != I915_TILING_NONE)
+ drm_intel_bo_set_tiling(bo, &tiling, stride);
+
+ dri_bo_unreference(priv->bo);
+ priv->bo = bo;
+ priv->tiling = tiling;
+ priv->batch_read_domains = priv->batch_write_domain = 0;
+ priv->flush_read_domains = priv->flush_write_domain = 0;
+ list_del(&priv->batch);
+ list_del(&priv->flush);
+ pixmap->drawable.pScreen->ModifyPixmapHeader(pixmap,
+ w, h,
+ 0, 0,
+ stride, NULL);
+ } else {
+ bo = priv->bo;
+ stride = i830_pixmap_pitch(pixmap);
+ }
+
+ if (drm_intel_gem_bo_map_gtt(bo)) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "%s: bo map failed\n", __FUNCTION__);
+ return FALSE;
+ }
+
+ if (src_pitch == stride) {
+ memcpy (bo->virtual, src, src_pitch * h);
+ } else {
+ char *dst = bo->virtual;
+
+ w *= pixmap->drawable.bitsPerPixel/8;
+ while (h--) {
+ memcpy (dst, src, w);
+ src += src_pitch;
+ dst += stride;
+ }
+ }
+
+ drm_intel_gem_bo_unmap_gtt(bo);
+
+ return TRUE;
+}
+
+static Bool i830_uxa_put_image(PixmapPtr pixmap,
+ int x, int y,
+ int w, int h,
+ char *src, int src_pitch)
+{
+ ScreenPtr screen = pixmap->drawable.pScreen;
+ ScrnInfoPtr scrn = xf86Screens[screen->myNum];
+ PixmapPtr scratch;
+ struct intel_pixmap *priv;
+ Bool scratch_pixmap;
+ GCPtr gc;
+ Bool ret;
+
+ if (x == 0 && y == 0 &&
+ w == pixmap->drawable.width &&
+ h == pixmap->drawable.height)
+ {
+ /* Replace GPU hot bo with new CPU data. */
+ return i830_uxa_pixmap_swap_bo_with_image(pixmap,
+ src, src_pitch);
+ }
+
+ priv = i830_get_pixmap_intel(pixmap);
+ if (priv->batch_read_domains || drm_intel_bo_busy(priv->bo)) {
+ dri_bo *bo;
+ int stride;
+
+ /* Partial replacement, copy incoming image to a bo and blit. */
+ scratch = (*screen->CreatePixmap)(screen, w, h,
+ pixmap->drawable.depth,
+ UXA_CREATE_PIXMAP_FOR_MAP);
+ if (!scratch)
+ return FALSE;
+
+ bo = i830_get_pixmap_bo(scratch);
+ if (drm_intel_gem_bo_map_gtt(bo)) {
+ (*screen->DestroyPixmap) (scratch);
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "%s: bo map failed\n", __FUNCTION__);
+ return FALSE;
+ }
+
+ stride = i830_pixmap_pitch(scratch);
+ if (src_pitch == stride) {
+ memcpy (bo->virtual, src, stride * h);
+ } else {
+ char *dst = bo->virtual;
+ int row_length = w * pixmap->drawable.bitsPerPixel/8;
+ int num_rows = h;
+ while (num_rows--) {
+ memcpy (dst, src, row_length);
+ src += src_pitch;
+ dst += stride;
+ }
+ }
+
+ drm_intel_gem_bo_unmap_gtt(bo);
+ scratch_pixmap = FALSE;
+ } else {
+ /* bo is not busy so can be mapped without a stall, upload in-place. */
+ scratch = GetScratchPixmapHeader(screen, w, h,
+ pixmap->drawable.depth,
+ pixmap->drawable.bitsPerPixel,
+ src_pitch, src);
+ scratch_pixmap = TRUE;
+ }
+
+ ret = FALSE;
+ gc = GetScratchGC(pixmap->drawable.depth, screen);
+ if (gc) {
+ ValidateGC(&pixmap->drawable, gc);
+
+ (*gc->ops->CopyArea)(&scratch->drawable,
+ &pixmap->drawable,
+ gc, 0, 0, w, h, x, y);
+
+ FreeScratchGC(gc);
+ ret = TRUE;
+ }
+
+ if (scratch_pixmap)
+ FreeScratchPixmapHeader(scratch);
+ else
+ (*screen->DestroyPixmap)(scratch);
+
+ return ret;
+}
+
void i830_uxa_block_handler(ScreenPtr screen)
{
ScrnInfoPtr scrn = xf86Screens[screen->myNum];
@@ -797,6 +955,9 @@ Bool i830_uxa_init(ScreenPtr screen)
intel->uxa_driver->done_composite = i830_done_composite;
}
+ /* PutImage */
+ intel->uxa_driver->put_image = i830_uxa_put_image;
+
intel->uxa_driver->prepare_access = i830_uxa_prepare_access;
intel->uxa_driver->finish_access = i830_uxa_finish_access;
intel->uxa_driver->pixmap_is_offscreen = i830_uxa_pixmap_is_offscreen;
commit f7540f06090753cba1190aa9e8cdea05a9512077
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Nov 29 21:12:07 2009 +0000
Only flush batch during prepare access if it may modify the pixmap.
As we track when a pixmap is active inside a batch buffer, we can avoid
unnecessary flushes of the batch when mapping a pixmap back to the CPU.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index 8f8c5e9..b11f2f7 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -558,22 +558,20 @@ void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
static Bool i830_uxa_prepare_access(PixmapPtr pixmap, uxa_access_t access)
{
- dri_bo *bo = i830_get_pixmap_bo(pixmap);
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
intel_screen_private *intel = intel_get_screen_private(scrn);
+ struct intel_pixmap *priv = i830_get_pixmap_intel(pixmap);
+ dri_bo *bo = priv->bo;
- intel_batch_flush(scrn, FALSE);
+ if (!list_is_empty(&priv->batch) &&
+ (access == UXA_ACCESS_RW || priv->batch_write_domain))
+ intel_batch_flush(scrn, TRUE);
/* No VT sema or GEM? No GTT mapping. */
if (!scrn->vtSema) {
if (dri_bo_map(bo, access == UXA_ACCESS_RW) != 0)
return FALSE;
- pixmap->devPrivate.ptr = bo->virtual;
- return TRUE;
- }
-
- /* Kernel manages fences at GTT map/fault time */
- if (bo->size < intel->max_gtt_map_size) {
+ } else if (bo->size < intel->max_gtt_map_size) {
if (drm_intel_gem_bo_map_gtt(bo)) {
xf86DrvMsg(scrn->scrnIndex, X_WARNING,
"%s: bo map failed\n", __FUNCTION__);
@@ -588,6 +586,18 @@ static Bool i830_uxa_prepare_access(PixmapPtr pixmap, uxa_access_t access)
}
pixmap->devPrivate.ptr = bo->virtual;
+ /* This acts as a synchronisation point. */
+ while (!list_is_empty(&intel->flush_pixmaps)) {
+ struct intel_pixmap *entry;
+
+ entry = list_first_entry(&intel->flush_pixmaps,
+ struct intel_pixmap,
+ flush);
+
+ entry->flush_read_domains = entry->flush_write_domain = 0;
+ list_del(&entry->flush);
+ }
+
return TRUE;
}
commit 9a2c18fb92659d57741bfdcacbe4f69aab361532
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Nov 29 21:07:45 2009 +0000
batch: Emit a 'pipelined' flush when using a dirty source.
Ensure that the render caches and texture caches are appropriately
flushed when switching a pixmap from a target to a source.
This should fix bug 24315,
[855GM] Rendering corruption in text (usually)
https://bugs.freedesktop.org/show_bug.cgi?id=24315
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/i830_render.c b/src/i830_render.c
index ee89950..fd8003f 100644
--- a/src/i830_render.c
+++ b/src/i830_render.c
@@ -553,6 +553,10 @@ i830_prepare_composite(int op, PicturePtr source_picture,
intel->s8_blendctl = blendctl;
}
+ if(i830_uxa_pixmap_is_dirty(source) ||
+ (mask && i830_uxa_pixmap_is_dirty(mask)))
+ intel_batch_pipelined_flush(scrn);
+
intel->needs_render_state_emit = TRUE;
return TRUE;
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index 6dac79e..8f8c5e9 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -310,6 +310,9 @@ i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
if (!intel_check_pitch_2d(dest))
return FALSE;
+ if(i830_uxa_pixmap_is_dirty(source))
+ intel_batch_pipelined_flush(scrn);
+
intel->render_source = source;
intel->BR[13] = I830CopyROP[alu] << 16;
@@ -372,11 +375,14 @@ i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
OUT_BATCH(intel->BR[13] | dst_pitch);
OUT_BATCH((dst_y1 << 16) | (dst_x1 & 0xffff));
OUT_BATCH((dst_y2 << 16) | (dst_x2 & 0xffff));
- OUT_RELOC_PIXMAP(dest, I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER, 0);
+ OUT_RELOC_PIXMAP(dest,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ 0);
OUT_BATCH((src_y1 << 16) | (src_x1 & 0xffff));
OUT_BATCH(src_pitch);
- OUT_RELOC_PIXMAP(intel->render_source, I915_GEM_DOMAIN_RENDER, 0,
+ OUT_RELOC_PIXMAP(intel->render_source,
+ I915_GEM_DOMAIN_RENDER, 0,
0);
ADVANCE_BATCH();
diff --git a/src/i915_render.c b/src/i915_render.c
index c720f2f..34fd253 100644
--- a/src/i915_render.c
+++ b/src/i915_render.c
@@ -459,6 +459,11 @@ i915_prepare_composite(int op, PicturePtr source_picture,
}
intel->i915_render_state.op = op;
+
+ if(i830_uxa_pixmap_is_dirty(source) ||
+ (mask && i830_uxa_pixmap_is_dirty(mask)))
+ intel_batch_pipelined_flush(scrn);
+
intel->needs_render_state_emit = TRUE;
return TRUE;
diff --git a/src/i965_render.c b/src/i965_render.c
index 8746eb9..cb057d7 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1636,6 +1636,10 @@ i965_prepare_composite(int op, PicturePtr source_picture,
}
}
+ if(i830_uxa_pixmap_is_dirty(source) ||
+ (mask && i830_uxa_pixmap_is_dirty(mask)))
+ intel_batch_pipelined_flush(scrn);
+
intel->needs_render_state_emit = TRUE;
return TRUE;
commit 285f286597df5af13ac3f3d366f2fc9d0468dafa
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Nov 29 22:42:03 2009 +0000
batch: Track pixmap domains.
In order to detect when we require cache flushes we need to track which
domains the pixmap currently belongs to. To do so, we create a device
private structure to hold the extra information and hook it up.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/i830.h b/src/i830.h
index 87b3dba..610f5ba 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -77,6 +77,87 @@ void i830_uxa_block_handler(ScreenPtr pScreen);
Bool i830_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table,
int num_bos);
+/* classic doubly-link circular list */
+struct list {
+ struct list *next, *prev;
+};
+
+static void
+list_init(struct list *list)
+{
+ list->next = list->prev = list;
+}
+
+static inline void
+__list_add(struct list *entry,
+ struct list *prev,
+ struct list *next)
+{
+ next->prev = entry;
+ entry->next = next;
+ entry->prev = prev;
+ prev->next = entry;
+}
+
+static inline void
+list_add(struct list *entry, struct list *head)
+{
+ __list_add(entry, head, head->next);
+}
+
+static inline void
+__list_del(struct list *prev, struct list *next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+static inline void
+list_del(struct list *entry)
+{
+ __list_del(entry->prev, entry->next);
+ list_init(entry);
+}
+
+static inline Bool
+list_is_empty(struct list *head)
+{
+ return head->next == head;
+}
+
+#ifndef container_of
+#define container_of(ptr, type, member) \
+ (type *)((char *)(ptr) - (char *) &((type *)0)->member)
+#endif
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+struct intel_pixmap {
+ dri_bo *bo;
+ uint32_t tiling;
+ uint32_t flush_write_domain;
+ uint32_t flush_read_domains;
+ uint32_t batch_write_domain;
+ uint32_t batch_read_domains;
+ struct list flush, batch;
+};
+
+struct intel_pixmap *i830_get_pixmap_intel(PixmapPtr pixmap);
+
+static inline Bool i830_uxa_pixmap_is_dirty(PixmapPtr pixmap)
+{
+ return i830_get_pixmap_intel(pixmap)->flush_write_domain != 0;
+}
+
+static inline Bool i830_pixmap_tiled(PixmapPtr pixmap)
+{
+ return i830_get_pixmap_intel(pixmap)->tiling != I915_TILING_NONE;
+}
+
dri_bo *i830_get_pixmap_bo(PixmapPtr pixmap);
void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo);
@@ -194,6 +275,8 @@ typedef struct intel_screen_private {
Bool in_batch_atomic;
/** Ending batch_used that was verified by i830_start_batch_atomic() */
int batch_atomic_limit;
+ struct list batch_pixmaps;
+ struct list flush_pixmaps;
/* For Xvideo */
Bool use_drmmode_overlay;
diff --git a/src/i830_accel.c b/src/i830_accel.c
index 1e4c5c6..83fdf8a 100644
--- a/src/i830_accel.c
+++ b/src/i830_accel.c
@@ -63,35 +63,16 @@ void I830Sync(ScrnInfoPtr scrn)
if (!scrn->vtSema || !intel->batch_bo)
return;
- I830EmitFlush(scrn);
-
intel_batch_flush(scrn, TRUE);
intel_batch_wait_last(scrn);
}
-void I830EmitFlush(ScrnInfoPtr scrn)
-{
- intel_screen_private *intel = intel_get_screen_private(scrn);
- int flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
-
- if (IS_I965G(intel))
- flags = 0;
-
- {
- BEGIN_BATCH(1);
- OUT_BATCH(MI_FLUSH | flags);
- ADVANCE_BATCH();
- }
-}
-
void i830_debug_flush(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
- if (intel->debug_flush & DEBUG_FLUSH_BATCHES)
+ if (intel->debug_flush & (DEBUG_FLUSH_BATCHES | DEBUG_FLUSH_CACHES))
intel_batch_flush(scrn, FALSE);
- else if (intel->debug_flush & DEBUG_FLUSH_CACHES)
- I830EmitFlush(scrn);
}
/* The following function sets up the supported acceleration. Call it
diff --git a/src/i830_batchbuffer.c b/src/i830_batchbuffer.c
index e5ddb47..351c0c8 100644
--- a/src/i830_batchbuffer.c
+++ b/src/i830_batchbuffer.c
@@ -90,17 +90,45 @@ void intel_batch_teardown(ScrnInfoPtr scrn)
}
}
-void intel_batch_flush(ScrnInfoPtr scrn, Bool flushed)
+void intel_batch_pipelined_flush(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
- int ret;
+ int flags;
assert (!intel->in_batch_atomic);
if (intel->batch_used == 0)
return;
- if (intel->debug_flush & DEBUG_FLUSH_CACHES) {
+ /* Big hammer, look to the pipelined flushes in future. */
+ flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
+ if (IS_I965G(intel))
+ flags = 0;
+
+ BEGIN_BATCH(1);
+ OUT_BATCH(MI_FLUSH | flags);
+ ADVANCE_BATCH();
+
+ while (!list_is_empty(&intel->flush_pixmaps)) {
+ struct intel_pixmap *entry;
+
+ entry = list_first_entry(&intel->flush_pixmaps,
+ struct intel_pixmap,
+ flush);
+
+ entry->flush_read_domains = entry->flush_write_domain = 0;
+ list_del(&entry->flush);
+ }
+}
+
+void intel_batch_flush(ScrnInfoPtr scrn, Bool flush)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+ int ret;
+
+ assert (!intel->in_batch_atomic);
+
+ if (flush || intel->debug_flush & DEBUG_FLUSH_CACHES) {
int flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
if (IS_I965G(intel))
@@ -111,6 +139,9 @@ void intel_batch_flush(ScrnInfoPtr scrn, Bool flushed)
intel->batch_used += 4;
}
+ if (intel->batch_used == 0)
+ return;
+
/* Emit a padding dword if we aren't going to be quad-word aligned. */
if ((intel->batch_used & 4) == 0) {
*(uint32_t *) (intel->batch_ptr + intel->batch_used) = MI_NOOP;
@@ -132,6 +163,27 @@ void intel_batch_flush(ScrnInfoPtr scrn, Bool flushed)
FatalError("Failed to submit batchbuffer: %s\n",
strerror(-ret));
+ while (!list_is_empty(&intel->batch_pixmaps)) {
+ struct intel_pixmap *entry;
+
+ entry = list_first_entry(&intel->batch_pixmaps,
+ struct intel_pixmap,
+ batch);
+
+ entry->batch_read_domains = entry->batch_write_domain = 0;
+ list_del(&entry->batch);
+ }
+ while (!list_is_empty(&intel->flush_pixmaps)) {
+ struct intel_pixmap *entry;
+
+ entry = list_first_entry(&intel->flush_pixmaps,
+ struct intel_pixmap,
+ flush);
+
+ entry->flush_read_domains = entry->flush_write_domain = 0;
+ list_del(&entry->flush);
+ }
+
/* Save a ref to the last batch emitted, which we use for syncing
* in debug code.
*/
diff --git a/src/i830_batchbuffer.h b/src/i830_batchbuffer.h
index a7dd289..520179c 100644
--- a/src/i830_batchbuffer.h
+++ b/src/i830_batchbuffer.h
@@ -32,9 +32,11 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define BATCH_RESERVED 16
+
void intel_batch_init(ScrnInfoPtr scrn);
void intel_batch_teardown(ScrnInfoPtr scrn);
-void intel_batch_flush(ScrnInfoPtr scrn, Bool flushed);
+void intel_batch_pipelined_flush(ScrnInfoPtr scrn);
+void intel_batch_flush(ScrnInfoPtr scrn, Bool flush);
void intel_batch_wait_last(ScrnInfoPtr scrn);
static inline int intel_batch_space(intel_screen_private *intel)
@@ -93,14 +95,41 @@ intel_batch_emit_reloc(intel_screen_private *intel,
}
static inline void
+intel_batch_mark_pixmap_domains(intel_screen_private *intel,
+ struct intel_pixmap *priv,
+ uint32_t read_domains, uint32_t write_domain)
+{
+ assert (read_domains);
+ assert (write_domain == 0 || write_domain == read_domains);
+ assert (write_domain == 0 ||
+ priv->flush_write_domain == 0 ||
+ priv->flush_write_domain == write_domain);
+
+ priv->flush_read_domains |= read_domains;
+ priv->batch_read_domains |= read_domains;
+ priv->flush_write_domain |= write_domain;
+ priv->batch_write_domain |= write_domain;
+ if (list_is_empty(&priv->batch))
+ list_add(&priv->batch, &intel->batch_pixmaps);
+ if (list_is_empty(&priv->flush))
+ list_add(&priv->flush, &intel->flush_pixmaps);
+}
+
+static inline void
intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap,
uint32_t read_domains, uint32_t write_domain,
uint32_t delta)
{
- dri_bo *bo = i830_get_pixmap_bo(pixmap);
+ struct intel_pixmap *priv = i830_get_pixmap_intel(pixmap);
+
assert(intel->batch_ptr != NULL);
assert(intel_batch_space(intel) >= 4);
- intel_batch_emit_reloc(intel, bo, read_domains, write_domain, delta);
+
+ intel_batch_mark_pixmap_domains(intel, priv, read_domains, write_domain);
+
+ intel_batch_emit_reloc(intel, priv->bo,
+ read_domains, write_domain,
+ delta);
}
#define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword)
diff --git a/src/i830_dri.c b/src/i830_dri.c
index 98c1a15..64ee454 100644
--- a/src/i830_dri.c
+++ b/src/i830_dri.c
@@ -349,12 +349,10 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion,
/* Emit a flush of the rendering cache, or on the 965 and beyond
* rendering results may not hit the framebuffer until significantly
* later.
- */
- I830EmitFlush(scrn);
- intel->need_mi_flush = FALSE;
-
- /* We can't rely on getting into the block handler before the DRI
+ *
+ * We can't rely on getting into the block handler before the DRI
* client gets to run again so flush now. */
+ intel->need_mi_flush = FALSE;
intel_batch_flush(scrn, TRUE);
#if ALWAYS_SYNC
I830Sync(scrn);
diff --git a/src/i830_driver.c b/src/i830_driver.c
index 5069142..94837eb 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -985,20 +985,19 @@ I830BlockHandler(int i, pointer blockData, pointer pTimeout, pointer pReadmask)
screen->BlockHandler = I830BlockHandler;
if (scrn->vtSema) {
- Bool flushed = FALSE;
+ Bool flush = FALSE;
+
/* Emit a flush of the rendering cache, or on the 965 and beyond
* rendering results may not hit the framebuffer until significantly
* later.
*/
- if (intel->need_mi_flush || intel->batch_used) {
- flushed = TRUE;
- I830EmitFlush(scrn);
- }
+ if (intel->need_mi_flush || intel->batch_used)
+ flush = TRUE;
/* Flush the batch, so that any rendering is executed in a timely
* fashion.
*/
- intel_batch_flush(scrn, flushed);
+ intel_batch_flush(scrn, flush);
drmCommandNone(intel->drmSubFD, DRM_I915_GEM_THROTTLE);
intel->need_mi_flush = FALSE;
@@ -1138,6 +1137,9 @@ void i830_init_bufmgr(ScrnInfoPtr scrn)
intel->bufmgr = intel_bufmgr_gem_init(intel->drmSubFD, batch_size);
intel_bufmgr_gem_enable_reuse(intel->bufmgr);
+
+ list_init(&intel->batch_pixmaps);
+ list_init(&intel->flush_pixmaps);
}
Bool i830_crtc_on(xf86CrtcPtr crtc)
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index 0cf87c1..6dac79e 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -79,31 +79,6 @@ const int I830PatternROP[16] = {
static int uxa_pixmap_index;
-/**
- * Returns whether a given pixmap is tiled or not.
- *
- * Currently, we only have one pixmap that might be tiled, which is the front
- * buffer. At the point where we are tiling some pixmaps managed by the
- * general allocator, we should move this to using pixmap privates.
- */
-Bool i830_pixmap_tiled(PixmapPtr pixmap)
-{
- dri_bo *bo;
- uint32_t tiling_mode, swizzle_mode;
- int ret;
-
- bo = i830_get_pixmap_bo(pixmap);
- assert(bo != NULL);
-
- ret = drm_intel_bo_get_tiling(bo, &tiling_mode, &swizzle_mode);
- if (ret != 0) {
- FatalError("Couldn't get tiling on bo %p: %s\n",
- bo, strerror(-ret));
- }
-
- return tiling_mode != I915_TILING_NONE;
-}
-
Bool
i830_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table,
int num_bos)
@@ -143,6 +118,62 @@ static int i830_pixmap_pitch_is_aligned(PixmapPtr pixmap)
intel->accel_pixmap_pitch_alignment == 0;
}
+static unsigned int
+i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
+ int w, int h,
+ uint32_t *tiling,
+ int *stride)
+{
+ ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+ int pitch_align;
+ int size;
+
+ if (*tiling != I915_TILING_NONE) {
+ /* First check whether tiling is necessary. */
+ pitch_align = intel->accel_pixmap_pitch_alignment;
+ size = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8,
+ pitch_align) * ALIGN (h, 2);
+ if (size < 4096)
+ *tiling = I915_TILING_NONE;
+ }
+
+ if (*tiling == I915_TILING_NONE) {
+ pitch_align = intel->accel_pixmap_pitch_alignment;
+ } else {
+ pitch_align = 512;
+ }
+
+ *stride = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8,
+ pitch_align);
+
+ if (*tiling == I915_TILING_NONE) {
+ /* Round the height up so that the GPU's access to a 2x2 aligned
+ * subspan doesn't address an invalid page offset beyond the
+ * end of the GTT.
+ */
+ size = *stride * ALIGN(h, 2);
+ } else {
+ int aligned_h = h;
+ if (*tiling == I915_TILING_X)
+ aligned_h = ALIGN(h, 8);
+ else
+ aligned_h = ALIGN(h, 32);
+
+ *stride = i830_get_fence_pitch(intel, *stride, *tiling);
+ /* Round the object up to the size of the fence it will live in
+ * if necessary. We could potentially make the kernel allocate
+ * a larger aperture space and just bind the subset of pages in,
+ * but this is easier and also keeps us out of trouble (as much)
+ * with drm_intel_bufmgr_check_aperture().
+ */
+ size = i830_get_fence_size(intel, *stride * aligned_h);
+ assert(size >= *stride * aligned_h);
+ }
+
+ return size;
+}
+
/**
* Sets up hardware state for a series of solid fills.
*/
@@ -452,25 +483,71 @@ Bool i830_transform_is_affine(PictTransformPtr t)
return t->matrix[2][0] == 0 && t->matrix[2][1] == 0;
}
-dri_bo *i830_get_pixmap_bo(PixmapPtr pixmap)
+struct intel_pixmap *i830_get_pixmap_intel(PixmapPtr pixmap)
{
return dixLookupPrivate(&pixmap->devPrivates, &uxa_pixmap_index);
}
-void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
+static void i830_uxa_set_pixmap_intel(PixmapPtr pixmap, struct intel_pixmap *intel)
{
- dri_bo *old_bo = i830_get_pixmap_bo(pixmap);
+ dixSetPrivate(&pixmap->devPrivates, &uxa_pixmap_index, intel);
+}
- if (old_bo)
- dri_bo_unreference(old_bo);
- if (bo != NULL)
- dri_bo_reference(bo);
- dixSetPrivate(&pixmap->devPrivates, &uxa_pixmap_index, bo);
+dri_bo *i830_get_pixmap_bo(PixmapPtr pixmap)
+{
+ struct intel_pixmap *intel;
+
+ intel = i830_get_pixmap_intel(pixmap);
+ if (intel == NULL)
+ return NULL;
+
+ return intel->bo;
}
-static void i830_uxa_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
+void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
{
- dixSetPrivate(&pixmap->devPrivates, &uxa_pixmap_index, bo);
+ struct intel_pixmap *priv;
+
+ priv = i830_get_pixmap_intel(pixmap);
+
+ if (priv != NULL) {
+ dri_bo_unreference(priv->bo);
+
+ priv->flush_read_domains = priv->flush_write_domain = 0;
+ priv->batch_read_domains = priv->batch_write_domain = 0;
+ list_del(&priv->batch);
+ list_del(&priv->flush);
+ }
+
+ if (bo != NULL) {
+ uint32_t swizzle_mode;
+ int ret;
+
+ if (priv == NULL) {
+ priv = xcalloc(1, sizeof (struct intel_pixmap));
+ if (priv == NULL)
+ goto BAIL;
+ }
+
+ dri_bo_reference(bo);
+ priv->bo = bo;
+
+ ret = drm_intel_bo_get_tiling(bo,
+ &priv->tiling,
+ &swizzle_mode);
+ if (ret != 0) {
+ FatalError("Couldn't get tiling on bo %p: %s\n",
+ bo, strerror(-ret));
+ }
+ } else {
+ if (priv != NULL) {
+ xfree(priv);
+ priv = NULL;
+ }
+ }
+
+ BAIL:
+ i830_uxa_set_pixmap_intel(pixmap, priv);
}
static Bool i830_uxa_prepare_access(PixmapPtr pixmap, uxa_access_t access)
@@ -562,8 +639,6 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
{
ScrnInfoPtr scrn = xf86Screens[screen->myNum];
intel_screen_private *intel = intel_get_screen_private(scrn);
- dri_bo *bo;
- int stride;
PixmapPtr pixmap;
if (w > 32767 || h > 32767)
@@ -575,47 +650,26 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
pixmap = fbCreatePixmap(screen, 0, 0, depth, usage);
if (w && h) {
+ struct intel_pixmap *priv;
unsigned int size;
- uint32_t tiling = I915_TILING_NONE;
- int pitch_align;
-
- if (usage == INTEL_CREATE_PIXMAP_TILING_X) {
- tiling = I915_TILING_X;
- pitch_align = 512;
- } else if (usage == INTEL_CREATE_PIXMAP_TILING_Y) {
- tiling = I915_TILING_Y;
- pitch_align = 512;
- } else {
- pitch_align = intel->accel_pixmap_pitch_alignment;
- }
+ int stride;
- stride = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8,
- pitch_align);
-
- if (tiling == I915_TILING_NONE) {
- /* Round the height up so that the GPU's access to a 2x2 aligned
- * subspan doesn't address an invalid page offset beyond the
- * end of the GTT.
- */
- size = stride * ALIGN(h, 2);
- } else {
- int aligned_h = h;
- if (tiling == I915_TILING_X)
- aligned_h = ALIGN(h, 8);
- else
- aligned_h = ALIGN(h, 32);
-
- stride = i830_get_fence_pitch(intel, stride, tiling);
- /* Round the object up to the size of the fence it will live in
- * if necessary. We could potentially make the kernel allocate
- * a larger aperture space and just bind the subset of pages in,
- * but this is easier and also keeps us out of trouble (as much)
- * with drm_intel_bufmgr_check_aperture().
- */
- size = i830_get_fence_size(intel, stride * aligned_h);
- assert(size >= stride * aligned_h);
+ priv = xcalloc(1, sizeof (struct intel_pixmap));
+ if (priv == NULL) {
+ fbDestroyPixmap(pixmap);
+ return NullPixmap;
}
+ if (usage == INTEL_CREATE_PIXMAP_TILING_X)
+ priv->tiling = I915_TILING_X;
+ else if (usage == INTEL_CREATE_PIXMAP_TILING_Y)
+ priv->tiling = I915_TILING_Y;
+ else
+ priv->tiling = I915_TILING_NONE;
+
+ size = i830_uxa_pixmap_compute_size(pixmap, w, h,
+ &priv->tiling, &stride);
+
/* Fail very large allocations on 32-bit systems. Large BOs will
* tend to hit SW fallbacks frequently, and also will tend to fail
* to successfully map when doing SW fallbacks because we overcommit
@@ -626,27 +680,34 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
*/
if (sizeof(unsigned long) == 4 &&
size > (unsigned int)(1024 * 1024 * 1024)) {
+ xfree(priv);
fbDestroyPixmap(pixmap);
return NullPixmap;
}
if (usage == UXA_CREATE_PIXMAP_FOR_MAP)
- bo = drm_intel_bo_alloc(intel->bufmgr, "pixmap", size,
- 0);
+ priv->bo = drm_intel_bo_alloc(intel->bufmgr,
+ "pixmap", size, 0);
else
- bo = drm_intel_bo_alloc_for_render(intel->bufmgr,
- "pixmap", size, 0);
- if (!bo) {
+ priv->bo = drm_intel_bo_alloc_for_render(intel->bufmgr,
+ "pixmap",
+ size, 0);
+ if (!priv->bo) {
+ xfree(priv);
fbDestroyPixmap(pixmap);
return NullPixmap;
}
- if (tiling != I915_TILING_NONE)
- drm_intel_bo_set_tiling(bo, &tiling, stride);
+ if (priv->tiling != I915_TILING_NONE)
+ drm_intel_bo_set_tiling(priv->bo,
+ &priv->tiling,
+ stride);
screen->ModifyPixmapHeader(pixmap, w, h, 0, 0, stride, NULL);
- i830_uxa_set_pixmap_bo(pixmap, bo);
+ list_init(&priv->batch);
+ list_init(&priv->flush);
+ i830_uxa_set_pixmap_intel(pixmap, priv);
}
return pixmap;
@@ -654,16 +715,13 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
static Bool i830_uxa_destroy_pixmap(PixmapPtr pixmap)
{
- if (pixmap->refcnt == 1) {
- dri_bo *bo = i830_get_pixmap_bo(pixmap);
-
- if (bo)
- dri_bo_unreference(bo);
- }
+ if (pixmap->refcnt == 1)
+ i830_set_pixmap_bo(pixmap, NULL);
fbDestroyPixmap(pixmap);
return TRUE;
}
+
void i830_uxa_create_screen_resources(ScreenPtr screen)
{
ScrnInfoPtr scrn = xf86Screens[screen->myNum];
@@ -672,8 +730,7 @@ void i830_uxa_create_screen_resources(ScreenPtr screen)
if (bo != NULL) {
PixmapPtr pixmap = screen->GetScreenPixmap(screen);
- i830_uxa_set_pixmap_bo(pixmap, bo);
- dri_bo_reference(bo);
+ i830_set_pixmap_bo(pixmap, bo);
}
}
diff --git a/src/i965_render.c b/src/i965_render.c
index 236ce49..8746eb9 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1046,13 +1046,14 @@ static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type)
* picture in the given surface state buffer.
*/
static void
-i965_set_picture_surface_state(dri_bo * ss_bo, int ss_index,
+i965_set_picture_surface_state(intel_screen_private *intel,
+ dri_bo * ss_bo, int ss_index,
PicturePtr picture, PixmapPtr pixmap,
Bool is_dst)
{
struct brw_surface_state_padded *ss;
struct brw_surface_state local_ss;
- dri_bo *pixmap_bo = i830_get_pixmap_bo(pixmap);
+ struct intel_pixmap *priv = i830_get_pixmap_intel(pixmap);
ss = (struct brw_surface_state_padded *)ss_bo->virtual + ss_index;
@@ -1082,7 +1083,7 @@ i965_set_picture_surface_state(dri_bo * ss_bo, int ss_index,
local_ss.ss0.vert_line_stride_ofs = 0;
local_ss.ss0.mipmap_layout_mode = 0;
local_ss.ss0.render_cache_read_mode = 0;
- local_ss.ss1.base_addr = pixmap_bo->offset;
+ local_ss.ss1.base_addr = priv->bo->offset;
local_ss.ss2.mip_count = 0;
local_ss.ss2.render_target_rotation = 0;
@@ -1094,7 +1095,7 @@ i965_set_picture_surface_state(dri_bo * ss_bo, int ss_index,
memcpy(ss, &local_ss, sizeof(local_ss));
- if (pixmap_bo != NULL) {
+ if (priv->bo != NULL) {
uint32_t write_domain, read_domains;
if (is_dst) {
@@ -1104,11 +1105,13 @@ i965_set_picture_surface_state(dri_bo * ss_bo, int ss_index,
write_domain = 0;
read_domains = I915_GEM_DOMAIN_SAMPLER;
}
+
+ intel_batch_mark_pixmap_domains(intel, priv, read_domains, write_domain);
dri_bo_emit_reloc(ss_bo, read_domains, write_domain,
0,
ss_index * sizeof(*ss) +
offsetof(struct brw_surface_state, ss1),
- pixmap_bo);
+ priv->bo);
}
}
@@ -1508,14 +1511,14 @@ i965_prepare_composite(int op, PicturePtr source_picture,
return FALSE;
}
/* Set up the state buffer for the destination surface */
- i965_set_picture_surface_state(surface_state_bo, 0,
+ i965_set_picture_surface_state(intel, surface_state_bo, 0,
dest_picture, dest, TRUE);
/* Set up the source surface state buffer */
- i965_set_picture_surface_state(surface_state_bo, 1,
+ i965_set_picture_surface_state(intel, surface_state_bo, 1,
source_picture, source, FALSE);
if (mask) {
/* Set up the mask surface state buffer */
- i965_set_picture_surface_state(surface_state_bo, 2,
+ i965_set_picture_surface_state(intel, surface_state_bo, 2,
mask_picture, mask, FALSE);
}
dri_bo_unmap(surface_state_bo);
commit 2c3aee2b570dadd9270a08d8ff675d07ac405e33
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Nov 29 20:53:35 2009 +0000
uxa-glyphs: Stream uploads via temporary bo
Avoid mapping the glyph cache back to the cpu by allocating temporary
buffer objects to store the glyph pixmap and blit to the cache.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/uxa/uxa-glyphs.c b/uxa/uxa-glyphs.c
index 5901552..ff16781 100644
--- a/uxa/uxa-glyphs.c
+++ b/uxa/uxa-glyphs.c
@@ -363,6 +363,7 @@ uxa_glyph_cache_upload_glyph(ScreenPtr pScreen,
PicturePtr pGlyphPicture = GlyphPicture(pGlyph)[pScreen->myNum];
PixmapPtr pGlyphPixmap = (PixmapPtr) pGlyphPicture->pDrawable;
PixmapPtr pCachePixmap = (PixmapPtr) cache->picture->pDrawable;
+ PixmapPtr scratch;
GCPtr pGC;
/* UploadToScreen only works if bpp match */
@@ -372,12 +373,35 @@ uxa_glyph_cache_upload_glyph(ScreenPtr pScreen,
pGC = GetScratchGC(pCachePixmap->drawable.depth, pScreen);
ValidateGC(&pCachePixmap->drawable, pGC);
- (void)uxa_copy_area(&pGlyphPixmap->drawable,
+
+ /* Create a temporary bo to stream the updates to the cache */
+ scratch = (*pScreen->CreatePixmap)(pScreen,
+ pGlyph->info.width,
+ pGlyph->info.height,
+ pGlyphPixmap->drawable.depth,
+ UXA_CREATE_PIXMAP_FOR_MAP);
+ if (scratch) {
+ (void)uxa_copy_area(&pGlyphPixmap->drawable,
+ &scratch->drawable,
+ pGC,
+ 0, 0,
+ pGlyph->info.width, pGlyph->info.height,
+ 0, 0);
+ } else {
+ scratch = pGlyphPixmap;
+ }
+
+ (void)uxa_copy_area(&scratch->drawable,
&pCachePixmap->drawable,
pGC,
0, 0, pGlyph->info.width, pGlyph->info.height,
CACHE_X(pos), CACHE_Y(pos));
+
+ if (scratch != pGlyphPixmap)
+ (*pScreen->DestroyPixmap)(scratch);
+
FreeScratchGC(pGC);
+
return TRUE;
}
More information about the xorg-commit
mailing list