xf86-video-intel: 10 commits - src/sna/blt.c src/sna/gen6_render.c src/sna/kgem.c src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna.h src/sna/sna_io.c src/sna/sna_reg.h src/sna/sna_render.h src/sna/sna_trapezoids.c test/.gitignore test/Makefile.am test/render-copy-alphaless.c
Chris Wilson
ickle at kemper.freedesktop.org
Fri Jan 6 10:12:58 PST 2012
src/sna/blt.c | 105 ++++++++++++
src/sna/gen6_render.c | 50 ++++-
src/sna/kgem.c | 3
src/sna/sna.h | 13 +
src/sna/sna_accel.c | 72 +++++---
src/sna/sna_blt.c | 375 ++++++++++++++++++++++++++++++++++++++-----
src/sna/sna_io.c | 241 +++++++++++++++++++++++++++
src/sna/sna_reg.h | 1
src/sna/sna_render.h | 1
src/sna/sna_trapezoids.c | 3
test/.gitignore | 1
test/Makefile.am | 1
test/render-copy-alphaless.c | 285 ++++++++++++++++++++++++++++++++
13 files changed, 1075 insertions(+), 76 deletions(-)
New commits:
commit e2ad0f6272c776939499c6202fb60bdcc2e955fa
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jan 6 17:11:00 2012 +0000
sna/blt: Amalgamate many PolyFillRect of single boxes
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 07771a9..038105a 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1763,7 +1763,7 @@ bool sna_blt_copy(struct sna *sna, uint8_t alu,
return TRUE;
}
-static Bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
+static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
struct kgem_bo *bo, int bpp,
uint32_t color,
const BoxRec *box)
@@ -1831,6 +1831,13 @@ static Bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
return TRUE;
}
+ /* If we are currently emitting SCANLINES, keep doing so */
+ if (sna->blt_state.fill_bo == bo->handle &&
+ sna->blt_state.fill_pixel == color &&
+ (sna->blt_state.fill_alu == alu ||
+ sna->blt_state.fill_alu == ~alu))
+ return FALSE;
+
kgem_set_mode(kgem, KGEM_BLT);
if (!kgem_check_batch(kgem, 6) ||
!kgem_check_reloc(kgem, 1) ||
@@ -1851,7 +1858,9 @@ static Bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
b[5] = color;
kgem->nbatch += 6;
- sna->blt_state.fill_bo = 0;
+ sna->blt_state.fill_bo = bo->handle;
+ sna->blt_state.fill_pixel = color;
+ sna->blt_state.fill_alu = ~alu;
return TRUE;
}
@@ -1875,8 +1884,8 @@ Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
return FALSE;
}
- if (nbox == 1)
- return sna_blt_fill_box(sna, alu, bo, bpp, pixel, box);
+ if (nbox == 1 && sna_blt_fill_box(sna, alu, bo, bpp, pixel, box))
+ return TRUE;
br13 = bo->pitch;
cmd = XY_SCANLINE_BLT;
@@ -1969,7 +1978,7 @@ Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
b = kgem->batch + kgem->nbatch;
kgem->nbatch += 3;
b[0] = cmd;
- *(uint64_t *)(b+1) = *(uint64_t *)box;
+ *(uint64_t *)(b+1) = *(const uint64_t *)box;
box++;
} while (--nbox_this_time);
commit c085de905c30cb5bae6b339841badf1e105c6ee6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jan 6 15:47:47 2012 +0000
sna: Also mark a bo created by force-to-gpu as being all-damaged
Similar to the action taken in move-to-gpu, so that we forgo the
overhead of damage tracking when the initial act of creation is on the
render paths.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 7ae76eb..e3816b3 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1507,6 +1507,20 @@ sna_pixmap_force_to_gpu(PixmapPtr pixmap, unsigned flags)
return NULL;
DBG(("%s: created gpu bo\n", __FUNCTION__));
+
+ if (flags & MOVE_WRITE && priv->cpu_damage == NULL) {
+ /* Presume that we will only ever write to the GPU
+ * bo. Readbacks are expensive but fairly constant
+ * in cost for all sizes i.e. it is the act of
+ * synchronisation that takes the most time. This is
+ * mitigated by avoiding fallbacks in the first place.
+ */
+ sna_damage_all(&priv->gpu_damage,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ DBG(("%s: marking as all-damaged for GPU\n",
+ __FUNCTION__));
+ }
}
if (!sna_pixmap_move_to_gpu(pixmap, flags))
@@ -1557,6 +1571,8 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
sna_damage_all(&priv->gpu_damage,
pixmap->drawable.width,
pixmap->drawable.height);
+ DBG(("%s: marking as all-damaged for GPU\n",
+ __FUNCTION__));
}
}
commit 9f1935bb4e894264053d94e53c99d5ad607700fb
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jan 6 15:26:11 2012 +0000
sna: Support performing alpha-fixup on the source
By inlining the swizzling of the alpha-channel we can support BLT copies
from an alpha-less pixmap to an alpha-destination.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/blt.c b/src/sna/blt.c
index 7a77fa4..d28ad98 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -106,3 +106,108 @@ memcpy_blt(const void *src, void *dst, int bpp,
break;
}
}
+
+void
+memcpy_xor(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height,
+ uint32_t and, uint32_t or)
+{
+ uint8_t *src_bytes;
+ uint8_t *dst_bytes;
+ int i;
+
+ assert(width && height);
+ assert(bpp >= 8);
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d, bpp=%d, and=%x, xor=%x\n",
+ __FUNCTION__,
+ src_x, src_y, dst_x, dst_y,
+ width, height,
+ src_stride, dst_stride,
+ bpp, and, or));
+
+ bpp /= 8;
+ src_bytes = (uint8_t *)src + src_stride * src_y + src_x * bpp;
+ dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * bpp;
+
+ if (and == 0xffffffff) {
+ switch (bpp) {
+ case 1:
+ do {
+ for (i = 0; i < width; i++)
+ dst_bytes[i] = src_bytes[i] | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 2:
+ do {
+ uint16_t *d = (uint16_t *)dst_bytes;
+ uint16_t *s = (uint16_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = s[i] | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 4:
+ do {
+ uint32_t *d = (uint32_t *)dst_bytes;
+ uint32_t *s = (uint32_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = s[i] | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+ }
+ } else {
+ switch (bpp) {
+ case 1:
+ do {
+ for (i = 0; i < width; i++)
+ dst_bytes[i] = (src_bytes[i] & and) | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 2:
+ do {
+ uint16_t *d = (uint16_t *)dst_bytes;
+ uint16_t *s = (uint16_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = (s[i] & and) | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 4:
+ do {
+ uint32_t *d = (uint32_t *)dst_bytes;
+ uint32_t *s = (uint32_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = (s[i] & and) | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+ }
+ }
+}
diff --git a/src/sna/sna.h b/src/sna/sna.h
index f16324e..de4de5c 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -685,6 +685,11 @@ void sna_write_boxes(struct sna *sna, PixmapPtr dst,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const void *src, int stride, int16_t src_dx, int16_t src_dy,
const BoxRec *box, int n);
+void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
+ struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const void *src, int stride, int16_t src_dx, int16_t src_dy,
+ const BoxRec *box, int nbox,
+ uint32_t and, uint32_t or);
struct kgem_bo *sna_replace(struct sna *sna,
PixmapPtr pixmap,
@@ -713,6 +718,14 @@ memcpy_blt(const void *src, void *dst, int bpp,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
+void
+memcpy_xor(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height,
+ uint32_t and, uint32_t or);
+
#define SNA_CREATE_FB 0x10
#define SNA_CREATE_SCRATCH 0x11
#define SNA_CREATE_GLYPH 0x12
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 5879e97..07771a9 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -307,6 +307,104 @@ static Bool sna_blt_copy_init(struct sna *sna,
return TRUE;
}
+static Bool sna_blt_alpha_fixup_init(struct sna *sna,
+ struct sna_blt_state *blt,
+ struct kgem_bo *src,
+ struct kgem_bo *dst,
+ int bpp, uint32_t alpha)
+{
+ struct kgem *kgem = &sna->kgem;
+
+ blt->bo[0] = src;
+ blt->bo[1] = dst;
+
+ blt->cmd = XY_FULL_MONO_PATTERN_BLT;
+ blt->pitch[0] = src->pitch;
+ if (kgem->gen >= 40 && src->tiling) {
+ blt->cmd |= BLT_SRC_TILED;
+ blt->pitch[0] >>= 2;
+ }
+ assert(blt->pitch[0] < MAXSHORT);
+
+ blt->pitch[1] = dst->pitch;
+ if (kgem->gen >= 40 && dst->tiling) {
+ blt->cmd |= BLT_DST_TILED;
+ blt->pitch[1] >>= 2;
+ }
+ assert(blt->pitch[1] < MAXSHORT);
+
+ blt->overwrites = 1;
+ blt->br13 = (0xfc << 16) | blt->pitch[1];
+ switch (bpp) {
+ default: assert(0);
+ case 32: blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ blt->br13 |= 1 << 25; /* RGB8888 */
+ case 16: blt->br13 |= 1 << 24; /* RGB565 */
+ case 8: break;
+ }
+ blt->pixel = alpha;
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (!kgem_check_bo_fenced(kgem, src, dst, NULL)) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ sna->blt_state.fill_bo = 0;
+ return TRUE;
+}
+
+static void sna_blt_alpha_fixup_one(struct sna *sna,
+ const struct sna_blt_state *blt,
+ int src_x, int src_y,
+ int width, int height,
+ int dst_x, int dst_y)
+{
+ struct kgem *kgem = &sna->kgem;
+ uint32_t *b;
+
+ DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
+ __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));
+
+ assert(src_x >= 0);
+ assert(src_y >= 0);
+ assert((src_y + height) * blt->bo[0]->pitch <= blt->bo[0]->size);
+ assert(dst_x >= 0);
+ assert(dst_y >= 0);
+ assert((dst_y + height) * blt->bo[1]->pitch <= blt->bo[1]->size);
+ assert(width > 0);
+ assert(height > 0);
+
+ if (!kgem_check_batch(kgem, 12) || !kgem_check_reloc(kgem, 2)) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ b = kgem->batch + kgem->nbatch;
+ b[0] = blt->cmd;
+ b[1] = blt->br13;
+ b[2] = (dst_y << 16) | dst_x;
+ b[3] = ((dst_y + height) << 16) | (dst_x + width);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
+ blt->bo[1],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = blt->pitch[0];
+ b[6] = (src_y << 16) | src_x;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7,
+ blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = blt->pixel;
+ b[9] = blt->pixel;
+ b[10] = 0;
+ b[11] = 0;
+ kgem->nbatch += 12;
+}
+
static void sna_blt_copy_one(struct sna *sna,
const struct sna_blt_state *blt,
int src_x, int src_y,
@@ -930,9 +1028,90 @@ static void blt_composite_copy_boxes(struct sna *sna,
} while(--nbox);
}
+fastcall static void
+blt_composite_copy_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ int x1, x2, y1, y2;
+ int src_x, src_y;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__,
+ r->src.x, r->src.y,
+ r->dst.x, r->dst.y,
+ r->width, r->height));
+
+ /* XXX higher layer should have clipped? */
+
+ x1 = r->dst.x + op->dst.x;
+ y1 = r->dst.y + op->dst.y;
+ x2 = x1 + r->width;
+ y2 = y1 + r->height;
+
+ src_x = r->src.x - x1;
+ src_y = r->src.y - y1;
+
+ /* clip against dst */
+ if (x1 < 0)
+ x1 = 0;
+ if (y1 < 0)
+ y1 = 0;
+
+ if (x2 > op->dst.width)
+ x2 = op->dst.width;
+
+ if (y2 > op->dst.height)
+ y2 = op->dst.height;
+
+ DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
+
+ if (x2 <= x1 || y2 <= y1)
+ return;
+
+ sna_blt_alpha_fixup_one(sna, &op->u.blt,
+ x1 + src_x, y1 + src_y,
+ x2 - x1, y2 - y1,
+ x1, y1);
+}
+
+fastcall static void
+blt_composite_copy_box_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box)
+{
+ DBG(("%s: box (%d, %d), (%d, %d)\n",
+ __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+ sna_blt_alpha_fixup_one(sna, &op->u.blt,
+ box->x1 + op->u.blt.sx,
+ box->y1 + op->u.blt.sy,
+ box->x2 - box->x1,
+ box->y2 - box->y1,
+ box->x1 + op->dst.x,
+ box->y1 + op->dst.y);
+}
+
+static void
+blt_composite_copy_boxes_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+ do {
+ DBG(("%s: box (%d, %d), (%d, %d)\n",
+ __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+ sna_blt_alpha_fixup_one(sna, &op->u.blt,
+ box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ box->x1 + op->dst.x, box->y1 + op->dst.y);
+ box++;
+ } while(--nbox);
+}
+
static Bool
prepare_blt_copy(struct sna *sna,
- struct sna_composite_op *op)
+ struct sna_composite_op *op,
+ uint32_t alpha_fixup)
{
PixmapPtr src = op->u.blt.src_pixmap;
struct sna_pixmap *priv = sna_pixmap(src);
@@ -947,19 +1126,32 @@ prepare_blt_copy(struct sna *sna,
DBG(("%s\n", __FUNCTION__));
- op->blt = blt_composite_copy;
- op->box = blt_composite_copy_box;
- op->boxes = blt_composite_copy_boxes;
if (sna->kgem.gen >= 60)
op->done = gen6_blt_copy_done;
else
op->done = blt_done;
- return sna_blt_copy_init(sna, &op->u.blt,
- priv->gpu_bo,
- op->dst.bo,
- src->drawable.bitsPerPixel,
- GXcopy);
+ if (alpha_fixup) {
+ op->blt = blt_composite_copy_with_alpha;
+ op->box = blt_composite_copy_box_with_alpha;
+ op->boxes = blt_composite_copy_boxes_with_alpha;
+
+ return sna_blt_alpha_fixup_init(sna, &op->u.blt,
+ priv->gpu_bo,
+ op->dst.bo,
+ src->drawable.bitsPerPixel,
+ alpha_fixup);
+ } else {
+ op->blt = blt_composite_copy;
+ op->box = blt_composite_copy_box;
+ op->boxes = blt_composite_copy_boxes;
+
+ return sna_blt_copy_init(sna, &op->u.blt,
+ priv->gpu_bo,
+ op->dst.bo,
+ src->drawable.bitsPerPixel,
+ GXcopy);
+ }
}
static void blt_vmap_done(struct sna *sna, const struct sna_composite_op *op)
@@ -1082,9 +1274,80 @@ static void blt_put_composite_boxes(struct sna *sna,
}
}
+fastcall static void
+blt_put_composite_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ PixmapPtr dst = op->dst.pixmap;
+ PixmapPtr src = op->u.blt.src_pixmap;
+ struct sna_pixmap *dst_priv = sna_pixmap(dst);
+ int pitch = src->devKind;
+ char *data = src->devPrivate.ptr;
+
+ int16_t dst_x = r->dst.x + op->dst.x;
+ int16_t dst_y = r->dst.y + op->dst.y;
+ int16_t src_x = r->src.x + op->u.blt.sx;
+ int16_t src_y = r->src.y + op->u.blt.sy;
+ BoxRec box;
+
+ box.x1 = dst_x;
+ box.y1 = dst_y;
+ box.x2 = dst_x + r->width;
+ box.y2 = dst_y + r->height;
+
+ sna_write_boxes__xor(sna, dst,
+ dst_priv->gpu_bo, 0, 0,
+ data, pitch, src_x, src_y,
+ &box, 1,
+ 0xffffffff, op->u.blt.pixel);
+}
+
+fastcall static void
+blt_put_composite_box_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box)
+{
+ PixmapPtr src = op->u.blt.src_pixmap;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
+ op->u.blt.sx, op->u.blt.sy,
+ op->dst.x, op->dst.y));
+
+ sna_write_boxes__xor(sna, op->dst.pixmap,
+ op->dst.bo, op->dst.x, op->dst.y,
+ src->devPrivate.ptr,
+ src->devKind,
+ op->u.blt.sx, op->u.blt.sy,
+ box, 1,
+ 0xffffffff, op->u.blt.pixel);
+}
+
+static void
+blt_put_composite_boxes_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int n)
+{
+ PixmapPtr src = op->u.blt.src_pixmap;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
+ op->u.blt.sx, op->u.blt.sy,
+ op->dst.x, op->dst.y,
+ box->x1, box->y1, box->x2, box->y2, n));
+
+ sna_write_boxes__xor(sna, op->dst.pixmap,
+ op->dst.bo, op->dst.x, op->dst.y,
+ src->devPrivate.ptr,
+ src->devKind,
+ op->u.blt.sx, op->u.blt.sy,
+ box, n,
+ 0xffffffff, op->u.blt.pixel);
+}
+
static Bool
prepare_blt_put(struct sna *sna,
- struct sna_composite_op *op)
+ struct sna_composite_op *op,
+ uint32_t alpha_fixup)
{
PixmapPtr src = op->u.blt.src_pixmap;
struct sna_pixmap *priv = sna_pixmap(src);
@@ -1105,26 +1368,43 @@ prepare_blt_put(struct sna *sna,
free_bo = src_bo;
}
if (src_bo) {
- op->blt = blt_composite_copy;
- op->box = blt_composite_copy_box;
- op->boxes = blt_composite_copy_boxes;
-
op->u.blt.src_pixmap = (void *)free_bo;
op->done = blt_vmap_done;
src_bo->pitch = src->devKind;
- if (!sna_blt_copy_init(sna, &op->u.blt,
- src_bo, op->dst.bo,
- op->dst.pixmap->drawable.bitsPerPixel,
- GXcopy))
- return FALSE;
+ if (alpha_fixup) {
+ op->blt = blt_composite_copy_with_alpha;
+ op->box = blt_composite_copy_box_with_alpha;
+ op->boxes = blt_composite_copy_boxes_with_alpha;
+
+ return sna_blt_alpha_fixup_init(sna, &op->u.blt,
+ src_bo, op->dst.bo,
+ op->dst.pixmap->drawable.bitsPerPixel,
+ alpha_fixup);
+ } else {
+ op->blt = blt_composite_copy;
+ op->box = blt_composite_copy_box;
+ op->boxes = blt_composite_copy_boxes;
+
+ return sna_blt_copy_init(sna, &op->u.blt,
+ src_bo, op->dst.bo,
+ op->dst.pixmap->drawable.bitsPerPixel,
+ GXcopy);
+ }
} else {
if (!sna_pixmap_move_to_cpu(src, MOVE_READ))
return FALSE;
- op->blt = blt_put_composite;
- op->box = blt_put_composite_box;
- op->boxes = blt_put_composite_boxes;
+ if (alpha_fixup) {
+ op->u.blt.pixel = alpha_fixup;
+ op->blt = blt_put_composite_with_alpha;
+ op->box = blt_put_composite_box_with_alpha;
+ op->boxes = blt_put_composite_boxes_with_alpha;
+ } else {
+ op->blt = blt_put_composite;
+ op->box = blt_put_composite_box;
+ op->boxes = blt_put_composite_boxes;
+ }
op->done = nop_done;
}
@@ -1209,6 +1489,13 @@ reduce_damage(struct sna_composite_op *op,
op->damage = NULL;
}
+#define alphaless(format) PICT_FORMAT(PICT_FORMAT_BPP(format), \
+ PICT_FORMAT_TYPE(format), \
+ 0, \
+ PICT_FORMAT_R(format), \
+ PICT_FORMAT_G(format), \
+ PICT_FORMAT_B(format))
+
Bool
sna_blt_composite(struct sna *sna,
uint32_t op,
@@ -1223,6 +1510,7 @@ sna_blt_composite(struct sna *sna,
PictFormat src_format = src->format;
struct sna_pixmap *priv;
int16_t tx, ty;
+ uint32_t alpha_fixup;
Bool ret;
#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
@@ -1309,13 +1597,13 @@ sna_blt_composite(struct sna *sna,
return FALSE;
}
+ alpha_fixup = 0;
if (!(dst->format == src_format ||
- dst->format == PICT_FORMAT(PICT_FORMAT_BPP(src_format),
- PICT_FORMAT_TYPE(src_format),
- 0,
- PICT_FORMAT_R(src_format),
- PICT_FORMAT_G(src_format),
- PICT_FORMAT_B(src_format)))) {
+ dst->format == alphaless(src_format) ||
+ (alphaless(dst->format) == alphaless(src_format) &&
+ sna_get_pixel_from_rgba(&alpha_fixup,
+ 0, 0, 0, 0xffff,
+ dst->format)))) {
DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
__FUNCTION__, (unsigned)src_format, dst->format));
return FALSE;
@@ -1349,18 +1637,18 @@ sna_blt_composite(struct sna *sna,
tmp->u.blt.sx = x - dst_x;
tmp->u.blt.sy = y - dst_y;
- DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d)\n",
+ DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
__FUNCTION__,
- tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy));
+ tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));
if (has_gpu_area(blt->src_pixmap, x, y, width, height))
- ret = prepare_blt_copy(sna, tmp);
+ ret = prepare_blt_copy(sna, tmp, alpha_fixup);
else if (has_cpu_area(blt->src_pixmap, x, y, width, height))
- ret = prepare_blt_put(sna, tmp);
+ ret = prepare_blt_put(sna, tmp, alpha_fixup);
else if (sna_pixmap_move_to_gpu(blt->src_pixmap, MOVE_READ))
- ret = prepare_blt_copy(sna, tmp);
+ ret = prepare_blt_copy(sna, tmp, alpha_fixup);
else
- ret = prepare_blt_put(sna, tmp);
+ ret = prepare_blt_put(sna, tmp, alpha_fixup);
return ret;
}
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index c5e66f1..aef3f50 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -504,6 +504,247 @@ fallback:
sna->blt_state.fill_bo = 0;
}
+static void
+write_boxes_inplace__xor(struct kgem *kgem,
+ const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
+ struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n,
+ uint32_t and, uint32_t or)
+{
+ int dst_pitch = bo->pitch;
+ int src_pitch = stride;
+ void *dst;
+
+ DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));
+
+ kgem_bo_submit(kgem, bo);
+
+ dst = kgem_bo_map(kgem, bo, PROT_READ | PROT_WRITE);
+ if (dst == NULL)
+ return;
+
+ do {
+ DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ bpp, src_pitch, dst_pitch));
+
+ memcpy_xor(src, dst, bpp,
+ src_pitch, dst_pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ and, or);
+ box++;
+ } while (--n);
+}
+
+void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
+ struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const void *src, int stride, int16_t src_dx, int16_t src_dy,
+ const BoxRec *box, int nbox,
+ uint32_t and, uint32_t or)
+{
+ struct kgem *kgem = &sna->kgem;
+ struct kgem_bo *src_bo;
+ void *ptr;
+ int offset;
+ int n, cmd, br13;
+
+ DBG(("%s x %d\n", __FUNCTION__, nbox));
+
+ if (DEBUG_NO_IO || kgem->wedged ||
+ !kgem_bo_map_will_stall(kgem, dst_bo)) {
+fallback:
+ write_boxes_inplace__xor(kgem,
+ src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ box, nbox,
+ and, or);
+ return;
+ }
+
+ /* Try to avoid switching rings... */
+ if (dst_bo->tiling == I915_TILING_Y || kgem->ring == KGEM_RENDER) {
+ PixmapRec tmp;
+ BoxRec extents;
+
+ /* XXX Composite? Not that we should ever reach here! */
+
+ extents = box[0];
+ for (n = 1; n < nbox; n++) {
+ if (box[n].x1 < extents.x1)
+ extents.x1 = box[n].x1;
+ if (box[n].x2 > extents.x2)
+ extents.x2 = box[n].x2;
+
+ if (box[n].y1 < extents.y1)
+ extents.y1 = box[n].y1;
+ if (box[n].y2 > extents.y2)
+ extents.y2 = box[n].y2;
+ }
+
+ tmp.drawable.width = extents.x2 - extents.x1;
+ tmp.drawable.height = extents.y2 - extents.y1;
+ tmp.drawable.depth = dst->drawable.depth;
+ tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel;
+ tmp.devPrivate.ptr = NULL;
+
+ assert(tmp.drawable.width);
+ assert(tmp.drawable.height);
+
+ tmp.devKind = tmp.drawable.width * tmp.drawable.bitsPerPixel / 8;
+ tmp.devKind = ALIGN(tmp.devKind, 4);
+
+ src_bo = kgem_create_buffer(kgem,
+ tmp.drawable.height * tmp.devKind,
+ KGEM_BUFFER_WRITE,
+ &ptr);
+ if (!src_bo)
+ goto fallback;
+
+ src_bo->pitch = tmp.devKind;
+
+ for (n = 0; n < nbox; n++) {
+ memcpy_xor(src, ptr, tmp.drawable.bitsPerPixel,
+ stride, tmp.devKind,
+ box[n].x1 + src_dx,
+ box[n].y1 + src_dy,
+ box[n].x1 - extents.x1,
+ box[n].y1 - extents.y1,
+ box[n].x2 - box[n].x1,
+ box[n].y2 - box[n].y1,
+ and, or);
+ }
+
+ n = sna->render.copy_boxes(sna, GXcopy,
+ &tmp, src_bo, -extents.x1, -extents.y1,
+ dst, dst_bo, dst_dx, dst_dy,
+ box, nbox);
+
+ kgem_bo_destroy(&sna->kgem, src_bo);
+
+ if (!n)
+ goto fallback;
+
+ return;
+ }
+
+ cmd = XY_SRC_COPY_BLT_CMD;
+ br13 = dst_bo->pitch;
+ if (kgem->gen >= 40 && dst_bo->tiling) {
+ cmd |= BLT_DST_TILED;
+ br13 >>= 2;
+ }
+ br13 |= 0xcc << 16;
+ switch (dst->drawable.bitsPerPixel) {
+ default:
+ case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ br13 |= 1 << 25; /* RGB8888 */
+ case 16: br13 |= 1 << 24; /* RGB565 */
+ case 8: break;
+ }
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) ||
+ kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) ||
+ !kgem_check_batch(kgem, 8) ||
+ !kgem_check_bo_fenced(kgem, dst_bo, NULL)) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ /* Count the total number of bytes to be read and allocate a
+ * single buffer large enough. Or if it is very small, combine
+ * with other allocations. */
+ offset = 0;
+ for (n = 0; n < nbox_this_time; n++) {
+ int height = box[n].y2 - box[n].y1;
+ int width = box[n].x2 - box[n].x1;
+ offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
+ }
+
+ src_bo = kgem_create_buffer(kgem, offset,
+ KGEM_BUFFER_WRITE | (nbox ? KGEM_BUFFER_LAST : 0),
+ &ptr);
+ if (!src_bo)
+ break;
+
+ offset = 0;
+ do {
+ int height = box->y2 - box->y1;
+ int width = box->x2 - box->x1;
+ int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
+ uint32_t *b;
+
+ DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
+ __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ width, height,
+ offset, pitch));
+
+ assert(box->x1 + src_dx >= 0);
+ assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ memcpy_xor(src, (char *)ptr + offset,
+ dst->drawable.bitsPerPixel,
+ stride, pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ 0, 0,
+ width, height,
+ and, or);
+
+ b = kgem->batch + kgem->nbatch;
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
+ b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = 0;
+ b[6] = pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ offset);
+ kgem->nbatch += 8;
+
+ box++;
+ offset += pitch * height;
+ } while (--nbox_this_time);
+ assert(offset == src_bo->size);
+
+ if (nbox) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ kgem_bo_destroy(kgem, src_bo);
+ } while (nbox);
+
+ sna->blt_state.fill_bo = 0;
+}
+
struct kgem_bo *sna_replace(struct sna *sna,
PixmapPtr pixmap,
struct kgem_bo *bo,
diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h
index ff2ff3b..551d64b 100644
--- a/src/sna/sna_reg.h
+++ b/src/sna/sna_reg.h
@@ -55,6 +55,7 @@
#define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7)
#define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6))
#define XY_MONO_SRC_COPY_IMM ((0x2<<29)|(0x71<<22))
+#define XY_FULL_MONO_PATTERN_BLT ((0x2<<29)|(0x57<<22)|0xa)
#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa)
/* FLUSH commands */
diff --git a/test/.gitignore b/test/.gitignore
index 4bfc70d..e24e3fd 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -11,4 +11,5 @@ render-fill-copy
render-composite-solid
render-copyarea
render-copyarea-size
+render-copy-alphaless
mixed-stress
diff --git a/test/Makefile.am b/test/Makefile.am
index dc35f9f..a14396e 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -12,6 +12,7 @@ stress_TESTS = \
render-composite-solid \
render-copyarea \
render-copyarea-size \
+ render-copy-alphaless \
mixed-stress \
$(NULL)
diff --git a/test/render-copy-alphaless.c b/test/render-copy-alphaless.c
new file mode 100644
index 0000000..b968704
--- /dev/null
+++ b/test/render-copy-alphaless.c
@@ -0,0 +1,285 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <X11/Xutil.h> /* for XDestroyImage */
+#include <pixman.h> /* for pixman blt functions */
+
+#include "test.h"
+
+static void
+show_cells(char *buf,
+ const uint32_t *real, const uint32_t *ref,
+ int x, int y, int w, int h)
+{
+ int i, j, len = 0;
+
+ for (j = y - 2; j <= y + 2; j++) {
+ if (j < 0 || j >= h)
+ continue;
+
+ for (i = x - 2; i <= x + 2; i++) {
+ if (i < 0 || i >= w)
+ continue;
+
+ len += sprintf(buf+len, "%08x ", real[j*w+i]);
+ }
+
+ len += sprintf(buf+len, "\t");
+
+ for (i = x - 2; i <= x + 2; i++) {
+ if (i < 0 || i >= w)
+ continue;
+
+ len += sprintf(buf+len, "%08x ", ref[j*w+i]);
+ }
+
+ len += sprintf(buf+len, "\n");
+ }
+}
+
+static void fill_rect(struct test_display *t, Picture p,
+ int x, int y, int w, int h,
+ uint8_t red, uint8_t green, uint8_t blue)
+{
+ Drawable tmp;
+ XRenderColor c;
+ Picture src;
+ XRenderPictFormat *format;
+
+ format = XRenderFindStandardFormat(t->dpy, PictStandardRGB24);
+
+ tmp = XCreatePixmap(t->dpy, DefaultRootWindow(t->dpy),
+ w, h, format->depth);
+
+ src = XRenderCreatePicture(t->dpy, tmp, format, 0, NULL);
+ c.red = (int)red << 8 | red;
+ c.green = (int)green << 8 | green;
+ c.blue = (int)blue << 8 | blue;
+ c.alpha = 0xffff;
+ XRenderFillRectangle(t->dpy, PictOpSrc, src, &c, 0, 0, w, h);
+ XRenderComposite(t->dpy, PictOpOver, src, 0, p, 0, 0, 0, 0, x, y, w, h);
+
+ XRenderFreePicture(t->dpy, src);
+ XFreePixmap(t->dpy, tmp);
+}
+
+static void pixel_tests(struct test *t, int reps, int sets, enum target target)
+{
+ struct test_target tt;
+ XImage image;
+ uint32_t *cells = malloc(t->real.width*t->real.height*4);
+ struct {
+ uint16_t x, y;
+ } *pixels = malloc(reps*sizeof(*pixels));
+ int r, s;
+
+ test_target_create_render(&t->real, target, &tt);
+
+ printf("Testing setting of single pixels (%s): ",
+ test_target_name(target));
+ fflush(stdout);
+
+ for (s = 0; s < sets; s++) {
+ for (r = 0; r < reps; r++) {
+ int x = rand() % (tt.width - 1);
+ int y = rand() % (tt.height - 1);
+ uint8_t red = rand();
+ uint8_t green = rand();
+ uint8_t blue = rand();
+
+ fill_rect(&t->real, tt.picture,
+ x, y, 1, 1,
+ red, green, blue);
+
+ pixels[r].x = x;
+ pixels[r].y = y;
+ cells[y*tt.width+x] = color(red, green, blue, 0xff);
+ }
+
+ test_init_image(&image, &t->real.shm, tt.format, 1, 1);
+
+ for (r = 0; r < reps; r++) {
+ uint32_t x = pixels[r].x;
+ uint32_t y = pixels[r].y;
+ uint32_t result;
+
+ XShmGetImage(t->real.dpy, tt.draw, &image,
+ x, y, AllPlanes);
+
+ result = *(uint32_t *)image.data;
+ if (!pixel_equal(image.depth, result,
+ cells[y*tt.width+x])) {
+ uint32_t mask = depth_mask(image.depth);
+
+ die("failed to set pixel (%d,%d) to %08x [%08x], found %08x [%08x] instead\n",
+ x, y,
+ cells[y*tt.width+x] & mask,
+ cells[y*tt.width+x],
+ result & mask,
+ result);
+ }
+ }
+ }
+ printf("passed [%d iterations x %d]\n", reps, sets);
+
+ test_target_destroy_render(&t->real, &tt);
+ free(pixels);
+ free(cells);
+}
+
+static void clear(struct test_display *dpy, struct test_target *tt)
+{
+ XRenderColor render_color = {0};
+ XRenderFillRectangle(dpy->dpy, PictOpClear, tt->picture, &render_color,
+ 0, 0, tt->width, tt->height);
+}
+
+static void area_tests(struct test *t, int reps, int sets, enum target target)
+{
+ struct test_target tt;
+ XImage image;
+ uint32_t *cells = calloc(sizeof(uint32_t), t->real.width*t->real.height);
+ int r, s, x, y;
+
+ printf("Testing area sets (%s): ", test_target_name(target));
+ fflush(stdout);
+
+ test_target_create_render(&t->real, target, &tt);
+ clear(&t->real, &tt);
+
+ test_init_image(&image, &t->real.shm, tt.format, tt.width, tt.height);
+
+ for (s = 0; s < sets; s++) {
+ for (r = 0; r < reps; r++) {
+ int w = 1 + rand() % (tt.width - 1);
+ int h = 1 + rand() % (tt.height - 1);
+ uint8_t red = rand();
+ uint8_t green = rand();
+ uint8_t blue = rand();
+
+ x = rand() % (2*tt.width) - tt.width;
+ y = rand() % (2*tt.height) - tt.height;
+
+ fill_rect(&t->real, tt.picture,
+ x, y, w, h,
+ red, green, blue);
+
+ if (x < 0)
+ w += x, x = 0;
+ if (y < 0)
+ h += y, y = 0;
+ if (x >= tt.width || y >= tt.height)
+ continue;
+
+ if (x + w > tt.width)
+ w = tt.width - x;
+ if (y + h > tt.height)
+ h = tt.height - y;
+ if (w <= 0 || h <= 0)
+ continue;
+
+ pixman_fill(cells, tt.width, 32, x, y, w, h,
+ color(red, green, blue, 0xff));
+ }
+
+ XShmGetImage(t->real.dpy, tt.draw, &image, 0, 0, AllPlanes);
+
+ for (y = 0; y < tt.height; y++) {
+ for (x = 0; x < tt.width; x++) {
+ uint32_t result = *(uint32_t *)
+ (image.data +
+ y*image.bytes_per_line +
+ x*image.bits_per_pixel/8);
+ if (!pixel_equal(image.depth, result, cells[y*tt.width+x])) {
+ char buf[600];
+ uint32_t mask = depth_mask(image.depth);
+ show_cells(buf,
+ (uint32_t*)image.data, cells,
+ x, y, tt.width, tt.height);
+
+ die("failed to set pixel (%d,%d) to %08x [%08x], found %08x [%08x] instead (set %d, reps %d)\n%s",
+ x, y,
+ cells[y*tt.width+x] & mask,
+ cells[y*tt.width+x],
+ result & mask,
+ result, s, reps, buf);
+ }
+ }
+ }
+ }
+
+ printf("passed [%d iterations x %d]\n", reps, sets);
+
+ test_target_destroy_render(&t->real, &tt);
+ free(cells);
+}
+
+static void rect_tests(struct test *t, int reps, int sets, enum target target, int use_window)
+{
+ struct test_target real, ref;
+ int r, s;
+ printf("Testing area fills (%s, using %s source): ",
+ test_target_name(target), use_window ? "window" : "pixmap");
+ fflush(stdout);
+
+ test_target_create_render(&t->real, target, &real);
+ clear(&t->real, &real);
+
+ test_target_create_render(&t->ref, target, &ref);
+ clear(&t->ref, &ref);
+
+ for (s = 0; s < sets; s++) {
+ for (r = 0; r < reps; r++) {
+ int x, y, w, h;
+ uint8_t red = rand();
+ uint8_t green = rand();
+ uint8_t blue = rand();
+
+ x = rand() % (real.width - 1);
+ y = rand() % (real.height - 1);
+ w = 1 + rand() % (real.width - x - 1);
+ h = 1 + rand() % (real.height - y - 1);
+
+ fill_rect(&t->real, real.picture,
+ x, y, w, h,
+ red, green, blue);
+ fill_rect(&t->ref, ref.picture,
+ x, y, w, h,
+ red, green, blue);
+ }
+
+ test_compare(t,
+ real.draw, real.format,
+ ref.draw, ref.format,
+ 0, 0, real.width, real.height,
+ "");
+ }
+
+ printf("passed [%d iterations x %d]\n", reps, sets);
+
+ test_target_destroy_render(&t->real, &real);
+ test_target_destroy_render(&t->ref, &ref);
+}
+
+int main(int argc, char **argv)
+{
+ struct test test;
+ int i;
+
+ test_init(&test, argc, argv);
+
+ for (i = 0; i <= DEFAULT_ITERATIONS; i++) {
+ int reps = 1 << i;
+ int sets = 1 << (12 - i);
+
+ if (sets < 2)
+ sets = 2;
+
+ pixel_tests(&test, reps, sets, PIXMAP);
+ area_tests(&test, reps, sets, PIXMAP);
+ rect_tests(&test, reps, sets, PIXMAP, 0);
+ }
+
+ return 0;
+}
commit 141001df6c9c3485c500ed531a214c09b46c1d3b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jan 6 13:35:27 2012 +0000
sna: always skip the active search when requested to find an inactive bo
References: https://bugs.freedesktop.org/show_bug.cgi?id=44504
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 58df935..4bccccb 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1996,9 +1996,10 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
return kgem_bo_reference(bo);
}
} while (!list_is_empty(&kgem->vma_cache) && kgem_retire(kgem));
+ }
+ if (flags & CREATE_INACTIVE)
goto skip_active_search;
- }
untiled_pitch = kgem_untiled_pitch(kgem,
width, bpp,
commit eac0d9652b2399f8c36ba0288db6fe347ed78dc9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jan 6 12:39:49 2012 +0000
sna: Optimise sna_poly_segment() for the frequent no-op case
Strange as it may seem... But the principle of doing less work with
greater locality should help everywhere, just not as noticeable when
real work is performed.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 8d6f584..7ae76eb 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -5556,15 +5556,11 @@ sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc,
BoxPtr out)
{
BoxRec box;
- int extra = gc->lineWidth;
bool clipped, can_blit;
if (n == 0)
return 0;
- if (gc->capStyle != CapProjecting)
- extra >>= 1;
-
if (seg->x2 >= seg->x1) {
box.x1 = seg->x1;
box.x2 = seg->x2;
@@ -5607,11 +5603,16 @@ sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc,
box.x2++;
box.y2++;
- if (extra) {
- box.x1 -= extra;
- box.x2 += extra;
- box.y1 -= extra;
- box.y2 += extra;
+ if (gc->lineWidth) {
+ int extra = gc->lineWidth;
+ if (gc->capStyle != CapProjecting)
+ extra >>= 1;
+ if (extra) {
+ box.x1 -= extra;
+ box.x2 += extra;
+ box.y1 -= extra;
+ box.y2 += extra;
+ }
}
DBG(("%s: unclipped, untranslated extents (%d, %d), (%d, %d)\n",
@@ -5628,8 +5629,8 @@ sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc,
static void
sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
{
- PixmapPtr pixmap = get_drawable_pixmap(drawable);
- struct sna *sna = to_sna_from_pixmap(pixmap);
+ PixmapPtr pixmap;
+ struct sna *sna;
struct sna_damage **damage;
RegionRec region;
unsigned flags;
@@ -5650,6 +5651,9 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
if (FORCE_FALLBACK)
goto fallback;
+ pixmap = get_drawable_pixmap(drawable);
+ sna = to_sna_from_pixmap(pixmap);
+
if (wedged(sna)) {
DBG(("%s: fallback -- wedged\n", __FUNCTION__));
goto fallback;
commit a3699fff5ada85e4dea739aade25ebbb728e18f4
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jan 6 12:15:46 2012 +0000
sna: Only force a pipeline flush for a change of destination, not sources
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index d35942c..86bf460 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -693,11 +693,11 @@ gen6_emit_wm(struct sna *sna, unsigned int kernel, int nr_surfaces, int nr_input
OUT_BATCH(0);
}
-static bool
+static void
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (sna->render_state.gen6.surface_table == offset)
- return false;
+ return;
/* Binding table pointers */
OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
@@ -709,7 +709,6 @@ gen6_emit_binding_table(struct sna *sna, uint16_t offset)
OUT_BATCH(offset*4);
sna->render_state.gen6.surface_table = offset;
- return true;
}
static void
@@ -720,6 +719,8 @@ gen6_emit_drawing_rectangle(struct sna *sna,
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
+ force |= sna->render_state.gen6.target != op->dst.bo->handle;
+
if (!force &&
sna->render_state.gen6.drawrect_limit == limit &&
sna->render_state.gen6.drawrect_offset == offset)
@@ -727,6 +728,7 @@ gen6_emit_drawing_rectangle(struct sna *sna,
sna->render_state.gen6.drawrect_offset = offset;
sna->render_state.gen6.drawrect_limit = limit;
+ sna->render_state.gen6.target = op->dst.bo->handle;
OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
OUT_BATCH(0);
@@ -848,7 +850,7 @@ gen6_emit_state(struct sna *sna,
gen6_emit_vertex_elements(sna, op);
/* XXX updating the binding table requires a non-pipelined cmd? */
- need_flush |= gen6_emit_binding_table(sna, wm_binding_table);
+ gen6_emit_binding_table(sna, wm_binding_table);
gen6_emit_drawing_rectangle(sna, op, need_flush & !flushed);
}
@@ -3581,6 +3583,7 @@ static void gen6_render_reset(struct sna *sna)
sna->render_state.gen6.samplers = -1;
sna->render_state.gen6.blend = -1;
sna->render_state.gen6.kernel = -1;
+ sna->render_state.gen6.target = -1;
sna->render_state.gen6.drawrect_offset = -1;
sna->render_state.gen6.drawrect_limit = -1;
sna->render_state.gen6.surface_table = -1;
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index d5c7b2e..5cd0d7c 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -383,6 +383,7 @@ struct gen6_render_state {
uint32_t blend;
uint32_t samplers;
uint32_t kernel;
+ uint32_t target;
uint16_t num_sf_outputs;
uint16_t vb_id;
commit 2fefee6015ed2df52c8513ae180ca83c01ff83c0
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jan 6 12:10:27 2012 +0000
sna/gen6: Reuse current no-blending setup for PictOpClear
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index bb0aab1..d35942c 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -585,6 +585,17 @@ gen6_emit_cc(struct sna *sna,
if (render->blend == blend)
return false;
+ if (op == PictOpClear) {
+ uint32_t src;
+
+ /* We can emulate a clear using src, which is beneficial if
+ * the blend unit is already disabled.
+ */
+ src = BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO);
+ if (render->blend == src)
+ return false;
+ }
+
OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
OUT_BATCH((render->cc_blend + blend) | 1);
if (render->blend == (unsigned)-1) {
commit 800ca0b4d1194544fe2461f91cbdc632c4d3dd7a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jan 6 12:00:43 2012 +0000
sna/gen6: Tidy emission of CC state (blending)
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 37b1016..bb0aab1 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -228,7 +228,7 @@ static const struct formatinfo {
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
static uint32_t gen6_get_blend(int op,
- Bool has_component_alpha,
+ bool has_component_alpha,
uint32_t dst_format)
{
uint32_t src, dst;
@@ -570,15 +570,23 @@ gen6_emit_invariant(struct sna *sna)
}
static bool
-gen6_emit_cc(struct sna *sna, uint32_t blend_offset)
+gen6_emit_cc(struct sna *sna,
+ int op, bool has_component_alpha, uint32_t dst_format)
{
struct gen6_render_state *render = &sna->render_state.gen6;
+ uint32_t blend;
- if (render->blend == blend_offset)
+ blend = gen6_get_blend(op, has_component_alpha, dst_format);
+
+ DBG(("%s(op=%d, ca=%d, format=%x): new=%x, current=%x\n",
+ __FUNCTION__,
+ op, has_component_alpha, dst_format,
+ blend, render->blend));
+ if (render->blend == blend)
return false;
OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
- OUT_BATCH((render->cc_blend + blend_offset) | 1);
+ OUT_BATCH((render->cc_blend + blend) | 1);
if (render->blend == (unsigned)-1) {
OUT_BATCH(1);
OUT_BATCH(1);
@@ -587,7 +595,7 @@ gen6_emit_cc(struct sna *sna, uint32_t blend_offset)
OUT_BATCH(0);
}
- render->blend = blend_offset;
+ render->blend = blend;
return true;
}
@@ -804,9 +812,9 @@ gen6_emit_state(struct sna *sna,
bool need_flush;
need_flush = gen6_emit_cc(sna,
- gen6_get_blend(op->op,
- op->has_component_alpha,
- op->dst.format));
+ op->op,
+ op->has_component_alpha,
+ op->dst.format);
DBG(("%s: sampler src=(%d, %d), mask=(%d, %d), offset=%d\n",
__FUNCTION__,
@@ -845,9 +853,7 @@ static void gen6_magic_ca_pass(struct sna *sna,
DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));
- need_flush =
- gen6_emit_cc(sna,
- gen6_get_blend(PictOpAdd, TRUE, op->dst.format));
+ need_flush = gen6_emit_cc(sna, PictOpAdd, TRUE, op->dst.format);
gen6_emit_wm(sna,
gen6_choose_composite_kernel(PictOpAdd,
TRUE, TRUE,
commit b9c9e9970cdb542173e3ed0da2bef614abedd3f7
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jan 6 11:38:31 2012 +0000
sna/trapezoids: Add debug markers for move-to-cpu
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 903bc42..062a2bc 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -3439,6 +3439,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
}
}
+ DBG(("%s: move-to-cpu\n", __FUNCTION__));
region.data = NULL;
if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
op == PictOpSrc ? MOVE_WRITE : MOVE_WRITE | MOVE_READ))
@@ -3578,6 +3579,7 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
region.extents.y2 = dst_y + extents.y2;
region.data = NULL;
+ DBG(("%s: move-to-cpu\n", __FUNCTION__));
if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
MOVE_READ | MOVE_WRITE))
goto done;
@@ -3595,6 +3597,7 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
goto done;
}
+ DBG(("%s: fbComposite()\n", __FUNCTION__));
fbComposite(op, src, mask, dst,
src_x + dst_x - pixman_fixed_to_int(traps[0].left.p1.x),
src_y + dst_y - pixman_fixed_to_int(traps[0].left.p1.y),
commit 2841c5fee79c42bca3e098ec620755d341b6888f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jan 6 11:32:54 2012 +0000
sna: fast path move-to-cpu of an all-damaged CPU bo
When the bo is already completely damaged on the CPU, all we need to do
is to sync with the CPU bo.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 06826d2..8d6f584 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -707,16 +707,19 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
struct sna *sna = to_sna_from_pixmap(pixmap);
struct sna_pixmap *priv;
- DBG(("%s(pixmap=%p, flags=%x)\n", __FUNCTION__, pixmap, flags));
+ DBG(("%s(pixmap=%ld, flags=%x)\n", __FUNCTION__,
+ pixmap->drawable.serialNumber, flags));
priv = sna_pixmap(pixmap);
if (priv == NULL) {
- DBG(("%s: not attached to %p\n", __FUNCTION__, pixmap));
+ DBG(("%s: not attached\n", __FUNCTION__));
return true;
}
- DBG(("%s: gpu_bo=%p, gpu_damage=%p\n",
- __FUNCTION__, priv->gpu_bo, priv->gpu_damage));
+ DBG(("%s: gpu_bo=%d, gpu_damage=%p\n",
+ __FUNCTION__,
+ priv->gpu_bo ? priv->gpu_bo->handle : 0,
+ priv->gpu_damage));
if ((flags & MOVE_READ) == 0) {
assert(flags == MOVE_WRITE);
@@ -766,6 +769,11 @@ skip_inplace_map:
}
}
+ if (priv->cpu_damage && priv->cpu_damage->mode == DAMAGE_ALL) {
+ DBG(("%s: CPU all-damaged\n", __FUNCTION__));
+ goto done;
+ }
+
if (priv->mapped) {
pixmap->devPrivate.ptr = NULL;
priv->mapped = 0;
@@ -805,11 +813,6 @@ skip_inplace_map:
priv->gpu_damage = NULL;
}
- if (priv->cpu_bo) {
- DBG(("%s: syncing CPU bo\n", __FUNCTION__));
- kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
- }
-
if (flags & MOVE_WRITE) {
DBG(("%s: marking as damaged\n", __FUNCTION__));
sna_damage_all(&priv->cpu_damage,
@@ -819,9 +822,16 @@ skip_inplace_map:
if (priv->flush)
list_move(&priv->list, &sna->dirty_pixmaps);
+
+ priv->source_count = SOURCE_BIAS;
+ }
+
+done:
+ if (priv->cpu_bo) {
+ DBG(("%s: syncing CPU bo\n", __FUNCTION__));
+ kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
}
- priv->source_count = SOURCE_BIAS;
return true;
}
More information about the xorg-commit
mailing list