xf86-video-intel: 20 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_glyphs.c src/sna/sna.h src/sna/sna_render.h
Chris Wilson
ickle at kemper.freedesktop.org
Tue Oct 18 07:35:36 PDT 2011
src/sna/gen2_render.c | 47 +++++++---
src/sna/gen3_render.c | 226 +++++++++++++++++++++++++++++++++++++-------------
src/sna/gen4_render.c | 119 ++++++++++++++++++++++++--
src/sna/gen5_render.c | 121 +++++++++++++++++++++++++-
src/sna/gen6_render.c | 63 +++++++------
src/sna/kgem.c | 68 +++++++++------
src/sna/kgem.h | 3
src/sna/sna.h | 6 +
src/sna/sna_accel.c | 136 +++++++++++++++++++++++++++---
src/sna/sna_blt.c | 37 +++++---
src/sna/sna_glyphs.c | 27 ++---
src/sna/sna_render.h | 1
12 files changed, 676 insertions(+), 178 deletions(-)
New commits:
commit 0b83abfb2bc3f65447205048ae0af1a94fa9ef15
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Oct 18 14:43:37 2011 +0100
sna/gen6: Don't modify composite arguments whilst checking for BLT
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 2392c9b..badade0 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2592,18 +2592,15 @@ gen6_render_fill_boxes(struct sna *sna,
dst->drawable.width > 8192 ||
dst->drawable.height > 8192 ||
!gen6_check_dst_format(format)) {
- uint8_t alu = GXcopy;
+ uint8_t alu = -1;
- if (op == PictOpClear) {
+ if (op == PictOpClear || (op == PictOpOutReverse && color->alpha >= 0xff00))
alu = GXclear;
- pixel = 0;
- op = PictOpSrc;
- }
- if (op == PictOpOver && color->alpha >= 0xff00)
- op = PictOpSrc;
+ if (op == PictOpSrc || (op == PictOpOver && color->alpha >= 0xff00))
+ alu = GXcopy;
- if (op == PictOpSrc &&
+ if (alu != -1 &&
sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
commit ae7bd2bf70b42b8c12e5e5ae074891595827a1c0
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Oct 18 11:54:05 2011 +0100
sna/blt: Move the conditional out of the fill-many hotpath
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 15b09bf..1597b1f 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -119,19 +119,15 @@ static bool sna_blt_fill_init(struct sna *sna,
blt->bo[0] = bo;
- blt->cmd = XY_SETUP_MONO_PATTERN_SL_BLT;
- if (bpp == 32)
- blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
-
pitch = bo->pitch;
- if (kgem->gen >= 40 && bo->tiling) {
- blt->cmd |= BLT_DST_TILED;
+ blt->cmd = XY_SCANLINE_BLT;
+ if (kgem->gen >= 40 && blt->bo[0]->tiling) {
+ blt->cmd |= 1 << 11;
pitch >>= 2;
}
if (pitch > MAXSHORT)
return FALSE;
- blt->overwrites = alu == GXcopy || alu == GXclear;
blt->br13 = 1<<31 | (fill_ROP[alu] << 16) | pitch;
switch (bpp) {
default: assert(0);
@@ -141,6 +137,7 @@ static bool sna_blt_fill_init(struct sna *sna,
}
blt->pixel = pixel;
+ blt->bpp = bpp;
kgem_set_mode(kgem, KGEM_BLT);
if (!kgem_check_bo_fenced(kgem, bo, NULL) ||
@@ -160,7 +157,9 @@ static bool sna_blt_fill_init(struct sna *sna,
}
b = kgem->batch + kgem->nbatch;
- b[0] = blt->cmd;
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
+ if (bpp == 32)
+ b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
b[1] = blt->br13;
b[2] = 0;
b[3] = 0;
@@ -202,7 +201,9 @@ static void sna_blt_fill_one(struct sna *sna,
_kgem_set_mode(kgem, KGEM_BLT);
b = kgem->batch + kgem->nbatch;
- b[0] = blt->cmd;
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
+ if (blt->bpp == 32)
+ b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
b[1] = blt->br13;
b[2] = 0;
b[3] = 0;
@@ -219,9 +220,7 @@ static void sna_blt_fill_one(struct sna *sna,
}
b = kgem->batch + kgem->nbatch;
- b[0] = XY_SCANLINE_BLT;
- if (kgem->gen >= 40 && blt->bo[0]->tiling)
- b[0] |= 1 << 11;
+ b[0] = blt->cmd;
b[1] = (y << 16) | x;
b[2] = ((y + height) << 16) | (x + width);
kgem->nbatch += 3;
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 1cbd0f5..b4b4085 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -90,6 +90,7 @@ struct sna_composite_op {
uint32_t inplace :1;
uint32_t overwrites:1;
+ uint32_t bpp : 6;
uint32_t cmd;
uint32_t br13;
commit 86f99379ee55b157634174c4c8aad61a30b4f57f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Oct 18 11:35:12 2011 +0100
sna/gen4: Add fill-one
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index fb60c39..2c988ff 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2370,9 +2370,9 @@ gen4_fill_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
}
static void
-gen4_render_fill_one(struct sna *sna,
- const struct sna_composite_op *op,
- int x, int y, int w, int h)
+gen4_render_fill_rectangle(struct sna *sna,
+ const struct sna_composite_op *op,
+ int x, int y, int w, int h)
{
if (!gen4_get_rectangles(sna, op, 1)) {
gen4_fill_bind_surfaces(sna, op);
@@ -2484,9 +2484,10 @@ gen4_render_fill_boxes(struct sna *sna,
gen4_align_vertex(sna, &tmp);
do {
- gen4_render_fill_one(sna, &tmp,
- box->x1, box->y1,
- box->x2 - box->x1, box->y2 - box->y1);
+ gen4_render_fill_rectangle(sna, &tmp,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1);
box++;
} while (--n);
@@ -2499,7 +2500,7 @@ static void
gen4_render_fill_blt(struct sna *sna, const struct sna_fill_op *op,
int16_t x, int16_t y, int16_t w, int16_t h)
{
- gen4_render_fill_one(sna, &op->base, x, y, w, h);
+ gen4_render_fill_rectangle(sna, &op->base, x, y, w, h);
}
static void
@@ -2571,6 +2572,109 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
return TRUE;
}
+static Bool
+gen4_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
+ uint32_t color,
+ int16_t x1, int16_t y1, int16_t x2, int16_t y2,
+ uint8_t alu)
+{
+ BoxRec box;
+
+ box.x1 = x1;
+ box.y1 = y1;
+ box.x2 = x2;
+ box.y2 = y2;
+
+ return sna_blt_fill_boxes(sna, alu,
+ bo, dst->drawable.bitsPerPixel,
+ color, &box, 1);
+}
+
+static Bool
+gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
+ uint32_t color,
+ int16_t x1, int16_t y1,
+ int16_t x2, int16_t y2,
+ uint8_t alu)
+{
+ struct sna_composite_op tmp;
+
+#if NO_FILL_ONE
+ return gen4_render_fill_one_try_blt(sna, dst, bo, color,
+ x1, y1, x2, y2, alu);
+#endif
+
+ if (gen4_render_fill_one_try_blt(sna, dst, bo, color,
+ x1, y1, x2, y2, alu))
+ return TRUE;
+
+ /* Must use the BLT if we can't RENDER... */
+ if (!(alu == GXcopy || alu == GXclear) ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return FALSE;
+
+ if (alu == GXclear)
+ color = 0;
+
+ tmp.op = color == 0 ? PictOpClear : PictOpSrc;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp.dst.bo = bo;
+ tmp.dst.x = tmp.dst.y = 0;
+
+ tmp.src.bo =
+ sna_render_get_solid(sna,
+ sna_rgba_for_color(color,
+ dst->drawable.depth));
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ tmp.mask.bo = NULL;
+ tmp.mask.filter = SAMPLER_FILTER_NEAREST;
+ tmp.mask.repeat = SAMPLER_EXTEND_NONE;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.has_component_alpha = 0;
+ tmp.need_magic_ca_pass = FALSE;
+
+ tmp.u.gen4.wm_kernel = WM_KERNEL;
+ tmp.u.gen4.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, bo, NULL))
+ _kgem_submit(&sna->kgem);
+
+ gen4_fill_bind_surfaces(sna, &tmp);
+ gen4_align_vertex(sna, &tmp);
+
+ if (!gen4_get_rectangles(sna, &tmp, 1)) {
+ gen4_fill_bind_surfaces(sna, &tmp);
+ gen4_get_rectangles(sna, &tmp, 1);
+ }
+
+ DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));
+ OUT_VERTEX(x2, y2);
+ OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x1, y2);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x1, y1);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(0);
+
+ gen4_vertex_flush(sna);
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ return TRUE;
+}
+
static void
gen4_render_flush(struct sna *sna)
{
@@ -2840,6 +2944,7 @@ Bool gen4_render_init(struct sna *sna)
sna->render.fill_boxes = gen4_render_fill_boxes;
sna->render.fill = gen4_render_fill;
+ sna->render.fill_one = gen4_render_fill_one;
sna->render.flush = gen4_render_flush;
sna->render.reset = gen4_render_reset;
commit 26c082dbd3f89b987168d321f72e78db8c5ddc2c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Oct 18 10:50:53 2011 +0100
sna/gen6: Precompute floats_per_rect
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index f49cf31..2392c9b 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1485,9 +1485,9 @@ inline static int gen6_get_rectangles(struct sna *sna,
{
int rem = vertex_space(sna);
- if (rem < 3*op->floats_per_vertex) {
+ if (rem < op->floats_per_rect) {
DBG(("flushing vbo for %s: %d < %d\n",
- __FUNCTION__, rem, 3*op->floats_per_vertex));
+ __FUNCTION__, rem, op->floats_per_rect));
rem = gen6_get_rectangles__flush(sna, op->need_magic_ca_pass);
if (rem == 0)
return 0;
@@ -1497,8 +1497,8 @@ inline static int gen6_get_rectangles(struct sna *sna,
!gen6_rectangle_begin(sna, op))
return 0;
- if (want > 1 && want * op->floats_per_vertex*3 > rem)
- want = rem / (3*op->floats_per_vertex);
+ if (want > 1 && want * op->floats_per_rect > rem)
+ want = rem / op->floats_per_rect;
sna->render.vertex_index += 3*want;
return want;
@@ -1847,6 +1847,7 @@ gen6_render_video(struct sna *sna,
tmp.is_affine = TRUE;
tmp.floats_per_vertex = 3;
+ tmp.floats_per_rect = 9;
if (is_planar_fourcc(frame->id)) {
tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_VIDEO_PLANAR;
@@ -2226,6 +2227,7 @@ gen6_render_composite(struct sna *sna,
tmp->floats_per_vertex = 3 + !tmp->is_affine;
}
+ tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
tmp->u.gen6.wm_kernel =
gen6_choose_composite_kernel(tmp->op,
@@ -2364,6 +2366,7 @@ gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
tmp.is_affine = TRUE;
tmp.floats_per_vertex = 3;
+ tmp.floats_per_rect = 9;
tmp.has_component_alpha = 0;
tmp.need_magic_ca_pass = 0;
@@ -2512,6 +2515,7 @@ gen6_render_copy(struct sna *sna, uint8_t alu,
op->base.is_affine = true;
op->base.floats_per_vertex = 3;
+ op->base.floats_per_rect = 9;
op->base.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
op->base.u.gen6.nr_surfaces = 2;
@@ -2649,6 +2653,7 @@ gen6_render_fill_boxes(struct sna *sna,
tmp.is_affine = TRUE;
tmp.floats_per_vertex = 3;
+ tmp.floats_per_rect = 9;
tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
tmp.u.gen6.nr_surfaces = 2;
@@ -2775,6 +2780,7 @@ gen6_render_fill(struct sna *sna, uint8_t alu,
op->base.is_affine = TRUE;
op->base.floats_per_vertex = 3;
+ op->base.floats_per_rect = 9;
op->base.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
op->base.u.gen6.nr_surfaces = 2;
@@ -2861,6 +2867,7 @@ gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
tmp.is_affine = TRUE;
tmp.floats_per_vertex = 3;
+ tmp.floats_per_rect = 9;
tmp.has_component_alpha = 0;
tmp.need_magic_ca_pass = FALSE;
commit c69a7989b3c9cedc961e29b1a569b174207964ae
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Oct 18 10:48:37 2011 +0100
sna/gen5: Add fill-one implementation
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 516bc35..d246d01 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2571,6 +2571,112 @@ gen5_render_fill(struct sna *sna, uint8_t alu,
return TRUE;
}
+static Bool
+gen5_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
+ uint32_t color,
+ int16_t x1, int16_t y1, int16_t x2, int16_t y2,
+ uint8_t alu)
+{
+ BoxRec box;
+
+ box.x1 = x1;
+ box.y1 = y1;
+ box.x2 = x2;
+ box.y2 = y2;
+
+ return sna_blt_fill_boxes(sna, alu,
+ bo, dst->drawable.bitsPerPixel,
+ color, &box, 1);
+}
+
+static Bool
+gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
+ uint32_t color,
+ int16_t x1, int16_t y1,
+ int16_t x2, int16_t y2,
+ uint8_t alu)
+{
+ struct sna_composite_op tmp;
+
+#if NO_FILL_ONE
+ return gen5_render_fill_one_try_blt(sna, dst, bo, color,
+ x1, y1, x2, y2, alu);
+#endif
+
+ /* Prefer to use the BLT if already engaged */
+ if (sna->kgem.mode != KGEM_RENDER &&
+ gen5_render_fill_one_try_blt(sna, dst, bo, color,
+ x1, y1, x2, y2, alu))
+ return TRUE;
+
+ /* Must use the BLT if we can't RENDER... */
+ if (!(alu == GXcopy || alu == GXclear) ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return gen5_render_fill_one_try_blt(sna, dst, bo, color,
+ x1, y1, x2, y2, alu);
+
+ if (alu == GXclear)
+ color = 0;
+
+ tmp.op = color == 0 ? PictOpClear : PictOpSrc;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp.dst.bo = bo;
+ tmp.dst.x = tmp.dst.y = 0;
+
+ tmp.src.bo =
+ sna_render_get_solid(sna,
+ sna_rgba_for_color(color,
+ dst->drawable.depth));
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ tmp.mask.bo = NULL;
+ tmp.mask.filter = SAMPLER_FILTER_NEAREST;
+ tmp.mask.repeat = SAMPLER_EXTEND_NONE;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.has_component_alpha = 0;
+ tmp.need_magic_ca_pass = FALSE;
+
+ tmp.u.gen5.wm_kernel = WM_KERNEL;
+ tmp.u.gen5.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, bo, NULL))
+ _kgem_submit(&sna->kgem);
+
+ gen5_fill_bind_surfaces(sna, &tmp);
+ gen5_align_vertex(sna, &tmp);
+
+ if (!gen5_get_rectangles(sna, &tmp, 1)) {
+ gen5_fill_bind_surfaces(sna, &tmp);
+ gen5_get_rectangles(sna, &tmp, 1);
+ }
+
+ DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));
+ OUT_VERTEX(x2, y2);
+ OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x1, y2);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x1, y1);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(0);
+
+ gen5_vertex_flush(sna);
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ return TRUE;
+}
+
static void
gen5_render_flush(struct sna *sna)
{
@@ -2863,6 +2969,7 @@ Bool gen5_render_init(struct sna *sna)
sna->render.fill_boxes = gen5_render_fill_boxes;
sna->render.fill = gen5_render_fill;
+ sna->render.fill_one = gen5_render_fill_one;
sna->render.flush = gen5_render_flush;
sna->render.reset = gen5_render_reset;
commit 26a1918290b2d23de72a67805fe5e766da0c5309
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Oct 18 10:42:48 2011 +0100
sna/gen6: Try continuing with the BLT if the last batch was also BLT
In the vain hope of reducing switching between rings, and with it the
stalls that such switches introduce between batches.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 147693d..f49cf31 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2086,7 +2086,7 @@ gen6_composite_set_target(struct sna_composite_op *op, PicturePtr dst)
static Bool
try_blt(struct sna *sna, int width, int height)
{
- if (sna->kgem.mode == KGEM_BLT) {
+ if (sna->kgem.ring == KGEM_BLT) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return TRUE;
}
@@ -2125,7 +2125,7 @@ gen6_render_composite(struct sna *sna,
#endif
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
- width, height, sna->kgem.mode));
+ width, height, sna->kgem.ring));
if (mask == NULL &&
try_blt(sna, width, height) &&
@@ -2318,7 +2318,7 @@ gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
src_bo == dst_bo));
- if (sna->kgem.mode == KGEM_BLT &&
+ if (sna->kgem.ring == KGEM_BLT &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
@@ -2473,7 +2473,7 @@ gen6_render_copy(struct sna *sna, uint8_t alu,
src->drawable.width, src->drawable.height,
dst->drawable.width, dst->drawable.height));
- if (sna->kgem.mode == KGEM_BLT &&
+ if (sna->kgem.ring == KGEM_BLT &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy(sna, alu,
src_bo, dst_bo,
@@ -2584,7 +2584,7 @@ gen6_render_fill_boxes(struct sna *sna,
return FALSE;
}
- if (sna->kgem.mode != KGEM_RENDER ||
+ if (sna->kgem.ring != KGEM_RENDER ||
dst->drawable.width > 8192 ||
dst->drawable.height > 8192 ||
!gen6_check_dst_format(format)) {
@@ -2741,7 +2741,7 @@ gen6_render_fill(struct sna *sna, uint8_t alu,
op);
#endif
- if (sna->kgem.mode != KGEM_RENDER &&
+ if (sna->kgem.ring != KGEM_RENDER &&
sna_blt_fill(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
color,
@@ -2825,7 +2825,7 @@ gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
#endif
/* Prefer to use the BLT if already engaged */
- if (sna->kgem.mode != KGEM_RENDER &&
+ if (sna->kgem.ring != KGEM_RENDER &&
gen6_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu))
return TRUE;
@@ -2912,6 +2912,8 @@ gen6_render_context_switch(struct kgem *kgem,
if (!new_mode)
return;
+ DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode));
+
if (kgem->mode)
_kgem_submit(kgem);
commit 4b1398f26d4de44167096c6548ff7384c24d5e06
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Oct 18 00:29:18 2011 +0100
sna/gen6: Micro-optimise gen6_rectangle_begin
We can only emit state between primitives, ergo we need only check for
state updates if we've finished the vbo or are starting a new operation.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 2ba7c39..147693d 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1448,25 +1448,19 @@ static void gen6_emit_primitive(struct sna *sna)
static bool gen6_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
- int id = op->u.gen6.ve_id;
+ int id = 1 << op->u.gen6.ve_id;
int ndwords;
- ndwords = 0;
- if ((sna->render_state.gen6.vb_id & (1 << id)) == 0)
+ ndwords = op->need_magic_ca_pass ? 60 : 6;
+ if ((sna->render_state.gen6.vb_id & id) == 0)
ndwords += 5;
- if (sna->render_state.gen6.vertex_offset == 0)
- ndwords += op->need_magic_ca_pass ? 60 : 6;
- if (ndwords == 0)
- return true;
-
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
- if ((sna->render_state.gen6.vb_id & (1 << id)) == 0)
+ if ((sna->render_state.gen6.vb_id & id) == 0)
gen6_emit_vertex_buffer(sna, op);
- if (sna->render_state.gen6.vertex_offset == 0)
- gen6_emit_primitive(sna);
+ gen6_emit_primitive(sna);
return true;
}
@@ -1499,7 +1493,8 @@ inline static int gen6_get_rectangles(struct sna *sna,
return 0;
}
- if (!gen6_rectangle_begin(sna, op))
+ if (sna->render_state.gen6.vertex_offset == 0 &&
+ !gen6_rectangle_begin(sna, op))
return 0;
if (want > 1 && want * op->floats_per_vertex*3 > rem)
commit b2d842df5e3198dab401fbd450d4af83d44917dd
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 17 23:34:19 2011 +0100
sna/gen2: Precompute floats-per-rect
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 041b4c0..6b30069 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -899,7 +899,7 @@ inline static int gen2_get_rectangles(struct sna *sna,
assert(op->floats_per_vertex);
need = 1;
- size = 3*op->floats_per_vertex;
+ size = op->floats_per_rect;
if (op->need_magic_ca_pass)
need += 6 + size*sna->render.vertex_index, size *= 2;
@@ -929,7 +929,7 @@ inline static int gen2_get_rectangles(struct sna *sna,
want = rem / size;
assert(want);
- sna->render.vertex_index += 3*want*op->floats_per_vertex;
+ sna->render.vertex_index += want*op->floats_per_rect;
return want;
}
@@ -1348,6 +1348,7 @@ gen2_render_composite(struct sna *sna,
tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3;
if (tmp->mask.bo)
tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3;
+ tmp->floats_per_rect = 3*tmp->floats_per_vertex;
tmp->prim_emit = gen2_emit_composite_primitive;
if (tmp->mask.bo) {
@@ -1756,6 +1757,7 @@ gen2_render_composite_spans(struct sna *sna,
}
}
}
+ tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex;
tmp->box = gen2_render_composite_spans_box;
tmp->boxes = gen2_render_composite_spans_boxes;
@@ -1954,6 +1956,7 @@ gen2_render_fill_boxes(struct sna *sna,
tmp.dst.format = format;
tmp.dst.bo = dst_bo;
tmp.floats_per_vertex = 2;
+ tmp.floats_per_rect = 6;
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
kgem_submit(&sna->kgem);
@@ -2079,6 +2082,7 @@ gen2_render_fill(struct sna *sna, uint8_t alu,
tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp->base.dst.bo = dst_bo;
tmp->base.floats_per_vertex = 2;
+ tmp->base.floats_per_rect = 6;
tmp->base.src.u.gen2.pixel =
sna_rgba_for_color(color, dst->drawable.depth);
@@ -2156,6 +2160,7 @@ gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.dst.bo = bo;
tmp.floats_per_vertex = 2;
+ tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
tmp.src.u.gen2.pixel =
@@ -2325,6 +2330,7 @@ gen2_render_copy_boxes(struct sna *sna, uint8_t alu,
tmp.dst.bo = dst_bo;
tmp.floats_per_vertex = 4;
+ tmp.floats_per_rect = 12;
gen2_render_copy_setup_source(&tmp.src, src, src_bo);
gen2_emit_copy_state(sna, &tmp);
@@ -2450,6 +2456,7 @@ gen2_render_copy(struct sna *sna, uint8_t alu,
gen2_render_copy_setup_source(&tmp->base.src, src, src_bo);
tmp->base.floats_per_vertex = 4;
+ tmp->base.floats_per_rect = 12;
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
kgem_submit(&sna->kgem);
commit 729f1ec7869167ff91fa969d06dbdfbf0e8b4126
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 17 23:20:20 2011 +0100
sna/gen2: Prefer to use the BLT
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 34c4432..041b4c0 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -53,10 +53,8 @@
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
-#define PREFER_3D_COPY 0
-#define PREFER_3D_COPY_BOXES 0
-#define PREFER_3D_FILL 0
-#define PREFER_3D_FILL_BOXES 0
+#define PREFER_BLT_FILL 1
+#define PREFER_BLT_COPY 1
#define BATCH(v) batch_emit(sna, v)
#define BATCH_F(v) batch_emit_float(sna, v)
@@ -1220,7 +1218,7 @@ try_blt(struct sna *sna,
{
uint32_t color;
- if (sna->kgem.mode == KGEM_BLT) {
+ if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return TRUE;
}
@@ -1875,6 +1873,24 @@ gen2_render_fill_boxes_try_blt(struct sna *sna,
pixel, box, n);
}
+static inline Bool prefer_blt_fill(struct sna *sna)
+{
+#if PREFER_BLT_FILL
+ return true;
+#else
+ return sna->kgem.mode != KGEM_RENDER;
+#endif
+}
+
+static inline Bool prefer_blt_copy(struct sna *sna)
+{
+#if PREFER_BLT_COPY
+ return true;
+#else
+ return sna->kgem.mode != KGEM_RENDER;
+#endif
+}
+
static Bool
gen2_render_fill_boxes(struct sna *sna,
CARD8 op,
@@ -1910,7 +1926,7 @@ gen2_render_fill_boxes(struct sna *sna,
dst, dst_bo,
box, n);
- if (!PREFER_3D_FILL_BOXES && sna->kgem.mode != KGEM_RENDER &&
+ if (prefer_blt_fill(sna) &&
gen2_render_fill_boxes_try_blt(sna, op, format, color,
dst, dst_bo,
box, n))
@@ -2040,7 +2056,7 @@ gen2_render_fill(struct sna *sna, uint8_t alu,
#endif
/* Prefer to use the BLT if already engaged */
- if (!PREFER_3D_FILL && sna->kgem.mode != KGEM_RENDER &&
+ if (prefer_blt_fill(sna) &&
sna_blt_fill(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
color,
@@ -2115,7 +2131,7 @@ gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
#endif
/* Prefer to use the BLT if already engaged */
- if (!PREFER_3D_FILL && sna->kgem.mode != KGEM_RENDER &&
+ if (prefer_blt_fill(sna) &&
gen2_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu))
return TRUE;
@@ -2267,7 +2283,7 @@ gen2_render_copy_boxes(struct sna *sna, uint8_t alu,
DBG(("%s (%d, %d)->(%d, %d) x %d\n",
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
- if (!PREFER_3D_COPY_BOXES &&
+ if (prefer_blt_copy(sna) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
@@ -2403,7 +2419,7 @@ gen2_render_copy(struct sna *sna, uint8_t alu,
#endif
/* Prefer to use the BLT */
- if (!PREFER_3D_COPY && sna->kgem.mode != KGEM_RENDER &&
+ if (prefer_blt_copy(sna) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy(sna, alu,
src_bo, dst_bo,
commit adef3a56cde1210554dc8327baa37c36fc5f18e3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 17 23:08:32 2011 +0100
sna/gen3: Prefer to use the BLT where possible for composite ops
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 4dd027f..90797d7 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2127,7 +2127,7 @@ try_blt(struct sna *sna,
PicturePtr source,
int width, int height)
{
- if (sna->kgem.mode == KGEM_BLT) {
+ if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return TRUE;
}
commit f40e6ab50b264ef7eb7750f72ab7418c0d3dc9c1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 17 23:06:30 2011 +0100
sna/gen3: Prefer to use the BLT to clear the scratch glyph pixmaps
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index bae6505..4dd027f 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -49,8 +49,11 @@
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
+#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
+#define PREFER_BLT_FILL 1
+
enum {
SHADER_NONE = 0,
SHADER_ZERO,
@@ -2966,10 +2969,9 @@ gen3_emit_video_state(struct sna *sna,
S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
- OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
- (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
+ OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
(1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
- S6_COLOR_WRITE_ENABLE | (2 << S6_TRISTRIP_PV_SHIFT));
+ S6_COLOR_WRITE_ENABLE);
sna->render_state.gen3.last_blend = 0;
sna->render_state.gen3.last_sampler = 0;
@@ -3663,6 +3665,15 @@ gen3_render_fill_boxes_try_blt(struct sna *sna,
pixel, box, n);
}
+static inline Bool prefer_fill_blt(struct sna *sna)
+{
+#if PREFER_BLT_FILL
+ return true;
+#else
+ return sna->kgem.mode != KGEM_RENDER;
+#endif
+}
+
static Bool
gen3_render_fill_boxes(struct sna *sna,
CARD8 op,
@@ -3698,7 +3709,7 @@ gen3_render_fill_boxes(struct sna *sna,
dst, dst_bo,
box, n);
- if (sna->kgem.mode != KGEM_RENDER &&
+ if (prefer_fill_blt(sna) &&
gen3_render_fill_boxes_try_blt(sna, op, format, color,
dst, dst_bo,
box, n))
@@ -3803,7 +3814,7 @@ gen3_render_fill(struct sna *sna, uint8_t alu,
#endif
/* Prefer to use the BLT if already engaged */
- if (sna->kgem.mode != KGEM_RENDER &&
+ if (prefer_fill_blt(sna) &&
sna_blt_fill(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
color,
@@ -3875,13 +3886,13 @@ gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
{
struct sna_composite_op tmp;
-#if NO_FILL_BOXES
+#if NO_FILL_ONE
return gen3_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu);
#endif
/* Prefer to use the BLT if already engaged */
- if (sna->kgem.mode != KGEM_RENDER &&
+ if (prefer_fill_blt(sna) &&
gen3_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu))
return TRUE;
@@ -3905,10 +3916,17 @@ gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
- tmp.src.u.gen3.type = SHADER_CONSTANT;
- tmp.src.u.gen3.mode =
- sna_rgba_for_color(color, dst->drawable.depth);
- tmp.mask.u.gen3.mode = SHADER_NONE;
+ color = sna_rgba_for_color(color, dst->drawable.depth);
+ if (color == 0)
+ tmp.src.u.gen3.type = SHADER_ZERO;
+ else if (color == 0xff000000)
+ tmp.src.u.gen3.type = SHADER_BLACK;
+ else if (color == 0xffffffff)
+ tmp.src.u.gen3.type = SHADER_WHITE;
+ else
+ tmp.src.u.gen3.type = SHADER_CONSTANT;
+ tmp.src.u.gen3.mode = color;
+ tmp.mask.u.gen3.type = SHADER_NONE;
tmp.u.gen3.num_constants = 0;
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index a11e955..4d876db 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1313,30 +1313,29 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
if (tiling < 0)
return tiling;
- if (tiling && width * bpp > 8 * 4096) {
+ if (tiling == I915_TILING_Y && height <= 16) {
+ DBG(("%s: too short [%d] for TILING_Y\n",
+ __FUNCTION__,height));
+ tiling = I915_TILING_X;
+ }
+ if (tiling && width * bpp >= 8 * 4096) {
DBG(("%s: TLB miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
__FUNCTION__,
width, height, width*bpp/8,
tiling));
return -tiling;
}
+ if (tiling == I915_TILING_X && height < 4) {
+ DBG(("%s: too short [%d] for TILING_X\n",
+ __FUNCTION__, height));
+ tiling = I915_TILING_NONE;
+ }
/* Before the G33, we only have a small GTT to play with and tiled
* surfaces always require full fence regions and so cause excessive
* aperture thrashing.
*/
if (kgem->gen < 33) {
- if (tiling == I915_TILING_Y && height < 16) {
- DBG(("%s: too short [%d] for TILING_Y\n",
- __FUNCTION__,height));
- tiling = I915_TILING_X;
- }
- if (tiling == I915_TILING_X && height < 4) {
- DBG(("%s: too short [%d] for TILING_X\n",
- __FUNCTION__, height));
- tiling = I915_TILING_NONE;
- }
-
if (tiling == I915_TILING_X && width * bpp < 8*512/2) {
DBG(("%s: too thin [%d] for TILING_X\n",
__FUNCTION__, width));
diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c
index 4db5baf..13b0cca 100644
--- a/src/sna/sna_glyphs.c
+++ b/src/sna/sna_glyphs.c
@@ -651,19 +651,16 @@ next_glyph:
return TRUE;
}
-static Bool
-clear_pixmap(struct sna *sna, PixmapPtr pixmap, PictFormat format)
+static void
+clear_pixmap(struct sna *sna, PixmapPtr pixmap)
{
- BoxRec box;
- xRenderColor color = { 0 };
-
- box.x1 = box.y1 = 0;
- box.x2 = pixmap->drawable.width;
- box.y2 = pixmap->drawable.height;
-
- return sna->render.fill_boxes(sna, PictOpClear, format, &color,
- pixmap, sna_pixmap_get_bo(pixmap),
- &box, 1);
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ assert(priv->gpu_only);
+ sna->render.fill_one(sna, pixmap, priv->gpu_bo, 0,
+ 0, 0,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ GXclear);
}
static Bool
@@ -742,11 +739,7 @@ glyphs_via_mask(struct sna *sna,
return FALSE;
ValidatePicture(mask);
-
- if (!clear_pixmap(sna, pixmap, mask->format)) {
- FreePicture(mask, 0);
- return FALSE;
- }
+ clear_pixmap(sna, pixmap);
memset(&tmp, 0, sizeof(tmp));
glyph_atlas = NULL;
commit 941e3504e049be8af50ca9df2d0aafe9eb477983
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 17 19:51:33 2011 +0100
sna: Some more debug output for request/bo retirement
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index a95ce3d..a11e955 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -183,10 +183,22 @@ static int gem_read(int fd, uint32_t handle, const void *dst, int length)
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
}
+static bool
+kgem_busy(struct kgem *kgem, int handle)
+{
+ struct drm_i915_gem_busy busy;
+
+ busy.handle = handle;
+ busy.busy = !kgem->wedged;
+ (void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
+
+ return busy.busy;
+}
+
Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
const void *data, int length)
{
- assert(!gem_busy(kgem->fd, bo->handle));
+ assert(!kgem_busy(kgem, bo->handle));
if (gem_write(kgem->fd, bo->handle, 0, length, data))
return FALSE;
@@ -213,18 +225,6 @@ static uint32_t gem_create(int fd, int size)
}
static bool
-kgem_busy(struct kgem *kgem, int handle)
-{
- struct drm_i915_gem_busy busy;
-
- busy.handle = handle;
- busy.busy = !kgem->wedged;
- (void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
-
- return busy.busy;
-}
-
-static bool
gem_madvise(int fd, uint32_t handle, uint32_t state)
{
struct drm_i915_gem_madvise madv;
@@ -570,6 +570,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
list_add(&bo->request, &kgem->flushing);
list_move(&bo->list, active(kgem, bo->size));
} else {
+ assert(bo->gpu == 0);
list_move(&bo->list, inactive(kgem, bo->size));
}
@@ -593,10 +594,14 @@ void kgem_retire(struct kgem *kgem)
{
struct kgem_bo *bo, *next;
+ DBG(("%s\n", __FUNCTION__));
+
list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
if (kgem_busy(kgem, bo->handle))
break;
+ DBG(("%s: moving %d from flush to inactive\n",
+ __FUNCTION__, bo->handle));
bo->needs_flush = 0;
bo->gpu = false;
list_move(&bo->list, inactive(kgem, bo->size));
@@ -612,6 +617,9 @@ void kgem_retire(struct kgem *kgem)
if (kgem_busy(kgem, rq->bo->handle))
break;
+ DBG(("%s: request %d complete\n",
+ __FUNCTION__, rq->bo->handle));
+
while (!list_is_empty(&rq->buffers)) {
bo = list_first_entry(&rq->buffers,
struct kgem_bo,
@@ -629,11 +637,17 @@ void kgem_retire(struct kgem *kgem)
if (bo->refcnt == 0) {
assert(bo->deleted);
if (bo->needs_flush) {
+ DBG(("%s: moving %d to flushing\n",
+ __FUNCTION__, bo->handle));
list_add(&bo->request, &kgem->flushing);
} else if (bo->reusable) {
+ DBG(("%s: moving %d to inactive\n",
+ __FUNCTION__, bo->handle));
list_move(&bo->list,
inactive(kgem, bo->size));
} else {
+ DBG(("%s: closing %d\n",
+ __FUNCTION__, bo->handle));
gem_close(kgem->fd, bo->handle);
free(bo);
}
@@ -644,6 +658,7 @@ void kgem_retire(struct kgem *kgem)
assert(rq->bo->refcnt == 0);
if (gem_madvise(kgem->fd, rq->bo->handle, I915_MADV_DONTNEED)) {
rq->bo->deleted = 1;
+ assert(rq->bo->gpu == 0);
list_move(&rq->bo->list,
inactive(kgem, rq->bo->size));
} else {
@@ -915,6 +930,7 @@ void _kgem_submit(struct kgem *kgem)
kgem_fixup_self_relocs(kgem, rq->bo);
kgem_finish_partials(kgem);
+ assert(rq->bo->gpu == 0);
if (kgem_batch_write(kgem, handle) == 0) {
struct drm_i915_gem_execbuffer2 execbuf;
int ret;
@@ -1210,6 +1226,7 @@ search_linear_cache(struct kgem *kgem, unsigned int size, bool use_active)
use_active ? "active" : "inactive"));
assert(bo->refcnt == 0);
assert(bo->reusable);
+ assert(use_active || bo->gpu == 0);
//assert(use_active || !kgem_busy(kgem, bo->handle));
return bo;
}
@@ -1512,6 +1529,7 @@ skip_active_search:
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->refcnt == 0);
assert(bo->reusable);
+ assert((flags & CREATE_INACTIVE) == 0 || bo->gpu == 0);
assert((flags & CREATE_INACTIVE) == 0 ||
!kgem_busy(kgem, bo->handle));
return kgem_bo_reference(bo);
commit 1d82fe3e528578d11d2a258b586b67c807eb7cb4
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 17 19:47:03 2011 +0100
sna: Only retire the flush handler if nothing was submitted within the period
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index d27ba78..a95ce3d 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -186,6 +186,8 @@ static int gem_read(int fd, uint32_t handle, const void *dst, int length)
Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
const void *data, int length)
{
+ assert(!gem_busy(kgem->fd, bo->handle));
+
if (gem_write(kgem->fd, bo->handle, 0, length, data))
return FALSE;
@@ -1022,6 +1024,7 @@ void _kgem_submit(struct kgem *kgem)
kgem_cleanup(kgem);
kgem_reset(kgem);
+ kgem->busy = 1;
}
void kgem_throttle(struct kgem *kgem)
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 988281c..a43a712 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -105,6 +105,7 @@ struct kgem {
uint32_t flush:1;
uint32_t need_expire:1;
uint32_t need_purge:1;
+ uint32_t busy:1;
uint32_t has_vmap :1;
uint32_t has_relaxed_fencing :1;
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index b58cfc8..4f5c152 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4064,13 +4064,15 @@ static bool sna_accel_flush(struct sna *sna)
bool nothing_to_do =
priv->cpu_damage == NULL && priv->gpu_bo->rq == NULL;
- DBG(("%s (time=%ld), nothing_to_do=%d\n",
- __FUNCTION__, (long)GetTimeInMillis(), nothing_to_do));
+ DBG(("%s (time=%ld), nothing_to_do=%d, busy? %d\n",
+ __FUNCTION__, (long)GetTimeInMillis(),
+ nothing_to_do, sna->kgem.busy));
- if (nothing_to_do)
+ if (nothing_to_do && !sna->kgem.busy)
_sna_accel_disarm_timer(sna, FLUSH_TIMER);
else
sna_pixmap_move_to_gpu(priv->pixmap);
+ sna->kgem.busy = 0;
kgem_bo_flush(&sna->kgem, priv->gpu_bo);
return !nothing_to_do;
}
commit 7758e333409409393c4c974adb2831a7b5b18fe4
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 17 19:32:44 2011 +0100
sna/gen3: Use immediates for black/white solid sources
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 11b56ac..bae6505 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -54,6 +54,8 @@
enum {
SHADER_NONE = 0,
SHADER_ZERO,
+ SHADER_BLACK,
+ SHADER_WHITE,
SHADER_CONSTANT,
SHADER_LINEAR,
SHADER_RADIAL,
@@ -620,6 +622,8 @@ gen3_emit_composite_texcoord(struct sna *sna,
case SHADER_OPACITY:
case SHADER_NONE:
case SHADER_ZERO:
+ case SHADER_BLACK:
+ case SHADER_WHITE:
case SHADER_CONSTANT:
break;
@@ -843,6 +847,8 @@ gen3_composite_emit_shader(struct sna *sna,
case SHADER_OPACITY:
assert(0);
case SHADER_ZERO:
+ case SHADER_BLACK:
+ case SHADER_WHITE:
break;
case SHADER_CONSTANT:
gen3_fs_dcl(FS_T8);
@@ -858,9 +864,16 @@ gen3_composite_emit_shader(struct sna *sna,
}
if (mask == NULL) {
- if (src->u.gen3.type == SHADER_ZERO) {
+ switch (src->u.gen3.type) {
+ case SHADER_ZERO:
gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
goto done;
+ case SHADER_BLACK:
+ gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
+ goto done;
+ case SHADER_WHITE:
+ gen3_fs_mov(FS_OC, gen3_fs_operand_one());
+ goto done;
}
if (src->alpha_fixup && dst_is_alpha) {
gen3_fs_mov(FS_OC, gen3_fs_operand_one());
@@ -893,7 +906,10 @@ gen3_composite_emit_shader(struct sna *sna,
case SHADER_NONE:
case SHADER_CONSTANT:
+ case SHADER_WHITE:
+ case SHADER_BLACK:
case SHADER_ZERO:
+ assert(0);
break;
}
@@ -930,6 +946,8 @@ gen3_composite_emit_shader(struct sna *sna,
break;
case SHADER_NONE:
case SHADER_ZERO:
+ case SHADER_BLACK:
+ case SHADER_WHITE:
assert(0);
break;
}
@@ -962,6 +980,8 @@ gen3_composite_emit_shader(struct sna *sna,
case SHADER_CONSTANT:
case SHADER_NONE:
case SHADER_ZERO:
+ case SHADER_BLACK:
+ case SHADER_WHITE:
break;
}
if (src->alpha_fixup)
@@ -991,20 +1011,36 @@ gen3_composite_emit_shader(struct sna *sna,
break;
case SHADER_OPACITY:
- if (dst_is_alpha) {
- gen3_fs_mul(out_reg,
- gen3_fs_operand(src_reg, W, W, W, W),
- gen3_fs_operand(FS_T0 + t, X, X, X, X));
- } else {
- gen3_fs_mul(out_reg,
- gen3_fs_operand(src_reg, X, Y, Z, W),
- gen3_fs_operand(FS_T0 + t, X, X, X, X));
+ switch (src->u.gen3.type) {
+ case SHADER_BLACK:
+ case SHADER_WHITE:
+ if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
+ gen3_fs_mov(out_reg,
+ gen3_fs_operand(FS_T0 + t, X, X, X, X));
+ } else {
+ gen3_fs_mov(out_reg,
+ gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
+ }
+ break;
+ default:
+ if (dst_is_alpha) {
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, W, W, W, W),
+ gen3_fs_operand(FS_T0 + t, X, X, X, X));
+ } else {
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, X, Y, Z, W),
+ gen3_fs_operand(FS_T0 + t, X, X, X, X));
+ }
}
goto mask_done;
case SHADER_CONSTANT:
- case SHADER_NONE:
case SHADER_ZERO:
+ case SHADER_BLACK:
+ case SHADER_WHITE:
+ assert(0);
+ case SHADER_NONE:
break;
}
if (mask->alpha_fixup)
@@ -1013,9 +1049,18 @@ gen3_composite_emit_shader(struct sna *sna,
gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));
if (dst_is_alpha) {
- gen3_fs_mul(out_reg,
- gen3_fs_operand(src_reg, W, W, W, W),
- gen3_fs_operand(mask_reg, W, W, W, W));
+ switch (src->u.gen3.type) {
+ case SHADER_BLACK:
+ case SHADER_WHITE:
+ gen3_fs_mov(out_reg,
+ gen3_fs_operand(mask_reg, W, W, W, W));
+ break;
+ default:
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, W, W, W, W),
+ gen3_fs_operand(mask_reg, W, W, W, W));
+ break;
+ }
} else {
/* If component alpha is active in the mask and the blend
* operation uses the source alpha, then we know we don't
@@ -1028,18 +1073,43 @@ gen3_composite_emit_shader(struct sna *sna,
* source value (src.X * mask.A).
*/
if (op->has_component_alpha) {
- if (gen3_blend_op[blend].src_alpha)
- gen3_fs_mul(out_reg,
- gen3_fs_operand(src_reg, W, W, W, W),
- gen3_fs_operand_reg(mask_reg));
- else
+ switch (src->u.gen3.type) {
+ case SHADER_WHITE:
+ case SHADER_BLACK:
+ if (gen3_blend_op[blend].src_alpha)
+ gen3_fs_mov(out_reg,
+ gen3_fs_operand_reg(mask_reg));
+ else
+ gen3_fs_mov(out_reg,
+ gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
+ break;
+ default:
+ if (gen3_blend_op[blend].src_alpha)
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, W, W, W, W),
+ gen3_fs_operand_reg(mask_reg));
+ else
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand_reg(src_reg),
+ gen3_fs_operand_reg(mask_reg));
+ break;
+ }
+ } else {
+ switch (src->u.gen3.type) {
+ case SHADER_WHITE:
+ gen3_fs_mov(out_reg,
+ gen3_fs_operand(mask_reg, W, W, W, W));
+ break;
+ case SHADER_BLACK:
+ gen3_fs_mov(out_reg,
+ gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
+ break;
+ default:
gen3_fs_mul(out_reg,
gen3_fs_operand_reg(src_reg),
- gen3_fs_operand_reg(mask_reg));
- } else {
- gen3_fs_mul(out_reg,
- gen3_fs_operand_reg(src_reg),
- gen3_fs_operand(mask_reg, W, W, W, W));
+ gen3_fs_operand(mask_reg, W, W, W, W));
+ break;
+ }
}
}
mask_done:
@@ -1202,6 +1272,8 @@ static void gen3_emit_composite_state(struct sna *sna,
case SHADER_NONE:
assert(0);
case SHADER_ZERO:
+ case SHADER_BLACK:
+ case SHADER_WHITE:
break;
case SHADER_CONSTANT:
if (op->src.u.gen3.mode != state->last_diffuse) {
@@ -1235,6 +1307,8 @@ static void gen3_emit_composite_state(struct sna *sna,
switch (op->mask.u.gen3.type) {
case SHADER_NONE:
case SHADER_ZERO:
+ case SHADER_BLACK:
+ case SHADER_WHITE:
break;
case SHADER_CONSTANT:
if (op->mask.u.gen3.mode != state->last_specular) {
@@ -1765,6 +1839,10 @@ gen3_init_solid(struct sna_composite_channel *channel, uint32_t color)
channel->u.gen3.type = SHADER_CONSTANT;
if (color == 0)
channel->u.gen3.type = SHADER_ZERO;
+ else if (color == 0xff000000)
+ channel->u.gen3.type = SHADER_BLACK;
+ else if (color == 0xffffffff)
+ channel->u.gen3.type = SHADER_WHITE;
if ((color & 0xff000000) == 0xff000000)
channel->is_opaque = true;
@@ -2141,6 +2219,20 @@ static inline uint8_t mult(uint32_t s, uint32_t m, int shift)
return (s * m) >> 8;
}
+static inline bool is_constant_ps(uint32_t type)
+{
+ switch (type) {
+ case SHADER_NONE: /* be warned! */
+ case SHADER_ZERO:
+ case SHADER_BLACK:
+ case SHADER_WHITE:
+ case SHADER_CONSTANT:
+ return true;
+ default:
+ return false;
+ }
+}
+
static Bool
gen3_render_composite(struct sna *sna,
uint8_t op,
@@ -2276,18 +2368,16 @@ gen3_render_composite(struct sna *sna,
* into the single source value that we get to blend with.
*/
tmp->has_component_alpha = TRUE;
- if (tmp->mask.u.gen3.type == SHADER_CONSTANT &&
- tmp->mask.u.gen3.mode == 0xffffffff) {
+ if (tmp->mask.u.gen3.type == SHADER_WHITE) {
tmp->mask.u.gen3.type = SHADER_NONE;
tmp->has_component_alpha = FALSE;
- } else if (tmp->src.u.gen3.type == SHADER_CONSTANT &&
- tmp->src.u.gen3.mode == 0xffffffff) {
+ } else if (tmp->src.u.gen3.type == SHADER_WHITE) {
tmp->src = tmp->mask;
tmp->mask.u.gen3.type = SHADER_NONE;
tmp->mask.bo = NULL;
tmp->has_component_alpha = FALSE;
- } else if (tmp->src.u.gen3.type == SHADER_CONSTANT &&
- tmp->mask.u.gen3.type == SHADER_CONSTANT) {
+ } else if (is_constant_ps(tmp->src.u.gen3.type) &&
+ is_constant_ps(tmp->mask.u.gen3.type)) {
uint32_t a,r,g,b;
a = mult(tmp->src.u.gen3.mode,
@@ -2309,6 +2399,7 @@ gen3_render_composite(struct sna *sna,
tmp->mask.u.gen3.mode,
a << 24 | r << 16 | g << 8 | b));
+ tmp->src.u.gen3.type = SHADER_CONSTANT;
tmp->src.u.gen3.mode =
a << 24 | r << 16 | g << 8 | b;
@@ -2330,10 +2421,12 @@ gen3_render_composite(struct sna *sna,
tmp->src.is_affine, tmp->mask.is_affine));
tmp->prim_emit = gen3_emit_composite_primitive;
- if (tmp->mask.u.gen3.type == SHADER_NONE ||
- tmp->mask.u.gen3.type == SHADER_CONSTANT) {
+ if (is_constant_ps(tmp->mask.u.gen3.type)) {
switch (tmp->src.u.gen3.type) {
case SHADER_NONE:
+ case SHADER_ZERO:
+ case SHADER_BLACK:
+ case SHADER_WHITE:
case SHADER_CONSTANT:
tmp->prim_emit = gen3_emit_composite_primitive_constant;
break;
@@ -2353,7 +2446,7 @@ gen3_render_composite(struct sna *sna,
}
} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
if (tmp->mask.transform == NULL) {
- if (tmp->src.u.gen3.type == SHADER_CONSTANT)
+ if (is_constant_ps(tmp->src.u.gen3.type))
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
else if (tmp->src.transform == NULL)
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
@@ -2363,19 +2456,13 @@ gen3_render_composite(struct sna *sna,
}
tmp->floats_per_vertex = 2;
- if (tmp->src.u.gen3.type != SHADER_CONSTANT &&
- tmp->src.u.gen3.type != SHADER_ZERO)
+ if (!is_constant_ps(tmp->src.u.gen3.type))
tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
- if (tmp->mask.u.gen3.type != SHADER_NONE &&
- tmp->mask.u.gen3.type != SHADER_CONSTANT)
+ if (!is_constant_ps(tmp->mask.u.gen3.type))
tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
DBG(("%s: floats_per_vertex = 2 + %d + %d = %d\n", __FUNCTION__,
- (tmp->src.u.gen3.type != SHADER_CONSTANT &&
- tmp->src.u.gen3.type != SHADER_ZERO) ?
- tmp->src.is_affine ? 2 : 4 : 0,
- (tmp->mask.u.gen3.type != SHADER_NONE &&
- tmp->mask.u.gen3.type != SHADER_CONSTANT) ?
- tmp->mask.is_affine ? 2 : 4 : 0,
+ !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
+ !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
tmp->floats_per_vertex));
tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
@@ -2795,6 +2882,8 @@ gen3_render_composite_spans(struct sna *sna,
case SHADER_ZERO:
tmp->prim_emit = no_offset ? gen3_emit_composite_spans_primitive_zero_no_offset : gen3_emit_composite_spans_primitive_zero;
break;
+ case SHADER_BLACK:
+ case SHADER_WHITE:
case SHADER_CONSTANT:
tmp->prim_emit = no_offset ? gen3_emit_composite_spans_primitive_constant_no_offset : gen3_emit_composite_spans_primitive_constant;
break;
@@ -2814,8 +2903,7 @@ gen3_render_composite_spans(struct sna *sna,
}
tmp->base.floats_per_vertex = 2;
- if (tmp->base.src.u.gen3.type != SHADER_CONSTANT &&
- tmp->base.src.u.gen3.type != SHADER_ZERO)
+ if (!is_constant_ps(tmp->base.src.u.gen3.type))
tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
tmp->base.floats_per_vertex +=
tmp->base.mask.u.gen3.type == SHADER_OPACITY;
@@ -2887,6 +2975,7 @@ gen3_emit_video_state(struct sna *sna,
sna->render_state.gen3.last_sampler = 0;
sna->render_state.gen3.floats_per_vertex = 4;
sna->render_state.gen3.last_shader = -1;
+ sna->render_state.gen3.last_constants = 0;
if (!is_planar_fourcc(frame->id)) {
OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
commit 29ca1a3922cb0e6f3d7b71857a252e5de81941b5
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 17 17:16:16 2011 +0100
sna/gen3: More removal of memset(0)
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index a6afbb1..11b56ac 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -3745,6 +3745,8 @@ gen3_render_fill(struct sna *sna, uint8_t alu,
tmp->base.src.u.gen3.type = SHADER_CONSTANT;
tmp->base.src.u.gen3.mode =
sna_rgba_for_color(color, dst->drawable.depth);
+ tmp->base.mask.u.gen3.type = SHADER_NONE;
+ tmp->base.u.gen3.num_constants = 0;
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
kgem_submit(&sna->kgem);
@@ -3757,7 +3759,6 @@ gen3_render_fill(struct sna *sna, uint8_t alu,
return TRUE;
}
-
static Bool
gen3_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
uint32_t color,
@@ -3806,7 +3807,6 @@ gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
if (alu == GXclear)
color = 0;
- memset(&tmp, 0, sizeof(tmp));
tmp.op = color == 0 ? PictOpClear : PictOpSrc;
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
@@ -3819,6 +3819,8 @@ gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
tmp.src.u.gen3.type = SHADER_CONSTANT;
tmp.src.u.gen3.mode =
sna_rgba_for_color(color, dst->drawable.depth);
+ tmp.mask.u.gen3.mode = SHADER_NONE;
+ tmp.u.gen3.num_constants = 0;
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_submit(&sna->kgem);
commit 88bc10878830dffefda6ed905c1f598d7a7d67ef
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Oct 16 20:05:49 2011 +0100
sna/gen5: Rearrange the BLT->RENDER workaround
So that we can simply use the pending DRAWRECT as the non-pipelined
flush required following use of a BLT command.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index ab30c9c..516bc35 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1418,11 +1418,13 @@ gen5_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t offset)
{
+ /* drawrect must be first for Ironlake BLT workaround */
+ gen5_emit_drawing_rectangle(sna, op);
+
gen5_emit_binding_table(sna, offset);
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel))
gen5_emit_urb(sna);
gen5_emit_vertex_elements(sna, op);
- gen5_emit_drawing_rectangle(sna, op);
}
static void gen5_bind_surfaces(struct sna *sna,
@@ -2119,7 +2121,7 @@ gen5_copy_bind_surfaces(struct sna *sna,
offset = sna->render_state.gen5.surface_table;
}
- gen5_emit_state(sna, op,offset);
+ gen5_emit_state(sna, op, offset);
}
static Bool
@@ -2186,7 +2188,6 @@ gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
if (kgem_bo_is_dirty(src_bo))
kgem_emit_flush(&sna->kgem);
- gen5_get_batch(sna);
gen5_copy_bind_surfaces(sna, &tmp);
gen5_align_vertex(sna, &tmp);
@@ -2583,10 +2584,13 @@ gen5_render_context_switch(struct kgem *kgem,
/* Ironlake has a limitation that a 3D or Media command can't
* be the first command after a BLT, unless it's
* non-pipelined.
+ *
+ * We do this by ensuring that the non-pipelined drawrect
+ * is always emitted first following a switch from BLT.
*/
if (kgem->mode == KGEM_BLT) {
- kgem->batch[kgem->nbatch++] = CMD_POLY_STIPPLE_OFFSET << 16;
- kgem->batch[kgem->nbatch++] = 0;
+ struct sna *sna = to_sna_from_kgem(kgem);
+ sna->render_state.gen5.drawrect_limit = -1;
}
}
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index aba8d3f..988281c 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -120,7 +120,7 @@ struct kgem {
struct drm_i915_gem_relocation_entry reloc[384];
};
-#define KGEM_BATCH_RESERVED 4 /* need a bit of extra room for workarounds */
+#define KGEM_BATCH_RESERVED 1
#define KGEM_RELOC_RESERVED 4
#define KGEM_EXEC_RESERVED 1
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 4ba3f57..ae043ed 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -301,6 +301,12 @@ to_sna_from_drawable(DrawablePtr drawable)
return to_sna_from_screen(drawable->pScreen);
}
+static inline struct sna *
+to_sna_from_kgem(struct kgem *kgem)
+{
+ return container_of(kgem, struct sna, kgem);
+}
+
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
#endif
commit 18aed47ba37ccfbcdd72ed251fc97222741c66a7
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 17 16:41:14 2011 +0100
sna: Fast path common colour conversions
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 9d84747..15b09bf 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -433,6 +433,20 @@ sna_get_pixel_from_rgba(uint32_t * pixel,
int rbits, bbits, gbits, abits;
int rshift, bshift, gshift, ashift;
+ switch (format) {
+ case PICT_x8r8g8b8:
+ alpha = 0xffff;
+ case PICT_a8r8g8b8:
+ *pixel = ((alpha >> 8 << 24) |
+ (red >> 8 << 16) |
+ (green & 0xff00) |
+ (blue >> 8));
+ return TRUE;
+ case PICT_a8:
+ *pixel = alpha >> 8;
+ return TRUE;
+ }
+
rbits = PICT_FORMAT_R(format);
gbits = PICT_FORMAT_G(format);
bbits = PICT_FORMAT_B(format);
commit a3466c8b69afeda95180fcdc97b56d31f7a1d1ad
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Oct 16 20:52:47 2011 +0100
sna/accel: Implement a simpler path for CopyArea between the same pixmaps
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index fe6ad48..b58cfc8 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1252,6 +1252,102 @@ move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv,
}
static void
+sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
+ BoxPtr box, int n,
+ int dx, int dy,
+ Bool reverse, Bool upsidedown, Pixel bitplane,
+ void *closure)
+{
+ struct sna *sna = to_sna_from_drawable(src);
+ PixmapPtr pixmap = get_drawable_pixmap(src);
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ int alu = gc ? gc->alu : GXcopy;
+ RegionRec region;
+ int16_t tx, ty;
+
+ if (n == 0 || (dx | dy) == 0)
+ return;
+
+ DBG(("%s (boxes=%dx[(%d, %d), (%d, %d)...], src=+(%d, %d), alu=%d, pix.size=%dx%d)\n",
+ __FUNCTION__, n,
+ box[0].x1, box[0].y1, box[0].x2, box[0].y2,
+ dx, dy, alu,
+ pixmap->drawable.width, pixmap->drawable.height));
+
+ pixman_region_init_rects(&region, box, n);
+ get_drawable_deltas(dst, pixmap, &tx, &ty);
+ RegionTranslate(&region, tx, ty);
+ assert_pixmap_contains_box(pixmap, RegionExtents(&region));
+
+ if (priv && priv->gpu_bo) {
+ if (!sna_pixmap_move_to_gpu(pixmap)) {
+ DBG(("%s: fallback - not a pure copy and failed to move dst to GPU\n",
+ __FUNCTION__));
+ goto fallback;
+ }
+
+ if (!sna->render.copy_boxes(sna, alu,
+ pixmap, priv->gpu_bo, dx, dy,
+ pixmap, priv->gpu_bo, 0, 0,
+ box, n)) {
+ DBG(("%s: fallback - accelerated copy boxes failed\n",
+ __FUNCTION__));
+ goto fallback;
+ }
+
+ sna_damage_add(&priv->gpu_damage, &region);
+ } else {
+ FbBits *dst_bits, *src_bits;
+ int stride, bpp;
+
+fallback:
+ DBG(("%s: fallback", __FUNCTION__));
+ sna_pixmap_move_to_cpu(pixmap, true);
+
+ stride = pixmap->devKind;
+ bpp = pixmap->drawable.bitsPerPixel;
+ if (alu == GXcopy && !reverse && !upsidedown && bpp >= 8) {
+ dst_bits = pixmap->devPrivate.ptr;
+ src_bits = (FbBits *)
+ ((char *)pixmap->devPrivate.ptr +
+ dy * stride + dx * bpp / 8);
+
+ do {
+ memcpy_blt(src_bits, dst_bits, bpp,
+ stride, stride,
+ box->x1, box->y1,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1);
+ box++;
+ } while (--n);
+ } else {
+ DBG(("%s: alu==GXcopy? %d, reverse? %d, upsidedown? %d, bpp? %d\n",
+ __FUNCTION__, alu == GXcopy, reverse, upsidedown, bpp));
+ dst_bits = pixmap->devPrivate.ptr;
+ stride /= sizeof(FbBits);
+ do {
+ fbBlt(dst_bits + (box->y1 + dy) * stride,
+ stride,
+ (box->x1 + dx) * bpp,
+
+ dst_bits + box->y1 * stride,
+ stride,
+ box->x1 * bpp,
+
+ (box->x2 - box->x1) * bpp,
+ (box->y2 - box->y1),
+
+ alu, -1, bpp,
+ reverse, upsidedown);
+ box++;
+ }while (--n);
+ }
+ }
+ RegionUninit(&region);
+}
+
+static void
sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
BoxPtr box, int n,
int dx, int dy,
@@ -1274,6 +1370,13 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (n == 0)
return;
+ if (src_pixmap == dst_pixmap)
+ return sna_self_copy_boxes(src, dst, gc,
+ box, n,
+ dx, dy,
+ reverse, upsidedown, bitplane,
+ closure);
+
DBG(("%s (boxes=%dx[(%d, %d), (%d, %d)...], src=+(%d, %d), alu=%d, src.size=%dx%d, dst.size=%dx%d)\n",
__FUNCTION__, n,
box[0].x1, box[0].y1, box[0].x2, box[0].y2,
@@ -1617,7 +1720,8 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc,
src_x, src_y,
width, height,
dst_x, dst_y,
- sna_copy_boxes, 0, NULL);
+ src == dst ? sna_self_copy_boxes : sna_copy_boxes,
+ 0, NULL);
}
static Bool
@@ -3775,7 +3879,7 @@ sna_copy_window(WindowPtr win, DDXPointRec origin, RegionPtr src)
#endif
miCopyRegion(&pixmap->drawable, &pixmap->drawable,
- NULL, &dst, dx, dy, sna_copy_boxes, 0, NULL);
+ NULL, &dst, dx, dy, sna_self_copy_boxes, 0, NULL);
RegionUninit(&dst);
}
commit 8afa05191a9a337ce6e02b1064de2876f5635435
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 17 00:55:51 2011 +0100
sna: Improve debug output for mi/fb fallbacks
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 49bd604..fe6ad48 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2304,8 +2304,8 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
BoxRec extents;
RegionRec region;
- DBG(("%s(mode=%d, n=%d, pt[0]=(%d, %d)\n",
- __FUNCTION__, mode, n, pt[0].x, pt[0].y));
+ DBG(("%s(mode=%d, n=%d, pt[0]=(%d, %d), lineWidth=%d\n",
+ __FUNCTION__, mode, n, pt[0].x, pt[0].y, gc->lineWidth));
if (sna_poly_line_extents(drawable, gc, mode, n, pt, &extents))
return;
@@ -2365,6 +2365,7 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
}
fallback:
+ DBG(("%s: fallback\n", __FUNCTION__));
if (gc->lineWidth) {
if (gc->lineStyle != LineSolid)
miWideDash(drawable, gc, mode, n, pt);
@@ -2373,7 +2374,6 @@ fallback:
return;
}
- DBG(("%s: fallback\n", __FUNCTION__));
region_set(&region, &extents);
region_maybe_clip(&region, gc->pCompositeClip);
if (!RegionNotEmpty(&region))
@@ -2383,6 +2383,7 @@ fallback:
sna_drawable_move_region_to_cpu(drawable, &region, true);
RegionUninit(&region);
+ DBG(("%s: fbPolyLine\n", __FUNCTION__));
fbPolyLine(drawable, gc, mode, n, pt);
}
@@ -2607,8 +2608,10 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
BoxRec extents;
RegionRec region;
- DBG(("%s(n=%d, first=((%d, %d), (%d, %d))\n", __FUNCTION__,
- n, seg->x1, seg->y1, seg->x2, seg->y2));
+ DBG(("%s(n=%d, first=((%d, %d), (%d, %d)), lineWidth=%d\n",
+ __FUNCTION__,
+ n, seg->x1, seg->y1, seg->x2, seg->y2,
+ gc->lineWidth));
if (sna_poly_segment_extents(drawable, gc, n, seg, &extents))
return;
@@ -2678,12 +2681,12 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
}
fallback:
+ DBG(("%s: fallback\n", __FUNCTION__));
if (gc->lineWidth) {
miPolySegment(drawable, gc, n, seg);
return;
}
- DBG(("%s: fallback\n", __FUNCTION__));
region_set(&region, &extents);
region_maybe_clip(&region, gc->pCompositeClip);
if (!RegionNotEmpty(&region))
@@ -2693,6 +2696,7 @@ fallback:
sna_drawable_move_region_to_cpu(drawable, &region, true);
RegionUninit(&region);
+ DBG(("%s: fbPolySegment\n", __FUNCTION__));
fbPolySegment(drawable, gc, n, seg);
}
@@ -2764,6 +2768,8 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
BoxRec extents;
RegionRec region;
+ DBG(("%s(n=%d, lineWidth=%d\n", __FUNCTION__, n, gc->lineWidth));
+
if (sna_poly_arc_extents(drawable, gc, n, arc, &extents))
return;
@@ -2787,6 +2793,7 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
}
fallback:
+ DBG(("%s -- fallback\n", __FUNCTION__));
if (gc->lineWidth) {
miPolyArc(drawable, gc, n, arc);
return;
@@ -2802,6 +2809,7 @@ fallback:
RegionUninit(&region);
/* XXX may still fallthrough to miZeroPolyArc */
+ DBG(("%s -- fbPolyArc\n", __FUNCTION__));
fbPolyArc(drawable, gc, n, arc);
}
commit 2b118658d5b9cfa56132f91238f2ea61fba8b357
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Oct 17 15:00:01 2011 +0100
sna/gen3: Remove memset(0) from fill_boxes
And initialise only the state used for the operation.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 0a51e64..a6afbb1 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -3629,7 +3629,6 @@ gen3_render_fill_boxes(struct sna *sna,
if (pixel == 0)
op = PictOpClear;
- memset(&tmp, 0, sizeof(tmp));
tmp.op = op;
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
@@ -3641,6 +3640,8 @@ gen3_render_fill_boxes(struct sna *sna,
tmp.src.u.gen3.type = op == PictOpClear ? SHADER_ZERO : SHADER_CONSTANT;
tmp.src.u.gen3.mode = pixel;
+ tmp.mask.u.gen3.type = SHADER_NONE;
+ tmp.u.gen3.num_constants = 0;
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
kgem_submit(&sna->kgem);
More information about the xorg-commit
mailing list