xf86-video-intel: 2 commits - src/sna/gen6_render.c src/sna/sna_accel.c
Chris Wilson
ickle at kemper.freedesktop.org
Mon Oct 20 13:26:02 PDT 2014
src/sna/gen6_render.c | 129 ++++++++++++++++----------
src/sna/sna_accel.c | 248 +++++++++++++++++++++++---------------------------
2 files changed, 203 insertions(+), 174 deletions(-)
New commits:
commit 83a3d9147308f4777324abdea859ac0c108f03c6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Oct 19 09:11:33 2014 +0100
sna: Prefer the GPU for wide lines and arcs
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 1903b11..d4ee2b0 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -69,8 +69,7 @@
#define DEFAULT_TILING I915_TILING_X
#define USE_INPLACE 1
-#define USE_WIDE_SPANS 0 /* -1 force CPU, 1 force GPU */
-#define USE_ZERO_SPANS 1 /* -1 force CPU, 1 force GPU */
+#define USE_SPANS 0 /* -1 force CPU, 1 force GPU */
#define USE_CPU_BO 1
#define USE_USERPTR_UPLOADS 1
#define USE_USERPTR_DOWNLOADS 1
@@ -9620,49 +9619,30 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc,
return 1 | blt << 2 | clip << 1;
}
-/* Only use our spans code if the destination is busy and we can't perform
- * the operation in place.
- *
- * Currently it looks to be faster to use the GPU for zero spans on all
- * platforms.
- */
inline static int
-_use_zero_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
+_use_line_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents, unsigned flags)
{
- if (USE_ZERO_SPANS)
- return USE_ZERO_SPANS > 0;
+ uint32_t ignored;
- return !drawable_gc_inplace_hint(drawable, gc);
-}
+ if (USE_SPANS)
+ return USE_SPANS > 0;
-static int
-use_zero_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
-{
- bool ret = _use_zero_spans(drawable, gc, extents);
- DBG(("%s? %d\n", __FUNCTION__, ret));
- return ret;
-}
+ if (flags & RECTILINEAR)
+ return PREFER_GPU;
-/* Only use our spans code if the destination is busy and we can't perform
- * the operation in place.
- *
- * Currently it looks to be faster to use the CPU for wide spans on all
- * platforms, slow MI code. But that does not take into account the true
- * cost of readback?
- */
-inline static int
-_use_wide_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
-{
- if (USE_WIDE_SPANS)
- return USE_WIDE_SPANS > 0;
+ if (gc->lineStyle != LineSolid && gc->lineWidth == 0)
+ return 0;
+
+ if (gc_is_solid(gc, &ignored))
+ return PREFER_GPU;
return !drawable_gc_inplace_hint(drawable, gc);
}
-static int
-use_wide_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
+inline static int
+use_line_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents, unsigned flags)
{
- int ret = _use_wide_spans(drawable, gc, extents);
+ int ret = _use_line_spans(drawable, gc, extents, flags);
DBG(("%s? %d\n", __FUNCTION__, ret));
return ret;
}
@@ -9733,27 +9713,24 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
goto spans_fallback;
}
+ data.bo = sna_drawable_use_bo(drawable, PREFER_GPU,
+ &data.region.extents,
+ &data.damage);
+ if (data.bo == NULL)
+ goto fallback;
+
if (gc_is_solid(gc, &color)) {
DBG(("%s: trying solid fill [%08x]\n",
__FUNCTION__, (unsigned)color));
-
if (data.flags & RECTILINEAR) {
- data.bo = sna_drawable_use_bo(drawable, PREFER_GPU,
- &data.region.extents,
- &data.damage);
- if (data.bo &&
- sna_poly_line_blt(drawable,
+ if (sna_poly_line_blt(drawable,
data.bo, data.damage,
gc, color, mode, n, pt,
&data.region.extents,
data.flags & IS_CLIPPED))
return;
} else { /* !rectilinear */
- if ((data.bo = sna_drawable_use_bo(drawable,
- use_zero_spans(drawable, gc, &data.region.extents),
- &data.region.extents,
- &data.damage)) &&
- sna_poly_zero_line_blt(drawable,
+ if (sna_poly_zero_line_blt(drawable,
data.bo, data.damage,
gc, mode, n, pt,
&data.region.extents,
@@ -9763,80 +9740,76 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
}
} else if (data.flags & RECTILINEAR) {
/* Try converting these to a set of rectangles instead */
- data.bo = sna_drawable_use_bo(drawable, PREFER_GPU,
- &data.region.extents, &data.damage);
- if (data.bo) {
- DDXPointRec p1, p2;
- xRectangle *rect;
- int i;
-
- DBG(("%s: converting to rectagnles\n", __FUNCTION__));
+ DDXPointRec p1, p2;
+ xRectangle *rect;
+ int i;
- rect = malloc (n * sizeof (xRectangle));
- if (rect == NULL)
- return;
+ DBG(("%s: converting to rectagnles\n", __FUNCTION__));
- p1 = pt[0];
- for (i = 1; i < n; i++) {
- if (mode == CoordModePrevious) {
- p2.x = p1.x + pt[i].x;
- p2.y = p1.y + pt[i].y;
- } else
- p2 = pt[i];
- if (p1.x < p2.x) {
- rect[i].x = p1.x;
- rect[i].width = p2.x - p1.x + 1;
- } else if (p1.x > p2.x) {
- rect[i].x = p2.x;
- rect[i].width = p1.x - p2.x + 1;
- } else {
- rect[i].x = p1.x;
- rect[i].width = 1;
- }
- if (p1.y < p2.y) {
- rect[i].y = p1.y;
- rect[i].height = p2.y - p1.y + 1;
- } else if (p1.y > p2.y) {
- rect[i].y = p2.y;
- rect[i].height = p1.y - p2.y + 1;
- } else {
- rect[i].y = p1.y;
- rect[i].height = 1;
- }
+ rect = malloc (n * sizeof (xRectangle));
+ if (rect == NULL)
+ return;
- /* don't paint last pixel */
- if (gc->capStyle == CapNotLast) {
- if (p1.x == p2.x)
- rect[i].height--;
- else
- rect[i].width--;
- }
- p1 = p2;
+ p1 = pt[0];
+ for (i = 1; i < n; i++) {
+ if (mode == CoordModePrevious) {
+ p2.x = p1.x + pt[i].x;
+ p2.y = p1.y + pt[i].y;
+ } else
+ p2 = pt[i];
+ if (p1.x < p2.x) {
+ rect[i].x = p1.x;
+ rect[i].width = p2.x - p1.x + 1;
+ } else if (p1.x > p2.x) {
+ rect[i].x = p2.x;
+ rect[i].width = p1.x - p2.x + 1;
+ } else {
+ rect[i].x = p1.x;
+ rect[i].width = 1;
}
-
- if (gc->fillStyle == FillTiled) {
- i = sna_poly_fill_rect_tiled_blt(drawable,
- data.bo, data.damage,
- gc, n - 1, rect + 1,
- &data.region.extents,
- data.flags & IS_CLIPPED);
+ if (p1.y < p2.y) {
+ rect[i].y = p1.y;
+ rect[i].height = p2.y - p1.y + 1;
+ } else if (p1.y > p2.y) {
+ rect[i].y = p2.y;
+ rect[i].height = p1.y - p2.y + 1;
} else {
- i = sna_poly_fill_rect_stippled_blt(drawable,
- data.bo, data.damage,
- gc, n - 1, rect + 1,
- &data.region.extents,
- data.flags & IS_CLIPPED);
+ rect[i].y = p1.y;
+ rect[i].height = 1;
}
- free (rect);
- if (i)
- return;
+ /* don't paint last pixel */
+ if (gc->capStyle == CapNotLast) {
+ if (p1.x == p2.x)
+ rect[i].height--;
+ else
+ rect[i].width--;
+ }
+ p1 = p2;
}
+
+ if (gc->fillStyle == FillTiled) {
+ i = sna_poly_fill_rect_tiled_blt(drawable,
+ data.bo, data.damage,
+ gc, n - 1, rect + 1,
+ &data.region.extents,
+ data.flags & IS_CLIPPED);
+ } else {
+ i = sna_poly_fill_rect_stippled_blt(drawable,
+ data.bo, data.damage,
+ gc, n - 1, rect + 1,
+ &data.region.extents,
+ data.flags & IS_CLIPPED);
+ }
+ free (rect);
+
+ if (i)
+ return;
}
spans_fallback:
if ((data.bo = sna_drawable_use_bo(drawable,
- use_wide_spans(drawable, gc, &data.region.extents),
+ use_line_spans(drawable, gc, &data.region.extents, data.flags),
&data.region.extents, &data.damage))) {
DBG(("%s: converting line into spans\n", __FUNCTION__));
get_drawable_deltas(drawable, data.pixmap, &data.dx, &data.dy);
@@ -10645,26 +10618,26 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
if (gc->lineStyle != LineSolid || gc->lineWidth > 1)
goto spans_fallback;
+
+ data.bo = sna_drawable_use_bo(drawable, PREFER_GPU,
+ &data.region.extents,
+ &data.damage);
+ if (data.bo == NULL)
+ goto fallback;
+
if (gc_is_solid(gc, &color)) {
DBG(("%s: trying blt solid fill [%08x, flags=%x] paths\n",
__FUNCTION__, (unsigned)color, data.flags));
if (data.flags & RECTILINEAR) {
- if ((data.bo = sna_drawable_use_bo(drawable, PREFER_GPU,
- &data.region.extents,
- &data.damage)) &&
- sna_poly_segment_blt(drawable,
+ if (sna_poly_segment_blt(drawable,
data.bo, data.damage,
gc, color, n, seg,
&data.region.extents,
data.flags & IS_CLIPPED))
return;
} else {
- if ((data.bo = sna_drawable_use_bo(drawable,
- use_zero_spans(drawable, gc, &data.region.extents),
- &data.region.extents,
- &data.damage)) &&
- sna_poly_zero_segment_blt(drawable,
+ if (sna_poly_zero_segment_blt(drawable,
data.bo, data.damage,
gc, n, seg,
&data.region.extents,
@@ -10676,13 +10649,7 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
xRectangle *rect;
int i;
- data.bo = sna_drawable_use_bo(drawable, PREFER_GPU,
- &data.region.extents,
- &data.damage);
- if (data.bo == NULL)
- goto fallback;
-
- DBG(("%s: converting to rectagnles\n", __FUNCTION__));
+ DBG(("%s: converting to rectangles\n", __FUNCTION__));
rect = malloc (n * sizeof (xRectangle));
if (rect == NULL)
@@ -10740,7 +10707,7 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
spans_fallback:
if ((data.bo = sna_drawable_use_bo(drawable,
- use_wide_spans(drawable, gc, &data.region.extents),
+ use_line_spans(drawable, gc, &data.region.extents, data.flags),
&data.region.extents,
&data.damage))) {
void (*line)(DrawablePtr, GCPtr, int, int, DDXPointPtr);
@@ -11564,8 +11531,7 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
if (!PM_IS_SOLID(drawable, gc->planemask))
goto fallback;
- if ((data.bo = sna_drawable_use_bo(drawable,
- use_wide_spans(drawable, gc, &data.region.extents),
+ if ((data.bo = sna_drawable_use_bo(drawable, PREFER_GPU,
&data.region.extents, &data.damage))) {
uint32_t color;
@@ -11883,6 +11849,29 @@ get_pixel(PixmapPtr pixmap)
}
}
+inline static int
+_use_fill_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents, unsigned flags)
+{
+ if (USE_SPANS)
+ return USE_SPANS > 0;
+
+ if (gc->fillStyle == FillTiled && !gc->tileIsPixel &&
+ sna_pixmap_is_gpu(gc->tile.pixmap)) {
+ DBG(("%s: source is already on the gpu\n", __FUNCTION__));
+ return PREFER_GPU | FORCE_GPU;
+ }
+
+ return PREFER_GPU;
+}
+
+static int
+use_fill_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents, unsigned flags)
+{
+ int ret = _use_fill_spans(drawable, gc, extents, flags);
+ DBG(("%s? %d\n", __FUNCTION__, ret));
+ return ret;
+}
+
static void
sna_poly_fill_polygon(DrawablePtr draw, GCPtr gc,
int shape, int mode,
@@ -11938,7 +11927,7 @@ sna_poly_fill_polygon(DrawablePtr draw, GCPtr gc,
goto fallback;
if ((data.bo = sna_drawable_use_bo(draw,
- (shape == Convex ? use_zero_spans : use_wide_spans)(draw, gc, &data.region.extents),
+ use_fill_spans(draw, gc, &data.region.extents, data.flags),
&data.region.extents,
&data.damage))) {
uint32_t color;
@@ -14999,7 +14988,8 @@ sna_poly_fill_arc(DrawablePtr draw, GCPtr gc, int n, xArc *arc)
if (!PM_IS_SOLID(draw, gc->planemask))
goto fallback;
- if ((data.bo = sna_drawable_use_bo(draw, PREFER_GPU,
+ if ((data.bo = sna_drawable_use_bo(draw,
+ use_fill_spans(draw, gc, &data.region.extents, data.flags),
&data.region.extents,
&data.damage))) {
uint32_t color;
commit 316155db98aac4d5d0a7077e86453e4d41a3029d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Oct 18 10:58:03 2014 +0100
sna/gen6: Apply gen7 flushing
Clemens Eisserer noticed that glyphs would randomly disappear whilst
being rendered on his Sandybridge, a sign that the GPU is not flushing
its internal state on pipeline changes. As a precaution, adopt the
Ivybridge flush semantics (whilst preserving the required Sandybridge
workarounds).
Reported-and-tested-by: Clemens Eisserer <linuxhippy at gmail.com>
Reported-and-tested-by: Ilia Mirkin <imirkin at alum.mit.edu>
References: https://bugs.freedesktop.org/show_bug.cgi?id=85158
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index a05196a..95eb415 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -47,6 +47,10 @@
#include "gen4_source.h"
#include "gen4_vertex.h"
+#define ALWAYS_INVALIDATE 0
+#define ALWAYS_FLUSH 0
+#define ALWAYS_STALL 0
+
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
@@ -397,6 +401,42 @@ gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
return base + !is_affine;
}
+inline static void
+gen6_emit_pipe_invalidate(struct sna *sna)
+{
+ OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
+ GEN6_PIPE_CONTROL_TC_FLUSH |
+ GEN6_PIPE_CONTROL_CS_STALL);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+}
+
+inline static void
+gen6_emit_pipe_flush(struct sna *sna, bool need_stall)
+{
+ unsigned stall;
+
+ stall = 0;
+ if (need_stall)
+ stall = GEN6_PIPE_CONTROL_CS_STALL;
+
+ OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | stall);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+}
+
+inline static void
+gen6_emit_pipe_stall(struct sna *sna)
+{
+ OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
+ GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+}
+
static void
gen6_emit_urb(struct sna *sna)
{
@@ -547,13 +587,13 @@ gen6_emit_invariant(struct sna *sna)
sna->render_state.gen6.needs_invariant = false;
}
-static bool
+static void
gen6_emit_cc(struct sna *sna, int blend)
{
struct gen6_render_state *render = &sna->render_state.gen6;
if (render->blend == blend)
- return blend != NO_BLEND;
+ return;
DBG(("%s: blend = %x\n", __FUNCTION__, blend));
@@ -568,7 +608,6 @@ gen6_emit_cc(struct sna *sna, int blend)
}
render->blend = blend;
- return blend != NO_BLEND;
}
static void
@@ -692,7 +731,7 @@ gen6_emit_drawing_rectangle(struct sna *sna,
if (sna->render_state.gen6.drawrect_limit == limit &&
sna->render_state.gen6.drawrect_offset == offset)
- return false;
+ return true;
/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
* produced by non-pipelined state commands), software needs to first
@@ -703,13 +742,8 @@ gen6_emit_drawing_rectangle(struct sna *sna,
* BEFORE the pipe-control with a post-sync op and no write-cache
* flushes.
*/
- if (!sna->render_state.gen6.first_state_packet) {
- OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
- OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
- GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
- OUT_BATCH(0);
- OUT_BATCH(0);
- }
+ if (!sna->render_state.gen6.first_state_packet)
+ gen6_emit_pipe_stall(sna);
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
@@ -729,7 +763,7 @@ gen6_emit_drawing_rectangle(struct sna *sna,
sna->render_state.gen6.drawrect_offset = offset;
sna->render_state.gen6.drawrect_limit = limit;
- return true;
+ return false;
}
static void
@@ -853,51 +887,56 @@ gen6_emit_vertex_elements(struct sna *sna,
}
static void
-gen6_emit_flush(struct sna *sna)
-{
- OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
- OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
- GEN6_PIPE_CONTROL_TC_FLUSH |
- GEN6_PIPE_CONTROL_CS_STALL);
- OUT_BATCH(0);
- OUT_BATCH(0);
-}
-
-static void
gen6_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
- bool need_flush, need_stall;
+ bool need_invalidate;
+ bool need_flush;
+ bool need_stall;
assert(op->dst.bo->exec);
- need_stall = wm_binding_table & 1;
- need_flush = false;
- if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
- need_flush = need_stall;
- gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
- gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
- gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
- gen6_emit_vertex_elements(sna, op);
+ need_flush = wm_binding_table & 1;
+ if (ALWAYS_FLUSH)
+ need_flush = true;
- need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
- if (gen6_emit_drawing_rectangle(sna, op))
- need_stall = false;
- if (need_flush || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
- gen6_emit_flush(sna);
+ wm_binding_table &= ~1;
+ need_stall = sna->render_state.gen6.surface_table != wm_binding_table;
+ if (ALWAYS_STALL)
+ need_stall = true;
+
+ need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
+ if (ALWAYS_INVALIDATE)
+ need_invalidate = true;
+
+ if (need_invalidate) {
+ gen6_emit_pipe_invalidate(sna);
kgem_clear_dirty(&sna->kgem);
assert(op->dst.bo->exec);
kgem_bo_mark_dirty(op->dst.bo);
+
+ need_flush = false;
need_stall = false;
+ sna->render_state.gen6.first_state_packet = true;
}
- if (need_stall) {
- OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
- OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
- GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
- OUT_BATCH(0);
- OUT_BATCH(0);
+ if (need_flush) {
+ gen6_emit_pipe_flush(sna, need_stall);
+ need_stall = false;
+ sna->render_state.gen6.first_state_packet = true;
}
+
+ need_stall &= gen6_emit_drawing_rectangle(sna, op);
+ if (need_stall)
+ gen6_emit_pipe_stall(sna);
+
+ gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
+ gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
+ gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
+ gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
+ gen6_emit_vertex_elements(sna, op);
+ gen6_emit_binding_table(sna, wm_binding_table);
+
sna->render_state.gen6.first_state_packet = false;
}
@@ -912,7 +951,7 @@ static bool gen6_magic_ca_pass(struct sna *sna,
DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));
- gen6_emit_flush(sna);
+ gen6_emit_pipe_stall(sna);
gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
gen6_emit_wm(sna,
@@ -1176,7 +1215,7 @@ static int gen6_get_rectangles__flush(struct sna *sna,
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen6_magic_ca_pass(sna, op)) {
- gen6_emit_flush(sna);
+ gen6_emit_pipe_stall(sna);
gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
gen6_emit_wm(sna,
GEN6_KERNEL(op->u.gen6.flags),
More information about the xorg-commit
mailing list