xf86-video-intel: 4 commits - src/sna/gen4_render.c
Chris Wilson
ickle at kemper.freedesktop.org
Fri Nov 23 05:59:08 PST 2012
src/sna/gen4_render.c | 256 ++++++++++++++++++++++++--------------------------
1 file changed, 123 insertions(+), 133 deletions(-)
New commits:
commit 43aff6708fe97aa2fae0e30f98fc7cd9d7311b75
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Nov 23 13:37:44 2012 +0000
sna/gen4: Update render fill routines to use the dummy mask
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 547676d..e8ad253 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2714,8 +2714,7 @@ gen4_render_composite_spans(struct sna *sna,
break;
}
- tmp->base.mask.bo = sna_render_get_solid(sna, 0);
- if (tmp->base.mask.bo == NULL)
+ if (!gen4_composite_solid_init(sna, &tmp->base.mask, 0))
goto cleanup_src;
tmp->base.is_affine = tmp->base.src.is_affine;
@@ -3096,57 +3095,29 @@ fallback:
}
static void
-gen4_fill_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
-{
- bool dirty = kgem_bo_is_dirty(op->dst.bo);
- uint32_t *binding_table;
- uint16_t offset;
-
- gen4_get_batch(sna);
-
- binding_table = gen4_composite_get_binding_table(sna, &offset);
-
- binding_table[0] =
- gen4_bind_bo(sna,
- op->dst.bo, op->dst.width, op->dst.height,
- gen4_get_dest_format(op->dst.format),
- true);
- binding_table[1] =
- gen4_bind_bo(sna,
- op->src.bo, 1, 1,
- GEN4_SURFACEFORMAT_B8G8R8A8_UNORM,
- false);
-
- if (sna->kgem.surface == offset &&
- *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) {
- sna->kgem.surface +=
- sizeof(struct gen4_surface_state_padded)/sizeof(uint32_t);
- offset = sna->render_state.gen4.surface_table;
- }
-
- gen4_emit_state(sna, op, offset | dirty);
-}
-
-static void
gen4_render_fill_rectangle(struct sna *sna,
const struct sna_composite_op *op,
int x, int y, int w, int h)
{
- gen4_get_rectangles(sna, op, 1, gen4_fill_bind_surfaces);
+ gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
OUT_VERTEX(x+w, y+h);
OUT_VERTEX_F(1);
OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
OUT_VERTEX(x, y+h);
OUT_VERTEX_F(0);
OUT_VERTEX_F(1);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(1);
OUT_VERTEX(x, y);
OUT_VERTEX_F(0);
OUT_VERTEX_F(0);
-
- _FLUSH();
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(0);
}
static bool
@@ -3220,22 +3191,21 @@ gen4_render_fill_boxes(struct sna *sna,
tmp.dst.format = format;
tmp.dst.bo = dst_bo;
- tmp.src.bo = sna_render_get_solid(sna, pixel);
- tmp.src.filter = SAMPLER_FILTER_NEAREST;
- tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
+ gen4_composite_solid_init(sna, &tmp.src, pixel);
+ gen4_composite_solid_init(sna, &tmp.mask, 0);
tmp.is_affine = true;
- tmp.floats_per_vertex = 3;
- tmp.floats_per_rect = 9;
+ tmp.floats_per_vertex = 5;
+ tmp.floats_per_rect = 15;
tmp.u.gen4.wm_kernel = WM_KERNEL;
- tmp.u.gen4.ve_id = 1;
+ tmp.u.gen4.ve_id = 1 | 1 << 1;
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
- gen4_fill_bind_surfaces(sna, &tmp);
+ gen4_bind_surfaces(sna, &tmp);
gen4_align_vertex(sna, &tmp);
do {
@@ -3248,6 +3218,7 @@ gen4_render_fill_boxes(struct sna *sna,
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ kgem_bo_destroy(&sna->kgem, tmp.mask.bo);
return true;
}
@@ -3287,6 +3258,7 @@ gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
+ kgem_bo_destroy(&sna->kgem, op->base.mask.bo);
}
static bool
@@ -3324,29 +3296,23 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
op->base.need_magic_ca_pass = 0;
op->base.has_component_alpha = 0;
- op->base.src.bo =
- sna_render_get_solid(sna,
- sna_rgba_for_color(color,
- dst->drawable.depth));
- op->base.src.filter = SAMPLER_FILTER_NEAREST;
- op->base.src.repeat = SAMPLER_EXTEND_REPEAT;
-
- op->base.mask.bo = NULL;
- op->base.mask.filter = SAMPLER_FILTER_NEAREST;
- op->base.mask.repeat = SAMPLER_EXTEND_NONE;
+ gen4_composite_solid_init(sna, &op->base.src,
+ sna_rgba_for_color(color,
+ dst->drawable.depth));
+ gen4_composite_solid_init(sna, &op->base.mask, 0);
op->base.is_affine = true;
- op->base.floats_per_vertex = 3;
- op->base.floats_per_rect = 9;
+ op->base.floats_per_vertex = 5;
+ op->base.floats_per_rect = 15;
op->base.u.gen4.wm_kernel = WM_KERNEL;
- op->base.u.gen4.ve_id = 1;
+ op->base.u.gen4.ve_id = 1 | 1 << 1;
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
- gen4_fill_bind_surfaces(sna, &op->base);
+ gen4_bind_surfaces(sna, &op->base);
gen4_align_vertex(sna, &op->base);
op->blt = gen4_render_fill_op_blt;
@@ -3406,38 +3372,33 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
tmp.dst.bo = bo;
tmp.dst.x = tmp.dst.y = 0;
- tmp.src.bo =
- sna_render_get_solid(sna,
- sna_rgba_for_color(color,
- dst->drawable.depth));
- tmp.src.filter = SAMPLER_FILTER_NEAREST;
- tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
-
- tmp.mask.bo = NULL;
- tmp.mask.filter = SAMPLER_FILTER_NEAREST;
- tmp.mask.repeat = SAMPLER_EXTEND_NONE;
+ gen4_composite_solid_init(sna, &tmp.src,
+ sna_rgba_for_color(color,
+ dst->drawable.depth));
+ gen4_composite_solid_init(sna, &tmp.mask, 0);
tmp.is_affine = true;
- tmp.floats_per_vertex = 3;
- tmp.floats_per_rect = 9;
+ tmp.floats_per_vertex = 5;
+ tmp.floats_per_rect = 15;
tmp.has_component_alpha = 0;
tmp.need_magic_ca_pass = false;
tmp.u.gen4.wm_kernel = WM_KERNEL;
- tmp.u.gen4.ve_id = 1;
+ tmp.u.gen4.ve_id = 1 | 1 << 1;
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
_kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, bo, NULL));
}
- gen4_fill_bind_surfaces(sna, &tmp);
+ gen4_bind_surfaces(sna, &tmp);
gen4_align_vertex(sna, &tmp);
gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1);
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ kgem_bo_destroy(&sna->kgem, tmp.mask.bo);
return true;
}
commit 352910712266202ef017066891ec383fd037fc4a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Nov 23 13:29:01 2012 +0000
sna/gen4: Move the flush from inside the spans to emit_state()
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index cb7e79c..547676d 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1386,14 +1386,14 @@ gen4_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
- bool flush = false;
+ bool flush = wm_binding_table & 1;
gen4_emit_drawing_rectangle(sna, op);
- flush |= gen4_emit_binding_table(sna, wm_binding_table);
+ flush |= gen4_emit_binding_table(sna, wm_binding_table & ~1);
flush |= gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
gen4_emit_vertex_elements(sna, op);
- if (flush || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
+ if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
kgem_bo_is_dirty(op->src.bo),
kgem_bo_is_dirty(op->mask.bo),
@@ -1401,13 +1401,17 @@ gen4_emit_state(struct sna *sna,
OUT_BATCH(MI_FLUSH);
kgem_clear_dirty(&sna->kgem);
kgem_bo_mark_dirty(op->dst.bo);
+ flush = false;
}
+ if (flush)
+ OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
}
static void
gen4_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
+ bool dirty = kgem_bo_is_dirty(op->dst.bo);
uint32_t *binding_table;
uint16_t offset;
@@ -1442,7 +1446,7 @@ gen4_bind_surfaces(struct sna *sna,
offset = sna->render_state.gen4.surface_table;
}
- gen4_emit_state(sna, op, offset);
+ gen4_emit_state(sna, op, offset | dirty);
}
fastcall static void
@@ -1558,6 +1562,7 @@ static uint32_t gen4_bind_video_source(struct sna *sna,
static void gen4_video_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
+ bool dirty = kgem_bo_is_dirty(op->dst.bo);
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
@@ -1619,7 +1624,7 @@ static void gen4_video_bind_surfaces(struct sna *sna,
src_surf_format);
}
- gen4_emit_state(sna, op, offset);
+ gen4_emit_state(sna, op, offset | dirty);
}
static bool
@@ -2588,8 +2593,6 @@ gen4_render_composite_spans_box(struct sna *sna,
gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces);
op->prim_emit(sna, op, box, opacity);
-
- _FLUSH();
}
static void
@@ -2766,6 +2769,7 @@ cleanup_dst:
static void
gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
{
+ bool dirty = kgem_bo_is_dirty(op->dst.bo);
uint32_t *binding_table;
uint16_t offset;
@@ -2790,7 +2794,7 @@ gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
offset = sna->render_state.gen4.surface_table;
}
- gen4_emit_state(sna, op, offset);
+ gen4_emit_state(sna, op, offset | dirty);
}
static void
@@ -3094,6 +3098,7 @@ fallback:
static void
gen4_fill_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
{
+ bool dirty = kgem_bo_is_dirty(op->dst.bo);
uint32_t *binding_table;
uint16_t offset;
@@ -3119,7 +3124,7 @@ gen4_fill_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
offset = sna->render_state.gen4.surface_table;
}
- gen4_emit_state(sna, op, offset);
+ gen4_emit_state(sna, op, offset | dirty);
}
static void
commit 217f3e835b99002669999f818afa0d5c3a1cc852
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Nov 23 12:32:14 2012 +0000
sna/gen4: Backport the more efficient composite box emitters
Now that we aren't flushing after every single rectangle, we can strive
for a little efficiency.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 35425c2..cb7e79c 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1468,13 +1468,19 @@ gen4_render_composite_box(struct sna *sna,
{
struct sna_composite_rectangles r;
+ DBG((" %s: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2));
+
+ gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
+
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
- gen4_render_composite_blt(sna, op, &r);
+ op->prim_emit(sna, op, &r);
}
static void
@@ -1490,16 +1496,28 @@ gen4_render_composite_boxes(struct sna *sna,
op->mask.width, op->mask.height));
do {
- struct sna_composite_rectangles r;
-
- r.dst.x = box->x1;
- r.dst.y = box->y1;
- r.width = box->x2 - box->x1;
- r.height = box->y2 - box->y1;
- r.mask = r.src = r.dst;
- gen4_render_composite_blt(sna, op, &r);
- box++;
- } while (--nbox);
+ int nbox_this_time;
+
+ nbox_this_time = gen4_get_rectangles(sna, op, nbox,
+ gen4_bind_surfaces);
+ nbox -= nbox_this_time;
+
+ do {
+ struct sna_composite_rectangles r;
+
+ DBG((" %s: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2));
+
+ r.dst.x = box->x1;
+ r.dst.y = box->y1;
+ r.width = box->x2 - box->x1;
+ r.height = box->y2 - box->y1;
+ r.mask = r.src = r.dst;
+ op->prim_emit(sna, op, &r);
+ box++;
+ } while (--nbox_this_time);
+ } while (nbox);
}
#ifndef MAX
commit d3145e3f8146e7d864d32aec49c44c04d619e56a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Nov 23 12:28:21 2012 +0000
sna/gen4: Use a dummy white mask to avoid the flush w/a when compositing
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index f4ddb82..35425c2 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -65,12 +65,8 @@
gen4_vertex_flush(sna); \
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); \
} while (0)
-#define FLUSH(OP) do { \
- if ((OP)->mask.bo == NULL) _FLUSH(); \
-} while (0)
#else
#define _FLUSH()
-#define FLUSH(OP)
#endif
#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
@@ -706,23 +702,23 @@ gen4_emit_composite_primitive_solid(struct sna *sna,
} dst;
v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
+ sna->render.vertex_used += 15;
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
- v[1] = 1.;
- v[2] = 1.;
+ v[3] = v[1] = 1.;
+ v[4] = v[2] = 1.;
dst.p.x = r->dst.x;
- v[3] = dst.f;
- v[4] = 0.;
- v[5] = 1.;
+ v[5] = dst.f;
+ v[8] = v[6] = 0.;
+ v[9] = v[7] = 1.;
dst.p.y = r->dst.y;
- v[6] = dst.f;
- v[7] = 0.;
- v[8] = 0.;
+ v[10] = dst.f;
+ v[13] = v[11] = 0.;
+ v[14] = v[12] = 0.;
}
fastcall static void
@@ -738,7 +734,7 @@ gen4_emit_composite_primitive_identity_source(struct sna *sna,
} dst;
v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
+ sna->render.vertex_used += 15;
sx = r->src.x + op->src.offset[0];
sy = r->src.y + op->src.offset[1];
@@ -748,16 +744,22 @@ gen4_emit_composite_primitive_identity_source(struct sna *sna,
v[0] = dst.f;
v[1] = (sx + r->width) * sf[0];
v[2] = (sy + r->height) * sf[1];
+ v[3] = 1.;
+ v[4] = 1.;
dst.p.x = r->dst.x;
- v[3] = dst.f;
- v[4] = sx * sf[0];
- v[5] = v[2];
+ v[5] = dst.f;
+ v[6] = sx * sf[0];
+ v[7] = v[2];
+ v[8] = 0.;
+ v[9] = 1.;
dst.p.y = r->dst.y;
- v[6] = dst.f;
- v[7] = v[4];
- v[8] = sy * sf[1];
+ v[10] = dst.f;
+ v[11] = v[6];
+ v[12] = sy * sf[1];
+ v[13] = 0.;
+ v[14] = 0.;
}
fastcall static void
@@ -772,7 +774,7 @@ gen4_emit_composite_primitive_affine_source(struct sna *sna,
float *v;
v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
+ sna->render.vertex_used += 15;
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
@@ -783,24 +785,30 @@ gen4_emit_composite_primitive_affine_source(struct sna *sna,
&v[1], &v[2]);
v[1] *= op->src.scale[0];
v[2] *= op->src.scale[1];
+ v[3] = 1.;
+ v[4] = 1.;
dst.p.x = r->dst.x;
- v[3] = dst.f;
+ v[5] = dst.f;
_sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y + r->height,
op->src.transform,
- &v[4], &v[5]);
- v[4] *= op->src.scale[0];
- v[5] *= op->src.scale[1];
+ &v[6], &v[7]);
+ v[6] *= op->src.scale[0];
+ v[7] *= op->src.scale[1];
+ v[8] = 0.;
+ v[9] = 1.;
dst.p.y = r->dst.y;
- v[6] = dst.f;
+ v[10] = dst.f;
_sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y,
op->src.transform,
- &v[7], &v[8]);
- v[7] *= op->src.scale[0];
- v[8] *= op->src.scale[1];
+ &v[11], &v[12]);
+ v[11] *= op->src.scale[0];
+ v[12] *= op->src.scale[1];
+ v[13] = 0.;
+ v[14] = 0.;
}
fastcall static void
@@ -1312,19 +1320,17 @@ gen4_emit_vertex_elements(struct sna *sna,
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen4_render_state *render = &sna->render_state.gen4;
- bool has_mask = op->mask.bo != NULL;
- int nelem = has_mask ? 2 : 1;
- int selem;
+ int id = op->u.gen4.ve_id;
+ int selem, nelem;
uint32_t w_component;
uint32_t src_format;
- int id = op->u.gen4.ve_id;
if (render->ve_id == id)
return;
render->ve_id = id;
- if (op->is_affine) {
+ if (id & 1) {
src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
w_component = GEN4_VFCOMPONENT_STORE_1_FLT;
selem = 2;
@@ -1333,6 +1339,7 @@ gen4_emit_vertex_elements(struct sna *sna,
w_component = GEN4_VFCOMPONENT_STORE_SRC;
selem = 3;
}
+ nelem = id & 2 ? 2 : 1;
/* The VUE layout
* dword 0-3: position (x, y, 1.0, 1.0),
@@ -1362,7 +1369,7 @@ gen4_emit_vertex_elements(struct sna *sna,
(2*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
/* u1, v1, w1 */
- if (has_mask) {
+ if (id & 2) {
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */
@@ -1452,9 +1459,6 @@ gen4_render_composite_blt(struct sna *sna,
gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
op->prim_emit(sna, op, r);
-
- /* XXX are the shaders fubar? */
- FLUSH(op);
}
fastcall static void
@@ -2379,25 +2383,27 @@ gen4_render_composite(struct sna *sna,
if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
tmp->prim_emit = gen4_emit_composite_primitive_identity_source_mask;
- tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
} else {
+ /* Use a dummy mask to w/a the flushing issues */
+ if (!gen4_composite_solid_init(sna, &tmp->mask, 0))
+ goto cleanup_src;
+
if (tmp->src.is_solid)
tmp->prim_emit = gen4_emit_composite_primitive_solid;
else if (tmp->src.transform == NULL)
tmp->prim_emit = gen4_emit_composite_primitive_identity_source;
else if (tmp->src.is_affine)
tmp->prim_emit = gen4_emit_composite_primitive_affine_source;
-
- tmp->floats_per_vertex = 3 + !tmp->is_affine;
}
+ tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
tmp->floats_per_rect = 3*tmp->floats_per_vertex;
tmp->u.gen4.wm_kernel =
gen4_choose_composite_kernel(tmp->op,
- tmp->mask.bo != NULL,
+ mask != NULL,
tmp->has_component_alpha,
tmp->is_affine);
- tmp->u.gen4.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine;
+ tmp->u.gen4.ve_id = 1 << 1 | tmp->is_affine;
tmp->blt = gen4_render_composite_blt;
tmp->box = gen4_render_composite_box;
More information about the xorg-commit mailing list