xf86-video-intel: 3 commits - src/sna/brw src/sna/gen6_render.c src/sna/gen6_render.h src/sna/gen7_render.c src/sna/sna_render.h src/sna/sna_stream.c
Chris Wilson
ickle at kemper.freedesktop.org
Tue Jul 31 11:24:24 PDT 2012
src/sna/brw/brw.h | 20 ++--
src/sna/brw/brw_sf.c | 9 +
src/sna/brw/brw_wm.c | 229 +++++++++++++++++++++++++++++++-------------------
src/sna/gen6_render.c | 60 +++++++++----
src/sna/gen6_render.h | 5 -
src/sna/gen7_render.c | 4
src/sna/sna_render.h | 6 -
src/sna/sna_stream.c | 16 ++-
8 files changed, 224 insertions(+), 125 deletions(-)
New commits:
commit fd3a1236051265fab700aad689a171de47d7338f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Jul 31 10:20:51 2012 +0100
sna/gen6: Enable 8 pixel dispatch
This gives a small performance increase when operating with rectangles,
which is reasonably frequent.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index e3a103c..78baa3a 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -55,6 +55,14 @@
#define NO_RING_SWITCH 0
#define PREFER_RENDER 0
+#define USE_8_PIXEL_DISPATCH 1
+#define USE_16_PIXEL_DISPATCH 1
+#define USE_32_PIXEL_DISPATCH 0
+
+#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
+#error "Must select at least 8, 16 or 32 pixel dispatch"
+#endif
+
#define GEN6_MAX_SIZE 8192
struct gt_info {
@@ -612,29 +620,36 @@ gen6_emit_sf(struct sna *sna, bool has_mask)
static void
gen6_emit_wm(struct sna *sna, unsigned int kernel)
{
+ const uint32_t *kernels;
+
if (sna->render_state.gen6.kernel == kernel)
return;
sna->render_state.gen6.kernel = kernel;
+ kernels = sna->render_state.gen6.wm_kernel[kernel];
- DBG(("%s: switching to %s, num_surfaces=%d\n",
+ DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
__FUNCTION__,
- wm_kernels[kernel].name,
- wm_kernels[kernel].num_surfaces));
+ wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
+ kernels[0], kernels[1], kernels[2]));
OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
- OUT_BATCH(sna->render_state.gen6.wm_kernel[kernel]);
+ OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
- OUT_BATCH(0);
- OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); /* DW4 */
+ OUT_BATCH(0); /* scratch space */
+ OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
+ 8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
+ 6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
- GEN6_3DSTATE_WM_DISPATCH_ENABLE |
- GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
+ (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
+ (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
+ (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
+ GEN6_3DSTATE_WM_DISPATCH_ENABLE);
OUT_BATCH(wm_kernels[kernel].num_inputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
- OUT_BATCH(0);
- OUT_BATCH(0);
+ OUT_BATCH(kernels[2]);
+ OUT_BATCH(kernels[1]);
}
static bool
@@ -4156,18 +4171,31 @@ static bool gen6_render_setup(struct sna *sna)
for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
if (wm_kernels[m].size) {
- state->wm_kernel[m] =
+ state->wm_kernel[m][1] =
sna_static_stream_add(&general,
wm_kernels[m].data,
wm_kernels[m].size,
64);
} else {
- state->wm_kernel[m] =
- sna_static_stream_compile_wm(sna, &general,
- wm_kernels[m].data,
- 16);
+ if (USE_8_PIXEL_DISPATCH) {
+ state->wm_kernel[m][0] =
+ sna_static_stream_compile_wm(sna, &general,
+ wm_kernels[m].data, 8);
+ }
+
+ if (USE_16_PIXEL_DISPATCH) {
+ state->wm_kernel[m][1] =
+ sna_static_stream_compile_wm(sna, &general,
+ wm_kernels[m].data, 16);
+ }
+
+ if (USE_32_PIXEL_DISPATCH) {
+ state->wm_kernel[m][2] =
+ sna_static_stream_compile_wm(sna, &general,
+ wm_kernels[m].data, 32);
+ }
}
- assert(state->wm_kernel[m]);
+ assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
}
ss = sna_static_stream_map(&general,
diff --git a/src/sna/gen6_render.h b/src/sna/gen6_render.h
index 9cc8e14..2201a62 100644
--- a/src/sna/gen6_render.h
+++ b/src/sna/gen6_render.h
@@ -97,10 +97,13 @@
# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT 27
# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
/* DW4 */
-# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
+# define GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT 16
+# define GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT 8
+# define GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT 0
/* DW5 */
# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_3DSTATE_WM_32_DISPATCH_ENABLE (1 << 2)
# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 6381ccf..8c64016 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -4277,7 +4277,7 @@ static bool gen7_render_setup(struct sna *sna)
wm_kernels[m].data, 32);
}
}
- assert(state->wm_kernel[m][1]);
+ assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
}
ss = sna_static_stream_map(&general,
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 943c248..142f222 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -396,7 +396,7 @@ struct gen6_render_state {
uint32_t sf_state;
uint32_t sf_mask_state;
uint32_t wm_state;
- uint32_t wm_kernel[GEN6_KERNEL_COUNT];
+ uint32_t wm_kernel[GEN6_KERNEL_COUNT][3];
uint32_t cc_vp;
uint32_t cc_blend;
commit 8922b804bc9ed27957c81f7cda4812ab4ecbd4de
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Jul 31 10:20:51 2012 +0100
sna/gen7: Enable 8 pixel dispatch
This gives a small performance increase when operating with rectangles,
which is reasonably frequent.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 29ee4af..6381ccf 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -54,7 +54,7 @@
#define NO_RING_SWITCH 0
-#define USE_8_PIXEL_DISPATCH 0
+#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0
commit 492093d04b1486dd34aafe2f109a77ddeb836f18
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Jul 31 18:39:17 2012 +0100
sna: Generate shaders for SNB+ 8-pixel dispatch
Not ideal yet, sampling an alpha-only surface using SIMD8 only seems to
ever return 0...
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/brw/brw.h b/src/sna/brw/brw.h
index a39b253..f0f3ac8 100644
--- a/src/sna/brw/brw.h
+++ b/src/sna/brw/brw.h
@@ -1,14 +1,14 @@
#include "brw_eu.h"
-void brw_sf_kernel__nomask(struct brw_compile *p);
-void brw_sf_kernel__mask(struct brw_compile *p);
+bool brw_sf_kernel__nomask(struct brw_compile *p);
+bool brw_sf_kernel__mask(struct brw_compile *p);
-void brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);
diff --git a/src/sna/brw/brw_sf.c b/src/sna/brw/brw_sf.c
index 0c69433..6f82171 100644
--- a/src/sna/brw/brw_sf.c
+++ b/src/sna/brw/brw_sf.c
@@ -1,6 +1,6 @@
#include "brw.h"
-void brw_sf_kernel__nomask(struct brw_compile *p)
+bool brw_sf_kernel__nomask(struct brw_compile *p)
{
struct brw_reg inv, v0, v1, v2, delta;
@@ -23,10 +23,11 @@ void brw_sf_kernel__nomask(struct brw_compile *p)
brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0),
false, true, 4, 0, true, true, 0,
BRW_URB_SWIZZLE_TRANSPOSE);
+
+ return true;
}
-void
-brw_sf_kernel__mask(struct brw_compile *p)
+bool brw_sf_kernel__mask(struct brw_compile *p)
{
struct brw_reg inv, v0, v1, v2;
@@ -48,4 +49,6 @@ brw_sf_kernel__mask(struct brw_compile *p)
brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0),
false, true, 4, 0, true, true, 0,
BRW_URB_SWIZZLE_TRANSPOSE);
+
+ return true;
}
diff --git a/src/sna/brw/brw_wm.c b/src/sna/brw/brw_wm.c
index 9a8af5f..f96881a 100644
--- a/src/sna/brw/brw_wm.c
+++ b/src/sna/brw/brw_wm.c
@@ -34,7 +34,8 @@ static void brw_wm_xy(struct brw_compile *p, int dw)
brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1)));
}
-static void brw_wm_affine_st(struct brw_compile *p, int dw, int channel, int msg)
+static void brw_wm_affine_st(struct brw_compile *p, int dw,
+ int channel, int msg)
{
int uv;
@@ -88,8 +89,8 @@ static inline struct brw_reg sample_result(int dw, int result)
WRITEMASK_XYZW);
}
-static void brw_wm_sample(struct brw_compile *p, int dw,
- int channel, int msg, int result)
+static int brw_wm_sample(struct brw_compile *p, int dw,
+ int channel, int msg, int result)
{
struct brw_reg src0;
bool header;
@@ -107,15 +108,24 @@ static void brw_wm_sample(struct brw_compile *p, int dw,
brw_SAMPLE(p, sample_result(dw, result), msg, src0,
channel+1, channel, WRITEMASK_XYZW, 0,
2*len, len+header, header, simd(dw));
+ return result;
}
-static void brw_wm_sample__alpha(struct brw_compile *p, int dw,
- int channel, int msg, int result)
+static int brw_wm_sample__alpha(struct brw_compile *p, int dw,
+ int channel, int msg, int result)
{
struct brw_reg src0;
- int len;
+ int mlen, rlen;
+
+ if (dw == 8) {
+ /* SIMD8 sample return is not masked */
+ mlen = 3;
+ rlen = 4;
+ } else {
+ mlen = 5;
+ rlen = 2;
+ }
- len = dw == 16 ? 4 : 2;
if (p->gen >= 60)
src0 = brw_message_reg(msg);
else
@@ -123,27 +133,31 @@ static void brw_wm_sample__alpha(struct brw_compile *p, int dw,
brw_SAMPLE(p, sample_result(dw, result), msg, src0,
channel+1, channel, WRITEMASK_W, 0,
- len/2, len+1, true, simd(dw));
+ rlen, mlen, true, simd(dw));
+
+ if (dw == 8)
+ result += 3;
+
+ return result;
}
-static void brw_wm_affine(struct brw_compile *p, int dw,
- int channel, int msg, int result)
+static int brw_wm_affine(struct brw_compile *p, int dw,
+ int channel, int msg, int result)
{
brw_wm_affine_st(p, dw, channel, msg);
- brw_wm_sample(p, dw, channel, msg, result);
+ return brw_wm_sample(p, dw, channel, msg, result);
}
-static void brw_wm_affine__alpha(struct brw_compile *p, int dw,
- int channel, int msg, int result)
+static int brw_wm_affine__alpha(struct brw_compile *p, int dw,
+ int channel, int msg, int result)
{
brw_wm_affine_st(p, dw, channel, msg);
- brw_wm_sample__alpha(p, dw, channel, msg, result);
+ return brw_wm_sample__alpha(p, dw, channel, msg, result);
}
static inline struct brw_reg null_result(int dw)
{
- return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_NULL, 0,
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0,
BRW_REGISTER_TYPE_UW,
dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
@@ -185,8 +199,8 @@ static void brw_fb_write(struct brw_compile *p, int dw)
insn->header.compression_control = BRW_COMPRESSION_NONE;
if (p->gen >= 60) {
- src0 = brw_message_reg(2);
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+ src0 = brw_message_reg(2);
header = false;
} else {
insn->header.destreg__conditionalmod = 0;
@@ -206,14 +220,19 @@ static void brw_wm_write(struct brw_compile *p, int dw, int src)
{
int n;
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-
if (dw == 8 && p->gen >= 60) {
- brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src, 0));
+ /* XXX pixel execution mask? */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0));
+ brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0));
brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0));
+ brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0));
goto done;
}
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
for (n = 0; n < 4; n++) {
if (p->gen >= 60) {
brw_MOV(p,
@@ -242,38 +261,36 @@ done:
brw_fb_write(p, dw);
}
-static inline struct brw_reg mask_a8(int nr)
-{
- return brw_reg(BRW_GENERAL_REGISTER_FILE,
- nr, 0,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_0,
- BRW_WIDTH_8,
- BRW_HORIZONTAL_STRIDE_1,
- BRW_SWIZZLE_XYZW,
- WRITEMASK_XYZW);
-}
-
-static void brw_wm_write__mask(struct brw_compile *p,
- int dw,
+static void brw_wm_write__mask(struct brw_compile *p, int dw,
int src, int mask)
{
int n;
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-
if (dw == 8 && p->gen >= 60) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
brw_MUL(p,
brw_message_reg(2),
- brw_vec8_grf(src, 0),
- mask_a8(mask));
+ brw_vec8_grf(src+0, 0),
+ brw_vec8_grf(mask, 0));
+ brw_MUL(p,
+ brw_message_reg(3),
+ brw_vec8_grf(src+1, 0),
+ brw_vec8_grf(mask, 0));
brw_MUL(p,
brw_message_reg(4),
brw_vec8_grf(src+2, 0),
- mask_a8(mask));
+ brw_vec8_grf(mask, 0));
+ brw_MUL(p,
+ brw_message_reg(5),
+ brw_vec8_grf(src+3, 0),
+ brw_vec8_grf(mask, 0));
+
goto done;
}
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
for (n = 0; n < 4; n++) {
if (p->gen >= 60) {
brw_MUL(p,
@@ -306,25 +323,36 @@ done:
brw_fb_write(p, dw);
}
-static void brw_wm_write__mask_ca(struct brw_compile *p,
- int dw, int src, int mask)
+static void brw_wm_write__mask_ca(struct brw_compile *p, int dw,
+ int src, int mask)
{
int n;
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-
if (dw == 8 && p->gen >= 60) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
brw_MUL(p,
brw_message_reg(2),
- brw_vec8_grf(src, 0),
- brw_vec8_grf(mask, 0));
+ brw_vec8_grf(src + 0, 0),
+ brw_vec8_grf(mask + 0, 0));
+ brw_MUL(p,
+ brw_message_reg(3),
+ brw_vec8_grf(src + 1, 0),
+ brw_vec8_grf(mask + 1, 0));
brw_MUL(p,
brw_message_reg(4),
- brw_vec8_grf(src + 2, 0),
+ brw_vec8_grf(src + 2, 0),
brw_vec8_grf(mask + 2, 0));
+ brw_MUL(p,
+ brw_message_reg(5),
+ brw_vec8_grf(src + 3, 0),
+ brw_vec8_grf(mask + 3, 0));
+
goto done;
}
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
for (n = 0; n < 4; n++) {
if (p->gen >= 60) {
brw_MUL(p,
@@ -357,56 +385,71 @@ done:
brw_fb_write(p, dw);
}
-void
+bool
brw_wm_kernel__affine(struct brw_compile *p, int dispatch)
{
- int src = 12;
-
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_affine(p, dispatch, 0, 1, src);
- brw_wm_write(p, dispatch, src);
+ brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12));
+
+ return true;
}
-void
+bool
brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 20;
+ int src, mask;
+
+ if (dispatch == 8)
+ return false; /* XXX sampler alpha returns all 0 */
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_affine(p, dispatch, 0, 1, src);
- brw_wm_affine__alpha(p, dispatch, 1, 7, mask);
+
+ src = brw_wm_affine(p, dispatch, 0, 1, 12);
+ mask = brw_wm_affine__alpha(p, dispatch, 1, 6, 20);
brw_wm_write__mask(p, dispatch, src, mask);
+
+ return true;
}
-void
+bool
brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 20;
+ int src, mask;
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_affine(p, dispatch, 0, 1, src);
- brw_wm_affine(p, dispatch, 1, 7, mask);
+
+ src = brw_wm_affine(p, dispatch, 0, 1, 12);
+ mask = brw_wm_affine(p, dispatch, 1, 6, 20);
brw_wm_write__mask_ca(p, dispatch, src, mask);
+
+ return true;
}
-void
+bool
brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 14;
+ int src, mask;
+
+ if (dispatch == 8)
+ return false; /* XXX sampler alpha returns all 0 */
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_affine__alpha(p, dispatch, 0, 1, src);
- brw_wm_affine(p, dispatch, 1, 7, mask);
+
+ src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12);
+ mask = brw_wm_affine(p, dispatch, 1, 6, 16);
brw_wm_write__mask(p, dispatch, mask, src);
+
+ return true;
}
/* Projective variants */
-static void brw_wm_projective_st(struct brw_compile *p, int dw, int channel, int msg)
+static void brw_wm_projective_st(struct brw_compile *p, int dw,
+ int channel, int msg)
{
int uv;
@@ -480,63 +523,77 @@ static void brw_wm_projective_st(struct brw_compile *p, int dw, int channel, int
}
}
-static void brw_wm_projective(struct brw_compile *p, int dw,
- int channel, int msg, int result)
+static int brw_wm_projective(struct brw_compile *p, int dw,
+ int channel, int msg, int result)
{
brw_wm_projective_st(p, dw, channel, msg);
- brw_wm_sample(p, dw, channel, msg, result);
+ return brw_wm_sample(p, dw, channel, msg, result);
}
-static void brw_wm_projective__alpha(struct brw_compile *p, int dw,
+static int brw_wm_projective__alpha(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
brw_wm_projective_st(p, dw, channel, msg);
- brw_wm_sample__alpha(p, dw, channel, msg, result);
+ return brw_wm_sample__alpha(p, dw, channel, msg, result);
}
-void
+bool
brw_wm_kernel__projective(struct brw_compile *p, int dispatch)
{
- int src = 12;
-
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_projective(p, dispatch, 0, 1, src);
- brw_wm_write(p, dispatch, src);
+ brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12));
+
+ return true;
}
-void
+bool
brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 20;
+ int src, mask;
+
+ if (dispatch == 8)
+ return false; /* XXX sampler alpha returns all 0 */
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_projective(p, dispatch, 0, 1, src);
- brw_wm_projective__alpha(p, dispatch, 1, 7, mask);
+
+ src = brw_wm_projective(p, dispatch, 0, 1, 12);
+ mask = brw_wm_projective__alpha(p, dispatch, 1, 6, 20);
brw_wm_write__mask(p, dispatch, src, mask);
+
+ return true;
}
-void
+bool
brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 20;
+ int src, mask;
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_projective(p, dispatch, 0, 1, src);
- brw_wm_projective(p, dispatch, 1,7, mask);
+
+ src = brw_wm_projective(p, dispatch, 0, 1, 12);
+ mask = brw_wm_projective(p, dispatch, 1, 6, 20);
brw_wm_write__mask_ca(p, dispatch, src, mask);
+
+ return true;
}
-void
+bool
brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 14;
+ int src, mask;
+
+ if (dispatch == 8)
+ return false; /* XXX sampler alpha returns all 0 */
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_projective__alpha(p, dispatch, 0, 1, src);
- brw_wm_projective(p, dispatch, 1, 7, mask);
+
+ src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12);
+ mask = brw_wm_projective(p, dispatch, 1, 6, 16);
brw_wm_write__mask(p, dispatch, mask, src);
+
+ return true;
}
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 011b1b7..943c248 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -482,11 +482,11 @@ uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream,
void *ptr);
unsigned sna_static_stream_compile_sf(struct sna *sna,
struct sna_static_stream *stream,
- void (*compile)(struct brw_compile *));
+ bool (*compile)(struct brw_compile *));
unsigned sna_static_stream_compile_wm(struct sna *sna,
struct sna_static_stream *stream,
- void (*compile)(struct brw_compile *, int),
+ bool (*compile)(struct brw_compile *, int),
int width);
struct kgem_bo *sna_static_stream_fini(struct sna *sna,
struct sna_static_stream *stream);
diff --git a/src/sna/sna_stream.c b/src/sna/sna_stream.c
index 66a8c46..1a0a86b 100644
--- a/src/sna/sna_stream.c
+++ b/src/sna/sna_stream.c
@@ -97,7 +97,7 @@ struct kgem_bo *sna_static_stream_fini(struct sna *sna,
unsigned
sna_static_stream_compile_sf(struct sna *sna,
struct sna_static_stream *stream,
- void (*compile)(struct brw_compile *))
+ bool (*compile)(struct brw_compile *))
{
struct brw_compile p;
@@ -105,7 +105,11 @@ sna_static_stream_compile_sf(struct sna *sna,
sna_static_stream_map(stream,
64*sizeof(uint32_t), 64));
- compile(&p);
+ if (!compile(&p)) {
+ stream->used -= 64*sizeof(uint32_t);
+ return 0;
+ }
+
assert(p.nr_insn*sizeof(struct brw_instruction) <= 64*sizeof(uint32_t));
stream->used -= 64*sizeof(uint32_t) - p.nr_insn*sizeof(struct brw_instruction);
@@ -115,7 +119,7 @@ sna_static_stream_compile_sf(struct sna *sna,
unsigned
sna_static_stream_compile_wm(struct sna *sna,
struct sna_static_stream *stream,
- void (*compile)(struct brw_compile *, int),
+ bool (*compile)(struct brw_compile *, int),
int dispatch_width)
{
struct brw_compile p;
@@ -124,7 +128,11 @@ sna_static_stream_compile_wm(struct sna *sna,
sna_static_stream_map(stream,
256*sizeof(uint32_t), 64));
- compile(&p, dispatch_width);
+ if (!compile(&p, dispatch_width)) {
+ stream->used -= 256*sizeof(uint32_t);
+ return 0;
+ }
+
assert(p.nr_insn*sizeof(struct brw_instruction) <= 256*sizeof(uint32_t));
stream->used -= 256*sizeof(uint32_t) - p.nr_insn*sizeof(struct brw_instruction);
More information about the xorg-commit
mailing list