xf86-video-intel: 4 commits - src/sna/gen8_eu.c src/sna/gen8_render.c

Wed Oct 22 11:42:56 PDT 2014

 src/sna/gen8_eu.c     |   28 +++----------------------
 src/sna/gen8_render.c |   54 +++++++++++++++++++++++++-------------------------
 2 files changed, 32 insertions(+), 50 deletions(-)

New commits:
commit e46f26b9e2671468e9accd2f69ffc22fb96b5849
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 22 19:37:42 2014 +0100

    sna/gen8: Load the sampler result directly into the fb write message
    
    With the unified register file (from gen6+), we no longer need to copy
    the sample results into the final write message.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen8_eu.c b/src/sna/gen8_eu.c
index 7b6d887..2fd990a 100644
--- a/src/sna/gen8_eu.c
+++ b/src/sna/gen8_eu.c
@@ -1075,28 +1075,6 @@ static void fb_write(struct brw_compile *p, int dw)
 			      false, true);
 }
 
-static void wm_write(struct brw_compile *p, int dw, int src)
-{
-	int n;
-
-	if (dw == 8) {
-		/* XXX pixel execution mask? */
-		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
-		for (n = 0; n < 4; n++)
-			gen8_MOV(p,
-				 brw_message_reg(2 + n),
-				 brw_vec8_grf(src + n, 0));
-	} else {
-		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-		for (n = 0; n < 4; n++)
-			gen8_MOV(p,
-				 brw_message_reg(2 + 2*n),
-				 brw_vec8_grf(src + 2*n, 0));
-	}
-
-	fb_write(p, dw);
-}
-
 static void wm_write__mask(struct brw_compile *p, int dw,
 			   int src, int mask)
 {
@@ -1183,7 +1161,8 @@ gen8_wm_kernel__affine(struct brw_compile *p, int dispatch)
 {
 	gen8_compile_init(p);
 
-	wm_write(p, dispatch, wm_affine(p, dispatch, 0, 1, 12));
+	wm_affine(p, dispatch, 0, 10, MRF_HACK_START+2);
+	fb_write(p, dispatch);
 	return true;
 }
 
@@ -1304,7 +1283,8 @@ gen8_wm_kernel__projective(struct brw_compile *p, int dispatch)
 {
 	gen8_compile_init(p);
 
-	wm_write(p, dispatch, wm_projective(p, dispatch, 0, 1, 12));
+	wm_projective(p, dispatch, 0, 10, MRF_HACK_START+2);
+	fb_write(p, dispatch);
 	return true;
 }
 
commit dee9ea29917dd5ef4a7de6570cd091f08d334df9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 22 19:40:14 2014 +0100

    sna/gen8: Override the URB output from the VS
    
    Using the override seems to be marginally faster.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen8_render.c b/src/sna/gen8_render.c
index a6a5dd2..d375753 100644
--- a/src/sna/gen8_render.c
+++ b/src/sna/gen8_render.c
@@ -922,6 +922,8 @@ gen8_emit_sf(struct sna *sna, bool has_mask)
 
 	OUT_BATCH(GEN8_3DSTATE_SBE | (4 - 2));
 	OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT |
+		  SBE_FORCE_VERTEX_URB_READ_LENGTH | /* forced is faster */
+		  SBE_FORCE_VERTEX_URB_READ_OFFSET |
 		  1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT |
 		  1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT);
 	OUT_BATCH(0);
commit 7a9bdadd71730adc5266bc6758982abec5917b93
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 22 19:31:10 2014 +0100

    sna/gen8: Clamp URB allocations for GT3
    
    GT3 requires some reserved space in the URB allocation and so we must
    reduce the amount we allocate to our vertices.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=81583
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen8_render.c b/src/sna/gen8_render.c
index 1eac5a7..a6a5dd2 100644
--- a/src/sna/gen8_render.c
+++ b/src/sna/gen8_render.c
@@ -462,21 +462,21 @@ gen8_emit_urb(struct sna *sna)
 {
 	/* num of VS entries must be divisible by 8 if size < 9 */
 	OUT_BATCH(GEN8_3DSTATE_URB_VS | (2 - 2));
-	OUT_BATCH(1024 << URB_ENTRY_NUMBER_SHIFT |
+	OUT_BATCH(960 << URB_ENTRY_NUMBER_SHIFT |
 		  (2 - 1) << URB_ENTRY_SIZE_SHIFT |
-		  0 << URB_STARTING_ADDRESS_SHIFT);
+		  4 << URB_STARTING_ADDRESS_SHIFT);
 
 	OUT_BATCH(GEN8_3DSTATE_URB_HS | (2 - 2));
 	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
-		  0 << URB_STARTING_ADDRESS_SHIFT);
+		  4 << URB_STARTING_ADDRESS_SHIFT);
 
 	OUT_BATCH(GEN8_3DSTATE_URB_DS | (2 - 2));
 	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
-		  0 << URB_STARTING_ADDRESS_SHIFT);
+		  4 << URB_STARTING_ADDRESS_SHIFT);
 
 	OUT_BATCH(GEN8_3DSTATE_URB_GS | (2 - 2));
 	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
-		  0 << URB_STARTING_ADDRESS_SHIFT);
+		  4 << URB_STARTING_ADDRESS_SHIFT);
 }
 
 static void
commit de54a93217cc550c44ee138f0511ede6925d84e0
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 22 19:30:21 2014 +0100

    sna/gen8: Annotate more 64bit pointer locations
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen8_render.c b/src/sna/gen8_render.c
index b615fa0..1eac5a7 100644
--- a/src/sna/gen8_render.c
+++ b/src/sna/gen8_render.c
@@ -519,7 +519,7 @@ gen8_emit_vs_invariant(struct sna *sna)
 	OUT_BATCH(GEN8_3DSTATE_VS | (9 - 2));
 	OUT_BATCH64(0); /* no VS kernel */
 	OUT_BATCH(0);
-	OUT_BATCH64(0);
+	OUT_BATCH64(0); /* scratch */
 	OUT_BATCH(0);
 	OUT_BATCH(1 << 1); /* pass-through */
 	OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */
@@ -545,12 +545,10 @@ static void
 gen8_emit_hs_invariant(struct sna *sna)
 {
 	OUT_BATCH(GEN8_3DSTATE_HS | (9 - 2));
-	OUT_BATCH(0); /* no HS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
+	OUT_BATCH64(0); /* no HS kernel */
+	OUT_BATCH64(0); /* scratch */
 	OUT_BATCH(0);
 	OUT_BATCH(0); /* pass-through */
 
@@ -586,11 +584,9 @@ static void
 gen8_emit_ds_invariant(struct sna *sna)
 {
 	OUT_BATCH(GEN8_3DSTATE_DS | (9 - 2));
+	OUT_BATCH64(0); /* no kernel */
 	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
+	OUT_BATCH64(0); /* scratch */
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
@@ -618,15 +614,13 @@ static void
 gen8_emit_gs_invariant(struct sna *sna)
 {
 	OUT_BATCH(GEN8_3DSTATE_GS | (10 - 2));
-	OUT_BATCH(0); /* no GS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
+	OUT_BATCH64(0); /* no GS kernel */
 	OUT_BATCH(0);
+	OUT_BATCH64(0); /* scratch */
 	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
 	OUT_BATCH(0);
 	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
 
 #if SIM
 	OUT_BATCH(GEN8_3DSTATE_CONSTANT_GS | (11 - 2));
@@ -740,13 +734,15 @@ gen8_emit_wm_invariant(struct sna *sna)
 	OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
 
 #if SIM
-	OUT_BATCH(GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
+	OUT_BATCH(GEN8_3DSTATE_WM_CHROMAKEY | (2 - 2));
 	OUT_BATCH(0);
+#endif
+
+#if 0
+	OUT_BATCH(GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
-
-	OUT_BATCH(GEN8_3DSTATE_WM_CHROMAKEY | (2 - 2));
 	OUT_BATCH(0);
 #endif
 
@@ -814,7 +810,6 @@ gen8_emit_invariant(struct sna *sna)
 #if SIM
 	OUT_BATCH(GEN8_STATE_SIP | (3 - 2));
 	OUT_BATCH64(0);
-
 #endif
 
 	OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
@@ -887,6 +882,7 @@ gen8_emit_cc(struct sna *sna, uint32_t blend)
 	} else
 		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT);
 
+	assert(is_aligned(render->cc_blend + blend * GEN8_BLEND_STATE_PADDED_SIZE, 64));
 	OUT_BATCH(GEN8_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
 	OUT_BATCH((render->cc_blend + blend * GEN8_BLEND_STATE_PADDED_SIZE) | 1);
 
@@ -949,6 +945,9 @@ gen8_emit_wm(struct sna *sna, int kernel)
 	     wm_kernels[kernel].name,
 	     wm_kernels[kernel].num_surfaces,
 	     kernels[0], kernels[1], kernels[2]));
+	assert(is_aligned(kernels[0], 64));
+	assert(is_aligned(kernels[1], 64));
+	assert(is_aligned(kernels[2], 64));
 
 	OUT_BATCH(GEN8_3DSTATE_PS | (12 - 2));
 	OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]);
@@ -1238,8 +1237,8 @@ static bool gen8_magic_ca_pass(struct sna *sna,
 						  true, true,
 						  op->is_affine));
 
-	OUT_BATCH(GEN8_3DPRIMITIVE | (7- 2));
-	OUT_BATCH(RECTLIST); /* ignored, see VF_TOPOLOGY */
+	OUT_BATCH(GEN8_3DPRIMITIVE | (7 - 2));
+	OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */
 	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
 	OUT_BATCH(sna->render.vertex_start);
 	OUT_BATCH(1);	/* single instance */
@@ -1431,7 +1430,7 @@ static void gen8_emit_primitive(struct sna *sna)
 	}
 
 	OUT_BATCH(GEN8_3DPRIMITIVE | (7 - 2));
-	OUT_BATCH(RECTLIST); /* ignored, see VF_TOPOLOGY */
+	OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */
 	sna->render.vertex_offset = sna->kgem.nbatch;
 	OUT_BATCH(0);	/* vertex count, to be filled in later */
 	OUT_BATCH(sna->render.vertex_index);
@@ -1772,6 +1771,7 @@ gen8_create_blend_state(struct sna_static_stream *stream)
 			assert(((ptr - base) & 63) == 0);
 			COMPILE_TIME_ASSERT(sizeof(blend->common) == 4);
 			COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8);
+			COMPILE_TIME_ASSERT((char *)&blend->rt - (char *)blend == 4);
 
 			blend->rt.post_blend_clamp = 1;
 			blend->rt.pre_blend_clamp = 1;