xf86-video-intel: 8 commits - src/brw_defines.h src/brw_structs.h src/i965_3d.c src/i965_reg.h src/i965_render.c src/i965_video.c src/intel_driver.h src/intel_module.c src/sna/gen7_render.c src/sna/gen7_render.h src/sna/kgem.c

Chris Wilson ickle at kemper.freedesktop.org
Fri Aug 3 04:30:13 PDT 2012


 src/brw_defines.h     |    7 +++++++
 src/brw_structs.h     |    6 +++++-
 src/i965_3d.c         |    7 ++++++-
 src/i965_reg.h        |    4 +++-
 src/i965_render.c     |   16 +++++++++++++++-
 src/i965_video.c      |   24 +++++++++++++++++++++++-
 src/intel_driver.h    |    1 +
 src/intel_module.c    |    4 ++++
 src/sna/gen7_render.c |   30 +++++++++++++++++++++++-------
 src/sna/gen7_render.h |   15 ++++++++++++++-
 src/sna/kgem.c        |    2 +-
 11 files changed, 102 insertions(+), 14 deletions(-)

New commits:
commit 5ff749727d3590368806508ac0d0fa8efd1d1d51
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Jul 25 22:21:29 2012 +0100

    sna/gen7: Add constant variations and hookup a basic GT descriptor for Haswell
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 193de00..b4a9223 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -83,20 +83,29 @@ struct gt_info {
 	} urb;
 };
 
-static const struct gt_info gt1_info = {
+static const struct gt_info ivb_gt1_info = {
 	.max_vs_threads = 36,
 	.max_gs_threads = 36,
-	.max_wm_threads = (48-1) << GEN7_PS_MAX_THREADS_SHIFT,
+	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
 	.urb = { 128, 512, 192 },
 };
 
-static const struct gt_info gt2_info = {
+static const struct gt_info ivb_gt2_info = {
 	.max_vs_threads = 128,
 	.max_gs_threads = 128,
-	.max_wm_threads = (172-1) << GEN7_PS_MAX_THREADS_SHIFT,
+	.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
 	.urb = { 256, 704, 320 },
 };
 
+static const struct gt_info hsw_gt_info = {
+	.max_vs_threads = 8,
+	.max_gs_threads = 8,
+	.max_wm_threads =
+		(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
+		1 << HSW_PS_SAMPLE_MASK_SHIFT,
+	.urb = { 128, 64, 64 },
+};
+
 static const uint32_t ps_kernel_packed[][4] = {
 #include "exa_wm_src_affine.g7b"
 #include "exa_wm_src_sample_argb.g7b"
@@ -1363,6 +1372,8 @@ gen7_bind_bo(struct sna *sna,
 	ss[5] = 0;
 	ss[6] = 0;
 	ss[7] = 0;
+	if (sna->kgem.gen == 75)
+		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
 
 	kgem_bo_set_binding(bo, format, offset);
 
@@ -4234,9 +4245,14 @@ static bool gen7_render_setup(struct sna *sna)
 	struct gen7_sampler_state *ss;
 	int i, j, k, l, m;
 
-	state->info = &gt1_info;
-	if (DEVICE_ID(sna->PciInfo) & 0x20)
-		state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
+	if (sna->kgem.gen == 70) {
+		state->info = &ivb_gt1_info;
+		if (DEVICE_ID(sna->PciInfo) & 0x20)
+			state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
+	} else if (sna->kgem.gen == 75) {
+		state->info = &hsw_gt_info;
+	} else
+		return false;
 
 	sna_static_stream_init(&general);
 
diff --git a/src/sna/gen7_render.h b/src/sna/gen7_render.h
index 8de52a4..1661d4c 100644
--- a/src/sna/gen7_render.h
+++ b/src/sna/gen7_render.h
@@ -1237,6 +1237,17 @@ struct gen7_sampler_state {
 #define GEN7_SURFACE_DEPTH_SHIFT         21
 #define GEN7_SURFACE_PITCH_SHIFT         0
 
+#define HSW_SWIZZLE_ZERO		0
+#define HSW_SWIZZLE_ONE			1
+#define HSW_SWIZZLE_RED			4
+#define HSW_SWIZZLE_GREEN		5
+#define HSW_SWIZZLE_BLUE		6
+#define HSW_SWIZZLE_ALPHA		7
+#define __HSW_SURFACE_SWIZZLE(r,g,b,a) \
+	((a) << 16 | (b) << 19 | (g) << 22 | (r) << 25)
+#define HSW_SURFACE_SWIZZLE(r,g,b,a) \
+	__HSW_SURFACE_SWIZZLE(HSW_SWIZZLE_##r, HSW_SWIZZLE_##g, HSW_SWIZZLE_##b, HSW_SWIZZLE_##a)
+
 /* _3DSTATE_VERTEX_BUFFERS on GEN7*/
 /* DW1 */
 #define GEN7_VB0_ADDRESS_MODIFYENABLE   (1 << 14)
@@ -1281,7 +1292,9 @@ struct gen7_sampler_state {
 # define GEN7_PS_FLOATING_POINT_MODE_ALT                (1 << 16)
 /* DW3: scratch space */
 /* DW4 */
-# define GEN7_PS_MAX_THREADS_SHIFT                      24
+# define IVB_PS_MAX_THREADS_SHIFT                      24
+# define HSW_PS_MAX_THREADS_SHIFT                      23
+# define HSW_PS_SAMPLE_MASK_SHIFT                      12
 # define GEN7_PS_PUSH_CONSTANT_ENABLE                   (1 << 11)
 # define GEN7_PS_ATTRIBUTE_ENABLE                       (1 << 10)
 # define GEN7_PS_OMASK_TO_RENDER_TARGET                 (1 << 9)
commit cd028cad3dc9b059a3d83b818d581f86e16ec317
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jul 26 13:17:11 2012 +0100

    sna: Limit the batch size on all gen7 variants
    
    It seems the limit on the surface state size is common across the whole
    gen7 family.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 635dd24..d7458ec 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -811,7 +811,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	if (gen == 22)
 		/* 865g cannot handle a batch spanning multiple pages */
 		kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
-	if (gen == 70)
+	if (gen >= 70 && gen < 80)
 		kgem->batch_size = 16*1024;
 	if (!kgem->has_relaxed_delta)
 		kgem->batch_size = 4*1024;
commit 4cd9ec9d404d934268952a1058afa07741b09efe
Author: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
Date:   Fri May 4 18:26:46 2012 +0200

    uxa: fix 3DSTATE_PS to fill in number of samples for Haswell
    
    The sample mask value must match what is set for 3DSTATE_SAMPLE_MASK,
    through gen6_upload_invariant_states().
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
    Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

diff --git a/src/i965_reg.h b/src/i965_reg.h
index 45b6d08..4bb5e4d 100644
--- a/src/i965_reg.h
+++ b/src/i965_reg.h
@@ -221,6 +221,7 @@
 /* DW4 */
 # define GEN7_PS_MAX_THREADS_SHIFT_IVB                  24
 # define GEN7_PS_MAX_THREADS_SHIFT_HSW                  23
+# define GEN7_PS_SAMPLE_MASK_SHIFT_HSW			12
 # define GEN7_PS_PUSH_CONSTANT_ENABLE                   (1 << 11)
 # define GEN7_PS_ATTRIBUTE_ENABLE                       (1 << 10)
 # define GEN7_PS_OMASK_TO_RENDER_TARGET                 (1 << 9)
diff --git a/src/i965_render.c b/src/i965_render.c
index f7b21c8..42b1959 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2695,9 +2695,11 @@ gen7_composite_wm_state(intel_screen_private *intel,
 {
 	int num_surfaces = has_mask ? 3 : 2;
 	unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
+	unsigned int num_samples = 0;
 
 	if (IS_HSW(intel)) {
 		max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
+		num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
 	}
 
 	if (intel->gen6_render_state.kernel == bo)
@@ -2715,7 +2717,7 @@ gen7_composite_wm_state(intel_screen_private *intel,
 	OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
 		  (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
 	OUT_BATCH(0); /* scratch space base offset */
-	OUT_BATCH(((48 - 1) << max_threads_shift) |
+	OUT_BATCH(((48 - 1) << max_threads_shift) | num_samples |
 		  GEN7_PS_ATTRIBUTE_ENABLE |
 		  GEN7_PS_16_DISPATCH_ENABLE);
 	OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
diff --git a/src/i965_video.c b/src/i965_video.c
index 58b6222..3276788 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1641,9 +1641,11 @@ gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 	unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
+	unsigned int num_samples = 0;
 
 	if (IS_HSW(intel)) {
 		max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
+		num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
 	}
 
 	/* disable WM constant buffer */
@@ -1678,7 +1680,7 @@ gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 
 	OUT_BATCH(0); /* scratch space base offset */
 	OUT_BATCH(
-		((48 - 1) << max_threads_shift) |
+		((48 - 1) << max_threads_shift) | num_samples |
 		GEN7_PS_ATTRIBUTE_ENABLE |
 		GEN7_PS_16_DISPATCH_ENABLE);
 	OUT_BATCH(
commit 412668464cf9505629eac20001701af3402dc6e8
Author: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
Date:   Fri May 4 17:55:10 2012 +0200

    uxa: set "Shader Channel Select" fields in surface state for Haswell
    
    For normal behaviour, each Shader Channel Select should be set to the
    value indicating that same channel, i.e. Shader Channel Select Red is
    set to SCS_RED, Shader Channel Select Green is set to SCS_GREEN, etc.
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
    Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

diff --git a/src/brw_defines.h b/src/brw_defines.h
index 0df2491..e580a8f 100644
--- a/src/brw_defines.h
+++ b/src/brw_defines.h
@@ -469,6 +469,13 @@
 #define BRW_BORDER_COLOR_MODE_DEFAULT	0
 #define BRW_BORDER_COLOR_MODE_LEGACY	1
 
+#define HSW_SCS_ZERO					0
+#define HSW_SCS_ONE						1
+#define HSW_SCS_RED						4
+#define HSW_SCS_GREEN					5
+#define HSW_SCS_BLUE					6
+#define HSW_SCS_ALPHA					7
+
 #define BRW_TEXCOORDMODE_WRAP            0
 #define BRW_TEXCOORDMODE_MIRROR          1
 #define BRW_TEXCOORDMODE_CLAMP           2
diff --git a/src/brw_structs.h b/src/brw_structs.h
index f4dc927..20c2f85 100644
--- a/src/brw_structs.h
+++ b/src/brw_structs.h
@@ -1659,7 +1659,11 @@ struct gen7_surface_state
 
 	struct {
 		unsigned int resource_min_lod:12;
-		unsigned int pad0:16;
+		unsigned int pad0:4;
+		unsigned int shader_chanel_select_a:3;
+		unsigned int shader_chanel_select_b:3;
+		unsigned int shader_chanel_select_g:3;
+		unsigned int shader_chanel_select_r:3;
 		unsigned int alpha_clear_color:1;
 		unsigned int blue_clear_color:1;
 		unsigned int green_clear_color:1;
diff --git a/src/i965_render.c b/src/i965_render.c
index 30fef57..f7b21c8 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -1392,6 +1392,13 @@ gen7_set_picture_surface_state(intel_screen_private *intel,
 	ss->ss2.width = pixmap->drawable.width - 1;
 	ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1;
 
+	if (IS_HSW(intel)) {
+		ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+		ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+		ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+		ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+	}
+
 	dri_bo_emit_reloc(intel->surface_bo,
 			  read_domains, write_domain,
 			  0,
diff --git a/src/i965_video.c b/src/i965_video.c
index bba282d..58b6222 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -510,6 +510,13 @@ static void gen7_create_dst_surface_state(ScrnInfoPtr scrn,
 
 	dest_surf_state.ss3.pitch = intel_pixmap_pitch(pixmap) - 1;
 
+	if (IS_HSW(intel)) {
+		dest_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED;
+		dest_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+		dest_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+		dest_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+	}
+
 	dri_bo_subdata(surf_bo,
 		       offset, sizeof(dest_surf_state),
 		       &dest_surf_state);
@@ -525,6 +532,7 @@ static void gen7_create_src_surface_state(ScrnInfoPtr scrn,
 					drm_intel_bo *surface_bo,
 					uint32_t offset)
 {
+	intel_screen_private * const intel = intel_get_screen_private(scrn);
 	struct gen7_surface_state src_surf_state;
 
 	memset(&src_surf_state, 0, sizeof(src_surf_state));
@@ -547,6 +555,13 @@ static void gen7_create_src_surface_state(ScrnInfoPtr scrn,
 
 	src_surf_state.ss3.pitch = src_pitch - 1;
 
+	if (IS_HSW(intel)) {
+		src_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED;
+		src_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+		src_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+		src_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+	}
+
 	dri_bo_subdata(surface_bo,
 		       offset, sizeof(src_surf_state),
 		       &src_surf_state);
commit a47ba68996f117fabcb601d35bcc5f99cbcd6122
Author: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
Date:   Fri May 4 17:17:22 2012 +0200

    uxa: fix max PS threads shift value for Haswell
    
    The maximum number of threads is now a 9-bit value on Haswell. Thus, one
    more bit towards the LSB was re-used, i.e. the field now starts at bit 23
    instead of bit 24 as on Ivy Bridge.
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
    Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

diff --git a/src/i965_reg.h b/src/i965_reg.h
index e7b0d15..45b6d08 100644
--- a/src/i965_reg.h
+++ b/src/i965_reg.h
@@ -219,7 +219,8 @@
 # define GEN7_PS_FLOATING_POINT_MODE_ALT                (1 << 16)
 /* DW3: scratch space */
 /* DW4 */
-# define GEN7_PS_MAX_THREADS_SHIFT                      24
+# define GEN7_PS_MAX_THREADS_SHIFT_IVB                  24
+# define GEN7_PS_MAX_THREADS_SHIFT_HSW                  23
 # define GEN7_PS_PUSH_CONSTANT_ENABLE                   (1 << 11)
 # define GEN7_PS_ATTRIBUTE_ENABLE                       (1 << 10)
 # define GEN7_PS_OMASK_TO_RENDER_TARGET                 (1 << 9)
diff --git a/src/i965_render.c b/src/i965_render.c
index 2182df8..30fef57 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2687,6 +2687,11 @@ gen7_composite_wm_state(intel_screen_private *intel,
 			drm_intel_bo *bo)
 {
 	int num_surfaces = has_mask ? 3 : 2;
+	unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
+
+	if (IS_HSW(intel)) {
+		max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
+	}
 
 	if (intel->gen6_render_state.kernel == bo)
 		return;
@@ -2703,7 +2708,7 @@ gen7_composite_wm_state(intel_screen_private *intel,
 	OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
 		  (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
 	OUT_BATCH(0); /* scratch space base offset */
-	OUT_BATCH(((48 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+	OUT_BATCH(((48 - 1) << max_threads_shift) |
 		  GEN7_PS_ATTRIBUTE_ENABLE |
 		  GEN7_PS_16_DISPATCH_ENABLE);
 	OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
diff --git a/src/i965_video.c b/src/i965_video.c
index bcd6063..bba282d 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1625,6 +1625,11 @@ static void
 gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
+	unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
+
+	if (IS_HSW(intel)) {
+		max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
+	}
 
 	/* disable WM constant buffer */
 	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
@@ -1658,7 +1663,7 @@ gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 
 	OUT_BATCH(0); /* scratch space base offset */
 	OUT_BATCH(
-		((48 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+		((48 - 1) << max_threads_shift) |
 		GEN7_PS_ATTRIBUTE_ENABLE |
 		GEN7_PS_16_DISPATCH_ENABLE);
 	OUT_BATCH(
commit ce4421e175ceb9259208c7c223af8d66282c3db3
Author: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
Date:   Fri May 4 17:09:19 2012 +0200

    uxa: use at least 64 URB entries for Haswell
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
    Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

diff --git a/src/i965_3d.c b/src/i965_3d.c
index d4d38e5..a18db12 100644
--- a/src/i965_3d.c
+++ b/src/i965_3d.c
@@ -104,12 +104,17 @@ gen6_upload_urb(intel_screen_private *intel)
 void
 gen7_upload_urb(intel_screen_private *intel)
 {
+	unsigned int num_urb_entries = 32;
+
+	if (IS_HSW(intel))
+		num_urb_entries = 64;
+
 	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
 	OUT_BATCH(8); /* in 1KBs */
 
 	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
 	OUT_BATCH(
-		(32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
+		(num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
 		(2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
 		(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
 
commit 8c880aa34c522b0d67cbb932771f00c947d00dec
Author: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
Date:   Fri May 4 17:43:19 2012 +0200

    uxa: add IS_HSW() macro to distinguish Haswell from Ivybridge
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>

diff --git a/src/intel_driver.h b/src/intel_driver.h
index 31c11f6..d88f225 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -218,6 +218,7 @@
 #define IS_GEN5(intel) IS_GENx(intel, 5)
 #define IS_GEN6(intel) IS_GENx(intel, 6)
 #define IS_GEN7(intel) IS_GENx(intel, 7)
+#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 75)
 
 /* Some chips have specific errata (or limits) that we need to workaround. */
 #define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M)
commit 0c0d1d956a8ba37d9e6f4a5e4f52018c8ce498e5
Author: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
Date:   Fri Aug 3 12:03:00 2012 +0100

    Introduce a chipset identifier for Haswell (Ivybridge successor)
    
    Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>

diff --git a/src/intel_module.c b/src/intel_module.c
index e1755ff..7640916 100644
--- a/src/intel_module.c
+++ b/src/intel_module.c
@@ -98,6 +98,10 @@ static const struct intel_device_info intel_ivybridge_info = {
 	.gen = 70,
 };
 
+static const struct intel_device_info intel_haswell_info = {
+	.gen = 75,
+};
+
 static const SymTabRec _intel_chipsets[] = {
 	{PCI_CHIP_I810,				"i810"},
 	{PCI_CHIP_I810_DC100,			"i810-dc100"},


More information about the xorg-commit mailing list