xf86-video-intel: 10 commits - src/i965_3d.c src/i965_render.c src/i965_video.c src/intel.h src/Makefile.am src/render_program/exa_wm_mask_affine.g7a src/render_program/exa_wm_mask_affine.g7b src/render_program/exa_wm_mask_projective.g7a src/render_program/exa_wm_mask_projective.g7b src/render_program/exa_wm_mask_sample_a.g7a src/render_program/exa_wm_mask_sample_a.g7b src/render_program/exa_wm_mask_sample_argb.g7a src/render_program/exa_wm_mask_sample_argb.g7b src/render_program/exa_wm_src_projective.g7a src/render_program/exa_wm_src_projective.g7b src/render_program/exa_wm_src_sample_a.g7a src/render_program/exa_wm_src_sample_a.g7b src/render_program/Makefile.am

Kenneth Graunke kwg at kemper.freedesktop.org
Fri Jul 29 23:31:55 PDT 2011


 src/Makefile.am                                |    1 
 src/i965_3d.c                                  |  438 ++++++++++++++++++
 src/i965_render.c                              |  580 ++++++++++++++++---------
 src/i965_video.c                               |  500 ---------------------
 src/intel.h                                    |   31 +
 src/render_program/Makefile.am                 |   12 
 src/render_program/exa_wm_mask_affine.g7a      |   41 +
 src/render_program/exa_wm_mask_affine.g7b      |    4 
 src/render_program/exa_wm_mask_projective.g7a  |   63 ++
 src/render_program/exa_wm_mask_projective.g7b  |   12 
 src/render_program/exa_wm_mask_sample_a.g7a    |   49 ++
 src/render_program/exa_wm_mask_sample_a.g7b    |    3 
 src/render_program/exa_wm_mask_sample_argb.g7a |   49 ++
 src/render_program/exa_wm_mask_sample_argb.g7b |    3 
 src/render_program/exa_wm_src_projective.g7a   |   63 ++
 src/render_program/exa_wm_src_projective.g7b   |   12 
 src/render_program/exa_wm_src_sample_a.g7a     |   48 ++
 src/render_program/exa_wm_src_sample_a.g7b     |    3 
 18 files changed, 1241 insertions(+), 671 deletions(-)

New commits:
commit 5691c8cdec2a5264b8552f096b4a34b97afe4ae5
Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Thu Jul 14 10:14:53 2011 -0700

    render: Enable RENDER acceleration on Ivybridge.
    
    Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
    Acked-by: Eric Anholt <eric at anholt.net>
    Acked-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i965_render.c b/src/i965_render.c
index 596d070..7e1da5b 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -184,9 +184,6 @@ i965_check_composite(int op,
 	ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum];
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
-	if (IS_GEN7(intel))
-		return FALSE;
-
 	/* Check for unsupported compositing operations. */
 	if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) {
 		intel_debug_fallback(scrn,
commit 0d92612b2a2782f80196a08eb9a17af906169f18
Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Thu Jul 14 00:30:09 2011 -0700

    render: Update pixel shader state for Ivybridge.
    
    Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
    Acked-by: Eric Anholt <eric at anholt.net>
    Acked-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i965_render.c b/src/i965_render.c
index 5222d1c..596d070 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2657,6 +2657,37 @@ gen6_composite_wm_state(intel_screen_private *intel,
 }
 
 static void
+gen7_composite_wm_state(intel_screen_private *intel,
+			Bool has_mask,
+			drm_intel_bo *bo)
+{
+	int num_surfaces = has_mask ? 3 : 2;
+
+	if (intel->gen6_render_state.kernel == bo)
+		return;
+
+	intel->gen6_render_state.kernel = bo;
+
+	OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2));
+	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
+		  GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
+	OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+	OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
+		  (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+	OUT_BATCH(0); /* scratch space base offset */
+	OUT_BATCH(((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+		  GEN7_PS_ATTRIBUTE_ENABLE |
+		  GEN7_PS_16_DISPATCH_ENABLE);
+	OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
+	OUT_BATCH(0); /* kernel 1 pointer */
+	OUT_BATCH(0); /* kernel 2 pointer */
+}
+
+
+static void
 gen6_composite_drawing_rectangle(intel_screen_private *intel,
 				 PixmapPtr dest)
 {
@@ -2809,12 +2840,13 @@ gen6_emit_composite_state(struct intel_screen_private *intel)
 					(src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE);
 	gen6_composite_sampler_state_pointers(intel, ps_sampler_state_bo);
 	gen6_composite_sf_state(intel, has_mask);
-	gen6_composite_wm_state(intel,
-				has_mask,
-				render->wm_kernel_bo[composite_op->wm_kernel]);
 	if (ivb) {
+		gen7_composite_wm_state(intel, has_mask,
+					render->wm_kernel_bo[composite_op->wm_kernel]);
 		gen7_upload_binding_table(intel, intel->surface_table);
 	} else {
+		gen6_composite_wm_state(intel, has_mask,
+					render->wm_kernel_bo[composite_op->wm_kernel]);
 		gen6_upload_binding_table(intel, intel->surface_table);
 	}
 	gen6_composite_drawing_rectangle(intel, intel->render_dest);
commit 7460ee73d1fd22e6b02ce125f11ac38efff743ce
Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Thu Jul 14 00:16:54 2011 -0700

    render: Use Ivybridge variants for 3D pipeline setup.
    
    Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
    Acked-by: Eric Anholt <eric at anholt.net>
    Acked-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i965_render.c b/src/i965_render.c
index 17e35c9..5222d1c 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2569,7 +2569,11 @@ gen6_composite_cc_state_pointers(intel_screen_private *intel,
 		cc_bo = render_state->cc_state_bo;
 		depth_stencil_bo = render_state->gen6_depth_stencil_bo;
 	}
-	gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
+	if (INTEL_INFO(intel)->gen >= 70) {
+		gen7_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
+	} else {
+		gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
+	}
 
 	intel->gen6_render_state.blend = blend_offset;
 }
@@ -2583,18 +2587,26 @@ gen6_composite_sampler_state_pointers(intel_screen_private *intel,
 
 	intel->gen6_render_state.samplers = bo;
 
-	gen6_upload_sampler_state_pointers(intel, bo);
+	if (INTEL_INFO(intel)->gen >= 70)
+		gen7_upload_sampler_state_pointers(intel, bo);
+	else
+		gen6_upload_sampler_state_pointers(intel, bo);
 }
 
 static void
 gen6_composite_wm_constants(intel_screen_private *intel)
 {
+	Bool ivb = INTEL_INFO(intel)->gen >= 70;
 	/* disable WM constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | ((ivb ? 7 : 5) - 2));
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
+	if (ivb) {
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+	}
 }
 
 static void
@@ -2608,7 +2620,10 @@ gen6_composite_sf_state(intel_screen_private *intel,
 
 	intel->gen6_render_state.num_sf_outputs = num_sf_outputs;
 
-	gen6_upload_sf_state(intel, num_sf_outputs, 1);
+	if (INTEL_INFO(intel)->gen >= 70)
+		gen7_upload_sf_state(intel, num_sf_outputs, 1);
+	else
+		gen6_upload_sf_state(intel, num_sf_outputs, 1);
 }
 
 static void
@@ -2754,20 +2769,30 @@ gen6_emit_composite_state(struct intel_screen_private *intel)
 	sampler_state_extend_t mask_extend = composite_op->mask_extend;
 	Bool is_affine = composite_op->is_affine;
 	Bool has_mask = intel->render_mask != NULL;
+	Bool ivb = INTEL_INFO(intel)->gen >= 70;
 	uint32_t src, dst;
 	drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend];
 
 	intel->needs_render_state_emit = FALSE;
 	if (intel->needs_3d_invariant) {
 		gen6_upload_invariant_states(intel);
-		gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo);
-		gen6_upload_urb(intel);
 
+		if (ivb) {
+			gen7_upload_viewport_state_pointers(intel, render->cc_vp_bo);
+			gen7_upload_urb(intel);
+			gen7_upload_bypass_states(intel);
+			gen7_upload_depth_buffer_state(intel);
+		} else {
+			gen6_upload_invariant_states(intel);
+			gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo);
+			gen6_upload_urb(intel);
+
+			gen6_upload_gs_state(intel);
+			gen6_upload_depth_buffer_state(intel);
+		}
+		gen6_composite_wm_constants(intel);
 		gen6_upload_vs_state(intel);
-		gen6_upload_gs_state(intel);
 		gen6_upload_clip_state(intel);
-		gen6_composite_wm_constants(intel);
-		gen6_upload_depth_buffer_state(intel);
 
 		intel->needs_3d_invariant = FALSE;
 	}
@@ -2787,8 +2812,11 @@ gen6_emit_composite_state(struct intel_screen_private *intel)
 	gen6_composite_wm_state(intel,
 				has_mask,
 				render->wm_kernel_bo[composite_op->wm_kernel]);
-	gen6_upload_binding_table(intel, intel->surface_table);
-
+	if (ivb) {
+		gen7_upload_binding_table(intel, intel->surface_table);
+	} else {
+		gen6_upload_binding_table(intel, intel->surface_table);
+	}
 	gen6_composite_drawing_rectangle(intel, intel->render_dest);
 	gen6_composite_vertex_element_state(intel, has_mask, is_affine);
 }
commit e3a09608716c1dee2554e8fc26d6f77aeec684bd
Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Thu Jul 14 00:04:31 2011 -0700

    render: Refactor to use newly shared pipeline setup code in i965_3d.c.
    
    Slightly generalize the shared SF and CC code to accommodate both.
    
    Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
    Acked-by: Eric Anholt <eric at anholt.net>
    Acked-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i965_3d.c b/src/i965_3d.c
index 19ddee7..d4d38e5 100644
--- a/src/i965_3d.c
+++ b/src/i965_3d.c
@@ -130,11 +130,13 @@ void
 gen6_upload_cc_state_pointers(intel_screen_private *intel,
 			      drm_intel_bo *blend_bo,
 			      drm_intel_bo *cc_bo,
-			      drm_intel_bo *depth_stencil_bo)
+			      drm_intel_bo *depth_stencil_bo,
+			      uint32_t blend_offset)
 {
 	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
 	if (blend_bo)
-		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+			  blend_offset | 1);
 	else
 		OUT_BATCH(0);
 
@@ -153,11 +155,13 @@ void
 gen7_upload_cc_state_pointers(intel_screen_private *intel,
 			      drm_intel_bo *blend_bo,
 			      drm_intel_bo *cc_bo,
-			      drm_intel_bo *depth_stencil_bo)
+			      drm_intel_bo *depth_stencil_bo,
+			      uint32_t blend_offset)
 {
 	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
 	if (blend_bo)
-		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+			  blend_offset | 1);
 	else
 		OUT_BATCH(0);
 
@@ -320,12 +324,14 @@ gen6_upload_clip_state(intel_screen_private *intel)
 }
 
 void
-gen6_upload_sf_state(intel_screen_private *intel)
+gen6_upload_sf_state(intel_screen_private *intel,
+		     int num_sf_outputs,
+		     int read_offset)
 {
 	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
-	OUT_BATCH((1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
+	OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
 		(1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
-		(0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
+		(read_offset << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
 	OUT_BATCH(0);
 	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
 	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
@@ -347,12 +353,14 @@ gen6_upload_sf_state(intel_screen_private *intel)
 }
 
 void
-gen7_upload_sf_state(intel_screen_private *intel)
+gen7_upload_sf_state(intel_screen_private *intel,
+		     int num_sf_outputs,
+		     int read_offset)
 {
 	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
-	OUT_BATCH((1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
+	OUT_BATCH((num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
 		(1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
-		(0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
+		(read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0); /* DW4 */
diff --git a/src/i965_render.c b/src/i965_render.c
index c9b3c7a..17e35c9 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2538,24 +2538,6 @@ gen6_composite_create_depth_stencil_state(intel_screen_private *intel)
 }
 
 static void
-gen6_composite_invariant_states(intel_screen_private *intel)
-{
-	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
-
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
-		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
-	OUT_BATCH(1);
-
-	/* Set system instruction pointer */
-	OUT_BATCH(BRW_STATE_SIP | 0);
-	OUT_BATCH(0);
-}
-
-static void
 gen6_composite_state_base_address(intel_screen_private *intel)
 {
 	OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
@@ -2573,52 +2555,21 @@ gen6_composite_state_base_address(intel_screen_private *intel)
 }
 
 static void
-gen6_composite_viewport_state_pointers(intel_screen_private *intel,
-				       drm_intel_bo *cc_vp_bo)
-{
-
-	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
-		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
-		  (4 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_RELOC(cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-}
-
-static void
-gen6_composite_urb(intel_screen_private *intel)
-{
-	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
-	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
-		  (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
-	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
-		(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
-}
-
-static void
 gen6_composite_cc_state_pointers(intel_screen_private *intel,
 				 uint32_t blend_offset)
 {
 	struct gen4_render_state *render_state = intel->gen4_render_state;
+	drm_intel_bo *cc_bo = NULL;
+	drm_intel_bo *depth_stencil_bo = NULL;
 
 	if (intel->gen6_render_state.blend == blend_offset)
 		return;
 
-	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
-	OUT_RELOC(render_state->gen6_blend_bo,
-		  I915_GEM_DOMAIN_INSTRUCTION, 0,
-		  blend_offset | 1);
 	if (intel->gen6_render_state.blend == -1) {
-		OUT_RELOC(render_state->gen6_depth_stencil_bo,
-			  I915_GEM_DOMAIN_INSTRUCTION, 0,
-			  1);
-		OUT_RELOC(render_state->cc_state_bo,
-			  I915_GEM_DOMAIN_INSTRUCTION, 0,
-			  1);
-	} else {
-		OUT_BATCH(0);
-		OUT_BATCH(0);
+		cc_bo = render_state->cc_state_bo;
+		depth_stencil_bo = render_state->gen6_depth_stencil_bo;
 	}
+	gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
 
 	intel->gen6_render_state.blend = blend_offset;
 }
@@ -2632,49 +2583,7 @@ gen6_composite_sampler_state_pointers(intel_screen_private *intel,
 
 	intel->gen6_render_state.samplers = bo;
 
-	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
-		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
-		  (4 - 2));
-	OUT_BATCH(0); /* VS */
-	OUT_BATCH(0); /* GS */
-	OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-}
-
-static void
-gen6_composite_vs_state(intel_screen_private *intel)
-{
-	/* disable VS constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
-	OUT_BATCH(0); /* without VS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-}
-
-static void
-gen6_composite_gs_state(intel_screen_private *intel)
-{
-	/* disable GS constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
-	OUT_BATCH(0); /* without GS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
+	gen6_upload_sampler_state_pointers(intel, bo);
 }
 
 static void
@@ -2689,15 +2598,6 @@ gen6_composite_wm_constants(intel_screen_private *intel)
 }
 
 static void
-gen6_composite_clip_state(intel_screen_private *intel)
-{
-	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-	OUT_BATCH(0);
-}
-
-static void
 gen6_composite_sf_state(intel_screen_private *intel,
 			Bool has_mask)
 {
@@ -2708,28 +2608,7 @@ gen6_composite_sf_state(intel_screen_private *intel,
 
 	intel->gen6_render_state.num_sf_outputs = num_sf_outputs;
 
-	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
-	OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
-		  (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
-		  (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
-	OUT_BATCH(0);
-	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
-	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW9 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW14 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW19 */
+	gen6_upload_sf_state(intel, num_sf_outputs, 1);
 }
 
 static void
@@ -2763,35 +2642,6 @@ gen6_composite_wm_state(intel_screen_private *intel,
 }
 
 static void
-gen6_composite_binding_table_pointers(intel_screen_private *intel)
-{
-	/* Binding table pointers */
-	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS |
-		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
-		  (4 - 2));
-	OUT_BATCH(0);		/* vs */
-	OUT_BATCH(0);		/* gs */
-	/* Only the PS uses the binding table */
-	OUT_BATCH(intel->surface_table);
-}
-
-static void
-gen6_composite_depth_buffer_state(intel_screen_private *intel)
-{
-	OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
-	OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
-		  (BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
-	OUT_BATCH(0);
-}
-
-static void
 gen6_composite_drawing_rectangle(intel_screen_private *intel,
 				 PixmapPtr dest)
 {
@@ -2909,16 +2759,15 @@ gen6_emit_composite_state(struct intel_screen_private *intel)
 
 	intel->needs_render_state_emit = FALSE;
 	if (intel->needs_3d_invariant) {
-		gen6_composite_invariant_states(intel);
-		gen6_composite_viewport_state_pointers(intel,
-						       render->cc_vp_bo);
-		gen6_composite_urb(intel);
-
-		gen6_composite_vs_state(intel);
-		gen6_composite_gs_state(intel);
-		gen6_composite_clip_state(intel);
+		gen6_upload_invariant_states(intel);
+		gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo);
+		gen6_upload_urb(intel);
+
+		gen6_upload_vs_state(intel);
+		gen6_upload_gs_state(intel);
+		gen6_upload_clip_state(intel);
 		gen6_composite_wm_constants(intel);
-		gen6_composite_depth_buffer_state(intel);
+		gen6_upload_depth_buffer_state(intel);
 
 		intel->needs_3d_invariant = FALSE;
 	}
@@ -2938,7 +2787,7 @@ gen6_emit_composite_state(struct intel_screen_private *intel)
 	gen6_composite_wm_state(intel,
 				has_mask,
 				render->wm_kernel_bo[composite_op->wm_kernel]);
-	gen6_composite_binding_table_pointers(intel);
+	gen6_upload_binding_table(intel, intel->surface_table);
 
 	gen6_composite_drawing_rectangle(intel, intel->render_dest);
 	gen6_composite_vertex_element_state(intel, has_mask, is_affine);
diff --git a/src/i965_video.c b/src/i965_video.c
index 7d7ac79..eb5ff14 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1670,12 +1670,12 @@ gen6_emit_video_setup(ScrnInfoPtr scrn,
 	gen6_upload_state_base_address(scrn, surface_state_binding_table_bo);
 	gen6_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo);
 	gen6_upload_urb(intel);
-	gen6_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo);
+	gen6_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0);
 	gen6_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo);
 	gen6_upload_vs_state(intel);
 	gen6_upload_gs_state(intel);
 	gen6_upload_clip_state(intel);
-	gen6_upload_sf_state(intel);
+	gen6_upload_sf_state(intel, 1, 0);
 	gen6_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE);
 	gen6_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
 	gen6_upload_depth_buffer_state(intel);
@@ -1779,12 +1779,12 @@ gen7_emit_video_setup(ScrnInfoPtr scrn,
 	gen6_upload_state_base_address(scrn, surface_state_binding_table_bo);
 	gen7_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo);
 	gen7_upload_urb(intel);
-	gen7_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo);
+	gen7_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0);
 	gen7_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo);
 	gen7_upload_bypass_states(intel);
 	gen6_upload_vs_state(intel);
 	gen6_upload_clip_state(intel);
-	gen7_upload_sf_state(intel);
+	gen7_upload_sf_state(intel, 1, 0);
 	gen7_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE);
 	gen7_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
 	gen7_upload_depth_buffer_state(intel);
diff --git a/src/intel.h b/src/intel.h
index 3f48dd4..42afaf4 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -607,10 +607,12 @@ void gen6_upload_urb(intel_screen_private *intel);
 void gen7_upload_urb(intel_screen_private *intel);
 void gen6_upload_cc_state_pointers(intel_screen_private *intel,
 				   drm_intel_bo *blend_bo, drm_intel_bo *cc_bo,
-				   drm_intel_bo *depth_stencil_bo);
+				   drm_intel_bo *depth_stencil_bo,
+				   uint32_t blend_offset);
 void gen7_upload_cc_state_pointers(intel_screen_private *intel,
 				   drm_intel_bo *blend_bo, drm_intel_bo *cc_bo,
-				   drm_intel_bo *depth_stencil_bo);
+				   drm_intel_bo *depth_stencil_bo,
+				   uint32_t blend_offset);
 void gen6_upload_sampler_state_pointers(intel_screen_private *intel,
 					drm_intel_bo *sampler_bo);
 void gen7_upload_sampler_state_pointers(intel_screen_private *intel,
@@ -619,8 +621,8 @@ void gen7_upload_bypass_states(intel_screen_private *intel);
 void gen6_upload_gs_state(intel_screen_private *intel);
 void gen6_upload_vs_state(intel_screen_private *intel);
 void gen6_upload_clip_state(intel_screen_private *intel);
-void gen6_upload_sf_state(intel_screen_private *intel);
-void gen7_upload_sf_state(intel_screen_private *intel);
+void gen6_upload_sf_state(intel_screen_private *intel, int num_sf_outputs, int read_offset);
+void gen7_upload_sf_state(intel_screen_private *intel, int num_sf_outputs, int read_offset);
 void gen6_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset);
 void gen7_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset);
 void gen6_upload_depth_buffer_state(intel_screen_private *intel);
commit 682a690bfeeabae710b1392282163eab35b58eed
Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Wed Jul 13 22:52:52 2011 -0700

    Xv: Refactor out pipeline setup functions for future reuse in render.
    
    While we're at it, make the functions simply take an intel_screen_private
    pointer directly instead of having to fetch it from ScrnInfoPtr.
    
    Also coalesce some gen6/gen7 functions that were 98% identical.
    
    Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
    Acked-by: Eric Anholt <eric at anholt.net>
    Acked-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/Makefile.am b/src/Makefile.am
index a7f219c..cd1bb36 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -67,6 +67,7 @@ intel_drv_la_SOURCES = \
 	 i915_render.c \
 	 i915_video.c \
 	 i965_reg.h \
+	 i965_3d.c \
 	 i965_video.c \
 	 i965_render.c \
 	 $(NULL)
diff --git a/src/i965_3d.c b/src/i965_3d.c
new file mode 100644
index 0000000..19ddee7
--- /dev/null
+++ b/src/i965_3d.c
@@ -0,0 +1,430 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string.h>
+
+#include "intel.h"
+#include "i965_reg.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
+
+void
+gen6_upload_invariant_states(intel_screen_private *intel)
+{
+	Bool ivb = INTEL_INFO(intel)->gen >= 70;
+
+	OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
+	OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
+		BRW_PIPE_CONTROL_WC_FLUSH |
+		BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		BRW_PIPE_CONTROL_NOWRITE);
+	OUT_BATCH(0); /* write address */
+	OUT_BATCH(0); /* write data */
+
+	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | ((ivb ? 4 : 3) - 2));
+	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+		GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+	OUT_BATCH(0);
+	if (ivb)
+		OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+	OUT_BATCH(1);
+
+	/* Set system instruction pointer */
+	OUT_BATCH(BRW_STATE_SIP | 0);
+	OUT_BATCH(0);
+}
+
+void
+gen6_upload_viewport_state_pointers(intel_screen_private *intel,
+				    drm_intel_bo *cc_vp_bo)
+{
+	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
+		GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
+		(4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_RELOC(cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+void
+gen7_upload_viewport_state_pointers(intel_screen_private *intel,
+				    drm_intel_bo *cc_vp_bo)
+{
+	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
+	OUT_RELOC(cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
+	OUT_BATCH(0);
+}
+
+void
+gen6_upload_urb(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
+	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
+		(24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
+		(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
+}
+
+/*
+ * URB layout on GEN7
+ * ----------------------------------------
+ * | PS Push Constants (8KB) | VS entries |
+ * ----------------------------------------
+ */
+void
+gen7_upload_urb(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
+	OUT_BATCH(8); /* in 1KBs */
+
+	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
+	OUT_BATCH(
+		(32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
+		(2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
+		(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+
+	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
+	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+		(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+
+	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
+	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+		(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+
+	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
+	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+		(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+}
+
+void
+gen6_upload_cc_state_pointers(intel_screen_private *intel,
+			      drm_intel_bo *blend_bo,
+			      drm_intel_bo *cc_bo,
+			      drm_intel_bo *depth_stencil_bo)
+{
+	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
+	if (blend_bo)
+		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+
+	if (depth_stencil_bo)
+		OUT_RELOC(depth_stencil_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+
+	if (cc_bo)
+		OUT_RELOC(cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+}
+
+void
+gen7_upload_cc_state_pointers(intel_screen_private *intel,
+			      drm_intel_bo *blend_bo,
+			      drm_intel_bo *cc_bo,
+			      drm_intel_bo *depth_stencil_bo)
+{
+	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
+	if (blend_bo)
+		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
+	if (cc_bo)
+		OUT_RELOC(cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
+	if (depth_stencil_bo)
+		OUT_RELOC(depth_stencil_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+}
+
+void
+gen6_upload_sampler_state_pointers(intel_screen_private *intel,
+				   drm_intel_bo *sampler_bo)
+{
+	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
+		GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
+		(4 - 2));
+	OUT_BATCH(0); /* VS */
+	OUT_BATCH(0); /* GS */
+	OUT_RELOC(sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+void
+gen7_upload_sampler_state_pointers(intel_screen_private *intel,
+				   drm_intel_bo *sampler_bo)
+{
+	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
+	OUT_RELOC(sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+void
+gen7_upload_bypass_states(intel_screen_private *intel)
+{
+	/* bypass GS */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
+	OUT_BATCH(0); /* without GS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+
+	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
+	OUT_BATCH(0);
+
+	/* disable HS */
+	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
+	OUT_BATCH(0);
+
+	/* Disable TE */
+	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* Disable DS */
+	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
+	OUT_BATCH(0);
+
+	/* Disable STREAMOUT */
+	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen6_upload_vs_state(intel_screen_private *intel)
+{
+	Bool ivb = INTEL_INFO(intel)->gen >= 70;
+	/* disable VS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | ((ivb ? 7 : 5) - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	if (ivb) {
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+	}
+
+	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
+	OUT_BATCH(0); /* without VS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+}
+
+void
+gen6_upload_gs_state(intel_screen_private *intel)
+{
+	/* disable GS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
+	OUT_BATCH(0); /* without GS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+}
+
+void
+gen6_upload_clip_state(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+	OUT_BATCH(0);
+}
+
+void
+gen6_upload_sf_state(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
+	OUT_BATCH((1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
+		(1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
+		(0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
+	OUT_BATCH(0);
+	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
+	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW9 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW14 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW19 */
+}
+
+void
+gen7_upload_sf_state(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
+	OUT_BATCH((1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
+		(1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
+		(0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW4 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW9 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_SF | (7 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
+	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen6_upload_binding_table(intel_screen_private *intel,
+			  uint32_t ps_binding_table_offset)
+{
+	/* Binding table pointers */
+	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS |
+		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
+		  (4 - 2));
+	OUT_BATCH(0); /* VS */
+	OUT_BATCH(0); /* GS */
+	/* Only the PS uses the binding table */
+	OUT_BATCH(ps_binding_table_offset);
+}
+
+void
+gen7_upload_binding_table(intel_screen_private *intel,
+			  uint32_t ps_binding_table_offset)
+{
+	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
+	OUT_BATCH(ps_binding_table_offset);
+}
+
+void
+gen6_upload_depth_buffer_state(intel_screen_private *intel)
+{
+	OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
+	OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
+		  (BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
+	OUT_BATCH(0);
+}
+
+void
+gen7_upload_depth_buffer_state(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
+	OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
diff --git a/src/i965_video.c b/src/i965_video.c
index 1054914..7d7ac79 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1530,34 +1530,6 @@ gen6_create_vidoe_objects(ScrnInfoPtr scrn)
 }
 
 static void
-gen6_upload_invarient_states(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
-	OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
-		BRW_PIPE_CONTROL_WC_FLUSH |
-		BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-		BRW_PIPE_CONTROL_NOWRITE);
-	OUT_BATCH(0); /* write address */
-	OUT_BATCH(0); /* write data */
-
-	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
-
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
-		GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
-	OUT_BATCH(1);
-
-	/* Set system instruction pointer */
-	OUT_BATCH(BRW_STATE_SIP | 0);
-	OUT_BATCH(0);
-}
-
-static void
 gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -1575,88 +1547,6 @@ gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_bin
 }
 
 static void
-gen6_upload_viewport_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
-		GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
-		(4 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_RELOC(intel->video.gen4_cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-}
-
-static void
-gen6_upload_urb(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
-	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
-		(24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
-	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
-		(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
-}
-
-static void
-gen6_upload_cc_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
-	OUT_RELOC(intel->video.gen6_blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
-	OUT_RELOC(intel->video.gen6_depth_stencil_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
-	OUT_RELOC(intel->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
-}
-
-static void
-gen6_upload_sampler_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
-		GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
-		(4 - 2));
-	OUT_BATCH(0); /* VS */
-	OUT_BATCH(0); /* GS */
-	OUT_RELOC(intel->video.gen4_sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-}
-
-static void
-gen6_upload_binding_table(ScrnInfoPtr scrn, uint32_t ps_binding_table_offset)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	/* Binding table pointers */
-	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS |
-		GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
-		(4 - 2));
-	OUT_BATCH(0);		/* vs */
-	OUT_BATCH(0);		/* gs */
-	/* Only the PS uses the binding table */
-	OUT_BATCH(ps_binding_table_offset);
-}
-
-static void
-gen6_upload_depth_buffer_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
-	OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
-		(BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
-	OUT_BATCH(0);
-}
-
-static void
 gen6_upload_drawing_rectangle(ScrnInfoPtr scrn, PixmapPtr pixmap)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -1668,87 +1558,6 @@ gen6_upload_drawing_rectangle(ScrnInfoPtr scrn, PixmapPtr pixmap)
 }
 
 static void 
-gen6_upload_vs_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	/* disable VS constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	
-	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
-	OUT_BATCH(0); /* without VS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-}
-
-static void 
-gen6_upload_gs_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	/* disable GS constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	
-	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
-	OUT_BATCH(0); /* without GS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-}
-
-static void 
-gen6_upload_clip_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-	OUT_BATCH(0);
-}
-
-static void 
-gen6_upload_sf_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
-	OUT_BATCH((1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
-		(1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
-		(0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
-	OUT_BATCH(0);
-	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
-	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW9 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW14 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW19 */
-}
-
-static void 
 gen6_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -1857,262 +1666,25 @@ gen6_emit_video_setup(ScrnInfoPtr scrn,
 	IntelEmitInvarientState(scrn);
 	intel->last_3d = LAST_3D_VIDEO;
 
-	gen6_upload_invarient_states(scrn);
+	gen6_upload_invariant_states(intel);
 	gen6_upload_state_base_address(scrn, surface_state_binding_table_bo);
-	gen6_upload_viewport_state_pointers(scrn);
-	gen6_upload_urb(scrn);
-	gen6_upload_cc_state_pointers(scrn);
-	gen6_upload_sampler_state_pointers(scrn);
-	gen6_upload_vs_state(scrn);
-	gen6_upload_gs_state(scrn);
-	gen6_upload_clip_state(scrn);
-	gen6_upload_sf_state(scrn);
+	gen6_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo);
+	gen6_upload_urb(intel);
+	gen6_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo);
+	gen6_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo);
+	gen6_upload_vs_state(intel);
+	gen6_upload_gs_state(intel);
+	gen6_upload_clip_state(intel);
+	gen6_upload_sf_state(intel);
 	gen6_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE);
-	gen6_upload_binding_table(scrn, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
-	gen6_upload_depth_buffer_state(scrn);
+	gen6_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
+	gen6_upload_depth_buffer_state(intel);
 	gen6_upload_drawing_rectangle(scrn, pixmap);
 	gen6_upload_vertex_element_state(scrn);
 	gen6_upload_vertex_buffer(scrn, vertex_bo, end_address_offset);
 	gen6_upload_primitive(scrn);
 }
 
-static void
-gen7_upload_invarient_states(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
-	OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
-		BRW_PIPE_CONTROL_WC_FLUSH |
-		BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-		BRW_PIPE_CONTROL_NOWRITE);
-	OUT_BATCH(0); /* write address */
-	OUT_BATCH(0); /* write data */
-
-	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
-
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
-		GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
-	OUT_BATCH(1);
-
-	/* Set system instruction pointer */
-	OUT_BATCH(BRW_STATE_SIP | 0);
-	OUT_BATCH(0);
-}
-
-static void
-gen7_upload_viewport_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
-	OUT_RELOC(intel->video.gen4_cc_vp_bo, 
-		I915_GEM_DOMAIN_INSTRUCTION, 0,
-		0);
-
-	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
-	OUT_BATCH(0);
-}
-
-/*
- * URB layout for Xv on GEN7 
- * ----------------------------------------
- * | PS Push Constants (8KB) | VS entries |
- * ----------------------------------------
- */
-static void
-gen7_upload_urb(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
-	OUT_BATCH(8); /* in 1KBs */
-
-	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
-	OUT_BATCH(
-		(32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
-		(2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
-		(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
-
-	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
-	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
-		(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
-
-	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
-	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
-		(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
-
-	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
-	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
-		(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
-}
-
-static void
-gen7_upload_cc_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
-	OUT_RELOC(intel->video.gen4_cc_bo,
-		I915_GEM_DOMAIN_INSTRUCTION, 0,
-		1);
-
-	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
-	OUT_RELOC(intel->video.gen6_blend_bo,
-		I915_GEM_DOMAIN_INSTRUCTION, 0,
-		1);
-
-	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
-	OUT_RELOC(intel->video.gen6_depth_stencil_bo,
-		I915_GEM_DOMAIN_INSTRUCTION, 0, 
-		1);
-}
-
-static void
-gen7_upload_sampler_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
-	OUT_RELOC(intel->video.gen4_sampler_bo,
-		I915_GEM_DOMAIN_INSTRUCTION, 0,
-		0);
-}
-
-static void 
-gen7_upload_bypass_states(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	/* bypass GS */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
-	OUT_BATCH(0); /* without GS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-
-	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
-	OUT_BATCH(0);
-
-	/* disable HS */
-	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
-	OUT_BATCH(0);
-
-	/* Disable TE */
-	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* Disable DS */
-	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
-	OUT_BATCH(0);
-
-	/* Disable STREAMOUT */
-	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void 
-gen7_upload_vs_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	/* disable VS constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	
-	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
-	OUT_BATCH(0); /* without VS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-}
-
-static void 
-gen7_upload_sf_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
-	OUT_BATCH((1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
-		(1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
-		(0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW4 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW9 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_SF | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
-	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
 static void 
 gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 {
@@ -2159,34 +1731,6 @@ gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 	OUT_BATCH(0); /* kernel 2 pointer */
 }
 
-static void
-gen7_upload_binding_table(ScrnInfoPtr scrn, uint32_t ps_binding_table_offset)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
-	OUT_BATCH(ps_binding_table_offset);
-}
-
-static void
-gen7_upload_depth_buffer_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
-	OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
-		(BRW_SURFACE_NULL << 29));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
 static void 
 gen7_upload_vertex_buffer(ScrnInfoPtr scrn, drm_intel_bo *vertex_bo, uint32_t end_address_offset)
 {
@@ -2231,19 +1775,19 @@ gen7_emit_video_setup(ScrnInfoPtr scrn,
 	IntelEmitInvarientState(scrn);
 	intel->last_3d = LAST_3D_VIDEO;
 
-	gen7_upload_invarient_states(scrn);
+	gen6_upload_invariant_states(intel);
 	gen6_upload_state_base_address(scrn, surface_state_binding_table_bo);
-	gen7_upload_viewport_state_pointers(scrn);
-	gen7_upload_urb(scrn);
-	gen7_upload_cc_state_pointers(scrn);
-	gen7_upload_sampler_state_pointers(scrn);
-	gen7_upload_bypass_states(scrn);
-	gen7_upload_vs_state(scrn);
-	gen6_upload_clip_state(scrn);
-	gen7_upload_sf_state(scrn);
+	gen7_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo);
+	gen7_upload_urb(intel);
+	gen7_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo);
+	gen7_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo);
+	gen7_upload_bypass_states(intel);
+	gen6_upload_vs_state(intel);
+	gen6_upload_clip_state(intel);
+	gen7_upload_sf_state(intel);
 	gen7_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE);
-	gen7_upload_binding_table(scrn, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
-	gen7_upload_depth_buffer_state(scrn);
+	gen7_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
+	gen7_upload_depth_buffer_state(intel);
 	gen6_upload_drawing_rectangle(scrn, pixmap);
 	gen6_upload_vertex_element_state(scrn);
 	gen7_upload_vertex_buffer(scrn, vertex_bo, end_address_offset);
diff --git a/src/intel.h b/src/intel.h
index 6135349..3f48dd4 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -597,6 +597,35 @@ void i965_vertex_flush(intel_screen_private *intel);
 void i965_batch_flush(intel_screen_private *intel);
 void i965_batch_commit_notify(intel_screen_private *intel);
 
+/* i965_3d.c */
+void gen6_upload_invariant_states(intel_screen_private *intel);
+void gen6_upload_viewport_state_pointers(intel_screen_private *intel,
+					 drm_intel_bo *cc_vp_bo);
+void gen7_upload_viewport_state_pointers(intel_screen_private *intel,
+					 drm_intel_bo *cc_vp_bo);
+void gen6_upload_urb(intel_screen_private *intel);
+void gen7_upload_urb(intel_screen_private *intel);
+void gen6_upload_cc_state_pointers(intel_screen_private *intel,
+				   drm_intel_bo *blend_bo, drm_intel_bo *cc_bo,
+				   drm_intel_bo *depth_stencil_bo);
+void gen7_upload_cc_state_pointers(intel_screen_private *intel,
+				   drm_intel_bo *blend_bo, drm_intel_bo *cc_bo,
+				   drm_intel_bo *depth_stencil_bo);
+void gen6_upload_sampler_state_pointers(intel_screen_private *intel,
+					drm_intel_bo *sampler_bo);
+void gen7_upload_sampler_state_pointers(intel_screen_private *intel,
+					drm_intel_bo *sampler_bo);
+void gen7_upload_bypass_states(intel_screen_private *intel);
+void gen6_upload_gs_state(intel_screen_private *intel);
+void gen6_upload_vs_state(intel_screen_private *intel);
+void gen6_upload_clip_state(intel_screen_private *intel);
+void gen6_upload_sf_state(intel_screen_private *intel);
+void gen7_upload_sf_state(intel_screen_private *intel);
+void gen6_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset);
+void gen7_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset);
+void gen6_upload_depth_buffer_state(intel_screen_private *intel);
+void gen7_upload_depth_buffer_state(intel_screen_private *intel);
+
 Bool intel_transform_is_affine(PictTransformPtr t);
 Bool
 intel_get_transformed_coordinates(int x, int y, PictTransformPtr transform,
commit 54b3222658a285d26b7800bdc5f8343c918a804e
Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Wed Jul 13 22:27:37 2011 -0700

    render: Update 3DPRIMITIVE for Ivybridge.
    
    Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
    Acked-by: Eric Anholt <eric at anholt.net>
    Acked-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i965_render.c b/src/i965_render.c
index ec10392..c9b3c7a 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2262,11 +2262,17 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 	i965_select_vertex_buffer(intel);
 
 	if (intel->vertex_offset == 0) {
-		OUT_BATCH(BRW_3DPRIMITIVE |
-			  BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
-			  (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
-			  (0 << 9) |
-			  4);
+		if (INTEL_INFO(intel)->gen >= 70) {
+			OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2));
+			OUT_BATCH(BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+				  _3DPRIM_RECTLIST);
+		} else {
+			OUT_BATCH(BRW_3DPRIMITIVE |
+				  BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+				  (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+				  (0 << 9) |
+				  4);
+		}
 		intel->vertex_offset = intel->batch_used;
 		OUT_BATCH(0);	/* vertex count, to be filled in later */
 		OUT_BATCH(intel->vertex_index);
commit 4e491a1f6d7a15009904cb79e2c0b13cf2d9d6ec
Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Wed Jul 13 22:19:21 2011 -0700

    render: Set Address Modify Enable in 3DSTATE_VERTEX_BUFFERS on Gen7.
    
    Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
    Acked-by: Eric Anholt <eric at anholt.net>
    Acked-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i965_render.c b/src/i965_render.c
index 1dfdde4..ec10392 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2167,10 +2167,14 @@ i965_prepare_composite(int op, PicturePtr source_picture,
 static void i965_select_vertex_buffer(struct intel_screen_private *intel)
 {
 	int id = intel->gen4_render_state->composite_op.vertex_id;
+	int modifyenable = 0;
 
 	if (intel->vertex_id & (1 << id))
 		return;
 
+	if (INTEL_INFO(intel)->gen >= 70)
+		modifyenable = GEN7_VB0_ADDRESS_MODIFYENABLE;
+
 	/* Set up the pointer to our (single) vertex buffer */
 	OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
 
@@ -2180,6 +2184,7 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel)
 	if (INTEL_INFO(intel)->gen >= 60) {
 		OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) |
 			  GEN6_VB0_VERTEXDATA |
+			  modifyenable |
 			  (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
 	} else {
 		OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) |
commit 2787cd66f0d2907110f774392370537df63a96fa
Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Wed Jul 13 22:10:17 2011 -0700

    render: Update SAMPLER_STATE for Ivybridge.
    
    Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
    Acked-by: Eric Anholt <eric at anholt.net>
    Acked-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i965_render.c b/src/i965_render.c
index bb3c2b7..1dfdde4 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -842,7 +842,7 @@ static drm_intel_bo *sampler_border_color_create(intel_screen_private *intel)
 }
 
 static void
-sampler_state_init(drm_intel_bo * sampler_state_bo,
+gen4_sampler_state_init(drm_intel_bo * sampler_state_bo,
 		   struct brw_sampler_state *sampler_state,
 		   sampler_state_filter_t filter,
 		   sampler_state_extend_t extend,
@@ -907,6 +907,74 @@ sampler_state_init(drm_intel_bo * sampler_state_bo,
 	sampler_state->ss3.chroma_key_enable = 0;	/* disable chromakey */
 }
 
+static void
+gen7_sampler_state_init(drm_intel_bo * sampler_state_bo,
+		   struct gen7_sampler_state *sampler_state,
+		   sampler_state_filter_t filter,
+		   sampler_state_extend_t extend,
+		   drm_intel_bo * border_color_bo)
+{
+	uint32_t sampler_state_offset;
+
+	sampler_state_offset = (char *)sampler_state -
+	    (char *)sampler_state_bo->virtual;	/* byte offset of this entry from the bo's ->virtual pointer */
+
+	/* The PS kernel uses this sampler; start from an all-zero state */
+	memset(sampler_state, 0, sizeof(*sampler_state));
+
+	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
+
+	/* We use the legacy mode to get the semantics specified by
+	 * the Render extension. */
+	sampler_state->ss0.default_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;
+
+	switch (filter) {
+	default:	/* unrecognized filter values are treated as nearest */
+	case SAMPLER_STATE_FILTER_NEAREST:
+		sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+		sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+		break;
+	case SAMPLER_STATE_FILTER_BILINEAR:
+		sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+		sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+		break;
+	}
+
+	switch (extend) {	/* the same wrap mode is applied to the r, s and t coordinates */
+	default:	/* unrecognized extend values are treated as EXTEND_NONE */
+	case SAMPLER_STATE_EXTEND_NONE:
+		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+		break;
+	case SAMPLER_STATE_EXTEND_REPEAT:
+		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+		break;
+	case SAMPLER_STATE_EXTEND_PAD:
+		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+		break;
+	case SAMPLER_STATE_EXTEND_REFLECT:
+		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
+		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
+		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
+		break;
+	}
+
+	sampler_state->ss2.default_color_pointer =
+	    intel_emit_reloc(sampler_state_bo, sampler_state_offset +
+			     offsetof(struct gen7_sampler_state, ss2),
+			     border_color_bo, 0,
+			     I915_GEM_DOMAIN_SAMPLER, 0) >> 5;	/* reloc against border_color_bo at the ss2 dword; the returned value is stored shifted right by 5 */
+
+	sampler_state->ss3.chroma_key_enable = 0;	/* disable chromakey */
+}
+
+
+
 static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel,
 					       sampler_state_filter_t src_filter,
 					       sampler_state_extend_t src_extend,
@@ -923,18 +991,65 @@ static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel,
 	drm_intel_bo_map(sampler_state_bo, TRUE);
 	sampler_state = sampler_state_bo->virtual;
 
-	sampler_state_init(sampler_state_bo,
-			   &sampler_state[0],
-			   src_filter, src_extend, border_color_bo);
-	sampler_state_init(sampler_state_bo,
-			   &sampler_state[1],
-			   mask_filter, mask_extend, border_color_bo);
+	gen4_sampler_state_init(sampler_state_bo,
+				&sampler_state[0],
+				src_filter, src_extend, border_color_bo);
+	gen4_sampler_state_init(sampler_state_bo,
+				&sampler_state[1],
+				mask_filter, mask_extend, border_color_bo);
+
+	drm_intel_bo_unmap(sampler_state_bo);
+
+	return sampler_state_bo;
+}
+
+static drm_intel_bo *
+gen7_create_sampler_state(intel_screen_private *intel,
+			  sampler_state_filter_t src_filter,
+			  sampler_state_extend_t src_extend,
+			  sampler_state_filter_t mask_filter,
+			  sampler_state_extend_t mask_extend,
+			  drm_intel_bo * border_color_bo)
+{
+	drm_intel_bo *sampler_state_bo;
+	struct gen7_sampler_state *sampler_state;
+
+	sampler_state_bo =
+	    drm_intel_bo_alloc(intel->bufmgr, "gen7 sampler state",
+			       sizeof(struct gen7_sampler_state) * 2, 4096);	/* room for two entries: [0] source, [1] mask */
+	drm_intel_bo_map(sampler_state_bo, TRUE);	/* NOTE(review): the alloc and map results are not checked before use */
+	sampler_state = sampler_state_bo->virtual;
+
+	gen7_sampler_state_init(sampler_state_bo,
+				&sampler_state[0],
+				src_filter, src_extend, border_color_bo);	/* entry 0: source sampler */
+	gen7_sampler_state_init(sampler_state_bo,
+				&sampler_state[1],
+				mask_filter, mask_extend, border_color_bo);	/* entry 1: mask sampler */
 
 	drm_intel_bo_unmap(sampler_state_bo);
 
 	return sampler_state_bo;
 }
 
+static inline drm_intel_bo *
+i965_create_sampler_state(intel_screen_private *intel,
+			  sampler_state_filter_t src_filter,
+			  sampler_state_extend_t src_extend,
+			  sampler_state_filter_t mask_filter,
+			  sampler_state_extend_t mask_extend,
+			  drm_intel_bo * border_color_bo)
+{
+	if (INTEL_INFO(intel)->gen < 70)	/* below gen 70: keep using the gen4 sampler state builder */
+		return gen4_create_sampler_state(intel, src_filter, src_extend,
+						 mask_filter, mask_extend,
+						 border_color_bo);
+	return gen7_create_sampler_state(intel, src_filter, src_extend,
+					 mask_filter, mask_extend,
+					 border_color_bo);	/* gen 70 and later: gen7_sampler_state layout */
+}
+
+
 static void
 cc_state_init(drm_intel_bo * cc_state_bo,
 	      uint32_t cc_state_offset,
@@ -2267,7 +2382,7 @@ void gen4_render_state_init(ScrnInfoPtr scrn)
 					drm_intel_bo *sampler_state_bo;
 
 					sampler_state_bo =
-					    gen4_create_sampler_state(intel,
+					    i965_create_sampler_state(intel,
 								      i, j,
 								      k, l,
 								      border_color_bo);
@@ -2852,7 +2967,7 @@ gen6_render_state_init(ScrnInfoPtr scrn)
 			for (k = 0; k < FILTER_COUNT; k++) {
 				for (l = 0; l < EXTEND_COUNT; l++) {
 					render->ps_sampler_state_bo[i][j][k][l] =
-						gen4_create_sampler_state(intel,
+						i965_create_sampler_state(intel,
 								i, j,
 								k, l,
 								border_color_bo);
commit 1a7e541d0d33727953fa69f1e29fa4eda665a0d9
Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Wed Jul 13 21:53:27 2011 -0700

    render: Update SURFACE_STATE for Ivybridge.
    
    Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
    Acked-by: Eric Anholt <eric at anholt.net>
    Acked-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i965_render.c b/src/i965_render.c
index 5ab53c4..bb3c2b7 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -723,10 +723,10 @@ typedef struct _brw_cc_unit_state_padded {
 	char pad[64 - sizeof(struct brw_cc_unit_state)];
 } brw_cc_unit_state_padded;
 
-typedef struct brw_surface_state_padded {
-	struct brw_surface_state state;
-	char pad[32 - sizeof(struct brw_surface_state)];
-} brw_surface_state_padded;
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+#define SURFACE_STATE_PADDED_SIZE ALIGN(MAX(sizeof(struct brw_surface_state), sizeof(struct gen7_surface_state)), 32)
 
 struct gen4_cc_unit_state {
 	/* Index by [src_blend][dst_blend] */
@@ -1161,7 +1161,7 @@ static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type)
  * picture in the given surface state buffer.
  */
 static int
-i965_set_picture_surface_state(intel_screen_private *intel,
+gen4_set_picture_surface_state(intel_screen_private *intel,
 			       PicturePtr picture, PixmapPtr pixmap,
 			       Bool is_dst)
 {
@@ -1215,7 +1215,70 @@ i965_set_picture_surface_state(intel_screen_private *intel,
 			  priv->bo);
 
 	offset = intel->surface_used;
-	intel->surface_used += sizeof(struct brw_surface_state_padded);
+	intel->surface_used += SURFACE_STATE_PADDED_SIZE;
+
+	if (is_dst)
+		priv->dst_bound = offset;
+	else
+		priv->src_bound = offset;
+
+	return offset;
+}
+
+static int
+gen7_set_picture_surface_state(intel_screen_private *intel,
+			       PicturePtr picture, PixmapPtr pixmap,
+			       Bool is_dst)
+{
+	struct intel_pixmap *priv = intel_get_pixmap_private(pixmap);
+	struct gen7_surface_state *ss;
+	uint32_t write_domain, read_domains;
+	int offset;
+
+	if (is_dst) {	/* destinations use the render domain for both read and write */
+		write_domain = I915_GEM_DOMAIN_RENDER;
+		read_domains = I915_GEM_DOMAIN_RENDER;
+	} else {	/* sources are only read, through the sampler domain */
+		write_domain = 0;
+		read_domains = I915_GEM_DOMAIN_SAMPLER;
+	}
+	intel_batch_mark_pixmap_domains(intel, priv,
+					read_domains, write_domain);
+	if (is_dst) {	/* a non-zero cached offset is returned as-is instead of building a new entry */
+		if (priv->dst_bound)
+			return priv->dst_bound;
+	} else {
+		if (priv->src_bound)
+			return priv->src_bound;
+	}
+
+	ss = (struct gen7_surface_state *)
+		(intel->surface_data + intel->surface_used);	/* new entry is written at the current surface_used offset */
+
+	memset(ss, 0, sizeof(*ss));
+	ss->ss0.surface_type = BRW_SURFACE_2D;
+	if (is_dst)
+		ss->ss0.surface_format = i965_get_dest_format(picture);
+	else
+		ss->ss0.surface_format = i965_get_card_format(picture);
+
+	ss->ss0.tile_walk = 0;	/* Tiled X */
+	ss->ss0.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0;
+	ss->ss1.base_addr = priv->bo->offset;
+
+	ss->ss2.height = pixmap->drawable.height - 1;	/* height, width and pitch are all stored minus one */
+	ss->ss2.width = pixmap->drawable.width - 1;
+	ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1;
+
+	dri_bo_emit_reloc(intel->surface_bo,
+			  read_domains, write_domain,
+			  0,
+			  intel->surface_used +
+			  offsetof(struct gen7_surface_state, ss1),	/* reloc location: the ss1 dword of this entry */
+			  priv->bo);
+
+	offset = intel->surface_used;
+	intel->surface_used += SURFACE_STATE_PADDED_SIZE;	/* advance by one padded slot */
 
 	if (is_dst)
 		priv->dst_bound = offset;
@@ -1225,6 +1288,16 @@ i965_set_picture_surface_state(intel_screen_private *intel,
 	return offset;
 }
 
+static inline int
+i965_set_picture_surface_state(intel_screen_private *intel,
+			       PicturePtr picture, PixmapPtr pixmap,
+			       Bool is_dst)
+{
+	if (INTEL_INFO(intel)->gen < 70)	/* below gen 70: brw_surface_state layout */
+		return gen4_set_picture_surface_state(intel, picture, pixmap, is_dst);
+	return gen7_set_picture_surface_state(intel, picture, pixmap, is_dst);	/* gen 70 and later: gen7_surface_state layout */
+}
+
 static void gen4_composite_vertex_elements(struct intel_screen_private *intel)
 {
 	struct gen4_render_state *render_state = intel->gen4_render_state;
@@ -1968,7 +2041,7 @@ i965_prepare_composite(int op, PicturePtr source_picture,
 	}
 
 	if (sizeof(intel->surface_data) - intel->surface_used <
-	    4 * sizeof(struct brw_surface_state_padded))
+	    4 * SURFACE_STATE_PADDED_SIZE)
 		i965_surface_flush(intel);
 
 	intel->needs_render_state_emit = TRUE;
@@ -2014,11 +2087,11 @@ static void i965_bind_surfaces(struct intel_screen_private *intel)
 {
 	uint32_t *binding_table;
 
-	assert(intel->surface_used + 4 * sizeof(struct brw_surface_state_padded) <= sizeof(intel->surface_data));
+	assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data));
 
 	binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
 	intel->surface_table = intel->surface_used;
-	intel->surface_used += sizeof(struct brw_surface_state_padded);
+	intel->surface_used += SURFACE_STATE_PADDED_SIZE;
 
 	binding_table[0] =
 		i965_set_picture_surface_state(intel,
commit 07cc488bcf3f7653cd54928e3cedb1f4f102c5e0
Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Sat Jul 9 01:36:52 2011 -0700

    render: New Ivybridge assembly programs for render acceleration.
    
    These are exactly the same as the ones for Sandybridge, but with message
    registers translated (hopefully) in the same way as Haihao's new
    programs (m1 == g65).
    
    Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
    Acked-by: Eric Anholt <eric at anholt.net>
    Acked-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i965_render.c b/src/i965_render.c
index b76107d..5ab53c4 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -538,6 +538,74 @@ static const uint32_t ps_kernel_masknoca_projective_static_gen6[][4] = {
 #include "exa_wm_write.g6b"
 };
 
+/* programs for GEN7 */
+static const uint32_t ps_kernel_nomask_affine_static_gen7[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_nomask_projective_static_gen7[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_affine_static_gen7[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_projective_static_gen7[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca.g4b.gen5"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen7[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_a.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen7[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_a.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine_static_gen7[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_a.g7b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective_static_gen7[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_a.g7b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+
 typedef enum {
 	SAMPLER_STATE_FILTER_NEAREST,
 	SAMPLER_STATE_FILTER_BILINEAR,
@@ -629,6 +697,25 @@ static const struct wm_kernel_info wm_kernels_gen6[] = {
 	       ps_kernel_masknoca_projective_static_gen6, TRUE),
 };
 
+static const struct wm_kernel_info wm_kernels_gen7[] = {
+	KERNEL(WM_KERNEL_NOMASK_AFFINE,
+	       ps_kernel_nomask_affine_static_gen7, FALSE),
+	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
+	       ps_kernel_nomask_projective_static_gen7, FALSE),
+	KERNEL(WM_KERNEL_MASKCA_AFFINE,
+	       ps_kernel_maskca_affine_static_gen7, TRUE),
+	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
+	       ps_kernel_maskca_projective_static_gen7, TRUE),
+	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
+	       ps_kernel_maskca_srcalpha_affine_static_gen7, TRUE),
+	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+	       ps_kernel_maskca_srcalpha_projective_static_gen7, TRUE),
+	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
+	       ps_kernel_masknoca_affine_static_gen7, TRUE),
+	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
+	       ps_kernel_masknoca_projective_static_gen7, TRUE),
+};
+
 #undef KERNEL
 
 typedef struct _brw_cc_unit_state_padded {
@@ -2665,6 +2752,7 @@ gen6_render_state_init(ScrnInfoPtr scrn)
 	struct gen4_render_state *render;
 	int i, j, k, l, m;
 	drm_intel_bo *border_color_bo;
+	const struct wm_kernel_info *wm_kernels;
 
 	render= intel->gen4_render_state;
 	render->composite_op.vertex_id = -1;
@@ -2675,12 +2763,13 @@ gen6_render_state_init(ScrnInfoPtr scrn)
 	intel->gen6_render_state.kernel = NULL;
 	intel->gen6_render_state.drawrect = -1;
 
+	wm_kernels = IS_GEN7(intel) ? wm_kernels_gen7 : wm_kernels_gen6;
 	for (m = 0; m < KERNEL_COUNT; m++) {
 		render->wm_kernel_bo[m] =
 			intel_bo_alloc_for_data(intel,
-					wm_kernels_gen6[m].data,
-					wm_kernels_gen6[m].size,
-					"WM kernel gen6");
+					wm_kernels[m].data,
+					wm_kernels[m].size,
+					"WM kernel gen6/7");
 	}
 
 	border_color_bo = sampler_border_color_create(intel);
diff --git a/src/render_program/Makefile.am b/src/render_program/Makefile.am
index d7045fa..c70b1f7 100644
--- a/src/render_program/Makefile.am
+++ b/src/render_program/Makefile.am
@@ -100,14 +100,26 @@ INTEL_G6B =				\
 	exa_wm_yuv_rgb.g6b
 
 INTEL_G7A =				\
+	exa_wm_mask_affine.g7a		\
+	exa_wm_mask_projective.g7a	\
+	exa_wm_mask_sample_a.g7a	\
+	exa_wm_mask_sample_argb.g7a	\
 	exa_wm_src_affine.g7a 		\
+	exa_wm_src_projective.g7a	\
+	exa_wm_src_sample_a.g7a		\
 	exa_wm_src_sample_argb.g7a 	\
 	exa_wm_src_sample_planar.g7a 	\
 	exa_wm_write.g7a 		\
 	exa_wm_yuv_rgb.g7a
 
 INTEL_G7B =				\
+	exa_wm_mask_affine.g7b		\
+	exa_wm_mask_projective.g7b	\
+	exa_wm_mask_sample_a.g7b	\
+	exa_wm_mask_sample_argb.g7b	\
 	exa_wm_src_affine.g7b 		\
+	exa_wm_src_projective.g7b	\
+	exa_wm_src_sample_a.g7b		\
 	exa_wm_src_sample_argb.g7b 	\
 	exa_wm_src_sample_planar.g7b 	\
 	exa_wm_write.g7b 		\
diff --git a/src/render_program/exa_wm_mask_affine.g7a b/src/render_program/exa_wm_mask_affine.g7a
new file mode 100644
index 0000000..4277080
--- /dev/null
+++ b/src/render_program/exa_wm_mask_affine.g7a
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Fragment to compute mask u/v values
+ */
+include(`exa_wm.g4i')
+
+define(`ul',    `g72')
+define(`uh',    `g73')
+define(`vl',    `g74')
+define(`vh',    `g75')
+
+define(`bl',    `g2.0<8,8,1>F')
+define(`bh',    `g4.0<8,8,1>F')
+
+define(`a0_a_x',`g8.0<0,1,0>F')
+define(`a0_a_y',`g8.16<0,1,0>F')
+
+include(`exa_wm_affine.g6i')
diff --git a/src/render_program/exa_wm_mask_affine.g7b b/src/render_program/exa_wm_mask_affine.g7b
new file mode 100644
index 0000000..8d72599
--- /dev/null
+++ b/src/render_program/exa_wm_mask_affine.g7b
@@ -0,0 +1,4 @@
+   { 0x0060005a, 0x290077bd, 0x00000100, 0x008d0040 },
+   { 0x0060005a, 0x292077bd, 0x00000100, 0x008d0080 },
+   { 0x0060005a, 0x294077bd, 0x00000110, 0x008d0040 },
+   { 0x0060005a, 0x296077bd, 0x00000110, 0x008d0080 },
diff --git a/src/render_program/exa_wm_mask_projective.g7a b/src/render_program/exa_wm_mask_projective.g7a
new file mode 100644
index 0000000..ba4158f
--- /dev/null
+++ b/src/render_program/exa_wm_mask_projective.g7a
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Fragment to compute mask u/v values
+ */
+include(`exa_wm.g4i')
+
+define(`u',     `g72')
+define(`ul',    `g72')
+define(`uh',    `g73')
+define(`v',     `g74')
+define(`vl',    `g74')
+define(`vh',    `g75')
+define(`w',     `mask_w')
+define(`wl',    `mask_w_0')
+define(`wh',    `mask_w_1')
+
+define(`bl',    `g2.0<8,8,1>F')
+define(`bh',    `g4.0<8,8,1>F')
+
+define(`a0_a_x',`g8.0<0,1,0>F')
+define(`a0_a_y',`g8.16<0,1,0>F')
+define(`a0_a_z',`g9.0<0,1,0>F')
+
+/* W */
+pln (8) temp_x_0<1>F a0_a_z bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_z bh { align1 }; /* pixel 8-15 */
+math (8) wl<1>F temp_x_0<8,8,1>F null inv { align1 };
+math (8) wh<1>F temp_x_1<8,8,1>F null inv { align1 };
+
+/* U */
+pln (8) temp_x_0<1>F a0_a_x bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_x bh { align1 }; /* pixel 8-15 */
+mul (8) ul<1>F temp_x_0<8,8,1>F wl<8,8,1>F { align1 };
+mul (8) uh<1>F temp_x_1<8,8,1>F wh<8,8,1>F { align1 };
+
+/* V */
+pln (8) temp_x_0<1>F a0_a_y bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_y bh { align1 }; /* pixel 8-15 */
+mul (8) vl<1>F temp_x_0<8,8,1>F wl<8,8,1>F { align1 };
+mul (8) vh<1>F temp_x_1<8,8,1>F wh<8,8,1>F { align1 };
diff --git a/src/render_program/exa_wm_mask_projective.g7b b/src/render_program/exa_wm_mask_projective.g7b
new file mode 100644
index 0000000..a2e9267
--- /dev/null
+++ b/src/render_program/exa_wm_mask_projective.g7b
@@ -0,0 +1,12 @@
+   { 0x0060005a, 0x23c077bd, 0x00000120, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x00000120, 0x008d0080 },
+   { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
+   { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
+   { 0x0060005a, 0x23c077bd, 0x00000100, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x00000100, 0x008d0080 },
+   { 0x00600041, 0x290077bd, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x292077bd, 0x008d03e0, 0x008d01a0 },
+   { 0x0060005a, 0x23c077bd, 0x00000110, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x00000110, 0x008d0080 },
+   { 0x00600041, 0x294077bd, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x296077bd, 0x008d03e0, 0x008d01a0 },
diff --git a/src/render_program/exa_wm_mask_sample_a.g7a b/src/render_program/exa_wm_mask_sample_a.g7a
new file mode 100644
index 0000000..a0d38e1
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_a.g7a
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the mask surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load only alpha */
+mov (1) g0.8<1>UD	0x00007000UD { align1 mask_disable };
+mov (8) g71<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
+
+/* g71 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) 71		/* msg reg index */
+	mask_sample_a_01<1>UW 	/* readback */
+	null
+	sampler (2,1,F)		/* sampler message description, (binding_table,sampler_index,datatype) */
+				/* here (mask->dst) we use the mask surface and mask sampler */
+	mlen 5 rlen 2 { align1 };   /* required message len 5, readback len 2 */
+
diff --git a/src/render_program/exa_wm_mask_sample_a.g7b b/src/render_program/exa_wm_mask_sample_a.g7b
new file mode 100644
index 0000000..fa36a59
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_a.g7b
@@ -0,0 +1,3 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
+   { 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x23801ca9, 0x000008e0, 0x0a2c0102 },
diff --git a/src/render_program/exa_wm_mask_sample_argb.g7a b/src/render_program/exa_wm_mask_sample_argb.g7a
new file mode 100644
index 0000000..984b622
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_argb.g7a
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the mask surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load argb */
+mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
+mov (8) g71<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
+
+/* g71 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) 71		/* msg reg index */
+	mask_sample_base<1>UW 	/* readback */
+	null
+	sampler (2,1,F)		/* sampler message description, (binding_table,sampler_index,datatype) */
+				/* here (mask->dst) we use the mask surface and mask sampler */
+	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
+
diff --git a/src/render_program/exa_wm_mask_sample_argb.g7b b/src/render_program/exa_wm_mask_sample_argb.g7b
new file mode 100644
index 0000000..01edf7d
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_argb.g7b
@@ -0,0 +1,3 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x22c01ca9, 0x000008e0, 0x0a8c0102 },
diff --git a/src/render_program/exa_wm_src_projective.g7a b/src/render_program/exa_wm_src_projective.g7a
new file mode 100644
index 0000000..9fd495c
--- /dev/null
+++ b/src/render_program/exa_wm_src_projective.g7a
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Fragment to compute src u/v values
+ */
+include(`exa_wm.g4i')
+
+define(`u',     `g66')
+define(`ul',    `g66')
+define(`uh',    `g67')
+define(`v',     `g68')
+define(`vl',    `g68')
+define(`vh',    `g69')
+define(`w',     `src_w')
+define(`wl',    `src_w_0')
+define(`wh',    `src_w_1')
+
+define(`bl',    `g2.0<8,8,1>F')
+define(`bh',    `g4.0<8,8,1>F')
+
+define(`a0_a_x',`g6.0<0,1,0>F')
+define(`a0_a_y',`g6.16<0,1,0>F')
+define(`a0_a_z',`g7.0<0,1,0>F')
+
+/* W */
+pln (8) temp_x_0<1>F a0_a_z bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_z bh { align1 }; /* pixel 8-15 */
+math (8) wl<1>F temp_x_0<8,8,1>F null inv { align1 };
+math (8) wh<1>F temp_x_1<8,8,1>F null inv { align1 };
+
+/* U */
+pln (8) temp_x_0<1>F a0_a_x bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_x bh { align1 }; /* pixel 8-15 */
+mul (8) ul<1>F temp_x_0<8,8,1>F wl<8,8,1>F { align1 };
+mul (8) uh<1>F temp_x_1<8,8,1>F wh<8,8,1>F { align1 };
+
+/* V */
+pln (8) temp_x_0<1>F a0_a_y bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_y bh { align1 }; /* pixel 8-15 */
+mul (8) vl<1>F temp_x_0<8,8,1>F wl<8,8,1>F { align1 };
+mul (8) vh<1>F temp_x_1<8,8,1>F wh<8,8,1>F { align1 };
diff --git a/src/render_program/exa_wm_src_projective.g7b b/src/render_program/exa_wm_src_projective.g7b
new file mode 100644
index 0000000..73727ff
--- /dev/null
+++ b/src/render_program/exa_wm_src_projective.g7b
@@ -0,0 +1,12 @@
+   { 0x0060005a, 0x23c077bd, 0x000000e0, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x000000e0, 0x008d0080 },
+   { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
+   { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
+   { 0x0060005a, 0x23c077bd, 0x000000c0, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x000000c0, 0x008d0080 },
+   { 0x00600041, 0x284077bd, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x286077bd, 0x008d03e0, 0x008d01a0 },
+   { 0x0060005a, 0x23c077bd, 0x000000d0, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x000000d0, 0x008d0080 },
+   { 0x00600041, 0x288077bd, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x28a077bd, 0x008d03e0, 0x008d01a0 },
diff --git a/src/render_program/exa_wm_src_sample_a.g7a b/src/render_program/exa_wm_src_sample_a.g7a
new file mode 100644
index 0000000..d5d24fd
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_a.g7a
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang at intel.com>
+ *    Keith Packard <keithp at keithp.com>
+ */
+
+/* Sample the src surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load alpha */
+mov (1) g0.8<1>UD	0x00007000UD { align1 mask_disable };
+mov (8) g65<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
+
+/* g65 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) 65		/* msg reg index */
+	src_sample_a_01<1>UW 	/* readback */
+	null
+	sampler (1,0,F)		/* sampler message description, (binding_table,sampler_index,datatype) */
+				/* here (src->dst) we should use src_sampler and src_surface */
+	mlen 5 rlen 2 { align1 };   /* required message len 5, readback len 2 */
diff --git a/src/render_program/exa_wm_src_sample_a.g7b b/src/render_program/exa_wm_src_sample_a.g7b
new file mode 100644
index 0000000..73912b7
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_a.g7b
@@ -0,0 +1,3 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
+   { 0x00600001, 0x28200021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x22801ca9, 0x00000820, 0x0a2c0001 },


More information about the xorg-commit mailing list