xf86-video-intel: 4 commits - src/sna/gen2_render.c src/sna/sna_blt.c src/sna/sna_gradient.c src/sna/sna_render.c src/sna/sna_render.h

Mon Jan 9 15:28:08 PST 2012

src/sna/gen2_render.c  |  409 +++++++++++++++++++++++++++++++++++++------------
 src/sna/sna_blt.c      |   26 ++-
 src/sna/sna_gradient.c |    2 
 src/sna/sna_render.c   |    4 
 src/sna/sna_render.h   |    6 
 5 files changed, 340 insertions(+), 107 deletions(-)

New commits:
commit b76865fa3deff2f44a1158914a124b9c81d67eca
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Jan 9 22:11:21 2012 +0000

    sna/gen2: Try to avoid creating a bo for solid colours
    
    We try to use the diffuse/specular and only resort to using a texture
    operation for convenience in the rare case of a solid mask.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 2566be3..8f6a164 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -1100,12 +1100,12 @@ gen2_composite_solid_init(struct sna *sna,
 	channel->height = 1;
 	channel->pict_format = PICT_a8r8g8b8;
 
-	channel->bo = sna_render_get_solid(sna, color);
+	channel->bo = NULL;
 	channel->u.gen2.pixel = color;
 
 	channel->scale[0]  = channel->scale[1]  = 1;
 	channel->offset[0] = channel->offset[1] = 0;
-	return channel->bo != NULL;
+	return TRUE;
 }
 
 #define xFixedToDouble(f) pixman_fixed_to_double(f)
@@ -1738,6 +1738,10 @@ gen2_render_composite(struct sna *sna,
 				tmp->op = PictOpOutReverse;
 			}
 		}
+
+		/* convert solid to a texture (pure convenience) */
+		if (tmp->mask.is_solid)
+			tmp->mask.bo = sna_render_get_solid(sna, tmp->mask.u.gen2.pixel);
 	}
 
 	tmp->floats_per_vertex = 2;
commit 981aae104a96b41db88cc381cc7592818f3e1298
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Jan 9 22:02:40 2012 +0000

    sna/gen2: Eliminate some switching between logic op and blend
    
    If the new mode can be done either using a logic op or with the blend
    unit, prefer the currently enabled unit.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 674b41e..2566be3 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -377,9 +377,7 @@ gen2_get_blend_factors(const struct sna_composite_op *op,
 			ablend |= TB0A_ARG2_SEL_TEXEL1;
 		}
 
-		if (op->dst.format == PICT_a8 ||
-		    !op->has_component_alpha ||
-		    PICT_FORMAT_RGB(op->mask.pict_format) == 0)
+		if (op->dst.format == PICT_a8 || !op->has_component_alpha)
 			cblend |= TB0C_ARG2_REPLICATE_ALPHA;
 
 		cblend |= TB0C_OP_MODULATE;
@@ -399,6 +397,9 @@ static uint32_t gen2_get_blend_cntl(int op,
 {
 	uint32_t sblend, dblend;
 
+	if (op <= PictOpSrc)
+		return S8_ENABLE_COLOR_BUFFER_WRITE;
+
 	sblend = gen2_blend_op[op].src_blend;
 	dblend = gen2_blend_op[op].dst_blend;
 
@@ -424,7 +425,9 @@ static uint32_t gen2_get_blend_cntl(int op,
 	}
 
 	return (sblend << S8_SRC_BLEND_FACTOR_SHIFT |
-		dblend << S8_DST_BLEND_FACTOR_SHIFT);
+		dblend << S8_DST_BLEND_FACTOR_SHIFT |
+		S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
+		S8_ENABLE_COLOR_BUFFER_WRITE);
 }
 
 static void gen2_emit_invariant(struct sna *sna)
@@ -567,6 +570,8 @@ static void gen2_disable_logic_op(struct sna *sna)
 	if (!sna->render_state.gen2.logic_op_enabled)
 		return;
 
+	DBG(("%s\n", __FUNCTION__));
+
 	BATCH(_3DSTATE_ENABLES_1_CMD |
 	      DISABLE_LOGIC_OP | ENABLE_COLOR_BLEND);
 
@@ -595,9 +600,15 @@ static void gen2_enable_logic_op(struct sna *sna, int op)
 	};
 
 	if (sna->render_state.gen2.logic_op_enabled != op+1) {
-		if (!sna->render_state.gen2.logic_op_enabled)
+		if (!sna->render_state.gen2.logic_op_enabled) {
+			if (op == GXclear || op == GXcopy)
+				return;
+
+			DBG(("%s\n", __FUNCTION__));
+
 			BATCH(_3DSTATE_ENABLES_1_CMD |
 			      ENABLE_LOGIC_OP | DISABLE_COLOR_BLEND);
+		}
 
 		BATCH(_3DSTATE_MODES_4_CMD |
 		      ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(logic_op[op]));
@@ -620,14 +631,12 @@ static void gen2_emit_composite_state(struct sna *sna,
 	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
 	BATCH((!op->src.is_solid + (op->mask.bo != NULL)) << 12);
 	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
-	BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
-	      gen2_get_blend_cntl(op->op,
+	BATCH(gen2_get_blend_cntl(op->op,
 				  op->has_component_alpha,
-				  op->dst.format) |
-	      S8_ENABLE_COLOR_BUFFER_WRITE);
-	if (memcmp (sna->kgem.batch + sna->render_state.gen2.ls1,
-		    sna->kgem.batch + unwind,
-		    4 * sizeof(uint32_t)) == 0)
+				  op->dst.format));
+	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
+		   sna->kgem.batch + unwind + 1,
+		   3 * sizeof(uint32_t)) == 0)
 		sna->kgem.nbatch = unwind;
 	else
 		sna->render_state.gen2.ls1 = unwind;
@@ -640,9 +649,9 @@ static void gen2_emit_composite_state(struct sna *sna,
 	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
 	BATCH(cblend);
 	BATCH(ablend);
-	if (memcmp (sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
-		    sna->kgem.batch + unwind + 1,
-		    2 * sizeof(uint32_t)) == 0)
+	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
+		   sna->kgem.batch + unwind + 1,
+		   2 * sizeof(uint32_t)) == 0)
 		sna->kgem.nbatch = unwind;
 	else
 		sna->render_state.gen2.ls2 = unwind;
@@ -899,9 +908,16 @@ static void gen2_magic_ca_pass(struct sna *sna,
 	if (!op->need_magic_ca_pass)
 		return;
 
+	DBG(("%s: batch=%x, vertex=%x\n", __FUNCTION__,
+	     sna->kgem.nbatch, sna->render_state.gen2.vertex_offset));
+
+	assert(op->mask.bo);
+	assert(op->has_component_alpha);
+
 	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 0);
-	BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
-	      gen2_get_blend_cntl(PictOpAdd, TRUE, op->dst.format) |
+	BATCH(BLENDFACTOR_ONE << S8_SRC_BLEND_FACTOR_SHIFT |
+	      BLENDFACTOR_ONE << S8_DST_BLEND_FACTOR_SHIFT |
+	      S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
 	      S8_ENABLE_COLOR_BUFFER_WRITE);
 	sna->render_state.gen2.ls1 = 0;
 
@@ -1978,9 +1994,9 @@ gen2_emit_spans_pipeline(struct sna *sna,
 	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
 	BATCH(cblend);
 	BATCH(ablend);
-	if (memcmp (sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
-		    sna->kgem.batch + unwind + 1,
-		    2 * sizeof(uint32_t)) == 0)
+	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
+		   sna->kgem.batch + unwind + 1,
+		   2 * sizeof(uint32_t)) == 0)
 		sna->kgem.nbatch = unwind;
 	else
 		sna->render_state.gen2.ls2 = unwind;
@@ -1999,12 +2015,10 @@ static void gen2_emit_composite_spans_state(struct sna *sna,
 	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
 	BATCH(!op->base.src.is_solid << 12);
 	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY | S3_DIFFUSE_PRESENT);
-	BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
-	      gen2_get_blend_cntl(op->base.op, FALSE, op->base.dst.format) |
-	      S8_ENABLE_COLOR_BUFFER_WRITE);
-	if (memcmp (sna->kgem.batch + sna->render_state.gen2.ls1,
-		    sna->kgem.batch + unwind,
-		    4 * sizeof(uint32_t)) == 0)
+	BATCH(gen2_get_blend_cntl(op->base.op, FALSE, op->base.dst.format));
+	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
+		   sna->kgem.batch + unwind + 1,
+		   3 * sizeof(uint32_t)) == 0)
 		sna->kgem.nbatch = unwind;
 	else
 		sna->render_state.gen2.ls1 = unwind;
@@ -2224,9 +2238,9 @@ gen2_emit_fill_pipeline(struct sna *sna, const struct sna_composite_op *op)
 	      TB0A_ARG1_SEL_DIFFUSE |
 	      TB0A_OUTPUT_WRITE_CURRENT);
 
-	if (memcmp (sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
-		    sna->kgem.batch + unwind + 1,
-		    2 * sizeof(uint32_t)) == 0)
+	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
+		   sna->kgem.batch + unwind + 1,
+		   2 * sizeof(uint32_t)) == 0)
 		sna->kgem.nbatch = unwind;
 	else
 		sna->render_state.gen2.ls2 = unwind;
@@ -2246,12 +2260,10 @@ static void gen2_emit_fill_composite_state(struct sna *sna,
 	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
 	BATCH(0);
 	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
-	BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
-	      gen2_get_blend_cntl(op->op, FALSE, op->dst.format) |
-	      S8_ENABLE_COLOR_BUFFER_WRITE);
-	if (memcmp (sna->kgem.batch + sna->render_state.gen2.ls1,
-		    sna->kgem.batch + ls1,
-		    4 * sizeof(uint32_t)) == 0)
+	BATCH(gen2_get_blend_cntl(op->op, FALSE, op->dst.format));
+	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
+		   sna->kgem.batch + ls1 + 1,
+		   3 * sizeof(uint32_t)) == 0)
 		sna->kgem.nbatch = ls1;
 	else
 		sna->render_state.gen2.ls1 = ls1;
@@ -2428,9 +2440,9 @@ static void gen2_emit_fill_state(struct sna *sna,
 	BATCH(0);
 	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
 	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
-	if (memcmp (sna->kgem.batch + sna->render_state.gen2.ls1,
-		    sna->kgem.batch + ls1,
-		    4 * sizeof(uint32_t)) == 0)
+	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
+		   sna->kgem.batch + ls1 + 1,
+		   3 * sizeof(uint32_t)) == 0)
 		sna->kgem.nbatch = ls1;
 	else
 		sna->render_state.gen2.ls1 = ls1;
@@ -2700,9 +2712,9 @@ gen2_emit_copy_pipeline(struct sna *sna, const struct sna_composite_op *op)
 		blend |= TB0A_ARG1_SEL_TEXEL0;
 	BATCH(blend);
 
-	if (memcmp (sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
-		    sna->kgem.batch + unwind + 1,
-		    2 * sizeof(uint32_t)) == 0)
+	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
+		   sna->kgem.batch + unwind + 1,
+		   2 * sizeof(uint32_t)) == 0)
 		sna->kgem.nbatch = unwind;
 	else
 		sna->render_state.gen2.ls2 = unwind;
@@ -2721,9 +2733,9 @@ static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op
 	BATCH(1<<12);
 	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
 	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
-	if (memcmp (sna->kgem.batch + sna->render_state.gen2.ls1,
-		    sna->kgem.batch + ls1,
-		    4 * sizeof(uint32_t)) == 0)
+	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
+		   sna->kgem.batch + ls1 + 1,
+		   3 * sizeof(uint32_t)) == 0)
 		sna->kgem.nbatch = ls1;
 	else
 		sna->render_state.gen2.ls1 = ls1;
@@ -2942,7 +2954,7 @@ static void
 gen2_render_reset(struct sna *sna)
 {
 	sna->render_state.gen2.need_invariant = TRUE;
-	sna->render_state.gen2.logic_op_enabled = FALSE;
+	sna->render_state.gen2.logic_op_enabled = 0;
 	sna->render_state.gen2.vertex_offset = 0;
 	sna->render_state.gen2.target = 0;
 
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index c4d8a58..d39aa16 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -466,10 +466,10 @@ sna_render_pixmap_bo(struct sna *sna,
 	BoxRec box;
 
 	DBG(("%s (%d, %d)x(%d, %d)/(%d, %d)\n", __FUNCTION__,
-	     x, y, w,h, pixmap->drawable.height, pixmap->drawable.width));
+	     x, y, w,h, pixmap->drawable.width, pixmap->drawable.height));
 
-	channel->height = pixmap->drawable.height;
 	channel->width  = pixmap->drawable.width;
+	channel->height = pixmap->drawable.height;
 	channel->scale[0] = 1.f / pixmap->drawable.width;
 	channel->scale[1] = 1.f / pixmap->drawable.height;
 	channel->offset[0] = x - dst_x;
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index dfaa606..f780428 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -280,7 +280,7 @@ struct sna_render {
 struct gen2_render_state {
 	uint32_t target;
 	Bool need_invariant;
-	Bool logic_op_enabled;
+	uint32_t logic_op_enabled;
 	uint32_t ls1, ls2, vft;
 	uint32_t diffuse;
 	uint32_t specular;
commit d65b7f9cf46a48e3bfb37c0b75df55aa1e7bff41
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Jan 9 21:58:03 2012 +0000

    sna/blt: Rearrange to reduce an out-of-bounds copy to a clear
    
    If we are asked to use the BLT, try to avoid triggering a context switch
    for a trivial case where we sample outside of a NONE source and so can
    reduce the operation to a clear.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index dfc4b43..64fcd06 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1595,6 +1595,24 @@ sna_blt_composite(struct sna *sna,
 		return FALSE;
 	}
 
+	if (!sna_transform_is_integer_translation(src->transform, &tx, &ty)) {
+		DBG(("%s: source transform is not an integer translation\n",
+		     __FUNCTION__));
+		return FALSE;
+	}
+	x += tx;
+	y += ty;
+
+	if ((x > src->pDrawable->width ||
+	     y > src->pDrawable->height ||
+	     x + width < 0 ||
+	     y + height < 0) &&
+	    (!src->repeat || src->repeatType == RepeatNone)) {
+		DBG(("%s: source is outside of valid area, converting to clear\n",
+		     __FUNCTION__));
+		return prepare_blt_clear(sna, tmp);
+	}
+
 	alpha_fixup = 0;
 	if (!(dst->format == src_format ||
 	      dst->format == alphaless(src_format) ||
@@ -1607,14 +1625,6 @@ sna_blt_composite(struct sna *sna,
 		return FALSE;
 	}
 
-	if (!sna_transform_is_integer_translation(src->transform, &tx, &ty)) {
-		DBG(("%s: source transform is not an integer translation\n",
-		     __FUNCTION__));
-		return FALSE;
-	}
-	x += tx;
-	y += ty;
-
 	/* XXX tiling? */
 	if (x < 0 || y < 0 ||
 	    x + width > src->pDrawable->width ||
commit 09e54c553680cbc2f3b4319cdab0f3d1af1574a8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Jan 9 14:04:41 2012 +0000

    sna/gen2: Add poor-man's linear gradient support
    
    Convert the linear gradient to a texture ramp and compute the texture
    coordinates in the standard manner.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 9771693..674b41e 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -60,19 +60,6 @@
 #define BATCH_F(v) batch_emit_float(sna, v)
 #define VERTEX(v) batch_emit_float(sna, v)
 
-/* TODO: Remaining items for the sufficiently motivated reader
- *
- * - Linear gradients (radial do require pixel shaders)
- *   - generate 1-d ramp for texture
- *   - compute 1-d texture coordinate using a linear projection matrix
- *   - issues? 1-stop, degenerate, fallback.
- *
- * - vmap
- *   - the texture sampler can use any type of memory apparently.
- *
- * - memory compaction?
- */
-
 static const struct blendinfo {
 	Bool dst_alpha;
 	Bool src_alpha;
@@ -262,9 +249,9 @@ gen2_emit_texture(struct sna *sna,
 		  const struct sna_composite_channel *channel,
 		  int unit)
 {
-	uint32_t filter;
-	uint32_t wrap_mode;
+	uint32_t wrap_mode_u, wrap_mode_v;
 	uint32_t texcoordtype;
+	uint32_t filter;
 
 	if (channel->is_affine)
 		texcoordtype = TEXCOORDTYPE_CARTESIAN;
@@ -275,18 +262,22 @@ gen2_emit_texture(struct sna *sna,
 	default:
 		assert(0);
 	case RepeatNone:
-		wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
+		wrap_mode_u = TEXCOORDMODE_CLAMP_BORDER;
 		break;
 	case RepeatNormal:
-		wrap_mode = TEXCOORDMODE_WRAP;
+		wrap_mode_u = TEXCOORDMODE_WRAP;
 		break;
 	case RepeatPad:
-		wrap_mode = TEXCOORDMODE_CLAMP;
+		wrap_mode_u = TEXCOORDMODE_CLAMP;
 		break;
 	case RepeatReflect:
-		wrap_mode = TEXCOORDMODE_MIRROR;
+		wrap_mode_u = TEXCOORDMODE_MIRROR;
 		break;
 	}
+	if (channel->is_linear)
+		wrap_mode_v = TEXCOORDMODE_WRAP;
+	else
+		wrap_mode_v = wrap_mode_u;
 
 	switch (channel->filter) {
 	default:
@@ -309,7 +300,7 @@ gen2_emit_texture(struct sna *sna,
 			     I915_GEM_DOMAIN_SAMPLER << 16,
 			     0));
 	BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) |
-	      ((channel->width - 1) << TM0S1_WIDTH_SHIFT) |
+	      ((channel->width - 1)  << TM0S1_WIDTH_SHIFT) |
 	      gen2_get_card_format(sna, channel->pict_format) |
 	      gen2_sampler_tiling_bits(channel->bo->tiling));
 	BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
@@ -317,10 +308,9 @@ gen2_emit_texture(struct sna *sna,
 	BATCH(0);	/* default color */
 
 	BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) |
-	      ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL |
-	      texcoordtype |
-	      ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode) |
-	      ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode));
+	      ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | texcoordtype |
+	      ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode_v) |
+	      ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode_u));
 }
 
 static void
@@ -693,6 +683,21 @@ gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY)
 	VERTEX(dstY);
 }
 
+inline static void
+gen2_emit_composite_linear(struct sna *sna,
+			   const struct sna_composite_channel *channel,
+			   int16_t x, int16_t y)
+{
+	float v;
+
+	v = (x * channel->u.gen2.linear_dx +
+	     y * channel->u.gen2.linear_dy +
+	     channel->u.gen2.linear_offset);
+	DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v));
+	VERTEX(v);
+	VERTEX(v);
+}
+
 static void
 gen2_emit_composite_texcoord(struct sna *sna,
 			     const struct sna_composite_channel *channel,
@@ -727,9 +732,14 @@ gen2_emit_composite_vertex(struct sna *sna,
 			   int16_t dstX, int16_t dstY)
 {
 	gen2_emit_composite_dstcoord(sna, dstX, dstY);
-	if (!op->src.is_solid)
+	if (op->src.is_linear)
+		gen2_emit_composite_linear(sna, &op->src, srcX, srcY);
+	else if (!op->src.is_solid)
 		gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY);
-	if (op->mask.bo)
+
+	if (op->mask.is_linear)
+		gen2_emit_composite_linear(sna, &op->mask, mskX, mskY);
+	else if (op->mask.bo)
 		gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
 }
 
@@ -775,6 +785,27 @@ gen2_emit_composite_primitive_constant(struct sna *sna,
 }
 
 fastcall static void
+gen2_emit_composite_primitive_linear(struct sna *sna,
+				       const struct sna_composite_op *op,
+				       const struct sna_composite_rectangles *r)
+{
+	int16_t dst_x = r->dst.x + op->dst.x;
+	int16_t dst_y = r->dst.y + op->dst.y;
+
+	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
+	gen2_emit_composite_linear(sna, &op->src,
+				   r->src.x + r->width, r->src.y + r->height);
+
+	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
+	gen2_emit_composite_linear(sna, &op->src,
+				   r->src.x, r->src.y + r->height);
+
+	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
+	gen2_emit_composite_linear(sna, &op->src,
+				   r->src.x, r->src.y);
+}
+
+fastcall static void
 gen2_emit_composite_primitive_identity(struct sna *sna,
 				       const struct sna_composite_op *op,
 				       const struct sna_composite_rectangles *r)
@@ -862,7 +893,8 @@ gen2_emit_composite_primitive_constant_identity_mask(struct sna *sna,
 static void gen2_magic_ca_pass(struct sna *sna,
 			       const struct sna_composite_op *op)
 {
-	uint32_t ablend, cblend;
+	uint32_t ablend, cblend, *src, *dst;
+	int n;
 
 	if (!op->need_magic_ca_pass)
 		return;
@@ -880,10 +912,12 @@ static void gen2_magic_ca_pass(struct sna *sna,
 	BATCH(ablend);
 	sna->render_state.gen2.ls2 = 0;
 
-	memcpy(sna->kgem.batch + sna->kgem.nbatch,
-	       sna->kgem.batch + sna->render_state.gen2.vertex_offset,
-	       (1 + sna->render.vertex_index)*sizeof(uint32_t));
-	sna->kgem.nbatch += 1 + sna->render.vertex_index;
+	src = sna->kgem.batch + sna->render_state.gen2.vertex_offset;
+	dst = sna->kgem.batch + sna->kgem.nbatch;
+	n = 1 + sna->render.vertex_index;
+	sna->kgem.nbatch += n;
+	while (n--)
+		*dst++ = *src++;
 }
 
 static void gen2_vertex_flush(struct sna *sna)
@@ -1044,6 +1078,7 @@ gen2_composite_solid_init(struct sna *sna,
 	channel->repeat = RepeatNormal;
 	channel->is_affine = TRUE;
 	channel->is_solid  = TRUE;
+	channel->is_linear = FALSE;
 	channel->transform = NULL;
 	channel->width  = 1;
 	channel->height = 1;
@@ -1057,6 +1092,118 @@ gen2_composite_solid_init(struct sna *sna,
 	return channel->bo != NULL;
 }
 
+#define xFixedToDouble(f) pixman_fixed_to_double(f)
+
+static Bool
+gen2_composite_linear_init(struct sna *sna,
+			   PicturePtr picture,
+			   struct sna_composite_channel *channel,
+			   int x, int y,
+			   int w, int h,
+			   int dst_x, int dst_y)
+{
+	PictLinearGradient *linear =
+		(PictLinearGradient *)picture->pSourcePict;
+	pixman_fixed_t tx, ty;
+	float x0, y0, sf;
+	float dx, dy;
+
+	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
+	     __FUNCTION__,
+	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
+	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));
+
+	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
+		return 0;
+
+	if (!sna_transform_is_affine(picture->transform)) {
+		DBG(("%s: fallback due to projective transform\n",
+		     __FUNCTION__));
+		return sna_render_picture_fixup(sna, picture, channel,
+						x, y, w, h, dst_x, dst_y);
+	}
+
+	channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
+	if (!channel->bo)
+		return 0;
+
+	channel->filter = PictFilterNearest;
+	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+	channel->is_affine = TRUE;
+	channel->is_opaque = FALSE;
+	channel->is_solid  = FALSE;
+	channel->is_linear = TRUE;
+	channel->transform = NULL;
+	channel->width  = channel->bo->pitch / 4;
+	channel->height = 1;
+	channel->pict_format = PICT_a8r8g8b8;
+
+	channel->scale[0]  = channel->scale[1]  = 1;
+	channel->offset[0] = channel->offset[1] = 0;
+
+	if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
+		dx = xFixedToDouble(linear->p2.x - linear->p1.x);
+		dy = xFixedToDouble(linear->p2.y - linear->p1.y);
+
+		x0 = xFixedToDouble(linear->p1.x);
+		y0 = xFixedToDouble(linear->p1.y);
+
+		if (tx | ty) {
+			x0 -= pixman_fixed_to_double(tx);
+			y0 -= pixman_fixed_to_double(ty);
+		}
+	} else {
+		struct pixman_f_vector p1, p2;
+		struct pixman_f_transform m, inv;
+
+		DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
+		     __FUNCTION__,
+		     pixman_fixed_to_double(picture->transform->matrix[0][0]),
+		     pixman_fixed_to_double(picture->transform->matrix[0][1]),
+		     pixman_fixed_to_double(picture->transform->matrix[0][2]),
+		     pixman_fixed_to_double(picture->transform->matrix[1][0]),
+		     pixman_fixed_to_double(picture->transform->matrix[1][1]),
+		     pixman_fixed_to_double(picture->transform->matrix[1][2]),
+		     pixman_fixed_to_double(picture->transform->matrix[2][0]),
+		     pixman_fixed_to_double(picture->transform->matrix[2][1]),
+		     pixman_fixed_to_double(picture->transform->matrix[2][2])));
+
+		pixman_f_transform_from_pixman_transform(&m,
+							 picture->transform);
+		if (!pixman_f_transform_invert(&inv, &m))
+			return 0;
+
+		p1.v[0] = linear->p1.x;
+		p1.v[1] = linear->p1.y;
+		p1.v[2] = pixman_fixed_1;
+		pixman_f_transform_point(&inv, &p1);
+
+		p2.v[0] = linear->p2.x;
+		p2.v[1] = linear->p2.y;
+		p2.v[2] = pixman_fixed_1;
+		pixman_f_transform_point(&inv, &p2);
+
+		dx = p2.v[0] - p1.v[0];
+		dy = p2.v[1] - p1.v[1];
+
+		x0 = p1.v[0];
+		y0 = p1.v[1];
+	}
+
+	sf = dx*dx + dy*dy;
+	dx /= sf;
+	dy /= sf;
+
+	channel->u.gen2.linear_dx = dx;
+	channel->u.gen2.linear_dy = dy;
+	channel->u.gen2.linear_offset = -dx*(x0+x-dst_x) + -dy*(y0+y-dst_y);
+
+	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
+	     __FUNCTION__, dx, dy, channel->u.gen2.linear_offset));
+
+	return channel->bo != NULL;
+}
+
 static Bool source_is_covered(PicturePtr picture,
 			      int x, int y,
 			      int width, int height)
@@ -1115,7 +1262,7 @@ gen2_check_card_format(struct sna *sna,
 			if (sna->kgem.gen >= 21)
 				return TRUE;
 
-			if ( source_is_covered(picture, x, y, w,h)) {
+			if (source_is_covered(picture, x, y, w,h)) {
 				channel->is_opaque = true;
 				return TRUE;
 			}
@@ -1143,34 +1290,40 @@ gen2_composite_picture(struct sna *sna,
 	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
 
 	channel->is_solid = FALSE;
-	channel->card_format = -1;
+	channel->is_linear = FALSE;
 
 	if (sna_picture_is_solid(picture, &color))
 		return gen2_composite_solid_init(sna, channel, color);
 
-	if (picture->pDrawable == NULL) {
-		DBG(("%s -- fallback, unhandled source %d\n",
-		     __FUNCTION__, picture->pSourcePict->type));
+	if (!gen2_check_repeat(picture)) {
+		DBG(("%s -- fallback, unhandled repeat %d\n",
+		     __FUNCTION__, picture->repeat));
 		return sna_render_picture_fixup(sna, picture, channel,
 						x, y, w, h, dst_x, dst_y);
 	}
 
-	if (picture->alphaMap) {
-		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
+	if (!gen2_check_filter(picture)) {
+		DBG(("%s -- fallback, unhandled filter %d\n",
+		     __FUNCTION__, picture->filter));
 		return sna_render_picture_fixup(sna, picture, channel,
 						x, y, w, h, dst_x, dst_y);
 	}
 
-	if (!gen2_check_repeat(picture)) {
-		DBG(("%s -- fallback, unhandled repeat %d\n",
-		     __FUNCTION__, picture->repeat));
+	if (picture->pDrawable == NULL) {
+		if (picture->pSourcePict->type == SourcePictTypeLinear)
+			return gen2_composite_linear_init(sna, picture, channel,
+							  x, y,
+							  w, h,
+							  dst_x, dst_y);
+
+		DBG(("%s -- fallback, unhandled source %d\n",
+		     __FUNCTION__, picture->pSourcePict->type));
 		return sna_render_picture_fixup(sna, picture, channel,
 						x, y, w, h, dst_x, dst_y);
 	}
 
-	if (!gen2_check_filter(picture)) {
-		DBG(("%s -- fallback, unhandled filter %d\n",
-		     __FUNCTION__, picture->filter));
+	if (picture->alphaMap) {
+		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
 		return sna_render_picture_fixup(sna, picture, channel,
 						x, y, w, h, dst_x, dst_y);
 	}
@@ -1301,12 +1454,18 @@ is_solid(PicturePtr picture)
 }
 
 static bool
-is_gradient(PicturePtr picture)
+is_unhandled_gradient(PicturePtr picture)
 {
 	if (picture->pDrawable)
 		return FALSE;
 
-	return picture->pSourcePict->type != SourcePictTypeSolidFill;
+	switch (picture->pSourcePict->type) {
+	case SourcePictTypeSolidFill:
+	case SourcePictTypeLinear:
+		return FALSE;
+	default:
+		return TRUE;
+	}
 }
 
 static bool
@@ -1318,7 +1477,10 @@ has_alphamap(PicturePtr p)
 static bool
 source_fallback(PicturePtr p)
 {
-	return has_alphamap(p) || is_gradient(p) || !gen2_check_filter(p) || !gen2_check_repeat(p);
+	return (has_alphamap(p) ||
+		is_unhandled_gradient(p) ||
+		!gen2_check_filter(p) ||
+		!gen2_check_repeat(p));
 }
 
 static bool
@@ -1578,6 +1740,8 @@ gen2_render_composite(struct sna *sna,
 	} else {
 		if (tmp->src.is_solid)
 			tmp->prim_emit = gen2_emit_composite_primitive_constant;
+		else if (tmp->src.is_linear)
+			tmp->prim_emit = gen2_emit_composite_primitive_linear;
 		else if (tmp->src.transform == NULL)
 			tmp->prim_emit = gen2_emit_composite_primitive_identity;
 		else if (tmp->src.is_affine)
@@ -1644,6 +1808,38 @@ gen2_emit_composite_spans_primitive_constant(struct sna *sna,
 }
 
 fastcall static void
+gen2_emit_composite_spans_primitive_linear(struct sna *sna,
+					     const struct sna_composite_spans_op *op,
+					     const BoxRec *box,
+					     float opacity)
+{
+	union {
+		float f;
+		uint32_t u;
+	} alpha;
+
+	alpha.u = (uint8_t)(255 * opacity) << 24;
+
+	gen2_emit_composite_dstcoord(sna,
+				     op->base.dst.x + box->x2,
+				     op->base.dst.y + box->y2);
+	VERTEX(alpha.f);
+	gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2);
+
+	gen2_emit_composite_dstcoord(sna,
+				     op->base.dst.x + box->x1,
+				     op->base.dst.y + box->y2);
+	VERTEX(alpha.f);
+	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2);
+
+	gen2_emit_composite_dstcoord(sna,
+				     op->base.dst.x + box->x1,
+				     op->base.dst.y + box->y1);
+	VERTEX(alpha.f);
+	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1);
+}
+
+fastcall static void
 gen2_emit_composite_spans_primitive_identity_source(struct sna *sna,
 						    const struct sna_composite_spans_op *op,
 						    const BoxRec *box,
@@ -1723,7 +1919,11 @@ gen2_emit_composite_spans_vertex(struct sna *sna,
 {
 	gen2_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
 	BATCH((uint8_t)(opacity * 255) << 24);
-	gen2_emit_composite_texcoord(sna, &op->base.src, x, y);
+	assert(!op->base.src.is_solid);
+	if (op->base.src.is_linear)
+		gen2_emit_composite_linear(sna, &op->base.src, x, y);
+	else
+		gen2_emit_composite_texcoord(sna, &op->base.src, x, y);
 }
 
 fastcall static void
@@ -1746,7 +1946,7 @@ gen2_emit_spans_pipeline(struct sna *sna,
 
 	cblend =
 		TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULATE |
-	       	TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA |
+		TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA |
 		TB0C_OUTPUT_WRITE_CURRENT;
 	ablend =
 		TB0A_RESULT_SCALE_1X | TB0A_OP_MODULATE |
@@ -1962,6 +2162,9 @@ gen2_render_composite_spans(struct sna *sna,
 	tmp->base.floats_per_vertex = 3;
 	if (tmp->base.src.is_solid) {
 		tmp->prim_emit = gen2_emit_composite_spans_primitive_constant;
+	} else if (tmp->base.src.is_linear) {
+		tmp->base.floats_per_vertex += 2;
+		tmp->prim_emit = gen2_emit_composite_spans_primitive_linear;
 	} else {
 		assert(tmp->base.src.bo);
 		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
diff --git a/src/sna/sna_gradient.c b/src/sna/sna_gradient.c
index c870076..2aa9bbd 100644
--- a/src/sna/sna_gradient.c
+++ b/src/sna/sna_gradient.c
@@ -104,7 +104,7 @@ sna_render_get_gradient(struct sna *sna,
 	int i, width;
 	struct kgem_bo *bo;
 
-	DBG(("%s: %dx[%f:%x...%f:%x...%f:%x]\n", __FUNCTION__,
+	DBG(("%s: %dx[%f:%x ... %f:%x ... %f:%x]\n", __FUNCTION__,
 	     pattern->nstops,
 	     pattern->stops[0].x / 65536.,
 	     pattern->stops[0].color.alpha >> 8 << 24 |
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 2229c18..dfaa606 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -50,6 +50,7 @@ struct sna_composite_op {
 		uint32_t repeat;
 		uint32_t is_affine : 1;
 		uint32_t is_solid : 1;
+		uint32_t is_linear : 1;
 		uint32_t is_opaque : 1;
 		uint32_t alpha_fixup : 1;
 		uint32_t rb_reversed : 1;
@@ -59,6 +60,9 @@ struct sna_composite_op {
 		union {
 			struct {
 				uint32_t pixel;
+				float linear_dx;
+				float linear_dy;
+				float linear_offset;
 			} gen2;
 			struct gen3_shader_channel {
 				int type;