xf86-video-intel: Branch 'xwayland-1.12' - 108 commits - configure.ac NEWS src/intel_dri.c src/intel_driver.c src/intel_driver.h src/intel.h src/intel_module.c src/intel_video.c src/legacy/i810 src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/kgem_debug_gen7.c src/sna/kgem.h src/sna/Makefile.am src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_damage.c src/sna/sna_damage.h src/sna/sna_display.c src/sna/sna_dri.c src/sna/sna_driver.c src/sna/sna_glyphs.c src/sna/sna_gradient.c src/sna/sna.h src/sna/sna_io.c src/sna/sna_render.c src/sna/sna_render.h src/sna/sna_render_inline.h src/sna/sna_tiling.c src/sna/sna_trapezoids.c src/sna/sna_video.c src/sna/sna_video.h src/sna/sna_video_sprite.c src/sna/sna_video_textured.c uxa/uxa-glyphs.c

Kristian Høgsberg krh at kemper.freedesktop.org
Mon May 7 12:04:49 PDT 2012


Rebased ref, commits from common ancestor:
commit ab2cfefaed51188cca454bbc7b8c4c72f9d39bb5
Author: Kristian Høgsberg <krh at bitplanet.net>
Date:   Fri Oct 15 17:58:14 2010 -0400

    Add xwayland support

diff --git a/configure.ac b/configure.ac
index c031a26..31d3a43 100644
--- a/configure.ac
+++ b/configure.ac
@@ -168,7 +168,6 @@ if test "x$VMAP" = xyes; then
 	AC_DEFINE(USE_VMAP,1,[Assume VMAP support])
 fi
 
-
 AC_ARG_ENABLE(debug,
 	      AS_HELP_STRING([--enable-debug],
 			     [Enables internal debugging [default=no]]),
diff --git a/src/intel.h b/src/intel.h
index f806aea..34ca650 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -349,10 +349,14 @@ typedef struct intel_screen_private {
 	 */
 	Bool fallback_debug;
 	unsigned debug_flush;
+
 #if HAVE_UDEV
 	struct udev_monitor *uevent_monitor;
 	InputHandlerProc uevent_handler;
 #endif
+
+	struct xwl_screen *xwl_screen;
+
 } intel_screen_private;
 
 enum {
diff --git a/src/intel_dri.c b/src/intel_dri.c
index a5ed545..1745aef 100644
--- a/src/intel_dri.c
+++ b/src/intel_dri.c
@@ -50,6 +50,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "xf86.h"
 #include "xf86_OSproc.h"
+#include "xf86Priv.h"
 
 #include "xf86Pci.h"
 #include "xf86drm.h"
@@ -58,6 +59,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "shadow.h"
 #include "fb.h"
 
+#ifdef XORG_WAYLAND
+#include <xwayland.h>
+#endif
+
 #include "intel.h"
 #include "i830_reg.h"
 
@@ -1611,6 +1616,29 @@ out_complete:
 	return TRUE;
 }
 
+#ifdef XORG_WAYLAND
+static int intel_auth_magic(int fd, uint32_t magic)
+{
+	ScrnInfoPtr scrn;
+	intel_screen_private *intel;
+	int i;
+
+	/* Not wayland, go stragight to drm */
+	if (!xorgWayland)
+		return drmAuthMagic(fd, magic);
+
+	for (i = 0; i < 1; i++) {
+		scrn = xf86Screens[i];
+		intel = intel_get_screen_private(scrn);
+		if (xwl_screen_get_drm_fd(intel->xwl_screen) == fd)
+			break;
+	}
+
+	/* Forward the request to our host */
+	return xwl_drm_authenticate(intel->xwl_screen, magic);
+}
+#endif
+
 static int dri2_server_generation;
 #endif
 
@@ -1696,6 +1724,11 @@ Bool I830DRI2ScreenInit(ScreenPtr screen)
 	}
 #endif
 
+#if DRI2INFOREC_VERSION >= 5 && defined(XORG_WAYLAND)
+	info.version = 5;
+	info.AuthMagic = intel_auth_magic;
+#endif
+
 	return DRI2ScreenInit(screen, &info);
 }
 
diff --git a/src/intel_driver.c b/src/intel_driver.c
index 4265de8..9124fdf 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -49,6 +49,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "xf86_OSproc.h"
 #include "xf86cmap.h"
 #include "xf86drm.h"
+#include "xf86Priv.h"
 #include "compiler.h"
 #include "mibstore.h"
 #include "mipointer.h"
@@ -65,6 +66,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "intel.h"
 #include "intel_video.h"
 
+#ifdef XORG_WAYLAND
+#include <xwayland.h>
+#endif
+
 #ifdef INTEL_XVMC
 #define _INTEL_XVMC_SERVER_
 #include "intel_hwmc.h"
@@ -224,6 +229,11 @@ static Bool i830CreateScreenResources(ScreenPtr screen)
 	if (!(*screen->CreateScreenResources) (screen))
 		return FALSE;
 
+#ifdef XORG_WAYLAND
+	if (intel->xwl_screen)
+		xwl_screen_init(intel->xwl_screen, screen);
+#endif
+
 	return intel_uxa_create_screen_resources(screen);
 }
 
@@ -514,6 +524,27 @@ static Bool can_accelerate_blt(struct intel_screen_private *intel)
 	return TRUE;
 }
 
+#ifdef XORG_WAYLAND
+static int intel_create_window_buffer(struct xwl_window *xwl_window,
+				      PixmapPtr pixmap)
+{
+	uint32_t name;
+	dri_bo *bo;
+
+	bo = intel_get_pixmap_bo(pixmap);
+	if (bo == NULL || dri_bo_flink(bo, &name) != 0)
+		return BadDrawable;
+
+	return xwl_create_window_buffer_drm(xwl_window, pixmap, name);
+}
+
+static struct xwl_driver xwl_driver = {
+	.version = 1,
+	.use_drm = 1,
+	.create_window_buffer = intel_create_window_buffer
+};
+#endif
+
 /**
  * This is called before ScreenInit to do any require probing of screen
  * configuration.
@@ -561,10 +592,6 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
 
 	intel->PciInfo = xf86GetPciInfoForEntity(intel->pEnt->index);
 
-	if (!intel_open_drm_master(scrn))
-		xf86DrvMsg(scrn->scrnIndex, X_ERROR,
-			   "Failed to become DRM master.\n");
-
 	scrn->monitor = scrn->confScreen->monitor;
 	scrn->progClock = TRUE;
 	scrn->rgbBits = 8;
@@ -602,6 +629,26 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
 	intel_check_chipset_option(scrn);
 	intel_check_dri_option(scrn);
 
+#ifdef XORG_WAYLAND
+	if (xorgWayland) {
+		xf86LoadSubModule(scrn, "xwayland");
+		intel->xwl_screen =
+			xwl_screen_pre_init(scrn, 0, &xwl_driver);
+		if (!intel->xwl_screen) {
+			xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+				   "Failed to initialize xwayland.\n");
+			return FALSE;
+		}
+
+		intel->drmSubFD =
+			xwl_screen_get_drm_fd(intel->xwl_screen);
+	}
+#endif
+
+	if (!intel->xwl_screen && !intel_open_drm_master(scrn))
+		xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+			   "Failed to become DRM master.\n");
+
 	if (!intel_init_bufmgr(intel)) {
 		PreInitCleanup(scrn);
 		return FALSE;
@@ -662,6 +709,9 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
 	xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Triple buffering? %s\n",
 		   intel->use_triple_buffer ? "enabled" : "disabled");
 
+	if (!intel->xwl_screen)
+		intel->swapbuffers_wait = TRUE;
+
 	xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Framebuffer %s\n",
 		   intel->tiling & INTEL_TILING_FB ? "tiled" : "linear");
 	xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Pixmaps %s\n",
@@ -673,7 +723,8 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
 
 	I830XvInit(scrn);
 
-	if (!intel_mode_pre_init(scrn, intel->drmSubFD, intel->cpp)) {
+	if (!intel->xwl_screen &&
+	    !intel_mode_pre_init(scrn, intel->drmSubFD, intel->cpp)) {
 		PreInitCleanup(scrn);
 		return FALSE;
 	}
@@ -807,9 +858,16 @@ intel_flush_callback(CallbackListPtr *list,
 		     pointer user_data, pointer call_data)
 {
 	ScrnInfoPtr scrn = user_data;
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
 	if (scrn->vtSema) {
 		intel_batch_submit(scrn);
 		intel_glamor_flush(intel_get_screen_private(scrn));
+
+#ifdef XORG_WAYLAND
+		if (intel->xwl_screen)
+			xwl_screen_post_damage(intel->xwl_screen);
+#endif
 	}
 }
 
@@ -1194,7 +1252,8 @@ static Bool I830CloseScreen(int scrnIndex, ScreenPtr screen)
 		if (!intel->use_shadow)
 			intel_set_pixmap_bo(screen->GetScreenPixmap(screen),
 					    NULL);
-		intel_mode_remove_fb(intel);
+		if (!intel->xwl_screen)
+			intel_mode_remove_fb(intel);
 		drm_intel_bo_unreference(intel->front_buffer);
 		intel->front_buffer = NULL;
 	}
diff --git a/src/intel_module.c b/src/intel_module.c
index c6f94f5..25e6fd9 100644
--- a/src/intel_module.c
+++ b/src/intel_module.c
@@ -32,6 +32,7 @@
 #include <xf86_OSproc.h>
 #include <xf86cmap.h>
 #include <xf86drmMode.h>
+#include "xf86Priv.h"
 
 #include <xorgVersion.h>
 
@@ -265,6 +266,11 @@ static Bool intel_driver_func(ScrnInfoPtr pScrn,
 #else
 		(*flag) = HW_IO | HW_MMIO;
 #endif
+
+#ifdef XORG_WAYLAND
+		if (xorgWayland)
+			(*flag) = HW_SKIP_CONSOLE;
+#endif
 		return TRUE;
 	default:
 		/* Unknown or deprecated function */
commit 4ba366ead3a133136554579fe9a604da1fc1da68
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon May 7 08:55:35 2012 +0100

    sna: Manually execute the timer as TimerForce does not run an inactive timer
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 3280490..274facb 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -11995,6 +11995,7 @@ static bool sna_accel_do_flush(struct sna *sna)
 		}
 
 		if (sna->timer_ready & (1<<(FLUSH_TIMER))) {
+			DBG(("%s (time=%ld), triggered\n", __FUNCTION__, (long)sna->time));
 			sna->timer_expire[FLUSH_TIMER] =
 				sna->time + sna->vblank_interval;
 			return true;
@@ -12007,6 +12008,7 @@ static bool sna_accel_do_flush(struct sna *sna)
 			sna->timer_ready |= 1 << FLUSH_TIMER;
 			sna->timer_expire[FLUSH_TIMER] =
 				sna->time + sna->vblank_interval / 2;
+			DBG(("%s (time=%ld), starting\n", __FUNCTION__, (long)sna->time));
 		}
 	}
 
@@ -12017,6 +12019,7 @@ static bool sna_accel_do_expire(struct sna *sna)
 {
 	if (sna->timer_active & (1<<(EXPIRE_TIMER))) {
 		if (sna->timer_ready & (1<<(EXPIRE_TIMER))) {
+			DBG(("%s (time=%ld), triggered\n", __FUNCTION__, (long)sna->time));
 			sna->timer_expire[EXPIRE_TIMER] =
 				sna->time + MAX_INACTIVE_TIME * 1000;
 			return true;
@@ -12027,6 +12030,7 @@ static bool sna_accel_do_expire(struct sna *sna)
 			sna->timer_ready |= 1 << EXPIRE_TIMER;
 			sna->timer_expire[EXPIRE_TIMER] =
 				sna->time + MAX_INACTIVE_TIME * 1000;
+			DBG(("%s (time=%ld), starting\n", __FUNCTION__, (long)sna->time));
 		}
 	}
 
@@ -12039,6 +12043,7 @@ static bool sna_accel_do_inactive(struct sna *sna)
 		if (sna->timer_ready & (1<<(INACTIVE_TIMER))) {
 			sna->timer_expire[INACTIVE_TIMER] =
 				sna->time + 120 * 1000;
+			DBG(("%s (time=%ld), triggered\n", __FUNCTION__, (long)sna->time));
 			return true;
 		}
 	} else {
@@ -12047,6 +12052,7 @@ static bool sna_accel_do_inactive(struct sna *sna)
 			sna->timer_ready |= 1 << INACTIVE_TIMER;
 			sna->timer_expire[INACTIVE_TIMER] =
 				sna->time + 120 * 1000;
+			DBG(("%s (time=%ld), starting\n", __FUNCTION__, (long)sna->time));
 		}
 	}
 
@@ -12056,30 +12062,31 @@ static bool sna_accel_do_inactive(struct sna *sna)
 static CARD32 sna_timeout(OsTimerPtr timer, CARD32 now, pointer arg)
 {
 	struct sna *sna = arg;
-	CARD32 next = UINT32_MAX;
+	int32_t next = 0;
 	uint32_t active;
 	int i;
 
+	DBG(("%s: now=%d, active=%08x, ready=%08x\n",
+	     __FUNCTION__, now, sna->timer_active, sna->timer_ready));
 	active = sna->timer_active & ~sna->timer_ready;
 	if (active == 0)
 		return 0;
 
 	for (i = 0; i < NUM_TIMERS; i++) {
 		if (active & (1 << i)) {
-			if (now > sna->timer_expire[i]) {
+			int32_t delta = sna->timer_expire[i] - now;
+			DBG(("%s: timer[%d] expires in %d [%d]\n",
+			     __FUNCTION__, i, delta, sna->timer_expire[i]));
+			if (delta <= 0)
 				sna->timer_ready |= 1 << i;
-			} else {
-				if (sna->timer_expire[i] < next)
-					next = sna->timer_expire[i];
-			}
+			else if (next == 0 || delta < next)
+				next = delta;
 		}
 	}
 
-	if ((sna->timer_active & ~sna->timer_ready) == 0)
-		return 0;
-
-	assert(next != UINT32_MAX);
-	return next - now;
+	DBG(("%s: active=%08x, ready=%08x, next=+%d\n",
+	     __FUNCTION__, sna->timer_active, sna->timer_ready, next));
+	return next;
 }
 
 static bool sna_accel_flush(struct sna *sna)
@@ -12385,8 +12392,15 @@ void sna_accel_block_handler(struct sna *sna)
 	}
 
 	if (sna->timer_ready) {
+		DBG(("%s: evaluating timers, ready=%x\n",
+		     __FUNCTION__, sna->timer_ready));
 		sna->timer_ready = 0;
-		TimerForce(sna->timer);
+		TimerSet(sna->timer, 0,
+			 sna_timeout(sna->timer,
+				     sna->time,
+				     sna),
+			 sna_timeout, sna);
+		assert(sna->timer_ready == 0);
 	}
 }
 
commit 2c801c45dbad58d18f2673e17723825dda0c4e55
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun May 6 17:23:11 2012 +0100

    sna: Replace timerfd with OsTimer
    
    As timerfd is linux-specific, and OsTimer an OS-agnostic abraction,
    replace the former with the later. Arguably this has slightly better
    performance characteristics in select-bound loops.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/configure.ac b/configure.ac
index 3770983..c031a26 100644
--- a/configure.ac
+++ b/configure.ac
@@ -265,8 +265,6 @@ if test "x$DEBUG" = xfull; then
         CFLAGS="$CFLAGS -O0 -ggdb3"
 fi
 
-AC_CHECK_HEADERS([sys/timerfd.h])
-
 DRIVER_NAME=intel
 AC_SUBST([DRIVER_NAME])
 AC_SUBST([moduledir])
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 3e4b8ec..d7a20b9 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -225,7 +225,9 @@ struct sna {
 	unsigned watch_flush;
 	unsigned flush;
 
-	int timer[NUM_TIMERS];
+	uint32_t time;
+	OsTimerPtr timer;
+	uint32_t timer_expire[NUM_TIMERS];
 	uint16_t timer_active;
 	uint16_t timer_ready;
 
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 3e03934..3280490 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -11964,165 +11964,123 @@ static struct sna_pixmap *sna_accel_scanout(struct sna *sna)
 	return priv && priv->gpu_bo ? priv : NULL;
 }
 
-#if HAVE_SYS_TIMERFD_H && !FORCE_FLUSH
-#include <sys/timerfd.h>
-#include <errno.h>
-
-static uint64_t read_timer(int fd)
-{
-	uint64_t count = 0;
-	int ret = read(fd, &count, sizeof(count));
-	return count;
-	(void)ret;
-}
-
-static void sna_accel_drain_timer(struct sna *sna, int id)
+static void sna_accel_disarm_timer(struct sna *sna, int id)
 {
-	if (sna->timer_active & (1<<id))
-		read_timer(sna->timer[id]);
-}
-
-static void _sna_accel_disarm_timer(struct sna *sna, int id)
-{
-	struct itimerspec to;
-
-	DBG(("%s[%d] (time=%ld)\n", __FUNCTION__, id, (long)GetTimeInMillis()));
-
-	memset(&to, 0, sizeof(to));
-	timerfd_settime(sna->timer[id], 0, &to, NULL);
+	DBG(("%s[%d] (time=%ld)\n", __FUNCTION__, id, (long)sna->time));
 	sna->timer_active &= ~(1<<id);
+	sna->timer_ready &= ~(1<<id);
 }
 
-#define return_if_timer_active(id) do {					\
-	if (sna->timer_active & (1<<(id)))				\
-		return (sna->timer_ready & (1<<(id))) && read_timer(sna->timer[id]) > 0;			\
-} while (0)
-
-static Bool sna_accel_do_flush(struct sna *sna)
+static bool sna_accel_do_flush(struct sna *sna)
 {
-	struct itimerspec to;
 	struct sna_pixmap *priv;
 
 	priv = sna_accel_scanout(sna);
 	if (priv == NULL) {
 		DBG(("%s -- no scanout attached\n", __FUNCTION__));
-		return FALSE;
-	}
-
-	if (sna->kgem.flush_now) {
-		sna->kgem.flush_now = 0;
-		if (priv->gpu_bo->exec) {
-			DBG(("%s -- forcing flush\n", __FUNCTION__));
-			sna_accel_drain_timer(sna, FLUSH_TIMER);
-			return TRUE;
-		}
-	}
-
-	return_if_timer_active(FLUSH_TIMER);
-
-	if (priv->cpu_damage == NULL && priv->gpu_bo->exec == NULL) {
-		DBG(("%s -- no pending write to scanout\n", __FUNCTION__));
-		return FALSE;
+		sna_accel_disarm_timer(sna, FLUSH_TIMER);
+		return false;
 	}
 
 	if (sna->flags & SNA_NO_DELAYED_FLUSH)
-		return TRUE;
-
-	if (sna->timer[FLUSH_TIMER] == -1)
-		return TRUE;
-
-	DBG(("%s, starting flush timer, at time=%ld\n",
-	     __FUNCTION__, (long)GetTimeInMillis()));
+		return true;
 
-	/* Initial redraw hopefully before this vblank */
-	to.it_value.tv_sec = 0;
-	to.it_value.tv_nsec = sna->vblank_interval / 2;
+	if (sna->timer_active & (1<<(FLUSH_TIMER))) {
+		if (sna->kgem.flush_now) {
+			sna->kgem.flush_now = 0;
+			if (priv->gpu_bo->exec) {
+				DBG(("%s -- forcing flush\n", __FUNCTION__));
+				sna->timer_ready |= 1 << FLUSH_TIMER;
+			}
+		}
 
-	/* Then periodic updates for every vblank */
-	to.it_interval.tv_sec = 0;
-	to.it_interval.tv_nsec = sna->vblank_interval;
-	timerfd_settime(sna->timer[FLUSH_TIMER], 0, &to, NULL);
+		if (sna->timer_ready & (1<<(FLUSH_TIMER))) {
+			sna->timer_expire[FLUSH_TIMER] =
+				sna->time + sna->vblank_interval;
+			return true;
+		}
+	} else {
+		if (priv->cpu_damage == NULL && priv->gpu_bo->exec == NULL) {
+			DBG(("%s -- no pending write to scanout\n", __FUNCTION__));
+		} else {
+			sna->timer_active |= 1 << FLUSH_TIMER;
+			sna->timer_ready |= 1 << FLUSH_TIMER;
+			sna->timer_expire[FLUSH_TIMER] =
+				sna->time + sna->vblank_interval / 2;
+		}
+	}
 
-	sna->timer_active |= 1 << FLUSH_TIMER;
-	return FALSE;
+	return false;
 }
 
-static Bool sna_accel_do_expire(struct sna *sna)
+static bool sna_accel_do_expire(struct sna *sna)
 {
-	struct itimerspec to;
-
-	return_if_timer_active(EXPIRE_TIMER);
-
-	if (!sna->kgem.need_expire)
-		return FALSE;
-
-	if (sna->timer[EXPIRE_TIMER] == -1)
-		return TRUE;
-
-	to.it_interval.tv_sec = MAX_INACTIVE_TIME;
-	to.it_interval.tv_nsec = 0;
-	to.it_value = to.it_interval;
-	timerfd_settime(sna->timer[EXPIRE_TIMER], 0, &to, NULL);
+	if (sna->timer_active & (1<<(EXPIRE_TIMER))) {
+		if (sna->timer_ready & (1<<(EXPIRE_TIMER))) {
+			sna->timer_expire[EXPIRE_TIMER] =
+				sna->time + MAX_INACTIVE_TIME * 1000;
+			return true;
+		}
+	} else {
+		if (sna->kgem.need_expire) {
+			sna->timer_active |= 1 << EXPIRE_TIMER;
+			sna->timer_ready |= 1 << EXPIRE_TIMER;
+			sna->timer_expire[EXPIRE_TIMER] =
+				sna->time + MAX_INACTIVE_TIME * 1000;
+		}
+	}
 
-	sna->timer_active |= 1 << EXPIRE_TIMER;
-	return FALSE;
+	return false;
 }
 
-static Bool sna_accel_do_inactive(struct sna *sna)
+static bool sna_accel_do_inactive(struct sna *sna)
 {
-	struct itimerspec to;
-
-	return_if_timer_active(INACTIVE_TIMER);
-
-	if (list_is_empty(&sna->active_pixmaps))
-		return FALSE;
-
-	if (sna->timer[INACTIVE_TIMER] == -1)
-		return FALSE;
-
-	/* Periodic expiration after every 2 minutes. */
-	to.it_interval.tv_sec = 120;
-	to.it_interval.tv_nsec = 0;
-	to.it_value = to.it_interval;
-	timerfd_settime(sna->timer[INACTIVE_TIMER], 0, &to, NULL);
+	if (sna->timer_active & (1<<(INACTIVE_TIMER))) {
+		if (sna->timer_ready & (1<<(INACTIVE_TIMER))) {
+			sna->timer_expire[INACTIVE_TIMER] =
+				sna->time + 120 * 1000;
+			return true;
+		}
+	} else {
+		if (!list_is_empty(&sna->active_pixmaps)) {
+			sna->timer_active |= 1 << INACTIVE_TIMER;
+			sna->timer_ready |= 1 << INACTIVE_TIMER;
+			sna->timer_expire[INACTIVE_TIMER] =
+				sna->time + 120 * 1000;
+		}
+	}
 
-	sna->timer_active |= 1 << INACTIVE_TIMER;
-	return FALSE;
+	return false;
 }
 
-static void sna_accel_create_timers(struct sna *sna)
+static CARD32 sna_timeout(OsTimerPtr timer, CARD32 now, pointer arg)
 {
-	int id;
+	struct sna *sna = arg;
+	CARD32 next = UINT32_MAX;
+	uint32_t active;
+	int i;
 
-	/* XXX Can we replace this with OSTimer provided by dix? */
+	active = sna->timer_active & ~sna->timer_ready;
+	if (active == 0)
+		return 0;
 
-#ifdef CLOCK_MONOTONIC_COARSE
-	for (id = 0; id < NUM_FINE_TIMERS; id++)
-		sna->timer[id] = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
-	for (; id < NUM_TIMERS; id++) {
-		sna->timer[id] = timerfd_create(CLOCK_MONOTONIC_COARSE, TFD_NONBLOCK);
-		if (sna->timer[id] == -1)
-			sna->timer[id] = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+	for (i = 0; i < NUM_TIMERS; i++) {
+		if (active & (1 << i)) {
+			if (now > sna->timer_expire[i]) {
+				sna->timer_ready |= 1 << i;
+			} else {
+				if (sna->timer_expire[i] < next)
+					next = sna->timer_expire[i];
+			}
+		}
 	}
-#else
-	for (id = 0; id < NUM_TIMERS; id++)
-		sna->timer[id] = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
-#endif
-}
-#else
-static void sna_accel_create_timers(struct sna *sna)
-{
-	int id;
 
-	for (id = 0; id < NUM_TIMERS; id++)
-		sna->timer[id] = -1;
+	if ((sna->timer_active & ~sna->timer_ready) == 0)
+		return 0;
+
+	assert(next != UINT32_MAX);
+	return next - now;
 }
-static Bool sna_accel_do_flush(struct sna *sna) { return sna_accel_scanout(sna) != NULL; }
-static Bool sna_accel_do_expire(struct sna *sna) { return sna->kgem.need_expire; }
-static Bool sna_accel_do_inactive(struct sna *sna) { return FALSE; }
-static void sna_accel_drain_timer(struct sna *sna, int id) { }
-static void _sna_accel_disarm_timer(struct sna *sna, int id) { }
-#endif
 
 static bool sna_accel_flush(struct sna *sna)
 {
@@ -12131,14 +12089,14 @@ static bool sna_accel_flush(struct sna *sna)
 	bool busy = priv->cpu_damage || need_throttle;
 
 	DBG(("%s (time=%ld), cpu damage? %p, exec? %d nbatch=%d, busy? %d, need_throttle=%d\n",
-	     __FUNCTION__, (long)GetTimeInMillis(),
+	     __FUNCTION__, (long)sna->time,
 	     priv->cpu_damage,
 	     priv->gpu_bo->exec != NULL,
 	     sna->kgem.nbatch,
 	     sna->kgem.busy, need_throttle));
 
 	if (!sna->kgem.busy && !busy)
-		_sna_accel_disarm_timer(sna, FLUSH_TIMER);
+		sna_accel_disarm_timer(sna, FLUSH_TIMER);
 	sna->kgem.busy = busy;
 
 	if (priv->cpu_damage)
@@ -12152,10 +12110,10 @@ static bool sna_accel_flush(struct sna *sna)
 
 static void sna_accel_expire(struct sna *sna)
 {
-	DBG(("%s (time=%ld)\n", __FUNCTION__, (long)GetTimeInMillis()));
+	DBG(("%s (time=%ld)\n", __FUNCTION__, (long)sna->time));
 
 	if (!kgem_expire_cache(&sna->kgem))
-		_sna_accel_disarm_timer(sna, EXPIRE_TIMER);
+		sna_accel_disarm_timer(sna, EXPIRE_TIMER);
 }
 
 static void sna_accel_inactive(struct sna *sna)
@@ -12163,7 +12121,7 @@ static void sna_accel_inactive(struct sna *sna)
 	struct sna_pixmap *priv;
 	struct list preserve;
 
-	DBG(("%s (time=%ld)\n", __FUNCTION__, (long)GetTimeInMillis()));
+	DBG(("%s (time=%ld)\n", __FUNCTION__, (long)sna->time));
 
 #if DEBUG_ACCEL
 	{
@@ -12252,22 +12210,12 @@ static void sna_accel_inactive(struct sna *sna)
 	if (list_is_empty(&sna->inactive_clock[1]) &&
 	    list_is_empty(&sna->inactive_clock[0]) &&
 	    list_is_empty(&sna->active_pixmaps))
-		_sna_accel_disarm_timer(sna, INACTIVE_TIMER);
-}
-
-static void sna_accel_install_timers(struct sna *sna)
-{
-	int n;
-
-	for (n = 0; n < NUM_TIMERS; n++) {
-		if (sna->timer[n] != -1)
-			AddGeneralSocket(sna->timer[n]);
-	}
+		sna_accel_disarm_timer(sna, INACTIVE_TIMER);
 }
 
 Bool sna_accel_pre_init(struct sna *sna)
 {
-	sna_accel_create_timers(sna);
+	sna->timer = TimerSet(NULL, 0, 0, sna_timeout, sna);
 	return TRUE;
 }
 
@@ -12287,7 +12235,6 @@ Bool sna_accel_init(ScreenPtr screen, struct sna *sna)
 	list_init(&sna->inactive_clock[1]);
 
 	AddGeneralSocket(sna->kgem.fd);
-	sna_accel_install_timers(sna);
 
 	screen->CreateGC = sna_create_gc;
 	screen->GetImage = sna_get_image;
@@ -12413,17 +12360,17 @@ static void sna_accel_throttle(struct sna *sna)
 	if (sna->flags & SNA_NO_THROTTLE)
 		return;
 
-	DBG(("%s (time=%ld)\n", __FUNCTION__, (long)GetTimeInMillis()));
+	DBG(("%s (time=%ld)\n", __FUNCTION__, (long)sna->time));
 
 	kgem_throttle(&sna->kgem);
 }
 
 void sna_accel_block_handler(struct sna *sna)
 {
-	if (sna_accel_do_flush(sna)) {
-		if (sna_accel_flush(sna))
-			sna_accel_throttle(sna);
-	}
+	sna->time = GetTimeInMillis();
+
+	if (sna_accel_do_flush(sna) && sna_accel_flush(sna))
+		sna_accel_throttle(sna);
 
 	if (sna_accel_do_expire(sna))
 		sna_accel_expire(sna);
@@ -12437,31 +12384,20 @@ void sna_accel_block_handler(struct sna *sna)
 		sna->watch_flush = 0;
 	}
 
-	sna->timer_ready = 0;
+	if (sna->timer_ready) {
+		sna->timer_ready = 0;
+		TimerForce(sna->timer);
+	}
 }
 
 void sna_accel_wakeup_handler(struct sna *sna, fd_set *ready)
 {
-	int id, active;
-
 	if (sna->kgem.need_retire)
 		kgem_retire(&sna->kgem);
 	if (sna->kgem.need_purge)
 		kgem_purge_cache(&sna->kgem);
-
-	active = sna->timer_active & ~sna->timer_ready;
-	for (id = 0; id < NUM_TIMERS; id++)
-		if (active & (1 << id) && FD_ISSET(sna->timer[id], ready))
-			sna->timer_ready |= 1 << id;
 }
 
 void sna_accel_free(struct sna *sna)
 {
-	int id;
-
-	for (id = 0; id < NUM_TIMERS; id++)
-		if (sna->timer[id] != -1) {
-			close(sna->timer[id]);
-			sna->timer[id] = -1;
-		}
 }
diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 6287cd9..65d1992 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -643,8 +643,8 @@ static void update_flush_interval(struct sna *sna)
 	if (max_vrefresh == 0)
 		max_vrefresh = 40;
 
-	sna->vblank_interval = 1000 * 1000 * 1000 / max_vrefresh; /* Hz -> ns */
-	DBG(("max_vrefresh=%d, vblank_interval=%d ns\n",
+	sna->vblank_interval = 1000 / max_vrefresh; /* Hz -> ms */
+	DBG(("max_vrefresh=%d, vblank_interval=%d ms\n",
 	       max_vrefresh, sna->vblank_interval));
 }
 
commit 74d2707de4a0f94d2143f1e9c60762e4167b1ea6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun May 6 21:51:27 2012 +0100

    sna: Remove short-circuiting for large font fallbacks
    
    Unlike the fallback for an unhandled depth, we need to ensure that the
    pixmaps are mapped to the CPU before calling the fb routines.
    
    Reported-by: Toralf Förster <toralf.foerster at gmx.de>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=49558
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 1bc34dc..3e03934 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -10796,9 +10796,6 @@ sna_poly_text8(DrawablePtr drawable, GCPtr gc,
 	if (drawable->depth < 8)
 		goto fallback;
 
-	if (sna_font_too_large(gc->font))
-		goto fallback;
-
 	for (i = n = 0; i < count; i++) {
 		if (sna_get_glyph8(gc->font, priv, chars[i], &info[n]))
 			n++;
@@ -10828,8 +10825,11 @@ sna_poly_text8(DrawablePtr drawable, GCPtr gc,
 	if (!ACCEL_POLY_TEXT8)
 		goto force_fallback;
 
+	if (sna_font_too_large(gc->font))
+		goto force_fallback;
+
 	if (!PM_IS_SOLID(drawable, gc->planemask))
-		return false;
+		goto force_fallback;
 
 	if (!gc_is_solid(gc, &fg))
 		goto force_fallback;
@@ -10888,9 +10888,6 @@ sna_poly_text16(DrawablePtr drawable, GCPtr gc,
 	if (drawable->depth < 8)
 		goto fallback;
 
-	if (sna_font_too_large(gc->font))
-		goto fallback;
-
 	for (i = n = 0; i < count; i++) {
 		if (sna_get_glyph16(gc->font, priv, chars[i], &info[n]))
 			n++;
@@ -10920,8 +10917,11 @@ sna_poly_text16(DrawablePtr drawable, GCPtr gc,
 	if (!ACCEL_POLY_TEXT16)
 		goto force_fallback;
 
+	if (sna_font_too_large(gc->font))
+		goto force_fallback;
+
 	if (!PM_IS_SOLID(drawable, gc->planemask))
-		return false;
+		goto force_fallback;
 
 	if (!gc_is_solid(gc, &fg))
 		goto force_fallback;
@@ -10980,9 +10980,6 @@ sna_image_text8(DrawablePtr drawable, GCPtr gc,
 	if (drawable->depth < 8)
 		goto fallback;
 
-	if (sna_font_too_large(gc->font))
-		goto fallback;
-
 	for (i = n = 0; i < count; i++) {
 		if (sna_get_glyph8(gc->font, priv, chars[i], &info[n]))
 			n++;
@@ -11024,6 +11021,9 @@ sna_image_text8(DrawablePtr drawable, GCPtr gc,
 	if (!ACCEL_IMAGE_TEXT8)
 		goto force_fallback;
 
+	if (sna_font_too_large(gc->font))
+		goto force_fallback;
+
 	if (!PM_IS_SOLID(drawable, gc->planemask))
 		goto force_fallback;
 
@@ -11074,9 +11074,6 @@ sna_image_text16(DrawablePtr drawable, GCPtr gc,
 	if (drawable->depth < 8)
 		goto fallback;
 
-	if (sna_font_too_large(gc->font))
-		goto fallback;
-
 	for (i = n = 0; i < count; i++) {
 		if (sna_get_glyph16(gc->font, priv, chars[i], &info[n]))
 			n++;
@@ -11118,6 +11115,9 @@ sna_image_text16(DrawablePtr drawable, GCPtr gc,
 	if (!ACCEL_IMAGE_TEXT16)
 		goto force_fallback;
 
+	if (sna_font_too_large(gc->font))
+		goto force_fallback;
+
 	if (!PM_IS_SOLID(drawable, gc->planemask))
 		goto force_fallback;
 
commit c5b6741d3729c6867702ab64a6c59cb8052c0ef3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun May 6 12:40:54 2012 +0100

    sna/gen2+: Fix typo for computing redirected extents for render copy
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 0ad346e..9717aac 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -2909,14 +2909,14 @@ fallback:
 		int i;
 
 		for (i = 1; i < n; i++) {
-			if (extents.x1 < box[i].x1)
+			if (box[i].x1 < extents.x1)
 				extents.x1 = box[i].x1;
-			if (extents.y1 < box[i].y1)
+			if (box[i].y1 < extents.y1)
 				extents.y1 = box[i].y1;
 
-			if (extents.x2 > box[i].x2)
+			if (box[i].x2 > extents.x2)
 				extents.x2 = box[i].x2;
-			if (extents.y2 > box[i].y2)
+			if (box[i].y2 > extents.y2)
 				extents.y2 = box[i].y2;
 		}
 		if (!sna_render_composite_redirect(sna, &tmp,
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 680d36f..e73c707 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -4069,14 +4069,14 @@ fallback_blt:
 		int i;
 
 		for (i = 1; i < n; i++) {
-			if (extents.x1 < box[i].x1)
+			if (box[i].x1 < extents.x1)
 				extents.x1 = box[i].x1;
-			if (extents.y1 < box[i].y1)
+			if (box[i].y1 < extents.y1)
 				extents.y1 = box[i].y1;
 
-			if (extents.x2 > box[i].x2)
+			if (box[i].x2 > extents.x2)
 				extents.x2 = box[i].x2;
-			if (extents.y2 > box[i].y2)
+			if (box[i].y2 > extents.y2)
 				extents.y2 = box[i].y2;
 		}
 		if (!sna_render_composite_redirect(sna, &tmp,
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index b8a2459..9cad75e 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2655,14 +2655,14 @@ fallback_blt:
 		int i;
 
 		for (i = 1; i < n; i++) {
-			if (extents.x1 < box[i].x1)
+			if (box[i].x1 < extents.x1)
 				extents.x1 = box[i].x1;
-			if (extents.y1 < box[i].y1)
+			if (box[i].y1 < extents.y1)
 				extents.y1 = box[i].y1;
 
-			if (extents.x2 > box[i].x2)
+			if (box[i].x2 > extents.x2)
 				extents.x2 = box[i].x2;
-			if (extents.y2 > box[i].y2)
+			if (box[i].y2 > extents.y2)
 				extents.y2 = box[i].y2;
 		}
 		if (!sna_render_composite_redirect(sna, &tmp,
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 1fb7f65..54d0b22 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2962,14 +2962,14 @@ fallback_blt:
 		int i;
 
 		for (i = 1; i < n; i++) {
-			if (extents.x1 < box[i].x1)
+			if (box[i].x1 < extents.x1)
 				extents.x1 = box[i].x1;
-			if (extents.y1 < box[i].y1)
+			if (box[i].y1 < extents.y1)
 				extents.y1 = box[i].y1;
 
-			if (extents.x2 > box[i].x2)
+			if (box[i].x2 > extents.x2)
 				extents.x2 = box[i].x2;
-			if (extents.y2 > box[i].y2)
+			if (box[i].y2 > extents.y2)
 				extents.y2 = box[i].y2;
 		}
 
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 38fb024..55673bf 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -3339,14 +3339,14 @@ fallback_blt:
 		int i;
 
 		for (i = 1; i < n; i++) {
-			if (extents.x1 < box[i].x1)
+			if (box[i].x1 < extents.x1)
 				extents.x1 = box[i].x1;
-			if (extents.y1 < box[i].y1)
+			if (box[i].y1 < extents.y1)
 				extents.y1 = box[i].y1;
 
-			if (extents.x2 > box[i].x2)
+			if (box[i].x2 > extents.x2)
 				extents.x2 = box[i].x2;
-			if (extents.y2 > box[i].y2)
+			if (box[i].y2 > extents.y2)
 				extents.y2 = box[i].y2;
 		}
 		if (!sna_render_composite_redirect(sna, &tmp,
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 6c9a833..e128f5c 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -3422,14 +3422,14 @@ fallback_blt:
 		int i;
 
 		for (i = 1; i < n; i++) {
-			if (extents.x1 < box[i].x1)
+			if (box[i].x1 < extents.x1)
 				extents.x1 = box[i].x1;
-			if (extents.y1 < box[i].y1)
+			if (box[i].y1 < extents.y1)
 				extents.y1 = box[i].y1;
 
-			if (extents.x2 > box[i].x2)
+			if (box[i].x2 > extents.x2)
 				extents.x2 = box[i].x2;
-			if (extents.y2 > box[i].y2)
+			if (box[i].y2 > extents.y2)
 				extents.y2 = box[i].y2;
 		}
 		if (!sna_render_composite_redirect(sna, &tmp,
commit 771090f25db702d25ebbd3f2b44429cf0acfe8fd
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri May 4 20:56:37 2012 +0100

    sna: Add a pair of asserts to track down a NULL pointer dereference
    
    Looks like the assumption for the location of the data is invalid,
    allocation failure, perhaps?
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index dee6608..880e173 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -460,6 +460,8 @@ static struct kgem_bo *upload(struct sna *sna,
 			priv->mapped = false;
 		}
 		if (pixmap->devPrivate.ptr == NULL) {
+			assert(priv->ptr);
+			assert(priv->stride);
 			pixmap->devPrivate.ptr = priv->ptr;
 			pixmap->devKind = priv->stride;
 		}
commit d376a960df7a081a5d449f77b81ae13223b98929
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri May 4 10:09:46 2012 +0100

    sna/dri: Only track a single pending flip across all pipes
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna.h b/src/sna/sna.h
index e07a115..3e4b8ec 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -249,7 +249,7 @@ struct sna {
 	} mode;
 
 	struct sna_dri {
-		void *flip_pending[2];
+		void *flip_pending;
 	} dri;
 
 	unsigned int tiling;
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 32602d5..2b97e68 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -937,6 +937,9 @@ can_flip(struct sna * sna,
 	WindowPtr win = (WindowPtr)draw;
 	PixmapPtr pixmap;
 
+	if (!sna->scrn->vtSema)
+		return FALSE;
+
 	if (draw->type == DRAWABLE_PIXMAP)
 		return FALSE;
 
@@ -1109,7 +1112,7 @@ sna_dri_flip_continue(struct sna *sna,
 
 	info->next_front.name = 0;
 
-	sna->dri.flip_pending[info->pipe] = info;
+	sna->dri.flip_pending = info;
 
 	return TRUE;
 }
@@ -1171,8 +1174,8 @@ static void sna_dri_flip_event(struct sna *sna,
 		break;
 
 	case DRI2_FLIP_THROTTLE:
-		assert(sna->dri.flip_pending[flip->pipe] == flip);
-		sna->dri.flip_pending[flip->pipe] = NULL;
+		assert(sna->dri.flip_pending == flip);
+		sna->dri.flip_pending = NULL;
 
 		if (flip->next_front.name &&
 		    flip->drawable_id &&
@@ -1204,9 +1207,9 @@ static void sna_dri_flip_event(struct sna *sna,
 	case DRI2_ASYNC_FLIP:
 		DBG(("%s: async swap flip completed on pipe %d, pending? %d, new? %d\n",
 		     __FUNCTION__, flip->pipe,
-		     sna->dri.flip_pending[flip->pipe] != NULL,
+		     sna->dri.flip_pending != NULL,
 		     flip->front->name != flip->old_front.name));
-		assert(sna->dri.flip_pending[flip->pipe] == flip);
+		assert(sna->dri.flip_pending == flip);
 
 		if (flip->front->name != flip->next_front.name) {
 			DBG(("%s: async flip continuing\n", __FUNCTION__));
@@ -1240,7 +1243,7 @@ finish_async_flip:
 			flip->next_front.bo = NULL;
 
 			DBG(("%s: async flip completed\n", __FUNCTION__));
-			sna->dri.flip_pending[flip->pipe] = NULL;
+			sna->dri.flip_pending = NULL;
 			sna_dri_frame_event_info_free(flip);
 		}
 		break;
@@ -1326,9 +1329,9 @@ sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 		int type = DRI2_FLIP_THROTTLE;
 
 		DBG(("%s: performing immediate swap on pipe %d, pending? %d\n",
-		     __FUNCTION__, pipe, sna->dri.flip_pending[pipe] != NULL));
+		     __FUNCTION__, pipe, sna->dri.flip_pending != NULL));
 
-		info = sna->dri.flip_pending[pipe];
+		info = sna->dri.flip_pending;
 		if (info) {
 			if (info->drawable_id == draw->id) {
 				DBG(("%s: chaining flip\n", __FUNCTION__));
@@ -1382,7 +1385,7 @@ sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 				       CREATE_EXACT);
 		info->back->name = kgem_bo_flink(&sna->kgem,
 						 get_private(info->back)->bo);
-		sna->dri.flip_pending[info->pipe] = info;
+		sna->dri.flip_pending = info;
 
 		DRI2SwapComplete(info->client, draw, 0, 0, 0,
 				 DRI2_EXCHANGE_COMPLETE,
@@ -1697,16 +1700,17 @@ sna_dri_async_swap(ClientPtr client, DrawablePtr draw,
 	struct sna *sna = to_sna_from_drawable(draw);
 	struct sna_dri_frame_event *info;
 	struct kgem_bo *bo;
-	int name, pipe;
+	int name;
 
 	DBG(("%s()\n", __FUNCTION__));
 
-	pipe = sna_dri_get_pipe(draw);
-	if (pipe == -1) {
-		PixmapPtr pixmap = get_drawable_pixmap(draw);
+	if (!sna->scrn->vtSema) {
+		PixmapPtr pixmap;
 
+exchange:
 		DBG(("%s: unattached, exchange pixmaps\n", __FUNCTION__));
 
+		pixmap = get_drawable_pixmap(draw);
 		set_bo(pixmap, get_private(back)->bo);
 		sna_dri_exchange_attachment(front, back);
 		get_private(back)->pixmap = pixmap;
@@ -1733,10 +1737,14 @@ blit:
 	bo = NULL;
 	name = 0;
 
-	info = sna->dri.flip_pending[pipe];
+	info = sna->dri.flip_pending;
 	if (info == NULL) {
-		DBG(("%s: no pending flip on pipe %d, so updating scanout\n",
-		     __FUNCTION__, pipe));
+		int pipe = sna_dri_get_pipe(draw);
+		if (pipe == -1)
+			goto exchange;
+
+		DBG(("%s: no pending flip, so updating scanout\n",
+		     __FUNCTION__));
 
 		info = calloc(1, sizeof(struct sna_dri_frame_event));
 		if (!info)
@@ -1801,7 +1809,7 @@ blit:
 	info->back->name = name;
 
 	set_bo(sna->front, get_private(info->front)->bo);
-	sna->dri.flip_pending[info->pipe] = info;
+	sna->dri.flip_pending = info;
 
 	DRI2SwapComplete(client, draw, 0, 0, 0,
 			 DRI2_EXCHANGE_COMPLETE, func, data);
commit 450592f989efd0d3bc9ef2de245fce0a180e91a2
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri May 4 09:50:19 2012 +0100

    sna: Cache the framebuffer id
    
    Also fixup a weakness of only tracking scanout with a single bit, as we
    used to clear it forcibly after every flip.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 666a23f..6e0c3d0 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1088,6 +1088,25 @@ inline static void kgem_bo_remove_from_active(struct kgem *kgem,
 	assert(list_is_empty(&bo->vma));
 }
 
+static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo)
+{
+	if (!bo->scanout)
+		return;
+
+	assert(bo->proxy == NULL);
+
+	DBG(("%s: handle=%d, fb=%d\n", __FUNCTION__, bo->handle, bo->delta));
+	if (bo->delta) {
+		drmModeRmFB(kgem->fd, bo->delta);
+		bo->delta = 0;
+	}
+
+	bo->scanout = false;
+	bo->needs_flush = true;
+	bo->flush = false;
+	bo->reusable = true;
+}
+
 static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 {
 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
@@ -1096,6 +1115,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	assert(bo->refcnt == 0);
 
 	bo->binding.offset = 0;
+	kgem_bo_clear_scanout(kgem, bo);
 
 	if (NO_CACHE)
 		goto destroy;
@@ -4151,18 +4171,6 @@ void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
 	}
 }
 
-void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo)
-{
-	bo->needs_flush = true;
-	bo->flush = false;
-
-	if (!bo->scanout)
-		return;
-
-	bo->scanout = false;
-	bo->reusable = true;
-}
-
 struct kgem_bo *
 kgem_replace_bo(struct kgem *kgem,
 		struct kgem_bo *src,
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 9eac68e..186eaa0 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -509,8 +509,6 @@ struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
 bool kgem_buffer_is_inplace(struct kgem_bo *bo);
 void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *bo);
 
-void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo);
-
 void kgem_throttle(struct kgem *kgem);
 #define MAX_INACTIVE_TIME 10
 bool kgem_expire_cache(struct kgem *kgem);
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 5272a08..e07a115 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -318,8 +318,6 @@ extern int sna_page_flip(struct sna *sna,
 
 extern PixmapPtr sna_set_screen_pixmap(struct sna *sna, PixmapPtr pixmap);
 
-void sna_mode_delete_fb(struct sna *sna, uint32_t fb);
-
 constant static inline struct sna *
 to_sna(ScrnInfoPtr scrn)
 {
diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 5275d4a..6287cd9 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -151,6 +151,40 @@ int sna_crtc_to_plane(xf86CrtcPtr crtc)
 	return sna_crtc->plane;
 }
 
+static unsigned get_fb(struct sna *sna, struct kgem_bo *bo,
+		       int width, int height)
+{
+	ScrnInfoPtr scrn = sna->scrn;
+	int ret;
+
+	assert(bo->proxy == NULL);
+	if (bo->delta) {
+		DBG(("%s: reusing fb=%d for handle=%d\n",
+		     __FUNCTION__, bo->delta, bo->handle));
+		return bo->delta;
+	}
+
+	DBG(("%s: create fb %dx%d@%d/%d\n",
+	     __FUNCTION__, width, height, scrn->depth, scrn->bitsPerPixel));
+
+	assert(bo->tiling != I915_TILING_Y);
+	ret = drmModeAddFB(sna->kgem.fd,
+			   width, height,
+			   scrn->depth, scrn->bitsPerPixel,
+			   bo->pitch, bo->handle,
+			   &bo->delta);
+	if (ret < 0) {
+		ErrorF("%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d\n",
+		       __FUNCTION__,
+		       width, height,
+		       scrn->depth, scrn->bitsPerPixel, bo->pitch);
+		return 0;
+	}
+
+	bo->scanout = true;
+	return bo->delta;
+}
+
 static uint32_t gem_create(int fd, int size)
 {
 	struct drm_i915_gem_create create;
@@ -434,10 +468,6 @@ sna_crtc_restore(struct sna *sna)
 	if (!bo)
 		return;
 
-	assert(bo->tiling != I915_TILING_Y);
-	bo->scanout = true;
-	bo->domain = DOMAIN_NONE;
-
 	DBG(("%s: create fb %dx%d@%d/%d\n",
 	     __FUNCTION__,
 	     sna->front->drawable.width,
@@ -446,13 +476,10 @@ sna_crtc_restore(struct sna *sna)
 	     sna->front->drawable.bitsPerPixel));
 
 	sna_mode_remove_fb(sna);
-	if (drmModeAddFB(sna->kgem.fd,
-			 sna->front->drawable.width,
-			 sna->front->drawable.height,
-			 sna->front->drawable.depth,
-			 sna->front->drawable.bitsPerPixel,
-			 bo->pitch, bo->handle,
-			 &sna->mode.fb_id))
+	sna->mode.fb_id = get_fb(sna, bo,
+				 sna->front->drawable.width,
+				 sna->front->drawable.height);
+	if (sna->mode.fb_id == 0)
 		return;
 
 	DBG(("%s: handle %d attached to fb %d\n",
@@ -468,6 +495,7 @@ sna_crtc_restore(struct sna *sna)
 			return;
 	}
 
+	bo->domain = DOMAIN_NONE;
 	scrn->displayWidth = bo->pitch / sna->mode.cpp;
 	sna->mode.fb_pixmap = sna->front->drawable.serialNumber;
 }
@@ -652,28 +680,16 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
 		if (!bo)
 			return FALSE;
 
-		DBG(("%s: create fb %dx%d@%d/%d\n",
-		     __FUNCTION__,
-		     scrn->virtualX, scrn->virtualY,
-		     scrn->depth, scrn->bitsPerPixel));
-
-		assert(bo->tiling != I915_TILING_Y);
-		ret = drmModeAddFB(sna->kgem.fd,
-				   scrn->virtualX, scrn->virtualY,
-				   scrn->depth, scrn->bitsPerPixel,
-				   bo->pitch, bo->handle,
-				   &sna_mode->fb_id);
-		if (ret < 0) {
-			ErrorF("%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d\n",
-			       __FUNCTION__,
-			       scrn->virtualX, scrn->virtualY,
-			       scrn->depth, scrn->bitsPerPixel, bo->pitch);
+		/* recreate the fb in case the size has changed */
+		assert(bo->delta == 0);
+		sna_mode->fb_id = get_fb(sna, bo,
+					 scrn->virtualX, scrn->virtualY);
+		if (sna_mode->fb_id == 0)
 			return FALSE;
-		}
 
 		DBG(("%s: handle %d attached to fb %d\n",
 		     __FUNCTION__, bo->handle, sna_mode->fb_id));
-		bo->scanout = true;
+
 		bo->domain = DOMAIN_NONE;
 		sna_mode->fb_pixmap = sna->front->drawable.serialNumber;
 	}
@@ -773,22 +789,14 @@ sna_crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height)
 		return NULL;
 	}
 
-	assert(bo->tiling != I915_TILING_Y);
-	if (drmModeAddFB(sna->kgem.fd,
-			 width, height, scrn->depth, scrn->bitsPerPixel,
-			 bo->pitch, bo->handle,
-			 &sna_crtc->shadow_fb_id)) {
-		ErrorF("%s: failed to add rotate  fb: %dx%d depth=%d, bpp=%d, pitch=%d\n",
-		       __FUNCTION__,
-		       width, height,
-		       scrn->depth, scrn->bitsPerPixel, bo->pitch);
+	sna_crtc->shadow_fb_id = get_fb(sna, bo, width, height);
+	if (sna_crtc->shadow_fb_id == 0) {
 		scrn->pScreen->DestroyPixmap(shadow);
 		return NULL;
 	}
 
 	DBG(("%s: attached handle %d to fb %d\n",
 	     __FUNCTION__, bo->handle, sna_crtc->shadow_fb_id));
-	bo->scanout = true;
 	bo->domain = DOMAIN_NONE;
 	return sna_crtc->shadow = shadow;
 }
@@ -813,10 +821,8 @@ sna_crtc_shadow_destroy(xf86CrtcPtr crtc, PixmapPtr pixmap, void *data)
 	DBG(("%s(fb=%d, handle=%d)\n", __FUNCTION__,
 	     sna_crtc->shadow_fb_id, sna_pixmap_get_bo(pixmap)->handle));
 
-	drmModeRmFB(sna->kgem.fd, sna_crtc->shadow_fb_id);
 	sna_crtc->shadow_fb_id = 0;
 
-	kgem_bo_clear_scanout(&sna->kgem, sna_pixmap_get_bo(pixmap));
 	pixmap->drawable.pScreen->DestroyPixmap(pixmap);
 	sna_crtc->shadow = NULL;
 }
@@ -1702,23 +1708,16 @@ sna_crtc_resize(ScrnInfoPtr scrn, int width, int height)
 	if (!bo)
 		goto fail;
 
-	assert(bo->tiling != I915_TILING_Y);
-	if (drmModeAddFB(sna->kgem.fd, width, height,
-			 scrn->depth, scrn->bitsPerPixel,
-			 bo->pitch, bo->handle,
-			 &mode->fb_id)) {
-		ErrorF("%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d\n",
-		       __FUNCTION__,
-		       width, height,
-		       scrn->depth, scrn->bitsPerPixel, bo->pitch);
+	assert(bo->delta == 0);
+
+	mode->fb_id = get_fb(sna, bo, width, height);
+	if (mode->fb_id == 0)
 		goto fail;
-	}
 
 	DBG(("%s: handle %d, pixmap serial %lu attached to fb %d\n",
 	     __FUNCTION__, bo->handle,
 	     sna->front->drawable.serialNumber, mode->fb_id));
 
-	bo->scanout = true;
 	for (i = 0; i < xf86_config->num_crtc; i++) {
 		xf86CrtcPtr crtc = xf86_config->crtc[i];
 
@@ -1739,17 +1738,12 @@ sna_crtc_resize(ScrnInfoPtr scrn, int width, int height)
 	assert(scrn->pScreen->GetScreenPixmap(scrn->pScreen) == sna->front);
 	assert(scrn->pScreen->GetWindowPixmap(scrn->pScreen->root) == sna->front);
 
-	if (old_fb_id)
-		drmModeRmFB(sna->kgem.fd, old_fb_id);
-	kgem_bo_clear_scanout(&sna->kgem, sna_pixmap_get_bo(old_front));
 	scrn->pScreen->DestroyPixmap(old_front);
 
 	return TRUE;
 
 fail:
 	DBG(("%s: restoring original front pixmap and fb\n", __FUNCTION__));
-	if (old_fb_id != mode->fb_id)
-		drmModeRmFB(sna->kgem.fd, mode->fb_id);
 	mode->fb_id = old_fb_id;
 
 	if (sna->front)
@@ -1843,15 +1837,9 @@ sna_page_flip(struct sna *sna,
 	/*
 	 * Create a new handle for the back buffer
 	 */
-	assert(bo->tiling != I915_TILING_Y);
-	if (drmModeAddFB(sna->kgem.fd, scrn->virtualX, scrn->virtualY,
-			 scrn->depth, scrn->bitsPerPixel,
-			 bo->pitch, bo->handle,
-			 &mode->fb_id)) {
-		ErrorF("%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d\n",
-		       __FUNCTION__,
-		       scrn->virtualX, scrn->virtualY,
-		       scrn->depth, scrn->bitsPerPixel, bo->pitch);
+	mode->fb_id = get_fb(sna, bo, scrn->virtualX, scrn->virtualY);
+	if (mode->fb_id == 0) {
+		mode->fb_id = *old_fb;
 		return 0;
 	}
 
@@ -1871,23 +1859,14 @@ sna_page_flip(struct sna *sna,
 	 */
 	count = do_page_flip(sna, data, ref_crtc_hw_id);
 	DBG(("%s: page flipped %d crtcs\n", __FUNCTION__, count));
-	if (count) {
-		bo->scanout = true;
+	if (count)
 		bo->domain = DOMAIN_NONE;
-	} else {
-		drmModeRmFB(sna->kgem.fd, mode->fb_id);
+	else
 		mode->fb_id = *old_fb;
-	}
 
 	return count;
 }
 
-void sna_mode_delete_fb(struct sna *sna, uint32_t fb)
-{
-	if (fb)
-		drmModeRmFB(sna->kgem.fd, fb);
-}
-
 static const xf86CrtcConfigFuncsRec sna_crtc_config_funcs = {
 	sna_crtc_resize
 };
@@ -1932,11 +1911,8 @@ sna_mode_remove_fb(struct sna *sna)
 	DBG(("%s: deleting fb id %d for pixmap serial %d\n",
 	     __FUNCTION__, mode->fb_id,mode->fb_pixmap));
 
-	if (mode->fb_id) {
-		drmModeRmFB(sna->kgem.fd, mode->fb_id);
-		mode->fb_id = 0;
-		mode->fb_pixmap = 0;
-	}
+	mode->fb_id = 0;
+	mode->fb_pixmap = 0;
 }
 
 void
@@ -1957,7 +1933,6 @@ sna_mode_fini(struct sna *sna)
 #endif
 
 	sna_mode_remove_fb(sna);
-	kgem_bo_clear_scanout(&sna->kgem, sna_pixmap_get_bo(sna->front));
 
 	/* mode->shadow_fb_id should have been destroyed already */
 }
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index d5eefcb..32602d5 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -336,7 +336,6 @@ static void _sna_dri_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer)
 		}
 
 		private->bo->flush = 0;
-		kgem_bo_clear_scanout(&sna->kgem, private->bo); /* paranoia */
 		kgem_bo_destroy(&sna->kgem, private->bo);
 
 		free(buffer);
@@ -392,7 +391,6 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo)
 	sna_damage_destroy(&priv->cpu_damage);
 	priv->undamaged = false;
 
-	kgem_bo_clear_scanout(&sna->kgem, priv->gpu_bo); /* paranoia */
 	kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
 	priv->gpu_bo = ref(bo);
 }
@@ -860,18 +858,10 @@ sna_dri_add_frame_event(struct sna_dri_frame_event *info)
 static void
 sna_dri_frame_event_release_bo(struct kgem *kgem, struct kgem_bo *bo)
 {
-	kgem_bo_clear_scanout(kgem, bo);
 	kgem_bo_destroy(kgem, bo);
 }
 
 static void
-sna_dri_frame_event_finish(struct sna_dri_frame_event *info)
-{
-	sna_mode_delete_fb(info->sna, info->old_fb);
-	kgem_bo_clear_scanout(&info->sna->kgem, info->old_front.bo);
-}
-
-static void
 sna_dri_frame_event_info_free(struct sna_dri_frame_event *info)
 {
 	DBG(("%s: del[%p] (%p, %ld)\n", __FUNCTION__,
@@ -1177,13 +1167,10 @@ static void sna_dri_flip_event(struct sna *sna,
 					 flip->event_data);
 		}
 
-		sna_dri_frame_event_finish(flip);
 		sna_dri_frame_event_info_free(flip);
 		break;
 
 	case DRI2_FLIP_THROTTLE:
-		sna_dri_frame_event_finish(flip);
-
 		assert(sna->dri.flip_pending[flip->pipe] == flip);
 		sna->dri.flip_pending[flip->pipe] = NULL;
 
@@ -1221,8 +1208,6 @@ static void sna_dri_flip_event(struct sna *sna,
 		     flip->front->name != flip->old_front.name));
 		assert(sna->dri.flip_pending[flip->pipe] == flip);
 
-		sna_dri_frame_event_finish(flip);
-
 		if (flip->front->name != flip->next_front.name) {
 			DBG(("%s: async flip continuing\n", __FUNCTION__));
 
@@ -1801,7 +1786,6 @@ blit:
 		}
 		info->front->name = info->back->name;
 		get_private(info->front)->bo = get_private(info->back)->bo;
-		__kgem_flush(&sna->kgem, get_private(info->back)->bo);
 	}
 
 	if (bo == NULL) {
diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c
index 7b3cfce..150e973 100644
--- a/src/sna/sna_driver.c
+++ b/src/sna/sna_driver.c
@@ -784,9 +784,6 @@ static Bool sna_close_screen(int scrnIndex, ScreenPtr screen)
 
 	sna_mode_remove_fb(sna);
 	if (sna->front) {
-		struct kgem_bo *bo = sna_pixmap_get_bo(sna->front);
-		if (bo)
-			kgem_bo_clear_scanout(&sna->kgem, bo); /* valgrind */
 		screen->DestroyPixmap(sna->front);
 		sna->front = NULL;
 	}
commit 29d035279b2fe98d5ba9cf01125faea34d36fb76
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri May 4 09:11:31 2012 +0100

    sna/dri: pageflip unref debugging
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna.h b/src/sna/sna.h
index 956a038..5272a08 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -130,6 +130,8 @@ struct sna_pixmap {
 	uint32_t stride;
 	uint32_t clear_color;
 
+	uint32_t flush;
+
 #define SOURCE_BIAS 4
 	uint16_t source_count;
 	uint8_t pinned :1;
@@ -138,7 +140,6 @@ struct sna_pixmap {
 	uint8_t undamaged :1;
 	uint8_t create :3;
 	uint8_t header :1;
-	uint8_t flush :1;
 };
 
 struct sna_glyph {
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 1ad2ff1..d5eefcb 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -163,7 +163,7 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna,
 	if (priv == NULL)
 		return NULL;
 
-	if (priv->flush)
+	if (priv->flush++)
 		return priv->gpu_bo;
 
 	tiling = color_tiling(sna, &pixmap->drawable);
@@ -181,7 +181,6 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna,
 
 	/* Don't allow this named buffer to be replaced */
 	priv->pinned = 1;
-	priv->flush = true;
 
 	return priv->gpu_bo;
 }
@@ -317,17 +316,21 @@ static void _sna_dri_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer)
 	if (buffer == NULL)
 		return;
 
+	DBG(("%s: %p [handle=%d] -- refcnt=%d, pixmap=%d\n",
+	     __FUNCTION__, buffer, private->bo->handle, private->refcnt,
+	     private->pixmap ? private->pixmap->drawable.serialNumber : 0));
+
 	if (--private->refcnt == 0) {
 		if (private->pixmap) {
 			ScreenPtr screen = private->pixmap->drawable.pScreen;
 			struct sna_pixmap *priv = sna_pixmap(private->pixmap);
 
 			/* Undo the DRI markings on this pixmap */
-			assert(priv->flush);
-			list_del(&priv->list);
-			sna_accel_watch_flush(sna, -1);
-			priv->pinned = private->pixmap == sna->front;
-			priv->flush = false;
+			if (priv->flush && --priv->flush == 0) {
+				list_del(&priv->list);
+				sna_accel_watch_flush(sna, -1);
+				priv->pinned = private->pixmap == sna->front;
+			}
 
 			screen->DestroyPixmap(private->pixmap);
 		}
@@ -1238,6 +1241,8 @@ static void sna_dri_flip_event(struct sna *sna,
 			flip->next_front.name = flip->front->name;
 			flip->off_delay = 5;
 		} else if (--flip->off_delay) {
+			DBG(("%s: queuing no-flip [delay=%d]\n",
+			     __FUNCTION__, flip->off_delay));
 			/* Just queue a no-op flip to trigger another event */
 			flip->count = sna_page_flip(sna,
 						    get_private(flip->front)->bo,
@@ -1765,6 +1770,13 @@ blit:
 			goto blit;
 		}
 
+		DBG(("%s: referencing (%p:%d, %p:%d)\n",
+		     __FUNCTION__,
+		     front, get_private(front)->refcnt,
+		     back, get_private(back)->refcnt));
+		sna_dri_reference_buffer(front);
+		sna_dri_reference_buffer(back);
+
 		if (!sna_dri_page_flip(sna, info)) {
 			sna_dri_frame_event_info_free(info);
 			goto blit;
@@ -1773,9 +1785,6 @@ blit:
 		info->next_front.name = info->front->name;
 		info->next_front.bo = get_private(info->front)->bo;
 		info->off_delay = 5;
-
-		sna_dri_reference_buffer(front);
-		sna_dri_reference_buffer(back);
 	} else if (info->type != DRI2_ASYNC_FLIP) {
 		/* A normal vsync'ed client is finishing, wait for it
 		 * to unpin the old framebuffer before taking over.
commit 079b491ced2c9c3c73d938ef6025d040016ad3a7
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu May 3 23:52:17 2012 +0100

    sna: Ensure drawables are clipped against the pixmap before migration
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna.h b/src/sna/sna.h
index 424738a..956a038 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -446,16 +446,7 @@ bool must_check sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 						RegionPtr region,
 						unsigned flags);
 
-static inline bool must_check
-sna_drawable_move_to_cpu(DrawablePtr drawable, unsigned flags)
-{
-	RegionRec region;
-
-	pixman_region_init_rect(&region,
-				drawable->x, drawable->y,
-				drawable->width, drawable->height);
-	return sna_drawable_move_region_to_cpu(drawable, &region, flags);
-}
+bool must_check sna_drawable_move_to_cpu(DrawablePtr drawable, unsigned flags);
 
 static inline bool must_check
 sna_drawable_move_to_gpu(DrawablePtr drawable, unsigned flags)
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 6430de8..1bc34dc 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1697,6 +1697,43 @@ out:
 	return true;
 }
 
+bool
+sna_drawable_move_to_cpu(DrawablePtr drawable, unsigned flags)
+{
+	RegionRec region;
+	PixmapPtr pixmap;
+	int16_t dx, dy;
+
+	if (drawable->type == DRAWABLE_PIXMAP)
+		return sna_pixmap_move_to_cpu((PixmapPtr)drawable, flags);
+
+	pixmap = get_window_pixmap((WindowPtr)drawable);
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+	DBG(("%s: (%d, %d)x(%d, %d) + (%d, %d), flags=%x\n",
+	     __FUNCTION__,
+	     drawable->x, drawable->y,
+	     drawable->width, drawable->height,
+	     dx, dy, flags));
+
+	region.extents.x1 = drawable->x + dx;
+	region.extents.y1 = drawable->y + dy;
+	region.extents.x2 = region.extents.x1 + drawable->width;
+	region.extents.y2 = region.extents.y1 + drawable->height;
+	region.data = NULL;
+
+	if (region.extents.x1 < 0)
+		region.extents.x1 = 0;
+	if (region.extents.y1 < 0)
+		region.extents.y1 = 0;
+	if (region.extents.x2 > pixmap->drawable.width)
+		region.extents.x2 = pixmap->drawable.width;
+	if (region.extents.y2 > pixmap->drawable.height)
+		region.extents.y2 = pixmap->drawable.height;
+
+	return sna_drawable_move_region_to_cpu(&pixmap->drawable, &region, flags);
+}
+
 static bool alu_overwrites(uint8_t alu)
 {
 	switch (alu) {
commit 1a5f8599b150064339d62a97c58026e62b49ff27
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu May 3 23:21:48 2012 +0100

    sna: Compile fix for fresh assertion
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index a116936..666a23f 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -4076,7 +4076,7 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 	assert(_bo->proxy);
 
 	_bo = _bo->proxy;
-	assert(bo->proxy == NULL);
+	assert(_bo->proxy == NULL);
 	assert(_bo->exec == NULL);
 
 	bo = (struct kgem_partial_bo *)_bo;
commit 61cac5c265279d45677262216a0ba56f548cd898
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu May 3 22:33:59 2012 +0100

    sna: Maintain a reference to the chain of proxies
    
    Rather than attempt to flatten the chain to the last link, we may need
    to hold a reference to the intermediate links in case of batch buffer
    submission.
    
    Fixes http://tnsp.org/~ccr/intel-gfx/test.html
    
    Reported-by: Matti Hamalainen <ccr at tnsp.org>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=49436
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index ed2eaf1..680d36f 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2381,9 +2381,14 @@ gen3_composite_picture(struct sna *sna,
 		return sna_render_picture_convert(sna, picture, channel, pixmap,
 						  x, y, w, h, dst_x, dst_y);
 
-	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
+	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
+		DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
+		     __FUNCTION__,
+		     pixmap->drawable.width, pixmap->drawable.height,
+		     x, y, w, h));
 		return sna_render_picture_extract(sna, picture, channel,
 						  x, y, w, h, dst_x, dst_y);
+	}
 
 	return sna_render_pixmap_bo(sna, channel, pixmap,
 				    x, y, w, h, dst_x, dst_y);
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 49fa173..a116936 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2960,7 +2960,7 @@ bool kgem_check_bo(struct kgem *kgem, ...)
 		if (bo->exec)
 			continue;
 
-		if (bo->proxy) {
+		while (bo->proxy) {
 			bo = bo->proxy;
 			if (bo->exec)
 				continue;
@@ -2989,7 +2989,7 @@ bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo)
 {
 	uint32_t size;
 
-	if (bo->proxy)
+	while (bo->proxy)
 		bo = bo->proxy;
 	if (bo->exec) {
 		if (kgem->gen < 40 &&
@@ -3034,7 +3034,7 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
 
 	va_start(ap, kgem);
 	while ((bo = va_arg(ap, struct kgem_bo *))) {
-		if (bo->proxy)
+		while (bo->proxy)
 			bo = bo->proxy;
 		if (bo->exec) {
 			if (kgem->gen >= 40 || bo->tiling == I915_TILING_NONE)
@@ -3098,10 +3098,10 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 		assert(bo->refcnt);
 		assert(!bo->purged);
 
-		delta += bo->delta;
-		if (bo->proxy) {
-			DBG(("%s: adding proxy for handle=%d\n",
-			     __FUNCTION__, bo->handle));
+		while (bo->proxy) {
+			DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
+			     __FUNCTION__, bo->delta, bo->handle));
+			delta += bo->delta;
 			assert(bo->handle == bo->proxy->handle);
 			/* need to release the cache upon batch submit */
 			list_move(&bo->request, &kgem->next_request->buffers);
@@ -3527,8 +3527,9 @@ struct kgem_bo *kgem_create_proxy(struct kgem_bo *target,
 {
 	struct kgem_bo *bo;
 
-	DBG(("%s: target handle=%d, offset=%d, length=%d, io=%d\n",
-	     __FUNCTION__, target->handle, offset, length, target->io));
+	DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
+	     __FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1,
+	     offset, length, target->io));
 
 	bo = __kgem_bo_alloc(target->handle, length);
 	if (bo == NULL)
@@ -3537,15 +3538,11 @@ struct kgem_bo *kgem_create_proxy(struct kgem_bo *target,
 	bo->reusable = false;
 	bo->size.bytes = length;
 
-	bo->io = target->io;
+	bo->io = target->io && target->proxy == NULL;
 	bo->dirty = target->dirty;
 	bo->tiling = target->tiling;
 	bo->pitch = target->pitch;
 
-	if (target->proxy) {
-		offset += target->delta;
-		target = target->proxy;
-	}
 	bo->proxy = kgem_bo_reference(target);
 	bo->delta = offset;
 	return bo;
@@ -4079,6 +4076,7 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 	assert(_bo->proxy);
 
 	_bo = _bo->proxy;
+	assert(bo->proxy == NULL);
 	assert(_bo->exec == NULL);
 
 	bo = (struct kgem_partial_bo *)_bo;
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 3072345..6430de8 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2138,7 +2138,8 @@ sna_pixmap_create_upload(ScreenPtr screen,
 	int bpp = BitsPerPixel(depth);
 	void *ptr;
 
-	DBG(("%s(%d, %d, %d)\n", __FUNCTION__, width, height, depth));
+	DBG(("%s(%d, %d, %d, flags=%x)\n", __FUNCTION__,
+	     width, height, depth, flags));
 	assert(width);
 	assert(height);
 
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 3d0f9e9..dee6608 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -957,6 +957,10 @@ sna_render_picture_partial(struct sna *sna,
 		kgem_get_tile_size(&sna->kgem, bo->tiling,
 				   &tile_width, &tile_height, &tile_size);
 
+		DBG(("%s: tiling=%d, size=%dx%d, chunk=%d\n",
+		     __FUNCTION__, bo->tiling,
+		     tile_width, tile_height, tile_size));
+
 		/* Ensure we align to an even tile row */
 		box.y1 = box.y1 & ~(2*tile_height - 1);
 		box.y2 = ALIGN(box.y2, 2*tile_height);
@@ -989,8 +993,6 @@ sna_render_picture_partial(struct sna *sna,
 	if (channel->bo == NULL)
 		return 0;
 
-	channel->bo->pitch = bo->pitch;
-
 	if (channel->transform) {
 		memset(&channel->embedded_transform,
 		       0,
diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c
index d0e5afc..d7e6d40 100644
--- a/src/sna/sna_tiling.c
+++ b/src/sna/sna_tiling.c
@@ -188,6 +188,14 @@ sna_tiling_composite_done(struct sna *sna,
 					if (y2 > y + height)
 						y2 = y + height;
 
+					DBG(("%s: rect[%d] = (%d, %d)x(%d,%d), tile=(%d,%d)x(%d, %d), blt=(%d,%d),(%d,%d), delta=(%d,%d)\n",
+					     __FUNCTION__, n,
+					     r->dst.x, r->dst.y,
+					     r->width, r->height,
+					     x, y, width, height,
+					     x1, y1, x2, y2,
+					     dx, dy));
+
 					if (y2 > y1 && x2 > x1) {
 						struct sna_composite_rectangles rr;
 						rr.src.x = dx + r->src.x;
commit dea5d429f7a52dfc945b17a57ef79744cc796b0e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu May 3 17:35:24 2012 +0100

    sna: Remove extraneous SCANOUT flags
    
    These are now fixed by obeying a minimum alignment restriction.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index f239555..3d0f9e9 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -1846,7 +1846,7 @@ sna_render_composite_redirect(struct sna *sna,
 			    width, height, bpp,
 			    kgem_choose_tiling(&sna->kgem, I915_TILING_X,
 					       width, height, bpp),
-			    CREATE_SCANOUT | CREATE_TEMPORARY);
+			    CREATE_TEMPORARY);
 	if (!bo)
 		return FALSE;
 
diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c
index dcb4d1d..d0e5afc 100644
--- a/src/sna/sna_tiling.c
+++ b/src/sna/sna_tiling.c
@@ -384,7 +384,7 @@ sna_tiling_fill_boxes(struct sna *sna,
 							       tmp.drawable.width,
 							       tmp.drawable.height,
 							       dst->drawable.bitsPerPixel),
-					    CREATE_SCANOUT | CREATE_TEMPORARY);
+					    CREATE_TEMPORARY);
 			if (bo) {
 				int16_t dx = this.extents.x1;
 				int16_t dy = this.extents.y1;
commit 19fd24a4db994bb5c5ce4a73f06d9394a758ea91
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu May 3 17:35:10 2012 +0100

    sna: Fix offset for combining damage
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_damage.h b/src/sna/sna_damage.h
index fb661b2..1196912 100644
--- a/src/sna/sna_damage.h
+++ b/src/sna/sna_damage.h
@@ -36,7 +36,8 @@ static inline void sna_damage_combine(struct sna_damage **l,
 				      struct sna_damage *r,
 				      int dx, int dy)
 {
-	*l = _sna_damage_combine(*l, r, dx, dy);
+	assert(!DAMAGE_IS_ALL(*l));
+	*l = _sna_damage_combine(*l, DAMAGE_PTR(r), dx, dy);
 }
 
 fastcall struct sna_damage *_sna_damage_add(struct sna_damage *damage,
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index ec2aaad..f239555 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -1869,6 +1869,7 @@ sna_render_composite_redirect(struct sna *sna,
 	t->real_bo = op->dst.bo;
 	t->real_damage = op->damage;
 	if (op->damage) {
+		assert(!DAMAGE_IS_ALL(op->damage));
 		t->damage = sna_damage_create();
 		op->damage = &t->damage;
 	}
@@ -1899,8 +1900,10 @@ sna_render_composite_redirect_done(struct sna *sna,
 					   &t->box, 1);
 		}
 		if (t->damage) {
+			DBG(("%s: combining damage, offset=(%d, %d)\n",
+			     __FUNCTION__, t->box.x1, t->box.y1));
 			sna_damage_combine(t->real_damage, t->damage,
-					   -t->box.x1, -t->box.y1);
+					   t->box.x1, t->box.y1);
 			__sna_damage_destroy(t->damage);
 		}
 
commit 1376c81dbf3b789e04e6804df1b1fd32bcb2bd1d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu May 3 17:34:28 2012 +0100

    sna: Debug option to force particular upload/download paths
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 7cec3ff..3de164b 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -42,6 +42,8 @@
 
 #define PITCH(x, y) ALIGN((x)*(y), 4)
 
+#define FORCE_INPLACE 0
+
 /* XXX Need to avoid using GTT fenced access for I915_TILING_Y on 855GM */
 
 static Bool
@@ -109,6 +111,15 @@ static void read_boxes_inplace(struct kgem *kgem,
 	} while (--n);
 }
 
+static bool download_inplace(struct kgem *kgem, struct kgem_bo *bo)
+{
+	if (FORCE_INPLACE)
+		return FORCE_INPLACE > 0;
+
+	return !kgem_bo_map_will_stall(kgem, bo) ||
+		bo->tiling == I915_TILING_NONE;
+}
+
 void sna_read_boxes(struct sna *sna,
 		    struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
 		    PixmapPtr dst, int16_t dst_dx, int16_t dst_dy,
@@ -150,8 +161,7 @@ void sna_read_boxes(struct sna *sna,
 	 * this path.
 	 */
 
-	if (!kgem_bo_map_will_stall(kgem, src_bo) ||
-	    src_bo->tiling == I915_TILING_NONE) {
+	if (download_inplace(kgem, src_bo)) {
 fallback:
 		read_boxes_inplace(kgem,
 				   src_bo, src_dx, src_dy,
@@ -512,6 +522,9 @@ static bool upload_inplace(struct kgem *kgem,
 			   const BoxRec *box,
 			   int n, int bpp)
 {
+	if (FORCE_INPLACE)
+		return FORCE_INPLACE > 0;
+
 	/* If we are writing through the GTT, check first if we might be
 	 * able to almagamate a series of small writes into a single
 	 * operation.
commit f0d464d6b18855616fc43d9a25aa6853f86c8e2b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu May 3 16:17:35 2012 +0100

    sna/dri: Balance flush counting
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna.h b/src/sna/sna.h
index 308e329..424738a 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -129,7 +129,6 @@ struct sna_pixmap {
 
 	uint32_t stride;
 	uint32_t clear_color;
-	unsigned flush;
 
 #define SOURCE_BIAS 4
 	uint16_t source_count;
@@ -139,6 +138,7 @@ struct sna_pixmap {
 	uint8_t undamaged :1;
 	uint8_t create :3;
 	uint8_t header :1;
+	uint8_t flush :1;
 };
 
 struct sna_glyph {
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 1a7b6bd..1ad2ff1 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -163,7 +163,7 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna,
 	if (priv == NULL)
 		return NULL;
 
-	if (priv->flush++)
+	if (priv->flush)
 		return priv->gpu_bo;
 
 	tiling = color_tiling(sna, &pixmap->drawable);
@@ -181,6 +181,7 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna,
 
 	/* Don't allow this named buffer to be replaced */
 	priv->pinned = 1;
+	priv->flush = true;
 
 	return priv->gpu_bo;
 }
@@ -322,12 +323,11 @@ static void _sna_dri_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer)
 			struct sna_pixmap *priv = sna_pixmap(private->pixmap);
 
 			/* Undo the DRI markings on this pixmap */
-			assert(priv->flush > 0);
-			if (--priv->flush == 0) {
-				list_del(&priv->list);
-				sna_accel_watch_flush(sna, -1);
-				priv->pinned = private->pixmap == sna->front;
-			}
+			assert(priv->flush);
+			list_del(&priv->list);
+			sna_accel_watch_flush(sna, -1);
+			priv->pinned = private->pixmap == sna->front;
+			priv->flush = false;
 
 			screen->DestroyPixmap(private->pixmap);
 		}
@@ -649,13 +649,13 @@ sna_dri_copy_region(DrawablePtr draw,
 		     struct kgem_bo *, struct kgem_bo *, bool) = sna_dri_copy;
 
 	if (dst_buffer->attachment == DRI2BufferFrontLeft) {
-		dst = sna_pixmap_set_dri(sna, pixmap);
+		dst = sna_pixmap_get_bo(pixmap);
 		copy = sna_dri_copy_to_front;
 	} else
 		dst = get_private(dst_buffer)->bo;
 
 	if (src_buffer->attachment == DRI2BufferFrontLeft) {
-		src = sna_pixmap_set_dri(sna, pixmap);
+		src = sna_pixmap_get_bo(pixmap);
 		assert(copy == sna_dri_copy);
 		copy = sna_dri_copy_from_front;
 	} else
commit d47e98dd64c0b9fe2979db42622c5ee8168e8b35
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu May 3 14:54:17 2012 +0100

    sna: Minor glyph fallback fixes
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c
index f4f115e..07b2a94 100644
--- a/src/sna/sna_glyphs.c
+++ b/src/sna/sna_glyphs.c
@@ -1136,6 +1136,8 @@ glyphs_fallback(CARD8 op,
 		mask_image = dst_image;
 		src_x -= x;
 		src_y -= y;
+		x += dst->pDrawable->x;
+		y += dst->pDrawable->y;
 	}
 
 	do {
@@ -1152,7 +1154,7 @@ glyphs_fallback(CARD8 op,
 			glyph_image = sna_glyph(g)->image;
 			if (glyph_image == NULL) {
 				PicturePtr picture;
-				int dx, dy;
+				int gx, gy;
 
 				picture = GlyphPicture(g)[screen];
 				if (picture == NULL)
@@ -1160,11 +1162,11 @@ glyphs_fallback(CARD8 op,
 
 				glyph_image = image_from_pict(picture,
 							      FALSE,
-							      &dx, &dy);
+							      &gx, &gy);
 				if (!glyph_image)
 					goto next_glyph;
 
-				assert(dx == 0 && dy == 0);
+				assert(gx == 0 && gy == 0);
 				sna_glyph(g)->image = glyph_image;
 			}
 
@@ -1191,11 +1193,15 @@ glyphs_fallback(CARD8 op,
 				int xi = x - g->info.x;
 				int yi = y - g->info.y;
 
-				DBG(("%s: glyph+(%d, %d) to dst (%d, %d)x(%d, %d), src (%d, %d) [op=%d]\n",
+				DBG(("%s: glyph+(%d, %d) to dst (%d, %d)x(%d, %d)/[(%d, %d)x(%d, %d)], src (%d, %d) [op=%d]\n",
 				     __FUNCTION__,
 				     dx, dy,
 				     xi, yi,
 				     g->info.width, g->info.height,
+				     dst->pDrawable->x,
+				     dst->pDrawable->y,
+				     dst->pDrawable->width,
+				     dst->pDrawable->height,
 				     src_x + xi,
 				     src_y + yi,
 				     op));
@@ -1261,7 +1267,7 @@ sna_glyphs(CARD8 op,
 	if (REGION_NUM_RECTS(dst->pCompositeClip) == 0)
 		return;
 
-	if (FALLBACK || DEBUG_NO_RENDER)
+	if (FALLBACK || !sna->have_render)
 		goto fallback;
 
 	if (wedged(sna)) {
commit a1f08b8850616952fb0babe2275eb36b13a380ec
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu May 3 14:53:29 2012 +0100

    sna: Don't discard GPU buffer if we only want to read back for the operation
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index c84b23e..3072345 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1315,9 +1315,11 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 			      pixmap->drawable.width,
 			      pixmap->drawable.height)) {
 		sna_damage_destroy(&priv->gpu_damage);
-		sna_pixmap_free_gpu(sna, priv);
 		priv->undamaged = false;
 
+		if (flags & MOVE_WRITE)
+			sna_pixmap_free_gpu(sna, priv);
+
 		if (pixmap->devPrivate.ptr == NULL &&
 		    !sna_pixmap_alloc_cpu(sna, pixmap, priv, false))
 			return false;
@@ -1458,8 +1460,10 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 	    !sna_pixmap_alloc_cpu(sna, pixmap, priv, priv->gpu_damage != NULL))
 		return false;
 
-	if (priv->gpu_bo == NULL)
+	if (priv->gpu_bo == NULL) {
+		assert(priv->gpu_damage == NULL);
 		goto done;
+	}
 
 	assert(priv->gpu_bo->proxy == NULL);
 	if (priv->clear) {
commit fcccc5528b8696fb4f9b3f9f528673b95d98a907
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu May 3 11:27:44 2012 +0100

    sna: Improve handling of inplace IO for large transfers
    
    If the transfer is large enough to obliterate the caches, then it is
    preferrable to do it inplace rather than upload a proxy texture and
    queue a blit. This helps prevent an inconsistency where one layer
    believes the operation should be done inplace only for the IO layer to
    perform an indirect upload.
    
    Testing show no significant impact upon the cairo-traces, but it does
    prevent x11perf -shmput from exploding.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 51025ea..9eac68e 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -462,7 +462,7 @@ static inline bool kgem_bo_map_will_stall(struct kgem *kgem, struct kgem_bo *bo)
 	     __FUNCTION__, bo->handle,
 	     bo->domain, bo->presumed_offset, bo->size));
 
-	if (!kgem_bo_is_mappable(kgem, bo))
+	if (!kgem_bo_is_mappable(kgem, bo) && kgem_bo_is_busy(bo))
 		return true;
 
 	if (kgem->wedged)
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 2539518..7cec3ff 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -516,17 +516,16 @@ static bool upload_inplace(struct kgem *kgem,
 	 * able to almagamate a series of small writes into a single
 	 * operation.
 	 */
-	if (!bo->map) {
+	if (!bo->map || kgem_bo_map_will_stall(kgem, bo)) {
 		unsigned int bytes = 0;
 		while (n--) {
 			bytes += (box->x2 - box->x1) * (box->y2 - box->y1);
 			box++;
 		}
-		if (bytes * bpp >> 12 < kgem->half_cpu_cache_pages)
-			return false;
+		return bytes * bpp >> 12 >= kgem->half_cpu_cache_pages;
 	}
 
-	return !kgem_bo_map_will_stall(kgem, bo);
+	return true;
 }
 
 bool sna_write_boxes(struct sna *sna, PixmapPtr dst,
@@ -570,7 +569,9 @@ fallback:
 	}
 
 	/* Try to avoid switching rings... */
-	if (!can_blt || kgem->ring == KGEM_RENDER ||
+	if (!can_blt ||
+	    kgem->ring == KGEM_RENDER ||
+	    (kgem->has_semaphores && kgem->mode == KGEM_NONE) ||
 	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
 		PixmapRec tmp;
 
commit 53568e8e49559094ce5b24b8709669f1f76fe2bf
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 30 09:24:06 2012 +0100

    sna/gen7: Fix debug printing of primitives
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 327714f..6c9a833 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -898,7 +898,7 @@ gen7_emit_vertex_elements(struct sna *sna,
 
 	if (op->is_affine) {
 		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
-		w_component = GEN7_VFCOMPONENT_STORE_1_FLT;
+		w_component = GEN7_VFCOMPONENT_STORE_0;
 	} else {
 		src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
 		w_component = GEN7_VFCOMPONENT_STORE_SRC;
@@ -930,7 +930,7 @@ gen7_emit_vertex_elements(struct sna *sna,
 		  0 << GEN7_VE0_OFFSET_SHIFT); /* offsets vb in bytes */
 	OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
 		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
-		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_2_SHIFT |
+		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
 		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
 
 	/* u0, v0, w0 */
diff --git a/src/sna/kgem_debug_gen7.c b/src/sna/kgem_debug_gen7.c
index a7dbbf2..78eae01 100644
--- a/src/sna/kgem_debug_gen7.c
+++ b/src/sna/kgem_debug_gen7.c
@@ -287,8 +287,8 @@ static void primitive_out(struct kgem *kgem, uint32_t *data)
 
 	assert((data[0] & (1<<15)) == 0); /* XXX index buffers */
 
-	for (n = 0; n < data[1]; n++) {
-		int v = data[2] + n;
+	for (n = 0; n < data[2]; n++) {
+		int v = data[3] + n;
 		ErrorF("	[%d:%d] = ", n, v);
 		indirect_vertex_out(kgem, v);
 		ErrorF("\n");
@@ -358,7 +358,7 @@ get_965_depthformat(unsigned int depthformat)
 }
 
 static const char *
-get_965_element_component(uint32_t data, int component)
+get_element_component(uint32_t data, int component)
 {
 	uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7;
 
@@ -387,9 +387,9 @@ get_965_element_component(uint32_t data, int component)
 }
 
 static const char *
-get_965_prim_type(uint32_t data)
+get_prim_type(uint32_t data)
 {
-	uint32_t primtype = (data >> 10) & 0x1f;
+	uint32_t primtype = data & 0x1f;
 
 	switch (primtype) {
 	case 0x01: return "point list";
@@ -656,10 +656,10 @@ int kgem_gen7_decode_3d(struct kgem *kgem, uint32_t offset)
 			i++;
 			kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
 				  "dst offset 0x%02x bytes\n",
-				  get_965_element_component(data[i], 0),
-				  get_965_element_component(data[i], 1),
-				  get_965_element_component(data[i], 2),
-				  get_965_element_component(data[i], 3),
+				  get_element_component(data[i], 0),
+				  get_element_component(data[i], 1),
+				  get_element_component(data[i], 2),
+				  get_element_component(data[i], 3),
 				  (data[i] & 0xff) * 4);
 			i++;
 		}
@@ -673,16 +673,16 @@ int kgem_gen7_decode_3d(struct kgem *kgem, uint32_t offset)
 		return len;
 
 	case 0x7b00:
-		assert(len == 6);
-		kgem_debug_print(data, offset, 0,
-			  "3DPRIMITIVE: %s %s\n",
-			  get_965_prim_type(data[0]),
-			  (data[0] & (1 << 15)) ? "random" : "sequential");
-		kgem_debug_print(data, offset, 1, "vertex count\n");
-		kgem_debug_print(data, offset, 2, "start vertex\n");
-		kgem_debug_print(data, offset, 3, "instance count\n");
-		kgem_debug_print(data, offset, 4, "start instance\n");
-		kgem_debug_print(data, offset, 5, "index bias\n");
+		assert(len == 7);
+		kgem_debug_print(data, offset, 0, "3DPRIMITIVE\n");
+		kgem_debug_print(data, offset, 1, "type %s, %s\n",
+			  get_prim_type(data[1]),
+			  (data[1] & (1 << 15)) ? "random" : "sequential");
+		kgem_debug_print(data, offset, 2, "vertex count\n");
+		kgem_debug_print(data, offset, 3, "start vertex\n");
+		kgem_debug_print(data, offset, 4, "instance count\n");
+		kgem_debug_print(data, offset, 5, "start instance\n");
+		kgem_debug_print(data, offset, 6, "index bias\n");
 		primitive_out(kgem, data);
 		return len;
 	}
commit 01c26a44fdce761781908be11102e7a6a3db523c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed May 2 21:09:27 2012 +0100

    sna: Avoid reducing damage for synchronisation
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 8b52a40..c84b23e 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1523,9 +1523,10 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		priv->undamaged = true;
 	}
 
-	if (sna_damage_contains_box(priv->gpu_damage,
-				    &region->extents) != PIXMAN_REGION_OUT) {
-		DBG(("%s: region (%dx%d) intersects gpu damage\n",
+	if (priv->gpu_damage &&
+	    (DAMAGE_IS_ALL(priv->gpu_damage) ||
+	     sna_damage_overlaps_box(priv->gpu_damage, &region->extents))) {
+		DBG(("%s: region (%dx%d) overlaps gpu damage\n",
 		     __FUNCTION__,
 		     region->extents.x2 - region->extents.x1,
 		     region->extents.y2 - region->extents.y1));
@@ -1538,9 +1539,18 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 				       priv->gpu_bo, 0, 0,
 				       pixmap, 0, 0,
 				       &region->extents, 1);
-		} else {
+			goto done;
+		}
+
+		if (sna_damage_contains_box(priv->gpu_damage,
+					    &region->extents) != PIXMAN_REGION_OUT) {
 			RegionRec want, *r = region;
 
+			DBG(("%s: region (%dx%d) intersects gpu damage\n",
+			     __FUNCTION__,
+			     region->extents.x2 - region->extents.x1,
+			     region->extents.y2 - region->extents.y1));
+
 			/* Expand the region to move 32x32 pixel blocks at a
 			 * time, as we assume that we will continue writing
 			 * afterwards and so aim to coallesce subsequent
diff --git a/src/sna/sna_damage.c b/src/sna/sna_damage.c
index f2d682d..b97edbe 100644
--- a/src/sna/sna_damage.c
+++ b/src/sna/sna_damage.c
@@ -984,20 +984,6 @@ static bool box_contains(const BoxRec *a, const BoxRec *b)
 	return true;
 }
 
-static inline Bool sna_damage_maybe_contains_box(const struct sna_damage *damage,
-						 const BoxRec *box)
-{
-	if (box->x2 <= damage->extents.x1 ||
-	    box->x1 >= damage->extents.x2)
-		return FALSE;
-
-	if (box->y2 <= damage->extents.y1 ||
-	    box->y1 >= damage->extents.y2)
-		return FALSE;
-
-	return TRUE;
-}
-
 static struct sna_damage *__sna_damage_subtract(struct sna_damage *damage,
 						RegionPtr region)
 {
@@ -1011,7 +997,7 @@ static struct sna_damage *__sna_damage_subtract(struct sna_damage *damage,
 
 	assert(RegionNotEmpty(region));
 
-	if (!sna_damage_maybe_contains_box(damage, &region->extents))
+	if (!sna_damage_overlaps_box(damage, &region->extents))
 		return damage;
 
 
@@ -1096,7 +1082,7 @@ inline static struct sna_damage *__sna_damage_subtract_box(struct sna_damage *da
 		return NULL;
 	}
 
-	if (!sna_damage_maybe_contains_box(damage, box))
+	if (!sna_damage_overlaps_box(damage, box))
 		return damage;
 
 	if (box_contains(box, &damage->extents)) {
@@ -1189,7 +1175,7 @@ static struct sna_damage *__sna_damage_subtract_boxes(struct sna_damage *damage,
 	extents.y1 += dy;
 	extents.y2 += dy;
 
-	if (!sna_damage_maybe_contains_box(damage, &extents))
+	if (!sna_damage_overlaps_box(damage, &extents))
 		return damage;
 
 	if (n == 1)
@@ -1248,7 +1234,7 @@ static int __sna_damage_contains_box(struct sna_damage *damage,
 	if (damage->mode == DAMAGE_ALL)
 		return PIXMAN_REGION_IN;
 
-	if (!sna_damage_maybe_contains_box(damage, box))
+	if (!sna_damage_overlaps_box(damage, box))
 		return PIXMAN_REGION_OUT;
 
 	ret = pixman_region_contains_rectangle(&damage->region, (BoxPtr)box);
@@ -1297,7 +1283,7 @@ bool _sna_damage_contains_box__no_reduce(const struct sna_damage *damage,
 	if (damage->mode == DAMAGE_SUBTRACT)
 		return false;
 
-	if (!sna_damage_maybe_contains_box(damage, box))
+	if (!sna_damage_overlaps_box(damage, box))
 		return false;
 
 	return pixman_region_contains_rectangle((RegionPtr)&damage->region,
diff --git a/src/sna/sna_damage.h b/src/sna/sna_damage.h
index 422a124..fb661b2 100644
--- a/src/sna/sna_damage.h
+++ b/src/sna/sna_damage.h
@@ -190,6 +190,21 @@ static inline Bool sna_damage_intersect(struct sna_damage *damage,
 	return _sna_damage_intersect(damage, region, result);
 }
 
+static inline bool
+sna_damage_overlaps_box(const struct sna_damage *damage,
+			const BoxRec *box)
+{
+	if (box->x2 <= damage->extents.x1 ||
+	    box->x1 >= damage->extents.x2)
+		return FALSE;
+
+	if (box->y2 <= damage->extents.y1 ||
+	    box->y1 >= damage->extents.y2)
+		return FALSE;
+
+	return TRUE;
+}
+
 int _sna_damage_contains_box(struct sna_damage *damage,
 			     const BoxRec *box);
 static inline int sna_damage_contains_box(struct sna_damage *damage,
commit f4c34e9ab32f31669896b8f626195827a85af337
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed May 2 19:09:10 2012 +0100

    sna: Bring back the minimum alignment for G33
    
    The underlying cause is still not fixed. It should be possible to use
    the much laxer alignment for single-stream linear. Still no idea how I
    fail to convince the GPU to drop the depth buffer.
    
    Reported-by: Matti Hamalainen <ccr at tnsp.org>
    References: https://bugs.freedesktop.org/show_bug.cgi?id=49391
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index d519ed6..49fa173 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -600,6 +600,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 		/* 865g cannot handle a batch spanning multiple pages */
 		kgem->max_batch_size = PAGE_SIZE / sizeof(uint32_t);
 
+	kgem->min_alignment = 4;
+	if (gen < 40)
+		kgem->min_alignment = 64;
+
 	kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
 
 	list_init(&kgem->active_partials);
@@ -770,8 +774,8 @@ static uint32_t kgem_untiled_pitch(struct kgem *kgem,
 				   uint32_t width, uint32_t bpp,
 				   bool scanout)
 {
-	width = ALIGN(width, 4) * bpp >> 3;
-	return ALIGN(width, scanout ? 64 : 4);
+	width = ALIGN(width, 2) * bpp >> 3;
+	return ALIGN(width, scanout ? 64 : kgem->min_alignment);
 }
 
 void kgem_get_tile_size(struct kgem *kgem, int tiling,
@@ -832,13 +836,17 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
 			tile_width = 512;
 			tile_height = kgem->gen < 30 ? 16 : 8;
 		} else {
-			tile_width = scanout ? 64 : 4 * bpp >> 3;
-			tile_height = 4;
+			tile_width = 2 * bpp >> 3;
+			tile_width = ALIGN(tile_width,
+					   scanout ? 64 : kgem->min_alignment);
+			tile_height = 2;
 		}
 	} else switch (tiling) {
 	default:
 	case I915_TILING_NONE:
-		tile_width = scanout ? 64 : 4 * bpp >> 3;
+		tile_width = 2 * bpp >> 3;
+		tile_width = ALIGN(tile_width,
+				   scanout ? 64 : kgem->min_alignment);
 		tile_height = 2;
 		break;
 	case I915_TILING_X:
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index ad2fe84..51025ea 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -143,6 +143,7 @@ struct kgem {
 	uint16_t nfence;
 	uint16_t wait;
 	uint16_t max_batch_size;
+	uint16_t min_alignment;
 
 	uint32_t flush:1;
 	uint32_t need_expire:1;
commit 10b4a9bb5f46ab9d9c8b165084ce4174b54a8d39
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed May 2 12:45:09 2012 +0100

    sna: Always try to operate inplace if we an LLC gpu bo

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index db2442a..8b52a40 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -904,6 +904,9 @@ static inline bool pixmap_inplace(struct sna *sna,
 	if (priv->mapped)
 		return true;
 
+	if (sna->kgem.has_llc && pixmap != sna->front)
+		return !priv->cpu_bo;
+
 	return (pixmap->devKind * pixmap->drawable.height >> 12) >
 		sna->kgem.half_cpu_cache_pages;
 }
@@ -1264,6 +1267,9 @@ static inline bool region_inplace(struct sna *sna,
 		return false;
 	}
 
+	if (sna->kgem.has_llc && pixmap != sna->front)
+		return !priv->cpu_bo;
+
 	DBG(("%s: (%dx%d), inplace? %d\n",
 	     __FUNCTION__,
 	     region->extents.x2 - region->extents.x1,
commit dd80fb00bf7acf37dc3b9125431a12b67d7e92d3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed May 2 11:31:42 2012 +0100

    sna: Fallback for glyphs too large for XY_TEXT_IMMEDIATE
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 12017bd..db2442a 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -6746,7 +6746,6 @@ spans_fallback:
 		}
 
 		gc->ops = (GCOps *)&sna_gc_ops;
-		assert_pixmap_contains_box(data.pixmap, &data.region.extents);
 		if (data.damage) {
 			if (data.dx | data.dy)
 				pixman_region_translate(&data.region, data.dx, data.dy);
@@ -10622,8 +10621,6 @@ sna_glyph_extents(FontPtr font,
 
 		extents->overallWidth += p->metrics.characterWidth;
 	}
-
-	assert(extents->overallWidth > 0);
 }
 
 static bool sna_set_glyph(CharInfoPtr in, CharInfoPtr out)
@@ -10716,6 +10713,16 @@ inline static bool sna_get_glyph16(FontPtr font, struct sna_font *priv,
 	return sna_set_glyph(ret, *out = p);
 }
 
+static inline bool sna_font_too_large(FontPtr font)
+{
+	int top = max(FONTMAXBOUNDS(font, ascent), FONTASCENT(font));
+	int bot = max(FONTMAXBOUNDS(font, descent), FONTDESCENT(font));
+	int width = max(FONTMAXBOUNDS(font, characterWidth), -FONTMINBOUNDS(font, characterWidth));
+	DBG(("%s: (%d + %d) x %d: %d\n", __FUNCTION__,
+	     top, bot, width, (top + bot) * (width + 7)/8));
+	return (top + bot) * (width + 7)/8 > 124;
+}
+
 static int
 sna_poly_text8(DrawablePtr drawable, GCPtr gc,
 	       int x, int y,
@@ -10731,6 +10738,9 @@ sna_poly_text8(DrawablePtr drawable, GCPtr gc,
 	if (drawable->depth < 8)
 		goto fallback;
 
+	if (sna_font_too_large(gc->font))
+		goto fallback;
+
 	for (i = n = 0; i < count; i++) {
 		if (sna_get_glyph8(gc->font, priv, chars[i], &info[n]))
 			n++;
@@ -10820,6 +10830,9 @@ sna_poly_text16(DrawablePtr drawable, GCPtr gc,
 	if (drawable->depth < 8)
 		goto fallback;
 
+	if (sna_font_too_large(gc->font))
+		goto fallback;
+
 	for (i = n = 0; i < count; i++) {
 		if (sna_get_glyph16(gc->font, priv, chars[i], &info[n]))
 			n++;
@@ -10909,6 +10922,9 @@ sna_image_text8(DrawablePtr drawable, GCPtr gc,
 	if (drawable->depth < 8)
 		goto fallback;
 
+	if (sna_font_too_large(gc->font))
+		goto fallback;
+
 	for (i = n = 0; i < count; i++) {
 		if (sna_get_glyph8(gc->font, priv, chars[i], &info[n]))
 			n++;
@@ -11000,6 +11016,9 @@ sna_image_text16(DrawablePtr drawable, GCPtr gc,
 	if (drawable->depth < 8)
 		goto fallback;
 
+	if (sna_font_too_large(gc->font))
+		goto fallback;
+
 	for (i = n = 0; i < count; i++) {
 		if (sna_get_glyph16(gc->font, priv, chars[i], &info[n]))
 			n++;
@@ -11335,6 +11354,9 @@ sna_image_glyph(DrawablePtr drawable, GCPtr gc,
 	if (!PM_IS_SOLID(drawable, gc->planemask))
 		goto fallback;
 
+	if (sna_font_too_large(gc->font))
+		goto fallback;
+
 	if ((bo = sna_drawable_use_bo(drawable, true,
 				      &region.extents, &damage)) &&
 	    sna_reversed_glyph_blt(drawable, gc, x, y, n, info, base,
@@ -11412,6 +11434,9 @@ sna_poly_glyph(DrawablePtr drawable, GCPtr gc,
 	if (!gc_is_solid(gc, &fg))
 		goto fallback;
 
+	if (sna_font_too_large(gc->font))
+		goto fallback;
+
 	if ((bo = sna_drawable_use_bo(drawable, true,
 				      &region.extents, &damage)) &&
 	    sna_reversed_glyph_blt(drawable, gc, x, y, n, info, base,
commit 7e09babb3e9e23882db30ee7d0c22c503962faa9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue May 1 12:37:45 2012 +0100

    sna: Only attempt to reuse exported scanout buffers
    
    Yet more mesa w/a.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 4302952..d519ed6 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -4148,13 +4148,13 @@ void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
 void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo)
 {
 	bo->needs_flush = true;
-	bo->reusable = true;
 	bo->flush = false;
 
 	if (!bo->scanout)
 		return;
 
 	bo->scanout = false;
+	bo->reusable = true;
 }
 
 struct kgem_bo *
commit b4b32e7a0172a74372f800e9c74d639a23c5ff34
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue May 1 11:15:00 2012 +0100

    sna: Fast-path unclipped glyphs
    
    Avoid the redundant computation of the glyph intersection with the
    drawable bounding box.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c
index 87371aa..f4f115e 100644
--- a/src/sna/sna_glyphs.c
+++ b/src/sna/sna_glyphs.c
@@ -427,8 +427,12 @@ glyphs_to_dst(struct sna *sna,
 	     __FUNCTION__, op, src_x, src_y, nlist,
 	     list->xOff, list->yOff, dst->pDrawable->x, dst->pDrawable->y));
 
-	rects = REGION_RECTS(dst->pCompositeClip);
-	nrect = REGION_NUM_RECTS(dst->pCompositeClip);
+	if (dst->pCompositeClip->extents.x2 - dst->pCompositeClip->extents.x1 < dst->pDrawable->width ||
+	    dst->pCompositeClip->extents.y2 - dst->pCompositeClip->extents.y1 < dst->pDrawable->height) {
+		rects = REGION_RECTS(dst->pCompositeClip);
+		nrect = REGION_NUM_RECTS(dst->pCompositeClip);
+	} else
+		nrect = 0;
 
 	x = dst->pDrawable->x;
 	y = dst->pDrawable->y;
@@ -476,47 +480,68 @@ glyphs_to_dst(struct sna *sna,
 				glyph_atlas = priv.atlas;
 			}
 
-			for (i = 0; i < nrect; i++) {
+			if (nrect) {
+				for (i = 0; i < nrect; i++) {
+					struct sna_composite_rectangles r;
+					int16_t dx, dy;
+					int16_t x2, y2;
+
+					r.dst.x = x - glyph->info.x;
+					r.dst.y = y - glyph->info.y;
+					x2 = r.dst.x + glyph->info.width;
+					y2 = r.dst.y + glyph->info.height;
+					dx = dy = 0;
+
+					DBG(("%s: glyph=(%d, %d), (%d, %d), clip=(%d, %d), (%d, %d)\n",
+					     __FUNCTION__,
+					     r.dst.x, r.dst.y, x2, y2,
+					     rects[i].x1, rects[i].y1,
+					     rects[i].x2, rects[i].y2));
+					if (rects[i].y1 >= y2)
+						break;
+
+					if (r.dst.x < rects[i].x1)
+						dx = rects[i].x1 - r.dst.x, r.dst.x = rects[i].x1;
+					if (x2 > rects[i].x2)
+						x2 = rects[i].x2;
+					if (r.dst.y < rects[i].y1)
+						dy = rects[i].y1 - r.dst.y, r.dst.y = rects[i].y1;
+					if (y2 > rects[i].y2)
+						y2 = rects[i].y2;
+
+					if (r.dst.x < x2 && r.dst.y < y2) {
+						DBG(("%s: blt=(%d, %d), (%d, %d)\n",
+						     __FUNCTION__, r.dst.x, r.dst.y, x2, y2));
+
+						r.src.x = r.dst.x + src_x;
+						r.src.y = r.dst.y + src_y;
+						r.mask.x = dx + priv.coordinate.x;
+						r.mask.y = dy + priv.coordinate.y;
+						r.width  = x2 - r.dst.x;
+						r.height = y2 - r.dst.y;
+						tmp.blt(sna, &tmp, &r);
+						apply_damage(&tmp, &r);
+					}
+				}
+			} else {
 				struct sna_composite_rectangles r;
-				int16_t dx, dy;
-				int16_t x2, y2;
 
 				r.dst.x = x - glyph->info.x;
 				r.dst.y = y - glyph->info.y;
-				x2 = r.dst.x + glyph->info.width;
-				y2 = r.dst.y + glyph->info.height;
-				dx = dy = 0;
+				r.src.x = r.dst.x + src_x;
+				r.src.y = r.dst.y + src_y;
+				r.mask.x = priv.coordinate.x;
+				r.mask.y = priv.coordinate.y;
+				r.width  = glyph->info.width;
+				r.height = glyph->info.height;
 
-				DBG(("%s: glyph=(%d, %d), (%d, %d), clip=(%d, %d), (%d, %d)\n",
+				DBG(("%s: glyph=(%d, %d)x(%d, %d), unclipped\n",
 				     __FUNCTION__,
-				     r.dst.x, r.dst.y, x2, y2,
-				     rects[i].x1, rects[i].y1,
-				     rects[i].x2, rects[i].y2));
-				if (rects[i].y1 >= y2)
-					break;
+				     r.dst.x, r.dst.y,
+				     r.width, r.height));
 
-				if (r.dst.x < rects[i].x1)
-					dx = rects[i].x1 - r.dst.x, r.dst.x = rects[i].x1;
-				if (x2 > rects[i].x2)
-					x2 = rects[i].x2;
-				if (r.dst.y < rects[i].y1)
-					dy = rects[i].y1 - r.dst.y, r.dst.y = rects[i].y1;
-				if (y2 > rects[i].y2)
-					y2 = rects[i].y2;
-
-				if (r.dst.x < x2 && r.dst.y < y2) {
-					DBG(("%s: blt=(%d, %d), (%d, %d)\n",
-					     __FUNCTION__, r.dst.x, r.dst.y, x2, y2));
-
-					r.src.x = r.dst.x + src_x;
-					r.src.y = r.dst.y + src_y;
-					r.mask.x = dx + priv.coordinate.x;
-					r.mask.y = dy + priv.coordinate.y;
-					r.width  = x2 - r.dst.x;
-					r.height = y2 - r.dst.y;
-					tmp.blt(sna, &tmp, &r);
-					apply_damage(&tmp, &r);
-				}
+				tmp.blt(sna, &tmp, &r);
+				apply_damage(&tmp, &r);
 			}
 
 next_glyph:
commit ffdf9aca12adcfa1ec7ab7a1706873105a5f0d4a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Apr 29 21:59:52 2012 +0100

    legacy/i810: hwmc additionally depends upon building DRI
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/legacy/i810/Makefile.am b/src/legacy/i810/Makefile.am
index a1bdd85..e7fa04f 100644
--- a/src/legacy/i810/Makefile.am
+++ b/src/legacy/i810/Makefile.am
@@ -25,10 +25,10 @@ liblegacy_i810_la_SOURCES +=\
          i810_dri.c \
          i810_dri.h \
 	 $(NULL)
-endif
 
 if XVMC
 liblegacy_i810_la_SOURCES += \
 	i810_hwmc.c \
 	$(NULL)
 endif
+endif
commit 444da84c47266bcbbdf5121507901de8eb36f11b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Apr 29 21:49:04 2012 +0100

    configure: Version bump for 2.19.0 release

diff --git a/NEWS b/NEWS
index 9d2b15e..cc74879 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,29 @@
+Release 2.19.0 (2012-04-29)
+===========================
+More stability fixes for UXA and support for another variant of IvyBridge.
+Given the severity of the stability fixes, I strongly recommend everybody
+to upgrade to 2.19.0.
+
+* Prevent waiting on scanlines whilst not in control of the VT and therefore
+  whilst referencing foreign CRTC configurations.
+
+* Pixmap (and bo leak) during fallback glyph composition
+
+* Remove broken acceleration for rendering glyphs directly upon the
+  destination pixmap, exposed by cairo-1.12.0 (and coincidentally fix
+  another Pixmap leak upon fallback handling).
+
+* Add support for Ivy Bridge GT2 Server chipset [PCI id 0x016a]
+
+* Remove broken damage flushing with CompositeRectangles
+  https://bugs.freedesktop.org/show_bug.cgi?id=32547
+
+* Fix crash upon server start with multiple monitors
+  https://bugs.freedesktop.org/show_bug.cgi?id=47395
+
+* Fix composition issues resulting from overly aggressive Pixmap reuse
+  https://bugs.freedesktop.org/show_bug.cgi?id=47345
+
 Release 2.18.0 (2012-02-24)
 ===========================
 Time passes, a few more bugs have crept out of the woodwork that are a
diff --git a/configure.ac b/configure.ac
index 5124100..3770983 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-intel],
-        [2.18.0],
+        [2.19.0],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])
commit a206a1eee1fb799d567d68db564d663cfc66f6cc
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Apr 28 01:54:43 2012 +0100

    sna: Tune relocation array size
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 4def6b1..ad2fe84 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -171,7 +171,7 @@ struct kgem {
 
 	uint32_t batch[4*1024];
 	struct drm_i915_gem_exec_object2 exec[256];
-	struct drm_i915_gem_relocation_entry reloc[384];
+	struct drm_i915_gem_relocation_entry reloc[612];
 };
 
 #define KGEM_BATCH_RESERVED 1
commit 4f1908c651ef9e2af33d8831466a605234978c46
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 27 23:19:56 2012 +0100

    sna: PolyPoint only uses the gc->fgPixel
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index f4d3de4..12017bd 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -5560,8 +5560,7 @@ static Bool
 sna_poly_point_blt(DrawablePtr drawable,
 		   struct kgem_bo *bo,
 		   struct sna_damage **damage,
-		   GCPtr gc, uint32_t pixel,
-		   int mode, int n, DDXPointPtr pt,
+		   GCPtr gc, int mode, int n, DDXPointPtr pt,
 		   bool clipped)
 {
 	PixmapPtr pixmap = get_drawable_pixmap(drawable);
@@ -5574,7 +5573,7 @@ sna_poly_point_blt(DrawablePtr drawable,
 	DBG(("%s: alu=%d, pixel=%08lx, clipped?=%d\n",
 	     __FUNCTION__, gc->alu, gc->fgPixel, clipped));
 
-	if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, pixel))
+	if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel))
 		return FALSE;
 
 	get_drawable_deltas(drawable, pixmap, &dx, &dy);
@@ -5704,7 +5703,6 @@ sna_poly_point(DrawablePtr drawable, GCPtr gc,
 	struct sna *sna = to_sna_from_pixmap(pixmap);
 	RegionRec region;
 	unsigned flags;
-	uint32_t color;
 
 	DBG(("%s(mode=%d, n=%d, pt[0]=(%d, %d)\n",
 	     __FUNCTION__, mode, n, pt[0].x, pt[0].y));
@@ -5729,7 +5727,7 @@ sna_poly_point(DrawablePtr drawable, GCPtr gc,
 		goto fallback;
 	}
 
-	if (PM_IS_SOLID(drawable, gc->planemask) && gc_is_solid(gc, &color)) {
+	if (PM_IS_SOLID(drawable, gc->planemask)) {
 		struct sna_damage **damage;
 		struct kgem_bo *bo;
 
@@ -5738,7 +5736,7 @@ sna_poly_point(DrawablePtr drawable, GCPtr gc,
 
 		if ((bo = sna_drawable_use_bo(drawable, false, &region.extents, &damage)) &&
 		    sna_poly_point_blt(drawable, bo, damage,
-				       gc, color, mode, n, pt, flags & 2))
+				       gc, mode, n, pt, flags & 2))
 			return;
 	}
 
commit 8453034c7dd893f1d4c32ee87724f3a13137595d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 27 22:08:51 2012 +0100

    sna/gen6: Allow ring switching at the start of a batch
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 5bbe5e3..38fb024 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -57,7 +57,7 @@
 #define NO_FILL_BOXES 0
 #define NO_CLEAR 0
 
-#define NO_RING_SWITCH 1
+#define NO_RING_SWITCH 0
 
 #define GEN6_MAX_SIZE 8192
 
@@ -2332,7 +2332,7 @@ static bool prefer_blt_ring(struct sna *sna)
 
 static bool can_switch_rings(struct sna *sna)
 {
-	return sna->kgem.has_semaphores && !NO_RING_SWITCH;
+	return sna->kgem.mode == KGEM_NONE && sna->kgem.has_semaphores && !NO_RING_SWITCH;
 }
 
 static Bool
@@ -2369,6 +2369,8 @@ try_blt(struct sna *sna,
 	if (can_switch_rings(sna)) {
 		if (sna_picture_is_solid(src, NULL))
 			return TRUE;
+		if (src->pDrawable)
+			return TRUE;
 	}
 
 	return FALSE;
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 2228873..327714f 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -57,7 +57,7 @@
 #define NO_FILL_BOXES 0
 #define NO_CLEAR 0
 
-#define NO_RING_SWITCH 1
+#define NO_RING_SWITCH 0
 
 #define GEN7_MAX_SIZE 16384
 
@@ -2425,7 +2425,7 @@ static bool prefer_blt_ring(struct sna *sna)
 
 static bool can_switch_rings(struct sna *sna)
 {
-	return sna->kgem.has_semaphores && !NO_RING_SWITCH;
+	return sna->kgem.mode == KGEM_NONE && sna->kgem.has_semaphores && !NO_RING_SWITCH;
 }
 
 static Bool
@@ -2462,6 +2462,8 @@ try_blt(struct sna *sna,
 	if (can_switch_rings(sna)) {
 		if (sna_picture_is_solid(src, NULL))
 			return TRUE;
+		if (src->pDrawable)
+			return TRUE;
 	}
 
 	return FALSE;
commit 93ad7793894787600c5074917c753fa7c6816134
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 27 22:03:55 2012 +0100

    sna: Tweak semaphores-enabled heuristic
    
    The kernel module now defaults to -1, confusing the test.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 29f0e29..4302952 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -574,7 +574,7 @@ static bool semaphores_enabled(void)
 	if (file) {
 		int value;
 		if (fscanf(file, "%d", &value) == 1)
-			detected = value > 0;
+			detected = value != 0;
 		fclose(file);
 	}
 
commit 986dbdda3bf8dcf208e55543d8a3393c4b53f10b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 27 19:21:44 2012 +0100

    sna: Tweak placement choice for high-overhead operations
    
    Some operations cost more to setup than to transfer data back and forth!
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index a5c1648..f4d3de4 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1923,6 +1923,7 @@ box_inplace(PixmapPtr pixmap, const BoxRec *box)
 
 static inline struct kgem_bo *
 sna_drawable_use_bo(DrawablePtr drawable,
+		    bool prefer_gpu,
 		    const BoxRec *box,
 		    struct sna_damage ***damage)
 {
@@ -1949,6 +1950,9 @@ sna_drawable_use_bo(DrawablePtr drawable,
 		goto use_cpu_bo;
 	}
 
+	if (!prefer_gpu && priv->gpu_bo && !kgem_bo_is_busy(priv->gpu_bo))
+		goto use_cpu_bo;
+
 	if (DAMAGE_IS_ALL(priv->gpu_damage))
 		goto use_gpu_bo;
 
@@ -1968,6 +1972,12 @@ sna_drawable_use_bo(DrawablePtr drawable,
 			goto use_cpu_bo;
 		}
 
+		if (priv->cpu_damage && !prefer_gpu) {
+			DBG(("%s: prefer cpu",
+			     __FUNCTION__));
+			goto use_cpu_bo;
+		}
+
 		if (!sna_pixmap_move_to_gpu(pixmap, MOVE_WRITE | MOVE_READ))
 			goto use_cpu_bo;
 
@@ -2932,7 +2942,8 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	int n;
 	uint8_t rop = copy_ROP[gc->alu];
 
-	bo = sna_drawable_use_bo(&pixmap->drawable, &region->extents, &damage);
+	bo = sna_drawable_use_bo(&pixmap->drawable, true,
+				 &region->extents, &damage);
 	if (bo == NULL)
 		return false;
 
@@ -3054,7 +3065,8 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	if (gc->alu != GXcopy)
 		return false;
 
-	bo = sna_drawable_use_bo(&pixmap->drawable, &region->extents, &damage);
+	bo = sna_drawable_use_bo(&pixmap->drawable, true,
+				 &region->extents, &damage);
 	if (bo == NULL)
 		return false;
 
@@ -4952,7 +4964,7 @@ sna_fill_spans(DrawablePtr drawable, GCPtr gc, int n,
 	if (!PM_IS_SOLID(drawable, gc->planemask))
 		goto fallback;
 
-	bo = sna_drawable_use_bo(drawable, &region.extents, &damage);
+	bo = sna_drawable_use_bo(drawable, true, &region.extents, &damage);
 	if (bo) {
 		if (gc_is_solid(gc, &color)) {
 			DBG(("%s: trying solid fill [alu=%d, pixel=%08lx] blt paths\n",
@@ -5502,7 +5514,7 @@ sna_copy_plane(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	if (!PM_IS_SOLID(dst, gc->planemask))
 		goto fallback;
 
-	arg.bo = sna_drawable_use_bo(dst, &region.extents, &arg.damage);
+	arg.bo = sna_drawable_use_bo(dst, true, &region.extents, &arg.damage);
 	if (arg.bo) {
 		if (arg.bo->tiling == I915_TILING_Y) {
 			assert(arg.bo == sna_pixmap_get_bo(pixmap));
@@ -5724,7 +5736,7 @@ sna_poly_point(DrawablePtr drawable, GCPtr gc,
 		DBG(("%s: trying solid fill [%08lx] blt paths\n",
 		     __FUNCTION__, gc->fgPixel));
 
-		if ((bo = sna_drawable_use_bo(drawable, &region.extents, &damage)) &&
+		if ((bo = sna_drawable_use_bo(drawable, false, &region.extents, &damage)) &&
 		    sna_poly_point_blt(drawable, bo, damage,
 				       gc, color, mode, n, pt, flags & 2))
 			return;
@@ -6417,50 +6429,10 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc,
 inline static bool
 _use_zero_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
 {
-	PixmapPtr pixmap;
-	struct sna_pixmap *priv;
-	BoxRec area;
-	int16_t dx, dy;
-
 	if (USE_ZERO_SPANS)
 		return USE_ZERO_SPANS > 0;
 
-	if (!drawable_gc_inplace_hint(drawable, gc))
-		return TRUE;
-
-	/* XXX check for GPU stalls on the gc (stipple, tile, etc) */
-
-	pixmap = get_drawable_pixmap(drawable);
-	priv = sna_pixmap(pixmap);
-	if (priv == NULL)
-		return FALSE;
-
-	if (DAMAGE_IS_ALL(priv->cpu_damage))
-		return FALSE;
-
-	if (priv->stride == 0 || priv->gpu_bo == NULL)
-		return FALSE;
-
-	if (!kgem_bo_is_busy(priv->gpu_bo))
-		return FALSE;
-
-	if (DAMAGE_IS_ALL(priv->gpu_damage))
-		return TRUE;
-
-	if (priv->gpu_damage == NULL)
-		return FALSE;
-
-	get_drawable_deltas(drawable, pixmap, &dx, &dy);
-	area = *extents;
-	area.x1 += dx;
-	area.x2 += dx;
-	area.y1 += dy;
-	area.y2 += dy;
-	DBG(("%s extents (%d, %d), (%d, %d)\n", __FUNCTION__,
-	     area.x1, area.y1, area.x2, area.y2));
-
-	return sna_damage_contains_box(priv->gpu_damage,
-				       &area) != PIXMAN_REGION_OUT;
+	return !drawable_gc_inplace_hint(drawable, gc);
 }
 
 static bool
@@ -6481,50 +6453,10 @@ use_zero_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
 inline static bool
 _use_wide_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
 {
-	PixmapPtr pixmap;
-	struct sna_pixmap *priv;
-	BoxRec area;
-	int16_t dx, dy;
-
 	if (USE_WIDE_SPANS)
 		return USE_WIDE_SPANS > 0;
 
-	if (!drawable_gc_inplace_hint(drawable, gc))
-		return TRUE;
-
-	/* XXX check for GPU stalls on the gc (stipple, tile, etc) */
-
-	pixmap = get_drawable_pixmap(drawable);
-	priv = sna_pixmap(pixmap);
-	if (priv == NULL)
-		return FALSE;
-
-	if (DAMAGE_IS_ALL(priv->cpu_damage))
-		return FALSE;
-
-	if (priv->stride == 0 || priv->gpu_bo == NULL)
-		return FALSE;
-
-	if (!kgem_bo_is_busy(priv->gpu_bo))
-		return FALSE;
-
-	if (DAMAGE_IS_ALL(priv->gpu_damage))
-		return TRUE;
-
-	if (priv->gpu_damage == NULL)
-		return FALSE;
-
-	get_drawable_deltas(drawable, pixmap, &dx, &dy);
-	area = *extents;
-	area.x1 += dx;
-	area.x2 += dx;
-	area.y1 += dy;
-	area.y2 += dy;
-	DBG(("%s extents (%d, %d), (%d, %d)\n", __FUNCTION__,
-	     area.x1, area.y1, area.x2, area.y2));
-
-	return sna_damage_contains_box(priv->gpu_damage,
-				       &area) != PIXMAN_REGION_OUT;
+	return !drawable_gc_inplace_hint(drawable, gc);
 }
 
 static bool
@@ -6605,7 +6537,7 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
 		     __FUNCTION__, (unsigned)color));
 
 		if (data.flags & 4) {
-			data.bo = sna_drawable_use_bo(drawable,
+			data.bo = sna_drawable_use_bo(drawable, true,
 						      &data.region.extents,
 						      &data.damage);
 			if (data.bo &&
@@ -6616,8 +6548,8 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
 					      data.flags & 2))
 				return;
 		} else { /* !rectilinear */
-			if (use_zero_spans(drawable, gc, &data.region.extents) &&
-			    (data.bo = sna_drawable_use_bo(drawable,
+			if ((data.bo = sna_drawable_use_bo(drawable,
+							   use_zero_spans(drawable, gc, &data.region.extents),
 							   &data.region.extents,
 							   &data.damage)) &&
 			    sna_poly_zero_line_blt(drawable,
@@ -6630,7 +6562,8 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
 		}
 	} else if (data.flags & 4) {
 		/* Try converting these to a set of rectangles instead */
-		data.bo = sna_drawable_use_bo(drawable, &data.region.extents, &data.damage);
+		data.bo = sna_drawable_use_bo(drawable, true,
+					      &data.region.extents, &data.damage);
 		if (data.bo) {
 			DDXPointRec p1, p2;
 			xRectangle *rect;
@@ -6701,8 +6634,9 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
 	}
 
 spans_fallback:
-	if (use_wide_spans(drawable, gc, &data.region.extents) &&
-	    (data.bo = sna_drawable_use_bo(drawable, &data.region.extents, &data.damage))) {
+	if ((data.bo = sna_drawable_use_bo(drawable,
+					   use_wide_spans(drawable, gc, &data.region.extents),
+					   &data.region.extents, &data.damage))) {
 		DBG(("%s: converting line into spans\n", __FUNCTION__));
 		get_drawable_deltas(drawable, data.pixmap, &data.dx, &data.dy);
 		sna_gc(gc)->priv = &data;
@@ -7570,12 +7504,6 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 	if (!PM_IS_SOLID(drawable, gc->planemask))
 		goto fallback;
 
-	data.bo = sna_drawable_use_bo(drawable,
-				      &data.region.extents,
-				      &data.damage);
-	if (data.bo == NULL)
-		goto fallback;
-
 	if (gc->lineStyle != LineSolid || gc->lineWidth > 1)
 		goto spans_fallback;
 	if (gc_is_solid(gc, &color)) {
@@ -7583,14 +7511,20 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 		     __FUNCTION__, (unsigned)color, data.flags));
 
 		if (data.flags & 4) {
-			if (sna_poly_segment_blt(drawable,
+			if ((data.bo = sna_drawable_use_bo(drawable, true,
+							   &data.region.extents,
+							   &data.damage)) &&
+			     sna_poly_segment_blt(drawable,
 						 data.bo, data.damage,
 						 gc, color, n, seg,
 						 &data.region.extents,
 						 data.flags & 2))
 				return;
 		} else {
-			if (use_zero_spans(drawable, gc, &data.region.extents) &&
+			if ((data.bo = sna_drawable_use_bo(drawable,
+							   use_zero_spans(drawable, gc, &data.region.extents),
+							   &data.region.extents,
+							   &data.damage)) &&
 			    sna_poly_zero_segment_blt(drawable,
 						      data.bo, data.damage,
 						      gc, n, seg,
@@ -7603,6 +7537,12 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 		xRectangle *rect;
 		int i;
 
+		data.bo = sna_drawable_use_bo(drawable, true,
+					      &data.region.extents,
+					      &data.damage);
+		if (data.bo == NULL)
+			goto fallback;
+
 		DBG(("%s: converting to rectagnles\n", __FUNCTION__));
 
 		rect = malloc (n * sizeof (xRectangle));
@@ -7660,7 +7600,10 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 	}
 
 spans_fallback:
-	if (use_wide_spans(drawable, gc, &data.region.extents)) {
+	if ((data.bo = sna_drawable_use_bo(drawable,
+					   use_wide_spans(drawable, gc, &data.region.extents),
+					   &data.region.extents,
+					   &data.damage))) {
 		void (*line)(DrawablePtr, GCPtr, int, int, DDXPointPtr);
 		int i;
 
@@ -8280,7 +8223,8 @@ sna_poly_rectangle(DrawablePtr drawable, GCPtr gc, int n, xRectangle *r)
 	    PM_IS_SOLID(drawable, gc->planemask)) {
 		DBG(("%s: trying blt solid fill [%08lx] paths\n",
 		     __FUNCTION__, gc->fgPixel));
-		if ((bo = sna_drawable_use_bo(drawable, &region.extents, &damage)) &&
+		if ((bo = sna_drawable_use_bo(drawable, true,
+					      &region.extents, &damage)) &&
 		    sna_poly_rectangle_blt(drawable, bo, damage,
 					   gc, n, r, &region.extents, flags&2))
 			return;
@@ -8288,7 +8232,8 @@ sna_poly_rectangle(DrawablePtr drawable, GCPtr gc, int n, xRectangle *r)
 		/* Not a trivial outline, but we still maybe able to break it
 		 * down into simpler operations that we can accelerate.
 		 */
-		if (sna_drawable_use_bo(drawable, &region.extents, &damage)) {
+		if (sna_drawable_use_bo(drawable, true,
+					&region.extents, &damage)) {
 			miPolyRectangle(drawable, gc, n, r);
 			return;
 		}
@@ -8408,8 +8353,8 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
 	if (!PM_IS_SOLID(drawable, gc->planemask))
 		goto fallback;
 
-	if (use_wide_spans(drawable, gc, &data.region.extents) &&
-	    (data.bo = sna_drawable_use_bo(drawable,
+	if ((data.bo = sna_drawable_use_bo(drawable,
+					   use_wide_spans(drawable, gc, &data.region.extents),
 					   &data.region.extents, &data.damage))) {
 		uint32_t color;
 
@@ -8761,8 +8706,8 @@ sna_poly_fill_polygon(DrawablePtr draw, GCPtr gc,
 	if (!PM_IS_SOLID(draw, gc->planemask))
 		goto fallback;
 
-	if (use_wide_spans(draw, gc, &data.region.extents) &&
-	    (data.bo = sna_drawable_use_bo(draw,
+	if ((data.bo = sna_drawable_use_bo(draw,
+					   use_wide_spans(draw, gc, &data.region.extents),
 					   &data.region.extents,
 					   &data.damage))) {
 		uint32_t color;
@@ -10174,12 +10119,15 @@ sna_poly_fill_rect(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect)
 		}
 	}
 
+	bo = sna_drawable_use_bo(draw, true, &region.extents, &damage);
+	if (bo == NULL)
+		goto fallback;
+
 	if (gc_is_solid(gc, &color)) {
 		DBG(("%s: solid fill [%08x], testing for blt\n",
 		     __FUNCTION__, color));
 
-		if ((bo = sna_drawable_use_bo(draw, &region.extents, &damage)) &&
-		    sna_poly_fill_rect_blt(draw,
+		if (sna_poly_fill_rect_blt(draw,
 					   bo, damage,
 					   gc, color, n, rect,
 					   &region.extents, flags & 2))
@@ -10187,16 +10135,14 @@ sna_poly_fill_rect(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect)
 	} else if (gc->fillStyle == FillTiled) {
 		DBG(("%s: tiled fill, testing for blt\n", __FUNCTION__));
 
-		if ((bo = sna_drawable_use_bo(draw, &region.extents, &damage)) &&
-		    sna_poly_fill_rect_tiled_blt(draw, bo, damage,
+		if (sna_poly_fill_rect_tiled_blt(draw, bo, damage,
 						 gc, n, rect,
 						 &region.extents, flags & 2))
 			return;
 	} else {
 		DBG(("%s: stippled fill, testing for blt\n", __FUNCTION__));
 
-		if ((bo = sna_drawable_use_bo(draw, &region.extents, &damage)) &&
-		    sna_poly_fill_rect_stippled_blt(draw, bo, damage,
+		if (sna_poly_fill_rect_stippled_blt(draw, bo, damage,
 						    gc, n, rect,
 						    &region.extents, flags & 2))
 			return;
@@ -10330,8 +10276,7 @@ sna_poly_fill_arc(DrawablePtr draw, GCPtr gc, int n, xArc *arc)
 	if (!PM_IS_SOLID(draw, gc->planemask))
 		goto fallback;
 
-	if (use_wide_spans(draw, gc, &data.region.extents) &&
-	    (data.bo = sna_drawable_use_bo(draw,
+	if ((data.bo = sna_drawable_use_bo(draw, true,
 					   &data.region.extents,
 					   &data.damage))) {
 		uint32_t color;
@@ -10482,7 +10427,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
 		return false;
 	}
 
-	bo = sna_drawable_use_bo(drawable, &clip->extents, &damage);
+	bo = sna_drawable_use_bo(drawable, true, &clip->extents, &damage);
 	if (bo == NULL)
 		return false;
 
@@ -11392,7 +11337,8 @@ sna_image_glyph(DrawablePtr drawable, GCPtr gc,
 	if (!PM_IS_SOLID(drawable, gc->planemask))
 		goto fallback;
 
-	if ((bo = sna_drawable_use_bo(drawable, &region.extents, &damage)) &&
+	if ((bo = sna_drawable_use_bo(drawable, true,
+				      &region.extents, &damage)) &&
 	    sna_reversed_glyph_blt(drawable, gc, x, y, n, info, base,
 				   bo, damage, &region,
 				   gc->fgPixel, gc->bgPixel, false))
@@ -11468,7 +11414,8 @@ sna_poly_glyph(DrawablePtr drawable, GCPtr gc,
 	if (!gc_is_solid(gc, &fg))
 		goto fallback;
 
-	if ((bo = sna_drawable_use_bo(drawable, &region.extents, &damage)) &&
+	if ((bo = sna_drawable_use_bo(drawable, true,
+				      &region.extents, &damage)) &&
 	    sna_reversed_glyph_blt(drawable, gc, x, y, n, info, base,
 				   bo, damage, &region, fg, -1, true))
 		goto out;
@@ -11505,7 +11452,7 @@ sna_push_pixels_solid_blt(GCPtr gc,
 	int n;
 	uint8_t rop = copy_ROP[gc->alu];
 
-	bo = sna_drawable_use_bo(drawable, &region->extents, &damage);
+	bo = sna_drawable_use_bo(drawable, true, &region->extents, &damage);
 	if (bo == NULL)
 		return false;
 
commit daac9a1d036d80ccce83438b49115a236a16bfb6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 27 16:50:28 2012 +0100

    sna: Micro-optimise common case of checking a single fenced bo
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 4c4aa7c..29f0e29 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2977,7 +2977,45 @@ bool kgem_check_bo(struct kgem *kgem, ...)
 	return true;
 }
 
-bool kgem_check_bo_fenced(struct kgem *kgem, ...)
+bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo)
+{
+	uint32_t size;
+
+	if (bo->proxy)
+		bo = bo->proxy;
+	if (bo->exec) {
+		if (kgem->gen < 40 &&
+		    bo->tiling != I915_TILING_NONE &&
+		    (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
+			if (kgem->nfence >= kgem->fence_max)
+				return false;
+
+			size = kgem->aperture_fenced;
+			size += kgem_bo_fenced_size(kgem, bo);
+			if (size > kgem->aperture_mappable)
+				return false;
+		}
+
+		return true;
+	}
+
+	if (kgem->aperture > kgem->aperture_low)
+		return false;
+
+	if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1)
+		return false;
+
+	if (kgem->gen < 40 &&
+	    bo->tiling != I915_TILING_NONE &&
+	    kgem->nfence >= kgem->fence_max)
+		return false;
+
+	size = kgem->aperture;
+	size += num_pages(bo);
+	return size <= kgem->aperture_high;
+}
+
+bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
 {
 	va_list ap;
 	struct kgem_bo *bo;
@@ -4165,7 +4203,7 @@ kgem_replace_bo(struct kgem *kgem,
 	kgem_set_mode(kgem, KGEM_BLT);
 	if (!kgem_check_batch(kgem, 8) ||
 	    !kgem_check_reloc(kgem, 2) ||
-	    !kgem_check_bo_fenced(kgem, src, dst, NULL)) {
+	    !kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
 		_kgem_submit(kgem);
 		_kgem_set_mode(kgem, KGEM_BLT);
 	}
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 1235b83..4def6b1 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -344,7 +344,8 @@ static inline void kgem_advance_batch(struct kgem *kgem, int num_dwords)
 }
 
 bool kgem_check_bo(struct kgem *kgem, ...) __attribute__((sentinel(0)));
-bool kgem_check_bo_fenced(struct kgem *kgem, ...) __attribute__((sentinel(0)));
+bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo);
+bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) __attribute__((sentinel(0)));
 
 void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo);
 static inline void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index dc084f0..a5c1648 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2975,7 +2975,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 		void *ptr;
 
 		if (!kgem_check_batch(&sna->kgem, 8) ||
-		    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+		    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 		    !kgem_check_reloc(&sna->kgem, 2)) {
 			_kgem_submit(&sna->kgem);
 			_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -3103,7 +3103,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 			void *ptr;
 
 			if (!kgem_check_batch(&sna->kgem, 12) ||
-			    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+			    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 			    !kgem_check_reloc(&sna->kgem, 2)) {
 				_kgem_submit(&sna->kgem);
 				_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -5117,7 +5117,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
 		if (src_stride <= 128) {
 			src_stride = ALIGN(src_stride, 8) / 4;
 			if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
-			    !kgem_check_bo_fenced(&sna->kgem, arg->bo, NULL) ||
+			    !kgem_check_bo_fenced(&sna->kgem, arg->bo) ||
 			    !kgem_check_reloc(&sna->kgem, 1)) {
 				_kgem_submit(&sna->kgem);
 				_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -5159,7 +5159,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
 			void *ptr;
 
 			if (!kgem_check_batch(&sna->kgem, 8) ||
-			    !kgem_check_bo_fenced(&sna->kgem, arg->bo, NULL) ||
+			    !kgem_check_bo_fenced(&sna->kgem, arg->bo) ||
 			    !kgem_check_reloc(&sna->kgem, 2)) {
 				_kgem_submit(&sna->kgem);
 				_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -5277,7 +5277,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc,
 		     sx, sy, bx1, bx2));
 
 		if (!kgem_check_batch(&sna->kgem, 8) ||
-		    !kgem_check_bo_fenced(&sna->kgem, arg->bo, NULL) ||
+		    !kgem_check_bo_fenced(&sna->kgem, arg->bo) ||
 		    !kgem_check_reloc(&sna->kgem, 2)) {
 			_kgem_submit(&sna->kgem);
 			_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -9195,7 +9195,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
 			     __FUNCTION__, r->x + dx, r->y + dy, r->width, r->height));
 
 			if (!kgem_check_batch(&sna->kgem, 9) ||
-			    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+			    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 			    !kgem_check_reloc(&sna->kgem, 1)) {
 				_kgem_submit(&sna->kgem);
 				_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -9243,7 +9243,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
 					uint32_t *b;
 
 					if (!kgem_check_batch(&sna->kgem, 9) ||
-					    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+					    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 					    !kgem_check_reloc(&sna->kgem, 1)) {
 						_kgem_submit(&sna->kgem);
 						_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -9293,7 +9293,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
 						uint32_t *b;
 
 						if (!kgem_check_batch(&sna->kgem, 9) ||
-						    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+						    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 						    !kgem_check_reloc(&sna->kgem, 1)) {
 							_kgem_submit(&sna->kgem);
 							_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -9433,7 +9433,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
 			if (src_stride <= 128) {
 				src_stride = ALIGN(src_stride, 8) / 4;
 				if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
-				    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+				    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 				    !kgem_check_reloc(&sna->kgem, 1)) {
 					_kgem_submit(&sna->kgem);
 					_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -9475,7 +9475,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
 				void *ptr;
 
 				if (!kgem_check_batch(&sna->kgem, 8) ||
-				    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+				    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 				    !kgem_check_reloc(&sna->kgem, 2)) {
 					_kgem_submit(&sna->kgem);
 					_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -9575,7 +9575,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
 				if (src_stride <= 128) {
 					src_stride = ALIGN(src_stride, 8) / 4;
 					if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
-					    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+					    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 					    !kgem_check_reloc(&sna->kgem, 1)) {
 						_kgem_submit(&sna->kgem);
 						_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -9614,7 +9614,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
 					} while (--bh);
 				} else {
 					if (!kgem_check_batch(&sna->kgem, 8) ||
-					    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+					    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 					    !kgem_check_reloc(&sna->kgem, 2)) {
 						_kgem_submit(&sna->kgem);
 						_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -9715,7 +9715,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
 					if (src_stride <= 128) {
 						src_stride = ALIGN(src_stride, 8) / 4;
 						if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
-						    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+						    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 						    !kgem_check_reloc(&sna->kgem, 1)) {
 							_kgem_submit(&sna->kgem);
 							_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -9754,7 +9754,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
 						} while (--bh);
 					} else {
 						if (!kgem_check_batch(&sna->kgem, 8) ||
-						    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+						    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 						    !kgem_check_reloc(&sna->kgem, 2)) {
 							_kgem_submit(&sna->kgem);
 							_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -9856,7 +9856,7 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna,
 			len = bw*bh;
 			len = ALIGN(len, 8) / 4;
 			if (!kgem_check_batch(&sna->kgem, 7+len) ||
-			    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+			    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 			    !kgem_check_reloc(&sna->kgem, 1)) {
 				_kgem_submit(&sna->kgem);
 				_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -10512,7 +10512,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
 
 	kgem_set_mode(&sna->kgem, KGEM_BLT);
 	if (!kgem_check_batch(&sna->kgem, 16) ||
-	    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+	    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 	    !kgem_check_reloc(&sna->kgem, 1)) {
 		_kgem_submit(&sna->kgem);
 		_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -11180,7 +11180,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc,
 
 	kgem_set_mode(&sna->kgem, KGEM_BLT);
 	if (!kgem_check_batch(&sna->kgem, 16) ||
-	    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+	    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 	    !kgem_check_reloc(&sna->kgem, 1)) {
 		_kgem_submit(&sna->kgem);
 		_kgem_set_mode(&sna->kgem, KGEM_BLT);
@@ -11549,7 +11549,7 @@ sna_push_pixels_solid_blt(GCPtr gc,
 		void *ptr;
 
 		if (!kgem_check_batch(&sna->kgem, 8) ||
-		    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+		    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 		    !kgem_check_reloc(&sna->kgem, 2)) {
 			_kgem_submit(&sna->kgem);
 			_kgem_set_mode(&sna->kgem, KGEM_BLT);
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index a81a145..82c61df 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -151,10 +151,10 @@ static bool sna_blt_fill_init(struct sna *sna,
 	blt->bpp = bpp;
 
 	kgem_set_mode(kgem, KGEM_BLT);
-	if (!kgem_check_bo_fenced(kgem, bo, NULL) ||
+	if (!kgem_check_bo_fenced(kgem, bo) ||
 	    !kgem_check_batch(kgem, 12)) {
 		_kgem_submit(kgem);
-		assert(kgem_check_bo_fenced(kgem, bo, NULL));
+		assert(kgem_check_bo_fenced(kgem, bo));
 		_kgem_set_mode(kgem, KGEM_BLT);
 	}
 
@@ -293,9 +293,9 @@ static Bool sna_blt_copy_init(struct sna *sna,
 	}
 
 	kgem_set_mode(kgem, KGEM_BLT);
-	if (!kgem_check_bo_fenced(kgem, src, dst, NULL)) {
+	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
 		_kgem_submit(kgem);
-		if (!kgem_check_bo_fenced(kgem, src, dst, NULL))
+		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
 			return FALSE;
 		_kgem_set_mode(kgem, KGEM_BLT);
 	}
@@ -345,9 +345,9 @@ static Bool sna_blt_alpha_fixup_init(struct sna *sna,
 	blt->pixel = alpha;
 
 	kgem_set_mode(kgem, KGEM_BLT);
-	if (!kgem_check_bo_fenced(kgem, src, dst, NULL)) {
+	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
 		_kgem_submit(kgem);
-		if (!kgem_check_bo_fenced(kgem, src, dst, NULL))
+		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
 			return FALSE;
 		_kgem_set_mode(kgem, KGEM_BLT);
 	}
@@ -1103,10 +1103,10 @@ prepare_blt_copy(struct sna *sna,
 	if (!kgem_bo_can_blt(&sna->kgem, priv->gpu_bo))
 		return FALSE;
 
-	if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo, priv->gpu_bo, NULL)) {
+	if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, priv->gpu_bo, NULL)) {
 		_kgem_submit(&sna->kgem);
-		if (!kgem_check_bo_fenced(&sna->kgem,
-					  op->dst.bo, priv->gpu_bo, NULL))
+		if (!kgem_check_many_bo_fenced(&sna->kgem,
+					       op->dst.bo, priv->gpu_bo, NULL))
 			return FALSE;
 		_kgem_set_mode(&sna->kgem, KGEM_BLT);
 	}
@@ -1577,9 +1577,9 @@ sna_blt_composite(struct sna *sna,
 	if (width && height)
 		reduce_damage(tmp, dst_x, dst_y, width, height);
 
-	if (!kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL)) {
+	if (!kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo)) {
 		_kgem_submit(&sna->kgem);
-		assert(kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL));
+		assert(kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo));
 		_kgem_set_mode(&sna->kgem, KGEM_BLT);
 	}
 
@@ -1884,9 +1884,9 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
 	kgem_set_mode(kgem, KGEM_BLT);
 	if (!kgem_check_batch(kgem, 6) ||
 	    !kgem_check_reloc(kgem, 1) ||
-	    !kgem_check_bo_fenced(kgem, bo, NULL)) {
+	    !kgem_check_bo_fenced(kgem, bo)) {
 		_kgem_submit(kgem);
-		assert(kgem_check_bo_fenced(&sna->kgem, bo, NULL));
+		assert(kgem_check_bo_fenced(&sna->kgem, bo));
 		_kgem_set_mode(kgem, KGEM_BLT);
 	}
 
@@ -1957,10 +1957,10 @@ Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
 	}
 
 	kgem_set_mode(kgem, KGEM_BLT);
-	if (!kgem_check_bo_fenced(kgem, bo, NULL) ||
+	if (!kgem_check_bo_fenced(kgem, bo) ||
 	    !kgem_check_batch(kgem, 12)) {
 		_kgem_submit(kgem);
-		assert(kgem_check_bo_fenced(&sna->kgem, bo, NULL));
+		assert(kgem_check_bo_fenced(&sna->kgem, bo));
 		_kgem_set_mode(kgem, KGEM_BLT);
 	}
 
@@ -2122,9 +2122,9 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 	kgem_set_mode(kgem, KGEM_BLT);
 	if (!kgem_check_batch(kgem, 8) ||
 	    !kgem_check_reloc(kgem, 2) ||
-	    !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
+	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
 		_kgem_submit(kgem);
-		if (!kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL))
+		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL))
 			return sna_tiling_blt_copy_boxes(sna, alu,
 							 src_bo, src_dx, src_dy,
 							 dst_bo, dst_dx, dst_dy,
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 02a5c75..2539518 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -360,7 +360,7 @@ fallback:
 	if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) ||
 	    kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) ||
 	    !kgem_check_batch(kgem, 8) ||
-	    !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
+	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
 		_kgem_submit(kgem);
 		_kgem_set_mode(kgem, KGEM_BLT);
 	}
@@ -732,7 +732,7 @@ tile:
 	if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) ||
 	    kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) ||
 	    !kgem_check_batch(kgem, 8) ||
-	    !kgem_check_bo_fenced(kgem, dst_bo, NULL)) {
+	    !kgem_check_bo_fenced(kgem, dst_bo)) {
 		_kgem_submit(kgem);
 		_kgem_set_mode(kgem, KGEM_BLT);
 	}
@@ -969,7 +969,7 @@ fallback:
 	if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) ||
 	    kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) ||
 	    !kgem_check_batch(kgem, 8) ||
-	    !kgem_check_bo_fenced(kgem, dst_bo, NULL)) {
+	    !kgem_check_bo_fenced(kgem, dst_bo)) {
 		_kgem_submit(kgem);
 		_kgem_set_mode(kgem, KGEM_BLT);
 	}
commit 5967d76ca09a257ec9db66ea664158e1dfd083ba
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Apr 25 17:15:37 2012 +0100

    sna: Fixup broken assertion
    
    It is valid for the cpu_bo to be NULL, as we may be choosing to free the
    large shadow pixel buffer instead.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 382c671..dc084f0 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2256,7 +2256,7 @@ sna_pixmap_force_to_gpu(PixmapPtr pixmap, unsigned flags)
 		sna_damage_destroy(&priv->cpu_damage);
 		priv->undamaged = false;
 		list_del(&priv->list);
-		assert(!priv->cpu_bo->sync);
+		assert(priv->cpu_bo == NULL || !priv->cpu_bo->sync);
 		sna_pixmap_free_cpu(to_sna_from_pixmap(pixmap), priv);
 	}
 
commit 860d3859b586939cd52e45b944cb6abd2a2ca71b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Apr 25 16:04:33 2012 +0100

    sna/gen7: Add CS stall before changing WM binding table
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index e2741c4..2228873 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -837,11 +837,11 @@ gen7_emit_wm(struct sna *sna, unsigned int kernel, int nr_surfaces, int nr_input
 	OUT_BATCH(0); /* kernel 2 */
 }
 
-static void
+static bool
 gen7_emit_binding_table(struct sna *sna, uint16_t offset)
 {
 	if (sna->render_state.gen7.surface_table == offset)
-		return;
+		return false;
 
 	/* Binding table pointers */
 	assert(is_aligned(4*offset, 32));
@@ -849,6 +849,7 @@ gen7_emit_binding_table(struct sna *sna, uint16_t offset)
 	OUT_BATCH(offset*4);
 
 	sna->render_state.gen7.surface_table = offset;
+	return true;
 }
 
 static void
@@ -970,11 +971,7 @@ gen7_emit_state(struct sna *sna,
 		const struct sna_composite_op *op,
 		uint16_t wm_binding_table)
 {
-	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
-		gen7_emit_flush(sna);
-		kgem_clear_dirty(&sna->kgem);
-		kgem_bo_mark_dirty(op->dst.bo);
-	}
+	bool need_stall = false;
 
 	gen7_emit_cc(sna,
 		     gen7_get_blend(op->op,
@@ -1001,8 +998,22 @@ gen7_emit_state(struct sna *sna,
 		     op->u.gen7.nr_inputs);
 	gen7_emit_vertex_elements(sna, op);
 
-	gen7_emit_binding_table(sna, wm_binding_table);
+	need_stall |= gen7_emit_binding_table(sna, wm_binding_table);
 	gen7_emit_drawing_rectangle(sna, op);
+
+	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
+		gen7_emit_flush(sna);
+		kgem_clear_dirty(&sna->kgem);
+		kgem_bo_mark_dirty(op->dst.bo);
+		need_stall = false;
+	}
+	if (need_stall) {
+		OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
+		OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
+			  GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+	}
 }
 
 static void gen7_magic_ca_pass(struct sna *sna,
commit c219283460c0f2dfdb823e0cb139d05075c6afce
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Apr 25 15:04:01 2012 +0100

    sna/gen7: Apply more recent improvements from SNB perf tuning
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 7dff02f..e2741c4 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -57,6 +57,8 @@
 #define NO_FILL_BOXES 0
 #define NO_CLEAR 0
 
+#define NO_RING_SWITCH 1
+
 #define GEN7_MAX_SIZE 16384
 
 /* XXX Todo
@@ -967,7 +969,6 @@ static void
 gen7_emit_state(struct sna *sna,
 		const struct sna_composite_op *op,
 		uint16_t wm_binding_table)
-
 {
 	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
 		gen7_emit_flush(sna);
@@ -1353,18 +1354,13 @@ gen7_emit_composite_primitive_solid(struct sna *sna,
 	dst.p.x = r->dst.x + r->width;
 	dst.p.y = r->dst.y + r->height;
 	v[0] = dst.f;
-	v[1] = 1.;
-	v[2] = 1.;
-
 	dst.p.x = r->dst.x;
 	v[3] = dst.f;
-	v[4] = 0.;
-	v[5] = 1.;
-
 	dst.p.y = r->dst.y;
 	v[6] = dst.f;
-	v[7] = 0.;
-	v[8] = 0.;
+
+	v[5] = v[2] = v[1] = 1.;
+	v[8] = v[7] = v[4] = 0.;
 }
 
 fastcall static void
@@ -1397,6 +1393,44 @@ gen7_emit_composite_primitive_identity_source(struct sna *sna,
 }
 
 fastcall static void
+gen7_emit_composite_primitive_simple_source(struct sna *sna,
+					    const struct sna_composite_op *op,
+					    const struct sna_composite_rectangles *r)
+{
+	float *v;
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst;
+
+	float xx = op->src.transform->matrix[0][0];
+	float x0 = op->src.transform->matrix[0][2];
+	float yy = op->src.transform->matrix[1][1];
+	float y0 = op->src.transform->matrix[1][2];
+	float sx = op->src.scale[0];
+	float sy = op->src.scale[1];
+	int16_t tx = op->src.offset[0];
+	int16_t ty = op->src.offset[1];
+
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 3*3;
+
+	dst.p.x = r->dst.x + r->width;
+	dst.p.y = r->dst.y + r->height;
+	v[0] = dst.f;
+	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
+	v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
+
+	dst.p.x = r->dst.x;
+	v[3] = dst.f;
+	v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
+
+	dst.p.y = r->dst.y;
+	v[6] = dst.f;
+	v[8] = ((r->src.y + ty) * yy + y0) * sy;
+}
+
+fastcall static void
 gen7_emit_composite_primitive_affine_source(struct sna *sna,
 					    const struct sna_composite_op *op,
 					    const struct sna_composite_rectangles *r)
@@ -1486,141 +1520,63 @@ gen7_emit_composite_primitive_identity_source_mask(struct sna *sna,
 	v[14] = msk_y * op->mask.scale[1];
 }
 
-fastcall static void
-gen7_emit_composite_primitive(struct sna *sna,
-			      const struct sna_composite_op *op,
-			      const struct sna_composite_rectangles *r)
+inline static void
+gen7_emit_composite_texcoord(struct sna *sna,
+			     const struct sna_composite_channel *channel,
+			     int16_t x, int16_t y)
 {
-	float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
-	Bool is_affine = op->is_affine;
-	const float *src_sf = op->src.scale;
-	const float *mask_sf = op->mask.scale;
-
-	if (is_affine) {
-		sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
-						r->src.y + op->src.offset[1],
-						op->src.transform,
-						&src_x[0],
-						&src_y[0]);
-
-		sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
-						r->src.y + op->src.offset[1] + r->height,
-						op->src.transform,
-						&src_x[1],
-						&src_y[1]);
-
-		sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width,
-						r->src.y + op->src.offset[1] + r->height,
-						op->src.transform,
-						&src_x[2],
-						&src_y[2]);
-	} else {
-		if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
-							r->src.y + op->src.offset[1],
-							op->src.transform,
-							&src_x[0],
-							&src_y[0],
-							&src_w[0]))
-			return;
-
-		if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
-							r->src.y + op->src.offset[1] + r->height,
-							op->src.transform,
-							&src_x[1],
-							&src_y[1],
-							&src_w[1]))
-			return;
-
-		if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width,
-							r->src.y + op->src.offset[1] + r->height,
-							op->src.transform,
-							&src_x[2],
-							&src_y[2],
-							&src_w[2]))
-			return;
-	}
+	x += channel->offset[0];
+	y += channel->offset[1];
 
-	if (op->mask.bo) {
-		if (is_affine) {
-			sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
-							r->mask.y + op->mask.offset[1],
-							op->mask.transform,
-							&mask_x[0],
-							&mask_y[0]);
-
-			sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
-							r->mask.y + op->mask.offset[1] + r->height,
-							op->mask.transform,
-							&mask_x[1],
-							&mask_y[1]);
-
-			sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width,
-							r->mask.y + op->mask.offset[1] + r->height,
-							op->mask.transform,
-							&mask_x[2],
-							&mask_y[2]);
-		} else {
-			if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
-								r->mask.y + op->mask.offset[1],
-								op->mask.transform,
-								&mask_x[0],
-								&mask_y[0],
-								&mask_w[0]))
-				return;
-
-			if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
-								r->mask.y + op->mask.offset[1] + r->height,
-								op->mask.transform,
-								&mask_x[1],
-								&mask_y[1],
-								&mask_w[1]))
-				return;
-
-			if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width,
-								r->mask.y + op->mask.offset[1] + r->height,
-								op->mask.transform,
-								&mask_x[2],
-								&mask_y[2],
-								&mask_w[2]))
-				return;
-		}
-	}
+	if (channel->is_affine) {
+		float s, t;
 
-	OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height);
-	OUT_VERTEX_F(src_x[2] * src_sf[0]);
-	OUT_VERTEX_F(src_y[2] * src_sf[1]);
-	if (!is_affine)
-		OUT_VERTEX_F(src_w[2]);
-	if (op->mask.bo) {
-		OUT_VERTEX_F(mask_x[2] * mask_sf[0]);
-		OUT_VERTEX_F(mask_y[2] * mask_sf[1]);
-		if (!is_affine)
-			OUT_VERTEX_F(mask_w[2]);
-	}
+		sna_get_transformed_coordinates(x, y,
+						channel->transform,
+						&s, &t);
+		OUT_VERTEX_F(s * channel->scale[0]);
+		OUT_VERTEX_F(t * channel->scale[1]);
+	} else {
+		float s, t, w;
 
-	OUT_VERTEX(r->dst.x, r->dst.y + r->height);
-	OUT_VERTEX_F(src_x[1] * src_sf[0]);
-	OUT_VERTEX_F(src_y[1] * src_sf[1]);
-	if (!is_affine)
-		OUT_VERTEX_F(src_w[1]);
-	if (op->mask.bo) {
-		OUT_VERTEX_F(mask_x[1] * mask_sf[0]);
-		OUT_VERTEX_F(mask_y[1] * mask_sf[1]);
-		if (!is_affine)
-			OUT_VERTEX_F(mask_w[1]);
+		sna_get_transformed_coordinates_3d(x, y,
+						   channel->transform,
+						   &s, &t, &w);
+		OUT_VERTEX_F(s * channel->scale[0]);
+		OUT_VERTEX_F(t * channel->scale[1]);
+		OUT_VERTEX_F(w);
 	}
+}
 
-	OUT_VERTEX(r->dst.x, r->dst.y);
-	OUT_VERTEX_F(src_x[0] * src_sf[0]);
-	OUT_VERTEX_F(src_y[0] * src_sf[1]);
-	if (!is_affine)
-		OUT_VERTEX_F(src_w[0]);
-	if (op->mask.bo) {
-		OUT_VERTEX_F(mask_x[0] * mask_sf[0]);
-		OUT_VERTEX_F(mask_y[0] * mask_sf[1]);
-		if (!is_affine)
-			OUT_VERTEX_F(mask_w[0]);
-	}
+static void
+gen7_emit_composite_vertex(struct sna *sna,
+			   const struct sna_composite_op *op,
+			   int16_t srcX, int16_t srcY,
+			   int16_t mskX, int16_t mskY,
+			   int16_t dstX, int16_t dstY)
+{
+	OUT_VERTEX(dstX, dstY);
+	gen7_emit_composite_texcoord(sna, &op->src, srcX, srcY);
+	gen7_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
+}
+
+fastcall static void
+gen7_emit_composite_primitive(struct sna *sna,
+			      const struct sna_composite_op *op,
+			      const struct sna_composite_rectangles *r)
+{
+	gen7_emit_composite_vertex(sna, op,
+				   r->src.x + r->width,  r->src.y + r->height,
+				   r->mask.x + r->width, r->mask.y + r->height,
+				   r->dst.x + r->width, r->dst.y + r->height);
+	gen7_emit_composite_vertex(sna, op,
+				   r->src.x,  r->src.y + r->height,
+				   r->mask.x, r->mask.y + r->height,
+				   r->dst.x,  r->dst.y + r->height);
+	gen7_emit_composite_vertex(sna, op,
+				   r->src.x,  r->src.y,
+				   r->mask.x, r->mask.y,
+				   r->dst.x,  r->dst.y);
 }
 
 static void gen7_emit_vertex_buffer(struct sna *sna,
@@ -2064,7 +2020,7 @@ gen7_render_video(struct sna *sna,
 	tmp.dst.pixmap = pixmap;
 	tmp.dst.width  = pixmap->drawable.width;
 	tmp.dst.height = pixmap->drawable.height;
-	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
+	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
 	tmp.dst.bo = priv->gpu_bo;
 
 	tmp.src.bo = frame->bo;
@@ -2163,6 +2119,7 @@ gen7_composite_solid_init(struct sna *sna,
 	channel->repeat = RepeatNormal;
 	channel->is_affine = TRUE;
 	channel->is_solid  = TRUE;
+	channel->is_opaque = (color >> 24) == 0xff;
 	channel->transform = NULL;
 	channel->width  = 1;
 	channel->height = 1;
@@ -2366,7 +2323,7 @@ gen7_composite_picture(struct sna *sna,
 		channel->transform = picture->transform;
 
 	channel->card_format = gen7_get_card_format(picture->format);
-	if (channel->card_format == -1)
+	if (channel->card_format == (unsigned)-1)
 		return sna_render_picture_convert(sna, picture, channel, pixmap,
 						  x, y, w, h, dst_x, dst_y);
 
@@ -2411,9 +2368,6 @@ gen7_composite_set_target(struct sna *sna, struct sna_composite_op *op, PictureP
 {
 	struct sna_pixmap *priv;
 
-	if (!gen7_check_dst_format(dst->format))
-		return FALSE;
-
 	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
 	op->dst.width  = op->dst.pixmap->drawable.width;
 	op->dst.height = op->dst.pixmap->drawable.height;
@@ -2453,12 +2407,22 @@ gen7_composite_set_target(struct sna *sna, struct sna_composite_op *op, PictureP
 	return TRUE;
 }
 
+static bool prefer_blt_ring(struct sna *sna)
+{
+	return sna->kgem.ring != KGEM_RENDER;
+}
+
+static bool can_switch_rings(struct sna *sna)
+{
+	return sna->kgem.has_semaphores && !NO_RING_SWITCH;
+}
+
 static Bool
 try_blt(struct sna *sna,
 	PicturePtr dst, PicturePtr src,
 	int width, int height)
 {
-	if (sna->kgem.ring != KGEM_RENDER) {
+	if (prefer_blt_ring(sna)) {
 		DBG(("%s: already performing BLT\n", __FUNCTION__));
 		return TRUE;
 	}
@@ -2484,11 +2448,16 @@ try_blt(struct sna *sna,
 		return TRUE;
 	}
 
+	if (can_switch_rings(sna)) {
+		if (sna_picture_is_solid(src, NULL))
+			return TRUE;
+	}
+
 	return FALSE;
 }
 
 static bool
-is_gradient(PicturePtr picture)
+check_gradient(PicturePtr picture)
 {
 	if (picture->pDrawable)
 		return FALSE;
@@ -2542,9 +2511,10 @@ source_fallback(PicturePtr p, PixmapPtr pixmap)
 	if (sna_picture_is_solid(p, NULL))
 		return false;
 
-	if (is_gradient(p) ||
-	    !gen7_check_repeat(p) ||
-	    !gen7_check_format(p->format))
+	if (p->pSourcePict)
+		return check_gradient(p);
+
+	if (!gen7_check_repeat(p) || !gen7_check_format(p->format))
 		return true;
 
 	if (pixmap && source_is_busy(pixmap))
@@ -2578,7 +2548,7 @@ gen7_composite_fallback(struct sna *sna,
 
 	if (mask) {
 		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
-		mask_fallback = source_fallback(src, mask_pixmap);
+		mask_fallback = source_fallback(mask, mask_pixmap);
 	} else {
 		mask_pixmap = NULL;
 		mask_fallback = false;
@@ -2743,6 +2713,8 @@ gen7_render_composite(struct sna *sna,
 					    width, height,
 					    tmp);
 
+	if (op == PictOpClear)
+		op = PictOpSrc;
 	tmp->op = op;
 	if (!gen7_composite_set_target(sna, tmp, dst))
 		return FALSE;
@@ -2842,12 +2814,21 @@ gen7_render_composite(struct sna *sna,
 
 		tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
 	} else {
-		if (tmp->src.is_solid)
+		if (tmp->src.is_solid) {
 			tmp->prim_emit = gen7_emit_composite_primitive_solid;
-		else if (tmp->src.transform == NULL)
+			if (tmp->src.is_opaque && op == PictOpOver)
+				tmp->op = PictOpSrc;
+		} else if (tmp->src.transform == NULL)
 			tmp->prim_emit = gen7_emit_composite_primitive_identity_source;
-		else if (tmp->src.is_affine)
-			tmp->prim_emit = gen7_emit_composite_primitive_affine_source;
+		else if (tmp->src.is_affine) {
+			if (tmp->src.transform->matrix[0][1] == 0 &&
+			    tmp->src.transform->matrix[1][0] == 0) {
+				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
+				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
+				tmp->prim_emit = gen7_emit_composite_primitive_simple_source;
+			} else
+				tmp->prim_emit = gen7_emit_composite_primitive_affine_source;
+		}
 
 		tmp->floats_per_vertex = 3 + !tmp->is_affine;
 	}
@@ -2920,32 +2901,6 @@ gen7_composite_alpha_gradient_init(struct sna *sna,
 }
 
 inline static void
-gen7_emit_composite_texcoord(struct sna *sna,
-			     const struct sna_composite_channel *channel,
-			     int16_t x, int16_t y)
-{
-	float t[3];
-
-	if (channel->is_affine) {
-		sna_get_transformed_coordinates(x + channel->offset[0],
-						y + channel->offset[1],
-						channel->transform,
-						&t[0], &t[1]);
-		OUT_VERTEX_F(t[0] * channel->scale[0]);
-		OUT_VERTEX_F(t[1] * channel->scale[1]);
-	} else {
-		t[0] = t[1] = 0; t[2] = 1;
-		sna_get_transformed_coordinates_3d(x + channel->offset[0],
-						   y + channel->offset[1],
-						   channel->transform,
-						   &t[0], &t[1], &t[2]);
-		OUT_VERTEX_F(t[0] * channel->scale[0]);
-		OUT_VERTEX_F(t[1] * channel->scale[1]);
-		OUT_VERTEX_F(t[2]);
-	}
-}
-
-inline static void
 gen7_emit_composite_texcoord_affine(struct sna *sna,
 				    const struct sna_composite_channel *channel,
 				    int16_t x, int16_t y)
@@ -3344,7 +3299,7 @@ static inline bool prefer_blt_copy(struct sna *sna,
 				   PixmapPtr src, struct kgem_bo *src_bo,
 				   PixmapPtr dst, struct kgem_bo *dst_bo)
 {
-	return (sna->kgem.ring != KGEM_RENDER ||
+	return (sna->kgem.ring == KGEM_BLT ||
 		prefer_blt_bo(sna, src, src_bo) ||
 		prefer_blt_bo(sna, dst, dst_bo));
 }
@@ -3667,7 +3622,7 @@ fallback:
 	if (!gen7_check_format(op->base.src.pict_format))
 		goto fallback;
 
-	op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+	op->base.op = PictOpSrc;
 
 	op->base.dst.pixmap = dst;
 	op->base.dst.width  = dst->drawable.width;
@@ -3751,7 +3706,9 @@ gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
 static inline bool prefer_blt_fill(struct sna *sna,
 				   struct kgem_bo *bo)
 {
-	return sna->kgem.ring != KGEM_RENDER || untiled_tlb_miss(bo);
+	return (can_switch_rings(sna) ||
+		prefer_blt_ring(sna) ||
+		untiled_tlb_miss(bo));
 }
 
 static Bool
@@ -3810,9 +3767,10 @@ gen7_render_fill_boxes(struct sna *sna,
 	return FALSE;
 #endif
 
-	if (op == PictOpClear)
+	if (op == PictOpClear) {
 		pixel = 0;
-	else if (!sna_get_pixel_from_rgba(&pixel,
+		op = PictOpSrc;
+	} else if (!sna_get_pixel_from_rgba(&pixel,
 				     color->red,
 				     color->green,
 				     color->blue,
@@ -3824,8 +3782,6 @@ gen7_render_fill_boxes(struct sna *sna,
 	     __FUNCTION__, pixel, n,
 	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));
 
-	memset(&tmp, 0, sizeof(tmp));
-
 	tmp.op = op;
 
 	tmp.dst.pixmap = dst;
@@ -3833,14 +3789,21 @@ gen7_render_fill_boxes(struct sna *sna,
 	tmp.dst.height = dst->drawable.height;
 	tmp.dst.format = format;
 	tmp.dst.bo = dst_bo;
+	tmp.dst.x = tmp.dst.y = 0;
 
 	tmp.src.bo = sna_render_get_solid(sna, pixel);
 	tmp.src.filter = SAMPLER_FILTER_NEAREST;
 	tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
 
+	tmp.mask.bo = NULL;
+	tmp.mask.filter = SAMPLER_FILTER_NEAREST;
+	tmp.mask.repeat = SAMPLER_EXTEND_NONE;
+
 	tmp.is_affine = TRUE;
 	tmp.floats_per_vertex = 3;
 	tmp.floats_per_rect = 9;
+	tmp.has_component_alpha = FALSE;
+	tmp.need_magic_ca_pass = FALSE;
 
 	tmp.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK;
 	tmp.u.gen7.nr_surfaces = 2;
@@ -4004,7 +3967,7 @@ gen7_render_fill(struct sna *sna, uint8_t alu,
 	if (alu == GXclear)
 		color = 0;
 
-	op->base.op = color == 0 ? PictOpClear : PictOpSrc;
+	op->base.op = PictOpSrc;
 
 	op->base.dst.pixmap = dst;
 	op->base.dst.width  = dst->drawable.width;
@@ -4097,7 +4060,7 @@ gen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	if (alu == GXclear)
 		color = 0;
 
-	tmp.op = color == 0 ? PictOpClear : PictOpSrc;
+	tmp.op = PictOpSrc;
 
 	tmp.dst.pixmap = dst;
 	tmp.dst.width  = dst->drawable.width;
@@ -4195,7 +4158,7 @@ gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
 	if (too_large(dst->drawable.width, dst->drawable.height))
 		return gen7_render_clear_try_blt(sna, dst, bo);
 
-	tmp.op = PictOpClear;
+	tmp.op = PictOpSrc;
 
 	tmp.dst.pixmap = dst;
 	tmp.dst.width  = dst->drawable.width;
commit a3371613c9bf577a69cdf811ca1bebaea46bbe95
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Apr 25 11:09:35 2012 +0100

    sna: Do not automagically convert GTT mappings on untiled scanout to CPU
    
    The likelihood of an untiled mapping of the scanout is slim, except for
    gen3 with large desktops, and there it should never be in the CPU
    domain...
    
    The issue is that we may perform an operation "inplace", yet incoherent
    with the display engine, and never flush the CPU cache, resulting in
    render corruption. In theory at least!
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index d97f559..4c4aa7c 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3167,7 +3167,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
 	assert(bo->exec == NULL);
 	assert(list_is_empty(&bo->list));
 
-	if (bo->tiling == I915_TILING_NONE &&
+	if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
 	    (kgem->has_llc || bo->domain == DOMAIN_CPU)) {
 		DBG(("%s: converting request for GTT map into CPU map\n",
 		     __FUNCTION__));
@@ -3274,6 +3274,7 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
 	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, bo->handle, bytes(bo)));
 	assert(!bo->purged);
 	assert(list_is_empty(&bo->list));
+	assert(!bo->scanout);
 
 	if (IS_CPU_MAP(bo->map))
 		return MAP(bo->map);
commit 1abd92cd012ee46d44ed4873a5e750d56ae6668f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Apr 25 11:16:30 2012 +0100

    sna: Clear the domain tracking after attaching the bo to scanout
    
    This is basically to make sure we don't continue treating it as CPU
    coherent.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index cb051b0..5275d4a 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -436,6 +436,7 @@ sna_crtc_restore(struct sna *sna)
 
 	assert(bo->tiling != I915_TILING_Y);
 	bo->scanout = true;
+	bo->domain = DOMAIN_NONE;
 
 	DBG(("%s: create fb %dx%d@%d/%d\n",
 	     __FUNCTION__,
@@ -673,6 +674,7 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
 		DBG(("%s: handle %d attached to fb %d\n",
 		     __FUNCTION__, bo->handle, sna_mode->fb_id));
 		bo->scanout = true;
+		bo->domain = DOMAIN_NONE;
 		sna_mode->fb_pixmap = sna->front->drawable.serialNumber;
 	}
 
@@ -787,6 +789,7 @@ sna_crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height)
 	DBG(("%s: attached handle %d to fb %d\n",
 	     __FUNCTION__, bo->handle, sna_crtc->shadow_fb_id));
 	bo->scanout = true;
+	bo->domain = DOMAIN_NONE;
 	return sna_crtc->shadow = shadow;
 }
 
@@ -1725,6 +1728,7 @@ sna_crtc_resize(ScrnInfoPtr scrn, int width, int height)
 		if (!sna_crtc_apply(crtc))
 			goto fail;
 	}
+	bo->domain = DOMAIN_NONE;
 
 	scrn->virtualX = width;
 	scrn->virtualY = height;
@@ -1869,6 +1873,7 @@ sna_page_flip(struct sna *sna,
 	DBG(("%s: page flipped %d crtcs\n", __FUNCTION__, count));
 	if (count) {
 		bo->scanout = true;
+		bo->domain = DOMAIN_NONE;
 	} else {
 		drmModeRmFB(sna->kgem.fd, mode->fb_id);
 		mode->fb_id = *old_fb;
commit 8c58c840b1ba579a5601804fc710c58e1e00213f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 23 11:09:37 2012 +0100

    sna/dri: Always clear the scanout when destroying dri2 buffers
    
    As we may end up holding onto and releasing the Screen pixmap last, we
    may also be responsible for flushing the last reference to the scanout.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index f4d55e0..1a7b6bd 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -333,6 +333,7 @@ static void _sna_dri_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer)
 		}
 
 		private->bo->flush = 0;
+		kgem_bo_clear_scanout(&sna->kgem, private->bo); /* paranoia */
 		kgem_bo_destroy(&sna->kgem, private->bo);
 
 		free(buffer);
@@ -388,6 +389,7 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo)
 	sna_damage_destroy(&priv->cpu_damage);
 	priv->undamaged = false;
 
+	kgem_bo_clear_scanout(&sna->kgem, priv->gpu_bo); /* paranoia */
 	kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
 	priv->gpu_bo = ref(bo);
 }
commit caf9144271a10f90ea580c246b2df3f69a10b7a0
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 20 17:15:37 2012 +0100

    sna: Remove the assertions that the cached upload buffers are active
    
    These were added to track down some corruption, but the assertions
    themselves are incorrect, just very rare. The upload buffer may
    genuinely be cached if we abort the render operation after uploading the
    source data, leaving the proxy not coupled to any request.
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=48400
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 06bd2c0..382c671 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1943,7 +1943,6 @@ sna_drawable_use_bo(DrawablePtr drawable,
 	}
 
 	if (priv->gpu_bo && priv->gpu_bo->proxy) {
-		assert(priv->gpu_bo->proxy->rq);
 		kgem_bo_destroy(&to_sna_from_pixmap(pixmap)->kgem,
 				priv->gpu_bo);
 		priv->gpu_bo = NULL;
@@ -2290,7 +2289,6 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
 
 	if (flags & MOVE_WRITE && priv->gpu_bo && priv->gpu_bo->proxy) {
 		DBG(("%s: discarding cached upload buffer\n", __FUNCTION__));
-		assert(priv->gpu_bo->proxy->rq);
 		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
 		priv->gpu_bo = NULL;
 	}
@@ -2744,7 +2742,6 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 
 	if (priv->gpu_bo && priv->gpu_bo->proxy) {
 		DBG(("%s: discarding cached upload buffer\n", __FUNCTION__));
-		assert(priv->gpu_bo->proxy->rq);
 		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
 		priv->gpu_bo = NULL;
 	}
@@ -3540,7 +3537,6 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 
 	if (dst_priv->gpu_bo && dst_priv->gpu_bo->proxy) {
 		DBG(("%s: discarding cached upload\n", __FUNCTION__));
-		assert(dst_priv->gpu_bo->proxy->rq);
 		kgem_bo_destroy(&sna->kgem, dst_priv->gpu_bo);
 		dst_priv->gpu_bo = NULL;
 	}
@@ -8383,9 +8379,10 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
 	if (data.flags == 0)
 		return;
 
-	DBG(("%s: extents=(%d, %d), (%d, %d)\n", __FUNCTION__,
+	DBG(("%s: extents=(%d, %d), (%d, %d), flags=%x\n", __FUNCTION__,
 	     data.region.extents.x1, data.region.extents.y1,
-	     data.region.extents.x2, data.region.extents.y2));
+	     data.region.extents.x2, data.region.extents.y2,
+	     data.flags));
 
 	data.region.data = NULL;
 
commit aff3614efd5c12e658fa5723934e5bd50a83a316
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 20 13:21:40 2012 +0100

    sna: Always clear the mmapped domains when reusing  partial upload buffers
    
    As we need to make sure that we do invalidate the caches appropriately
    on reuse. Mildly paranoid, but strictly required by the spec.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 72b6ad7..d97f559 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3414,6 +3414,29 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
 	}
 }
 
+void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
+{
+	assert(bo->proxy == NULL);
+	kgem_bo_submit(kgem, bo);
+
+	if (bo->domain != DOMAIN_GTT) {
+		struct drm_i915_gem_set_domain set_domain;
+
+		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
+		     bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
+
+		VG_CLEAR(set_domain);
+		set_domain.handle = bo->handle;
+		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+
+		if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
+			kgem_bo_retire(kgem, bo);
+			bo->domain = DOMAIN_GTT;
+		}
+	}
+}
+
 void kgem_bo_set_sync(struct kgem *kgem, struct kgem_bo *bo)
 {
 	assert(!bo->reusable);
@@ -3424,7 +3447,6 @@ void kgem_bo_set_sync(struct kgem *kgem, struct kgem_bo *bo)
 
 void kgem_sync(struct kgem *kgem)
 {
-	struct drm_i915_gem_set_domain set_domain;
 	struct kgem_request *rq;
 	struct kgem_bo *bo;
 
@@ -3437,14 +3459,7 @@ void kgem_sync(struct kgem *kgem)
 	if (rq == kgem->next_request)
 		_kgem_submit(kgem);
 
-	VG_CLEAR(set_domain);
-	set_domain.handle = rq->bo->handle;
-	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
-	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
-
-	drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
-	kgem_retire(kgem);
-
+	kgem_bo_sync__gtt(kgem, rq->bo);
 	list_for_each_entry(bo, &kgem->sync_list, list)
 		kgem_bo_sync__cpu(kgem, bo);
 
@@ -3599,8 +3614,12 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 			bo->used = size;
 			list_move(&bo->base.list, &kgem->active_partials);
 
-			if (bo->base.vmap)
-				kgem_bo_sync__cpu(kgem, &bo->base);
+			if (bo->mmapped) {
+				if (IS_CPU_MAP(bo->base.map))
+					kgem_bo_sync__cpu(kgem, &bo->base);
+				else
+					kgem_bo_sync__gtt(kgem, &bo->base);
+			}
 
 			goto done;
 		} while (kgem_retire(kgem));
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 913e1a9..1235b83 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -365,6 +365,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 
 void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo);
 void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo);
+void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo);
 void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo);
 void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
 void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo);
commit cb6a3dc2edf3cd612f833bc9a4656166735ee856
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Apr 19 10:34:23 2012 +0100

    sna: Discard proxy upload buffer if we choose to render to it
    
    Even if we try to avoid treating an upload buffer as a real GPU target,
    we may still choose to migrate the buffer to the GPU in order to keep
    other buffers on the GPU. In that case, we do want to create a real GPU
    bo.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 1c43fb7..06bd2c0 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2288,6 +2288,13 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
 		goto active;
 	}
 
+	if (flags & MOVE_WRITE && priv->gpu_bo && priv->gpu_bo->proxy) {
+		DBG(("%s: discarding cached upload buffer\n", __FUNCTION__));
+		assert(priv->gpu_bo->proxy->rq);
+		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
+		priv->gpu_bo = NULL;
+	}
+
 	if ((flags & MOVE_READ) == 0)
 		sna_damage_destroy(&priv->cpu_damage);
 
commit 4cf74d409ca63c6a479c1ee2187908c04f3b830b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Apr 19 09:09:32 2012 +0100

    sna: Don't consider upload proxies as being on the GPU for render targets
    
    The upload proxy is a fake buffer that we do not want to render to as
    then the damage tracking become extremely confused and the buffer it
    self is not optimised for persistent rendering. We assert that we do not
    use it as a render target, and this patch adds the check so that we
    avoid treating the proxy as a valid target when choosing the render
    path.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h
index 03e1969..b9acea1 100644
--- a/src/sna/sna_render_inline.h
+++ b/src/sna/sna_render_inline.h
@@ -75,7 +75,7 @@ is_gpu(DrawablePtr drawable)
 	if (priv == NULL || priv->clear)
 		return false;
 
-	if (priv->gpu_damage || (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)))
+	if (priv->gpu_damage || (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo) && !priv->gpu_bo->proxy))
 		return true;
 
 	return priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo);
commit fd81408b978c9b57c046ee43d2d32e1370e83a7d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Apr 18 11:39:43 2012 +0100

    sna: Increase the render target alignment to 4 pixels on gen4+ as well
    
    Repoerted-and-tested-by: Toralf Förster <toralf.foerster at gmx.de
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=48865
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 3d722d0..72b6ad7 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -838,13 +838,8 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
 	} else switch (tiling) {
 	default:
 	case I915_TILING_NONE:
-		if (kgem->gen < 40) {
-			tile_width = scanout ? 64 : 4 * bpp >> 3;
-			tile_height = 4;
-		} else {
-			tile_width = scanout ? 64 : 2 * bpp >> 3;
-			tile_height = 2;
-		}
+		tile_width = scanout ? 64 : 4 * bpp >> 3;
+		tile_height = 2;
 		break;
 	case I915_TILING_X:
 		tile_width = 512;
@@ -898,7 +893,7 @@ static uint32_t kgem_aligned_height(struct kgem *kgem,
 	} else switch (tiling) {
 	default:
 	case I915_TILING_NONE:
-		tile_height = kgem->gen < 40 ? 4 : 2;
+		tile_height = 2;
 		break;
 	case I915_TILING_X:
 		tile_height = 8;
@@ -2881,7 +2876,7 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
 
 		stride = ALIGN(width, 2) * bpp >> 3;
 		stride = ALIGN(stride, 4);
-		size = ALIGN(height, kgem->gen < 40 ? 4 : 2) * stride;
+		size = ALIGN(height, 2) * stride;
 
 		assert(size >= PAGE_SIZE);
 
commit 11599e52b842b5db76798879b0fbb57762fe6002
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 17 20:08:14 2012 +0100

    sna/dri: Decouple the frame event info after attaching along error paths
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index afec831..f4d55e0 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -1372,15 +1372,15 @@ sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 			return FALSE;
 		}
 
+		sna_dri_reference_buffer(front);
+		sna_dri_reference_buffer(back);
+
 		if (!sna_dri_page_flip(sna, info)) {
 			DBG(("%s: failed to queue page flip\n", __FUNCTION__));
-			free(info);
+			sna_dri_frame_event_info_free(info);
 			return FALSE;
 		}
 
-		sna_dri_reference_buffer(front);
-		sna_dri_reference_buffer(back);
-
 		get_private(info->back)->bo =
 			kgem_create_2d(&sna->kgem,
 				       draw->width,
@@ -1426,7 +1426,7 @@ sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 			vbl.request.type |= DRM_VBLANK_SECONDARY;
 		vbl.request.sequence = 0;
 		if (drmWaitVBlank(sna->kgem.fd, &vbl)) {
-			free(info);
+			sna_dri_frame_event_info_free(info);
 			return FALSE;
 		}
 
@@ -1482,7 +1482,7 @@ sna_dri_schedule_flip(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 		vbl.request.sequence -= 1;
 		vbl.request.signal = (unsigned long)info;
 		if (drmWaitVBlank(sna->kgem.fd, &vbl)) {
-			free(info);
+			sna_dri_frame_event_info_free(info);
 			return FALSE;
 		}
 
@@ -1610,9 +1610,8 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 	if (pipe > 0)
 		vbl.request.type |= DRM_VBLANK_SECONDARY;
 	vbl.request.sequence = 0;
-	if (drmWaitVBlank(sna->kgem.fd, &vbl)) {
+	if (drmWaitVBlank(sna->kgem.fd, &vbl))
 		goto blit_fallback;
-	}
 
 	current_msc = vbl.reply.sequence;
 
@@ -1677,9 +1676,8 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 		vbl.request.sequence += divisor;
 
 	vbl.request.signal = (unsigned long)info;
-	if (drmWaitVBlank(sna->kgem.fd, &vbl)) {
+	if (drmWaitVBlank(sna->kgem.fd, &vbl))
 		goto blit_fallback;
-	}
 
 	*target_msc = vbl.reply.sequence;
 	info->frame = *target_msc;
@@ -1762,12 +1760,11 @@ blit:
 		if (!sna_dri_add_frame_event(info)) {
 			DBG(("%s: failed to hook up frame event\n", __FUNCTION__));
 			free(info);
-			info = NULL;
 			goto blit;
 		}
 
 		if (!sna_dri_page_flip(sna, info)) {
-			free(info);
+			sna_dri_frame_event_info_free(info);
 			goto blit;
 		}
 
@@ -1935,7 +1932,6 @@ sna_dri_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
 	if (!sna_dri_add_frame_event(info)) {
 		DBG(("%s: failed to hook up frame event\n", __FUNCTION__));
 		free(info);
-		info = NULL;
 		goto out_complete;
 	}
 
@@ -1959,7 +1955,7 @@ sna_dri_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
 					   strerror(errno));
 				limit--;
 			}
-			goto out_complete;
+			goto out_free_info;
 		}
 
 		info->frame = vbl.reply.sequence;
@@ -1996,15 +1992,16 @@ sna_dri_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
 				   strerror(errno));
 			limit--;
 		}
-		goto out_complete;
+		goto out_free_info;
 	}
 
 	info->frame = vbl.reply.sequence;
 	DRI2BlockClient(client, draw);
 	return TRUE;
 
+out_free_info:
+	sna_dri_frame_event_info_free(info);
 out_complete:
-	free(info);
 	DRI2WaitMSCComplete(client, draw, target_msc, 0, 0);
 	return TRUE;
 }
commit b817200371bfe16f44b879a793cf4a75ad17bc5c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 17 17:54:58 2012 +0100

    Don't issue a scanline wait while VT switched
    
    Be paranoid and check that we own the VT before emitting a scanline
    wait. If we attempt to wait on a fb/pipe that we do not own, we may
    issue an illegal command and cause a lockup.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/intel_dri.c b/src/intel_dri.c
index f6f0c86..a5ed545 100644
--- a/src/intel_dri.c
+++ b/src/intel_dri.c
@@ -553,7 +553,8 @@ I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion,
 	ValidateGC(dst, gc);
 
 	/* Wait for the scanline to be outside the region to be copied */
-	if (pixmap_is_scanout(get_drawable_pixmap(dst)) &&
+	if (scrn->vtSema &&
+	    pixmap_is_scanout(get_drawable_pixmap(dst)) &&
 	    intel->swapbuffers_wait && INTEL_INFO(intel)->gen < 60) {
 		BoxPtr box;
 		BoxRec crtcbox;
diff --git a/src/intel_video.c b/src/intel_video.c
index 25f6483..0834bb2 100644
--- a/src/intel_video.c
+++ b/src/intel_video.c
@@ -1285,7 +1285,7 @@ intel_wait_for_scanline(ScrnInfoPtr scrn, PixmapPtr pixmap,
 	int y1, y2;
 
 	pipe = -1;
-	if (pixmap_is_scanout(pixmap))
+	if (scrn->vtSema && pixmap_is_scanout(pixmap))
 		pipe = intel_crtc_to_pipe(crtc);
 	if (pipe < 0)
 		return;
diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index ef3b0f9..cb051b0 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -2018,6 +2018,10 @@ sna_covering_crtc(ScrnInfoPtr scrn,
 	int best_coverage, c;
 	BoxRec best_crtc_box;
 
+	/* If we do not own the VT, we do not own the CRTC either */
+	if (!scrn->vtSema)
+		return NULL;
+
 	DBG(("%s for box=(%d, %d), (%d, %d)\n",
 	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
 
commit af4a6e8cb52ace594934446e6d8a7aaa1945a9b0
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 16 20:57:03 2012 +0100

    sna: Don't assert exported buffers are not busy
    
    As we do not fully control these buffers, we cannot truly say when they
    are idle, we can only trust that the split between us and the compositor
    doesn't lead to much corruption.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 9b080f1..3d722d0 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -312,7 +312,7 @@ static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
 {
 	DBG(("%s: handle=%d, domain=%d\n",
 	     __FUNCTION__, bo->handle, bo->domain));
-	assert(!kgem_busy(kgem, bo->handle));
+	assert(bo->flush || !kgem_busy(kgem, bo->handle));
 
 	if (bo->rq)
 		kgem_retire(kgem);
@@ -332,7 +332,7 @@ Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
 {
 	assert(bo->refcnt);
 	assert(!bo->purged);
-	assert(!kgem_busy(kgem, bo->handle));
+	assert(bo->flush || !kgem_busy(kgem, bo->handle));
 	assert(bo->proxy == NULL);
 
 	assert(length <= bytes(bo));
@@ -1033,13 +1033,13 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
 inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
 					    struct kgem_bo *bo)
 {
+	assert(bo->reusable);
+	assert(bo->rq == NULL);
+	assert(bo->domain != DOMAIN_GPU);
 	assert(!kgem_busy(kgem, bo->handle));
 	assert(!bo->proxy);
 	assert(!bo->io);
 	assert(!bo->needs_flush);
-	assert(bo->rq == NULL);
-	assert(bo->domain != DOMAIN_GPU);
-	assert(bo->reusable);
 	assert(list_is_empty(&bo->vma));
 
 	if (bucket(bo) >= NUM_CACHE_BUCKETS) {
@@ -1098,6 +1098,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		goto destroy;
 
 	if (bo->vmap) {
+		assert(!bo->flush);
 		DBG(("%s: handle=%d is vmapped, tracking until free\n",
 		     __FUNCTION__, bo->handle));
 		if (bo->rq == NULL) {
@@ -2279,6 +2280,7 @@ struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name)
 	}
 
 	bo->reusable = false;
+	bo->flush = true;
 	return bo;
 }
 
@@ -2807,8 +2809,7 @@ search_inactive:
 		assert(bo->refcnt == 0);
 		assert(bo->reusable);
 		assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
-		assert((flags & CREATE_INACTIVE) == 0 ||
-		       !kgem_busy(kgem, bo->handle));
+		assert((flags & CREATE_INACTIVE) == 0 || !kgem_busy(kgem, bo->handle));
 		assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
 		bo->refcnt = 1;
 		return bo;
@@ -3206,8 +3207,8 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
 	if (bo->domain != DOMAIN_GTT) {
 		struct drm_i915_gem_set_domain set_domain;
 
-		DBG(("%s: sync: needs_flush? %d, domain? %d\n", __FUNCTION__,
-		     bo->needs_flush, bo->domain));
+		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
+		     bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
 
 		/* XXX use PROT_READ to avoid the write flush? */
 
commit a16616209bb2dcb7aaa859b38e154f0a10faa82b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Apr 14 19:03:25 2012 +0100

    uxa: Fix leak of glyph mask for unhandled glyph composition
    
    ==1401== 7,344 bytes in 34 blocks are possibly lost in loss record 570 of 587
    ==1401==    at 0x4027034: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==1401==    by 0x8BE5150: drm_intel_gem_bo_alloc_internal (intel_bufmgr_gem.c:689)
    ==1401==    by 0x899FC04: intel_uxa_create_pixmap (intel_uxa.c:1077)
    ==1401==    by 0x89C2C41: uxa_glyphs (uxa-glyphs.c:254)
    ==1401==    by 0x21F05E: damageGlyphs (damage.c:647)
    ==1401==    by 0x218E06: ProcRenderCompositeGlyphs (render.c:1434)
    ==1401==    by 0x15AA40: Dispatch (dispatch.c:439)
    ==1401==    by 0x1499E9: main (main.c:287)
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/uxa/uxa-glyphs.c b/uxa/uxa-glyphs.c
index b754f4e..921b99c 100644
--- a/uxa/uxa-glyphs.c
+++ b/uxa/uxa-glyphs.c
@@ -812,8 +812,10 @@ uxa_glyphs_via_mask(CARD8 op,
 				if (!uxa_pixmap_is_offscreen(src_pixmap) ||
 				    !uxa_screen->info->prepare_composite(PictOpAdd,
 									 this_atlas, NULL, mask,
-									 src_pixmap, NULL, pixmap))
+									 src_pixmap, NULL, pixmap)) {
+					FreePicture(mask, 0);
 					return -1;
+				}
 
 				glyph_atlas = this_atlas;
 			}
commit ae145c21e9fd3a12164f8b4720d059f9c158249e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Apr 14 18:42:23 2012 +0100

    sna: Avoid leaking the plane resources when determining sprite planes
    
    Fixes the tiny, one-off leak:
    
    ==1407== 8 bytes in 1 blocks are definitely lost in loss record 48 of 527
    ==1407==    at 0x402894D: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
    ==1407==    by 0x8580BE8: drmMalloc (xf86drm.c:147)
    ==1407==    by 0x8583D54: drmAllocCpy (xf86drmMode.c:73)
    ==1407==    by 0x8585265: drmModeGetPlaneResources (xf86drmMode.c:955)
    ==1407==    by 0x8A1BCE9: sna_video_sprite_setup (sna_video_sprite.c:367)
    ==1407==    by 0x8A1A0A3: sna_video_init (sna_video.c:523)
    ==1407==    by 0x89FD4E0: sna_screen_init (sna_driver.c:935)
    ==1407==    by 0x15AD80: AddScreen (dispatch.c:3909)
    ==1407==    by 0x19A2DB: InitOutput (xf86Init.c:817)
    ==1407==    by 0x14981C: main (main.c:204)
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
index 82db122..0e5f3ab 100644
--- a/src/sna/sna_video_sprite.c
+++ b/src/sna/sna_video_sprite.c
@@ -361,21 +361,21 @@ XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna,
 					   ScreenPtr screen)
 {
 	XF86VideoAdaptorPtr adaptor;
+	struct drm_mode_get_plane_res r;
 	struct sna_video *video;
-	drmModePlaneRes *plane_resources;
 
-	plane_resources = drmModeGetPlaneResources(sna->kgem.fd);
-	if (!plane_resources)
+	memset(&r, 0, sizeof(struct drm_mode_get_plane_res));
+	if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPLANERESOURCES, &r))
+		return NULL;
+	if (r.count_planes == 0)
 		return NULL;
 
 	adaptor = calloc(1,
 			 sizeof(XF86VideoAdaptorRec) +
 			 sizeof(struct sna_video) +
 			 sizeof(DevUnion));
-	if (!adaptor) {
-		free(plane_resources);
+	if (!adaptor)
 		return NULL;
-	}
 
 	adaptor->type = XvWindowMask | XvInputMask | XvImageMask;
 	adaptor->flags = VIDEO_OVERLAID_IMAGES /*| VIDEO_CLIP_TO_VIEWPORT */ ;
@@ -428,7 +428,5 @@ XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna,
 
 	xvColorKey = MAKE_ATOM("XV_COLORKEY");
 
-	free(plane_resources);
-
 	return adaptor;
 }
commit 69a7737abeded6ee923643bd8a80a5a84e6a979c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Apr 14 12:06:51 2012 +0100

    sna: Align texture subsurfaces to 2x2 texture samples
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 66e078a..ec2aaad 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -839,8 +839,15 @@ sna_render_pixmap_partial(struct sna *sna,
 		box.x2 = ALIGN(box.x2, tile_width * 8 / pixmap->drawable.bitsPerPixel);
 
 		offset = box.x1 * pixmap->drawable.bitsPerPixel / 8 / tile_width * tile_size;
-	} else
+	} else {
+		box.y1 = box.y1 & ~1;
+		box.y2 = ALIGN(box.y2, 2);
+
+		box.x1 = box.x1 & ~1;
+		box.x2 = ALIGN(box.x2, 2);
+
 		offset = box.x1 * pixmap->drawable.bitsPerPixel / 8;
+	}
 
 	if (box.x2 > pixmap->drawable.width)
 		box.x2 = pixmap->drawable.width;
commit 1d2a46e0902d82b43a5e12af36521a6a7fd6ba39
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Apr 14 12:04:23 2012 +0100

    sna: Align redirect subsurfaces to 2x2 or 4x4 render spans
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 8af80f2..66e078a 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -1767,8 +1767,23 @@ sna_render_composite_redirect(struct sna *sna,
 			box.x2 = ALIGN(box.x2, tile_width * 8 / op->dst.pixmap->drawable.bitsPerPixel);
 
 			offset = box.x1 * op->dst.pixmap->drawable.bitsPerPixel / 8 / tile_width * tile_size;
-		} else
+		} else {
+			if (sna->kgem.gen < 40) {
+				box.y1 = box.y1 & ~3;
+				box.y2 = ALIGN(box.y2, 4);
+
+				box.x1 = box.x1 & ~3;
+				box.x2 = ALIGN(box.x2, 4);
+			} else {
+				box.y1 = box.y1 & ~1;
+				box.y2 = ALIGN(box.y2, 2);
+
+				box.x1 = box.x1 & ~1;
+				box.x2 = ALIGN(box.x2, 2);
+			}
+
 			offset = box.x1 * op->dst.pixmap->drawable.bitsPerPixel / 8;
+		}
 
 		if (box.y2 > op->dst.pixmap->drawable.height)
 			box.y2 = op->dst.pixmap->drawable.height;
commit 1ce2b65d622797000e0a4db7dc851d5b1da04f85
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Apr 14 11:59:31 2012 +0100

    sna: Align render target sizes on gen2/3 to 4x4 render spans
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index f1b0376..9b080f1 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -686,12 +686,6 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	DBG(("%s: partial buffer size=%d [%d KiB]\n", __FUNCTION__,
 	     kgem->partial_buffer_size, kgem->partial_buffer_size / 1024));
 
-	kgem->min_alignment = 4;
-	if (gen < 60)
-		/* XXX workaround an issue where we appear to fail to
-		 * disable dual-stream mode */
-		kgem->min_alignment = 64;
-
 	kgem->max_object_size = 2 * aperture.aper_size / 3;
 	kgem->max_gpu_size = kgem->max_object_size;
 	if (!kgem->has_llc)
@@ -776,8 +770,8 @@ static uint32_t kgem_untiled_pitch(struct kgem *kgem,
 				   uint32_t width, uint32_t bpp,
 				   bool scanout)
 {
-	width = width * bpp >> 3;
-	return ALIGN(width, scanout ? 64 : kgem->min_alignment);
+	width = ALIGN(width, 4) * bpp >> 3;
+	return ALIGN(width, scanout ? 64 : 4);
 }
 
 void kgem_get_tile_size(struct kgem *kgem, int tiling,
@@ -838,14 +832,19 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
 			tile_width = 512;
 			tile_height = kgem->gen < 30 ? 16 : 8;
 		} else {
-			tile_width = scanout ? 64 : kgem->min_alignment;
-			tile_height = 2;
+			tile_width = scanout ? 64 : 4 * bpp >> 3;
+			tile_height = 4;
 		}
 	} else switch (tiling) {
 	default:
 	case I915_TILING_NONE:
-		tile_width = scanout ? 64 : kgem->min_alignment;
-		tile_height = 2;
+		if (kgem->gen < 40) {
+			tile_width = scanout ? 64 : 4 * bpp >> 3;
+			tile_height = 4;
+		} else {
+			tile_width = scanout ? 64 : 2 * bpp >> 3;
+			tile_height = 2;
+		}
 		break;
 	case I915_TILING_X:
 		tile_width = 512;
@@ -899,7 +898,7 @@ static uint32_t kgem_aligned_height(struct kgem *kgem,
 	} else switch (tiling) {
 	default:
 	case I915_TILING_NONE:
-		tile_height = 2;
+		tile_height = kgem->gen < 40 ? 4 : 2;
 		break;
 	case I915_TILING_X:
 		tile_height = 8;
@@ -2881,7 +2880,7 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
 
 		stride = ALIGN(width, 2) * bpp >> 3;
 		stride = ALIGN(stride, 4);
-		size = ALIGN(height, 2) * stride;
+		size = ALIGN(height, kgem->gen < 40 ? 4 : 2) * stride;
 
 		assert(size >= PAGE_SIZE);
 
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index e52645c..913e1a9 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -161,7 +161,6 @@ struct kgem {
 	uint16_t half_cpu_cache_pages;
 	uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable;
 	uint32_t aperture, aperture_fenced;
-	uint32_t min_alignment;
 	uint32_t max_upload_tile_size, max_copy_tile_size;
 	uint32_t max_gpu_size, max_cpu_size;
 	uint32_t large_object_size, max_object_size;
commit 89f2b09b1e5be9842747998ea4fe32a6f1ede4cc
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 13 16:37:43 2012 +0100

    sna: Avoid using TILING_Y for large objects on gen2/3
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=48636
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 8fcdd14..1c43fb7 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -458,6 +458,16 @@ static inline uint32_t default_tiling(PixmapPtr pixmap)
 		     __FUNCTION__));
 		sna_damage_destroy(&priv->gpu_damage);
 		priv->undamaged = false;
+
+		/* Only on later generations was the render pipeline
+		 * more flexible than the BLT. So on gen2/3, prefer to
+		 * keep large objects accessible through the BLT.
+		 */
+		if (sna->kgem.gen < 40 &&
+		    (pixmap->drawable.width > sna->render.max_3d_size ||
+		     pixmap->drawable.height > sna->render.max_3d_size))
+			return I915_TILING_X;
+
 		return I915_TILING_Y;
 	}
 
commit eaadbce122059066353743f1653aa16e9d9b747f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 13 15:12:36 2012 +0100

    sna: Relax bogus assertion
    
    The bo may be considered unmappable due to being bound to outside the
    mappable region, which we are attempting to rectify through mapping into
    the GTT domain.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 2d09666..f1b0376 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3186,7 +3186,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
 
 	ptr = bo->map;
 	if (ptr == NULL) {
-		assert(kgem_bo_is_mappable(kgem, bo));
+		assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
 
 		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
commit b478420740d05fa87ddbd92042b1f7f2d002f73e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 13 15:07:13 2012 +0100

    sna: Limit the buffer reuse for mappable uploads to only those with mmaps
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index f0c971e..2d09666 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2156,6 +2156,9 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
 			//assert(!kgem_busy(kgem, bo->handle));
 			return bo;
 		}
+
+		if (flags & CREATE_EXACT)
+			return NULL;
 	}
 
 	cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
@@ -3683,7 +3686,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		 * devices like gen2 or with relatively slow gpu like i3.
 		 */
 		old = search_linear_cache(kgem, alloc,
-					  CREATE_INACTIVE | CREATE_GTT_MAP);
+					  CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
 #if HAVE_I915_GEM_BUFFER_INFO
 		if (old) {
 			struct drm_i915_gem_buffer_info info;
@@ -3705,7 +3708,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 #endif
 		if (old == NULL)
 			old = search_linear_cache(kgem, NUM_PAGES(size),
-						  CREATE_INACTIVE | CREATE_GTT_MAP);
+						  CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
 		if (old) {
 			DBG(("%s: reusing handle=%d for buffer\n",
 			     __FUNCTION__, old->handle));
commit 90e2740e7e459c56205fa65bab1ae3dbfd5d3945
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 13 13:51:57 2012 +0100

    sna: Remove the conflicting assertion during GTT map
    
    Reported-by: Clemens Eisserer <linuxhippy at gmail.com>
    References: https://bugs.freedesktop.org/show_bug.cgi?id=48636
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 14a0067..f0c971e 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3183,7 +3183,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
 
 	ptr = bo->map;
 	if (ptr == NULL) {
-		assert(bytes(bo) <= kgem->aperture_mappable / 4);
+		assert(kgem_bo_is_mappable(kgem, bo));
 
 		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
commit 9e6d55a8d63f10ca6a2b10e44d00c84b07724485
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 13 09:56:39 2012 +0100

    sna: Don't use miSpan code for wide-spans by default, too expensive
    
    Only use the fall-forward miSpans code when it prevents a readback.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index b3507ad..8fcdd14 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -60,7 +60,7 @@
 #define FORCE_FLUSH 0
 
 #define USE_INPLACE 1
-#define USE_WIDE_SPANS 1 /* -1 force CPU, 1 force GPU */
+#define USE_WIDE_SPANS 0 /* -1 force CPU, 1 force GPU */
 #define USE_ZERO_SPANS 1 /* -1 force CPU, 1 force GPU */
 #define USE_SHM_VMAP 0
 #define PREFER_VMAP 0
commit 9becfbbf89f2b170e50f705cabfc7bbf1dcf9846
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 13 09:38:57 2012 +0100

    sna: Draw dashed PolyLines twice, once for the fgPixel, once for the bgPixel
    
    As the fast paths only setup state upfront, we were missing the state
    changes required between dash-on/off. Take advantage of that each pixel
    is only drawn once to batch the state changes and run the
    miZeroDashLines twice.
    
    A future task would be to use a custom line drawing routine...
    
    Fixes regression from ec1267df746512c2e262ef0bd9e9527bc5efe6f4.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 590cc11..b3507ad 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4275,6 +4275,18 @@ sna_fill_spans__fill(DrawablePtr drawable,
 }
 
 static void
+sna_fill_spans__dash(DrawablePtr drawable,
+		     GCPtr gc, int n,
+		     DDXPointPtr pt, int *width, int sorted)
+{
+	struct sna_fill_spans *data = sna_gc(gc)->priv;
+	struct sna_fill_op *op = data->op;
+
+	if (op->base.u.blt.pixel == gc->fgPixel)
+		sna_fill_spans__fill(drawable, gc, n, pt, width, sorted);
+}
+
+static void
 sna_fill_spans__fill_offset(DrawablePtr drawable,
 			    GCPtr gc, int n,
 			    DDXPointPtr pt, int *width, int sorted)
@@ -4306,6 +4318,18 @@ sna_fill_spans__fill_offset(DrawablePtr drawable,
 }
 
 static void
+sna_fill_spans__dash_offset(DrawablePtr drawable,
+			    GCPtr gc, int n,
+			    DDXPointPtr pt, int *width, int sorted)
+{
+	struct sna_fill_spans *data = sna_gc(gc)->priv;
+	struct sna_fill_op *op = data->op;
+
+	if (op->base.u.blt.pixel == gc->fgPixel)
+		sna_fill_spans__fill_offset(drawable, gc, n, pt, width, sorted);
+}
+
+static void
 sna_fill_spans__fill_clip_extents(DrawablePtr drawable,
 				  GCPtr gc, int n,
 				  DDXPointPtr pt, int *width, int sorted)
@@ -4349,6 +4373,18 @@ sna_fill_spans__fill_clip_extents(DrawablePtr drawable,
 }
 
 static void
+sna_fill_spans__dash_clip_extents(DrawablePtr drawable,
+				  GCPtr gc, int n,
+				  DDXPointPtr pt, int *width, int sorted)
+{
+	struct sna_fill_spans *data = sna_gc(gc)->priv;
+	struct sna_fill_op *op = data->op;
+
+	if (op->base.u.blt.pixel == gc->fgPixel)
+		sna_fill_spans__fill_clip_extents(drawable, gc, n, pt, width, sorted);
+}
+
+static void
 sna_fill_spans__fill_clip_boxes(DrawablePtr drawable,
 				GCPtr gc, int n,
 				DDXPointPtr pt, int *width, int sorted)
@@ -4422,6 +4458,18 @@ sna_fill_spans__fill_clip_boxes(DrawablePtr drawable,
 		op->boxes(data->sna, op, box, b - box);
 }
 
+static void
+sna_fill_spans__dash_clip_boxes(DrawablePtr drawable,
+				GCPtr gc, int n,
+				DDXPointPtr pt, int *width, int sorted)
+{
+	struct sna_fill_spans *data = sna_gc(gc)->priv;
+	struct sna_fill_op *op = data->op;
+
+	if (op->base.u.blt.pixel == gc->fgPixel)
+		sna_fill_spans__fill_clip_boxes(drawable, gc, n, pt, width, sorted);
+}
+
 static Bool
 sna_fill_spans_blt(DrawablePtr drawable,
 		   struct kgem_bo *bo, struct sna_damage **damage,
@@ -6646,44 +6694,78 @@ spans_fallback:
 		get_drawable_deltas(drawable, data.pixmap, &data.dx, &data.dy);
 		sna_gc(gc)->priv = &data;
 
-		if (gc->lineWidth == 0 &&
-		    gc_is_solid(gc, &color)) {
+		if (gc->lineWidth == 0 && gc_is_solid(gc, &color)) {
 			struct sna_fill_op fill;
 
-			if (!sna_fill_init_blt(&fill,
-					       data.sna, data.pixmap,
-					       data.bo, gc->alu, color))
-				goto fallback;
+			if (gc->lineStyle == LineSolid) {
+				if (!sna_fill_init_blt(&fill,
+						       data.sna, data.pixmap,
+						       data.bo, gc->alu, color))
+					goto fallback;
 
-			data.op = &fill;
+				data.op = &fill;
 
-			if ((data.flags & 2) == 0) {
-				if (data.dx | data.dy)
-					sna_gc_ops__tmp.FillSpans = sna_fill_spans__fill_offset;
-				else
-					sna_gc_ops__tmp.FillSpans = sna_fill_spans__fill;
-			} else {
-				region_maybe_clip(&data.region,
-						  gc->pCompositeClip);
-				if (!RegionNotEmpty(&data.region))
-					return;
+				if ((data.flags & 2) == 0) {
+					if (data.dx | data.dy)
+						sna_gc_ops__tmp.FillSpans = sna_fill_spans__fill_offset;
+					else
+						sna_gc_ops__tmp.FillSpans = sna_fill_spans__fill;
+				} else {
+					region_maybe_clip(&data.region,
+							  gc->pCompositeClip);
+					if (!RegionNotEmpty(&data.region))
+						return;
 
-				if (region_is_singular(&data.region))
-					sna_gc_ops__tmp.FillSpans = sna_fill_spans__fill_clip_extents;
-				else
-					sna_gc_ops__tmp.FillSpans = sna_fill_spans__fill_clip_boxes;
-			}
-			assert(gc->miTranslate);
+					if (region_is_singular(&data.region))
+						sna_gc_ops__tmp.FillSpans = sna_fill_spans__fill_clip_extents;
+					else
+						sna_gc_ops__tmp.FillSpans = sna_fill_spans__fill_clip_boxes;
+				}
+				assert(gc->miTranslate);
 
-			gc->ops = &sna_gc_ops__tmp;
-			if (gc->lineStyle == LineSolid) {
+				gc->ops = &sna_gc_ops__tmp;
 				DBG(("%s: miZeroLine (solid fill)\n", __FUNCTION__));
 				miZeroLine(drawable, gc, mode, n, pt);
+				fill.done(data.sna, &fill);
 			} else {
-				DBG(("%s: miZeroDashLine (solid fill)\n", __FUNCTION__));
+				data.op = &fill;
+
+				if ((data.flags & 2) == 0) {
+					if (data.dx | data.dy)
+						sna_gc_ops__tmp.FillSpans = sna_fill_spans__dash_offset;
+					else
+						sna_gc_ops__tmp.FillSpans = sna_fill_spans__dash;
+				} else {
+					region_maybe_clip(&data.region,
+							  gc->pCompositeClip);
+					if (!RegionNotEmpty(&data.region))
+						return;
+
+					if (region_is_singular(&data.region))
+						sna_gc_ops__tmp.FillSpans = sna_fill_spans__dash_clip_extents;
+					else
+						sna_gc_ops__tmp.FillSpans = sna_fill_spans__dash_clip_boxes;
+				}
+				assert(gc->miTranslate);
+
+				gc->ops = &sna_gc_ops__tmp;
+				DBG(("%s: miZeroLine (solid dash)\n", __FUNCTION__));
+				if (!sna_fill_init_blt(&fill,
+						       data.sna, data.pixmap,
+						       data.bo, gc->alu, color))
+					goto fallback;
+
 				miZeroDashLine(drawable, gc, mode, n, pt);
+				fill.done(data.sna, &fill);
+
+				if (sna_fill_init_blt(&fill,
+						       data.sna, data.pixmap,
+						       data.bo, gc->alu,
+						       gc->bgPixel)) {
+					miZeroDashLine(drawable, gc, mode, n, pt);
+					fill.done(data.sna, &fill);
+				}
 			}
-			fill.done(data.sna, &fill);
 		} else {
 			/* Note that the WideDash functions alternate
 			 * between filling using fgPixel and bgPixel
commit e269ed5d4e1b9d758aeb9a85ed0fa631f0aff0b1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Apr 12 22:46:22 2012 +0100

    sna: Restore CPU domain for vmapped buffers when reusing
    
    For a vmapped upload buffer, we need to notify the kernel (and thereby
    the GPU) to invalidate the sampler and flush its caches when we reuse an
    idle buffer.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 6ea4d48..14a0067 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3600,6 +3600,10 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 			offset = 0;
 			bo->used = size;
 			list_move(&bo->base.list, &kgem->active_partials);
+
+			if (bo->base.vmap)
+				kgem_bo_sync__cpu(kgem, &bo->base);
+
 			goto done;
 		} while (kgem_retire(kgem));
 	}
commit d29b8650c40c673e6ddddaf52db9247e9836cba8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Apr 12 22:23:12 2012 +0100

    sna: Revert use of mmap64()
    
    As this just causes mayhem on a 64-bit platform. Doomed if you, doomed
    if you don't. :(
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 109f3b7..6ea4d48 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -220,7 +220,7 @@ static void *gem_mmap(int fd, uint32_t handle, int size, int prot)
 		return NULL;
 	}
 
-	ptr = mmap64(0, size, prot, MAP_SHARED, fd, mmap_arg.offset);
+	ptr = mmap(0, size, prot, MAP_SHARED, fd, mmap_arg.offset);
 	if (ptr == MAP_FAILED) {
 		assert(0);
 		ptr = NULL;
commit a78b1d71a39ae29d5f85bd82c09202ebec3e6539
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Apr 12 15:37:25 2012 +0100

    sna: Declare AC_SYS_LARGEFILE for mmap64
    
    In order to use the full 32-bits of mmap address space on small
    platforms we need to use mmap64().
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/configure.ac b/configure.ac
index b5919bf..5124100 100644
--- a/configure.ac
+++ b/configure.ac
@@ -52,6 +52,7 @@ m4_ifndef([XORG_DRIVER_CHECK_EXT],
 # Initialize libtool
 AC_DISABLE_STATIC
 AC_PROG_LIBTOOL
+AC_SYS_LARGEFILE
 
 # Are we in a git checkout?
 dot_git=no
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 6ea4d48..109f3b7 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -220,7 +220,7 @@ static void *gem_mmap(int fd, uint32_t handle, int size, int prot)
 		return NULL;
 	}
 
-	ptr = mmap(0, size, prot, MAP_SHARED, fd, mmap_arg.offset);
+	ptr = mmap64(0, size, prot, MAP_SHARED, fd, mmap_arg.offset);
 	if (ptr == MAP_FAILED) {
 		assert(0);
 		ptr = NULL;
commit 09deba927daa96be6230b1c3e1b425622512d8a2
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 10 20:59:36 2012 +0100

    sna: Check ioctl return from set-domain
    
    Let's not assume it succeeds and so avoid altering our bookkeeping along
    failure paths.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 2a8b3cd..6ea4d48 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3213,10 +3213,10 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
 		set_domain.handle = bo->handle;
 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
 		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
-		drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
-
-		kgem_bo_retire(kgem, bo);
-		bo->domain = DOMAIN_GTT;
+		if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
+			kgem_bo_retire(kgem, bo);
+			bo->domain = DOMAIN_GTT;
+		}
 	}
 
 	return ptr;
@@ -3409,10 +3409,10 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
 		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
 		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
 
-		drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
-
-		kgem_bo_retire(kgem, bo);
-		bo->domain = DOMAIN_CPU;
+		if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
+			kgem_bo_retire(kgem, bo);
+			bo->domain = DOMAIN_CPU;
+		}
 	}
 }
 
@@ -4033,11 +4033,15 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 		set_domain.read_domains =
 			IS_CPU_MAP(bo->base.map) ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;
 
-		drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+		if (drmIoctl(kgem->fd,
+			     DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain))
+			return;
 	} else {
-		gem_read(kgem->fd,
-			 bo->base.handle, (char *)bo->mem+offset,
-			 offset, length);
+		if (gem_read(kgem->fd,
+			     bo->base.handle, (char *)bo->mem+offset,
+			     offset, length))
+			return;
+
 		kgem_bo_map__cpu(kgem, &bo->base);
 	}
 	kgem_bo_retire(kgem, &bo->base);
commit 0b12f1d8e4d0a4fafac9553f144535efc4ebe0be
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 10 15:23:54 2012 +0100

    sna: Fix shadowed variable
    
    sna_accel.c: In function 'sna_pixmap_move_area_to_gpu':
    sna_accel.c:1751:12: warning: declaration of 'flags' shadows a parameter
    [-Wshadow]
    sna_accel.c:1731:72: warning: shadowed declaration is here [-Wshadow]
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index b6adc60..590cc11 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1748,20 +1748,20 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box, unsigned int flags)
 	}
 
 	if (priv->gpu_bo == NULL) {
-		unsigned flags;
+		unsigned create;
 
-		flags = 0;
+		create = 0;
 		if (priv->cpu_damage)
-			flags |= CREATE_INACTIVE;
+			create |= CREATE_INACTIVE;
 		if (pixmap->usage_hint == SNA_CREATE_FB)
-			flags |= CREATE_EXACT | CREATE_SCANOUT;
+			create |= CREATE_EXACT | CREATE_SCANOUT;
 
 		priv->gpu_bo = kgem_create_2d(&sna->kgem,
 					      pixmap->drawable.width,
 					      pixmap->drawable.height,
 					      pixmap->drawable.bitsPerPixel,
 					      sna_pixmap_choose_tiling(pixmap),
-					      flags);
+					      create);
 		if (priv->gpu_bo == NULL)
 			return false;
 
commit 755a7107aed268d87c5cc0feb1ba388b0cb7fc59
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 10 15:19:19 2012 +0100

    sna: Fix typo and use the right pointer for kgem_bo_destroy
    
    Useless warnings in xorg headers ftl.
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=48400
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index ab2997d..b6adc60 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1934,7 +1934,8 @@ sna_drawable_use_bo(DrawablePtr drawable,
 
 	if (priv->gpu_bo && priv->gpu_bo->proxy) {
 		assert(priv->gpu_bo->proxy->rq);
-		kgem_bo_destroy(to_sna_from_pixmap(pixmap), priv->gpu_bo);
+		kgem_bo_destroy(&to_sna_from_pixmap(pixmap)->kgem,
+				priv->gpu_bo);
 		priv->gpu_bo = NULL;
 		goto use_cpu_bo;
 	}
@@ -2727,7 +2728,7 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	if (priv->gpu_bo && priv->gpu_bo->proxy) {
 		DBG(("%s: discarding cached upload buffer\n", __FUNCTION__));
 		assert(priv->gpu_bo->proxy->rq);
-		kgem_bo_destroy(sna, priv->gpu_bo);
+		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
 		priv->gpu_bo = NULL;
 	}
 
commit c8502e350cb18f6f5d821d237ffcee453f347eba
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 10 14:43:28 2012 +0100

    sna: Add missing alloc failure check for creating tile source
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 083cd3c..ab2997d 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -8767,6 +8767,9 @@ sna_pixmap_get_source_bo(PixmapPtr pixmap)
 					       pixmap->drawable.bitsPerPixel,
 					       KGEM_BUFFER_WRITE_INPLACE,
 					       &ptr);
+		if (upload == NULL)
+			return NULL;
+
 		memcpy_blt(pixmap->devPrivate.ptr, ptr,
 			   pixmap->drawable.bitsPerPixel,
 			   pixmap->devKind, upload->pitch,
commit 9cc6f7ccc55cc11f47b3b7d626c9f5a7c1327d57
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 10 12:06:56 2012 +0100

    sna: Release the freed bo cache upon expire
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 581e3c8..2a8b3cd 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1968,6 +1968,12 @@ bool kgem_expire_cache(struct kgem *kgem)
 	bool idle;
 	unsigned int i;
 
+	while (__kgem_freed_bo) {
+		bo = __kgem_freed_bo;
+		__kgem_freed_bo = *(struct kgem_bo **)bo;
+		free(bo);
+	}
+
 	kgem_retire(kgem);
 	if (kgem->wedged)
 		kgem_cleanup(kgem);
commit 102d11906a672140bac099e7bd1b35345d13a2fc
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 9 22:54:51 2012 +0100

    sna: Check for an inactive partial buffer to reuse after retiring requests
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 470cefb..581e3c8 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3575,7 +3575,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 	}
 
 	if (flags & KGEM_BUFFER_WRITE) {
-		list_for_each_entry_reverse(bo, &kgem->inactive_partials, base.list) {
+		do list_for_each_entry_reverse(bo, &kgem->inactive_partials, base.list) {
 			assert(bo->base.io);
 			assert(bo->base.refcnt == 1);
 
@@ -3595,7 +3595,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 			bo->used = size;
 			list_move(&bo->base.list, &kgem->active_partials);
 			goto done;
-		}
+		} while (kgem_retire(kgem));
 	}
 
 #if !DBG_NO_MAP_UPLOAD
commit e2fb2421d8f5c07925d1699673aa4b1dd6c6b22c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 9 22:54:15 2012 +0100

    sna: Release partial buffers during cache expiration
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 6dd1871..470cefb 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1925,6 +1925,7 @@ void kgem_throttle(struct kgem *kgem)
 
 static void kgem_expire_partial(struct kgem *kgem)
 {
+	kgem_retire_partials(kgem);
 	while (!list_is_empty(&kgem->inactive_partials)) {
 		struct kgem_partial_bo *bo =
 			list_first_entry(&kgem->inactive_partials,
commit 333fdcad8677675a4758223c1a980c90d970ee42
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 9 22:53:31 2012 +0100

    sna: Repeat expire whilst there remaining outstanding requests
    
    Do not allow the cache expiration to finish if we are still running
    requests.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index dde9d1d..6dd1871 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1979,7 +1979,7 @@ bool kgem_expire_cache(struct kgem *kgem)
 	time(&now);
 	expire = 0;
 
-	idle = true;
+	idle = !kgem->need_retire;
 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
 		idle &= list_is_empty(&kgem->inactive[i]);
 		list_for_each_entry(bo, &kgem->inactive[i], list) {
@@ -1999,7 +1999,7 @@ bool kgem_expire_cache(struct kgem *kgem)
 	if (expire == 0)
 		return true;
 
-	idle = true;
+	idle = !kgem->need_retire;
 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
 		struct list preserve;
 
commit 778232e3d2fb5340a3092014801dc00a56c56d42
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 10 11:14:50 2012 +0100

    sna: Only move the bo into the read domain for readback
    
    And mark it as currently in no domain afterwards, so that if we reuse
    it, it will be appropriately moved later.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index c8be7c8..dde9d1d 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3997,7 +3997,6 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 {
 	struct kgem_partial_bo *bo;
 	uint32_t offset = _bo->delta, length = _bo->size.bytes;
-	int domain;
 
 	assert(_bo->io);
 	assert(_bo->exec == &_kgem_dummy_exec);
@@ -4023,15 +4022,9 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 
 		VG_CLEAR(set_domain);
 		set_domain.handle = bo->base.handle;
-		if (IS_CPU_MAP(bo->base.map)) {
-			set_domain.read_domains = I915_GEM_DOMAIN_CPU;
-			set_domain.write_domain = I915_GEM_DOMAIN_CPU;
-			domain = DOMAIN_CPU;
-		} else {
-			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
-			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
-			domain = DOMAIN_GTT;
-		}
+		set_domain.write_domain = 0;
+		set_domain.read_domains =
+			IS_CPU_MAP(bo->base.map) ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;
 
 		drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
 	} else {
@@ -4039,11 +4032,9 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 			 bo->base.handle, (char *)bo->mem+offset,
 			 offset, length);
 		kgem_bo_map__cpu(kgem, &bo->base);
-		domain = DOMAIN_NONE;
 	}
-	kgem_retire(kgem);
-	assert(bo->base.rq == NULL);
-	bo->base.domain = domain;
+	kgem_bo_retire(kgem, &bo->base);
+	bo->base.domain = DOMAIN_NONE;
 }
 
 uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
commit dd093eafb9b94b8e4cd8853d74078c3aa7e72f57
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 9 14:09:42 2012 +0100

    sna: Add assertions around proxy list handling
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=48400
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index f23bf0c..c8be7c8 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -320,6 +320,7 @@ static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
 	if (bo->exec == NULL) {
 		DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d\n",
 		     __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL));
+		assert(list_is_empty(&bo->vma));
 		bo->rq = NULL;
 		list_del(&bo->request);
 		bo->needs_flush = bo->flush;
@@ -1040,6 +1041,7 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
 	assert(bo->rq == NULL);
 	assert(bo->domain != DOMAIN_GPU);
 	assert(bo->reusable);
+	assert(list_is_empty(&bo->vma));
 
 	if (bucket(bo) >= NUM_CACHE_BUCKETS) {
 		kgem_bo_free(kgem, bo);
@@ -1051,12 +1053,11 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
 		int type = IS_CPU_MAP(bo->map);
 		if (bucket(bo) >= NUM_CACHE_BUCKETS ||
 		    (!type && !kgem_bo_is_mappable(kgem, bo))) {
-			list_del(&bo->vma);
 			munmap(MAP(bo->map), bytes(bo));
 			bo->map = NULL;
 		}
 		if (bo->map) {
-			list_move(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]);
+			list_add(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]);
 			kgem->vma[type].count++;
 		}
 	}
commit 7f0bede3e7e3f92a637d1c886304b16afc0e34f2
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 9 10:48:08 2012 +0100

    sna/traps: Use a temporary variable for the write pointer
    
    To avoid accumulating the write offset for wide spans, we need to reset
    the destination pointer between spans.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=48332
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index f7a2111..120e755 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -1476,55 +1476,61 @@ inplace_row(struct active_list *active, uint8_t *row, int width)
 		} else
 			FAST_SAMPLES_X_TO_INT_FRAC(right->x.quo, rix, rfx);
 		if (lix == rix) {
-			if (rfx != lfx)
+			if (rfx != lfx) {
+				assert(lix < width);
 				row[lix] += (rfx-lfx) * 256 / FAST_SAMPLES_X;
+			}
 		} else {
+			assert(lix < width);
 			if (lfx == 0)
 				row[lix] = 0xff;
 			else
 				row[lix] += 256 - lfx * 256 / FAST_SAMPLES_X;
 
-			if (rfx)
+			assert(rix <= width);
+			if (rfx) {
+				assert(rix < width);
 				row[rix] += rfx * 256 / FAST_SAMPLES_X;
+			}
 
 			if (rix > ++lix) {
+				uint8_t *r = row + lix;
 				rix -= lix;
-				row += lix;
 #if 0
 				if (rix == 1)
 					*row = 0xff;
 				else
 					memset(row, 0xff, rix);
 #else
-				if ((uintptr_t)row & 1 && rix) {
-					*row++ = 0xff;
+				if ((uintptr_t)r & 1 && rix) {
+					*r++ = 0xff;
 					rix--;
 				}
-				if ((uintptr_t)row & 2 && rix >= 2) {
-					*(uint16_t *)row = 0xffff;
-					row += 2;
+				if ((uintptr_t)r & 2 && rix >= 2) {
+					*(uint16_t *)r = 0xffff;
+					r += 2;
 					rix -= 2;
 				}
-				if ((uintptr_t)row & 4 && rix >= 4) {
-					*(uint32_t *)row = 0xffffffff;
-					row += 4;
+				if ((uintptr_t)r & 4 && rix >= 4) {
+					*(uint32_t *)r = 0xffffffff;
+					r += 4;
 					rix -= 4;
 				}
 				while (rix >= 8) {
-					*(uint64_t *)row = 0xffffffffffffffff;
-					row += 8;
+					*(uint64_t *)r = 0xffffffffffffffff;
+					r += 8;
 					rix -= 8;
 				}
 				if (rix & 4) {
-					*(uint32_t *)row = 0xffffffff;
-					row += 4;
+					*(uint32_t *)r = 0xffffffff;
+					r += 4;
 				}
 				if (rix & 2) {
-					*(uint16_t *)row = 0xffff;
-					row += 2;
+					*(uint16_t *)r = 0xffff;
+					r += 2;
 				}
 				if (rix & 1)
-					*row = 0xff;
+					*r = 0xff;
 #endif
 			}
 		}
commit 2e4da00e3e03b873f5cad0cc5b1f6cc791852ca5
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 9 10:42:18 2012 +0100

    sna/traps: Assert that the inplace row is contained before writing
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=48332
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index dbf581e..f7a2111 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -1786,6 +1786,7 @@ tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
 
 				inplace_subrow(active, ptr, width, &min, &max);
 			}
+			assert(min >= 0 && max <= width);
 			memset(row, 0, min);
 			if (max > min)
 				inplace_end_subrows(active, row+min, (int8_t*)ptr+min, max-min);
commit 0464e93a088a9e8bc29ad8b36b6e12c3dda32ec6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Apr 8 17:16:03 2012 +0100

    sna: Add some assertions for misuse of proxies
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=48400
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index de02e9d..f23bf0c 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -332,6 +332,7 @@ Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
 	assert(bo->refcnt);
 	assert(!bo->purged);
 	assert(!kgem_busy(kgem, bo->handle));
+	assert(bo->proxy == NULL);
 
 	assert(length <= bytes(bo));
 	if (gem_write(kgem->fd, bo->handle, 0, length, data))
@@ -3156,6 +3157,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
 	     bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
 	assert(!bo->purged);
+	assert(bo->proxy == NULL);
 	assert(bo->exec == NULL);
 	assert(list_is_empty(&bo->list));
 
@@ -3385,6 +3387,7 @@ struct kgem_bo *kgem_create_map(struct kgem *kgem,
 
 void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
 {
+	assert(bo->proxy == NULL);
 	kgem_bo_submit(kgem, bo);
 
 	if (bo->domain != DOMAIN_CPU) {
@@ -4037,7 +4040,8 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 		kgem_bo_map__cpu(kgem, &bo->base);
 		domain = DOMAIN_NONE;
 	}
-	kgem_bo_retire(kgem, &bo->base);
+	kgem_retire(kgem);
+	assert(bo->base.rq == NULL);
 	bo->base.domain = domain;
 }
 
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 7425a51..083cd3c 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1933,6 +1933,7 @@ sna_drawable_use_bo(DrawablePtr drawable,
 	}
 
 	if (priv->gpu_bo && priv->gpu_bo->proxy) {
+		assert(priv->gpu_bo->proxy->rq);
 		kgem_bo_destroy(to_sna_from_pixmap(pixmap), priv->gpu_bo);
 		priv->gpu_bo = NULL;
 		goto use_cpu_bo;
@@ -2725,6 +2726,7 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 
 	if (priv->gpu_bo && priv->gpu_bo->proxy) {
 		DBG(("%s: discarding cached upload buffer\n", __FUNCTION__));
+		assert(priv->gpu_bo->proxy->rq);
 		kgem_bo_destroy(sna, priv->gpu_bo);
 		priv->gpu_bo = NULL;
 	}
@@ -3520,6 +3522,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 
 	if (dst_priv->gpu_bo && dst_priv->gpu_bo->proxy) {
 		DBG(("%s: discarding cached upload\n", __FUNCTION__));
+		assert(dst_priv->gpu_bo->proxy->rq);
 		kgem_bo_destroy(&sna->kgem, dst_priv->gpu_bo);
 		dst_priv->gpu_bo = NULL;
 	}
commit 479cb6ba71038fe44f66fb31fad90d0d454fea7a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Apr 8 14:25:49 2012 +0100

    sna: Compress adjoining spans during FillSpans
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 8f02380..7425a51 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4255,8 +4255,15 @@ sna_fill_spans__fill(DrawablePtr drawable,
 			b->y2 = b->y1 + 1;
 			DBG(("%s: (%d, %d), (%d, %d)\n",
 			     __FUNCTION__, b->x1, b->y1, b->x2, b->y2));
-			if (b->x2 > b->x1)
-				b++;
+			if (b->x2 > b->x1) {
+				if (b != box &&
+				    b->y1 == b[-1].y2 &&
+				    b->x1 == b[-1].x1 &&
+				    b->x2 == b[-1].x2)
+					b[-1].y2 = b->y2;
+				else
+					b++;
+			}
 		} while (--nbox);
 		if (b != box)
 			op->boxes(data->sna, op, box, b - box);
@@ -4322,7 +4329,12 @@ sna_fill_spans__fill_clip_extents(DrawablePtr drawable,
 				b->x1 += data->dx; b->x2 += data->dx;
 				b->y1 += data->dy; b->y2 += data->dy;
 			}
-			if (++b == last_box) {
+			if (b != box &&
+			    b->y1 == b[-1].y2 &&
+			    b->x1 == b[-1].x1 &&
+			    b->x2 == b[-1].x2) {
+				b[-1].y2 = b->y2;
+			} else if (++b == last_box) {
 				op->boxes(data->sna, op, box, last_box - box);
 				b = box;
 			}
commit c5c01c13badeb7c2ead0c848b746d8d474277a77
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Apr 8 13:51:13 2012 +0100

    sna: Remove the duplicated check for use-bo? in PolySegments
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 61710ad..8f02380 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -7458,6 +7458,13 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 	     data.flags & 4));
 	if (!PM_IS_SOLID(drawable, gc->planemask))
 		goto fallback;
+
+	data.bo = sna_drawable_use_bo(drawable,
+				      &data.region.extents,
+				      &data.damage);
+	if (data.bo == NULL)
+		goto fallback;
+
 	if (gc->lineStyle != LineSolid || gc->lineWidth > 1)
 		goto spans_fallback;
 	if (gc_is_solid(gc, &color)) {
@@ -7465,10 +7472,7 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 		     __FUNCTION__, (unsigned)color, data.flags));
 
 		if (data.flags & 4) {
-			if ((data.bo = sna_drawable_use_bo(drawable,
-							   &data.region.extents,
-							   &data.damage)) &&
-			    sna_poly_segment_blt(drawable,
+			if (sna_poly_segment_blt(drawable,
 						 data.bo, data.damage,
 						 gc, color, n, seg,
 						 &data.region.extents,
@@ -7476,9 +7480,6 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 				return;
 		} else {
 			if (use_zero_spans(drawable, gc, &data.region.extents) &&
-			    (data.bo = sna_drawable_use_bo(drawable,
-							   &data.region.extents,
-							   &data.damage)) &&
 			    sna_poly_zero_segment_blt(drawable,
 						      data.bo, data.damage,
 						      gc, n, seg,
@@ -7488,70 +7489,67 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 		}
 	} else if (data.flags & 4) {
 		/* Try converting these to a set of rectangles instead */
-		if ((data.bo = sna_drawable_use_bo(drawable, &data.region.extents, &data.damage))) {
-			xRectangle *rect;
-			int i;
-
-			DBG(("%s: converting to rectagnles\n", __FUNCTION__));
+		xRectangle *rect;
+		int i;
 
-			rect = malloc (n * sizeof (xRectangle));
-			if (rect == NULL)
-				return;
+		DBG(("%s: converting to rectagnles\n", __FUNCTION__));
 
-			for (i = 0; i < n; i++) {
-				if (seg[i].x1 < seg[i].x2) {
-					rect[i].x = seg[i].x1;
-					rect[i].width = seg[i].x2 - seg[i].x1 + 1;
-				} else if (seg[i].x1 > seg[i].x2) {
-					rect[i].x = seg[i].x2;
-					rect[i].width = seg[i].x1 - seg[i].x2 + 1;
-				} else {
-					rect[i].x = seg[i].x1;
-					rect[i].width = 1;
-				}
-				if (seg[i].y1 < seg[i].y2) {
-					rect[i].y = seg[i].y1;
-					rect[i].height = seg[i].y2 - seg[i].y1 + 1;
-				} else if (seg[i].x1 > seg[i].y2) {
-					rect[i].y = seg[i].y2;
-					rect[i].height = seg[i].y1 - seg[i].y2 + 1;
-				} else {
-					rect[i].y = seg[i].y1;
-					rect[i].height = 1;
-				}
+		rect = malloc (n * sizeof (xRectangle));
+		if (rect == NULL)
+			return;
 
-				/* don't paint last pixel */
-				if (gc->capStyle == CapNotLast) {
-					if (seg[i].x1 == seg[i].x2)
-						rect[i].height--;
-					else
-						rect[i].width--;
-				}
+		for (i = 0; i < n; i++) {
+			if (seg[i].x1 < seg[i].x2) {
+				rect[i].x = seg[i].x1;
+				rect[i].width = seg[i].x2 - seg[i].x1 + 1;
+			} else if (seg[i].x1 > seg[i].x2) {
+				rect[i].x = seg[i].x2;
+				rect[i].width = seg[i].x1 - seg[i].x2 + 1;
+			} else {
+				rect[i].x = seg[i].x1;
+				rect[i].width = 1;
 			}
-
-			if (gc->fillStyle == FillTiled) {
-				i = sna_poly_fill_rect_tiled_blt(drawable,
-								 data.bo, data.damage,
-								 gc, n, rect,
-								 &data.region.extents,
-								 data.flags & 2);
+			if (seg[i].y1 < seg[i].y2) {
+				rect[i].y = seg[i].y1;
+				rect[i].height = seg[i].y2 - seg[i].y1 + 1;
+			} else if (seg[i].x1 > seg[i].y2) {
+				rect[i].y = seg[i].y2;
+				rect[i].height = seg[i].y1 - seg[i].y2 + 1;
 			} else {
-				i = sna_poly_fill_rect_stippled_blt(drawable,
-								    data.bo, data.damage,
-								    gc, n, rect,
-								    &data.region.extents,
-								    data.flags & 2);
+				rect[i].y = seg[i].y1;
+				rect[i].height = 1;
 			}
-			free (rect);
 
-			if (i)
-				return;
+			/* don't paint last pixel */
+			if (gc->capStyle == CapNotLast) {
+				if (seg[i].x1 == seg[i].x2)
+					rect[i].height--;
+				else
+					rect[i].width--;
+			}
+		}
+
+		if (gc->fillStyle == FillTiled) {
+			i = sna_poly_fill_rect_tiled_blt(drawable,
+							 data.bo, data.damage,
+							 gc, n, rect,
+							 &data.region.extents,
+							 data.flags & 2);
+		} else {
+			i = sna_poly_fill_rect_stippled_blt(drawable,
+							    data.bo, data.damage,
+							    gc, n, rect,
+							    &data.region.extents,
+							    data.flags & 2);
 		}
+		free (rect);
+
+		if (i)
+			return;
 	}
 
 spans_fallback:
-	if (use_wide_spans(drawable, gc, &data.region.extents) &&
-	    (data.bo = sna_drawable_use_bo(drawable, &data.region.extents, &data.damage))) {
+	if (use_wide_spans(drawable, gc, &data.region.extents)) {
 		void (*line)(DrawablePtr, GCPtr, int, int, DDXPointPtr);
 		int i;
 
commit f5deea4f60433ee2b0c2d02fba682fff8b7829e8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Apr 8 13:39:47 2012 +0100

    sna: Correct partial-write flag for PolySegments fallback
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 6afd582..61710ad 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -7642,7 +7642,7 @@ fallback:
 		goto out;
 	if (!sna_drawable_move_region_to_cpu(drawable, &data.region,
 					     drawable_gc_flags(drawable, gc,
-							       !(data.flags & 4 && n > 1))))
+							       !(data.flags & 4 && n == 1))))
 		goto out;
 
 	/* Install FillSpans in case we hit a fallback path in fbPolySegment */
commit c3d7f4c1cf7d052163b7c4e74bb202c618f0eb76
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Apr 8 13:18:56 2012 +0100

    sna/gen3: Reset accumulated constants for each composite
    
    In particular the glyph routines require the composite setup to
    reinitialise state between glyph runs. This affects anything trying to
    use glyphs without a mask with a gradient source, for example.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 529884c..ed2eaf1 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2437,7 +2437,7 @@ try_blt(struct sna *sna,
 
 static void
 gen3_align_vertex(struct sna *sna,
-		  struct sna_composite_op *op)
+		  const struct sna_composite_op *op)
 {
 	if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
 		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
@@ -2450,6 +2450,7 @@ gen3_align_vertex(struct sna *sna,
 		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
 		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
 		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
+		assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
 		sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
 	}
 }
@@ -2824,6 +2825,7 @@ gen3_render_composite(struct sna *sna,
 			return FALSE;
 	}
 
+	tmp->u.gen3.num_constants = 0;
 	tmp->src.u.gen3.type = SHADER_TEXTURE;
 	tmp->src.is_affine = TRUE;
 	DBG(("%s: preparing source\n", __FUNCTION__));
commit 701473d20485a0557b4fb36efcbfbb8656e2f619
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Apr 8 10:09:42 2012 +0100

    sna: Release cached upload buffers when reusing a write buffer for readback
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=48400
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 27fa165..de02e9d 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1234,6 +1234,24 @@ static void bubble_sort_partial(struct list *head, struct kgem_partial_bo *bo)
 	}
 }
 
+static void kgem_partial_buffer_release(struct kgem *kgem,
+					struct kgem_partial_bo *bo)
+{
+	while (!list_is_empty(&bo->base.vma)) {
+		struct kgem_bo *cached;
+
+		cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
+		assert(cached->proxy == &bo->base);
+		list_del(&cached->vma);
+
+		assert(*(struct kgem_bo **)cached->map == cached);
+		*(struct kgem_bo **)cached->map = NULL;
+		cached->map = NULL;
+
+		kgem_bo_destroy(kgem, cached);
+	}
+}
+
 static void kgem_retire_partials(struct kgem *kgem)
 {
 	struct kgem_partial_bo *bo, *next;
@@ -1247,19 +1265,7 @@ static void kgem_retire_partials(struct kgem *kgem)
 
 		DBG(("%s: releasing upload cache for handle=%d? %d\n",
 		     __FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma)));
-		while (!list_is_empty(&bo->base.vma)) {
-			struct kgem_bo *cached;
-
-			cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
-			assert(cached->proxy == &bo->base);
-			list_del(&cached->vma);
-
-			assert(*(struct kgem_bo **)cached->map == cached);
-			*(struct kgem_bo **)cached->map = NULL;
-			cached->map = NULL;
-
-			kgem_bo_destroy(kgem, cached);
-		}
+		kgem_partial_buffer_release(kgem, bo);
 
 		assert(bo->base.refcnt > 0);
 		if (bo->base.refcnt != 1)
@@ -3519,6 +3525,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 			     __FUNCTION__, size, bo->used, bytes(&bo->base)));
 			gem_write(kgem->fd, bo->base.handle,
 				  0, bo->used, bo->mem);
+			kgem_partial_buffer_release(kgem, bo);
 			bo->need_io = 0;
 			bo->write = 0;
 			offset = 0;
@@ -3991,8 +3998,9 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 	assert(_bo->io);
 	assert(_bo->exec == &_kgem_dummy_exec);
 	assert(_bo->rq == NULL);
-	if (_bo->proxy)
-		_bo = _bo->proxy;
+	assert(_bo->proxy);
+
+	_bo = _bo->proxy;
 	assert(_bo->exec == NULL);
 
 	bo = (struct kgem_partial_bo *)_bo;
commit 1ecf17b2507f95e1fefea15833fa9f57ec256a2e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Apr 7 10:01:01 2012 +0100

    sna/gradient: Compute the absolute delta between color stops
    
    Otherwise we do not detect gradients that start from white!
    
    Reported-by: Clemens Eisserer <linuxhippy at gmail.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=48407
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_gradient.c b/src/sna/sna_gradient.c
index 943cbf9..32d26c8 100644
--- a/src/sna/sna_gradient.c
+++ b/src/sna/sna_gradient.c
@@ -44,39 +44,48 @@ sna_gradient_sample_width(PictGradient *gradient)
 {
 	int n, width;
 
-	width = 2;
+	width = 0;
 	for (n = 1; n < gradient->nstops; n++) {
 		xFixed dx = gradient->stops[n].x - gradient->stops[n-1].x;
-		uint16_t delta, max;
-		int ramp;
+		int delta, max, ramp;
 
 		if (dx == 0)
 			return 1024;
 
 		max = gradient->stops[n].color.red -
 			gradient->stops[n-1].color.red;
+		if (max < 0)
+			max = -max;
 
 		delta = gradient->stops[n].color.green -
 			gradient->stops[n-1].color.green;
+		if (delta < 0)
+			delta = -delta;
 		if (delta > max)
 			max = delta;
 
 		delta = gradient->stops[n].color.blue -
 			gradient->stops[n-1].color.blue;
+		if (delta < 0)
+			delta = -delta;
 		if (delta > max)
 			max = delta;
 
 		delta = gradient->stops[n].color.alpha -
 			gradient->stops[n-1].color.alpha;
+		if (delta < 0)
+			delta = -delta;
 		if (delta > max)
 			max = delta;
 
-		ramp = 128 * max / dx;
+		ramp = 256 * max / dx;
 		if (ramp > width)
 			width = ramp;
 	}
 
-	width *= gradient->nstops-1;
+	if (width == 0)
+		return 1;
+
 	width = (width + 7) & -8;
 	return min(width, 1024);
 }
commit 4356fae72db3a33935b575edf95c84fbb48072a7
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 6 21:10:50 2012 +0100

    sna/video: Only wait upon the scanout pixmap
    
    Caught by the addition of the assertion.
    
    Reported-by: Jiri Slaby <jirislaby at gmail.com>
    References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
index b740b6a..4975f55 100644
--- a/src/sna/sna_video_textured.c
+++ b/src/sna/sna_video_textured.c
@@ -287,7 +287,8 @@ sna_video_textured_put_image(ScrnInfoPtr scrn,
 		}
 	}
 
-	if (crtc && video->SyncToVblank != 0)
+	if (crtc && video->SyncToVblank != 0 &&
+	    pixmap == sna->front && !sna->shadow)
 		flush = sna_wait_for_scanline(sna, pixmap, crtc,
 					      &clip->extents);
 
commit b790ba2ec9ead51227d85fc8630bc7505eb7d7b3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 6 15:38:02 2012 +0100

    sna: Correct the damage offset for redirected rendering
    
    Reported-by: Clemens Eisserer <linuxhippy at gmail.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=48385
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_damage.c b/src/sna/sna_damage.c
index 9c646d8..f2d682d 100644
--- a/src/sna/sna_damage.c
+++ b/src/sna/sna_damage.c
@@ -945,8 +945,11 @@ struct sna_damage *_sna_damage_is_all(struct sna_damage *damage,
 {
 	DBG(("%s(%d, %d)%s?\n", __FUNCTION__, width, height,
 	     damage->dirty ? "*" : ""));
-	assert(damage->mode == DAMAGE_ADD);
+	DBG(("%s: (%d, %d), (%d, %d)\n", __FUNCTION__,
+	     damage->extents.x1, damage->extents.y1,
+	     damage->extents.x2, damage->extents.y2));
 
+	assert(damage->mode == DAMAGE_ADD);
 	assert(damage->extents.x1 == 0 &&
 	       damage->extents.y1 == 0 &&
 	       damage->extents.x2 == width &&
@@ -962,10 +965,6 @@ struct sna_damage *_sna_damage_is_all(struct sna_damage *damage,
 		return damage;
 	}
 
-	DBG(("%s: (%d, %d), (%d, %d)\n", __FUNCTION__,
-	     damage->extents.x1, damage->extents.y1,
-	     damage->extents.x2, damage->extents.y2));
-
 	assert(damage->extents.x1 == 0 &&
 	       damage->extents.y1 == 0 &&
 	       damage->extents.x2 == width &&
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index d774a34..8af80f2 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -1878,8 +1878,7 @@ sna_render_composite_redirect_done(struct sna *sna,
 		}
 		if (t->damage) {
 			sna_damage_combine(t->real_damage, t->damage,
-					   t->box.x1 - op->dst.x,
-					   t->box.y1 - op->dst.y);
+					   -t->box.x1, -t->box.y1);
 			__sna_damage_destroy(t->damage);
 		}
 
commit 0b81bafb802bb86454739ed46cf45571bccef735
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 6 15:14:45 2012 +0100

    sna/glyphs: Prefer a temporary upload mask for large glyph masks
    
    If the required temporary mask is larger than the 3D pipeline can
    handle, just render to a CPU buffer rather than redirect every glyph
    composition.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c
index 235528c..87371aa 100644
--- a/src/sna/sna_glyphs.c
+++ b/src/sna/sna_glyphs.c
@@ -662,6 +662,13 @@ clear_pixmap(struct sna *sna, PixmapPtr pixmap)
 	return sna->render.clear(sna, pixmap, priv->gpu_bo);
 }
 
+static bool
+too_large(struct sna *sna, int width, int height)
+{
+	return (width > sna->render.max_3d_size ||
+		height > sna->render.max_3d_size);
+}
+
 static Bool
 glyphs_via_mask(struct sna *sna,
 		CARD8 op,
@@ -724,7 +731,8 @@ glyphs_via_mask(struct sna *sna,
 
 	component_alpha = NeedsComponent(format->format);
 	if (!NO_SMALL_MASK &&
-	    (uint32_t)width * height * format->depth < 8 * 4096) {
+	    ((uint32_t)width * height * format->depth < 8 * 4096 ||
+	     too_large(sna, width, height))) {
 		pixman_image_t *mask_image;
 		int s;
 
commit 42a84613e34522af885b4b50d6c68ef77e81ffc3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 6 14:27:15 2012 +0100

    sna: Relase the upload cache when overwriting with PutImage
    
    Reported-by: Clemens Eisserer <linuxhippy at gmail.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=48359
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 0d229cd..529884c 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2499,6 +2499,7 @@ gen3_composite_set_target(struct sna *sna,
 			kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
 			priv->gpu_bo = bo;
 		}
+		assert(priv->gpu_bo->proxy == NULL);
 
 		op->dst.bo = priv->gpu_bo;
 		op->damage = &priv->gpu_damage;
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index f1d4d00..6afd582 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1032,6 +1032,8 @@ skip_inplace_map:
 		goto done;
 	}
 
+	assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL);
+
 	if (flags & MOVE_INPLACE_HINT &&
 	    priv->stride && priv->gpu_bo &&
 	    !kgem_bo_map_will_stall(&sna->kgem, priv->gpu_bo) &&
@@ -1772,6 +1774,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box, unsigned int flags)
 			goto done;
 		}
 	}
+	assert(priv->gpu_bo->proxy == NULL);
 
 	if ((flags & MOVE_READ) == 0)
 		sna_damage_subtract_box(&priv->cpu_damage, box);
@@ -1929,6 +1932,12 @@ sna_drawable_use_bo(DrawablePtr drawable,
 		return NULL;
 	}
 
+	if (priv->gpu_bo && priv->gpu_bo->proxy) {
+		kgem_bo_destroy(to_sna_from_pixmap(pixmap), priv->gpu_bo);
+		priv->gpu_bo = NULL;
+		goto use_cpu_bo;
+	}
+
 	if (DAMAGE_IS_ALL(priv->gpu_damage))
 		goto use_gpu_bo;
 
@@ -2320,7 +2329,8 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
 		goto done;
 
 	if (priv->gpu_bo->proxy) {
-		assert((flags & MOVE_WRITE) ==0);
+		DBG(("%s: reusing cached upload\n", __FUNCTION__));
+		assert((flags & MOVE_WRITE) == 0);
 		goto done;
 	}
 
@@ -2604,6 +2614,7 @@ sna_put_image_upload_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 		return FALSE;
 
 	assert(priv->gpu_bo);
+	assert(priv->gpu_bo->proxy == NULL);
 
 	if (!priv->pinned && nbox == 1 &&
 	    box->x1 <= 0 && box->y1 <= 0 &&
@@ -2712,6 +2723,12 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 		return true;
 	}
 
+	if (priv->gpu_bo && priv->gpu_bo->proxy) {
+		DBG(("%s: discarding cached upload buffer\n", __FUNCTION__));
+		kgem_bo_destroy(sna, priv->gpu_bo);
+		priv->gpu_bo = NULL;
+	}
+
 	if (priv->cpu_bo) {
 		/* If the GPU is currently accessing the CPU pixmap, then
 		 * we will need to wait for that to finish before we can
@@ -3307,6 +3324,7 @@ sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 		goto fallback;
 
 	if (priv->gpu_bo) {
+		assert(priv->gpu_bo->proxy == NULL);
 		if (!sna_pixmap_move_to_gpu(pixmap, MOVE_WRITE | MOVE_READ)) {
 			DBG(("%s: fallback - not a pure copy and failed to move dst to GPU\n",
 			     __FUNCTION__));
@@ -3501,6 +3519,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	}
 
 	if (dst_priv->gpu_bo && dst_priv->gpu_bo->proxy) {
+		DBG(("%s: discarding cached upload\n", __FUNCTION__));
 		kgem_bo_destroy(&sna->kgem, dst_priv->gpu_bo);
 		dst_priv->gpu_bo = NULL;
 	}
commit 999aa210ff87919945c673bdd34bae76ac097681
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 6 09:24:36 2012 +0100

    sna: Use a sentinel value to prevent accessing beyond the end of the y_buckets
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 9000f82..dbf581e 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -650,12 +650,13 @@ polygon_init(struct polygon *polygon,
 			goto bail_no_mem;
 	}
 
-	if (num_buckets > ARRAY_SIZE(polygon->y_buckets_embedded)) {
-		polygon->y_buckets = malloc(num_buckets*sizeof(struct edge *));
+	if (num_buckets >= ARRAY_SIZE(polygon->y_buckets_embedded)) {
+		polygon->y_buckets = malloc((1+num_buckets)*sizeof(struct edge *));
 		if (unlikely(NULL == polygon->y_buckets))
 			goto bail_no_mem;
 	}
 	memset(polygon->y_buckets, 0, num_buckets * sizeof(struct edge *));
+	polygon->y_buckets[num_buckets] = (void *)-1;
 
 	polygon->ymin = ymin;
 	polygon->ymax = ymax;
@@ -1363,7 +1364,7 @@ tor_render(struct sna *sna,
 			if (active->head.next == &active->tail) {
 				active->min_height = INT_MAX;
 				active->is_vertical = 1;
-				for (; j < h && !polygon->y_buckets[j]; j++)
+				for (; !polygon->y_buckets[j]; j++)
 					;
 				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
 				       __FUNCTION__, i, j));
@@ -1386,8 +1387,7 @@ tor_render(struct sna *sna,
 			assert(active->is_vertical);
 			nonzero_row(active, coverages);
 
-			while (j < h &&
-			       polygon->y_buckets[j] == NULL &&
+			while (polygon->y_buckets[j] == NULL &&
 			       active->min_height >= 2*FAST_SAMPLES_Y)
 			{
 				active->min_height -= FAST_SAMPLES_Y;
@@ -1713,6 +1713,9 @@ tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
 
 	__DBG(("%s: mono=%d, buf=%d\n", __FUNCTION__, mono, buf));
 	assert(!mono);
+	assert(converter->ymin == 0);
+	assert(converter->xmin == 0);
+	assert(scratch->drawable.depth == 8);
 
 	/* Render each pixel row. */
 	for (i = 0; i < h; i = j) {
@@ -1727,7 +1730,7 @@ tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
 			if (active->head.next == &active->tail) {
 				active->min_height = INT_MAX;
 				active->is_vertical = 1;
-				for (; j < h && !polygon->y_buckets[j]; j++)
+				for (; !polygon->y_buckets[j]; j++)
 					;
 				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
 				       __FUNCTION__, i, j));
@@ -1754,8 +1757,7 @@ tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
 			if (row != ptr)
 				memcpy(row, ptr, width);
 
-			while (j < h &&
-			       polygon->y_buckets[j] == NULL &&
+			while (polygon->y_buckets[j] == NULL &&
 			       active->min_height >= 2*FAST_SAMPLES_Y)
 			{
 				active->min_height -= FAST_SAMPLES_Y;
commit 1ae6328c57eb496072f0d0e27440f5d0901633b0
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Apr 6 09:12:08 2012 +0100

    sna: Remove redundant check from tor_inplace()
    
    We only execute full-steps for vertical edges so we do not need the
    second check.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 2b4b2db..9000f82 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -1383,22 +1383,21 @@ tor_render(struct sna *sna,
 		       active->min_height,
 		       active->is_vertical));
 		if (do_full_step) {
+			assert(active->is_vertical);
 			nonzero_row(active, coverages);
 
-			if (active->is_vertical) {
-				while (j < h &&
-				       polygon->y_buckets[j] == NULL &&
-				       active->min_height >= 2*FAST_SAMPLES_Y)
-				{
-					active->min_height -= FAST_SAMPLES_Y;
-					j++;
-				}
-				if (j != i + 1)
-					step_edges(active, j - (i + 1));
-
-				__DBG(("%s: vertical edges, full step (%d, %d)\n",
-				       __FUNCTION__,  i, j));
+			while (j < h &&
+			       polygon->y_buckets[j] == NULL &&
+			       active->min_height >= 2*FAST_SAMPLES_Y)
+			{
+				active->min_height -= FAST_SAMPLES_Y;
+				j++;
 			}
+			if (j != i + 1)
+				step_edges(active, j - (i + 1));
+
+			__DBG(("%s: vertical edges, full step (%d, %d)\n",
+			       __FUNCTION__,  i, j));
 		} else {
 			grid_scaled_y_t suby;
 
@@ -1748,27 +1747,27 @@ tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
 		       active->min_height,
 		       active->is_vertical));
 		if (do_full_step) {
+			assert(active->is_vertical);
+
 			memset(ptr, 0, width);
 			inplace_row(active, ptr, width);
 			if (row != ptr)
 				memcpy(row, ptr, width);
 
-			if (active->is_vertical) {
-				while (j < h &&
-				       polygon->y_buckets[j] == NULL &&
-				       active->min_height >= 2*FAST_SAMPLES_Y)
-				{
-					active->min_height -= FAST_SAMPLES_Y;
-					row += stride;
-					memcpy(row, ptr, width);
-					j++;
-				}
-				if (j != i + 1)
-					step_edges(active, j - (i + 1));
-
-				__DBG(("%s: vertical edges, full step (%d, %d)\n",
-				       __FUNCTION__,  i, j));
+			while (j < h &&
+			       polygon->y_buckets[j] == NULL &&
+			       active->min_height >= 2*FAST_SAMPLES_Y)
+			{
+				active->min_height -= FAST_SAMPLES_Y;
+				row += stride;
+				memcpy(row, ptr, width);
+				j++;
 			}
+			if (j != i + 1)
+				step_edges(active, j - (i + 1));
+
+			__DBG(("%s: vertical edges, full step (%d, %d)\n",
+			       __FUNCTION__,  i, j));
 		} else {
 			grid_scaled_y_t suby;
 			int min = width, max = 0;
commit 51b9202d27db3d98c6d82ba224bd8eb218533dd9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Apr 4 11:13:27 2012 +0100

    sna: Only engage the GPU detiler for multiple rows
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 4f5a634..02a5c75 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -178,7 +178,7 @@ fallback:
 	}
 	if (kgem_bo_is_mappable(kgem, src_bo)) {
 		/* Is it worth detiling? */
-		if ((extents.y2 - extents.y1) * src_bo->pitch < 4096)
+		if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096)
 			goto fallback;
 	}
 
commit 98ad4c3cd8647ba3ec90fb45157773c8e85e886c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 3 19:47:15 2012 +0100

    sna/gen3: Don't force use of the render pipeline just for vmap
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index f77521f..0d229cd 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2404,10 +2404,6 @@ source_use_blt(struct sna *sna, PicturePtr picture)
 	if (too_large(picture->pDrawable->width, picture->pDrawable->height))
 		return true;
 
-	/* If we can sample directly from user-space, do so */
-	if (sna->kgem.has_vmap)
-		return false;
-
 	return is_cpu(picture->pDrawable) || is_dirty(picture->pDrawable);
 }
 
commit 0915d414f55a1bff4171981feb87bae212f29f23
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 3 19:04:29 2012 +0100

    sna/gen3: Fix pre-multiplication of mask value
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index ca3d141..f77521f 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2364,7 +2364,6 @@ gen3_composite_picture(struct sna *sna,
 	x += dx + picture->pDrawable->x;
 	y += dy + picture->pDrawable->y;
 
-	channel->is_affine = sna_transform_is_affine(picture->transform);
 	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
 		DBG(("%s: integer translation (%d, %d), removing\n",
 		     __FUNCTION__, dx, dy));
@@ -2372,8 +2371,10 @@ gen3_composite_picture(struct sna *sna,
 		y += dy;
 		channel->transform = NULL;
 		channel->filter = PictFilterNearest;
-	} else
+	} else {
 		channel->transform = picture->transform;
+		channel->is_affine = sna_transform_is_affine(picture->transform);
+	}
 
 	if (!gen3_composite_channel_set_format(channel, picture->format) &&
 	    !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
@@ -2912,13 +2913,13 @@ gen3_render_composite(struct sna *sna,
 					v = multa(tmp->src.u.gen3.mode,
 						  tmp->mask.u.gen3.mode,
 						  24);
-					v != multa(tmp->src.u.gen3.mode,
+					v |= multa(tmp->src.u.gen3.mode,
 						   tmp->mask.u.gen3.mode,
 						   16);
-					v != multa(tmp->src.u.gen3.mode,
+					v |= multa(tmp->src.u.gen3.mode,
 						   tmp->mask.u.gen3.mode,
 						   8);
-					v != multa(tmp->src.u.gen3.mode,
+					v |= multa(tmp->src.u.gen3.mode,
 						   tmp->mask.u.gen3.mode,
 						   0);
 
@@ -2986,10 +2987,11 @@ gen3_render_composite(struct sna *sna,
 		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
 	if (!is_constant_ps(tmp->mask.u.gen3.type))
 		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
-	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d\n", __FUNCTION__,
+	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
 	     !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
 	     !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
-	     tmp->floats_per_vertex));
+	     tmp->floats_per_vertex,
+	     tmp->prim_emit != gen3_emit_composite_primitive));
 	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
 
 	tmp->blt   = gen3_render_composite_blt;
commit 04851e4210d2d71542359c14d4b68d0851b36326
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 3 12:34:24 2012 +0100

    sna/gen3: Convert the clear-color from picture->format to a8r8g8b8
    
    The shaders treat colours as an argb value, however the clear color is
    stored in the pixmap's native format (a8, r5g6b5, x8r8g8b8 etc). So
    before using the value of the clear color as a solid we need to convert
    it into the a8r8g8b8 format.
    
    Reported-by: Clemens Eisserer <linuxhippy at gmail.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=48204
    Reported-by: Paul Neumann <paul104x at yahoo.de>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=47308
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 0478709..ca3d141 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2080,7 +2080,7 @@ gen3_init_solid(struct sna_composite_channel *channel, uint32_t color)
 		channel->u.gen3.type = SHADER_WHITE;
 
 	channel->bo = NULL;
-	channel->is_opaque = (color & 0xff000000) == 0xff000000;
+	channel->is_opaque = (color >> 24) == 0xff;
 	channel->is_affine = 1;
 	channel->alpha_fixup = 0;
 	channel->rb_reversed = 0;
@@ -2303,6 +2303,8 @@ gen3_composite_picture(struct sna *sna,
 
 		switch (source->type) {
 		case SourcePictTypeSolidFill:
+			DBG(("%s: solid fill [%08x], format %x\n",
+			     __FUNCTION__, source->solidFill.color, picture->format));
 			ret = gen3_init_solid(channel, source->solidFill.color);
 			break;
 
@@ -2334,11 +2336,15 @@ gen3_composite_picture(struct sna *sna,
 						x, y, w, h, dst_x, dst_y);
 	}
 
-	if (sna_picture_is_solid(picture, &color))
+	if (sna_picture_is_solid(picture, &color)) {
+		DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
 		return gen3_init_solid(channel, color);
+	}
 
-	if (sna_picture_is_clear(picture, x, y, w, h, &color))
-		return gen3_init_solid(channel, color);
+	if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
+		DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
+		return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
+	}
 
 	if (!gen3_check_repeat(picture))
 		return sna_render_picture_fixup(sna, picture, channel,
@@ -2517,11 +2523,16 @@ gen3_composite_set_target(struct sna *sna,
 	return TRUE;
 }
 
+static inline uint8_t
+mul_8_8(uint8_t a, uint8_t b)
+{
+    uint16_t t = a * (uint16_t)b + 0x7f;
+    return ((t >> 8) + t) >> 8;
+}
+
 static inline uint8_t multa(uint32_t s, uint32_t m, int shift)
 {
-	s = (s >> shift) & 0xff;
-	m >>= 24;
-	return (s * m) >> 8;
+	return mul_8_8((s >> shift) & 0xff, m >> 24) << shift;
 }
 
 static inline bool is_constant_ps(uint32_t type)
@@ -2894,33 +2905,31 @@ gen3_render_composite(struct sna *sna,
 			} else {
 				if (tmp->mask.is_opaque) {
 					tmp->mask.u.gen3.type = SHADER_NONE;
-					tmp->has_component_alpha = FALSE;
 				} else if (is_constant_ps(tmp->src.u.gen3.type) &&
 					   is_constant_ps(tmp->mask.u.gen3.type)) {
-					uint32_t a,r,g,b;
+					uint32_t v;
 
-					a = multa(tmp->src.u.gen3.mode,
+					v = multa(tmp->src.u.gen3.mode,
 						  tmp->mask.u.gen3.mode,
 						  24);
-					r = multa(tmp->src.u.gen3.mode,
-						  tmp->mask.u.gen3.mode,
-						  16);
-					g = multa(tmp->src.u.gen3.mode,
-						  tmp->mask.u.gen3.mode,
-						  8);
-					b = multa(tmp->src.u.gen3.mode,
-						  tmp->mask.u.gen3.mode,
-						  0);
+					v != multa(tmp->src.u.gen3.mode,
+						   tmp->mask.u.gen3.mode,
+						   16);
+					v != multa(tmp->src.u.gen3.mode,
+						   tmp->mask.u.gen3.mode,
+						   8);
+					v != multa(tmp->src.u.gen3.mode,
+						   tmp->mask.u.gen3.mode,
+						   0);
 
 					DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
 					     __FUNCTION__,
 					     tmp->src.u.gen3.mode,
 					     tmp->mask.u.gen3.mode,
-					     a << 24 | r << 16 | g << 8 | b));
+					     v));
 
 					tmp->src.u.gen3.type = SHADER_CONSTANT;
-					tmp->src.u.gen3.mode =
-						a << 24 | r << 16 | g << 8 | b;
+					tmp->src.u.gen3.mode = v;
 
 					tmp->mask.u.gen3.type = SHADER_NONE;
 				}
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index e7a6182..a81a145 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -476,13 +476,13 @@ static void sna_blt_copy_one(struct sna *sna,
 	kgem->nbatch += 8;
 }
 
-static Bool
-get_rgba_from_pixel(uint32_t pixel,
-		    uint16_t *red,
-		    uint16_t *green,
-		    uint16_t *blue,
-		    uint16_t *alpha,
-		    uint32_t format)
+Bool
+sna_get_rgba_from_pixel(uint32_t pixel,
+			uint16_t *red,
+			uint16_t *green,
+			uint16_t *blue,
+			uint16_t *alpha,
+			uint32_t format)
 {
 	int rbits, bbits, gbits, abits;
 	int rshift, bshift, gshift, ashift;
@@ -607,31 +607,6 @@ _sna_get_pixel_from_rgba(uint32_t * pixel,
 	return TRUE;
 }
 
-static uint32_t
-color_convert(uint32_t pixel,
-	      uint32_t src_format,
-	      uint32_t dst_format)
-{
-	DBG(("%s: src=%08x [%08x]\n", __FUNCTION__, pixel, src_format));
-
-	if (src_format != dst_format) {
-		uint16_t red, green, blue, alpha;
-
-		if (!get_rgba_from_pixel(pixel,
-					 &red, &green, &blue, &alpha,
-					 src_format))
-			return 0;
-
-		if (!sna_get_pixel_from_rgba(&pixel,
-					     red, green, blue, alpha,
-					     dst_format))
-			return 0;
-	}
-
-	DBG(("%s: dst=%08x [%08x]\n", __FUNCTION__, pixel, dst_format));
-	return pixel;
-}
-
 uint32_t
 sna_rgba_for_color(uint32_t color, int depth)
 {
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 73ef568..9c49281 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -485,6 +485,12 @@ sna_render_get_gradient(struct sna *sna,
 
 uint32_t sna_rgba_for_color(uint32_t color, int depth);
 uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format);
+Bool sna_get_rgba_from_pixel(uint32_t pixel,
+			     uint16_t *red,
+			     uint16_t *green,
+			     uint16_t *blue,
+			     uint16_t *alpha,
+			     uint32_t format);
 Bool sna_picture_is_solid(PicturePtr picture, uint32_t *color);
 
 void no_render_init(struct sna *sna);
diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h
index 956e2aa..03e1969 100644
--- a/src/sna/sna_render_inline.h
+++ b/src/sna/sna_render_inline.h
@@ -190,4 +190,29 @@ sna_render_reduce_damage(struct sna_composite_op *op,
 	}
 }
 
+inline static uint32_t
+color_convert(uint32_t pixel,
+	      uint32_t src_format,
+	      uint32_t dst_format)
+{
+	DBG(("%s: src=%08x [%08x]\n", __FUNCTION__, pixel, src_format));
+
+	if (src_format != dst_format) {
+		uint16_t red, green, blue, alpha;
+
+		if (!sna_get_rgba_from_pixel(pixel,
+					     &red, &green, &blue, &alpha,
+					     src_format))
+			return 0;
+
+		if (!sna_get_pixel_from_rgba(&pixel,
+					     red, green, blue, alpha,
+					     dst_format))
+			return 0;
+	}
+
+	DBG(("%s: dst=%08x [%08x]\n", __FUNCTION__, pixel, dst_format));
+	return pixel;
+}
+
 #endif /* SNA_RENDER_INLINE_H */
commit 87a672dafd9d6f47f31b77b406b7f0fb2b4030ac
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 3 09:24:06 2012 +0100

    sna: Apply CoordMode when computing point extents
    
    Reported-by: Patrick Truebe <eko-priv at gmx.net>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=48220
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 6eee977..f1d4d00 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -5556,9 +5556,19 @@ sna_poly_point_extents(DrawablePtr drawable, GCPtr gc,
 
 	box.x2 = box.x1 = pt->x;
 	box.y2 = box.y1 = pt->y;
-	while (--n) {
-		pt++;
-		box_add_pt(&box, pt->x, pt->y);
+	if (mode == CoordModePrevious) {
+		DDXPointRec last = *pt++;
+		while (--n) {
+			last.x += pt->x;
+			last.y += pt->y;
+			pt++;
+			box_add_pt(&box, last.x, last.y);
+		}
+	} else {
+		while (--n) {
+			++pt;
+			box_add_pt(&box, pt->x, pt->y);
+		}
 	}
 	box.x2++;
 	box.y2++;
commit 0a0ee491ea18dc59748ff4419ae73bd1a369ae79
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 3 09:23:49 2012 +0100

    sna: Debugging flil spans and their clipping
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 32eae52..6eee977 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4291,14 +4291,18 @@ sna_fill_spans__fill_clip_extents(DrawablePtr drawable,
 	     extents->x2, extents->y2));
 
 	while (n--) {
+		DBG(("%s: [%d] pt=(%d, %d), width=%d\n",
+		     __FUNCTION__, n, pt->x, pt->y, *width));
 		*(DDXPointRec *)b = *pt++;
 		b->x2 = b->x1 + (int)*width++;
 		b->y2 = b->y1 + 1;
 		if (box_intersect(b, extents)) {
-			b->x1 += data->dx;
-			b->x2 += data->dx;
-			b->y1 += data->dy;
-			b->y2 += data->dy;
+			DBG(("%s: [%d] clipped=(%d, %d), (%d, %d)\n",
+			     __FUNCTION__, n, b->x1, b->y1, b->x2, b->y2));
+			if (data->dx|data->dy) {
+				b->x1 += data->dx; b->x2 += data->dx;
+				b->y1 += data->dy; b->y2 += data->dy;
+			}
 			if (++b == last_box) {
 				op->boxes(data->sna, op, box, last_box - box);
 				b = box;
@@ -8578,6 +8582,9 @@ sna_poly_fill_polygon(DrawablePtr draw, GCPtr gc,
 	      (gc->fillStyle == FillTiled && gc->tileIsPixel)),
 	     gc->fillStyle, gc->tileIsPixel,
 	     gc->alu));
+	DBG(("%s: draw=%d, offset=(%d, %d), size=%dx%d\n",
+	     __FUNCTION__, draw->serialNumber,
+	     draw->x, draw->y, draw->width, draw->height));
 
 	data.flags = sna_poly_point_extents(draw, gc, mode, n, pt,
 					    &data.region.extents);
commit ec1267df746512c2e262ef0bd9e9527bc5efe6f4
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 2 16:16:24 2012 +0100

    sna: Use the solid spans fast paths for dashed zero-width lines as well
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 9829ada..32eae52 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4499,10 +4499,10 @@ no_damage_clipped:
 				b->y2 = b->y1 + 1;
 
 				if (box_intersect(b, &clip.extents)) {
-					b->x1 += dx;
-					b->x2 += dx;
-					b->y1 += dy;
-					b->y2 += dy;
+					if (dx|dy) {
+						b->x1 += dx; b->x2 += dx;
+						b->y1 += dy; b->y2 += dy;
+					}
 					if (++b == last_box) {
 						fill.boxes(sna, &fill, box, last_box - box);
 						b = box;
@@ -6598,7 +6598,6 @@ spans_fallback:
 		sna_gc(gc)->priv = &data;
 
 		if (gc->lineWidth == 0 &&
-		    gc->lineStyle == LineSolid &&
 		    gc_is_solid(gc, &color)) {
 			struct sna_fill_op fill;
 
@@ -6628,12 +6627,19 @@ spans_fallback:
 			assert(gc->miTranslate);
 
 			gc->ops = &sna_gc_ops__tmp;
-			miZeroLine(drawable, gc, mode, n, pt);
+			if (gc->lineStyle == LineSolid) {
+				DBG(("%s: miZeroLine (solid fill)\n", __FUNCTION__));
+				miZeroLine(drawable, gc, mode, n, pt);
+			} else {
+				DBG(("%s: miZeroDashLine (solid fill)\n", __FUNCTION__));
+				miZeroDashLine(drawable, gc, mode, n, pt);
+			}
 			fill.done(data.sna, &fill);
 		} else {
-			/* Note that the WideDash functions alternate between filling
-			 * using fgPixel and bgPixel so we need to reset state between
-			 * FillSpans.
+			/* Note that the WideDash functions alternate
+			 * between filling using fgPixel and bgPixel
+			 * so we need to reset state between FillSpans and
+			 * cannot use the fill fast paths.
 			 */
 			sna_gc_ops__tmp.FillSpans = sna_fill_spans__gpu;
 			gc->ops = &sna_gc_ops__tmp;
commit 2d1f3cb198f2fe9602356a334a076abf3c68a9c7
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 2 13:51:37 2012 +0100

    sna/gen4: Remove the accidental debugging hack from the last commit
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 1868e2f..b8a2459 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1932,7 +1932,7 @@ gen4_composite_linear_init(struct sna *sna,
 
 	channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
 	channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
-	channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y) - .5/sf);
+	channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y));
 
 	channel->embedded_transform.matrix[1][0] = 0;
 	channel->embedded_transform.matrix[1][1] = 0;
commit 5c4dc9c5db7b2a5b936bdbc15536c5cf0f7f5f23
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Apr 2 13:40:22 2012 +0100

    sna/gen3+: Fix sampling of borders around gradients
    
    Incurs a slight loss of precision for the internal gradient, but much
    more preferable to the artefacts around the borders with RepeatNone.
    
    Reported-by: Clemens Eisserer <linuxhippy at gmail.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=45016
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index e798096..0478709 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2137,7 +2137,7 @@ static Bool gen3_gradient_setup(struct sna *sna,
 
 	channel->pict_format = PICT_a8r8g8b8;
 	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
-	channel->filter = PictFilterBilinear;
+	channel->filter = PictFilterNearest;
 	channel->is_affine = sna_transform_is_affine(picture->transform);
 	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
 		DBG(("%s: integer translation (%d, %d), removing\n",
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 2e78a92..1868e2f 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1871,7 +1871,7 @@ gen4_composite_linear_init(struct sna *sna,
 	if (!channel->bo)
 		return 0;
 
-	channel->filter = PictFilterBilinear;
+	channel->filter = PictFilterNearest;
 	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
 	channel->width  = channel->bo->pitch / 4;
 	channel->height = 1;
@@ -1932,7 +1932,7 @@ gen4_composite_linear_init(struct sna *sna,
 
 	channel->embedded_transform.matrix[0][0] = pixman_double_to_fixed(dx);
 	channel->embedded_transform.matrix[0][1] = pixman_double_to_fixed(dy);
-	channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y));
+	channel->embedded_transform.matrix[0][2] = -pixman_double_to_fixed(dx*(x0+dst_x-x) + dy*(y0+dst_y-y) - .5/sf);
 
 	channel->embedded_transform.matrix[1][0] = 0;
 	channel->embedded_transform.matrix[1][1] = 0;
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index c27accd..1fb7f65 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1904,7 +1904,7 @@ gen5_composite_linear_init(struct sna *sna,
 	if (!channel->bo)
 		return 0;
 
-	channel->filter = PictFilterBilinear;
+	channel->filter = PictFilterNearest;
 	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
 	channel->width  = channel->bo->pitch / 4;
 	channel->height = 1;
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 3be9195..5bbe5e3 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2080,7 +2080,7 @@ gen6_composite_linear_init(struct sna *sna,
 	if (!channel->bo)
 		return 0;
 
-	channel->filter = PictFilterBilinear;
+	channel->filter = PictFilterNearest;
 	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
 	channel->width  = channel->bo->pitch / 4;
 	channel->height = 1;
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 6917c21..7dff02f 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2209,7 +2209,7 @@ gen7_composite_linear_init(struct sna *sna,
 	if (!channel->bo)
 		return 0;
 
-	channel->filter = PictFilterBilinear;
+	channel->filter = PictFilterNearest;
 	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
 	channel->width  = channel->bo->pitch / 4;
 	channel->height = 1;
commit 0b2651dc04cef8f9692b2557684f044b4980700f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Apr 1 19:43:15 2012 +0100

    sna: Apply composite offset to damage for spans fast paths
    
    Reported-by: Jiri Slaby <jirislaby at gmail.com>
    References: https://bugs.freedesktop.org/show_bug.cgi?id=47597
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 20d9166..9829ada 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -6665,8 +6665,12 @@ spans_fallback:
 
 		gc->ops = (GCOps *)&sna_gc_ops;
 		assert_pixmap_contains_box(data.pixmap, &data.region.extents);
-		if (data.damage)
+		if (data.damage) {
+			if (data.dx | data.dy)
+				pixman_region_translate(&data.region, data.dx, data.dy);
+			assert_pixmap_contains_box(data.pixmap, &data.region.extents);
 			sna_damage_add(data.damage, &data.region);
+		}
 		RegionUninit(&data.region);
 		return;
 	}
@@ -7579,8 +7583,12 @@ spans_fallback:
 		}
 
 		gc->ops = (GCOps *)&sna_gc_ops;
-		if (data.damage)
+		if (data.damage) {
+			if (data.dx | data.dy)
+				pixman_region_translate(&data.region, data.dx, data.dy);
+			assert_pixmap_contains_box(data.pixmap, &data.region.extents);
 			sna_damage_add(data.damage, &data.region);
+		}
 		RegionUninit(&data.region);
 		return;
 	}
@@ -8299,8 +8307,12 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
 			gc->ops = (GCOps *)&sna_gc_ops;
 
 			fill.done(data.sna, &fill);
-			if (data.damage)
+			if (data.damage) {
+				if (data.dx | data.dy)
+					pixman_region_translate(&data.region, data.dx, data.dy);
+				assert_pixmap_contains_box(data.pixmap, &data.region.extents);
 				sna_damage_add(data.damage, &data.region);
+			}
 			RegionUninit(&data.region);
 			return;
 		}
@@ -8645,8 +8657,12 @@ sna_poly_fill_polygon(DrawablePtr draw, GCPtr gc,
 		}
 
 		gc->ops = (GCOps *)&sna_gc_ops;
-		if (data.damage)
+		if (data.damage) {
+			if (data.dx | data.dy)
+				pixman_region_translate(&data.region, data.dx, data.dy);
+			assert_pixmap_contains_box(data.pixmap, &data.region.extents);
 			sna_damage_add(data.damage, &data.region);
+		}
 		RegionUninit(&data.region);
 		return;
 	}
@@ -10207,8 +10223,12 @@ sna_poly_fill_arc(DrawablePtr draw, GCPtr gc, int n, xArc *arc)
 		}
 
 		gc->ops = (GCOps *)&sna_gc_ops;
-		if (data.damage)
+		if (data.damage) {
+			if (data.dx | data.dy)
+				pixman_region_translate(&data.region, data.dx, data.dy);
+			assert_pixmap_contains_box(data.pixmap, &data.region.extents);
 			sna_damage_add(data.damage, &data.region);
+		}
 		RegionUninit(&data.region);
 		return;
 	}
commit 4ea9ab9303d21a62683055b75eaed66c97a5f289
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Apr 1 16:36:16 2012 +0100

    sna: Fix assertion to look at bbox of all boxes/points
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 47a627a..20d9166 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -233,6 +233,8 @@ static void _assert_pixmap_contains_boxes(PixmapPtr pixmap, BoxPtr box, int n, i
 
 	extents = *box;
 	while (--n) {
+		++box;
+
 		if (box->x1 < extents.x1)
 			extents.x1 = box->x1;
 		if (box->x2 > extents.x2)
@@ -258,6 +260,8 @@ static void _assert_pixmap_contains_points(PixmapPtr pixmap, DDXPointRec *pt, in
 	extents.x2 = extents.x1 = pt->x;
 	extents.y2 = extents.y1 = pt->y;
 	while (--n) {
+		++pt;
+
 		if (pt->x < extents.x1)
 			extents.x1 = pt->x;
 		else if (pt->x > extents.x2)
commit 932743bb333e35d5f6529a701137aad4d7490555
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Apr 1 09:54:43 2012 +0100

    sna: Assert that drawing boxes are within bounds
    
    More sanity checks required.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index bf76948..47a627a 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -227,6 +227,54 @@ static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char
 	}
 }
 
+static void _assert_pixmap_contains_boxes(PixmapPtr pixmap, BoxPtr box, int n, int dx, int dy, const char *function)
+{
+	BoxRec extents;
+
+	extents = *box;
+	while (--n) {
+		if (box->x1 < extents.x1)
+			extents.x1 = box->x1;
+		if (box->x2 > extents.x2)
+			extents.x2 = box->x2;
+
+		if (box->y1 < extents.y1)
+			extents.y1 = box->y1;
+		if (box->y2 > extents.y2)
+			extents.y2 = box->y2;
+	}
+	extents.x1 += dx;
+	extents.x2 += dx;
+	extents.y1 += dy;
+	extents.y2 += dy;
+	_assert_pixmap_contains_box(pixmap, &extents, function);
+}
+
+
+static void _assert_pixmap_contains_points(PixmapPtr pixmap, DDXPointRec *pt, int n, int dx, int dy, const char *function)
+{
+	BoxRec extents;
+
+	extents.x2 = extents.x1 = pt->x;
+	extents.y2 = extents.y1 = pt->y;
+	while (--n) {
+		if (pt->x < extents.x1)
+			extents.x1 = pt->x;
+		else if (pt->x > extents.x2)
+			extents.x2 = pt->x;
+
+		if (pt->y < extents.y1)
+			extents.y1 = pt->y;
+		else if (pt->y > extents.y2)
+			extents.y2 = pt->y;
+	}
+	extents.x1 += dx;
+	extents.x2 += dx + 1;
+	extents.y1 += dy;
+	extents.y2 += dy + 1;
+	_assert_pixmap_contains_box(pixmap, &extents, function);
+}
+
 static void _assert_drawable_contains_box(DrawablePtr drawable, const BoxRec *box, const char *function)
 {
 	if (box->x1 < drawable->x ||
@@ -244,8 +292,12 @@ static void _assert_drawable_contains_box(DrawablePtr drawable, const BoxRec *bo
 }
 #define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__)
 #define assert_drawable_contains_box(d, b) _assert_drawable_contains_box(d, b, __FUNCTION__)
+#define assert_pixmap_contains_boxes(p, b, n, x, y) _assert_pixmap_contains_boxes(p, b, n, x, y, __FUNCTION__)
+#define assert_pixmap_contains_points(p, pt, n, x, y) _assert_pixmap_contains_points(p, pt, n, x, y, __FUNCTION__)
 #else
 #define assert_pixmap_contains_box(p, b)
+#define assert_pixmap_contains_boxes(p, b, n, x, y)
+#define assert_pixmap_contains_points(p, pt, n, x, y)
 #define assert_drawable_contains_box(d, b)
 #endif
 
@@ -4403,12 +4455,14 @@ damage:
 		b->y2 = b->y1 + 1;
 
 		if (++b == last_box) {
+			assert_pixmap_contains_boxes(pixmap, box, last_box-box, 0, 0);
 			fill.boxes(sna, &fill, box, last_box - box);
 			sna_damage_add_boxes(damage, box, last_box - box, 0, 0);
 			b = box;
 		}
 	} while (--n);
 	if (b != box) {
+		assert_pixmap_contains_boxes(pixmap, box, b-box, 0, 0);
 		fill.boxes(sna, &fill, box, b - box);
 		sna_damage_add_boxes(damage, box, b - box, 0, 0);
 	}
@@ -4549,6 +4603,7 @@ damage_clipped:
 					b->y1 += dy;
 					b->y2 += dy;
 					if (++b == last_box) {
+						assert_pixmap_contains_boxes(pixmap, box, b-box, 0, 0);
 						fill.boxes(sna, &fill, box, last_box - box);
 						sna_damage_add_boxes(damage, box, b - box, 0, 0);
 						b = box;
@@ -4609,6 +4664,7 @@ damage_clipped:
 					b->y1 = y + dy;
 					b->y2 = b->y1 + 1;
 					if (++b == last_box) {
+						assert_pixmap_contains_boxes(pixmap, box, last_box-box, 0, 0);
 						fill.boxes(sna, &fill, box, last_box - box);
 						sna_damage_add_boxes(damage, box, last_box - box, 0, 0);
 						b = box;
@@ -4618,6 +4674,7 @@ damage_clipped:
 			RegionUninit(&clip);
 		}
 		if (b != box) {
+			assert_pixmap_contains_boxes(pixmap, box, b-box, 0, 0);
 			fill.boxes(sna, &fill, box, b - box);
 			sna_damage_add_boxes(damage, box, b - box, 0, 0);
 		}
@@ -4922,6 +4979,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
 		return;
 
 	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	assert_pixmap_contains_boxes(pixmap, box, n, dx, dy);
 	if (arg->damage)
 		sna_damage_add_boxes(arg->damage, box, n, dx, dy);
 
@@ -5082,6 +5140,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc,
 	sy += dy;
 
 	get_drawable_deltas(drawable, dst_pixmap, &dx, &dy);
+	assert_pixmap_contains_boxes(dst_pixmap, box, n, dx, dy);
 	if (arg->damage)
 		sna_damage_add_boxes(arg->damage, box, n, dx, dy);
 
@@ -5411,6 +5470,7 @@ sna_poly_point_blt(DrawablePtr drawable,
 		last.x += dx;
 		last.y += dy;
 
+		assert_pixmap_contains_points(pixmap, pt, n, last.x, last.y);
 		sna_damage_add_points(damage, pt, n, last.x, last.y);
 		do {
 			unsigned nbox = n;
@@ -5457,6 +5517,7 @@ sna_poly_point_blt(DrawablePtr drawable,
 				b->x2 = b->x1 + 1;
 				b->y2 = b->y1 + 1;
 				if (++b == last_box){
+					assert_pixmap_contains_boxes(pixmap, box, last_box-box, 0, 0);
 					fill.boxes(sna, &fill, box, last_box - box);
 					if (damage)
 						sna_damage_add_boxes(damage, box, last_box-box, 0, 0);
@@ -5465,6 +5526,7 @@ sna_poly_point_blt(DrawablePtr drawable,
 			}
 		}
 		if (b != box){
+			assert_pixmap_contains_boxes(pixmap, box, b-box, 0, 0);
 			fill.boxes(sna, &fill, box, b - box);
 			if (damage)
 				sna_damage_add_boxes(damage, box, b-box, 0, 0);
@@ -5916,6 +5978,7 @@ done:
 	return true;
 
 damage:
+	assert_pixmap_contains_boxes(pixmap, box, b-box, 0, 0);
 	sna_damage_add_boxes(damage, box, b-box, 0, 0);
 no_damage:
 	fill.boxes(sna, &fill, box, b-box);
@@ -5930,6 +5993,7 @@ no_damage_offset:
 			bb->y1 += dy;
 			bb->y2 += dy;
 		} while (++bb != b);
+		assert_pixmap_contains_boxes(pixmap, box, b-box, 0, 0);
 		fill.boxes(sna, &fill, box, b - box);
 	}
 	goto *ret;
@@ -5943,6 +6007,7 @@ damage_offset:
 			bb->y1 += dy;
 			bb->y2 += dy;
 		} while (++bb != b);
+		assert_pixmap_contains_boxes(pixmap, box, b-box, 0, 0);
 		fill.boxes(sna, &fill, box, b - box);
 		sna_damage_add_boxes(damage, box, b - box, 0, 0);
 	}
@@ -6014,6 +6079,7 @@ sna_poly_line_blt(DrawablePtr drawable,
 			     __FUNCTION__,
 			     b->x1, b->y1, b->x2, b->y2));
 			if (++b == last_box) {
+				assert_pixmap_contains_boxes(pixmap, boxes, last_box-boxes, 0, 0);
 				fill.boxes(sna, &fill, boxes, last_box - boxes);
 				if (damage)
 					sna_damage_add_boxes(damage, boxes, last_box - boxes, 0, 0);
@@ -6075,6 +6141,7 @@ sna_poly_line_blt(DrawablePtr drawable,
 					b->y1 += dy;
 					b->y2 += dy;
 					if (++b == last_box) {
+						assert_pixmap_contains_boxes(pixmap, boxes, last_box-boxes, 0, 0);
 						fill.boxes(sna, &fill, boxes, last_box - boxes);
 						if (damage)
 							sna_damage_add_boxes(damage, boxes, last_box - boxes, 0, 0);
@@ -6139,6 +6206,7 @@ sna_poly_line_blt(DrawablePtr drawable,
 						b->y1 += dy;
 						b->y2 += dy;
 						if (++b == last_box) {
+							assert_pixmap_contains_boxes(pixmap, boxes, last_box-boxes, 0, 0);
 							fill.boxes(sna, &fill, boxes, last_box-boxes);
 							if (damage)
 								sna_damage_add_boxes(damage, boxes, last_box-boxes, 0, 0);
@@ -6153,6 +6221,7 @@ sna_poly_line_blt(DrawablePtr drawable,
 		RegionUninit(&clip);
 	}
 	if (b != boxes) {
+		assert_pixmap_contains_boxes(pixmap, boxes, b-boxes, 0, 0);
 		fill.boxes(sna, &fill, boxes, b - boxes);
 		if (damage)
 			sna_damage_add_boxes(damage, boxes, b - boxes, 0, 0);
@@ -6591,6 +6660,7 @@ spans_fallback:
 		}
 
 		gc->ops = (GCOps *)&sna_gc_ops;
+		assert_pixmap_contains_box(data.pixmap, &data.region.extents);
 		if (data.damage)
 			sna_damage_add(data.damage, &data.region);
 		RegionUninit(&data.region);
commit cc20c45aa0ca15720510668d6918bf3c99104626
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 30 22:51:21 2012 +0100

    sna: Minimise the risk of hotplug hangs by checking fb before vsync
    
    Everytime we issue a MI_WAIT_FOR_EVENT on a scan-line from userspace we
    run the risk of that pipe being disable before we submit a batch. As the
    pipe is then disabled or configured differently, we encounter an
    indefinite wait and trigger a GPU hang.
    
    To minimise the risk of a hotplug event being detected and submitting a
    vsynced batch prior to noticing the removal of the pipe, perform an
    explicit query of the current CRTC and delete the wait if we spot that
    our framebuffer is no longer attached. This is about as good as we can
    achieve without extra help from the kernel.
    
    Reported-by: Francis Leblanc <Francis.Leblanc-Lebeau at verint.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=45413 (and others)
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 27e0e04..e52645c 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -141,6 +141,7 @@ struct kgem {
 	uint16_t nexec;
 	uint16_t nreloc;
 	uint16_t nfence;
+	uint16_t wait;
 	uint16_t max_batch_size;
 
 	uint32_t flush:1;
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 7a1e2f6..308e329 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -363,6 +363,7 @@ extern xf86CrtcPtr sna_covering_crtc(ScrnInfoPtr scrn,
 
 extern bool sna_wait_for_scanline(struct sna *sna, PixmapPtr pixmap,
 				  xf86CrtcPtr crtc, const BoxRec *clip);
+extern bool sna_crtc_is_bound(struct sna *sna, xf86CrtcPtr crtc);
 
 Bool sna_dri_open(struct sna *sna, ScreenPtr pScreen);
 void sna_dri_wakeup(struct sna *sna);
diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index f413ac1..ef3b0f9 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -2102,6 +2102,7 @@ static void sna_emit_wait_for_scanline_gen6(struct sna *sna,
 	b[1] = pipe;
 	b[2] = y2 - 1;
 	b[3] = MI_WAIT_FOR_EVENT | event;
+	sna->kgem.wait = sna->kgem.nbatch + 3;
 	kgem_advance_batch(&sna->kgem, 4);
 }
 
@@ -2131,6 +2132,7 @@ static void sna_emit_wait_for_scanline_gen4(struct sna *sna,
 	b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20;
 	b[3] = b[1] = (y1 << 16) | (y2-1);
 	b[4] = MI_WAIT_FOR_EVENT | event;
+	sna->kgem.wait = sna->kgem.nbatch + 4;
 	kgem_advance_batch(&sna->kgem, 5);
 }
 
@@ -2158,6 +2160,7 @@ static void sna_emit_wait_for_scanline_gen2(struct sna *sna,
 		b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
 	else
 		b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
+	sna->kgem.wait = sna->kgem.nbatch + 4;
 	kgem_advance_batch(&sna->kgem, 5);
 }
 
@@ -2171,21 +2174,15 @@ sna_wait_for_scanline(struct sna *sna,
 	Bool full_height;
 	int y1, y2, pipe;
 
+	assert(crtc);
+	assert(sna_crtc_on(crtc));
+	assert(pixmap_is_scanout(pixmap));
+
 	/* XXX WAIT_EVENT is still causing hangs on SNB */
 	if (sna->kgem.gen >= 60)
 		return false;
 
-	if (!pixmap_is_scanout(pixmap))
-		return false;
-
-	if (crtc == NULL) {
-		crtc = sna_covering_crtc(sna->scrn, clip, NULL, &crtc_box);
-		if (crtc == NULL)
-			return false;
-	} else
-		sna_crtc_box(crtc, &crtc_box);
-	assert(sna_crtc_on(crtc));
-
+	sna_crtc_box(crtc, &crtc_box);
 	if (crtc->transform_in_use) {
 		box = *clip;
 		pixman_f_transform_bounds(&crtc->f_framebuffer_to_crtc, &box);
@@ -2227,3 +2224,14 @@ sna_wait_for_scanline(struct sna *sna,
 
 	return true;
 }
+
+bool sna_crtc_is_bound(struct sna *sna, xf86CrtcPtr crtc)
+{
+	struct drm_mode_crtc mode;
+
+	mode.crtc_id = crtc_id(crtc->driver_private);
+	if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode))
+		return false;
+
+	return mode.mode_valid && sna->mode.fb_id == mode.fb_id;
+}
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 95ec07e..afec831 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -400,6 +400,7 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
 	PixmapPtr pixmap = get_drawable_pixmap(draw);
 	pixman_region16_t clip;
 	bool flush = false;
+	xf86CrtcPtr crtc;
 	BoxRec box, *boxes;
 	int16_t dx, dy;
 	int n;
@@ -442,9 +443,15 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
 			return;
 		}
 
-		if (pixmap == sna->front && sync)
-			flush = sna_wait_for_scanline(sna, pixmap, NULL,
-						      &region->extents);
+		if (pixmap == sna->front && sync) {
+			BoxRec crtc_box;
+
+			crtc = sna_covering_crtc(sna->scrn, &region->extents,
+						 NULL, &crtc_box);
+			if (crtc)
+				flush = sna_wait_for_scanline(sna, pixmap, crtc,
+							      &region->extents);
+		}
 
 		get_drawable_deltas(draw, pixmap, &dx, &dy);
 	}
@@ -482,8 +489,11 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
 			       boxes, n);
 
 	DBG(("%s: flushing? %d\n", __FUNCTION__, flush));
-	if (flush) /* STAT! */
+	if (flush) { /* STAT! */
+		if (!sna_crtc_is_bound(sna, crtc))
+			sna->kgem.batch[sna->kgem.wait] = 0;
 		kgem_submit(&sna->kgem);
+	}
 
 	pixman_region_translate(region, dx, dy);
 	DamageRegionAppend(&pixmap->drawable, region);
diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
index a71751c..b740b6a 100644
--- a/src/sna/sna_video_textured.c
+++ b/src/sna/sna_video_textured.c
@@ -306,8 +306,11 @@ sna_video_textured_put_image(ScrnInfoPtr scrn,
 	/* Push the frame to the GPU as soon as possible so
 	 * we can hit the next vsync.
 	 */
-	if (flush)
+	if (flush) {
+		if (!sna_crtc_is_bound(sna, crtc))
+			sna->kgem.batch[sna->kgem.wait] = 0;
 		kgem_submit(&sna->kgem);
+	}
 
 	return ret;
 }
commit 305734ebdf3d51c084cfbee8804b6c60b1f03a98
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 30 21:27:29 2012 +0100

    sna: Separate out scanline waiting for gen4
    
    So that we do not set a gen4 bit on gen2 and apply the old workaround of
    trimming y2 instead.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index ecfe670..f413ac1 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -2105,44 +2105,62 @@ static void sna_emit_wait_for_scanline_gen6(struct sna *sna,
 	kgem_advance_batch(&sna->kgem, 4);
 }
 
-static void sna_emit_wait_for_scanline_gen2(struct sna *sna,
+static void sna_emit_wait_for_scanline_gen4(struct sna *sna,
 					    int pipe, int y1, int y2,
 					    bool full_height)
 {
 	uint32_t event;
 	uint32_t *b;
 
-	/*
-	 * Pre-965 doesn't have SVBLANK, so we need a bit
-	 * of extra time for the blitter to start up and
-	 * do its job for a full height blit
-	 */
 	if (pipe == 0) {
-		pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEA;
-		event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
 		if (full_height)
 			event = MI_WAIT_FOR_PIPEA_SVBLANK;
+		else
+			event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
 	} else {
-		pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEB;
-		event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
 		if (full_height)
 			event = MI_WAIT_FOR_PIPEB_SVBLANK;
+		else
+			event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
 	}
 
-	if (sna->kgem.mode == KGEM_NONE)
-		kgem_set_mode(&sna->kgem, KGEM_BLT);
-
+	kgem_set_mode(&sna->kgem, KGEM_BLT);
 	b = kgem_get_batch(&sna->kgem, 5);
 	/* The documentation says that the LOAD_SCAN_LINES command
 	 * always comes in pairs. Don't ask me why. */
-	b[0] = MI_LOAD_SCAN_LINES_INCL | pipe;
-	b[1] = (y1 << 16) | (y2-1);
-	b[2] = MI_LOAD_SCAN_LINES_INCL | pipe;
-	b[3] = (y1 << 16) | (y2-1);
+	b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20;
+	b[3] = b[1] = (y1 << 16) | (y2-1);
 	b[4] = MI_WAIT_FOR_EVENT | event;
 	kgem_advance_batch(&sna->kgem, 5);
 }
 
+static void sna_emit_wait_for_scanline_gen2(struct sna *sna,
+					    int pipe, int y1, int y2,
+					    bool full_height)
+{
+	uint32_t *b;
+
+	/*
+	 * Pre-965 doesn't have SVBLANK, so we need a bit
+	 * of extra time for the blitter to start up and
+	 * do its job for a full height blit
+	 */
+	if (full_height)
+		y2 -= 2;
+
+	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	b = kgem_get_batch(&sna->kgem, 5);
+	/* The documentation says that the LOAD_SCAN_LINES command
+	 * always comes in pairs. Don't ask me why. */
+	b[2] = b[0] = MI_LOAD_SCAN_LINES_INCL | pipe << 20;
+	b[3] = b[1] = (y1 << 16) | (y2-1);
+	if (pipe == 0)
+		b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
+	else
+		b[4] = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
+	kgem_advance_batch(&sna->kgem, 5);
+}
+
 bool
 sna_wait_for_scanline(struct sna *sna,
 		      PixmapPtr pixmap,
@@ -2202,6 +2220,8 @@ sna_wait_for_scanline(struct sna *sna,
 
 	if (sna->kgem.gen >= 60)
 		sna_emit_wait_for_scanline_gen6(sna, pipe, y1, y2, full_height);
+	else if (sna->kgem.gen >= 40)
+		sna_emit_wait_for_scanline_gen4(sna, pipe, y1, y2, full_height);
 	else
 		sna_emit_wait_for_scanline_gen2(sna, pipe, y1, y2, full_height);
 
commit 6f2814db6f7b89e94e54b8d73c7e176ab7d1c469
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 30 20:45:55 2012 +0100

    sna/traps: Align the pointer+index
    
    It's the location of the pixels within the row that matter for
    alignment!
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=47418
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
    Tested-by: Magnus Kessler <Magnus.Kessler at gmx.net>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index b6c5b65..2b4b2db 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -1490,42 +1490,42 @@ inplace_row(struct active_list *active, uint8_t *row, int width)
 
 			if (rix > ++lix) {
 				rix -= lix;
+				row += lix;
 #if 0
 				if (rix == 1)
-					row[lix] = 0xff;
+					*row = 0xff;
 				else
-					memset(row+lix, 0xff, rix);
+					memset(row, 0xff, rix);
 #else
 				if ((uintptr_t)row & 1 && rix) {
-					row[lix] = 0xff;
-					lix++;
+					*row++ = 0xff;
 					rix--;
 				}
 				if ((uintptr_t)row & 2 && rix >= 2) {
-					*(uint16_t *)(row+lix) = 0xffff;
-					lix += 2;
+					*(uint16_t *)row = 0xffff;
+					row += 2;
 					rix -= 2;
 				}
 				if ((uintptr_t)row & 4 && rix >= 4) {
-					*(uint32_t *)(row+lix) = 0xffffffff;
-					lix += 4;
+					*(uint32_t *)row = 0xffffffff;
+					row += 4;
 					rix -= 4;
 				}
 				while (rix >= 8) {
-					*(uint64_t *)(row+lix) = 0xffffffffffffffff;
-					lix += 8;
+					*(uint64_t *)row = 0xffffffffffffffff;
+					row += 8;
 					rix -= 8;
 				}
 				if (rix & 4) {
-					*(uint32_t *)(row+lix) = 0xffffffff;
-					lix += 4;
+					*(uint32_t *)row = 0xffffffff;
+					row += 4;
 				}
 				if (rix & 2) {
-					*(uint16_t *)(row+lix) = 0xffff;
-					lix += 2;
+					*(uint16_t *)row = 0xffff;
+					row += 2;
 				}
 				if (rix & 1)
-					row[lix] = 0xff;
+					*row = 0xff;
 #endif
 			}
 		}
commit ee075ced844350785685a0f93f88f1dc310bcc73
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 30 19:09:30 2012 +0100

    sna/traps: Align the pointer not the indices
    
    Magnus found that inplace_row was still crashing on his setup when it
    tried to perform an 8-byte aligned write to an unaligned pointer. This
    time it looks like the row pointer itself was not 8-byte aligned, so
    instead of assuming that and fixing up the indices, ensure that the
    (index+row) results in an 8-byte aligned value.
    
    Reported-by: Magnus Kessler <Magnus.Kessler at gmx.net>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=47418
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 4067757..b6c5b65 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -1496,17 +1496,17 @@ inplace_row(struct active_list *active, uint8_t *row, int width)
 				else
 					memset(row+lix, 0xff, rix);
 #else
-				if (lix & 1 && rix) {
+				if ((uintptr_t)row & 1 && rix) {
 					row[lix] = 0xff;
 					lix++;
 					rix--;
 				}
-				if (lix & 2 && rix >= 2) {
+				if ((uintptr_t)row & 2 && rix >= 2) {
 					*(uint16_t *)(row+lix) = 0xffff;
 					lix += 2;
 					rix -= 2;
 				}
-				if (lix & 4 && rix >= 4) {
+				if ((uintptr_t)row & 4 && rix >= 4) {
 					*(uint32_t *)(row+lix) = 0xffffffff;
 					lix += 4;
 					rix -= 4;
commit fde8a010b3d9406c2f65ee99978360a6ca54e006
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 30 12:47:21 2012 +0100

    uxa: Remove broken render glyphs-to-dst
    
    Reported-by: Vincent Untz <vuntz at gnome.org>
    Reported-by: Robert Bradford <robert.bradford at intel.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=48045
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/uxa/uxa-glyphs.c b/uxa/uxa-glyphs.c
index 6172f2f..b754f4e 100644
--- a/uxa/uxa-glyphs.c
+++ b/uxa/uxa-glyphs.c
@@ -663,190 +663,6 @@ uxa_glyph_cache(ScreenPtr screen, GlyphPtr glyph, int *out_x, int *out_y)
 	return cache->picture;
 }
 
-static int
-uxa_glyphs_to_dst(CARD8 op,
-		  PicturePtr pSrc,
-		  PicturePtr pDst,
-		  INT16 src_x, INT16 src_y,
-		  INT16 xDst, INT16 yDst,
-		  int nlist, GlyphListPtr list, GlyphPtr * glyphs,
-		  BoxPtr extents)
-{
-	ScreenPtr screen = pDst->pDrawable->pScreen;
-	uxa_screen_t *uxa_screen = uxa_get_screen(screen);
-	PixmapPtr src_pixmap, dst_pixmap;
-	PicturePtr localSrc, glyph_atlas;
-	int x, y, n;
-	BoxRec box;
-
-	if (uxa_screen->info->check_composite_texture &&
-	    uxa_screen->info->check_composite_texture(screen, pSrc)) {
-		if (pSrc->pDrawable) {
-			int src_off_x, src_off_y;
-
-			src_pixmap = uxa_get_offscreen_pixmap(pSrc->pDrawable, &src_off_x, &src_off_y);
-			if (src_pixmap == NULL)
-				return -1;
-
-			src_x += pSrc->pDrawable->x + src_off_x;
-			src_y += pSrc->pDrawable->y + src_off_y;
-		} else {
-			src_pixmap = NULL;
-		}
-		localSrc = pSrc;
-	} else {
-		int width, height;
-
-		if (extents == NULL) {
-			uxa_glyph_extents(nlist, list, glyphs, &box);
-			extents = &box;
-		}
-
-		width  = extents->x2 - extents->x1;
-		height = extents->y2 - extents->y1;
-		if (width == 0 || height == 0)
-			return 0;
-
-		if (pSrc->pDrawable) {
-			int src_off_x, src_off_y;
-
-			src_off_x = extents->x1 - xDst;
-			src_off_y = extents->y1 - yDst;
-			localSrc = uxa_acquire_drawable(screen, pSrc,
-							src_x + src_off_x, src_y + src_off_y,
-							width, height,
-							&src_x, &src_y);
-			if (uxa_screen->info->check_composite_texture &&
-			    !uxa_screen->info->check_composite_texture(screen, localSrc)) {
-				if (localSrc != pSrc)
-					FreePicture(localSrc, 0);
-				return -1;
-			}
-
-			src_pixmap = uxa_get_offscreen_pixmap(localSrc->pDrawable, &src_off_x, &src_off_y);
-			if (src_pixmap == NULL) {
-				if (localSrc != pSrc)
-					FreePicture(localSrc, 0);
-				return -1;
-			}
-
-			src_x += localSrc->pDrawable->x + src_off_x;
-			src_y += localSrc->pDrawable->y + src_off_y;
-		} else {
-			localSrc = uxa_acquire_pattern(screen, pSrc,
-						       PICT_a8r8g8b8, x, y, width, height);
-			if (!localSrc)
-				return 1;
-
-			src_pixmap = uxa_get_drawable_pixmap(localSrc->pDrawable);
-			if (src_pixmap == NULL) {
-				FreePicture(localSrc, 0);
-				return -1;
-			}
-
-			src_x = src_y = 0;
-		}
-	}
-
-	dst_pixmap = uxa_get_offscreen_pixmap(pDst->pDrawable, &x, &y);
-	x += xDst + pDst->pDrawable->x - list->xOff;
-	y += yDst + pDst->pDrawable->y - list->yOff;
-
-	glyph_atlas = NULL;
-	while (nlist--) {
-		x += list->xOff;
-		y += list->yOff;
-		n = list->len;
-		while (n--) {
-			GlyphPtr glyph = *glyphs++;
-			PicturePtr this_atlas;
-			int mask_x, mask_y, nrect;
-			struct uxa_glyph *priv;
-			BoxPtr rects;
-
-			if (glyph->info.width == 0 || glyph->info.height == 0)
-				goto next_glyph;
-
-			priv = uxa_glyph_get_private(glyph);
-			if (priv != NULL) {
-				mask_x = priv->x;
-				mask_y = priv->y;
-				this_atlas = priv->cache->picture;
-			} else {
-				if (glyph_atlas) {
-					uxa_screen->info->done_composite(dst_pixmap);
-					glyph_atlas = NULL;
-				}
-				this_atlas = uxa_glyph_cache(screen, glyph, &mask_x, &mask_y);
-				if (this_atlas == NULL) {
-					/* no cache for this glyph */
-					this_atlas = GlyphPicture(glyph)[screen->myNum];
-					mask_x = mask_y = 0;
-				}
-			}
-
-			if (this_atlas != glyph_atlas) {
-				PixmapPtr mask_pixmap;
-
-				if (glyph_atlas)
-					uxa_screen->info->done_composite(dst_pixmap);
-
-				mask_pixmap =
-					uxa_get_drawable_pixmap(this_atlas->pDrawable);
-				if (!uxa_pixmap_is_offscreen(mask_pixmap) ||
-				    !uxa_screen->info->prepare_composite(op,
-									 localSrc, this_atlas, pDst,
-									 src_pixmap, mask_pixmap, dst_pixmap))
-					return -1;
-
-				glyph_atlas = this_atlas;
-			}
-
-			rects = REGION_RECTS(pDst->pCompositeClip);
-			nrect = REGION_NUM_RECTS(pDst->pCompositeClip);
-			while (nrect--) {
-				int x1 = x - glyph->info.x, dx = 0;
-				int y1 = y - glyph->info.y, dy = 0;
-				int x2 = x1 + glyph->info.width;
-				int y2 = y1 + glyph->info.height;
-
-				if (rects->y1 >= y2)
-					break;
-
-				if (x1 < rects->x1)
-					dx = rects->x1 - x1, x1 = rects->x1;
-				if (x2 > rects->x2)
-					x2 = rects->x2;
-				if (y1 < rects->y1)
-					dy = rects->y1 - y1, y1 = rects->y1;
-				if (y2 > rects->y2)
-					y2 = rects->y2;
-
-				if (x1 < x2 && y1 < y2) {
-					uxa_screen->info->composite(dst_pixmap,
-								    x1 + src_x,  y1 + src_y,
-								    dx + mask_x, dy + mask_y,
-								    x1, y1,
-								    x2 - x1, y2 - y1);
-				}
-				rects++;
-			}
-
-next_glyph:
-			x += glyph->info.xOff;
-			y += glyph->info.yOff;
-		}
-		list++;
-	}
-	if (glyph_atlas)
-		uxa_screen->info->done_composite(dst_pixmap);
-
-	if (localSrc != pSrc)
-		FreePicture(localSrc, 0);
-
-	return 0;
-}
-
 static void
 uxa_clear_pixmap(ScreenPtr screen,
 		 uxa_screen_t *uxa_screen,
@@ -894,37 +710,30 @@ uxa_glyphs_via_mask(CARD8 op,
 		    PicturePtr pDst,
 		    PictFormatPtr maskFormat,
 		    INT16 xSrc, INT16 ySrc,
-		    INT16 xDst, INT16 yDst,
-		    int nlist, GlyphListPtr list, GlyphPtr * glyphs,
-		    BoxPtr extents)
+		    int nlist, GlyphListPtr list, GlyphPtr * glyphs)
 {
 	ScreenPtr screen = pDst->pDrawable->pScreen;
 	uxa_screen_t *uxa_screen = uxa_get_screen(screen);
 	CARD32 component_alpha;
 	PixmapPtr pixmap;
 	PicturePtr glyph_atlas, mask;
+	int xDst = list->xOff, yDst = list->yOff;
 	int x, y, width, height;
 	int dst_off_x, dst_off_y;
 	int n, error;
 	BoxRec box;
 
-	if (!extents) {
-		uxa_glyph_extents(nlist, list, glyphs, &box);
+	uxa_glyph_extents(nlist, list, glyphs, &box);
+	if (box.x2 <= box.x1 || box.y2 <= box.y1)
+		return 0;
 
-		if (box.x2 <= box.x1 || box.y2 <= box.y1)
-			return 0;
+	dst_off_x = box.x1;
+	dst_off_y = box.y1;
 
-		extents = &box;
-		dst_off_x = box.x1;
-		dst_off_y = box.y1;
-	} else {
-		dst_off_x = dst_off_y = 0;
-	}
-
-	width  = extents->x2 - extents->x1;
-	height = extents->y2 - extents->y1;
-	x = -extents->x1;
-	y = -extents->y1;
+	width  = box.x2 - box.x1;
+	height = box.y2 - box.y1;
+	x = -box.x1;
+	y = -box.y1;
 
 	if (maskFormat->depth == 1) {
 		PictFormatPtr a8Format =
@@ -1061,11 +870,6 @@ uxa_glyphs(CARD8 op,
 {
 	ScreenPtr screen = pDst->pDrawable->pScreen;
 	uxa_screen_t *uxa_screen = uxa_get_screen(screen);
-	int xDst = list->xOff, yDst = list->yOff;
-	BoxRec extents = { 0, 0, 0, 0 };
-	Bool have_extents = FALSE;
-	int width, height, ret;
-	PicturePtr localDst = pDst;
 
 	if (uxa_screen->info->flags & UXA_USE_GLAMOR) {
 		int ok;
@@ -1128,112 +932,12 @@ fallback:
 		}
 	}
 
-	if (!maskFormat &&
-	    uxa_screen->info->check_composite_target &&
-	    !uxa_screen->info->check_composite_target(uxa_get_drawable_pixmap(pDst->pDrawable))) {
-		int depth = pDst->pDrawable->depth;
-		PixmapPtr pixmap;
-		int x, y, error;
-		GCPtr gc;
-
-		pixmap = uxa_get_drawable_pixmap(pDst->pDrawable);
-		if (uxa_screen->info->check_copy &&
-		    !uxa_screen->info->check_copy(pixmap, pixmap, GXcopy, FB_ALLONES))
-			goto fallback;
-
-		uxa_glyph_extents(nlist, list, glyphs, &extents);
-
-		/* clip against dst bounds */
-		if (extents.x1 < 0)
-			extents.x1 = 0;
-		if (extents.y1 < 0)
-			extents.y1 = 0;
-		if (extents.x2 > pDst->pDrawable->width)
-			extents.x2 = pDst->pDrawable->width;
-		if (extents.y2 > pDst->pDrawable->height)
-			extents.y2 = pDst->pDrawable->height;
-
-		if (extents.x2 <= extents.x1 || extents.y2 <= extents.y1)
-			return;
-		width  = extents.x2 - extents.x1;
-		height = extents.y2 - extents.y1;
-		x = -extents.x1;
-		y = -extents.y1;
-		have_extents = TRUE;
-
-		xDst += x;
-		yDst += y;
-
-		pixmap = screen->CreatePixmap(screen,
-					      width, height, depth,
-					      CREATE_PIXMAP_USAGE_SCRATCH);
-		if (!pixmap)
-			return;
-
-		if (!uxa_pixmap_is_offscreen(pixmap)) {
-			screen->DestroyPixmap(pixmap);
-			goto fallback;
-		}
-
-		gc = GetScratchGC(depth, screen);
-		if (!gc) {
-			screen->DestroyPixmap(pixmap);
-			return;
-		}
-
-		ValidateGC(&pixmap->drawable, gc);
-		gc->ops->CopyArea(pDst->pDrawable, &pixmap->drawable, gc,
-				  extents.x1, extents.y1,
-				  width, height,
-				  0, 0);
-		FreeScratchGC(gc);
-
-		localDst = CreatePicture(0, &pixmap->drawable,
-					 PictureMatchFormat(screen, depth, pDst->format),
-					 0, 0, serverClient, &error);
-		screen->DestroyPixmap(pixmap);
-
-		if (!localDst)
-			return;
-
-		ValidatePicture(localDst);
-	}
-
-	if (maskFormat) {
-		ret = uxa_glyphs_via_mask(op,
-					  pSrc, localDst, maskFormat,
-					  xSrc, ySrc,
-					  xDst, yDst,
-					  nlist, list, glyphs,
-					  have_extents ? &extents : NULL);
-	} else {
-		ret = uxa_glyphs_to_dst(op,
-					pSrc, localDst,
-					xSrc, ySrc,
-					xDst, yDst,
-					nlist, list, glyphs,
-					have_extents ? &extents : NULL);
-	}
-	if (ret) {
-		if (localDst != pDst)
-			FreePicture(localDst, 0);
-
+	if (!maskFormat)
 		goto fallback;
-	}
 
-	if (localDst != pDst) {
-		GCPtr gc;
-
-		gc = GetScratchGC(pDst->pDrawable->depth, screen);
-		if (gc) {
-			ValidateGC(pDst->pDrawable, gc);
-			gc->ops->CopyArea(localDst->pDrawable, pDst->pDrawable, gc,
-					  0, 0,
-					  width, height,
-					  extents.x1, extents.y1);
-			FreeScratchGC(gc);
-		}
-
-		FreePicture(localDst, 0);
-	}
+	if (uxa_glyphs_via_mask(op,
+				pSrc, pDst, maskFormat,
+				xSrc, ySrc,
+				nlist, list, glyphs))
+		goto fallback;
 }
commit 451489b49916cf5a9d27844196f9656e590d9124
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 30 10:21:26 2012 +0100

    sna/gen7: Allow per-device specific maxima
    
    As the maximum thread count and urb size differs between different
    incarnations of the GT units, be a little more flexible in programming
    those maximums.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 1491167..6917c21 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -67,6 +67,31 @@
 
 #define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
 
+struct gt_info {
+	int max_vs_threads;
+	int max_gs_threads;
+	int max_wm_threads;
+	struct {
+		int size;
+		int max_vs_entries;
+		int max_gs_entries;
+	} urb;
+};
+
+static const struct gt_info gt1_info = {
+	.max_vs_threads = 36,
+	.max_gs_threads = 36,
+	.max_wm_threads = 86,
+	.urb = { 128, 512, 192 },
+};
+
+static const struct gt_info gt2_info = {
+	.max_vs_threads = 128,
+	.max_gs_threads = 128,
+	.max_wm_threads = 86,
+	.urb = { 256, 704, 320 },
+};
+
 static const uint32_t ps_kernel_nomask_affine[][4] = {
 #include "exa_wm_src_affine.g7b"
 #include "exa_wm_src_sample_argb.g7b"
@@ -427,7 +452,7 @@ gen7_emit_urb(struct sna *sna)
 
 	/* num of VS entries must be divisible by 8 if size < 9 */
 	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
-	OUT_BATCH((32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
+	OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
 		  (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
 		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
 
@@ -802,7 +827,7 @@ gen7_emit_wm(struct sna *sna, unsigned int kernel, int nr_surfaces, int nr_input
 	OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
 		  nr_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
 	OUT_BATCH(0); /* scratch address */
-	OUT_BATCH((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT |
+	OUT_BATCH((sna->render_state.gen7.info->max_wm_threads - 1) << GEN7_PS_MAX_THREADS_SHIFT |
 		  GEN7_PS_ATTRIBUTE_ENABLE |
 		  GEN7_PS_16_DISPATCH_ENABLE);
 	OUT_BATCH(6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
@@ -4292,6 +4317,10 @@ static Bool gen7_render_setup(struct sna *sna)
 	struct gen7_sampler_state *ss;
 	int i, j, k, l, m;
 
+	state->info = &gt1_info;
+	if (DEVICE_ID(sna->PciInfo) & 0x20)
+		state->info = &gt2_info;
+
 	sna_static_stream_init(&general);
 
 	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 8ad8efc..7a1e2f6 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -257,7 +257,6 @@ struct sna {
 #define SNA_TILING_3D		0x4
 #define SNA_TILING_ALL (~0)
 
-	int Chipset;
 	EntityInfoPtr pEnt;
 	struct pci_device *PciInfo;
 	struct intel_chipset chipset;
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index a83f78e..73ef568 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -428,6 +428,7 @@ enum {
 };
 
 struct gen7_render_state {
+	const struct gt_info *info;
 	struct kgem_bo *general_bo;
 
 	uint32_t vs_state;
commit ea36f2c4a3fa9afa8184eeaf944af9924c080368
Author: Eugeni Dodonov <eugeni.dodonov at intel.com>
Date:   Thu Mar 29 21:08:29 2012 -0300

    Add support for Ivy Bridge GT2 Server chipset
    
    Sometimes known as Bromlow.
    
    Signed-off-by: Eugeni Dodonov <eugeni.dodonov at intel.com>
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/intel_driver.h b/src/intel_driver.h
index e9d6d9e..98973e5 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -190,6 +190,7 @@
 #define PCI_CHIP_IVYBRIDGE_D_GT1	0x0152
 #define PCI_CHIP_IVYBRIDGE_D_GT2	0x0162
 #define PCI_CHIP_IVYBRIDGE_S_GT1	0x015a
+#define PCI_CHIP_IVYBRIDGE_S_GT2	0x016a
 
 #endif
 
diff --git a/src/intel_module.c b/src/intel_module.c
index 2c0e5cc..c6f94f5 100644
--- a/src/intel_module.c
+++ b/src/intel_module.c
@@ -142,6 +142,7 @@ static const SymTabRec _intel_chipsets[] = {
 	{PCI_CHIP_IVYBRIDGE_D_GT1,		"Ivybridge Desktop (GT1)" },
 	{PCI_CHIP_IVYBRIDGE_D_GT2,		"Ivybridge Desktop (GT2)" },
 	{PCI_CHIP_IVYBRIDGE_S_GT1,		"Ivybridge Server" },
+	{PCI_CHIP_IVYBRIDGE_S_GT2,		"Ivybridge Server (GT2)" },
 	{-1,					NULL}
 };
 #define NUM_CHIPSETS (sizeof(_intel_chipsets) / sizeof(_intel_chipsets[0]))
@@ -210,6 +211,7 @@ static const struct pci_id_match intel_device_match[] = {
 	INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT1, &intel_ivybridge_info ),
 	INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT2, &intel_ivybridge_info ),
 	INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT1, &intel_ivybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT2, &intel_ivybridge_info ),
 
 	{ 0, 0, 0 },
 };
commit 6142232fa0feeb39412cda85ca727cc770eaa042
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Mar 28 18:59:26 2012 +0100

    sna: Add video sprite support for ILK+
    
    Based on the work by Jesse Barnes.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am
index 70afd53..911a857 100644
--- a/src/sna/Makefile.am
+++ b/src/sna/Makefile.am
@@ -60,6 +60,7 @@ libsna_la_SOURCES = \
 	sna_video.c \
 	sna_video.h \
 	sna_video_overlay.c \
+	sna_video_sprite.c \
 	sna_video_textured.c \
 	gen2_render.c \
 	gen2_render.h \
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 949c18f..8ad8efc 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -371,6 +371,7 @@ void sna_dri_close(struct sna *sna, ScreenPtr pScreen);
 
 extern Bool sna_crtc_on(xf86CrtcPtr crtc);
 int sna_crtc_to_pipe(xf86CrtcPtr crtc);
+int sna_crtc_to_plane(xf86CrtcPtr crtc);
 
 CARD32 sna_format_for_depth(int depth);
 CARD32 sna_render_format_for_depth(int depth);
diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index a5d69dd..ecfe670 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -60,6 +60,7 @@ struct sna_crtc {
 	int num;
 	int id;
 	int pipe;
+	int plane;
 	int active;
 	struct list link;
 };
@@ -144,6 +145,12 @@ int sna_crtc_to_pipe(xf86CrtcPtr crtc)
 	return sna_crtc->pipe;
 }
 
+int sna_crtc_to_plane(xf86CrtcPtr crtc)
+{
+	struct sna_crtc *sna_crtc = crtc->driver_private;
+	return sna_crtc->plane;
+}
+
 static uint32_t gem_create(int fd, int size)
 {
 	struct drm_i915_gem_create create;
@@ -852,6 +859,37 @@ static const xf86CrtcFuncsRec sna_crtc_funcs = {
 	.destroy = sna_crtc_destroy,
 };
 
+static uint32_t
+sna_crtc_find_plane(struct sna *sna, int pipe)
+{
+	drmModePlaneRes *resources;
+	uint32_t id = 0;
+	int i;
+
+	resources = drmModeGetPlaneResources(sna->kgem.fd);
+	if (!resources) {
+		xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR,
+			   "failed to get plane resources: %s\n",
+			   strerror(errno));
+		return 0;
+	}
+
+	for (i = 0; id == 0 && i < resources->count_planes; i++) {
+		drmModePlane *p;
+
+		p = drmModeGetPlane(sna->kgem.fd, resources->planes[i]);
+		if (p) {
+			if (p->possible_crtcs & (1 << pipe))
+				id = p->plane_id;
+
+			drmModeFreePlane(p);
+		}
+	}
+
+	free(resources);
+	return id;
+}
+
 static void
 sna_crtc_init(ScrnInfoPtr scrn, struct sna_mode *mode, int num)
 {
@@ -878,6 +916,7 @@ sna_crtc_init(ScrnInfoPtr scrn, struct sna_mode *mode, int num)
 		 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
 		 &get_pipe);
 	sna_crtc->pipe = get_pipe.pipe;
+	sna_crtc->plane = sna_crtc_find_plane(sna, sna_crtc->pipe);
 
 	if (xf86IsEntityShared(scrn->entityList[0]) &&
 	    scrn->confScreen->device->screen != sna_crtc->pipe) {
diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
index 56cf260..c80ccfb 100644
--- a/src/sna/sna_video.c
+++ b/src/sna/sna_video.c
@@ -83,12 +83,17 @@ void sna_video_free_buffers(struct sna *sna, struct sna_video *video)
 
 	for (i = 0; i < ARRAY_SIZE(video->old_buf); i++) {
 		if (video->old_buf[i]) {
+			if (video->old_buf[i]->unique_id)
+				drmModeRmFB(sna->kgem.fd,
+						video->old_buf[i]->unique_id);
 			kgem_bo_destroy(&sna->kgem, video->old_buf[i]);
 			video->old_buf[i] = NULL;
 		}
 	}
 
 	if (video->buf) {
+		if (video->buf->unique_id)
+			drmModeRmFB(sna->kgem.fd, video->buf->unique_id);
 		kgem_bo_destroy(&sna->kgem, video->buf);
 		video->buf = NULL;
 	}
@@ -440,6 +445,11 @@ sna_video_copy_data(struct sna *sna,
 	if (frame->bo == NULL)
 		return FALSE;
 
+	DBG(("%s: handle=%d, size=%dx%d, rotation=%d\n",
+	     __FUNCTION__, frame->bo->handle, frame->width, frame->height,
+	     video->rotation));
+	DBG(("%s: top=%d, left=%d\n", __FUNCTION__, frame->top, frame->left));
+
 	/* In the common case, we can simply the upload in a single pwrite */
 	if (video->rotation == RR_Rotate_0) {
 		if (is_planar_fourcc(frame->id)) {
@@ -472,8 +482,8 @@ sna_video_copy_data(struct sna *sna,
 		}
 	}
 
-	/* copy data */
-	dst = kgem_bo_map(&sna->kgem, frame->bo);
+	/* copy data, must use GTT so that we keep the overlay uncached */
+	dst = kgem_bo_map__gtt(&sna->kgem, frame->bo);
 	if (dst == NULL)
 		return FALSE;
 
@@ -510,7 +520,9 @@ void sna_video_init(struct sna *sna, ScreenPtr screen)
 	 * supported hardware.
 	 */
 	textured = sna_video_textured_setup(sna, screen);
-	overlay = sna_video_overlay_setup(sna, screen);
+	overlay = sna_video_sprite_setup(sna, screen);
+	if (overlay == NULL)
+		overlay = sna_video_overlay_setup(sna, screen);
 
 	if (overlay && prefer_overlay)
 		adaptors[num_adaptors++] = overlay;
diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
index 47ddab0..687fbe1 100644
--- a/src/sna/sna_video.h
+++ b/src/sna/sna_video.h
@@ -51,6 +51,7 @@ struct sna_video {
 	uint32_t gamma5;
 
 	int color_key;
+	int color_key_changed;
 
 	/** YUV data buffers */
 	struct kgem_bo *old_buf[2];
@@ -58,6 +59,7 @@ struct sna_video {
 
 	Bool textured;
 	Rotation rotation;
+	int plane;
 
 	int SyncToVblank;	/* -1: auto, 0: off, 1: on */
 };
@@ -78,10 +80,9 @@ struct sna_video_frame {
 };
 
 void sna_video_init(struct sna *sna, ScreenPtr screen);
-XF86VideoAdaptorPtr sna_video_overlay_setup(struct sna *sna,
-					    ScreenPtr screen);
-XF86VideoAdaptorPtr sna_video_textured_setup(struct sna *sna,
-					     ScreenPtr screen);
+XF86VideoAdaptorPtr sna_video_overlay_setup(struct sna *sna, ScreenPtr screen);
+XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna, ScreenPtr screen);
+XF86VideoAdaptorPtr sna_video_textured_setup(struct sna *sna, ScreenPtr screen);
 
 #define FOURCC_XVMC     (('C' << 24) + ('M' << 16) + ('V' << 8) + 'X')
 
diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
new file mode 100644
index 0000000..82db122
--- /dev/null
+++ b/src/sna/sna_video_sprite.c
@@ -0,0 +1,434 @@
+/***************************************************************************
+
+ Copyright 2000-2011 Intel Corporation.  All Rights Reserved.
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sub license, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial portions
+ of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_video.h"
+
+#include <xf86xv.h>
+#include <X11/extensions/Xv.h>
+#include <fourcc.h>
+#include <drm_fourcc.h>
+#include <i915_drm.h>
+
+#if DEBUG_VIDEO_OVERLAY
+#undef DBG
+#define DBG(x) ErrorF x
+#endif
+
+#define IMAGE_MAX_WIDTH		2048
+#define IMAGE_MAX_HEIGHT	2048
+
+#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE)
+
+static Atom xvColorKey;
+
+static XF86VideoFormatRec xv_formats[] = {
+	{15, TrueColor}, {16, TrueColor}, {24, TrueColor}
+};
+static XF86ImageRec xv_images[] = { XVIMAGE_YUY2, XVIMAGE_UYVY, };
+static const XF86VideoEncodingRec xv_dummy_encoding[] = {
+	{ 0, "XV_IMAGE", IMAGE_MAX_WIDTH, IMAGE_MAX_HEIGHT, {1, 1} }
+};
+static XF86AttributeRec attribs[] = {
+	{XvSettable | XvGettable, 0, 0xffffff, "XV_COLORKEY"},
+};
+
+static void sna_video_sprite_off(struct sna *sna, struct sna_video *video)
+{
+	if (video->plane == 0)
+		return;
+
+	if (drmModeSetPlane(sna->kgem.fd,
+			    video->plane, 0, 0, 0,
+			    0, 0, 0, 0,
+			    0, 0, 0, 0))
+		xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR,
+			   "failed to disable plane\n");
+
+	video->plane = 0;
+}
+
+static void sna_video_sprite_stop(ScrnInfoPtr scrn, pointer data, Bool shutdown)
+{
+	return sna_video_sprite_off(to_sna(scrn), data);
+}
+
+static int sna_video_sprite_set_attr(ScrnInfoPtr scrn,
+				     Atom attribute, INT32 value,
+				     pointer data)
+{
+	struct sna_video *video = data;
+
+	if (attribute == xvColorKey) {
+		video->color_key_changed = TRUE;
+		video->color_key = value;
+		DBG(("COLORKEY = %d\n", value));
+	} else
+		return BadMatch;
+
+	return Success;
+}
+
+static int sna_video_sprite_get_attr(ScrnInfoPtr scrn,
+				     Atom attribute, INT32 *value,
+				     pointer data)
+{
+	struct sna_video *video = data;
+
+	if (attribute == xvColorKey)
+		*value = video->color_key;
+	else
+		return BadMatch;
+
+	return Success;
+}
+
+static void sna_video_sprite_best_size(ScrnInfoPtr scrn, Bool motion,
+				       short vid_w, short vid_h,
+				       short drw_w, short drw_h,
+				       unsigned int *p_w, unsigned int *p_h,
+				       pointer data)
+{
+	*p_w = vid_w;
+	*p_h = vid_h;
+}
+
+static void
+update_dst_box_to_crtc_coords(struct sna *sna, xf86CrtcPtr crtc, BoxPtr dstBox)
+{
+	ScrnInfoPtr scrn = sna->scrn;
+	int tmp;
+
+	switch (crtc->rotation & 0xf) {
+	case RR_Rotate_0:
+		dstBox->x1 -= crtc->x;
+		dstBox->x2 -= crtc->x;
+		dstBox->y1 -= crtc->y;
+		dstBox->y2 -= crtc->y;
+		break;
+
+	case RR_Rotate_90:
+		tmp = dstBox->x1;
+		dstBox->x1 = dstBox->y1 - crtc->x;
+		dstBox->y1 = scrn->virtualX - tmp - crtc->y;
+		tmp = dstBox->x2;
+		dstBox->x2 = dstBox->y2 - crtc->x;
+		dstBox->y2 = scrn->virtualX - tmp - crtc->y;
+		tmp = dstBox->y1;
+		dstBox->y1 = dstBox->y2;
+		dstBox->y2 = tmp;
+		break;
+
+	case RR_Rotate_180:
+		tmp = dstBox->x1;
+		dstBox->x1 = scrn->virtualX - dstBox->x2 - crtc->x;
+		dstBox->x2 = scrn->virtualX - tmp - crtc->x;
+		tmp = dstBox->y1;
+		dstBox->y1 = scrn->virtualY - dstBox->y2 - crtc->y;
+		dstBox->y2 = scrn->virtualY - tmp - crtc->y;
+		break;
+
+	case RR_Rotate_270:
+		tmp = dstBox->x1;
+		dstBox->x1 = scrn->virtualY - dstBox->y1 - crtc->x;
+		dstBox->y1 = tmp - crtc->y;
+		tmp = dstBox->x2;
+		dstBox->x2 = scrn->virtualY - dstBox->y2 - crtc->x;
+		dstBox->y2 = tmp - crtc->y;
+		tmp = dstBox->x1;
+		dstBox->x1 = dstBox->x2;
+		dstBox->x2 = tmp;
+		break;
+	}
+}
+
+static Bool
+sna_video_sprite_show(struct sna *sna,
+		      struct sna_video *video,
+		      struct sna_video_frame *frame,
+		      xf86CrtcPtr crtc,
+		      BoxPtr dstBox)
+{
+	int plane = sna_crtc_to_plane(crtc);
+
+	update_dst_box_to_crtc_coords(sna, crtc, dstBox);
+	if (crtc->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
+		int tmp = frame->width;
+		frame->width = frame->height;
+		frame->height = tmp;
+	}
+
+#if defined(DRM_I915_SET_SPRITE_DESTKEY)
+	if (video->color_key_changed || video->plane != plane) {
+		struct drm_intel_set_sprite_destkey set;
+
+		DBG(("%s: updating color key: %x\n",
+		     __FUNCTION__, video->color_key));
+
+		set.plane_id = plane;
+		set.value = video->color_key;
+
+		if (drmCommandWrite(sna->kgem.fd,
+				    DRM_I915_SET_SPRITE_DESTKEY,
+				    &set, sizeof(set)))
+			xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR,
+				   "failed to update color key\n");
+
+		video->color_key_changed = FALSE;
+	}
+#endif
+
+	if (frame->bo->unique_id == 0) {
+		uint32_t offsets[4], pitches[4], handles[4];
+		uint32_t pixel_format;
+
+		switch (frame->id) {
+		case FOURCC_UYVY:
+			pixel_format = DRM_FORMAT_UYVY;
+			break;
+		case FOURCC_YUY2:
+		default:
+			pixel_format = DRM_FORMAT_YUYV;
+			break;
+		}
+
+		handles[0] = frame->bo->handle;
+		pitches[0] = frame->pitch[0];
+		offsets[0] = 0;
+
+		DBG(("%s: creating new fb for handle=%d\n",
+		     __FUNCTION__, frame->bo->handle));
+
+		if (drmModeAddFB2(sna->kgem.fd,
+				  frame->width, frame->height, pixel_format,
+				  handles, pitches, offsets,
+				  &frame->bo->unique_id, 0)) {
+			xf86DrvMsg(sna->scrn->scrnIndex,
+				   X_ERROR, "failed to add fb\n");
+			return false;
+		}
+	}
+
+	DBG(("%s: updating plane=%d, handle=%d [fb %d], dst=(%d,%d)x(%d,%d)\n",
+	     __FUNCTION__, plane, frame->bo->handle, frame->bo->unique_id,
+	     dstBox->x1, dstBox->y1,
+	     dstBox->x2 - dstBox->x1, dstBox->y2 - dstBox->y1));
+	if (drmModeSetPlane(sna->kgem.fd,
+			    plane, sna_crtc_id(crtc), frame->bo->unique_id, 0,
+			    dstBox->x1, dstBox->y1,
+			    dstBox->x2 - dstBox->x1, dstBox->y2 - dstBox->y1,
+			    0, 0, frame->width << 16, frame->height << 16))
+		return false;
+
+	video->plane = plane;
+	return true;
+}
+
+static int sna_video_sprite_put_image(ScrnInfoPtr scrn,
+				      short src_x, short src_y,
+				      short drw_x, short drw_y,
+				      short src_w, short src_h,
+				      short drw_w, short drw_h,
+				      int id, unsigned char *buf,
+				      short width, short height,
+				      Bool sync, RegionPtr clip, pointer data,
+				      DrawablePtr drawable)
+{
+	struct sna *sna = to_sna(scrn);
+	struct sna_video *video = data;
+	struct sna_video_frame frame;
+	xf86CrtcPtr crtc;
+	BoxRec dst_box;
+
+	sna_video_frame_init(sna, video, id, width, height, &frame);
+
+	if (!sna_video_clip_helper(scrn, video, &frame, &crtc, &dst_box,
+				   src_x, src_y, drw_x, drw_y,
+				   src_w, src_h, drw_w, drw_h,
+				   clip))
+		return Success;
+
+	if (!crtc || !sna_crtc_to_plane(crtc)) {
+		/* If the video isn't visible on any CRTC, turn it off */
+		sna_video_sprite_off(sna, video);
+		return Success;
+	}
+
+	/* sprites can't handle rotation natively, store it for the copy func */
+	video->rotation = crtc->rotation;
+
+	frame.bo = sna_video_buffer(sna, video, &frame);
+	if (frame.bo == NULL) {
+		DBG(("%s: failed to allocate video bo\n", __FUNCTION__));
+		return BadAlloc;
+	}
+
+	if (!sna_video_copy_data(sna, video, &frame, buf)) {
+		DBG(("%s: failed to copy video data\n", __FUNCTION__));
+		return BadAlloc;
+	}
+
+	if (!sna_video_sprite_show(sna, video, &frame, crtc, &dst_box)) {
+		DBG(("%s: failed to show video frame\n", __FUNCTION__));
+		return BadAlloc;
+	}
+
+	sna_video_buffer_fini(sna, video);
+
+	if (!REGION_EQUAL(scrn->pScreen, &video->clip, clip)) {
+		REGION_COPY(scrn->pScreen, &video->clip, clip);
+		xf86XVFillKeyHelperDrawable(drawable, video->color_key, clip);
+	}
+
+	return Success;
+}
+
+static int sna_video_sprite_query_attrs(ScrnInfoPtr scrn, int id,
+					unsigned short *w, unsigned short *h,
+					int *pitches, int *offsets)
+{
+	int size;
+
+	if (*w > IMAGE_MAX_WIDTH)
+		*w = IMAGE_MAX_WIDTH;
+	if (*h > IMAGE_MAX_HEIGHT)
+		*h = IMAGE_MAX_HEIGHT;
+
+	*w = (*w + 1) & ~1;
+	if (offsets)
+		offsets[0] = 0;
+
+	switch (id) {
+	case FOURCC_YUY2:
+	default:
+		size = *w << 1;
+		if (pitches)
+			pitches[0] = size;
+		size *= *h;
+		break;
+	}
+
+	return size;
+}
+
+static int sna_video_sprite_color_key(struct sna *sna)
+{
+	ScrnInfoPtr scrn = sna->scrn;
+	int color_key;
+
+	if (xf86GetOptValInteger(sna->Options, OPTION_VIDEO_KEY,
+				 &color_key)) {
+	} else if (xf86GetOptValInteger(sna->Options, OPTION_COLOR_KEY,
+					&color_key)) {
+	} else {
+		color_key =
+		    (1 << scrn->offset.red) |
+		    (1 << scrn->offset.green) |
+		    (((scrn->mask.blue >> scrn->offset.blue) - 1) << scrn->offset.blue);
+	}
+
+	return color_key & ((1 << scrn->depth) - 1);
+}
+
+XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna,
+					   ScreenPtr screen)
+{
+	XF86VideoAdaptorPtr adaptor;
+	struct sna_video *video;
+	drmModePlaneRes *plane_resources;
+
+	plane_resources = drmModeGetPlaneResources(sna->kgem.fd);
+	if (!plane_resources)
+		return NULL;
+
+	adaptor = calloc(1,
+			 sizeof(XF86VideoAdaptorRec) +
+			 sizeof(struct sna_video) +
+			 sizeof(DevUnion));
+	if (!adaptor) {
+		free(plane_resources);
+		return NULL;
+	}
+
+	adaptor->type = XvWindowMask | XvInputMask | XvImageMask;
+	adaptor->flags = VIDEO_OVERLAID_IMAGES /*| VIDEO_CLIP_TO_VIEWPORT */ ;
+	adaptor->name = "Intel(R) Video Sprite";
+	adaptor->nEncodings = ARRAY_SIZE(xv_dummy_encoding);
+	adaptor->pEncodings = xnfalloc(sizeof(xv_dummy_encoding));
+	memcpy(adaptor->pEncodings, xv_dummy_encoding, sizeof(xv_dummy_encoding));
+	adaptor->nFormats = ARRAY_SIZE(xv_formats);
+	adaptor->pFormats = xv_formats;
+	adaptor->nPorts = 1;
+	adaptor->pPortPrivates = (DevUnion *)&adaptor[1];
+
+	video = (struct sna_video *)&adaptor->pPortPrivates[1];
+	adaptor->pPortPrivates[0].ptr = video;
+
+	adaptor->nAttributes = ARRAY_SIZE(attribs);
+	adaptor->pAttributes = attribs;
+
+	adaptor->nImages = ARRAY_SIZE(xv_images);
+	adaptor->pImages = xv_images;
+
+	adaptor->PutVideo = NULL;
+	adaptor->PutStill = NULL;
+	adaptor->GetVideo = NULL;
+	adaptor->GetStill = NULL;
+	adaptor->StopVideo = sna_video_sprite_stop;
+	adaptor->SetPortAttribute = sna_video_sprite_set_attr;
+	adaptor->GetPortAttribute = sna_video_sprite_get_attr;
+	adaptor->QueryBestSize = sna_video_sprite_best_size;
+	adaptor->PutImage = sna_video_sprite_put_image;
+	adaptor->QueryImageAttributes = sna_video_sprite_query_attrs;
+
+	video->textured = FALSE;
+	video->color_key = sna_video_sprite_color_key(sna);
+	video->color_key_changed = TRUE;
+	video->brightness = -19;	/* (255/219) * -16 */
+	video->contrast = 75;	/* 255/219 * 64 */
+	video->saturation = 146;	/* 128/112 * 128 */
+	video->desired_crtc = NULL;
+	video->gamma5 = 0xc0c0c0;
+	video->gamma4 = 0x808080;
+	video->gamma3 = 0x404040;
+	video->gamma2 = 0x202020;
+	video->gamma1 = 0x101010;
+	video->gamma0 = 0x080808;
+
+	video->rotation = RR_Rotate_0;
+
+	REGION_NULL(screen, &video->clip);
+
+	xvColorKey = MAKE_ATOM("XV_COLORKEY");
+
+	free(plane_resources);
+
+	return adaptor;
+}
commit ae8aa172a7330439a8e6dda41f5e33eb257a139b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Mar 28 22:07:10 2012 +0100

    sna: Fix up 32-bit overflow for maximum object size calculation
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 964c6e9..27fa165 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -84,7 +84,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 #if defined(USE_VMAP) && !defined(I915_PARAM_HAS_VMAP)
 #define DRM_I915_GEM_VMAP       0x2c
 #define DRM_IOCTL_I915_GEM_VMAP DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_VMAP, struct drm_i915_gem_vmap)
-#define I915_PARAM_HAS_VMAP              18
+#define I915_PARAM_HAS_VMAP              19
 struct drm_i915_gem_vmap {
 	uint64_t user_ptr;
 	uint32_t user_size;
@@ -690,7 +690,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 		 * disable dual-stream mode */
 		kgem->min_alignment = 64;
 
-	kgem->max_object_size = 2 * kgem->aperture_total / 3;
+	kgem->max_object_size = 2 * aperture.aper_size / 3;
 	kgem->max_gpu_size = kgem->max_object_size;
 	if (!kgem->has_llc)
 		kgem->max_gpu_size = MAX_CACHE_SIZE;
@@ -741,9 +741,11 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	} else
 		kgem->max_cpu_size = 0;
 
+	DBG(("%s: maximum object size=%d\n",
+	     __FUNCTION__, kgem->max_object_size));
 	DBG(("%s: large object thresold=%d\n",
 	     __FUNCTION__, kgem->large_object_size));
-	DBG(("%s: max object size (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
+	DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
 	     __FUNCTION__,
 	     kgem->max_gpu_size, kgem->max_cpu_size,
 	     kgem->max_upload_tile_size, kgem->max_copy_tile_size));
@@ -2392,11 +2394,16 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
 	uint32_t pitch, size;
 	unsigned flags = 0;
 
-	if (depth < 8)
+	if (depth < 8) {
+		DBG(("%s: unhandled depth %d\n", __FUNCTION__, depth));
 		return 0;
+	}
 
-	if (width > MAXSHORT || height > MAXSHORT)
+	if (width > MAXSHORT || height > MAXSHORT) {
+		DBG(("%s: unhandled size %dx%d\n",
+		     __FUNCTION__, width, height));
 		return 0;
+	}
 
 	size = kgem_surface_size(kgem, false, false,
 				 width, height, bpp,
@@ -2405,8 +2412,11 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
 		flags |= KGEM_CAN_CREATE_CPU | KGEM_CAN_CREATE_GPU;
 	if (size > kgem->large_object_size)
 		flags |= KGEM_CAN_CREATE_LARGE;
-	if (size > kgem->max_object_size)
+	if (size > kgem->max_object_size) {
+		DBG(("%s: too large (untiled) %d > %d\n",
+		     __FUNCTION__, size, kgem->max_object_size));
 		return 0;
+	}
 
 	size = kgem_surface_size(kgem, false, false,
 				 width, height, bpp,
@@ -2417,8 +2427,11 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
 		flags |= KGEM_CAN_CREATE_GPU;
 	if (size > kgem->large_object_size)
 		flags |= KGEM_CAN_CREATE_LARGE;
-	if (size > kgem->max_object_size)
+	if (size > kgem->max_object_size) {
+		DBG(("%s: too large (tiled) %d > %d\n",
+		     __FUNCTION__, size, kgem->max_object_size));
 		return 0;
+	}
 
 	return flags;
 }


More information about the xorg-commit mailing list