xf86-video-intel: 6 commits - src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_render_inline.h src/sna/sna_trapezoids.c

Chris Wilson ickle at kemper.freedesktop.org
Sun Dec 16 02:06:42 PST 2012


 src/sna/gen6_render.c       |   39 +++++++++++++++++++++++++++------------
 src/sna/gen7_render.c       |   42 ++++++++++++++++++++++++++++++------------
 src/sna/kgem.c              |   18 +++++++++++-------
 src/sna/kgem.h              |    6 +++++-
 src/sna/sna_accel.c         |    9 ++++++++-
 src/sna/sna_render_inline.h |   13 +++++++++----
 src/sna/sna_trapezoids.c    |    2 +-
 7 files changed, 91 insertions(+), 38 deletions(-)

New commits:
commit 6490585f65bde487da7bc41fa5cb1c5a028d0bf4
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 15 23:26:30 2012 +0000

    sna: Do not force use of the GPU for a copy from a SHM pixmap
    
    As we will undoubtedly flush and sync upon the SHM request very
    shortly afterwards, we only want to use the GPU for the SHM upload
    iff it is currently busy.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index d2f132c..23acf30 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4258,7 +4258,7 @@ sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	if (dst != src)
 		get_drawable_deltas(dst, pixmap, &tx, &ty);
 
-	if (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))
+	if (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage) || priv->shm)
 		goto fallback;
 
 	if (priv->gpu_damage) {
@@ -4563,6 +4563,13 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 
 			assert(bo != dst_priv->cpu_bo);
 
+			if (src_priv->shm &&
+			    alu == GXcopy &&
+			    DAMAGE_IS_ALL(src_priv->cpu_damage) &&
+			    !__kgem_bo_is_busy(&sna->kgem, src_priv->cpu_bo) &&
+			    !__kgem_bo_is_busy(&sna->kgem, bo))
+				goto fallback;
+
 			RegionTranslate(region, src_dx, src_dy);
 			ret = sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
 							      region,
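
A standalone sketch of the heuristic above; the struct and busy
predicate are simplified stand-ins for the real kgem types
(struct kgem_bo, __kgem_bo_is_busy()), not the driver's API:

    #include <stdbool.h>

    /* Stand-in for struct kgem_bo; only the busy state matters here. */
    struct toy_bo { bool busy; };

    /* Stand-in for __kgem_bo_is_busy(). */
    static bool toy_bo_is_busy(const struct toy_bo *bo)
    {
            return bo->busy;
    }

    /* Copy from a SHM pixmap via the GPU only when either buffer is
     * already busy; an idle pair is better served by an immediate CPU
     * copy, as the client will flush and sync on the SHM segment very
     * shortly anyway. */
    static bool shm_copy_prefers_gpu(const struct toy_bo *src,
                                     const struct toy_bo *dst)
    {
            return toy_bo_is_busy(src) || toy_bo_is_busy(dst);
    }
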
commit 3a08f091875f2f0f49697ba9852077094b3a704b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 15 22:53:44 2012 +0000

    sna/gen6+: Tweak prefer-blt-bo
    
    Split the decision between the case where it is imperative to use
    the BLT to avoid TLB misses and the second case where it is merely
    preferable to switch.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index c5ddba5..970e943 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2423,12 +2423,12 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo)
 	return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
 }
 
-static bool prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
+static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
 {
-	if (RQ_IS_BLT(bo->rq))
-		return true;
+	if (bo->rq)
+		return RQ_IS_BLT(bo->rq) ? 1 : -1;
 
-	return untiled_tlb_miss(bo) && bo->pitch < MAXSHORT;
+	return bo->tiling == I915_TILING_NONE;
 }
 
 static bool
@@ -2670,11 +2670,15 @@ prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
 	if (sna->kgem.ring == KGEM_BLT)
 		return true;
 
+	if (untiled_tlb_miss(tmp->dst.bo) ||
+	    untiled_tlb_miss(tmp->src.bo))
+		return true;
+
 	if (!prefer_blt_ring(sna))
 		return false;
 
-	return (prefer_blt_bo(sna, tmp->dst.bo) ||
-		prefer_blt_bo(sna, tmp->src.bo));
+	return (prefer_blt_bo(sna, tmp->dst.bo) >= 0 &&
+		prefer_blt_bo(sna, tmp->src.bo) >= 0);
 }
 
 static bool
@@ -3299,9 +3303,15 @@ static inline bool prefer_blt_copy(struct sna *sna,
 	if (src_bo == dst_bo && can_switch_to_blt(sna))
 		return true;
 
-	return ((flags & COPY_LAST && sna->kgem.ring != KGEM_RENDER) ||
-		prefer_blt_bo(sna, src_bo) ||
-		prefer_blt_bo(sna, dst_bo));
+	if ((flags & COPY_LAST && sna->kgem.ring != KGEM_RENDER))
+		return true;
+
+	if (untiled_tlb_miss(src_bo) ||
+	    untiled_tlb_miss(dst_bo))
+		return true;
+
+	return (prefer_blt_bo(sna, src_bo) >= 0 &&
+		prefer_blt_bo(sna, dst_bo) >= 0);
 }
 
 inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
@@ -3699,9 +3709,12 @@ static inline bool prefer_blt_fill(struct sna *sna,
 	if (PREFER_RENDER)
 		return PREFER_RENDER < 0;
 
+	if (untiled_tlb_miss(bo))
+		return true;
+
 	return (can_switch_to_blt(sna) ||
 		prefer_blt_ring(sna) ||
-		untiled_tlb_miss(bo));
+		prefer_blt_bo(sna, bo) >= 0);
 }
 
 static bool
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 5e62a60..65989ee 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2526,14 +2526,15 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo)
 	return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
 }
 
-static bool prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
+static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
 {
-	if (RQ_IS_BLT(bo->rq))
-		return true;
+	if (bo->rq)
+		return RQ_IS_BLT(bo->rq) ? 1 : -1;
 
-	return untiled_tlb_miss(bo) && bo->pitch < MAXSHORT;
+	return bo->tiling == I915_TILING_NONE;
 }
 
+
 inline static bool prefer_blt_ring(struct sna *sna)
 {
 	return sna->kgem.ring != KGEM_RENDER || can_switch_to_blt(sna);
@@ -2787,11 +2788,15 @@ prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
 	if (sna->kgem.ring == KGEM_BLT)
 		return true;
 
+	if (untiled_tlb_miss(tmp->dst.bo) ||
+	    untiled_tlb_miss(tmp->src.bo))
+		return true;
+
 	if (!prefer_blt_ring(sna))
 		return false;
 
-	return (prefer_blt_bo(sna, tmp->dst.bo) ||
-		prefer_blt_bo(sna, tmp->src.bo));
+	return (prefer_blt_bo(sna, tmp->dst.bo) >= 0 &&
+		prefer_blt_bo(sna, tmp->src.bo) >= 0);
 }
 
 static bool
@@ -3384,9 +3389,15 @@ static inline bool prefer_blt_copy(struct sna *sna,
 	if (src_bo == dst_bo && can_switch_to_blt(sna))
 		return true;
 
-	return ((flags & COPY_LAST && sna->kgem.ring != KGEM_RENDER) ||
-		prefer_blt_bo(sna, src_bo) ||
-		prefer_blt_bo(sna, dst_bo));
+	if ((flags & COPY_LAST && sna->kgem.ring != KGEM_RENDER))
+		return true;
+
+	if (untiled_tlb_miss(src_bo) ||
+	    untiled_tlb_miss(dst_bo))
+		return true;
+
+	return (prefer_blt_bo(sna, src_bo) >= 0 &&
+		prefer_blt_bo(sna, dst_bo) >= 0);
 }
 
 inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
@@ -3773,7 +3784,12 @@ gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
 static inline bool prefer_blt_fill(struct sna *sna,
 				   struct kgem_bo *bo)
 {
-	return prefer_blt_ring(sna) || untiled_tlb_miss(bo);
+	if (untiled_tlb_miss(bo))
+		return true;
+
+	return (can_switch_to_blt(sna) ||
+		prefer_blt_ring(sna) ||
+		prefer_blt_bo(sna, bo) >= 0);
 }
 
 static bool
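
The reworked predicate is effectively tri-state, and the callers now
test the imperative case (an untiled TLB miss) before the merely
preferable one. A self-contained sketch, with toy types standing in
for the driver's:

    #include <stdbool.h>

    enum toy_ring { TOY_RENDER = 0, TOY_BLT = 1 };

    struct toy_bo {
            bool untiled;        /* tiling == I915_TILING_NONE */
            int pitch;
            bool active;         /* has an outstanding request */
            enum toy_ring ring;  /* ring of that request */
    };

    /* Imperative: sampling an untiled, wide-pitch bo on the render
     * ring thrashes the TLB, so the BLT must be used. */
    static bool toy_untiled_tlb_miss(const struct toy_bo *bo)
    {
            return bo->untiled && bo->pitch >= 4096;
    }

    /* Preferential: +1 prefers the BLT, -1 insists on the render
     * ring; an idle bo returns +1 (untiled) or 0 (no preference). */
    static int toy_prefer_blt_bo(const struct toy_bo *bo)
    {
            if (bo->active)
                    return bo->ring == TOY_BLT ? 1 : -1;
            return bo->untiled ? 1 : 0;
    }

    /* Callers honour the imperative case first, then switch only if
     * neither bo is pinned to the render ring (both are >= 0). */
    static bool toy_prefer_blt_copy(const struct toy_bo *src,
                                    const struct toy_bo *dst)
    {
            if (toy_untiled_tlb_miss(src) || toy_untiled_tlb_miss(dst))
                    return true;
            return toy_prefer_blt_bo(src) >= 0 &&
                   toy_prefer_blt_bo(dst) >= 0;
    }
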
commit ac9ef1fc606e87b48baa47be22bf828dcfe6659f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 15 20:49:56 2012 +0000

    sna/gen6+: Keep the bo on its current ring
    
    Track the most recent ring each bo is executed on, and prefer to keep it
    on that ring for the next operation.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 6883928..c5ddba5 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2406,7 +2406,7 @@ static bool prefer_blt_ring(struct sna *sna)
 
 static bool can_switch_to_blt(struct sna *sna)
 {
-	if (sna->kgem.ring == KGEM_BLT)
+	if (sna->kgem.ring != KGEM_RENDER)
 		return true;
 
 	if (NO_RING_SWITCH)
@@ -2415,8 +2415,7 @@ static bool can_switch_to_blt(struct sna *sna)
 	if (!sna->kgem.has_semaphores)
 		return false;
 
-	return (sna->kgem.mode == KGEM_NONE ||
-		kgem_ring_is_idle(&sna->kgem, KGEM_BLT));
+	return kgem_ring_is_idle(&sna->kgem, KGEM_BLT);
 }
 
 static inline bool untiled_tlb_miss(struct kgem_bo *bo)
@@ -2426,6 +2425,9 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo)
 
 static bool prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
 {
+	if (RQ_IS_BLT(bo->rq))
+		return true;
+
 	return untiled_tlb_miss(bo) && bo->pitch < MAXSHORT;
 }
 
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 30dd641..5e62a60 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2509,7 +2509,7 @@ gen7_composite_set_target(struct sna *sna,
 
 inline static bool can_switch_to_blt(struct sna *sna)
 {
-	if (sna->kgem.ring == KGEM_BLT)
+	if (sna->kgem.ring != KGEM_RENDER)
 		return true;
 
 	if (NO_RING_SWITCH)
@@ -2518,8 +2518,7 @@ inline static bool can_switch_to_blt(struct sna *sna)
 	if (!sna->kgem.has_semaphores)
 		return false;
 
-	return (sna->kgem.mode == KGEM_NONE ||
-		kgem_ring_is_idle(&sna->kgem, KGEM_BLT));
+	return kgem_ring_is_idle(&sna->kgem, KGEM_BLT);
 }
 
 static inline bool untiled_tlb_miss(struct kgem_bo *bo)
@@ -2529,6 +2528,9 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo)
 
 static bool prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
 {
+	if (RQ_IS_BLT(bo->rq))
+		return true;
+
 	return untiled_tlb_miss(bo) && bo->pitch < MAXSHORT;
 }
 
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 66bf473..6a10b33 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -106,6 +106,8 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 #define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2)
 #define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3)
 
+#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring)))
+
 #define LOCAL_I915_PARAM_HAS_SEMAPHORES		20
 #define LOCAL_I915_PARAM_HAS_SECURE_BATCHES	23
 #define LOCAL_I915_PARAM_HAS_NO_RELOC		24
@@ -1332,7 +1334,7 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
 static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
 {
 	bo->exec = kgem_add_handle(kgem, bo);
-	bo->rq = kgem->next_request;
+	bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
 
 	list_move_tail(&bo->request, &kgem->next_request->buffers);
 
@@ -1845,7 +1847,7 @@ static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
 					      struct kgem_bo,
 					      request);
 
-			assert(bo->rq == rq);
+			assert(RQ(bo->rq) == rq);
 			assert(bo->exec == NULL);
 			assert(bo->domain == DOMAIN_GPU);
 
@@ -2005,7 +2007,7 @@ static void kgem_commit(struct kgem *kgem)
 		assert(!bo->purged);
 		assert(bo->exec);
 		assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
-		assert(bo->rq == rq || (bo->proxy->rq == rq));
+		assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));
 
 		bo->presumed_offset = bo->exec->offset;
 		bo->exec = NULL;
@@ -2125,7 +2127,7 @@ static void kgem_finish_buffers(struct kgem *kgem)
 		}
 
 		assert(bo->need_io);
-		assert(bo->base.rq == kgem->next_request);
+		assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
 		assert(bo->base.domain != DOMAIN_GPU);
 
 		if (bo->base.refcnt == 1 &&
@@ -2452,7 +2454,7 @@ void _kgem_submit(struct kgem *kgem)
 
 		rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
 		rq->bo->exec = &kgem->exec[i];
-		rq->bo->rq = rq; /* useful sanity check */
+		rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
 		list_add(&rq->bo->request, &rq->buffers);
 		rq->ring = kgem->ring == KGEM_BLT;
 
@@ -4039,7 +4041,8 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 			if (bo->exec == NULL) {
 				list_move_tail(&bo->request,
 					       &kgem->next_request->buffers);
-				bo->rq = kgem->next_request;
+				bo->rq = MAKE_REQUEST(kgem->next_request,
+						      kgem->ring);
 				bo->exec = &_kgem_dummy_exec;
 			}
 
@@ -4053,7 +4056,8 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 
 		if (bo->exec == NULL)
 			kgem_add_bo(kgem, bo);
-		assert(bo->rq == kgem->next_request);
+		assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
+		assert(RQ_RING(bo->rq) == kgem->ring);
 
 		if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
 			if (bo->tiling &&
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 2d90cf3..53e252d 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -53,6 +53,10 @@ struct kgem_bo {
 #define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
 #define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0)
 	struct kgem_request *rq;
+#define RQ(rq) ((struct kgem_request *)((uintptr_t)(rq) & ~3))
+#define RQ_RING(rq) ((uintptr_t)(rq) & 3)
+#define RQ_IS_BLT(rq) (RQ_RING(rq) == KGEM_BLT)
+
 	struct drm_i915_gem_exec_object2 *exec;
 
 	struct kgem_bo_binding {
@@ -586,7 +590,7 @@ static inline void __kgem_bo_mark_dirty(struct kgem_bo *bo)
 
 	bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE;
 	bo->needs_flush = bo->dirty = true;
-	list_move(&bo->request, &bo->rq->buffers);
+	list_move(&bo->request, &RQ(bo->rq)->buffers);
 }
 
 static inline void kgem_bo_mark_dirty(struct kgem_bo *bo)
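
The ring is tracked without widening struct kgem_bo: requests are
word-aligned, so the bottom two bits of bo->rq are free to carry a
ring tag, which the RQ()/RQ_RING() macros above decode. A runnable
sketch of the same trick (the ring ids here are illustrative; the
real KGEM_* values merely need to fit in the two masked bits):

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>

    enum { TOY_RENDER = 0, TOY_BLT = 1 };

    struct toy_request { int serial; };

    #define MAKE_REQUEST(rq, ring) \
            ((struct toy_request *)((uintptr_t)(rq) | (ring)))
    #define RQ(rq)      ((struct toy_request *)((uintptr_t)(rq) & ~(uintptr_t)3))
    #define RQ_RING(rq) ((uintptr_t)(rq) & 3)
    #define RQ_IS_BLT(rq) (RQ_RING(rq) == TOY_BLT)

    int main(void)
    {
            /* malloc returns at least word-aligned memory, so the low
             * two bits of the pointer are guaranteed to be zero. */
            struct toy_request *rq = malloc(sizeof(*rq));
            struct toy_request *tagged = MAKE_REQUEST(rq, TOY_BLT);

            assert(RQ(tagged) == rq);  /* the pointer is recoverable */
            assert(RQ_IS_BLT(tagged)); /* ...and so is the ring      */

            free(rq);
            return 0;
    }
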
commit 15ccb7148d15d776a661c1b8c5b9b2360fcae4ad
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 15 20:07:56 2012 +0000

    sna/gen6+: Apply is_scanout to the key, not the value, in the binding cache
    
    Oops, we never managed to reuse the cached location of the target
    surface as we entered it into the cache with the wrong key.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index ec29124..6883928 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1248,7 +1248,7 @@ gen6_bind_bo(struct sna *sna,
 	ss[4] = 0;
 	ss[5] = is_scanout ? 0 : 3 << 16;
 
-	kgem_bo_set_binding(bo, format, offset | is_scanout << 31);
+	kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
 
 	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
 	     offset, bo->handle, ss[1],
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 29b1cdc..30dd641 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -1373,7 +1373,7 @@ gen7_bind_bo(struct sna *sna,
 	if (sna->kgem.gen == 075)
 		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
 
-	kgem_bo_set_binding(bo, format, offset | is_scanout << 31);
+	kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
 
 	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
 	     offset, bo->handle, ss[1],
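
The bug class, in miniature: the binding cache maps a key (the format,
with the scanout bit folded into bit 31) to a value (the surface-state
offset). Tagging the offset instead of the format produced cache
entries whose keys never matched a lookup by format. A stand-in
one-entry cache, not the real kgem_bo_set_binding() API:

    #include <stdbool.h>
    #include <stdint.h>

    struct toy_binding { uint32_t key; uint16_t offset; };

    /* The scanout bit belongs in the key on both store and lookup. */
    static uint32_t toy_key(uint32_t format, bool is_scanout)
    {
            return format | (uint32_t)is_scanout << 31;
    }

    static void toy_set_binding(struct toy_binding *b,
                                uint32_t format, bool is_scanout,
                                uint16_t offset)
    {
            b->key = toy_key(format, is_scanout);
            b->offset = offset;
    }

    static bool toy_get_binding(const struct toy_binding *b,
                                uint32_t format, bool is_scanout,
                                uint16_t *offset)
    {
            if (b->key != toy_key(format, is_scanout))
                    return false;
            *offset = b->offset;
            return true;
    }
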
commit fde25b08922d97ca0d4a69c654bf690edbd53b3d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 15 18:59:53 2012 +0000

    sna/trapezoids: Add another inline hint
    
    cell_list_alloc() is only called from one place, and the compiler should
    already be inlining it - but does not appear to be. Hint harder.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 289780b..81ae9c4 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -471,7 +471,7 @@ cell_list_reset(struct cell_list *cells)
 	pool_reset(cells->cell_pool.base);
 }
 
-static struct cell *
+inline static struct cell *
 cell_list_alloc(struct cell_list *cells,
 		struct cell *tail,
 		int x)
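
For reference, "inline static" is still only a hint the compiler may
ignore; GCC and clang also offer a forcing attribute, which this
commit does not use. A small illustration:

    #include <stdio.h>

    /* The soft hint used in the commit: the compiler may still
     * decline to inline this. */
    inline static int square(int x) { return x * x; }

    /* The forcing variant (a GCC/clang extension): inlined even
     * at -O0. */
    static inline __attribute__((always_inline)) int cube(int x)
    {
            return x * x * x;
    }

    int main(void)
    {
            printf("%d %d\n", square(3), cube(3));
            return 0;
    }
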
commit 2a21c8b351052be9c32c5669264fb05a8510c957
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 15 17:56:27 2012 +0000

    sna: Include shm hint in render placement
    
    The goal is to reduce the preference for rendering to a SHM pixmap:
    only if it is already active will we consider continuing to use it
    on the GPU.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h
index e31db15..51c78bc 100644
--- a/src/sna/sna_render_inline.h
+++ b/src/sna/sna_render_inline.h
@@ -226,17 +226,22 @@ inline static bool dst_use_gpu(PixmapPtr pixmap)
 	if (priv == NULL)
 		return false;
 
-	if (priv->gpu_damage && !priv->clear &&
-	    (!priv->cpu || !priv->cpu_damage || kgem_bo_is_busy(priv->gpu_bo)))
+	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
+		return true;
+
+	if (priv->clear)
+		return false;
+
+	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
 		return true;
 
-	return priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo);
+	return priv->gpu_damage && (!priv->cpu || !priv->cpu_damage);
 }
 
 inline static bool dst_use_cpu(PixmapPtr pixmap)
 {
 	struct sna_pixmap *priv = sna_pixmap(pixmap);
-	if (priv == NULL)
+	if (priv == NULL || priv->shm)
 		return true;
 
 	return priv->cpu_damage && priv->cpu;

