xf86-video-intel: 3 commits - src/sna/Makefile.am src/sna/sna_composite.c src/sna/sna_dri.c src/sna/sna_driver.c src/sna/sna.h src/sna/sna_render.c src/sna/sna_threads.c src/sna/sna_trapezoids.c

Chris Wilson ickle at kemper.freedesktop.org
Thu Jan 24 07:41:15 PST 2013


 src/sna/Makefile.am      |    2 
 src/sna/sna.h            |   23 ++++
 src/sna/sna_composite.c  |   84 +++++++++++++++-
 src/sna/sna_dri.c        |   15 ++
 src/sna/sna_driver.c     |    2 
 src/sna/sna_render.c     |   20 +--
 src/sna/sna_threads.c    |  236 +++++++++++++++++++++++++++++++++++++++++++++++
 src/sna/sna_trapezoids.c |   32 +-----
 8 files changed, 372 insertions(+), 42 deletions(-)

New commits:
commit d60128c55e8f5f69476d42c20f2fd62ccc0f411e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 24 15:41:29 2013 +0000

    sna/dri: Compensate clipExtents for drawable offset
    
    The clipExtents is in screen coordinates, whereas we just want to confirm
    that the maximum pixel to be copied lies within the DRI2 buffer, which is
    relative to the drawable.
    
    Reported-by: Matthieu Baerts <matttbe at gmail.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=59806
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 9d249e3..15b87dd 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -842,20 +842,31 @@ can_blit(struct sna * sna,
 	 DRI2BufferPtr back)
 {
 	RegionPtr clip;
+	int w, h;
 	uint32_t s;
 
 	if (draw->type == DRAWABLE_PIXMAP)
 		return true;
 
 	clip = &((WindowPtr)draw)->clipList;
+	w = clip->extents.x2 - draw->x;
+	h = clip->extents.y2 - draw->y;
+	if ((w|h) < 0)
+		return false;
 
 	s = get_private(front)->size;
-	if ((s>>16) < clip->extents.y2 || (s&0xffff) < clip->extents.x2)
+	if ((s>>16) < h || (s&0xffff) < w) {
+		DBG(("%s: reject front size (%dx%d) < (%dx%d)\n", __func__,
+		       s&0xffff, s>>16, w, h));
 		return false;
+	}
 
 	s = get_private(back)->size;
-	if ((s>>16) < clip->extents.y2 || (s&0xffff) < clip->extents.x2)
+	if ((s>>16) < h || (s&0xffff) < w) {
+		DBG(("%s:reject back size (%dx%d) < (%dx%d)\n", __func__,
+		     s&0xffff, s>>16, w, h));
 		return false;
+	}
 
 	return true;
 }
commit 264b3b72500c5af74d124a214347d45c9cb90a1d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 24 15:06:12 2013 +0000

    sna: Refactor to use a common fbComposite fallback
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna.h b/src/sna/sna.h
index 549a337..389884f 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -698,6 +698,15 @@ void sna_composite(CARD8 op,
 		   INT16 mask_x, INT16 mask_y,
 		   INT16 dst_x,  INT16 dst_y,
 		   CARD16 width, CARD16 height);
+void sna_composite_fb(CARD8 op,
+		      PicturePtr src,
+		      PicturePtr mask,
+		      PicturePtr dst,
+		      RegionPtr region,
+		      INT16 src_x,  INT16 src_y,
+		      INT16 mask_x, INT16 mask_y,
+		      INT16 dst_x,  INT16 dst_y,
+		      CARD16 width, CARD16 height); /* Common software fallback: moves the pictures to the CPU and composites them with pixman. */
 void sna_composite_rectangles(CARD8		 op,
 			      PicturePtr		 dst,
 			      xRenderColor	*color,
diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c
index 7898b2c..1a2adc8 100644
--- a/src/sna/sna_composite.c
+++ b/src/sna/sna_composite.c
@@ -439,6 +439,80 @@ static inline bool use_cpu(PixmapPtr pixmap, struct sna_pixmap *priv,
 }
 
 void
+sna_composite_fb(CARD8 op,
+		 PicturePtr src,
+		 PicturePtr mask,
+		 PicturePtr dst,
+		 RegionPtr region,
+		 INT16 src_x,  INT16 src_y,
+		 INT16 mask_x, INT16 mask_y,
+		 INT16 dst_x,  INT16 dst_y,
+		 CARD16 width, CARD16 height)
+{ /* Software fallback: move dst (only 'region'), src and mask to the CPU, then composite via sna_image_composite(). Returns silently if any move fails. */
+	pixman_image_t *src_image, *mask_image, *dest_image;
+	int src_xoff, src_yoff;
+	int msk_xoff, msk_yoff;
+	int dst_xoff, dst_yoff;
+	unsigned flags;
+
+	DBG(("%s: fallback -- move dst to cpu\n", __FUNCTION__));
+	if (op <= PictOpSrc && !dst->alphaMap) /* Ops up to PictOpSrc with no dst alpha map: ask for write-only access, no MOVE_READ. */
+		flags = MOVE_WRITE | MOVE_INPLACE_HINT;
+	else
+		flags = MOVE_WRITE | MOVE_READ;
+	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, region, flags)) /* Only 'region' of the destination is moved. */
+		return;
+	if (dst->alphaMap &&
+	    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, flags)) /* The dst alpha map is moved whole, with the same flags. */
+		return;
+
+	if (src->pDrawable) { /* A source without a drawable has nothing to move. */
+		DBG(("%s: fallback -- move src to cpu\n", __FUNCTION__));
+		if (!sna_drawable_move_to_cpu(src->pDrawable,
+					      MOVE_READ))
+			return;
+
+		if (src->alphaMap &&
+		    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
+					      MOVE_READ))
+			return;
+	}
+
+	if (mask && mask->pDrawable) { /* mask is optional; same treatment as src when it is drawable-backed. */
+		DBG(("%s: fallback -- move mask to cpu\n", __FUNCTION__));
+		if (!sna_drawable_move_to_cpu(mask->pDrawable,
+					      MOVE_READ))
+			return;
+
+		if (mask->alphaMap &&
+		    !sna_drawable_move_to_cpu(mask->alphaMap->pDrawable,
+					      MOVE_READ))
+			return;
+	}
+
+	DBG(("%s: fallback -- fbComposite\n", __FUNCTION__));
+
+	miCompositeSourceValidate(src);
+	if (mask)
+		miCompositeSourceValidate(mask);
+
+	src_image = image_from_pict(src, FALSE, &src_xoff, &src_yoff); /* Each call also yields an x/y offset that is added to the coordinates below. */
+	mask_image = image_from_pict(mask, FALSE, &msk_xoff, &msk_yoff); /* NOTE(review): called even when mask is NULL; presumably returns NULL then -- confirm. */
+	dest_image = image_from_pict(dst, TRUE, &dst_xoff, &dst_yoff);
+
+	if (src_image && dest_image && !(mask && !mask_image)) /* A mask image is required only when a mask picture was supplied. */
+		sna_image_composite(op, src_image, mask_image, dest_image,
+				       src_x + src_xoff, src_y + src_yoff,
+				       mask_x + msk_xoff, mask_y + msk_yoff,
+				       dst_x + dst_xoff, dst_y + dst_yoff,
+				       width, height);
+
+	free_pixman_pict(src, src_image); /* Released whether or not the composite ran. */
+	free_pixman_pict(mask, mask_image);
+	free_pixman_pict(dst, dest_image);
+}
+
+void
 sna_composite(CARD8 op,
 	      PicturePtr src,
 	      PicturePtr mask,
@@ -610,11 +684,11 @@ fallback:
 	}
 
 	DBG(("%s: fallback -- fbComposite\n", __FUNCTION__));
-	fbComposite(op, src, mask, dst,
-		    src_x,  src_y,
-		    mask_x, mask_y,
-		    dst_x,  dst_y,
-		    width,  height);
+	sna_composite_fb(op, src, mask, dst, &region,
+			 src_x,  src_y,
+			 mask_x, mask_y,
+			 dst_x,  dst_y,
+			 width,  height);
 out:
 	REGION_UNINIT(NULL, &region);
 }
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 2a51598..0b81018 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -5599,32 +5599,14 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 		region.extents.y2 = region.extents.y1 + extents.y2;
 		region.data = NULL;
 
-		DBG(("%s: move-to-cpu\n", __FUNCTION__));
-		if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
-						     MOVE_READ | MOVE_WRITE))
-			goto done;
-		if (dst->alphaMap  &&
-		    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable,
-					      MOVE_READ | MOVE_WRITE))
-			goto done;
-		if (src->pDrawable) {
-			if (!sna_drawable_move_to_cpu(src->pDrawable,
-						      MOVE_READ))
-				goto done;
-			if (src->alphaMap &&
-			    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
-						      MOVE_READ))
-				goto done;
-		}
-
 		DBG(("%s: fbComposite()\n", __FUNCTION__));
-		fbComposite(op, src, mask, dst,
-			    src_x + dst_x - pixman_fixed_to_int(traps[0].left.p1.x),
-			    src_y + dst_y - pixman_fixed_to_int(traps[0].left.p1.y),
-			    0, 0,
-			    dst_x, dst_y,
-			    extents.x2, extents.y2);
-done:
+		sna_composite_fb(op, src, mask, dst, &region,
+				 src_x + dst_x - pixman_fixed_to_int(traps[0].left.p1.x),
+				 src_y + dst_y - pixman_fixed_to_int(traps[0].left.p1.y),
+				 0, 0,
+				 dst_x, dst_y,
+				 extents.x2, extents.y2);
+
 		FreePicture(mask, 0);
 	}
 	sna_pixmap_destroy(scratch);
commit 8ecfbea9d1f83b2de62bee0f58299e7a90c741d1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 24 14:46:03 2013 +0000

    sna: Experiment with a threaded renderer for fallback compositing
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am
index bcf757e..bfa836f 100644
--- a/src/sna/Makefile.am
+++ b/src/sna/Makefile.am
@@ -34,6 +34,7 @@ AM_CFLAGS += @VALGRIND_CFLAGS@
 endif
 
 noinst_LTLIBRARIES = libsna.la
+libsna_la_LDFLAGS = -pthread
 libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ brw/libbrw.la fb/libfb.la
 
 libsna_la_SOURCES = \
@@ -62,6 +63,7 @@ libsna_la_SOURCES = \
 	sna_trapezoids.c \
 	sna_tiling.c \
 	sna_transform.c \
+	sna_threads.c \
 	sna_video.c \
 	sna_video.h \
 	sna_video_overlay.c \
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 2ba5fe4..549a337 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -835,4 +835,18 @@ inline static bool is_clipped(const RegionRec *r,
 		r->extents.y2 - r->extents.y1 != d->height);
 }
 
+void sna_threads_init(void); /* Creates the worker thread pool used by sna_image_composite(). */
+void sna_image_composite(pixman_op_t        op,
+			 pixman_image_t    *src,
+			 pixman_image_t    *mask,
+			 pixman_image_t    *dst,
+			 int16_t            src_x,
+			 int16_t            src_y,
+			 int16_t            mask_x,
+			 int16_t            mask_y,
+			 int16_t            dst_x,
+			 int16_t            dst_y,
+			 uint16_t           width,
+			 uint16_t           height); /* Same parameter list as pixman_image_composite(); may split the work across the thread pool. */
+
 #endif /* _SNA_H */
diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c
index a0707ed..8420aeb 100644
--- a/src/sna/sna_driver.c
+++ b/src/sna/sna_driver.c
@@ -1174,5 +1174,7 @@ Bool sna_init_scrn(ScrnInfoPtr scrn, int entity_num)
 	xf86SetEntityInstanceForScreen(scrn, entity_num,
 				       xf86GetNumEntityInstances(entity_num)-1);
 
+	sna_threads_init();
+
 	return TRUE;
 }
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 46ce64b..84c6b35 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -1465,11 +1465,11 @@ sna_render_picture_approximate_gradient(struct sna *sna,
 		pixman_transform_multiply(&t, picture->transform, &t);
 	pixman_image_set_transform(src, &t);
 
-	pixman_image_composite(PictOpSrc, src, NULL, dst,
-			       x + dx, y + dy,
-			       0, 0,
-			       0, 0,
-			       w2, h2);
+	sna_image_composite(PictOpSrc, src, NULL, dst,
+			    x+dx, y+dy,
+			    0, 0,
+			    0, 0,
+			    w2, h2);
 	free_pixman_pict(picture, src);
 	pixman_image_unref(dst);
 
@@ -1580,11 +1580,11 @@ do_fixup:
 
 	DBG(("%s: compositing tmp=(%d+%d, %d+%d)x(%d, %d)\n",
 	     __FUNCTION__, x, dx, y, dy, w, h));
-	pixman_image_composite(PictOpSrc, src, NULL, dst,
-			       x + dx, y + dy,
-			       0, 0,
-			       0, 0,
-			       w, h);
+	sna_image_composite(PictOpSrc, src, NULL, dst,
+			    x + dx, y + dy,
+			    0, 0,
+			    0, 0,
+			    w, h);
 	free_pixman_pict(picture, src);
 
 	/* Then convert to card format */
diff --git a/src/sna/sna_threads.c b/src/sna/sna_threads.c
new file mode 100644
index 0000000..afa260f
--- /dev/null
+++ b/src/sna/sna_threads.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris at chris-wilson.co.uk>
+ *
+ */
+
+#include "sna.h"
+
+#include <unistd.h>
+#include <pthread.h>
+
+static int max_threads = -1; /* -1 until sna_threads_init() runs; afterwards the pool size, or 0 when threading is disabled. */
+
+static struct thread { /* One pool worker: its pthread plus a single pending-job slot. */
+    pthread_t thread;
+    pthread_mutex_t mutex; /* Held while func is updated; used together with cond. */
+    pthread_cond_t cond; /* Signalled both when a job is posted and when a job completes. */
+
+    void (*func)(void *arg); /* Pending job; NULL means the worker is idle. */
+    void *arg; /* Argument passed to func. */
+} *threads; /* Array of max_threads workers, allocated in sna_threads_init(). */
+
+static void *__run__(void *arg) /* Worker thread entry point: wait for a job, run it, mark the slot idle, repeat. */
+{
+	struct thread *t = arg;
+
+	pthread_mutex_lock(&t->mutex);
+	while (1) {
+		while (t->func == NULL) /* Sleep until threads_run() posts a job. */
+			pthread_cond_wait(&t->cond, &t->mutex);
+		pthread_mutex_unlock(&t->mutex); /* The job itself runs without the lock held. */
+
+		assert(t->func);
+		t->func(t->arg);
+
+		pthread_mutex_lock(&t->mutex);
+		t->func = NULL; /* Mark the slot idle again... */
+		pthread_cond_signal(&t->cond); /* ...and wake a caller waiting for this worker to finish. */
+	}
+	pthread_mutex_unlock(&t->mutex); /* Not reached: the loop above has no exit. */
+
+	return NULL;
+}
+
+void sna_threads_init(void) /* Create the worker pool once; later calls return immediately. */
+{
+	int n;
+
+	if (max_threads != -1) /* Already initialised, or already disabled. */
+		return;
+
+	max_threads = sysconf (_SC_NPROCESSORS_ONLN) / 2; /* Pool size is half the online processor count. */
+	if (max_threads <= 1) /* A pool of one or fewer is not worth creating; this also catches a sysconf() error return. */
+		goto bail;
+
+	DBG(("%s: creating a thread pool of %d threads\n",
+	     __func__, max_threads));
+
+	threads = malloc (sizeof(threads[0])*max_threads);
+	if (threads == NULL)
+		goto bail;
+
+	for (n = 0; n < max_threads; n++) {
+		pthread_mutex_init(&threads[n].mutex, NULL);
+		pthread_cond_init(&threads[n].cond, NULL);
+
+		threads[n].func = NULL; /* Start idle so __run__ blocks on cond straight away. */
+		if (pthread_create(&threads[n].thread, NULL,
+				   __run__, &threads[n]))
+			goto bail; /* NOTE(review): threads[] and any workers already started are not released on this path. */
+	}
+
+	return;
+
+bail:
+	max_threads = 0; /* 0 makes use_threads() always choose the single-threaded path. */
+}
+
+static void /* Post func(arg) to an idle worker; if none is idle, wait for a randomly chosen one. */
+threads_run(void (*func)(void *arg), void *arg)
+{
+	int n;
+
+	assert(max_threads > 0);
+
+	for (n = 0; n < max_threads; n++) {
+		if (threads[n].func) /* Unlocked peek to skip workers that look busy. */
+			continue;
+
+		pthread_mutex_lock(&threads[n].mutex);
+		if (threads[n].func) { /* Re-check under the lock before claiming the slot. */
+			pthread_mutex_unlock(&threads[n].mutex);
+			continue;
+		}
+
+		goto execute; /* Slot n is idle and its mutex is still held. */
+	}
+
+	n = rand() % max_threads; /* Every worker looked busy: pick one at random and wait for it to finish. */
+	pthread_mutex_lock(&threads[n].mutex);
+	while (threads[n].func)
+		pthread_cond_wait(&threads[n].cond, &threads[n].mutex);
+
+execute:
+	threads[n].func = func;
+	threads[n].arg = arg;
+	pthread_cond_signal(&threads[n].cond); /* Wake the worker sleeping in __run__. */
+	pthread_mutex_unlock(&threads[n].mutex);
+}
+
+static void threads_wait(void) /* Block until every worker's job slot is empty. */
+{
+	int n;
+
+	assert(max_threads > 0);
+
+	for (n = 0; n < max_threads; n++) {
+		if (threads[n].func == NULL) /* Unlocked peek: nothing pending on this worker. */
+			continue;
+
+		pthread_mutex_lock(&threads[n].mutex);
+		while (threads[n].func) /* __run__ clears func and signals cond when the job finishes. */
+			pthread_cond_wait(&threads[n].cond, &threads[n].mutex);
+		pthread_mutex_unlock(&threads[n].mutex);
+	}
+}
+
+static int /* Choose how many workers to split a width x height operation across; 1 means do not split. */
+use_threads (int width, int height, int threshold)
+{
+	int num_threads;
+
+	if (max_threads <= 0 || width <= 0) /* Pool disabled, or an empty span; the width check also keeps 128/width below from dividing by zero. */
+		return 1;
+
+	num_threads = height / (128/width + 1) / threshold-1; /* Parses as (height / (128/width + 1) / threshold) - 1; for width below 128 the divisor grows, so fewer workers are chosen. */
+	if (num_threads <= 0)
+		return 1;
+
+	if (num_threads > max_threads) /* Never ask for more workers than the pool holds. */
+		num_threads = max_threads;
+	return num_threads;
+}
+
+struct thread_composite { /* Arguments for one pixman_image_composite() call, packed so they can be passed to a worker as a single pointer. */
+	pixman_image_t *src, *mask, *dst;
+	pixman_op_t op;
+	int16_t src_x, src_y;
+	int16_t mask_x, mask_y;
+	int16_t dst_x, dst_y;
+	uint16_t width, height;
+};
+
+static void thread_composite(void *arg) /* Worker job: unpack a struct thread_composite and run the composite it describes. */
+{
+	struct thread_composite *t = arg;
+	pixman_image_composite(t->op, t->src, t->mask, t->dst,
+			       t->src_x, t->src_y,
+			       t->mask_x, t->mask_y,
+			       t->dst_x, t->dst_y,
+			       t->width, t->height);
+}
+
+void sna_image_composite(pixman_op_t        op,
+			 pixman_image_t    *src,
+			 pixman_image_t    *mask,
+			 pixman_image_t    *dst,
+			 int16_t            src_x,
+			 int16_t            src_y,
+			 int16_t            mask_x,
+			 int16_t            mask_y,
+			 int16_t            dst_x,
+			 int16_t            dst_y,
+			 uint16_t           width,
+			 uint16_t           height)
+{ /* Composite with pixman, either directly or as horizontal bands spread across the worker pool. */
+	int num_threads;
+
+	num_threads = use_threads(width, height, 16);
+	if (num_threads <= 1) { /* Not worth splitting, or no pool: a single direct pixman call. */
+		pixman_image_composite(op, src, mask, dst,
+				       src_x, src_y,
+				       mask_x, mask_y,
+				       dst_x, dst_y,
+				       width, height);
+		return;
+	} else {
+		struct thread_composite threads[num_threads]; /* Per-band job arguments on this stack frame; shadows the file-scope threads pool within this block. */
+		int y, dy, n;
+
+		y = dst_y;
+		dy = (height + num_threads - 1) / num_threads; /* Rows per band, rounded up. */
+		for (n = 0; n < num_threads; n++) {
+			threads[n].op = op;
+			threads[n].src = src;
+			threads[n].mask = mask;
+			threads[n].dst = dst;
+			threads[n].src_x = src_x;
+			threads[n].src_y = src_y + y - dst_y; /* Shift source and mask rows by this band's offset from the top. */
+			threads[n].mask_x = mask_x;
+			threads[n].mask_y = mask_y + y - dst_y;
+			threads[n].dst_x = dst_x;
+			threads[n].dst_y = y;
+			threads[n].width = width;
+			threads[n].height = dy;
+
+			threads_run(thread_composite, &threads[n]); /* Hands a worker a pointer into this stack frame. */
+
+			y += dy;
+			if (y + dy > dst_y + height) /* Clamp the next band so it ends at the bottom row. */
+				dy = dst_y + height - y;
+		}
+		threads_wait(); /* Every job must finish before the threads[] array above goes out of scope. */
+	}
+}


More information about the xorg-commit mailing list