xf86-video-intel: 4 commits - configure.ac src/intel_device.c src/intel_driver.h src/sna/gen7_render.c src/sna/sna_dri.c src/sna/sna_video_sprite.c

Fri Aug 23 11:28:12 PDT 2013

configure.ac               |   11 +++++++++
 src/intel_device.c         |   54 +++++++++++++++++++++++++++++++++++++++------
 src/intel_driver.h         |    2 -
 src/sna/gen7_render.c      |   45 +++++++++++++++++++++++++++++++++++++
 src/sna/sna_dri.c          |    4 +--
 src/sna/sna_video_sprite.c |   12 +++++++++-
 6 files changed, 117 insertions(+), 11 deletions(-)

New commits:
commit 8b0d69e76c50155ea404f0e8a97d60a3f710c8a3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Aug 23 18:53:34 2013 +0100

    intel: Add experimental rendernode support
    
    Render nodes allow clients full access to off-screen rendering and GPU
    offload, without assuming any master responsibilities (for device and
    display management). As they have a more limited interface, they can be
    used in a more permissive manner.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/configure.ac b/configure.ac
index 9fc011e..1e73c0c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -336,6 +336,17 @@ fi
 
 xp_msg=""
 
+AC_ARG_ENABLE(rendernode,
+	      AS_HELP_STRING([--enable-rendernode],
+			     [Enable use of render nodes (experimental) [default=no]]),
+	      [RENDERNODE="$enableval"],
+	      [RENDERNODE=no])
+AM_CONDITIONAL(USE_RENDERNODE, test x$RENDERNODE = xyes)
+if test "x$RENDERNODE" = xyes; then
+	AC_DEFINE(USE_RENDERNODE,1,[Assume "rendernode" support])
+	xp_msg="$xp_msg rendernode"
+fi
+
 AC_ARG_ENABLE(create2,
 	      AS_HELP_STRING([--enable-create2],
 			     [Enable use of create2 ioctl (experimental) [default=no]]),
diff --git a/src/intel_device.c b/src/intel_device.c
index d9ff8bc..751875e 100644
--- a/src/intel_device.c
+++ b/src/intel_device.c
@@ -24,6 +24,12 @@
 
  **************************************************************************/
 
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
 #include <assert.h>
 #include <string.h>
 #include <unistd.h>
@@ -43,7 +49,8 @@
 #include "intel_driver.h"
 
 struct intel_device {
-	char *path;
+	char *master_node;
+	char *render_node;
 	int fd;
 	int open_count;
 	int master_count;
@@ -164,6 +171,32 @@ static int __intel_open_device(const struct pci_device *pci, char **path)
 	return fd;
 }
 
+static char *find_render_node(int fd)
+{
+#if defined(USE_RENDERNODE)
+	struct stat master, render;
+	char buf[128];
+
+	if (fstat(fd, &master))
+		return NULL;
+
+	if (!S_ISCHR(master.st_mode))
+		return NULL;
+
+	/* Are we a render-node ourselves? */
+	if (master.st_rdev & 0x80)
+		return NULL;
+
+	sprintf(buf, "/dev/dri/renderD%d", (int)((master.st_rdev | 0x80) & 0xff));
+	if (stat(buf, &render) == 0 &&
+	    master.st_mode == render.st_mode &&
+	    render.st_rdev == (master.st_rdev | 0x80))
+		return strdup(buf);
+#endif
+
+	return NULL;
+}
+
 int intel_open_device(int entity_num,
 		      const struct pci_device *pci,
 		      const char *path)
@@ -194,10 +227,13 @@ int intel_open_device(int entity_num,
 	if (dev == NULL)
 		goto err_close;
 
-	dev->path = local_path;
 	dev->fd = fd;
 	dev->open_count = 0;
 	dev->master_count = 0;
+	dev->master_node = local_path;
+	dev->render_node = find_render_node(fd);
+	if (dev->render_node == NULL)
+		dev->render_node = dev->master_node;
 
 	/* If hosted under a system compositor, just pretend to be master */
 	if (hosted()) {
@@ -257,11 +293,11 @@ int intel_get_device(ScrnInfoPtr scrn)
 	return dev->fd;
 }
 
-const char *intel_get_device_name(ScrnInfoPtr scrn)
+const char *intel_get_client_name(ScrnInfoPtr scrn)
 {
 	struct intel_device *dev = intel_device(scrn);
-	assert(dev && dev->path);
-	return dev->path;
+	assert(dev && dev->render_node);
+	return dev->render_node;
 }
 
 int intel_get_master(ScrnInfoPtr scrn)
@@ -312,7 +348,9 @@ void __intel_uxa_release_device(ScrnInfoPtr scrn)
 		intel_set_device(scrn, NULL);
 
 		drmClose(dev->fd);
-		free(dev->path);
+		if (dev->render_node != dev->master_node)
+			free(dev->render_node);
+		free(dev->master_node);
 		free(dev);
 	}
 }
@@ -331,6 +369,8 @@ void intel_put_device(ScrnInfoPtr scrn)
 	intel_set_device(scrn, NULL);
 
 	drmClose(dev->fd);
-	free(dev->path);
+	if (dev->render_node != dev->master_node)
+		free(dev->render_node);
+	free(dev->master_node);
 	free(dev);
 }
diff --git a/src/intel_driver.h b/src/intel_driver.h
index 4768536..e54054f 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -122,7 +122,7 @@ void intel_detect_chipset(ScrnInfoPtr scrn,
 
 int intel_open_device(int entity_num, const struct pci_device *pci, const char *path);
 int intel_get_device(ScrnInfoPtr scrn);
-const char *intel_get_device_name(ScrnInfoPtr scrn);
+const char *intel_get_client_name(ScrnInfoPtr scrn);
 int intel_get_master(ScrnInfoPtr scrn);
 int intel_put_master(ScrnInfoPtr scrn);
 void intel_put_device(ScrnInfoPtr scrn);
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index ff96075..f31ca4e 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -2321,7 +2321,7 @@ bool sna_dri_open(struct sna *sna, ScreenPtr screen)
 	memset(&info, '\0', sizeof(info));
 	info.fd = sna->kgem.fd;
 	info.driverName = dri_driver_name(sna);
-	info.deviceName = intel_get_device_name(sna->scrn);
+	info.deviceName = intel_get_client_name(sna->scrn);
 
 	DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n",
 	     __FUNCTION__, info.driverName, sna->kgem.gen, info.deviceName));
commit 846436c1a26b2c8a9d787ec707edb075fac57ee0
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Aug 23 16:25:08 2013 +0100

    sna/dri: Make async blits async again
    
    Fixes the regression introduced in
    commit 6f5fd772c7ca656b86394a0f036d4e0cf5b33d8e [2.21.13]
    Author: Chris Wilson <chris at chris-wilson.co.uk>
    Date:   Thu Jul 25 08:29:55 2013 +0100
    
        sna/dri: Discard the strict checking for stale bo before performing a blit
    
    which added the sync request flag along the explicit async blit path.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 1569251..ff96075 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -1415,7 +1415,7 @@ sna_dri_immediate_blit(struct sna *sna,
 			ret = true;
 	} else {
 		info->bo = __sna_dri_copy_region(sna, draw, NULL,
-						 info->back, info->front, true);
+						 info->back, info->front, false);
 		if (event)
 			DRI2SwapComplete(info->client, draw, 0, 0, 0,
 					 DRI2_BLIT_COMPLETE,
commit fc4e81726d03399bfbbba79d2e76556836f03bd2
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Aug 23 02:25:34 2013 +0100

    sna/video: Disable the existing sprite when switching CRTCs
    
    After starting a new video on another CRTC, disable the old one as we
    currently only track the single video port. However, showing a video
    split across multiple CRTCs would be a useful extension in the future.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
index 0323e46..8b094c4 100644
--- a/src/sna/sna_video_sprite.c
+++ b/src/sna/sna_video_sprite.c
@@ -201,6 +201,8 @@ sna_video_sprite_show(struct sna *sna,
 {
 	struct drm_mode_set_plane s;
 
+	/* XXX handle video spanning multiple CRTC */
+
 	VG_CLEAR(s);
 	s.plane_id = sna_crtc_to_plane(crtc);
 
@@ -300,7 +302,15 @@ sna_video_sprite_show(struct sna *sna,
 	}
 
 	frame->bo->domain = DOMAIN_NONE;
-	video->plane = s.plane_id;
+
+	if (video->plane != s.plane_id) {
+		if (video->plane) {
+			memset(&s, 0, sizeof(s));
+			s.plane_id = video->plane;
+			drmIoctl(video->sna->kgem.fd, DRM_IOCTL_MODE_SETPLANE, &s);
+		}
+		video->plane = s.plane_id;
+	}
 
 	if (video->bo != frame->bo) {
 		if (video->bo)
commit 509e7aaf8446f568e133e1b450ea13f73e9b366b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Aug 23 02:10:11 2013 +0100

    sna/gen7: Prefer the render ring for more operations
    
    As we get more well-endowed GPUs with ever more execution units, it
    becomes advantageous to do even basic copies through the render ring.
    However, the extra performance comes at a cost - higher power usage. To
    mitigate this, we apply a heuristic of only allowing a switch over to
    the render ring if the render ring is already active with an early
    request (in addition to the usual stall avoidance and general
    performance heuristics).
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index cd851f0..c0ca623 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -85,6 +85,7 @@ struct gt_info {
 		int max_gs_entries;
 		int push_ps_size; /* in 1KBs */
 	} urb;
+	int gt;
 };
 
 static const struct gt_info ivb_gt_info = {
@@ -93,6 +94,7 @@ static const struct gt_info ivb_gt_info = {
 	.max_gs_threads = 16,
 	.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
 	.urb = { 128, 64, 64, 8 },
+	.gt = 0,
 };
 
 static const struct gt_info ivb_gt1_info = {
@@ -101,6 +103,7 @@ static const struct gt_info ivb_gt1_info = {
 	.max_gs_threads = 36,
 	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
 	.urb = { 128, 512, 192, 8 },
+	.gt = 1,
 };
 
 static const struct gt_info ivb_gt2_info = {
@@ -109,6 +112,7 @@ static const struct gt_info ivb_gt2_info = {
 	.max_gs_threads = 128,
 	.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
 	.urb = { 256, 704, 320, 8 },
+	.gt = 2,
 };
 
 static const struct gt_info byt_gt_info = {
@@ -118,6 +122,7 @@ static const struct gt_info byt_gt_info = {
 	.max_gs_threads = 36,
 	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
 	.urb = { 128, 512, 192, 8 },
+	.gt = 1,
 };
 
 static const struct gt_info hsw_gt_info = {
@@ -128,6 +133,7 @@ static const struct gt_info hsw_gt_info = {
 		(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
 		1 << HSW_PS_SAMPLE_MASK_SHIFT,
 	.urb = { 128, 64, 64, 8 },
+	.gt = 0,
 };
 
 static const struct gt_info hsw_gt1_info = {
@@ -138,6 +144,7 @@ static const struct gt_info hsw_gt1_info = {
 		(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
 		1 << HSW_PS_SAMPLE_MASK_SHIFT,
 	.urb = { 128, 640, 256, 8 },
+	.gt = 1,
 };
 
 static const struct gt_info hsw_gt2_info = {
@@ -148,6 +155,7 @@ static const struct gt_info hsw_gt2_info = {
 		(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
 		1 << HSW_PS_SAMPLE_MASK_SHIFT,
 	.urb = { 256, 1664, 640, 8 },
+	.gt = 2,
 };
 
 static const struct gt_info hsw_gt3_info = {
@@ -158,6 +166,7 @@ static const struct gt_info hsw_gt3_info = {
 		(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
 		1 << HSW_PS_SAMPLE_MASK_SHIFT,
 	.urb = { 512, 3328, 1280, 16 },
+	.gt = 3,
 };
 
 inline static bool is_ivb(struct sna *sna)
@@ -2123,6 +2132,24 @@ inline static bool can_switch_to_blt(struct sna *sna,
 	return kgem_ring_is_idle(&sna->kgem, KGEM_BLT);
 }
 
+inline static bool can_switch_to_render(struct sna *sna,
+					struct kgem_bo *bo)
+{
+	if (sna->kgem.ring == KGEM_RENDER)
+		return true;
+
+	if (NO_RING_SWITCH)
+		return false;
+
+	if (!sna->kgem.has_semaphores)
+		return false;
+
+	if (bo && !RQ_IS_BLT(bo->rq) && !(bo->scanout && !sna->kgem.has_wt))
+		return true;
+
+	return !kgem_ring_is_idle(&sna->kgem, KGEM_RENDER);
+}
+
 static inline bool untiled_tlb_miss(struct kgem_bo *bo)
 {
 	return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
@@ -2143,6 +2170,15 @@ inline static bool prefer_blt_ring(struct sna *sna,
 	return can_switch_to_blt(sna, bo, flags);
 }
 
+inline static bool prefer_render_ring(struct sna *sna,
+				      struct kgem_bo *bo)
+{
+	if (sna->render_state.gen7.info->gt < 2)
+		return false;
+
+	return can_switch_to_render(sna, bo);
+}
+
 static bool
 try_blt(struct sna *sna,
 	PicturePtr dst, PicturePtr src,
@@ -2392,6 +2428,9 @@ prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
 	    kgem_bo_is_render(tmp->src.bo))
 		return false;
 
+	if (prefer_render_ring(sna, tmp->dst.bo))
+		return false;
+
 	if (!prefer_blt_ring(sna, tmp->dst.bo, 0))
 		return false;
 
@@ -2833,6 +2872,9 @@ static inline bool prefer_blt_copy(struct sna *sna,
 	    kgem_bo_is_render(src_bo))
 		return false;
 
+	if (prefer_render_ring(sna, dst_bo))
+		return false;
+
 	if (!prefer_blt_ring(sna, dst_bo, flags))
 		return false;
 
@@ -3238,6 +3280,9 @@ static inline bool prefer_blt_fill(struct sna *sna,
 	if (untiled_tlb_miss(bo))
 		return true;
 
+	if (prefer_render_ring(sna, bo))
+		return false;
+
 	if (!prefer_blt_ring(sna, bo, 0))
 		return false;