xf86-video-intel: 4 commits - src/sna/gen6_render.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_composite.c src/sna/sna.h
Chris Wilson
ickle at kemper.freedesktop.org
Sun Jan 15 17:37:57 PST 2012
src/sna/gen6_render.c | 44 ++++++++++++++++++++++++++++++--------------
src/sna/kgem.c | 25 ++++++++++++++++++-------
src/sna/kgem.h | 2 ++
src/sna/sna.h | 1 -
src/sna/sna_accel.c | 15 +++++++--------
src/sna/sna_composite.c | 9 ++++++---
6 files changed, 63 insertions(+), 33 deletions(-)
New commits:
commit fd4c139a3959df90c7f078817fc6e2f3db715bf8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Mon Jan 16 00:32:12 2012 +0000
sna: On LLC systems quietly replace all linear mmappings using the CPU
If the GPU and CPU caches are shared and coherent, we can use a cached
mapping for linear bo in the CPU domain with no penalty and so avoid the
penalty of using WC/UC mappings through the GTT (and any aperture
pressure). We presume that the bos for such mappings are indeed LLC
cached...
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 0075bed..d119ae3 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -85,6 +85,7 @@ static inline void list_replace(struct list *old,
#define DBG_NO_HW 0
#define DBG_NO_TILING 0
#define DBG_NO_VMAP 0
+#define DBG_NO_LLC 0
#define DBG_NO_SEMAPHORES 0
#define DBG_NO_MADV 0
#define DBG_NO_MAP_UPLOAD 0
@@ -601,6 +602,13 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
kgem->has_relaxed_fencing));
+ kgem->has_llc = false;
+ if (!DBG_NO_LLC && gen >= 60)
+ kgem->has_llc = true;
+ kgem->has_cpu_bo = kgem->has_llc;
+ DBG(("%s: cpu bo enabled %d: llc? %d\n", __FUNCTION__,
+ kgem->has_cpu_bo, kgem->has_llc));
+
kgem->has_semaphores = false;
if (gen >= 60 && semaphores_enabled())
kgem->has_semaphores = true;
@@ -2170,6 +2178,8 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
+ if (kgem->has_llc && tiling == I915_TILING_NONE)
+ for_cpu = 1;
/* We presume that we will need to upload to this bo,
* and so would prefer to have an active VMA.
*/
@@ -2604,14 +2614,15 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
assert(bo->exec == NULL);
assert(list_is_empty(&bo->list));
- if (IS_CPU_MAP(bo->map)) {
- if (bo->tiling == I915_TILING_NONE) {
- kgem_bo_sync__cpu(kgem, bo);
- return CPU_MAP(bo->map);
- }
- kgem_bo_release_map(kgem, bo);
+ if (kgem->has_llc && bo->tiling == I915_TILING_NONE) {
+ ptr = kgem_bo_map__cpu(kgem, bo);
+ kgem_bo_sync__cpu(kgem, bo);
+ return ptr;
}
+ if (IS_CPU_MAP(bo->map))
+ kgem_bo_release_map(kgem, bo);
+
ptr = bo->map;
if (ptr == NULL) {
kgem_trim_vma_cache(kgem, MAP_GTT, bo->bucket);
@@ -2958,7 +2969,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
bo = NULL;
#if !DBG_NO_MAP_UPLOAD
- if (!DEBUG_NO_LLC && kgem->gen >= 60) {
+ if (kgem->has_cpu_bo) {
struct kgem_bo *old;
bo = malloc(sizeof(*bo));
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 377d21d..621e3cd 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -146,6 +146,8 @@ struct kgem {
uint32_t has_vmap :1;
uint32_t has_relaxed_fencing :1;
uint32_t has_semaphores :1;
+ uint32_t has_llc :1;
+ uint32_t has_cpu_bo :1;
uint16_t fence_max;
uint16_t half_cpu_cache_pages;
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 185bc1d..5153d33 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -93,7 +93,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define DEBUG_NO_RENDER 0
#define DEBUG_NO_BLT 0
#define DEBUG_NO_IO 0
-#define DEBUG_NO_LLC 0
#define DEBUG_FLUSH_CACHE 0
#define DEBUG_FLUSH_BATCH 0
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 1665791..7774cba 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -205,7 +205,7 @@ sna_pixmap_alloc_cpu(struct sna *sna,
assert(priv->stride);
- if (!DEBUG_NO_LLC && sna->kgem.gen >= 60) {
+ if (sna->kgem.has_cpu_bo) {
DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__,
pixmap->drawable.width, pixmap->drawable.height));
commit c20a729d0a57fd5a782d9114535d9474f39a8950
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Jan 15 22:19:22 2012 +0000
sna/gen6: Force a batch submission after allocation failure during composite
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index b155e86..39e036b 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1699,7 +1699,8 @@ gen6_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
- if (!gen6_get_rectangles(sna, op, 1)) {
+ if (unlikely(!gen6_get_rectangles(sna, op, 1))) {
+ _kgem_submit(&sna->kgem);
gen6_emit_composite_state(sna, op);
gen6_get_rectangles(sna, op, 1);
}
@@ -1714,7 +1715,8 @@ gen6_render_composite_box(struct sna *sna,
{
struct sna_composite_rectangles r;
- if (!gen6_get_rectangles(sna, op, 1)) {
+ if (unlikely(!gen6_get_rectangles(sna, op, 1))) {
+ _kgem_submit(&sna->kgem);
gen6_emit_composite_state(sna, op);
gen6_get_rectangles(sna, op, 1);
}
@@ -1741,9 +1743,11 @@ gen6_render_composite_boxes(struct sna *sna,
do {
int nbox_this_time = gen6_get_rectangles(sna, op, nbox);
- if (nbox_this_time == 0) {
+ if (unlikely(nbox_this_time == 0)) {
+ _kgem_submit(&sna->kgem);
gen6_emit_composite_state(sna, op);
nbox_this_time = gen6_get_rectangles(sna, op, nbox);
+ assert(nbox_this_time);
}
nbox -= nbox_this_time;
do {
@@ -1992,7 +1996,8 @@ gen6_render_video(struct sna *sna,
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
- if (!gen6_get_rectangles(sna, &tmp, 1)) {
+ if (unlikely(!gen6_get_rectangles(sna, &tmp, 1))) {
+ _kgem_submit(&sna->kgem);
gen6_emit_video_state(sna, &tmp, frame);
gen6_get_rectangles(sna, &tmp, 1);
}
@@ -2825,7 +2830,8 @@ gen6_render_composite_spans_box(struct sna *sna,
box->x2 - box->x1,
box->y2 - box->y1));
- if (gen6_get_rectangles(sna, &op->base, 1) == 0) {
+ if (unlikely(gen6_get_rectangles(sna, &op->base, 1) == 0)) {
+ _kgem_submit(&sna->kgem);
gen6_emit_composite_state(sna, &op->base);
gen6_get_rectangles(sna, &op->base, 1);
}
@@ -2849,9 +2855,11 @@ gen6_render_composite_spans_boxes(struct sna *sna,
int nbox_this_time;
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox);
- if (nbox_this_time == 0) {
+ if (unlikely(nbox_this_time == 0)) {
+ _kgem_submit(&sna->kgem);
gen6_emit_composite_state(sna, &op->base);
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox);
+ assert(nbox_this_time);
}
nbox -= nbox_this_time;
@@ -3161,7 +3169,8 @@ gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
do {
float *v;
int n_this_time = gen6_get_rectangles(sna, &tmp, n);
- if (n_this_time == 0) {
+ if (unlikely(n_this_time == 0)) {
+ _kgem_submit(&sna->kgem);
gen6_emit_copy_state(sna, &tmp);
n_this_time = gen6_get_rectangles(sna, &tmp, n);
}
@@ -3201,7 +3210,8 @@ gen6_render_copy_blt(struct sna *sna,
int16_t w, int16_t h,
int16_t dx, int16_t dy)
{
- if (!gen6_get_rectangles(sna, &op->base, 1)) {
+ if (unlikely(!gen6_get_rectangles(sna, &op->base, 1))) {
+ _kgem_submit(&sna->kgem);
gen6_emit_copy_state(sna, &op->base);
gen6_get_rectangles(sna, &op->base, 1);
}
@@ -3453,7 +3463,8 @@ gen6_render_fill_boxes(struct sna *sna,
do {
int n_this_time = gen6_get_rectangles(sna, &tmp, n);
- if (n_this_time == 0) {
+ if (unlikely(n_this_time == 0)) {
+ _kgem_submit(&sna->kgem);
gen6_emit_fill_state(sna, &tmp);
n_this_time = gen6_get_rectangles(sna, &tmp, n);
}
@@ -3489,7 +3500,8 @@ gen6_render_op_fill_blt(struct sna *sna,
{
DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
- if (!gen6_get_rectangles(sna, &op->base, 1)) {
+ if (unlikely(!gen6_get_rectangles(sna, &op->base, 1))) {
+ _kgem_submit(&sna->kgem);
gen6_emit_fill_state(sna, &op->base);
gen6_get_rectangles(sna, &op->base, 1);
}
@@ -3515,7 +3527,8 @@ gen6_render_op_fill_box(struct sna *sna,
DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
- if (!gen6_get_rectangles(sna, &op->base, 1)) {
+ if (unlikely(!gen6_get_rectangles(sna, &op->base, 1))) {
+ _kgem_submit(&sna->kgem);
gen6_emit_fill_state(sna, &op->base);
gen6_get_rectangles(sna, &op->base, 1);
}
@@ -3544,7 +3557,8 @@ gen6_render_op_fill_boxes(struct sna *sna,
do {
int nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox);
- if (nbox_this_time == 0) {
+ if (unlikely(nbox_this_time == 0)) {
+ _kgem_submit(&sna->kgem);
gen6_emit_fill_state(sna, &op->base);
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox);
}
@@ -3735,7 +3749,8 @@ gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
- if (!gen6_get_rectangles(sna, &tmp, 1)) {
+ if (unlikely(!gen6_get_rectangles(sna, &tmp, 1))) {
+ _kgem_submit(&sna->kgem);
gen6_emit_fill_state(sna, &tmp);
gen6_get_rectangles(sna, &tmp, 1);
}
@@ -3831,7 +3846,8 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
- if (!gen6_get_rectangles(sna, &tmp, 1)) {
+ if (unlikely(!gen6_get_rectangles(sna, &tmp, 1))) {
+ _kgem_submit(&sna->kgem);
gen6_emit_fill_state(sna, &tmp);
gen6_get_rectangles(sna, &tmp, 1);
}
commit 380a2fca3cce4c99c5026ab800f7885a1959b16d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Jan 15 22:13:20 2012 +0000
sna: Optimise call to composite with single box
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c
index 4111b8d..15876f5 100644
--- a/src/sna/sna_composite.c
+++ b/src/sna/sna_composite.c
@@ -495,9 +495,12 @@ sna_composite(CARD8 op,
goto fallback;
}
- tmp.boxes(sna, &tmp,
- REGION_RECTS(&region),
- REGION_NUM_RECTS(&region));
+ if (region.data == NULL)
+ tmp.box(sna, &tmp, &region.extents);
+ else
+ tmp.boxes(sna, &tmp,
+ REGION_BOXPTR(&region),
+ REGION_NUM_RECTS(&region));
apply_damage(&tmp, &region);
tmp.done(sna, &tmp);
commit 9f89250de1dc134f54cb68012fa5eb996c61d57e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Jan 15 21:54:13 2012 +0000
sna: Use the prefer-GPU hint for forcing allocation for core drawing
Similar to the render paths and simpler than the current look up tiling
method.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 537c4d1..1665791 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1023,7 +1023,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
if (DAMAGE_IS_ALL(priv->cpu_damage))
goto out;
- if (priv->stride == 0 && priv->gpu_bo == NULL && flags & MOVE_WRITE)
+ if (priv->gpu_bo == NULL && !priv->gpu && flags & MOVE_WRITE)
return _sna_pixmap_move_to_cpu(pixmap, flags);
get_drawable_deltas(drawable, pixmap, &dx, &dy);
@@ -1514,7 +1514,7 @@ _sna_drawable_use_gpu_bo(DrawablePtr drawable,
return FALSE;
if (priv->gpu_bo == NULL) {
- if (sna_pixmap_choose_tiling(pixmap) == I915_TILING_NONE) {
+ if (!priv->gpu) {
DBG(("%s: untiled, will not force allocation\n",
__FUNCTION__));
return FALSE;
@@ -2731,12 +2731,11 @@ move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv,
if (priv->gpu_bo)
return TRUE;
- if (priv->cpu_bo) {
- if (pixmap->usage_hint)
- return FALSE;
+ if (!priv->gpu)
+ return FALSE;
- if (priv->cpu_bo->size <= 4096 ||
- sna_pixmap_choose_tiling(pixmap) == I915_TILING_NONE)
+ if (priv->cpu_bo) {
+ if (sna_pixmap_choose_tiling(pixmap) == I915_TILING_NONE)
return FALSE;
return (priv->source_count++-SOURCE_BIAS) * w*h >=
More information about the xorg-commit
mailing list