[Mesa-dev] [PATCH V2 17/22] i965/gen9: Add XY_FAST_COPY_BLT support to intelEmitCopyBlit()
Anuj Phogat
anuj.phogat at gmail.com
Fri Apr 17 16:51:38 PDT 2015
This patch enables using XY_FAST_COPY_BLT only for Yf/Ys tiled buffers.
Later It can be turned on for other tiling patterns (X,Y).
Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
---
src/mesa/drivers/dri/i965/intel_blit.c | 292 +++++++++++++++++++++++----
src/mesa/drivers/dri/i965/intel_blit.h | 3 +
src/mesa/drivers/dri/i965/intel_copy_image.c | 3 +
src/mesa/drivers/dri/i965/intel_reg.h | 33 +++
4 files changed, 291 insertions(+), 40 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index 9500bd7..36746c4 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -43,6 +43,23 @@
#define FILE_DEBUG_FLAG DEBUG_BLIT
+#define SET_TILING_XY_FAST_COPY_BLT(tiling, tr_mode, type) \
+({ \
+ switch (tiling) { \
+ case I915_TILING_X: \
+ CMD |= type ## _TILED_X; \
+ break; \
+ case I915_TILING_Y: \
+ if (tr_mode == INTEL_MIPTREE_TRMODE_YS) \
+ CMD |= type ## _TILED_64K; \
+ else \
+ CMD |= type ## _TILED_Y; \
+ break; \
+ default: \
+ unreachable("not reached"); \
+ } \
+})
+
static void
intel_miptree_set_alpha_to_one(struct brw_context *brw,
struct intel_mipmap_tree *mt,
@@ -75,6 +92,12 @@ static uint32_t
br13_for_cpp(int cpp)
{
switch (cpp) {
+ case 16:
+ return BR13_32323232;
+ break;
+ case 8:
+ return BR13_16161616;
+ break;
case 4:
return BR13_8888;
break;
@@ -89,6 +112,132 @@ br13_for_cpp(int cpp)
}
}
+static uint32_t
+get_tr_horizontal_align(uint32_t tr_mode, uint32_t cpp, bool is_src) {
+ /*Alignment tables for YF/YS tiled surfaces. */
+ const uint32_t align_2d_yf[] = {64, 64, 32, 32, 16};
+ const uint32_t align_2d_ys[] = {256, 256, 128, 128, 64};
+ const uint32_t bpp = cpp * 8;
+ uint32_t align;
+ int i = 0;
+
+ if (tr_mode == INTEL_MIPTREE_TRMODE_NONE)
+ return 0;
+
+ /* Compute array index. */
+ assert (bpp >= 8 && bpp <= 128 && (bpp & (bpp - 1)) == 0);
+ while (bpp >> (i + 4))
+ i++;
+
+ if (tr_mode == INTEL_MIPTREE_TRMODE_YF)
+ align = align_2d_yf[i];
+ else
+ align = align_2d_ys[i];
+
+ switch(align) {
+ case 512:
+ return is_src ? XY_SRC_H_ALIGN_512 : XY_DST_H_ALIGN_512;
+ case 256:
+ return is_src ? XY_SRC_H_ALIGN_256 : XY_DST_H_ALIGN_256;
+ case 128:
+ return is_src ? XY_SRC_H_ALIGN_128 : XY_DST_H_ALIGN_128;
+ case 64:
+ return is_src ? XY_SRC_H_ALIGN_64 : XY_DST_H_ALIGN_64;
+ case 32:
+ /* XY_FAST_COPY_BLT doesn't support horizontal alignment of 16. */
+ case 16:
+ return is_src ? XY_SRC_H_ALIGN_32 : XY_DST_H_ALIGN_32;
+ default:
+ unreachable("not reached");
+ }
+}
+
+static uint32_t
+get_tr_vertical_align(uint32_t tr_mode, uint32_t cpp, bool is_src) {
+ /* Vertical alignment tables for YF/YS tiled surfaces. */
+ const unsigned align_2d_yf[] = {64, 32, 32, 16, 16};
+ const unsigned align_2d_ys[] = {256, 128, 128, 64, 64};
+ const uint32_t bpp = cpp * 8;
+ uint32_t align;
+ int i = 0;
+
+ if (tr_mode == INTEL_MIPTREE_TRMODE_NONE)
+ return 0;
+
+ /* Compute array index. */
+ assert (bpp >= 8 && bpp <= 128 && (bpp & (bpp - 1)) == 0);
+ while (bpp >> (i + 4))
+ i++;
+
+ if (tr_mode == INTEL_MIPTREE_TRMODE_YF)
+ align = align_2d_yf[i];
+ else
+ align = align_2d_ys[i];
+
+ switch(align) {
+ case 256:
+ return is_src ? XY_SRC_V_ALIGN_256 : XY_DST_V_ALIGN_256;
+ case 128:
+ return is_src ? XY_SRC_V_ALIGN_128 : XY_DST_V_ALIGN_128;
+ case 64:
+ /* XY_FAST_COPY_BLT doesn't support vertical alignments of 16 and 32. */
+ case 32:
+ case 16:
+ return is_src ? XY_SRC_V_ALIGN_64 : XY_DST_V_ALIGN_64;
+ default:
+ unreachable("not reached");
+ }
+}
+
+static bool
+fast_copy_blit_error_check(uintptr_t src_addr, uint32_t src_pitch,
+ uint32_t src_tiling, uintptr_t dst_addr,
+ uint32_t dst_pitch, uint32_t dst_tiling,
+ uint32_t cpp)
+{
+ const bool dst_tiling_none = dst_tiling == I915_TILING_NONE;
+ const bool src_tiling_none = src_tiling == I915_TILING_NONE;
+
+ /* When destination tiling is enabled, this address is 64Byte aligned. */
+ if (!dst_tiling_none) {
+ if (dst_addr & 63)
+ return false;
+ }
+
+ /* When source tiling is enabled, this address should be 4 KB aligned. */
+ if (!src_tiling_none) {
+ if (src_addr & 4095)
+ return false;
+ }
+
+ /* When either source or destination tiling is enabled, this address is
+ * 16-byte aligned.
+ */
+ if (!src_tiling_none || !dst_tiling_none) {
+ if (src_addr & 15)
+ return false;
+ }
+
+ assert(cpp <= 16);
+ /* For Fast Copy Blits the pitch cannot be a negative number. */
+ assert(dst_pitch >= 0);
+
+ /* For Linear surfaces, the pitch has to be an OWord (16byte) multiple. */
+ if ((src_tiling_none && src_pitch % 16 != 0) ||
+ (dst_tiling_none && dst_pitch % 16 != 0))
+ return false;
+
+ /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
+ * (X direction width of the Tile). This means the pitch value will
+ * always be Cache Line aligned (64byte multiple).
+ */
+ if ((!dst_tiling_none && dst_pitch % 64 != 0) ||
+ (!src_tiling_none && src_pitch % 64 != 0))
+ return false;
+
+ return true;
+}
+
/**
* Emits the packet for switching the blitter from X to Y tiled or back.
*
@@ -156,6 +305,7 @@ intel_miptree_blit(struct brw_context *brw,
uint32_t width, uint32_t height,
GLenum logicop)
{
+ bool overlap = false;
/* The blitter doesn't understand multisampling at all. */
if (src_mt->num_samples > 0 || dst_mt->num_samples > 0)
return false;
@@ -216,6 +366,12 @@ intel_miptree_blit(struct brw_context *brw,
intel_miptree_resolve_color(brw, src_mt);
intel_miptree_resolve_color(brw, dst_mt);
+ /* Check for an overlap for SKL+ platforms. */
+ if (brw->gen >= 9 &&
+ ((src_x < dst_x && (src_x + width) > dst_x) ||
+ (src_x > dst_x && (dst_x + width) > src_x)))
+ overlap = true;
+
if (src_flip)
src_y = minify(src_mt->physical_height0, src_level - src_mt->first_level) - src_y - height;
@@ -251,12 +407,15 @@ intel_miptree_blit(struct brw_context *brw,
src_pitch,
src_mt->bo, src_mt->offset,
src_mt->tiling,
+ src_mt->tr_mode,
dst_mt->pitch,
dst_mt->bo, dst_mt->offset,
dst_mt->tiling,
+ dst_mt->tr_mode,
src_x, src_y,
dst_x, dst_y,
width, height,
+ overlap,
logicop)) {
return false;
}
@@ -280,36 +439,41 @@ intelEmitCopyBlit(struct brw_context *brw,
drm_intel_bo *src_buffer,
GLuint src_offset,
uint32_t src_tiling,
+ uint32_t src_tr_mode,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
+ uint32_t dst_tr_mode,
GLshort src_x, GLshort src_y,
GLshort dst_x, GLshort dst_y,
GLshort w, GLshort h,
+ bool overlap,
GLenum logic_op)
{
GLuint CMD, BR13, pass = 0;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
drm_intel_bo *aper_array[3];
- bool dst_y_tiled = dst_tiling == I915_TILING_Y;
- bool src_y_tiled = src_tiling == I915_TILING_Y;
+ bool dst_tr_mode_none = dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE;
+ bool src_tr_mode_none = src_tr_mode == INTEL_MIPTREE_TRMODE_NONE;
+
+ bool dst_y_tiled = dst_tiling == I915_TILING_Y || !dst_tr_mode_none;
+ bool src_y_tiled = src_tiling == I915_TILING_Y || !src_tr_mode_none;
+ bool use_fast_copy_blit = brw->gen >= 9 && !overlap &&
+ (!src_tr_mode_none || !dst_tr_mode_none);
- if (dst_tiling != I915_TILING_NONE) {
- if (dst_offset & 4095)
- return false;
- }
- if (src_tiling != I915_TILING_NONE) {
- if (src_offset & 4095)
- return false;
- }
if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
return false;
assert(!dst_y_tiled || (dst_pitch % 128) == 0);
assert(!src_y_tiled || (src_pitch % 128) == 0);
+ if (use_fast_copy_blit == false &&
+ (src_tr_mode != INTEL_MIPTREE_TRMODE_NONE ||
+ dst_tr_mode != INTEL_MIPTREE_TRMODE_NONE))
+ return false;
+
/* do space check before going any further */
do {
aper_array[0] = brw->batch.bo;
@@ -334,13 +498,6 @@ intelEmitCopyBlit(struct brw_context *brw,
src_buffer, src_pitch, src_offset, src_x, src_y,
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
- /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
- * the low bits. Offsets must be naturally aligned.
- */
- if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
- dst_pitch % 4 != 0 || dst_offset % cpp != 0)
- return false;
-
/* For big formats (such as floating point), do the copy using 16 or 32bpp
* and multiply the coordinates.
*/
@@ -359,27 +516,76 @@ intelEmitCopyBlit(struct brw_context *brw,
}
}
- BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
+ if (use_fast_copy_blit &&
+ fast_copy_blit_error_check(src_offset, src_pitch, src_tiling,
+ dst_offset, dst_pitch, dst_tiling, cpp)) {
+ BR13 = br13_for_cpp(cpp);
- switch (cpp) {
- case 1:
- case 2:
- CMD = XY_SRC_COPY_BLT_CMD;
- break;
- case 4:
- CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
- break;
- default:
- return false;
- }
+ if (src_tr_mode == INTEL_MIPTREE_TRMODE_YF)
+ BR13 |= XY_SRC_TRMODE_YF;
- if (dst_tiling != I915_TILING_NONE) {
- CMD |= XY_DST_TILED;
- dst_pitch /= 4;
- }
- if (src_tiling != I915_TILING_NONE) {
- CMD |= XY_SRC_TILED;
- src_pitch /= 4;
+ if (dst_tr_mode == INTEL_MIPTREE_TRMODE_YF)
+ BR13 |= XY_DST_TRMODE_YF;
+
+ CMD = XY_FAST_COPY_BLT_CMD;
+
+ if (dst_tiling != I915_TILING_NONE) {
+ SET_TILING_XY_FAST_COPY_BLT(dst_tiling, dst_tr_mode, XY_DST);
+ /* Pitch value should be specified as a number of Dwords. */
+ dst_pitch /= 4;
+ }
+ if (src_tiling != I915_TILING_NONE) {
+ SET_TILING_XY_FAST_COPY_BLT(src_tiling, src_tr_mode, XY_SRC);
+ /* Pitch value should be specified as a number of Dwords. */
+ src_pitch /= 4;
+ }
+
+ CMD |= get_tr_horizontal_align(src_tr_mode, cpp, true /* is_src */);
+ CMD |= get_tr_vertical_align(src_tr_mode, cpp, true /* is_src */);
+
+ CMD |= get_tr_horizontal_align(dst_tr_mode, cpp, false /* is_src */);
+ CMD |= get_tr_vertical_align(dst_tr_mode, cpp, false /* is_src */);
+
+ } else {
+ /* Source and destination base addresses should be 4 KB aligned. */
+ if (dst_tiling != I915_TILING_NONE) {
+ if (dst_offset & 4095)
+ return false;
+ }
+ if (src_tiling != I915_TILING_NONE) {
+ if (src_offset & 4095)
+ return false;
+ }
+
+ /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
+ * the low bits. Offsets must be naturally aligned.
+ */
+ if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
+ dst_pitch % 4 != 0 || dst_offset % cpp != 0)
+ return false;
+
+ assert(cpp <= 4);
+ BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
+ switch (cpp) {
+ case 1:
+ case 2:
+ CMD = XY_SRC_COPY_BLT_CMD;
+ break;
+ case 4:
+ CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+ break;
+ default:
+ return false;
+ }
+
+ if (dst_tiling != I915_TILING_NONE) {
+ CMD |= XY_DST_TILED;
+ dst_pitch /= 4;
+ }
+ if (src_tiling != I915_TILING_NONE) {
+ CMD |= XY_SRC_TILED;
+ src_pitch /= 4;
+ }
}
if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
@@ -533,11 +739,14 @@ intel_emit_linear_blit(struct brw_context *brw,
pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
height = (pitch == 0) ? 1 : size / pitch;
ok = intelEmitCopyBlit(brw, 1,
- pitch, src_bo, src_offset, I915_TILING_NONE,
- pitch, dst_bo, dst_offset, I915_TILING_NONE,
+ pitch, src_bo, src_offset,
+ I915_TILING_NONE, INTEL_MIPTREE_TRMODE_NONE,
+ pitch, dst_bo, dst_offset,
+ I915_TILING_NONE, INTEL_MIPTREE_TRMODE_NONE,
0, 0, /* src x/y */
0, 0, /* dst x/y */
pitch, height, /* w, h */
+ false, /* overlap */
GL_COPY);
if (!ok)
_mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
@@ -549,11 +758,14 @@ intel_emit_linear_blit(struct brw_context *brw,
pitch = ALIGN(size, 4);
if (size != 0) {
ok = intelEmitCopyBlit(brw, 1,
- pitch, src_bo, src_offset, I915_TILING_NONE,
- pitch, dst_bo, dst_offset, I915_TILING_NONE,
+ pitch, src_bo, src_offset,
+ I915_TILING_NONE, INTEL_MIPTREE_TRMODE_NONE,
+ pitch, dst_bo, dst_offset,
+ I915_TILING_NONE, INTEL_MIPTREE_TRMODE_NONE,
0, 0, /* src x/y */
0, 0, /* dst x/y */
size, 1, /* w, h */
+ false, /* overlap */
GL_COPY);
if (!ok)
_mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h
index f563939..29c9ead 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.h
+++ b/src/mesa/drivers/dri/i965/intel_blit.h
@@ -37,13 +37,16 @@ intelEmitCopyBlit(struct brw_context *brw,
drm_intel_bo *src_buffer,
GLuint src_offset,
uint32_t src_tiling,
+ uint32_t src_tr_mode,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
+ uint32_t dst_tr_mode,
GLshort srcx, GLshort srcy,
GLshort dstx, GLshort dsty,
GLshort w, GLshort h,
+ bool overlap,
GLenum logicop );
bool intel_miptree_blit(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c
index f4c7eff..1e4a1e2 100644
--- a/src/mesa/drivers/dri/i965/intel_copy_image.c
+++ b/src/mesa/drivers/dri/i965/intel_copy_image.c
@@ -126,12 +126,15 @@ copy_image_with_blitter(struct brw_context *brw,
src_mt->pitch,
src_mt->bo, src_mt->offset,
src_mt->tiling,
+ src_mt->tr_mode,
dst_mt->pitch,
dst_mt->bo, dst_mt->offset,
dst_mt->tiling,
+ dst_mt->tr_mode,
src_x, src_y,
dst_x, dst_y,
src_width, src_height,
+ false /* overlap */,
GL_COPY);
}
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index 488fb5b..6830148 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -87,6 +87,8 @@
#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22))
+#define XY_FAST_COPY_BLT_CMD (CMD_2D | (0x42 << 22))
+
#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22))
# define XY_TEXT_BYTE_PACKED (1 << 16)
@@ -95,11 +97,42 @@
#define XY_BLT_WRITE_RGB (1 << 20)
#define XY_SRC_TILED (1 << 15)
#define XY_DST_TILED (1 << 11)
+#define XY_SRC_TILED_X (1 << 20)
+#define XY_SRC_TILED_Y (2 << 20)
+#define XY_SRC_TILED_64K (3 << 20)
+#define XY_DST_TILED_X (1 << 13)
+#define XY_DST_TILED_Y (2 << 13)
+#define XY_DST_TILED_64K (3 << 13)
+
+#define XY_SRC_H_ALIGN_32 (0 << 17)
+#define XY_SRC_H_ALIGN_64 (1 << 17)
+#define XY_SRC_H_ALIGN_128 (2 << 17)
+#define XY_SRC_H_ALIGN_256 (3 << 17)
+#define XY_SRC_H_ALIGN_512 (4 << 17)
+
+#define XY_SRC_V_ALIGN_64 (0 << 15)
+#define XY_SRC_V_ALIGN_128 (1 << 15)
+#define XY_SRC_V_ALIGN_256 (2 << 15)
+
+#define XY_DST_H_ALIGN_32 (0 << 10)
+#define XY_DST_H_ALIGN_64 (1 << 10)
+#define XY_DST_H_ALIGN_128 (2 << 10)
+#define XY_DST_H_ALIGN_256 (3 << 10)
+#define XY_DST_H_ALIGN_512 (4 << 10)
+
+#define XY_DST_V_ALIGN_64 (0 << 8)
+#define XY_DST_V_ALIGN_128 (1 << 8)
+#define XY_DST_V_ALIGN_256 (2 << 8)
/* BR13 */
#define BR13_8 (0x0 << 24)
#define BR13_565 (0x1 << 24)
#define BR13_8888 (0x3 << 24)
+#define BR13_16161616 (0x4 << 24)
+#define BR13_32323232 (0x5 << 24)
+
+#define XY_SRC_TRMODE_YF (1 << 31)
+#define XY_DST_TRMODE_YF (1 << 30)
/* Pipeline Statistics Counter Registers */
#define IA_VERTICES_COUNT 0x2310
--
2.3.4
More information about the mesa-dev
mailing list