[Mesa-dev] [PATCH 12/23] i965: Import image format conversion primitives.
Francisco Jerez
currojerez at riseup.net
Tue Apr 28 11:44:23 PDT 2015
Define bitfield packing, unpacking and type conversion operations in
terms of which the image format conversion code will be implemented.
These don't directly know about image formats: The packing and
unpacking functions take a 4-tuple of bit shifts and a 4-tuple of bit
widths as arguments, determining the bitfield position of each
component. Most of the remaining functions perform integer, fixed
point normalized, and floating point type conversions, mapping between
a target type with the per-component bit widths given by a parameter
and a matching native representation of the same type.
---
src/mesa/drivers/dri/i965/brw_ir_surface_builder.h | 385 +++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_reg.h | 3 +
2 files changed, 388 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
index e46c7c1..317ad15 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
@@ -719,6 +719,391 @@ namespace brw {
return dst;
}
}
+
+ namespace detail {
+ /**
+ * Simple 4-tuple of scalars used to pass around per-color component
+ * values. It has to be parameterized on T because we need values of a
+ * number of different types.
+ */
+ template<typename T>
+ struct color {
+ color(T x = 0) : r(x), g(x), b(x), a(x)
+ {
+ }
+
+ color(T r, T g, T b, T a) : r(r), g(g), b(b), a(a)
+ {
+ }
+
+ T
+ operator[](unsigned i) const
+ {
+ const T xs[] = { r, g, b, a };
+ return xs[i];
+ }
+
+ T r, g, b, a;
+ };
+
+ /**
+ * Return the bitmask of non-zero components of the given 4-tuple.
+ */
+ template<typename T>
+ unsigned
+ bitmask(const color<T> &c)
+ {
+ return !!c.r << 0 | !!c.g << 1 | !!c.b << 2 | !!c.a << 3;
+ }
+ }
+
+ namespace image_format_conversion {
+ using detail::color;
+
+ namespace detail {
+ /**
+ * Maximum representable value in an unsigned integer with the given
+ * number of bits.
+ */
+ inline unsigned
+ scale(unsigned n)
+ {
+ return (1 << n) - 1;
+ }
+
+ /**
+ * Load a 4-component constant vector into registers.
+ */
+ template<typename T>
+ src_svec4
+ emit_vector_imm(const svec4_builder &bld, const color<T> &c)
+ {
+ const dst_svec4 dst = bld.natural_reg(fs_reg(T()).type);
+
+ for (int i = 0; i < 4; ++i)
+ bld.MOV(writemask(dst, 1 << i), c[i]);
+
+ return dst;
+ }
+
+ /**
+ * Load a 4-component constant vector into registers.
+ *
+ * This SIMD4x2 specialization attempts to use vector immediates and
+ * writemasking to minimize the number of copies. This definitely
+ * belongs in a general vectorization pass rather than here, but we
+ * don't have such a pass yet and we want to generate reasonable code in
+ * the meantime.
+ */
+ template<typename T>
+ src_reg
+ emit_vector_imm(const vec4_builder &bld, const color<T> &c)
+ {
+ const dst_reg dst = bld.natural_reg(src_reg(T()).type);
+
+ if (brw_float_to_vf(c.r) != -1 &&
+ brw_float_to_vf(c.g) != -1 &&
+ brw_float_to_vf(c.b) != -1 &&
+ brw_float_to_vf(c.a) != -1) {
+ bld.MOV(dst, src_reg(brw_float_to_vf(c.r), brw_float_to_vf(c.g),
+ brw_float_to_vf(c.b), brw_float_to_vf(c.a)));
+
+ } else {
+ for (unsigned mask_left = dst.writemask; mask_left;) {
+ const unsigned i = ffs(mask_left) - 1;
+ const unsigned mask = bitmask(
+ color<bool>(c.r == c[i], c.g == c[i],
+ c.b == c[i], c.a == c[i]));
+
+ bld.MOV(writemask(dst, mask), c[i]);
+ mask_left &= ~mask;
+ }
+ }
+
+ return dst;
+ }
+ }
+
+ /**
+ * Pack the vector \p src in a bitfield given the per-component bit
+ * shifts and widths.
+ */
+ template<typename B, typename S>
+ S
+ emit_pack(const B &bld, const S &src,
+ const color<unsigned> &shifts,
+ const color<unsigned> &widths)
+ {
+ using namespace detail;
+ const unsigned mask = bitmask(widths);
+ const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+
+ /* Shift each component left to the correct bitfield position. */
+ bld.SHL(writemask(dst, mask), src,
+ emit_vector_imm(bld, color<unsigned>(
+ shifts.r % 32, shifts.g % 32,
+ shifts.b % 32, shifts.a % 32)));
+
+ /* Add everything up. */
+ if (mask >> 1) {
+ assert(shifts.r + widths.r <= 32 && shifts.g + widths.g <= 32 &&
+ shifts.b + widths.b <= 64 && shifts.a + widths.a <= 64);
+ bld.OR(writemask(dst, WRITEMASK_XY),
+ swizzle(dst, BRW_SWIZZLE_XZXZ),
+ swizzle(dst, (mask >> 3 ? BRW_SWIZZLE_YWYW :
+ BRW_SWIZZLE_YZYZ)));
+ }
+
+ if (mask >> 2 && (shifts.b < 32 || shifts.a < 32)) {
+ assert(shifts.b + widths.b <= 32 && shifts.a + widths.a <= 32);
+ bld.OR(writemask(dst, WRITEMASK_X),
+ swizzle(dst, BRW_SWIZZLE_XXXX),
+ swizzle(dst, BRW_SWIZZLE_YYYY));
+ }
+
+ return dst;
+ }
+
+ /**
+ * Unpack a vector from the bitfield \p src given the per-component bit
+ * shifts and widths.
+ */
+ template<typename B, typename S>
+ S
+ emit_unpack(const B &bld, const S &src,
+ const color<unsigned> &shifts,
+ const color<unsigned> &widths)
+ {
+ using namespace detail;
+ const unsigned mask = bitmask(widths);
+ const typename B::dst_reg dst = bld.natural_reg(src.type);
+
+ /* Shift left to discard the most significant bits. */
+ bld.SHL(writemask(dst, mask),
+ swizzle(src, BRW_SWIZZLE4(shifts.r / 32, shifts.g / 32,
+ shifts.b / 32, shifts.a / 32)),
+ emit_vector_imm(bld, color<unsigned>(
+ 32 - shifts.r % 32 - widths.r,
+ 32 - shifts.g % 32 - widths.g,
+ 32 - shifts.b % 32 - widths.b,
+ 32 - shifts.a % 32 - widths.a)));
+
+ /* Shift back to the least significant bits using an arithmetic
+ * shift to get sign extension on signed types.
+ */
+ bld.ASR(writemask(dst, mask), dst,
+ emit_vector_imm(bld, color<unsigned>(
+ 32 - widths.r, 32 - widths.g,
+ 32 - widths.b, 32 - widths.a)));
+
+ return dst;
+ }
+
+ /**
+ * Convert a vector into an integer vector of the specified signedness
+ * and bit widths, properly handling overflow.
+ */
+ template<typename B, typename S>
+ S
+ emit_convert_to_integer(const B &bld, const S &src,
+ const color<unsigned> &widths,
+ bool is_signed)
+ {
+ using namespace detail;
+ const unsigned mask = bitmask(widths);
+ const unsigned s = (is_signed ? 1 : 0);
+ const typename B::dst_reg dst = bld.natural_reg(
+ is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD);
+
+ bld.MOV(writemask(dst, mask), src);
+
+ /* Clamp to the minimum value. */
+ if (is_signed) {
+ const S min = emit_vector_imm(bld, color<int>(
+ -scale(widths.r - s) - 1, -scale(widths.g - s) - 1,
+ -scale(widths.b - s) - 1, -scale(widths.a - s) - 1));
+ bld.emit_minmax(writemask(dst, mask), dst, min,
+ BRW_CONDITIONAL_G);
+ }
+
+ /* Clamp to the maximum value. */
+ const S max = emit_vector_imm(bld, color<int>(
+ scale(widths.r - s), scale(widths.g - s),
+ scale(widths.b - s), scale(widths.a - s)));
+ bld.emit_minmax(writemask(dst, mask), dst, max,
+ BRW_CONDITIONAL_L);
+
+ return dst;
+ }
+
+ /**
+ * Convert a normalized fixed-point vector of the specified signedness
+ * and bit widths into a floating point vector.
+ */
+ template<typename B, typename S>
+ S
+ emit_convert_from_scaled(const B &bld, const S &src,
+ const color<unsigned> &widths,
+ bool is_signed)
+ {
+ using namespace detail;
+ const unsigned mask = bitmask(widths);
+ const unsigned s = (is_signed ? 1 : 0);
+ const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_F);
+
+ /* Convert to float. */
+ bld.MOV(writemask(dst, mask), src);
+
+ /* Divide by the normalization constants. */
+ bld.MUL(writemask(dst, mask), dst,
+ emit_vector_imm(bld, color<float>(
+ 1.0 / scale(widths.r - s), 1.0 / scale(widths.g - s),
+ 1.0 / scale(widths.b - s), 1.0 / scale(widths.a - s))));
+
+ /* Clamp to the minimum value. */
+ if (is_signed)
+ bld.emit_minmax(writemask(dst, mask), dst, -1.0f,
+ BRW_CONDITIONAL_G);
+
+ return dst;
+ }
+
+ /**
+ * Convert a floating point vector into a normalized fixed-point vector
+ * of the specified signedness and bit widths.
+ */
+ template<typename B, typename S>
+ S
+ emit_convert_to_scaled(const B &bld, const S &src,
+ const color<unsigned> &widths,
+ bool is_signed)
+ {
+ using namespace detail;
+ const unsigned mask = bitmask(widths);
+ const unsigned s = (is_signed ? 1 : 0);
+ const typename B::dst_reg dst = bld.natural_reg(
+ is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD);
+ const typename B::dst_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+ bld.MOV(writemask(fdst, mask), src);
+
+ /* Clamp to the minimum value. */
+ if (is_signed)
+ bld.emit_minmax(writemask(fdst, mask), fdst, -1.0f,
+ BRW_CONDITIONAL_G);
+
+ /* Clamp to the maximum value. */
+ bld.emit_minmax(writemask(fdst, mask), fdst, 1.0f,
+ BRW_CONDITIONAL_L);
+
+ /* Multiply by the normalization constants. */
+ bld.MUL(writemask(fdst, mask), fdst,
+ emit_vector_imm(bld, color<float>(
+ scale(widths.r - s), scale(widths.g - s),
+ scale(widths.b - s), scale(widths.a - s))));
+
+ /* Convert to integer. */
+ bld.RNDE(writemask(fdst, mask), fdst);
+ bld.MOV(writemask(dst, mask), fdst);
+
+ return dst;
+ }
+
+ /**
+ * Convert a floating point vector of the specified bit widths into a
+ * 32-bit floating point vector.
+ */
+ template<typename B, typename S>
+ S
+ emit_convert_from_float(const B &bld, const S &src,
+ const color<unsigned> &widths)
+ {
+ using namespace detail;
+ const unsigned mask = bitmask(widths);
+ const unsigned shift_mask = bitmask(
+ color<bool>(widths.r < 16, widths.g < 16,
+ widths.b < 16, widths.a < 16));
+ const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+ const typename B::dst_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+ bld.MOV(writemask(dst, mask), src);
+
+ /* Extend 10-bit and 11-bit floating point numbers to 15 bits.
+ * This works because they have a 5-bit exponent just like the
+ * 16-bit floating point format, and they have no sign bit.
+ */
+ if (shift_mask)
+ bld.SHL(writemask(dst, shift_mask), dst,
+ emit_vector_imm(bld, color<unsigned>(
+ 15 - widths.r, 15 - widths.g,
+ 15 - widths.b, 15 - widths.a)));
+
+ /* Convert to 32-bit floating point. */
+ bld.F16TO32(writemask(fdst, mask), dst);
+
+ return fdst;
+ }
+
+ /**
+ * Convert a vector into a floating point vector of the specified bit
+ * widths.
+ */
+ template<typename B, typename S>
+ S
+ emit_convert_to_float(const B &bld, const S &src,
+ const color<unsigned> &widths)
+ {
+ using namespace detail;
+ const unsigned mask = bitmask(widths);
+ const unsigned shift_mask = bitmask(
+ color<bool>(widths.r < 16, widths.g < 16,
+ widths.b < 16, widths.a < 16));
+ const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+ const typename B::dst_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+ bld.MOV(writemask(fdst, mask), src);
+
+ /* Clamp to the minimum value. */
+ if (shift_mask)
+ bld.emit_minmax(writemask(fdst, shift_mask), fdst, 0.0f,
+ BRW_CONDITIONAL_G);
+
+ /* Convert to 16-bit floating-point. */
+ bld.F32TO16(writemask(dst, mask), fdst);
+
+ /* Discard the least significant bits to get floating point numbers
+ * of the requested width. This works because the 10-bit and
+ * 11-bit floating point formats have a 5-bit exponent just like
+ * the 16-bit format, and they have no sign bit.
+ */
+ if (shift_mask)
+ bld.SHR(writemask(dst, shift_mask), dst,
+ emit_vector_imm(bld, color<unsigned>(
+ 15 - widths.r, 15 - widths.g,
+ 15 - widths.b, 15 - widths.a)));
+
+ return dst;
+ }
+
+ /**
+ * Fill missing components of a vector with 0, 0, 0, 1.
+ */
+ template<typename B, typename S>
+ S
+ emit_pad(const B &bld, const S &src, const color<unsigned> &widths)
+ {
+ using namespace detail;
+ const unsigned mask = bitmask(widths);
+ const typename B::dst_reg dst = bld.natural_reg(src.type);
+
+ bld.MOV(writemask(dst, mask), src);
+ if (~mask & dst.writemask)
+ bld.MOV(writemask(dst, ~mask),
+ emit_vector_imm(bld, color<unsigned>(0, 0, 0, 1)));
+
+ return dst;
+ }
+ }
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index c03a8ae..668f83c 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -82,7 +82,10 @@ struct brw_device_info;
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+#define BRW_SWIZZLE_XZXZ BRW_SWIZZLE4(0,2,0,2)
#define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3)
+#define BRW_SWIZZLE_YZYZ BRW_SWIZZLE4(1,2,1,2)
+#define BRW_SWIZZLE_YWYW BRW_SWIZZLE4(1,3,1,3)
#define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3)
--
2.3.5
More information about the mesa-dev
mailing list