[Mesa-dev] [PATCH 12/23] i965: Import image format conversion primitives.

Tue Apr 28 11:44:23 PDT 2015

Define bitfield packing, unpacking and type conversion operations in
terms of which the image format conversion code will be implemented.
These don't directly know about image formats: The packing and
unpacking functions take a 4-tuple of bit shifts and a 4-tuple of bit
widths as arguments, determining the bitfield position of each
component.  Most of the remaining functions perform integer, fixed
point normalized, and floating point type conversions, mapping between
a target type with the per-component bit widths given by a parameter
and a matching native representation of the same type.
---
 src/mesa/drivers/dri/i965/brw_ir_surface_builder.h | 385 +++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_reg.h                |   3 +
 2 files changed, 388 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
index e46c7c1..317ad15 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
@@ -719,6 +719,391 @@ namespace brw {
          return dst;
       }
    }
+
+   namespace detail {
+      /**
+       * Simple 4-tuple of scalars used to pass around per-color component
+       * values.  It has to be parameterized on T because we need values of a
+       * number of different types.  Components can also be indexed, r = 0.
+       */
+      template<typename T>
+      struct color {
+         color(T x = 0) : r(x), g(x), b(x), a(x)
+         {
+         }
+
+         color(T r, T g, T b, T a) : r(r), g(g), b(b), a(a)
+         {
+         }
+
+         T
+         operator[](unsigned i) const
+         {
+            assert(i < 4); /* Only r, g, b and a exist. */
+            return i == 0 ? r : i == 1 ? g : i == 2 ? b : a;
+         }
+
+         T r, g, b, a;
+      };
+
+      /**
+       * Build a 4-bit mask with bit n set iff component n (r = 0) is non-zero.
+       */
+      template<typename T>
+      unsigned
+      bitmask(const color<T> &c)
+      {
+         return (c.r ? 1 : 0) | (c.g ? 2 : 0) | (c.b ? 4 : 0) | (c.a ? 8 : 0);
+      }
+   }
+
+   namespace image_format_conversion {
+      using detail::color;
+
+      namespace detail {
+         /**
+          * Maximum representable value in an unsigned integer with the given
+          * number of bits, saturating to all ones when \p n is 32 or more.
+          */
+         inline unsigned
+         scale(unsigned n)
+         {
+            return n < 32 ? (1u << n) - 1 : ~0u;
+         }
+
+         /**
+          * Load the constant 4-tuple \p c into the register bld.natural_reg() returns.
+          */
+         template<typename T>
+         src_svec4
+         emit_vector_imm(const svec4_builder &bld, const color<T> &c)
+         {
+            const dst_svec4 vec = bld.natural_reg(fs_reg(T()).type);
+
+            /* One single-channel MOV per component, in r, g, b, a order. */
+            for (int chan = 0; chan != 4; chan++)
+               bld.MOV(writemask(vec, 1 << chan), c[chan]);
+            return vec;
+         }
+
+         /**
+          * Load a 4-component constant vector into registers.
+          *
+          * This SIMD4x2 specialization uses a single vector-float immediate MOV
+          * when brw_float_to_vf() returns something other than -1 for all four
+          * components, and one writemasked MOV per distinct value otherwise.
+          * It belongs in a general vectorization pass rather than here, but we
+          * don't have such a pass yet and want reasonable code in the meantime.
+          */
+         template<typename T>
+         src_reg
+         emit_vector_imm(const vec4_builder &bld, const color<T> &c)
+         {
+            const dst_reg dst = bld.natural_reg(src_reg(T()).type);
+
+            if (brw_float_to_vf(c.r) != -1 &&
+                brw_float_to_vf(c.g) != -1 &&
+                brw_float_to_vf(c.b) != -1 &&
+                brw_float_to_vf(c.a) != -1) {
+               bld.MOV(dst, src_reg(brw_float_to_vf(c.r), brw_float_to_vf(c.g),
+                                    brw_float_to_vf(c.b), brw_float_to_vf(c.a)));
+            } else {
+               for (unsigned mask_left = dst.writemask; mask_left;) {
+                  const unsigned i = ffs(mask_left) - 1;
+                  /* Force bit i: NaN != NaN would otherwise never terminate. */
+                  const unsigned mask = (1u << i) | bitmask(
+                     color<bool>(c.r == c[i], c.g == c[i],
+                                 c.b == c[i], c.a == c[i]));
+
+                  bld.MOV(writemask(dst, mask), c[i]);
+                  mask_left &= ~mask;
+               }
+            }
+
+            return dst;
+         }
+      }
+
+      /**
+       * Pack the vector \p src in a bitfield given the per-component bit
+       * \p shifts and \p widths.  Components of zero width are left out.
+       */
+      template<typename B, typename S>
+      S
+      emit_pack(const B &bld, const S &src,
+                const color<unsigned> &shifts,
+                const color<unsigned> &widths)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+
+         /* Shift each component left to its bitfield position (modulo 32). */
+         bld.SHL(writemask(dst, mask), src,
+                 emit_vector_imm(bld, color<unsigned>(
+                                    shifts.r % 32, shifts.g % 32,
+                                    shifts.b % 32, shifts.a % 32)));
+
+         /* Add everything up: x |= y, y = z | w (or z | z without alpha). */
+         if (mask >> 1) {
+            assert(shifts.r + widths.r <= 32 && shifts.g + widths.g <= 32 &&
+                   shifts.b + widths.b <= 64 && shifts.a + widths.a <= 64);
+            bld.OR(writemask(dst, WRITEMASK_XY),
+                   swizzle(dst, BRW_SWIZZLE_XZXZ),
+                   swizzle(dst, (mask >> 3 ? BRW_SWIZZLE_YWYW :
+                                 BRW_SWIZZLE_YZYZ)));
+         }
+         /* Fold y into x as well when b or a start below bit 32. */
+         if (mask >> 2 && (shifts.b < 32 || shifts.a < 32)) {
+            assert(shifts.b + widths.b <= 32 && shifts.a + widths.a <= 32);
+            bld.OR(writemask(dst, WRITEMASK_X),
+                   swizzle(dst, BRW_SWIZZLE_XXXX),
+                   swizzle(dst, BRW_SWIZZLE_YYYY));
+         }
+
+         return dst;
+      }
+
+      /**
+       * Unpack a vector from the bitfield \p src given the per-component bit
+       * \p shifts and \p widths.  The result has the same type as \p src.
+       */
+      template<typename B, typename S>
+      S
+      emit_unpack(const B &bld, const S &src,
+                  const color<unsigned> &shifts,
+                  const color<unsigned> &widths)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const typename B::dst_reg dst = bld.natural_reg(src.type);
+
+         /* Pick channel shift / 32 and shift left to discard the high bits. */
+         bld.SHL(writemask(dst, mask),
+                 swizzle(src, BRW_SWIZZLE4(shifts.r / 32, shifts.g / 32,
+                                           shifts.b / 32, shifts.a / 32)),
+                 emit_vector_imm(bld, color<unsigned>(
+                                    32 - shifts.r % 32 - widths.r,
+                                    32 - shifts.g % 32 - widths.g,
+                                    32 - shifts.b % 32 - widths.b,
+                                    32 - shifts.a % 32 - widths.a)));
+
+         /* Shift each field back down by 32 - width using an arithmetic
+          * shift, so that signed types come out sign-extended.
+          */
+         bld.ASR(writemask(dst, mask), dst,
+                 emit_vector_imm(bld, color<unsigned>(
+                                    32 - widths.r, 32 - widths.g,
+                                    32 - widths.b, 32 - widths.a)));
+
+         return dst;
+      }
+
+      /**
+       * Convert a vector into an integer vector of the specified signedness
+       * and bit widths, clamping out-of-range values to the nearest bound.
+       */
+      template<typename B, typename S>
+      S
+      emit_convert_to_integer(const B &bld, const S &src,
+                              const color<unsigned> &widths,
+                              bool is_signed)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const unsigned s = (is_signed ? 1 : 0);
+         const typename B::dst_reg dst = bld.natural_reg(
+            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD);
+
+         bld.MOV(writemask(dst, mask), src);
+
+         /* Clamp to the minimum value, -2^(width - 1) for signed formats. */
+         if (is_signed) {
+            const S min = emit_vector_imm(bld, color<int>(
+               -scale(widths.r - s) - 1, -scale(widths.g - s) - 1,
+               -scale(widths.b - s) - 1, -scale(widths.a - s) - 1));
+            bld.emit_minmax(writemask(dst, mask), dst, min,
+                            BRW_CONDITIONAL_G);
+         }
+
+         /* Clamp to the maximum value, 2^(width - s) - 1. */
+         const S max = emit_vector_imm(bld, color<int>(
+            scale(widths.r - s), scale(widths.g - s),
+            scale(widths.b - s), scale(widths.a - s)));
+         bld.emit_minmax(writemask(dst, mask), dst, max,
+                         BRW_CONDITIONAL_L);
+
+         return dst;
+      }
+
+      /**
+       * Convert a normalized fixed-point vector of the specified signedness
+       * and bit widths into a floating point vector (type F).
+       */
+      template<typename B, typename S>
+      S
+      emit_convert_from_scaled(const B &bld, const S &src,
+                               const color<unsigned> &widths,
+                               bool is_signed)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const unsigned s = (is_signed ? 1 : 0);
+         const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_F);
+
+         /* Convert to float. */
+         bld.MOV(writemask(dst, mask), src);
+
+         /* Divide by the normalization constants (multiply by 1 / max). */
+         bld.MUL(writemask(dst, mask), dst,
+                 emit_vector_imm(bld, color<float>(
+                    1.0 / scale(widths.r - s), 1.0 / scale(widths.g - s),
+                    1.0 / scale(widths.b - s), 1.0 / scale(widths.a - s))));
+
+         /* Clamp to -1.0: the most negative input scales to just below it. */
+         if (is_signed)
+            bld.emit_minmax(writemask(dst, mask), dst, -1.0f,
+                            BRW_CONDITIONAL_G);
+
+         return dst;
+      }
+
+      /**
+       * Convert a floating point vector into a normalized fixed-point vector
+       * of the specified signedness and bit widths (type D or UD).
+       */
+      template<typename B, typename S>
+      S
+      emit_convert_to_scaled(const B &bld, const S &src,
+                             const color<unsigned> &widths,
+                             bool is_signed)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const unsigned s = (is_signed ? 1 : 0);
+         const typename B::dst_reg dst = bld.natural_reg(
+            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD);
+         const typename B::dst_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         bld.MOV(writemask(fdst, mask), src);
+
+         /* Clamp to the minimum value, -1.0 (signed formats only). */
+         if (is_signed)
+            bld.emit_minmax(writemask(fdst, mask), fdst, -1.0f,
+                            BRW_CONDITIONAL_G);
+
+         /* Clamp to the maximum value, 1.0. */
+         bld.emit_minmax(writemask(fdst, mask), fdst, 1.0f,
+                         BRW_CONDITIONAL_L);
+
+         /* Multiply by the normalization constants, 2^(width - s) - 1. */
+         bld.MUL(writemask(fdst, mask), fdst,
+                 emit_vector_imm(bld, color<float>(
+                                    scale(widths.r - s), scale(widths.g - s),
+                                    scale(widths.b - s), scale(widths.a - s))));
+
+         /* Round (RNDE), then convert to integer by moving fdst into dst. */
+         bld.RNDE(writemask(fdst, mask), fdst);
+         bld.MOV(writemask(dst, mask), fdst);
+
+         return dst;
+      }
+
+      /**
+       * Convert a floating point vector of the specified bit widths into a
+       * 32-bit floating point vector.  Zero-width components are not touched.
+       */
+      template<typename B, typename S>
+      S
+      emit_convert_from_float(const B &bld, const S &src,
+                              const color<unsigned> &widths)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const unsigned shift_mask = mask & bitmask(
+            color<bool>(widths.r < 16, widths.g < 16,
+                        widths.b < 16, widths.a < 16));
+         const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+         const typename B::dst_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         bld.MOV(writemask(dst, mask), src);
+
+         /* Extend 10-bit and 11-bit floating point numbers to 15 bits.
+          * This works because they have a 5-bit exponent just like the
+          * 16-bit floating point format, and they have no sign bit.
+          */
+         if (shift_mask)
+            bld.SHL(writemask(dst, shift_mask), dst,
+                    emit_vector_imm(bld, color<unsigned>(
+                                       15 - widths.r, 15 - widths.g,
+                                       15 - widths.b, 15 - widths.a)));
+
+         /* Convert to 32-bit floating point. */
+         bld.F16TO32(writemask(fdst, mask), dst);
+
+         return fdst;
+      }
+
+      /**
+       * Convert a vector into a floating point vector of the specified bit
+       * widths.  Zero-width components are not touched.
+       */
+      template<typename B, typename S>
+      S
+      emit_convert_to_float(const B &bld, const S &src,
+                            const color<unsigned> &widths)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const unsigned shift_mask = mask & bitmask(
+            color<bool>(widths.r < 16, widths.g < 16,
+                        widths.b < 16, widths.a < 16));
+         const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+         const typename B::dst_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         bld.MOV(writemask(fdst, mask), src);
+
+         /* Clamp to zero: formats narrower than 16 bits have no sign bit. */
+         if (shift_mask)
+            bld.emit_minmax(writemask(fdst, shift_mask), fdst, 0.0f,
+                            BRW_CONDITIONAL_G);
+
+         /* Convert to 16-bit floating-point. */
+         bld.F32TO16(writemask(dst, mask), fdst);
+
+         /* Discard the least significant bits to get floating point numbers
+          * of the requested width.  This works because the 10-bit and
+          * 11-bit floating point formats have a 5-bit exponent just like
+          * the 16-bit format, and they have no sign bit.
+          */
+         if (shift_mask)
+            bld.SHR(writemask(dst, shift_mask), dst,
+                    emit_vector_imm(bld, color<unsigned>(
+                                       15 - widths.r, 15 - widths.g,
+                                       15 - widths.b, 15 - widths.a)));
+
+         return dst;
+      }
+
+      /**
+       * Fill the zero-width components of a vector with 0, 0, 0, 1.
+       */
+      template<typename B, typename S>
+      S
+      emit_pad(const B &bld, const S &src, const color<unsigned> &widths)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const typename B::dst_reg dst = bld.natural_reg(src.type);
+         /* Copy what is present, then pad if dst has other channels enabled. */
+         bld.MOV(writemask(dst, mask), src);
+         if (~mask & dst.writemask)
+            bld.MOV(writemask(dst, ~mask),
+                    emit_vector_imm(bld, color<unsigned>(0, 0, 0, 1)));
+
+         return dst;
+      }
+   }
 }
 
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index c03a8ae..668f83c 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -82,7 +82,10 @@ struct brw_device_info;
 #define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
 #define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
 #define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
+#define BRW_SWIZZLE_XZXZ      BRW_SWIZZLE4(0,2,0,2)
 #define BRW_SWIZZLE_YZXW      BRW_SWIZZLE4(1,2,0,3)
+#define BRW_SWIZZLE_YZYZ      BRW_SWIZZLE4(1,2,1,2)
+#define BRW_SWIZZLE_YWYW      BRW_SWIZZLE4(1,3,1,3)
 #define BRW_SWIZZLE_ZXYW      BRW_SWIZZLE4(2,0,1,3)
 #define BRW_SWIZZLE_ZWZW      BRW_SWIZZLE4(2,3,2,3)
 
-- 
2.3.5