[Mesa-dev] [PATCH 20/21] i965/fs: Introduce scalarizing SVEC4 IR builder.

Tue Apr 28 10:08:36 PDT 2015

See "i965/fs: Introduce FS IR builder." for the rationale.
---
 src/mesa/drivers/dri/i965/brw_fs_builder.h | 426 +++++++++++++++++++++++++++++
 1 file changed, 426 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index 6b36d1f..0368d2b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -677,6 +677,432 @@ namespace brw {
       const void *base_ir;
       /** @} */
    };
+
+   /**
+    * Toolbox to assemble an FS IR program out of vector instructions,
+    * scalarizing them on emission.  It's meant to be largely compatible with
+    * brw::vec4_builder in order to enable generic FS/VEC4 programming.
+    */
+   class svec4_builder {
+   public:
+      /** Type used in this IR to represent a source of an instruction. */
+      typedef src_svec4 src_reg;
+
+      /** Type used in this IR to represent the destination of an instruction. */
+      typedef dst_svec4 dst_reg;
+
+      /** Type used in this IR to represent an instruction. */
+      typedef svec4_inst instruction;
+
+      /** Builder type to use for scalar operations on the same IR. */
+      typedef fs_builder scalar_builder;
+
+      /** Builder type to use for vector operations, i.e. this class itself. */
+      typedef svec4_builder vector_builder;
+
+      /**
+       * Construct a scalarizing vector builder stacked on top of a copy of
+       * the scalar builder \p bld, sharing its device info pointer.
+       */
+      svec4_builder(const fs_builder &bld) :
+         devinfo(bld.devinfo), bld(bld)
+      {
+      }
+
+      /**
+       * Get the underlying scalar builder, which carries the same code
+       * generation parameters as this.  No new builder is constructed.
+       */
+      const fs_builder &
+      scalar() const
+      {
+         return bld;
+      }
+
+      /**
+       * Get a vector builder with the same code generation parameters as
+       * this, which is simply a copy of this builder.
+       */
+      svec4_builder
+      vector() const
+      {
+         return *this;
+      }
+
+      /**
+       * Construct a vector builder stacked on the half-SIMD-width scalar
+       * builder bld.half(\p i).  Predication and control flow masking will
+       * use the enable signals for the i-th half.
+       */
+      svec4_builder
+      half(unsigned i) const
+      {
+         return svec4_builder(bld.half(i));
+      }
+
+      /**
+       * Get the SIMD width in use, as reported by the scalar builder.
+       */
+      unsigned
+      dispatch_width() const
+      {
+         return bld.dispatch_width();
+      }
+
+      /**
+       * Get the lowered predicate to be used to interpret the flag result
+       * written by a reduced SVEC4 instruction (i.e. having called
+       * brw::exec_reduce() on the instruction with \p pred as argument).
+       * This can be used to "map" an ALIGN16 predication mode into an ALIGN1
+       * mode, allowing vector comparisons in the scalar back-end.
+       *
+       * \sa brw::exec_reduce().
+       */
+      static brw_predicate
+      reduced_predicate(brw_predicate pred)
+      {
+         const bool predicated = (pred != BRW_PREDICATE_NONE);
+         return predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE;
+      }
+
+      /**
+       * Allocate a virtual register of natural SIMD width, resized to \p n
+       * by resize().  \p n is also the amount of space requested, given in
+       * dispatch_width units (one logical component each in this IR).
+       */
+      dst_reg
+      natural_reg(brw_reg_type type, unsigned n = 4) const
+      {
+         return resize(dst_reg(bld.natural_reg(type, n)), n);
+      }
+
+      /**
+       * Create a register of natural vector size and SIMD width using array
+       * \p reg as storage (the type is qualified: array_reg() hides it here).
+       */
+      dst_reg
+      natural_reg(const ::array_reg &reg) const
+      {
+         return bld.natural_reg(reg);
+      }
+
+      /**
+       * Allocate a virtual register of vector size one and natural SIMD
+       * width.  Only the X component is enabled in its writemask.
+       */
+      dst_reg
+      scalar_reg(brw_reg_type type) const
+      {
+         return dst_reg(bld.natural_reg(type), WRITEMASK_X);
+      }
+
+      /**
+       * Allocate a raw chunk of memory from the virtual GRF file with no
+       * special vector size or SIMD width, by forwarding to the scalar
+       * builder.  \p n is given in units of 32B registers.
+       */
+      ::array_reg
+      array_reg(enum brw_reg_type type, unsigned n) const
+      {
+         return bld.array_reg(type, n);
+      }
+
+      /**
+       * Create a float-typed null register for the SIMD width in use.
+       */
+      dst_reg
+      null_reg_f() const
+      {
+         const unsigned width = dispatch_width();
+         return dst_reg(retype(brw_null_vec(width), BRW_REGISTER_TYPE_F));
+      }
+
+      /**
+       * Create a signed-integer null register for the SIMD width in use.
+       */
+      dst_reg
+      null_reg_d() const
+      {
+         const unsigned width = dispatch_width();
+         return dst_reg(retype(brw_null_vec(width), BRW_REGISTER_TYPE_D));
+      }
+
+      /**
+       * Create an unsigned-integer null register for the SIMD width in use.
+       */
+      dst_reg
+      null_reg_ud() const
+      {
+         const unsigned width = dispatch_width();
+         return dst_reg(retype(brw_null_vec(width), BRW_REGISTER_TYPE_UD));
+      }
+
+      /**
+       * Emit a nullary control instruction, tracked as component zero.
+       */
+      instruction *
+      emit(enum opcode opcode) const
+      {
+         instruction *const out = new(bld.mem_ctx) instruction;
+         out->v[0] = bld.emit(opcode);
+         return out;
+      }
+
+      /**
+       * Emit a nullary instruction for each component enabled in \p dst.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+
+         for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) {
+            if (dst.writemask & (1 << i))
+               inst->v[i] = bld.emit(opcode, component(dst, i));
+         }
+
+         return inst;
+      }
+
+      /**
+       * Emit a unary instruction for each component enabled in \p dst.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
+      {
+         instruction *const out = new(bld.mem_ctx) instruction;
+
+         for (unsigned c = 0; c < ARRAY_SIZE(out->v); ++c) {
+            if (!(dst.writemask & (1 << c)))
+               continue;
+            out->v[c] = bld.emit(opcode, component(dst, c), component(src0, c));
+         }
+         return out;
+      }
+
+      /**
+       * Emit a binary instruction for each component enabled in \p dst.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+           const src_reg &src1) const
+      {
+         instruction *const out = new(bld.mem_ctx) instruction;
+
+         for (unsigned c = 0; c < ARRAY_SIZE(out->v); ++c) {
+            if (!(dst.writemask & (1 << c)))
+               continue;
+            out->v[c] = bld.emit(opcode, component(dst, c), component(src0, c),
+                                 component(src1, c));
+         }
+         return out;
+      }
+
+      /**
+       * Emit a ternary instruction for each component enabled in \p dst.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+           const src_reg &src1, const src_reg &src2) const
+      {
+         instruction *const out = new(bld.mem_ctx) instruction;
+
+         for (unsigned c = 0; c < ARRAY_SIZE(out->v); ++c) {
+            if (!(dst.writemask & (1 << c)))
+               continue;
+            out->v[c] = bld.emit(opcode, component(dst, c), component(src0, c),
+                                 component(src1, c), component(src2, c));
+         }
+         return out;
+      }
+
+      /**
+       * Insert the non-null components of a preallocated instruction.
+       */
+      instruction *
+      emit(instruction *inst) const
+      {
+         for (unsigned c = 0; c < ARRAY_SIZE(inst->v); ++c) {
+            if (!inst->v[c])
+               continue;
+            bld.emit(inst->v[c]);
+         }
+         return inst;
+      }
+
+      /**
+       * Component-wise select of \p src0 where the comparison of both sources
+       * under \p mod holds and \p src1 elsewhere, e.g. to take a min or max.
+       * Only components enabled in the writemask of \p dst are processed.
+       */
+      void
+      emit_minmax(const dst_reg &dst, const src_reg &src0,
+                  const src_reg &src1, brw_conditional_mod mod) const
+      {
+         for (unsigned c = 0; c < ARRAY_SIZE(svec4_inst::v); ++c) {
+            if (!(dst.writemask & (1 << c)))
+               continue;
+            bld.emit_minmax(component(dst, c), component(src0, c),
+                            component(src1, c), mod);
+         }
+      }
+
+      /**
+       * Copy any live channel from \p src0 to the first channel of \p dst.
+       */
+      void
+      emit_uniformize(const dst_reg &dst, const src_reg &src0) const
+      {
+         for (unsigned i = 0; i < ARRAY_SIZE(svec4_inst::v); ++i) {
+            if (dst.writemask & (1 << i))
+               bld.emit_uniformize(component(dst, i), component(src0, i));
+         }
+      }
+
+      /**
+       * Assorted arithmetic ops, each wrapping emit(BRW_OPCODE_<op>, ...).
+       * @{
+       */
+#define ALU1(op)                                        \
+      instruction *                                     \
+      op(const dst_reg &dst, const src_reg &src0) const \
+      {                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0);       \
+      }
+
+#define ALU2(op)                                                        \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+      {                                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
+      }
+
+#define ALU3(op)                                                        \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
+         const src_reg &src2) const                                     \
+      {                                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
+      }
+
+      ALU2(ADD)
+      ALU2(AND)
+      ALU2(ASR)
+      ALU2(AVG)
+      ALU3(BFE)
+      ALU2(BFI1)
+      ALU3(BFI2)
+      ALU1(BFREV)
+      ALU1(CBIT)
+      ALU2(CMPN)
+      ALU3(CSEL)
+      ALU2(DP2)
+      ALU2(DP3)
+      ALU2(DP4)
+      ALU2(DPH)
+      ALU1(F16TO32)
+      ALU1(F32TO16)
+      ALU1(FBH)
+      ALU1(FBL)
+      ALU1(FRC)
+      ALU2(LINE)
+      ALU1(LZD)
+      ALU2(MAC)
+      ALU3(MAD)
+      ALU1(MOV)
+      ALU2(MUL)
+      ALU1(NOT)
+      ALU2(OR)
+      ALU2(PLN)
+      ALU1(RNDD)
+      ALU1(RNDE)
+      ALU1(RNDU)
+      ALU1(RNDZ)
+      ALU2(SAD2)
+      ALU2(SEL)
+      ALU2(SHL)
+      ALU2(SHR)
+      ALU2(XOR)
+
+#undef ALU3
+#undef ALU2
+#undef ALU1
+      /** @} */
+
+      /**
+       * CMP: Sets the low bit of the destination channels with the result
+       * of the comparison, while the upper bits are undefined, and updates
+       * the flag register with the packed 16 bits of the result.
+       */
+      instruction *
+      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
+          brw_conditional_mod condition) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+         /* Record each scalar CMP in the returned vector instruction. */
+         for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) {
+            if (dst.writemask & (1 << i))
+               inst->v[i] = bld.CMP(component(dst, i), component(src0, i),
+                                    component(src1, i), condition);
+         }
+
+         return inst;
+      }
+
+      /**
+       * Gen4 predicated IF, recorded as component zero of the result.
+       */
+      instruction *
+      IF(brw_predicate predicate) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+         inst->v[0] = bld.IF(predicate);
+         return inst;
+      }
+
+      /**
+       * Gen6 IF with embedded comparison of single-value swizzled sources.
+       */
+      instruction *
+      IF(const src_reg &src0, const src_reg &src1,
+         brw_conditional_mod condition) const
+      {
+         assert(brw_is_single_value_swizzle(src0.swizzle) &&
+                brw_is_single_value_swizzle(src1.swizzle));
+         instruction *inst = new(bld.mem_ctx) instruction;
+         inst->v[0] = bld.IF(component(src0, 0), component(src1, 0), condition);
+         return inst;
+      }
+
+      /**
+       * Emit a linear interpolation for each component enabled in \p dst.
+       */
+      instruction *
+      LRP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
+          const src_reg &src2) const
+      {
+         instruction *inst = new(bld.mem_ctx) instruction;
+         /* Record each scalar LRP in the returned vector instruction. */
+         for (unsigned i = 0; i < ARRAY_SIZE(inst->v); ++i) {
+            if (dst.writemask & (1 << i))
+               inst->v[i] = bld.LRP(component(dst, i), component(src0, i),
+                                    component(src1, i), component(src2, i));
+         }
+
+         return inst;
+      }
+
+      const brw_device_info *const devinfo;
+
+   private:
+      fs_builder bld;
+   };
+   /** Header-defined out of class, so inline to keep a single definition. */
+   inline svec4_builder
+   fs_builder::vector() const
+   {
+      return vector_builder(*this);
+   }
 }
 
 #endif
-- 
2.3.5