[Mesa-dev] [PATCH 18/27] i965/fs: Append ir_binop_ubo_load entries to the gather table
Abdiel Janulgue
abdiel.janulgue at linux.intel.com
Tue Apr 28 13:08:15 PDT 2015
Append ir_binop_ubo_load entries to the gather table when the const block
and offset are both immediate values. Otherwise, just fall back to the
previous method of uploading the UBO constant data to the GRF using pull
constants.
Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 11 ++++
src/mesa/drivers/dri/i965/brw_fs.h | 4 ++
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 86 +++++++++++++++++++++++++++-
3 files changed, 100 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 071ac59..031d807 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2273,6 +2273,7 @@ fs_visitor::assign_constant_locations()
}
stage_prog_data->nr_params = 0;
+ stage_prog_data->nr_ubo_params = ubo_uniforms;
unsigned const_reg_access[uniforms];
memset(const_reg_access, 0, sizeof(const_reg_access));
@@ -2302,6 +2303,16 @@ fs_visitor::assign_constant_locations()
stage_prog_data->gather_table[p].channel_mask =
const_reg_access[i];
}
+
+ for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) {
+ int p = stage_prog_data->nr_gather_table++;
+ stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg;
+ stage_prog_data->gather_table[p].channel_mask = this->ubo_gather_table[i].channel_mask;
+ stage_prog_data->gather_table[p].const_block = this->ubo_gather_table[i].const_block;
+ stage_prog_data->gather_table[p].const_offset = this->ubo_gather_table[i].const_offset;
+ stage_prog_data->max_ubo_const_block = MAX2(stage_prog_data->max_ubo_const_block,
+ this->ubo_gather_table[i].const_block);
+ }
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 32063f0..a48b2bb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -417,6 +417,7 @@ public:
void setup_uniform_values(ir_variable *ir);
void setup_builtin_uniform_values(ir_variable *ir);
int implied_mrf_writes(fs_inst *inst);
+ bool generate_ubo_gather_table(ir_expression* ir);
virtual void dump_instructions();
virtual void dump_instructions(const char *name);
@@ -445,6 +446,9 @@ public:
/** Total number of direct uniforms we can get from NIR */
unsigned num_direct_uniforms;
+ /** Number of ubo uniform variable components visited. */
+ unsigned ubo_uniforms;
+
/** Byte-offset for the next available spot in the scratch space buffer. */
unsigned last_scratch;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 4e99366..11e608b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1179,11 +1179,18 @@ fs_visitor::visit(ir_expression *ir)
emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
break;
case ir_binop_ubo_load: {
+ /* Use gather push constants if at all possible, otherwise just
+ * fall back to pull constants for UBOs
+ */
+ if (generate_ubo_gather_table(ir))
+ break;
+
/* This IR node takes a constant uniform block and a constant or
* variable byte offset within the block and loads a vector from that.
*/
ir_constant *const_uniform_block = ir->operands[0]->as_constant();
ir_constant *const_offset = ir->operands[1]->as_constant();
+
fs_reg surf_index;
if (const_uniform_block) {
@@ -4144,6 +4151,79 @@ fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
*reg = neg_result;
}
+/**
+ * Try to service an ir_binop_ubo_load through the UBO gather table (push
+ * constants) instead of a pull-constant load.
+ *
+ * \param ir  the expression being visited.
+ * \return true if this->result now holds the loaded value; false if nothing
+ *         was emitted because \p ir is not a UBO load, gather support is
+ *         not enabled, the block or offset is not an immediate, or the push
+ *         constant budget checked below would be exceeded.
+ */
+bool
+fs_visitor::generate_ubo_gather_table(ir_expression *ir)
+{
+   /* Check the opcode before touching the operands it implies. */
+   if (ir->operation != ir_binop_ubo_load ||
+       !brw->has_resource_streamer ||
+       !brw->fs_ubo_gather)
+      return false;
+
+   ir_constant *const_uniform_block = ir->operands[0]->as_constant();
+   ir_constant *const_offset = ir->operands[1]->as_constant();
+   if (!const_uniform_block || !const_offset)
+      return false;
+
+   const unsigned block = const_uniform_block->value.u[0];
+   const unsigned block_offset = const_offset->value.u[0];
+   const unsigned components = ir->type->vector_elements;
+
+   /* Only allow 16 registers (128 uniform components) as push constants. */
+   const unsigned max_push_components = 16 * 8;
+   const unsigned param_index = uniforms + ubo_uniforms;
+   if (param_index + components >= max_push_components)
+      return false;
+
+   fs_reg reg;
+   if (dispatch_width == 16) {
+      /* An entry for this (block, offset) pair is expected to exist
+       * already; reuse its uniform slot rather than adding a duplicate.
+       */
+      for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) {
+         if (this->ubo_gather_table[i].const_block == block &&
+             this->ubo_gather_table[i].const_offset == block_offset) {
+            reg = fs_reg(UNIFORM, this->ubo_gather_table[i].reg);
+            break;
+         }
+      }
+      assert(reg.file == UNIFORM);
+   }
+
+   if (reg.file != UNIFORM) {
+      /* No existing entry: claim the next uniform slots for this load.
+       * NOTE(review): nothing here bounds nr_ubo_gather_table against the
+       * size ubo_gather_table was allocated with, and channel_mask is left
+       * at its initial value -- confirm both against the allocation site.
+       */
+      assert(components <= 4);
+      reg = fs_reg(UNIFORM, param_index);
+      const int gather = this->nr_ubo_gather_table++;
+      ubo_uniforms += components;
+      this->ubo_gather_table[gather].reg = reg.reg;
+      this->ubo_gather_table[gather].const_block = block;
+      this->ubo_gather_table[gather].const_offset = block_offset;
+   }
+   reg.type = brw_type_for_base_type(ir->type);
+
+   if (ir->type->base_type == GLSL_TYPE_BOOL) {
+      /* The std140 packing rules don't allow vectors to cross 16-byte
+       * boundaries, and a reg is 32 bytes.
+       */
+      assert(reg.subreg_offset < 32);
+
+      /* UBO bools are any nonzero value. We consider bools to be values
+       * with the low bit set to 1. Convert them using CMP, reading source
+       * component i for destination component i so that each element of a
+       * bool vector is derived from its own UBO value.
+       */
+      for (unsigned i = 0; i < components; i++) {
+         emit(CMP(result, offset(reg, i), fs_reg(0u), BRW_CONDITIONAL_NZ));
+         result = offset(result, 1);
+      }
+      result.reg_offset = 0;
+   } else {
+      result = reg;
+   }
+
+   return true;
+}
+
fs_visitor::fs_visitor(struct brw_context *brw,
void *mem_ctx,
const struct brw_wm_prog_key *key,
@@ -4224,6 +4304,7 @@ fs_visitor::init()
this->regs_live_at_ip = NULL;
this->uniforms = 0;
+ this->ubo_uniforms = 0;
this->last_scratch = 0;
this->pull_constant_loc = NULL;
this->push_constant_loc = NULL;
@@ -4231,8 +4312,11 @@ fs_visitor::init()
this->spilled_any_registers = false;
this->do_dual_src = false;
- if (dispatch_width == 8)
+ if (dispatch_width == 8) {
this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params);
+ this->ubo_gather_table = rzalloc_array(mem_ctx, backend_visitor::gather_table,
+ stage_prog_data->nr_params);
+ }
}
fs_visitor::~fs_visitor()
--
1.9.1
More information about the mesa-dev
mailing list