[Mesa-dev] [PATCH v3 002/104] nir: Add a deref instruction type

Sun Apr 8 20:48:30 UTC 2018

On Sun, Apr 8, 2018 at 4:26 PM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> On Sun, Apr 8, 2018 at 6:06 PM, Rob Clark <robdclark at gmail.com> wrote:
>> On Sun, Apr 8, 2018 at 11:15 AM, Bas Nieuwenhuizen
>> <bas at basnieuwenhuizen.nl> wrote:
>>> On Sun, Apr 8, 2018 at 3:29 PM, Rob Clark <robdclark at gmail.com> wrote:
>>>> On Sun, Apr 8, 2018 at 8:58 AM, Bas Nieuwenhuizen
>>>> <bas at basnieuwenhuizen.nl> wrote:
>>>>> On Sun, Apr 8, 2018 at 1:38 PM, Rob Clark <robdclark at gmail.com> wrote:
>>>>>> On Tue, Apr 3, 2018 at 2:32 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
>>>>>>> This commit adds a new instruction type to NIR for handling derefs.
>>>>>>> Nothing uses it yet but this adds the data structure as well as all of
>>>>>>> the code to validate, print, clone, and [de]serialize them.
>>>>>>> ---
>>>>>>>  src/compiler/nir/nir.c                    | 50 +++++++++++++++++++
>>>>>>>  src/compiler/nir/nir.h                    | 58 ++++++++++++++++++++-
>>>>>>>  src/compiler/nir/nir_clone.c              | 42 ++++++++++++++++
>>>>>>>  src/compiler/nir/nir_instr_set.c          | 78 +++++++++++++++++++++++++++++
>>>>>>>  src/compiler/nir/nir_opt_copy_propagate.c | 62 +++++++++++++++++++----
>>>>>>>  src/compiler/nir/nir_opt_dce.c            |  7 +++
>>>>>>>  src/compiler/nir/nir_print.c              | 56 +++++++++++++++++++++
>>>>>>>  src/compiler/nir/nir_serialize.c          | 81 ++++++++++++++++++++++++++++++
>>>>>>>  src/compiler/nir/nir_validate.c           | 83 +++++++++++++++++++++++++++++++
>>>>>>>  9 files changed, 506 insertions(+), 11 deletions(-)
>>>>>>>
>>>>>>> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
>>>>>>> index 8364197..a538f22 100644
>>>>>>> --- a/src/compiler/nir/nir.c
>>>>>>> +++ b/src/compiler/nir/nir.c
>>>>>>> @@ -470,6 +470,26 @@ nir_alu_instr_create(nir_shader *shader, nir_op op)
>>>>>>>     return instr;
>>>>>>>  }
>>>>>>>
>>>>>>> +nir_deref_instr *
>>>>>>> +nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
>>>>>>> +{
>>>>>>> +   nir_deref_instr *instr =
>>>>>>> +      rzalloc_size(shader, sizeof(nir_deref_instr));
>>>>>>> +
>>>>>>> +   instr_init(&instr->instr, nir_instr_type_deref);
>>>>>>> +
>>>>>>> +   instr->deref_type = deref_type;
>>>>>>> +   if (deref_type != nir_deref_type_var)
>>>>>>> +      src_init(&instr->parent);
>>>>>>> +
>>>>>>> +   if (deref_type == nir_deref_type_array)
>>>>>>> +      src_init(&instr->arr.index);
>>>>>>> +
>>>>>>> +   dest_init(&instr->dest);
>>>>>>> +
>>>>>>> +   return instr;
>>>>>>> +}
>>>>>>> +
>>>>>>>  nir_jump_instr *
>>>>>>>  nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
>>>>>>>  {
>>>>>>> @@ -1199,6 +1219,12 @@ visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
>>>>>>>  }
>>>>>>>
>>>>>>>  static bool
>>>>>>> +visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
>>>>>>> +{
>>>>>>> +   return cb(&instr->dest, state);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static bool
>>>>>>>  visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
>>>>>>>                       void *state)
>>>>>>>  {
>>>>>>> @@ -1239,6 +1265,8 @@ nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
>>>>>>>     switch (instr->type) {
>>>>>>>     case nir_instr_type_alu:
>>>>>>>        return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
>>>>>>> +   case nir_instr_type_deref:
>>>>>>> +      return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
>>>>>>>     case nir_instr_type_intrinsic:
>>>>>>>        return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
>>>>>>>     case nir_instr_type_tex:
>>>>>>> @@ -1284,6 +1312,7 @@ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
>>>>>>>  {
>>>>>>>     switch (instr->type) {
>>>>>>>     case nir_instr_type_alu:
>>>>>>> +   case nir_instr_type_deref:
>>>>>>>     case nir_instr_type_tex:
>>>>>>>     case nir_instr_type_intrinsic:
>>>>>>>     case nir_instr_type_phi:
>>>>>>> @@ -1350,6 +1379,23 @@ visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
>>>>>>>  }
>>>>>>>
>>>>>>>  static bool
>>>>>>> +visit_deref_instr_src(nir_deref_instr *instr,
>>>>>>> +                      nir_foreach_src_cb cb, void *state)
>>>>>>> +{
>>>>>>> +   if (instr->deref_type != nir_deref_type_var) {
>>>>>>> +      if (!visit_src(&instr->parent, cb, state))
>>>>>>> +         return false;
>>>>>>> +   }
>>>>>>> +
>>>>>>> +   if (instr->deref_type == nir_deref_type_array) {
>>>>>>> +      if (!visit_src(&instr->arr.index, cb, state))
>>>>>>> +         return false;
>>>>>>> +   }
>>>>>>> +
>>>>>>> +   return true;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static bool
>>>>>>>  visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
>>>>>>>  {
>>>>>>>     for (unsigned i = 0; i < instr->num_srcs; i++) {
>>>>>>> @@ -1437,6 +1483,10 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
>>>>>>>        if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
>>>>>>>           return false;
>>>>>>>        break;
>>>>>>> +   case nir_instr_type_deref:
>>>>>>> +      if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
>>>>>>> +         return false;
>>>>>>> +      break;
>>>>>>>     case nir_instr_type_intrinsic:
>>>>>>>        if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
>>>>>>>           return false;
>>>>>>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>>>>>>> index cc7c401..db7dc91 100644
>>>>>>> --- a/src/compiler/nir/nir.h
>>>>>>> +++ b/src/compiler/nir/nir.h
>>>>>>> @@ -427,6 +427,7 @@ typedef struct nir_register {
>>>>>>>
>>>>>>>  typedef enum {
>>>>>>>     nir_instr_type_alu,
>>>>>>> +   nir_instr_type_deref,
>>>>>>>     nir_instr_type_call,
>>>>>>>     nir_instr_type_tex,
>>>>>>>     nir_instr_type_intrinsic,
>>>>>>> @@ -894,7 +895,9 @@ bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
>>>>>>>  typedef enum {
>>>>>>>     nir_deref_type_var,
>>>>>>>     nir_deref_type_array,
>>>>>>> -   nir_deref_type_struct
>>>>>>> +   nir_deref_type_array_wildcard,
>>>>>>> +   nir_deref_type_struct,
>>>>>>> +   nir_deref_type_cast,
>>>>>>>  } nir_deref_type;
>>>>>>>
>>>>>>>  typedef struct nir_deref {
>>>>>>> @@ -956,6 +959,56 @@ nir_deref_tail(nir_deref *deref)
>>>>>>>  typedef struct {
>>>>>>>     nir_instr instr;
>>>>>>>
>>>>>>> +   /** The type of this deref instruction */
>>>>>>> +   nir_deref_type deref_type;
>>>>>>> +
>>>>>>> +   /** The mode of the underlying variable */
>>>>>>> +   nir_variable_mode mode;
>>>>>>
>>>>>> In fact, it seems like deref->mode is unused outside of nir_print and
>>>>>> nir_validate.. for logical addressing we can get the mode from the
>>>>>> deref_var->var at the start of the chain, and deref->mode has no
>>>>>> meaning for physical addressing (where the mode comes from the
>>>>>> pointer).
>>>>>>
>>>>>> So maybe just drop deref->mode?
>>>>>
>>>>> Isn't it still useful with logical addressing in case a var is not
>>>>> immediately available? (think VK_KHR_variable_pointers)
>>>>
>>>> not sure, maybe this should just also use fat-pointers like physical
>>>> addressing does??
>>>>
>>>>> Also I could see this being useful in physical addressing too to avoid
>>>>> all passes working with derefs needing to do the constant folding?
>>>>
>>>> The problem is that you don't necessarily know the type at compile
>>>> time (and in the case where you do, you need to do constant folding to
>>>> figure it out)
>>>
>>> So I have two considerations here
>>>
>>> 1) for vulkan you always know the mode, even when you don't know the var.
>>> 2)  In CL the mode can still get annotated in the source program (CL C
>>> non-generic pointers) in cases in which we cannot reasonably figure it
>>> out with just constant folding. In those cases the mode is extra
>>> information that you really lose.
>>
>> so, even in cl 1.x, you could do things like 'somefxn(foo ? global_ptr
>> : local_ptr)'.. depending on how much we inline all the things, that
>> might not get CF'd away.
>
> But something like
> __constant int *ptr_value = ...;
> store ptr in complex data structure.
> __constant int* ptr2 = load from complex data structure.
>
> Without explicitly annotating ptr2 it is unlikely that constant
> folding would find that ptr2 is pointing to __constant address space.
> Hence removing the modes loses valuable information that you cannot
> get back by constant folding. However, if you have a pointer with
> unknown mode, we could have a special mode (or mode_all?) and you can
> use the uvec2 representation in that case?

hmm, I'm not really getting how deref->mode could magically have
information that fatptr.y doesn't have.. if the mode is known, vtn
could stash it in fatptr.y and everyone is happy?  If vtn doesn't know
this, then I don't see how deref->mode helps..

>>
>> I think I'm leaning towards using fat ptrs for the vk case, since I
>> guess that is a case where you could always expect
>> nir_src_as_const_value() to work, to get the variable mode.  If for no
>> other reason than I guess these deref's, if the var is not known,
>> start w/ deref_cast, and it would be ugly for deref_cast to have to
>> work differently for compute vs vk.  But maybe Jason already has some
>> thoughts about it?
>
> I'd like to avoid fat pointers alltogether on AMD since we would not
> use it even for CL. a generic pointer is just a uint64_t for us, with
> no bitfield in there for the address space.
>
> I think we may need to think a bit more about representation however,
> as e.g. for AMD a pointer is typically 64-bits (but we can do e.g.
> 32-bits for known workgroup pointers), the current deref instructions
> return 32-bit, and you want something like a uvec2 as pointer
> representation?

afaiu, newer AMD (and NV) hw can remap shared/private into a single
global address space..  But I guess that is an easy subset of the
harder case where drivers need to use different instructions.. so a
pretty simple lowering pass run before lower_io could remap things
that use fatptrs into something that ignores fatptr.y.  Then opt
passes make fatptr.y go away.  So both AMD and hw that doesn't have a
flat address space are happy.

BR,
-R