[Mesa-dev] [PATCH v3 002/104] nir: Add a deref instruction type
Rob Clark
robdclark at gmail.com
Sun Apr 8 20:48:30 UTC 2018
On Sun, Apr 8, 2018 at 4:26 PM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> On Sun, Apr 8, 2018 at 6:06 PM, Rob Clark <robdclark at gmail.com> wrote:
>> On Sun, Apr 8, 2018 at 11:15 AM, Bas Nieuwenhuizen
>> <bas at basnieuwenhuizen.nl> wrote:
>>> On Sun, Apr 8, 2018 at 3:29 PM, Rob Clark <robdclark at gmail.com> wrote:
>>>> On Sun, Apr 8, 2018 at 8:58 AM, Bas Nieuwenhuizen
>>>> <bas at basnieuwenhuizen.nl> wrote:
>>>>> On Sun, Apr 8, 2018 at 1:38 PM, Rob Clark <robdclark at gmail.com> wrote:
>>>>>> On Tue, Apr 3, 2018 at 2:32 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
>>>>>>> This commit adds a new instruction type to NIR for handling derefs.
>>>>>>> Nothing uses it yet but this adds the data structure as well as all of
>>>>>>> the code to validate, print, clone, and [de]serialize them.
>>>>>>> ---
>>>>>>> src/compiler/nir/nir.c | 50 +++++++++++++++++++
>>>>>>> src/compiler/nir/nir.h | 58 ++++++++++++++++++++-
>>>>>>> src/compiler/nir/nir_clone.c | 42 ++++++++++++++++
>>>>>>> src/compiler/nir/nir_instr_set.c | 78 +++++++++++++++++++++++++++++
>>>>>>> src/compiler/nir/nir_opt_copy_propagate.c | 62 +++++++++++++++++++----
>>>>>>> src/compiler/nir/nir_opt_dce.c | 7 +++
>>>>>>> src/compiler/nir/nir_print.c | 56 +++++++++++++++++++++
>>>>>>> src/compiler/nir/nir_serialize.c | 81 ++++++++++++++++++++++++++++++
>>>>>>> src/compiler/nir/nir_validate.c | 83 +++++++++++++++++++++++++++++++
>>>>>>> 9 files changed, 506 insertions(+), 11 deletions(-)
>>>>>>>
>>>>>>> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
>>>>>>> index 8364197..a538f22 100644
>>>>>>> --- a/src/compiler/nir/nir.c
>>>>>>> +++ b/src/compiler/nir/nir.c
>>>>>>> @@ -470,6 +470,26 @@ nir_alu_instr_create(nir_shader *shader, nir_op op)
>>>>>>> return instr;
>>>>>>> }
>>>>>>>
>>>>>>> +nir_deref_instr *
>>>>>>> +nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
>>>>>>> +{
>>>>>>> + nir_deref_instr *instr =
>>>>>>> + rzalloc_size(shader, sizeof(nir_deref_instr));
>>>>>>> +
>>>>>>> + instr_init(&instr->instr, nir_instr_type_deref);
>>>>>>> +
>>>>>>> + instr->deref_type = deref_type;
>>>>>>> + if (deref_type != nir_deref_type_var)
>>>>>>> + src_init(&instr->parent);
>>>>>>> +
>>>>>>> + if (deref_type == nir_deref_type_array)
>>>>>>> + src_init(&instr->arr.index);
>>>>>>> +
>>>>>>> + dest_init(&instr->dest);
>>>>>>> +
>>>>>>> + return instr;
>>>>>>> +}
>>>>>>> +
>>>>>>> nir_jump_instr *
>>>>>>> nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
>>>>>>> {
>>>>>>> @@ -1199,6 +1219,12 @@ visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
>>>>>>> }
>>>>>>>
>>>>>>> static bool
>>>>>>> +visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
>>>>>>> +{
>>>>>>> + return cb(&instr->dest, state);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static bool
>>>>>>> visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
>>>>>>> void *state)
>>>>>>> {
>>>>>>> @@ -1239,6 +1265,8 @@ nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
>>>>>>> switch (instr->type) {
>>>>>>> case nir_instr_type_alu:
>>>>>>> return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
>>>>>>> + case nir_instr_type_deref:
>>>>>>> + return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
>>>>>>> case nir_instr_type_intrinsic:
>>>>>>> return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
>>>>>>> case nir_instr_type_tex:
>>>>>>> @@ -1284,6 +1312,7 @@ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
>>>>>>> {
>>>>>>> switch (instr->type) {
>>>>>>> case nir_instr_type_alu:
>>>>>>> + case nir_instr_type_deref:
>>>>>>> case nir_instr_type_tex:
>>>>>>> case nir_instr_type_intrinsic:
>>>>>>> case nir_instr_type_phi:
>>>>>>> @@ -1350,6 +1379,23 @@ visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
>>>>>>> }
>>>>>>>
>>>>>>> static bool
>>>>>>> +visit_deref_instr_src(nir_deref_instr *instr,
>>>>>>> + nir_foreach_src_cb cb, void *state)
>>>>>>> +{
>>>>>>> + if (instr->deref_type != nir_deref_type_var) {
>>>>>>> + if (!visit_src(&instr->parent, cb, state))
>>>>>>> + return false;
>>>>>>> + }
>>>>>>> +
>>>>>>> + if (instr->deref_type == nir_deref_type_array) {
>>>>>>> + if (!visit_src(&instr->arr.index, cb, state))
>>>>>>> + return false;
>>>>>>> + }
>>>>>>> +
>>>>>>> + return true;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static bool
>>>>>>> visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
>>>>>>> {
>>>>>>> for (unsigned i = 0; i < instr->num_srcs; i++) {
>>>>>>> @@ -1437,6 +1483,10 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
>>>>>>> if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
>>>>>>> return false;
>>>>>>> break;
>>>>>>> + case nir_instr_type_deref:
>>>>>>> + if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
>>>>>>> + return false;
>>>>>>> + break;
>>>>>>> case nir_instr_type_intrinsic:
>>>>>>> if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
>>>>>>> return false;
>>>>>>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>>>>>>> index cc7c401..db7dc91 100644
>>>>>>> --- a/src/compiler/nir/nir.h
>>>>>>> +++ b/src/compiler/nir/nir.h
>>>>>>> @@ -427,6 +427,7 @@ typedef struct nir_register {
>>>>>>>
>>>>>>> typedef enum {
>>>>>>> nir_instr_type_alu,
>>>>>>> + nir_instr_type_deref,
>>>>>>> nir_instr_type_call,
>>>>>>> nir_instr_type_tex,
>>>>>>> nir_instr_type_intrinsic,
>>>>>>> @@ -894,7 +895,9 @@ bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
>>>>>>> typedef enum {
>>>>>>> nir_deref_type_var,
>>>>>>> nir_deref_type_array,
>>>>>>> - nir_deref_type_struct
>>>>>>> + nir_deref_type_array_wildcard,
>>>>>>> + nir_deref_type_struct,
>>>>>>> + nir_deref_type_cast,
>>>>>>> } nir_deref_type;
>>>>>>>
>>>>>>> typedef struct nir_deref {
>>>>>>> @@ -956,6 +959,56 @@ nir_deref_tail(nir_deref *deref)
>>>>>>> typedef struct {
>>>>>>> nir_instr instr;
>>>>>>>
>>>>>>> + /** The type of this deref instruction */
>>>>>>> + nir_deref_type deref_type;
>>>>>>> +
>>>>>>> + /** The mode of the underlying variable */
>>>>>>> + nir_variable_mode mode;
>>>>>>
>>>>>> In fact, it seems like deref->mode is unused outside of nir_print and
>>>>>> nir_validate.. for logical addressing we can get the mode from the
>>>>>> deref_var->var at the start of the chain, and deref->mode has no
>>>>>> meaning for physical addressing (where the mode comes from the
>>>>>> pointer).
>>>>>>
>>>>>> So maybe just drop deref->mode?
>>>>>
>>>>> Isn't it still useful with logical addressing in case a var is not
>>>>> immediately available? (think VK_KHR_variable_pointers)
>>>>
>>>> not sure, maybe this should just also use fat-pointers like physical
>>>> addressing does??
>>>>
>>>>> Also I could see this being useful in physical addressing too to avoid
>>>>> all passes working with derefs needing to do the constant folding?
>>>>
>>>> The problem is that you don't necessarily know the type at compile
>>>> time (and in the case where you do, you need to do constant folding to
>>>> figure it out)
>>>
>>> So I have two considerations here
>>>
>>> 1) for vulkan you always know the mode, even when you don't know the var.
>>> 2) In CL the mode can still get annotated in the source program (CL C
>>> non-generic pointers) in cases in which we cannot reasonably figure it
>>> out with just constant folding. In those cases the mode is extra
>>> information that you really lose.
>>
>> so, even in cl 1.x, you could do things like 'somefxn(foo ? global_ptr
>> : local_ptr)'.. depending on how much we inline all the things, that
>> might not get CF'd away.
>
> But something like
> __constant int *ptr_value = ...;
> store ptr in complex data structure.
> __constant int* ptr2 = load from complex data structure.
>
> Without explicitly annotating ptr2 it is unlikely that constant
> folding would find that ptr2 is pointing to __constant address space.
> Hence removing the modes loses valuable information that you cannot
> get back by constant folding. However, if you have a pointer with
> unknown mode, we could have a special mode (or mode_all?) and you can
> use the uvec2 representation in that case?
hmm, I'm not really getting how deref->mode could magically have
information that fatptr.y doesn't have.. if the mode is known, vtn
could stash it in fatptr.y and everyone is happy? If vtn doesn't know
this, then I don't see how deref->mode helps..
>>
>> I think I'm leaning towards using fat ptrs for the vk case, since I
>> guess that is a case where you could always expect
>> nir_src_as_const_value() to work, to get the variable mode. If for no
>> other reason than I guess these deref's, if the var is not known,
>> start w/ deref_cast, and it would be ugly for deref_cast to have to
>> work differently for compute vs vk. But maybe Jason already has some
>> thoughts about it?
>
> I'd like to avoid fat pointers alltogether on AMD since we would not
> use it even for CL. a generic pointer is just a uint64_t for us, with
> no bitfield in there for the address space.
>
> I think we may need to think a bit more about representation however,
> as e.g. for AMD a pointer is typically 64-bits (but we can do e.g.
> 32-bits for known workgroup pointers), the current deref instructions
> return 32-bit, and you want something like a uvec2 as pointer
> representation?
afaiu, newer AMD (and NV) hw can remap shared/private into a single
global address space.. But I guess that is an easy subset of the
harder case where drivers need to use different instructions.. so a
pretty simple lowering pass run before lower_io could remap things
that use fatptrs into something that ignores fatptr.y. Then opt
passes make fatptr.y go away. So both AMD and hw that doesn't have a
flat address space are happy.
BR,
-R
More information about the mesa-dev
mailing list