[Mesa-dev] [PATCH 3/4] nir: support to flatten_all in peephole-select

Wed Apr 1 16:24:08 PDT 2015

On Wed, Apr 1, 2015 at 4:20 PM, Rob Clark <robdclark at gmail.com> wrote:
> On Wed, Apr 1, 2015 at 6:56 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
>> On Tue, Mar 31, 2015 at 3:57 PM, Rob Clark <robdclark at gmail.com> wrote:
>>> From: Rob Clark <robclark at freedesktop.org>
>>>
>>> Freedreno and vc4 want this behavior for the time being (until we have
>>> real flow control).  Even after that, we probably want to turn this into
>>> some sort of driver tunable threshold, since for at least some hardware,
>>> reasonably large if/else is best flattend rather than having divergent
>>> flow control.
>>>
>>> NOTE: wasn't sure about some of the additional restrictions in
>>> block_check_for_allowed_instrs()..  are there some other cases where
>>> I might need to fix things up in order to be guaranteed to be able to
>>> flatten?
>>>
>>> NOTE: drop vc4 hunk if this is merged first, ofc
>>>
>>> Signed-off-by: Rob Clark <robclark at freedesktop.org>
>>> ---
>>>  src/gallium/drivers/vc4/vc4_program.c    |  2 +-
>>>  src/glsl/nir/nir.h                       |  2 +-
>>>  src/glsl/nir/nir_opt_peephole_select.c   | 21 +++++++++++++++------
>>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp |  2 +-
>>>  4 files changed, 18 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
>>> index db51665..09896ce 100644
>>> --- a/src/gallium/drivers/vc4/vc4_program.c
>>> +++ b/src/gallium/drivers/vc4/vc4_program.c
>>> @@ -1673,7 +1673,7 @@ vc4_optimize_nir(struct nir_shader *s)
>>>                  progress = nir_copy_prop(s) || progress;
>>>                  progress = nir_opt_dce(s) || progress;
>>>                  progress = nir_opt_cse(s) || progress;
>>> -                progress = nir_opt_peephole_select(s) || progress;
>>> +                progress = nir_opt_peephole_select(s, true) || progress;
>>>                  progress = nir_opt_algebraic(s) || progress;
>>>                  progress = nir_opt_constant_folding(s) || progress;
>>>          } while (progress);
>>> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>>> index 74927e5..cd03d6b 100644
>>> --- a/src/glsl/nir/nir.h
>>> +++ b/src/glsl/nir/nir.h
>>> @@ -1631,7 +1631,7 @@ bool nir_opt_dce(nir_shader *shader);
>>>
>>>  void nir_opt_gcm(nir_shader *shader);
>>>
>>> -bool nir_opt_peephole_select(nir_shader *shader);
>>> +bool nir_opt_peephole_select(nir_shader *shader, bool flatten_all);
>>>  bool nir_opt_peephole_ffma(nir_shader *shader);
>>>
>>>  bool nir_opt_remove_phis(nir_shader *shader);
>>> diff --git a/src/glsl/nir/nir_opt_peephole_select.c b/src/glsl/nir/nir_opt_peephole_select.c
>>> index b89451b..e48fc7a 100644
>>> --- a/src/glsl/nir/nir_opt_peephole_select.c
>>> +++ b/src/glsl/nir/nir_opt_peephole_select.c
>>> @@ -44,10 +44,16 @@
>>>   * whose only use is one of the following phi nodes.  This happens all the
>>>   * time when the SSA form comes from a conditional assignment with a
>>>   * swizzle.
>>> + *
>>> + * If 'flatten_all' is true, then flatten *all* if/else into one block
>>> + * and use select to pick the winners.  This is useful for drivers that
>>> + * do not (yet) have proper flow control.  Eventually we probably want
>>> + * to make this a more clever driver tunable threshold.
>>>   */
>>>
>>>  struct peephole_select_state {
>>>     void *mem_ctx;
>>> +   bool flatten_all;
>>>     bool progress;
>>>  };
>>>
>>> @@ -150,9 +156,10 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
>>>     nir_block *else_block = nir_cf_node_as_block(else_node);
>>>
>>>     /* ... and those blocks must only contain "allowed" instructions. */
>>> -   if (!block_check_for_allowed_instrs(then_block) ||
>>> -       !block_check_for_allowed_instrs(else_block))
>>> -      return true;
>>> +   if (!state->flatten_all)
>>> +      if (!block_check_for_allowed_instrs(then_block) ||
>>> +          !block_check_for_allowed_instrs(else_block))
>>> +         return true;
>>
>> This isn't valid.  We can't blindly flatten if either list of blocks
>> contains an instruction that has side-effects.  In order to do that,
>> we would have to add predication back into NIR.
>>
>
> oh, right.. non-predicated kill would have to get converted into a
> predicated kill.
>
> Beyond that, do I just need to check for
> NIR_INTRINSIC_CAN_ELIMINATE/REORDER?  Or is there a better way to
> identify instructions with side effects?

That should be sufficient for now

>>>     /* At this point, we know that the previous CFG node is an if-then
>>>      * statement containing only moves to phi nodes in this block.  We can
>>> @@ -217,11 +224,12 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
>>>  }
>>>
>>>  static bool
>>> -nir_opt_peephole_select_impl(nir_function_impl *impl)
>>> +nir_opt_peephole_select_impl(nir_function_impl *impl, bool flatten_all)
>>>  {
>>>     struct peephole_select_state state;
>>>
>>>     state.mem_ctx = ralloc_parent(impl);
>>> +   state.flatten_all = flatten_all;
>>>     state.progress = false;
>>>
>>>     nir_foreach_block(impl, nir_opt_peephole_select_block, &state);
>>> @@ -233,13 +241,14 @@ nir_opt_peephole_select_impl(nir_function_impl *impl)
>>>  }
>>>
>>>  bool
>>> -nir_opt_peephole_select(nir_shader *shader)
>>> +nir_opt_peephole_select(nir_shader *shader, bool flatten_all)
>>
>> As matt said, boolean arguments aren't great.  I also don't like
>> arbitrary bitfields.  We do, however, have a shader params struct.
>> You could add a "flatten" flag to it I guess.  However, doing so kind
>> of implies that we actually flatten everything which (as stated above)
>> we can't here.
>
> sure, I'll add it to the params struct.. I guess for the most part
> things that cannot be flattened are not something to care about for
> backends that can't (yet) do proper flow control (other than the
> kill->kill_if), so best-effort flatten_everything is fine..
>
> BR,
> -R
>
>> --Jason
>>
>>>  {
>>>     bool progress = false;
>>>
>>>     nir_foreach_overload(shader, overload) {
>>>        if (overload->impl)
>>> -         progress |= nir_opt_peephole_select_impl(overload->impl);
>>> +         progress |= nir_opt_peephole_select_impl(overload->impl,
>>> +                                                  flatten_all);
>>>     }
>>>
>>>     return progress;
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> index 0b8ed1a..7bd7cd3 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> @@ -47,7 +47,7 @@ nir_optimize(nir_shader *nir)
>>>        nir_validate_shader(nir);
>>>        progress |= nir_opt_cse(nir);
>>>        nir_validate_shader(nir);
>>> -      progress |= nir_opt_peephole_select(nir);
>>> +      progress |= nir_opt_peephole_select(nir, false);
>>>        nir_validate_shader(nir);
>>>        progress |= nir_opt_algebraic(nir);
>>>        nir_validate_shader(nir);
>>> --
>>> 2.1.0
>>>