[Mesa-dev] [PATCH 8/9] nir: Implement a nir_sweep() pass.

Jason Ekstrand jason at jlekstrand.net
Wed Apr 1 17:54:29 PDT 2015


On Sat, Mar 28, 2015 at 2:28 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> This pass performs a mark and sweep pass over a nir_shader's associated
> memory - anything still connected to the program will be kept, and any
> dead memory we dropped on the floor will be freed.
>
> The expectation is that this will be called when finished building and
> optimizing the shader.  However, it's also fine to call it earlier, and
> many times, to free up memory earlier.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/glsl/Makefile.sources |   1 +
>  src/glsl/nir/nir.h        |   2 +
>  src/glsl/nir/nir_sweep.c  | 299 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 302 insertions(+)
>  create mode 100644 src/glsl/nir/nir_sweep.c
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index 8d29c55..7046407 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -54,6 +54,7 @@ NIR_FILES = \
>         nir/nir_search.c \
>         nir/nir_search.h \
>         nir/nir_split_var_copies.c \
> +       nir/nir_sweep.c \
>         nir/nir_to_ssa.c \
>         nir/nir_types.h \
>         nir/nir_validate.c \
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index 7b886e3..946f895 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1632,6 +1632,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader);
>
>  bool nir_opt_remove_phis(nir_shader *shader);
>
> +void nir_sweep(nir_shader *shader);
> +
>  #ifdef __cplusplus
>  } /* extern "C" */
>  #endif
> diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c
> new file mode 100644
> index 0000000..cba5be7
> --- /dev/null
> +++ b/src/glsl/nir/nir_sweep.c
> @@ -0,0 +1,299 @@
> +/*
> + * Copyright © 2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +
> +/**
> + * \file nir_sweep.c
> + *
> + * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
> + * memory - anything still connected to the program will be kept, and any dead memory
> + * we dropped on the floor will be freed.
> + *
> + * The expectation is that drivers should call this when finished compiling the shader
> + * (after any optimization, lowering, and so on).  However, it's also fine to call it
> + * earlier, and even many times, trading CPU cycles for memory savings.
> + */
> +
> +#define steal_list(mem_ctx, type, list) \
> +   foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
> +
> +static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
> +
> +static void
> +sweep_ssa_def(nir_shader *nir, nir_ssa_def *ssa)
> +{
> +   ralloc_steal(nir, ssa->uses);
> +   ralloc_steal(nir, ssa->if_uses);
> +}
> +
> +static void
> +sweep_src(nir_shader *nir, nir_src *src)
> +{
> +   if (!src)
> +      return;
> +
> +   if (src->is_ssa) {
> +      sweep_ssa_def(nir, src->ssa);
> +   } else {
> +      sweep_src(nir, src->reg.indirect);
> +   }
> +}
> +
> +static void
> +sweep_dest(nir_shader *nir, nir_dest *dest)
> +{
> +   if (dest->is_ssa) {
> +      sweep_ssa_def(nir, &dest->ssa);
> +   } else {
> +      sweep_src(nir, dest->reg.indirect);
> +   }
> +}
> +
> +static void
> +sweep_deref_chain(nir_shader *nir, nir_deref *deref)
> +{
> +   for (; deref; deref = deref->child) {
> +      ralloc_steal(nir, deref);
> +   }
> +}
> +
> +static void
> +sweep_alu_instr(nir_shader *nir, nir_alu_instr *alu)
> +{
> +   for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
> +      sweep_src(nir, &alu->src[i].src);
> +   }
> +
> +   sweep_dest(nir, &alu->dest.dest);
> +}
> +
> +static void
> +sweep_call_instr(nir_shader *nir, nir_call_instr *call)
> +{
> +   ralloc_steal(nir, call->params);
> +   for (int i = 0; i < call->num_params; i++) {
> +      sweep_deref_chain(nir, &call->params[i]->deref);
> +   }
> +   if (call->return_deref)
> +      sweep_deref_chain(nir, &call->return_deref->deref);
> +}
> +
> +static void
> +sweep_tex_instr(nir_shader *nir, nir_tex_instr *tex)
> +{
> +   if (tex->sampler)
> +      sweep_deref_chain(nir, &tex->sampler->deref);
> +
> +   ralloc_steal(nir, tex->src);
> +   for (int i = 0; i < tex->num_srcs; i++) {
> +      sweep_src(nir, &tex->src[i].src);
> +   }
> +}
> +
> +static void
> +sweep_intrinsic_instr(nir_shader *nir, nir_intrinsic_instr *intrin)
> +{
> +   for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
> +      sweep_src(nir, &intrin->src[i]);
> +   }
> +
> +   for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) {
> +      sweep_deref_chain(nir, &intrin->variables[i]->deref);
> +   }
> +
> +   sweep_dest(nir, &intrin->dest);
> +}
> +
> +static void
> +sweep_load_const_instr(nir_shader *nir, nir_load_const_instr *load_const)
> +{
> +   sweep_ssa_def(nir, &load_const->def);
> +}
> +
> +static void
> +sweep_ssa_undef_instr(nir_shader *nir, nir_ssa_undef_instr *ssa_undef)
> +{
> +   sweep_ssa_def(nir, &ssa_undef->def);
> +}
> +
> +static void
> +sweep_phi_instr(nir_shader *nir, nir_phi_instr *phi)
> +{
> +   foreach_list_typed(nir_phi_src, phi_src, node, &phi->srcs) {
> +      ralloc_steal(nir, phi_src);
> +      /* skip pred */
> +      sweep_src(nir, &phi_src->src);
> +   }
> +
> +   sweep_dest(nir, &phi->dest);
> +}
> +
> +static void
> +sweep_parallel_copy_instr(nir_shader *nir, nir_parallel_copy_instr *parallel_copy)
> +{
> +   /* Nothing to do here?  It looks like nir_from_ssa already tidies up after itself. */
> +}
> +
> +
> +static void
> +sweep_block(nir_shader *nir, nir_block *block)
> +{
> +   ralloc_steal(nir, block);
> +
> +   nir_foreach_instr(block, instr) {
> +      ralloc_steal(nir, instr);
> +
> +      switch (instr->type) {
> +      case nir_instr_type_alu:
> +         sweep_alu_instr(nir, nir_instr_as_alu(instr));
> +         break;
> +      case nir_instr_type_call:
> +         sweep_call_instr(nir, nir_instr_as_call(instr));
> +         break;
> +      case nir_instr_type_tex:
> +         sweep_tex_instr(nir, nir_instr_as_tex(instr));
> +         break;
> +      case nir_instr_type_intrinsic:
> +         sweep_intrinsic_instr(nir, nir_instr_as_intrinsic(instr));
> +         break;
> +      case nir_instr_type_load_const:
> +         sweep_load_const_instr(nir, nir_instr_as_load_const(instr));
> +         break;
> +      case nir_instr_type_ssa_undef:
> +         sweep_ssa_undef_instr(nir, nir_instr_as_ssa_undef(instr));
> +         break;
> +      case nir_instr_type_jump:
> +         /* Nothing to do */
> +         break;
> +      case nir_instr_type_phi:
> +         sweep_phi_instr(nir, nir_instr_as_phi(instr));
> +         break;
> +      case nir_instr_type_parallel_copy:
> +         sweep_parallel_copy_instr(nir, nir_instr_as_parallel_copy(instr));
> +         break;
> +      default:
> +         unreachable("Invalid instruction type");
> +      }
> +   }
> +}

I really don't like having to independantly sweep instructions.  I
think we should be able to just have everything inside the instruction
parented to the instruction and ralloc_steal the instruction.  That
would make sweeping *much* cleaner and would also mean that deleting
an instruction actually cleans it up.

> +static void
> +sweep_if(nir_shader *nir, nir_if *iff)
> +{
> +   ralloc_steal(nir, iff);

This isn't sufficient.  The if can have a source with a relative
offset (yes, that's a crazy case) and that will have a ralloc'd source
that doesn't get swept.

> +   foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {
> +      sweep_cf_node(nir, cf_node);
> +   }
> +
> +   foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {
> +      sweep_cf_node(nir, cf_node);
> +   }
> +}
> +
> +static void
> +sweep_loop(nir_shader *nir, nir_loop *loop)
> +{
> +   ralloc_steal(nir, loop);
> +
> +   foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
> +      sweep_cf_node(nir, cf_node);
> +   }
> +}
> +
> +static void
> +sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
> +{
> +   switch (cf_node->type) {
> +   case nir_cf_node_block:
> +      sweep_block(nir, nir_cf_node_as_block(cf_node));
> +      break;
> +   case nir_cf_node_if:
> +      sweep_if(nir, nir_cf_node_as_if(cf_node));
> +      break;
> +   case nir_cf_node_loop:
> +      sweep_loop(nir, nir_cf_node_as_loop(cf_node));
> +      break;
> +   default:
> +      unreachable("Invalid CF node type");
> +   }
> +}
> +
> +static void
> +sweep_impl(nir_shader *nir, nir_function_impl *impl)
> +{
> +   ralloc_steal(nir, impl);
> +
> +   ralloc_steal(nir, impl->params);
> +   ralloc_steal(nir, impl->return_var);
> +   steal_list(nir, nir_variable, &impl->locals);
> +   steal_list(nir, nir_register, &impl->registers);
> +   sweep_block(nir, impl->start_block);

The start block is in the cf_node list so you don't need to sweep it separately

> +   sweep_block(nir, impl->end_block);

Put this below sweeping the cf_node list.

> +
> +   foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
> +      sweep_cf_node(nir, cf_node);
> +   }
> +
> +   /* Wipe out all the metadata, if any. */
> +   nir_metadata_preserve(impl, nir_metadata_none);

Is this really needed?  We shouldn't be changing any metadata.  In
particular, we don't actually change any pointers.

> +}
> +
> +static void
> +sweep_function(nir_shader *nir, nir_function *f)
> +{
> +   ralloc_steal(nir, f);
> +
> +   foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) {
> +      ralloc_steal(nir, overload);
> +      ralloc_steal(nir, overload->params);
> +      if (overload->impl)
> +         sweep_impl(nir, overload->impl);
> +   }
> +}
> +
> +void
> +nir_sweep(nir_shader *nir)

I made the s/sweap/steal/ comment earlier.  I'm still not a fan of
"sweep", but I don't know that I like "steal" either.  If this is a
"mark and sweep" pass then the "sweep" functions are actually
performing the "mark" step and ralloc_free is performing the sweep.

> +{
> +   void *rubbish = ralloc_context(NULL);
> +
> +   /* First, move ownership of all the memory to a temporary context; assume dead. */
> +   ralloc_adopt(rubbish, nir);

Yes, we could do it this way.  Or we can simply have a dedicated
context for each shader that isn't the nir_shader itself.  Then we
would make a new context, move everything from old to new and delete
the old.  I don't know for sure which I prefer.
--Jason

> +   /* Variables and registers are not dead.  Steal them back. */
> +   steal_list(nir, nir_variable, &nir->uniforms);
> +   steal_list(nir, nir_variable, &nir->inputs);
> +   steal_list(nir, nir_variable, &nir->outputs);
> +   steal_list(nir, nir_variable, &nir->globals);
> +   steal_list(nir, nir_variable, &nir->system_values);
> +   steal_list(nir, nir_register, &nir->registers);
> +
> +   /* Recurse into functions, stealing their contents back. */
> +   foreach_list_typed(nir_function, func, node, &nir->functions) {
> +      sweep_function(nir, func);
> +   }
> +
> +   /* Free everything we didn't steal back. */
> +   ralloc_free(rubbish);
> +}
> --
> 2.3.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list