[Mesa-dev] [PATCH 8/9] nir: Implement a nir_sweep() pass.
Jason Ekstrand
jason at jlekstrand.net
Wed Apr 1 17:54:29 PDT 2015
On Sat, Mar 28, 2015 at 2:28 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> This pass performs a mark and sweep pass over a nir_shader's associated
> memory - anything still connected to the program will be kept, and any
> dead memory we dropped on the floor will be freed.
>
> The expectation is that this will be called when finished building and
> optimizing the shader. However, it's also fine to call it earlier, and
> many times, to free up memory earlier.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/glsl/Makefile.sources | 1 +
> src/glsl/nir/nir.h | 2 +
> src/glsl/nir/nir_sweep.c | 299 ++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 302 insertions(+)
> create mode 100644 src/glsl/nir/nir_sweep.c
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index 8d29c55..7046407 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -54,6 +54,7 @@ NIR_FILES = \
> nir/nir_search.c \
> nir/nir_search.h \
> nir/nir_split_var_copies.c \
> + nir/nir_sweep.c \
> nir/nir_to_ssa.c \
> nir/nir_types.h \
> nir/nir_validate.c \
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index 7b886e3..946f895 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1632,6 +1632,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader);
>
> bool nir_opt_remove_phis(nir_shader *shader);
>
> +void nir_sweep(nir_shader *shader);
> +
> #ifdef __cplusplus
> } /* extern "C" */
> #endif
> diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c
> new file mode 100644
> index 0000000..cba5be7
> --- /dev/null
> +++ b/src/glsl/nir/nir_sweep.c
> @@ -0,0 +1,299 @@
> +/*
> + * Copyright © 2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +
> +/**
> + * \file nir_sweep.c
> + *
> + * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
> + * memory - anything still connected to the program will be kept, and any dead memory
> + * we dropped on the floor will be freed.
> + *
> + * The expectation is that drivers should call this when finished compiling the shader
> + * (after any optimization, lowering, and so on). However, it's also fine to call it
> + * earlier, and even many times, trading CPU cycles for memory savings.
> + */
> +
> +#define steal_list(mem_ctx, type, list) \
> + foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
> +
> +static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
> +
> +static void
> +sweep_ssa_def(nir_shader *nir, nir_ssa_def *ssa)
> +{
> + ralloc_steal(nir, ssa->uses);
> + ralloc_steal(nir, ssa->if_uses);
> +}
> +
> +static void
> +sweep_src(nir_shader *nir, nir_src *src)
> +{
> + if (!src)
> + return;
> +
> + if (src->is_ssa) {
> + sweep_ssa_def(nir, src->ssa);
> + } else {
> + sweep_src(nir, src->reg.indirect);
> + }
> +}
> +
> +static void
> +sweep_dest(nir_shader *nir, nir_dest *dest)
> +{
> + if (dest->is_ssa) {
> + sweep_ssa_def(nir, &dest->ssa);
> + } else {
> + sweep_src(nir, dest->reg.indirect);
> + }
> +}
> +
> +static void
> +sweep_deref_chain(nir_shader *nir, nir_deref *deref)
> +{
> + for (; deref; deref = deref->child) {
> + ralloc_steal(nir, deref);
> + }
> +}
> +
> +static void
> +sweep_alu_instr(nir_shader *nir, nir_alu_instr *alu)
> +{
> + for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
> + sweep_src(nir, &alu->src[i].src);
> + }
> +
> + sweep_dest(nir, &alu->dest.dest);
> +}
> +
> +static void
> +sweep_call_instr(nir_shader *nir, nir_call_instr *call)
> +{
> + ralloc_steal(nir, call->params);
> + for (int i = 0; i < call->num_params; i++) {
> + sweep_deref_chain(nir, &call->params[i]->deref);
> + }
> + if (call->return_deref)
> + sweep_deref_chain(nir, &call->return_deref->deref);
> +}
> +
> +static void
> +sweep_tex_instr(nir_shader *nir, nir_tex_instr *tex)
> +{
> + if (tex->sampler)
> + sweep_deref_chain(nir, &tex->sampler->deref);
> +
> + ralloc_steal(nir, tex->src);
> + for (int i = 0; i < tex->num_srcs; i++) {
> + sweep_src(nir, &tex->src[i].src);
> + }
> +}
> +
> +static void
> +sweep_intrinsic_instr(nir_shader *nir, nir_intrinsic_instr *intrin)
> +{
> + for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
> + sweep_src(nir, &intrin->src[i]);
> + }
> +
> + for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) {
> + sweep_deref_chain(nir, &intrin->variables[i]->deref);
> + }
> +
> + sweep_dest(nir, &intrin->dest);
> +}
> +
> +static void
> +sweep_load_const_instr(nir_shader *nir, nir_load_const_instr *load_const)
> +{
> + sweep_ssa_def(nir, &load_const->def);
> +}
> +
> +static void
> +sweep_ssa_undef_instr(nir_shader *nir, nir_ssa_undef_instr *ssa_undef)
> +{
> + sweep_ssa_def(nir, &ssa_undef->def);
> +}
> +
> +static void
> +sweep_phi_instr(nir_shader *nir, nir_phi_instr *phi)
> +{
> + foreach_list_typed(nir_phi_src, phi_src, node, &phi->srcs) {
> + ralloc_steal(nir, phi_src);
> + /* skip pred */
> + sweep_src(nir, &phi_src->src);
> + }
> +
> + sweep_dest(nir, &phi->dest);
> +}
> +
> +static void
> +sweep_parallel_copy_instr(nir_shader *nir, nir_parallel_copy_instr *parallel_copy)
> +{
> + /* Nothing to do here? It looks like nir_from_ssa already tidies up after itself. */
> +}
> +
> +
> +static void
> +sweep_block(nir_shader *nir, nir_block *block)
> +{
> + ralloc_steal(nir, block);
> +
> + nir_foreach_instr(block, instr) {
> + ralloc_steal(nir, instr);
> +
> + switch (instr->type) {
> + case nir_instr_type_alu:
> + sweep_alu_instr(nir, nir_instr_as_alu(instr));
> + break;
> + case nir_instr_type_call:
> + sweep_call_instr(nir, nir_instr_as_call(instr));
> + break;
> + case nir_instr_type_tex:
> + sweep_tex_instr(nir, nir_instr_as_tex(instr));
> + break;
> + case nir_instr_type_intrinsic:
> + sweep_intrinsic_instr(nir, nir_instr_as_intrinsic(instr));
> + break;
> + case nir_instr_type_load_const:
> + sweep_load_const_instr(nir, nir_instr_as_load_const(instr));
> + break;
> + case nir_instr_type_ssa_undef:
> + sweep_ssa_undef_instr(nir, nir_instr_as_ssa_undef(instr));
> + break;
> + case nir_instr_type_jump:
> + /* Nothing to do */
> + break;
> + case nir_instr_type_phi:
> + sweep_phi_instr(nir, nir_instr_as_phi(instr));
> + break;
> + case nir_instr_type_parallel_copy:
> + sweep_parallel_copy_instr(nir, nir_instr_as_parallel_copy(instr));
> + break;
> + default:
> + unreachable("Invalid instruction type");
> + }
> + }
> +}
I really don't like having to independently sweep instructions. I
think we should be able to just have everything inside the instruction
parented to the instruction and ralloc_steal the instruction. That
would make sweeping *much* cleaner and would also mean that deleting
an instruction actually cleans it up.
> +static void
> +sweep_if(nir_shader *nir, nir_if *iff)
> +{
> + ralloc_steal(nir, iff);
This isn't sufficient. The if can have a source with a relative
offset (yes, that's a crazy case) and that will have a ralloc'd source
that doesn't get swept.
> + foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {
> + sweep_cf_node(nir, cf_node);
> + }
> +
> + foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {
> + sweep_cf_node(nir, cf_node);
> + }
> +}
> +
> +static void
> +sweep_loop(nir_shader *nir, nir_loop *loop)
> +{
> + ralloc_steal(nir, loop);
> +
> + foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
> + sweep_cf_node(nir, cf_node);
> + }
> +}
> +
> +static void
> +sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
> +{
> + switch (cf_node->type) {
> + case nir_cf_node_block:
> + sweep_block(nir, nir_cf_node_as_block(cf_node));
> + break;
> + case nir_cf_node_if:
> + sweep_if(nir, nir_cf_node_as_if(cf_node));
> + break;
> + case nir_cf_node_loop:
> + sweep_loop(nir, nir_cf_node_as_loop(cf_node));
> + break;
> + default:
> + unreachable("Invalid CF node type");
> + }
> +}
> +
> +static void
> +sweep_impl(nir_shader *nir, nir_function_impl *impl)
> +{
> + ralloc_steal(nir, impl);
> +
> + ralloc_steal(nir, impl->params);
> + ralloc_steal(nir, impl->return_var);
> + steal_list(nir, nir_variable, &impl->locals);
> + steal_list(nir, nir_register, &impl->registers);
> + sweep_block(nir, impl->start_block);
The start block is in the cf_node list, so you don't need to sweep it separately.
> + sweep_block(nir, impl->end_block);
Put this below sweeping the cf_node list.
> +
> + foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
> + sweep_cf_node(nir, cf_node);
> + }
> +
> + /* Wipe out all the metadata, if any. */
> + nir_metadata_preserve(impl, nir_metadata_none);
Is this really needed? We shouldn't be changing any metadata. In
particular, we don't actually change any pointers.
> +}
> +
> +static void
> +sweep_function(nir_shader *nir, nir_function *f)
> +{
> + ralloc_steal(nir, f);
> +
> + foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) {
> + ralloc_steal(nir, overload);
> + ralloc_steal(nir, overload->params);
> + if (overload->impl)
> + sweep_impl(nir, overload->impl);
> + }
> +}
> +
> +void
> +nir_sweep(nir_shader *nir)
I made the s/sweep/steal/ comment earlier. I'm still not a fan of
"sweep", but I don't know that I like "steal" either. If this is a
"mark and sweep" pass, then the "sweep" functions are actually
performing the "mark" step and ralloc_free is performing the sweep.
> +{
> + void *rubbish = ralloc_context(NULL);
> +
> + /* First, move ownership of all the memory to a temporary context; assume dead. */
> + ralloc_adopt(rubbish, nir);
Yes, we could do it this way. Or we can simply have a dedicated
context for each shader that isn't the nir_shader itself. Then we
would make a new context, move everything from old to new and delete
the old. I don't know for sure which I prefer.
--Jason
> + /* Variables and registers are not dead. Steal them back. */
> + steal_list(nir, nir_variable, &nir->uniforms);
> + steal_list(nir, nir_variable, &nir->inputs);
> + steal_list(nir, nir_variable, &nir->outputs);
> + steal_list(nir, nir_variable, &nir->globals);
> + steal_list(nir, nir_variable, &nir->system_values);
> + steal_list(nir, nir_register, &nir->registers);
> +
> + /* Recurse into functions, stealing their contents back. */
> + foreach_list_typed(nir_function, func, node, &nir->functions) {
> + sweep_function(nir, func);
> + }
> +
> + /* Free everything we didn't steal back. */
> + ralloc_free(rubbish);
> +}
> --
> 2.3.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list