[Mesa-dev] [PATCH] nir: add lowering for idiv/udiv/umod
Ilia Mirkin
imirkin at alum.mit.edu
Fri Apr 3 08:31:58 PDT 2015
On Fri, Apr 3, 2015 at 11:21 AM, Rob Clark <robdclark at gmail.com> wrote:
> From: Rob Clark <robclark at freedesktop.org>
>
> Based on the algo from NV50LegalizeSSA::handleDIV() and handleMOD().
> See also trans_idiv() in freedreno/ir3/ir3_compiler.c (which was an
> adaptation of the nv50 code from Ilia Mirkin).
>
> Also, including a py script that implements the same algo with numpy,
> based on something written by Ilia (and beaten on with a hammer a bit
> by me).
>
> I've tested this on i965 hacked up to insert the idiv lowering pass.
>
> Signed-off-by: Rob Clark <robclark at freedesktop.org>
> ---
> src/glsl/Makefile.sources | 1 +
> src/glsl/nir/div-lowering.py | 75 ++++++++++++++++++++
Python *really* hates files with a '-' in their name: a plain import
statement can't name them, so you have to use underscores. Admittedly this
script isn't designed for importing, but let's not prevent it in the future.
> src/glsl/nir/nir.h | 1 +
> src/glsl/nir/nir_lower_idiv.c | 157 ++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 234 insertions(+)
> create mode 100755 src/glsl/nir/div-lowering.py
> create mode 100644 src/glsl/nir/nir_lower_idiv.c
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index ffce706..5d70e88 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -33,6 +33,7 @@ NIR_FILES = \
> nir/nir_lower_atomics.c \
> nir/nir_lower_global_vars_to_local.c \
> nir/nir_lower_locals_to_regs.c \
> + nir/nir_lower_idiv.c \
> nir/nir_lower_io.c \
> nir/nir_lower_phis_to_scalar.c \
> nir/nir_lower_samplers.cpp \
> diff --git a/src/glsl/nir/div-lowering.py b/src/glsl/nir/div-lowering.py
> new file mode 100755
> index 0000000..87db784
> --- /dev/null
> +++ b/src/glsl/nir/div-lowering.py
> @@ -0,0 +1,75 @@
> +#!/usr/bin/python
I think it's BS, but you're going to get yelled at by people who have
foolishly set up python to point to python3 (despite the *huge*
quantity of programs that will never change and assume that python ==
python2). Probably just hard-code it to python2 for now, which is a
symlink available in most, but not all, python installations.
> +
> +import numpy as np
> +import sys
> +
> +# Command line: <op> <numerator> <denominator>, where op selects which
> +# lowered operation to emulate.
> +op = sys.argv[1]
> +
> +if op not in ("idiv", "udiv", "umod"):
> + print "invalid op:", op
> + exit(1)
> +
> +# Only idiv works on signed operands; udiv and umod are unsigned.
> +is_signed = op == "idiv"
> +
> +# Parse both operands as 32-bit integers of the matching signedness.
> +if is_signed:
> + numer = np.int32(sys.argv[2])
> + denom = np.int32(sys.argv[3])
> +else:
> + numer = np.uint32(sys.argv[2])
> + denom = np.uint32(sys.argv[3])
> +
> +# Echo the request before the result is computed.
> +print op, numer, denom, "\n"
print emits a newline by default, so there's no need for the "\n". The
exception is a trailing , at the end of the statement, in which case it
skips the newline. That's what I was doing in my version, since I wanted
output like a / b = 5 or whatever on a single line.
> +
> +
> +# Set up the working values:
> +#   af, bf - float32 copies of the operands (absolute values when signed)
> +#   a, b   - the operand magnitudes as uint32
> +if is_signed:
> + af = np.float32(numer)
> + bf = np.float32(denom)
> + af = np.abs(af)
> + bf = np.abs(bf)
> + # reinterpret the int32 absolute values as uint32 bit patterns
> + a = np.abs(numer).view(np.uint32)
> + b = np.abs(denom).view(np.uint32)
> +else:
> + af = np.float32(numer)
> + bf = np.float32(denom)
> + a = numer
> + b = denom
> +
> +# get first result:
> +bf = np.reciprocal(bf)
> +# Subtract 2 from the raw 32-bit pattern of the reciprocal (integer math on
> +# the float's bits, not a float subtraction).
> +# NOTE(review): presumably this biases the reciprocal slightly low so the
> +# first quotient estimate does not overshoot -- confirm against the nv50 code.
> +bf = (bf.view(np.uint32) - np.uint32(2)).view(np.float32)
> +q = af * bf
> +
> +# truncate the float estimate to an integer, kept as uint32 from here on
> +if is_signed:
> + q = np.int32(q).view(np.uint32)
> +else:
> + q = np.uint32(q).view(np.uint32)
> +
> +# get error of first result:
> +# remainder left by the estimate, divided again via the same reciprocal
> +r = q * b
> +r = a - r
> +r = np.float32(r)
> +r = r * bf
> +r = np.uint32(r)
> +
> +# add quotients:
> +q = q + r
> +
> +# correction: if modulus >= divisor, add 1
> +r = q * b
> +r = a - r
> +
> +# the comparison is done on uint32 views, i.e. it is an unsigned compare
> +r = np.uint32(1) if r.view(np.uint32) >= b.view(np.uint32) else np.uint32(0)
> +q = q + r
> +
> +if is_signed:
> + # the sign bit of numer ^ denom is set exactly when the signs differ
> + r = np.bitwise_xor(numer, denom)
> + r = np.right_shift(r, 31)
> + # b is reused as scratch for the negated quotient; the umod step below
> + # never runs together with this branch because umod is not signed
> + b = -q
> + q = b if r else q
> +
> +if op == "umod":
> + # remainder = a - quotient * b
> + r = q * b
> + q = a - r
> +
> +# the result is printed reinterpreted as int32
> +print "=", q.view(np.int32)
> +
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index c14c51c..20984e9 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1605,6 +1605,7 @@ void nir_lower_samplers(nir_shader *shader,
>
> void nir_lower_system_values(nir_shader *shader);
> void nir_lower_tex_projector(nir_shader *shader);
> +void nir_lower_idiv(nir_shader *shader);
>
> void nir_lower_atomics(nir_shader *shader);
> void nir_lower_to_source_mods(nir_shader *shader);
> diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c
> new file mode 100644
> index 0000000..c2f08df
> --- /dev/null
> +++ b/src/glsl/nir/nir_lower_idiv.c
> @@ -0,0 +1,157 @@
> +/*
> + * Copyright © 2015 Red Hat
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + * Rob Clark <robclark at freedesktop.org>
> + */
> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +
> +/* Lowers idiv/udiv/umod
> + * Based on NV50LegalizeSSA::handleDIV()
> + *
> + * Note that this is probably not enough precision for compute shaders.
> + * Perhaps we want a second higher precision (looping) version of this?
> + * Or perhaps we assume if you can do compute shaders you can also
> + * branch out to a pre-optimized shader library routine..
> + */
> +
> +/* Lower a single idiv/udiv/umod ALU instruction; any other opcode is left
> + * untouched.
> + *
> + * The replacement sequence is emitted in front of 'alu' and every use of
> + * alu's SSA destination is rewritten to the lowered value.  'alu' itself is
> + * not removed by this function.
> + *
> + *   bld - builder used to emit the replacement instructions
> + *   alu - candidate instruction; its destination must be an SSA value
> + */
> +static void
> +convert_instr(nir_builder *bld, nir_alu_instr *alu)
> +{
> +   nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
> +   nir_op op = alu->op;
> +   bool is_signed;
> +
> +   if ((op != nir_op_idiv) &&
> +       (op != nir_op_udiv) &&
> +       (op != nir_op_umod))
> +      return;
> +
> +   is_signed = (op == nir_op_idiv);
> +
> +   nir_builder_insert_before_instr(bld, &alu->instr);
> +
> +   numer = nir_ssa_for_src(bld, alu->src[0].src,
> +                           nir_ssa_alu_instr_src_components(alu, 0));
> +   denom = nir_ssa_for_src(bld, alu->src[1].src,
> +                           nir_ssa_alu_instr_src_components(alu, 1));
> +
> +   /* af/bf: float versions of the operand magnitudes,
> +    * a/b:   integer operand magnitudes, treated as unsigned from here on
> +    */
> +   if (is_signed) {
> +      af = nir_i2f(bld, numer);
> +      bf = nir_i2f(bld, denom);
> +      af = nir_fabs(bld, af);
> +      bf = nir_fabs(bld, bf);
> +      a  = nir_iabs(bld, numer);
> +      b  = nir_iabs(bld, denom);
> +   } else {
> +      af = nir_u2f(bld, numer);
> +      bf = nir_u2f(bld, denom);
> +      a  = numer;
> +      b  = denom;
> +   }
> +
> +   /* get first result: */
> +   bf = nir_frcp(bld, bf);
> +   /* yes, really: an integer subtract applied to the float reciprocal */
> +   bf = nir_isub(bld, bf, nir_imm_int(bld, 2));
> +   q = nir_fmul(bld, af, bf);
> +
> +   if (is_signed) {
> +      q = nir_f2i(bld, q);
> +   } else {
> +      q = nir_f2u(bld, q);
> +   }
> +
> +   /* get error of first result: */
> +   r = nir_imul(bld, q, b);
> +   r = nir_isub(bld, a, r);
> +   r = nir_u2f(bld, r);
> +   r = nir_fmul(bld, r, bf);
> +   r = nir_f2u(bld, r);
> +
> +   /* add quotients: */
> +   q = nir_iadd(bld, q, r);
> +
> +   /* correction: if modulus >= divisor, add 1 */
> +   r = nir_imul(bld, q, b);
> +   r = nir_isub(bld, a, r);
> +
> +   /* Both the remainder and the divisor are unsigned magnitudes here, so
> +    * the comparison has to be unsigned.  A signed compare gives the wrong
> +    * answer as soon as either value has bit 31 set.
> +    */
> +   r = nir_uge(bld, r, b);
> +   r = nir_b2i(bld, r);
> +
> +   q = nir_iadd(bld, q, r);
> +   if (is_signed) {
> +      /* fix the sign: bit 31 of numer ^ denom is set iff the signs differ,
> +       * in which case the quotient is negated
> +       */
> +      r = nir_ixor(bld, numer, denom);
> +      r = nir_ushr(bld, r, nir_imm_int(bld, 31));
> +      r = nir_i2b(bld, r);
> +      q = nir_bcsel(bld, r, nir_ineg(bld, q), q);
> +   }
> +
> +   if (op == nir_op_umod) {
> +      /* division result in q, turn it into the remainder a - q * b */
> +      r = nir_imul(bld, q, b);
> +      q = nir_isub(bld, a, r);
> +   }
> +
> +   assert(alu->dest.dest.is_ssa);
> +   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
> +                            nir_src_for_ssa(q),
> +                            ralloc_parent(alu));
> +}
> +
> +/* Per-block callback: hand every ALU instruction of 'block' to
> + * convert_instr().  'state' is the nir_builder to emit with.  Always
> + * returns true.
> + */
> +static bool
> +convert_block(nir_block *block, void *state)
> +{
> +   nir_builder *bld = state;
> +
> +   nir_foreach_instr_safe(block, instr) {
> +      if (instr->type != nir_instr_type_alu)
> +         continue;
> +
> +      convert_instr(bld, nir_instr_as_alu(instr));
> +   }
> +
> +   return true;
> +}
> +
> +/* Run the lowering over every block of one function implementation, then
> + * declare block-index and dominance metadata as preserved.
> + */
> +static void
> +convert_impl(nir_function_impl *impl)
> +{
> +   nir_builder builder;
> +
> +   nir_builder_init(&builder, impl);
> +   nir_foreach_block(impl, convert_block, &builder);
> +
> +   nir_metadata_preserve(impl,
> +                         nir_metadata_dominance | nir_metadata_block_index);
> +}
> +
> +/* Lower idiv/udiv/umod in every function overload of 'shader' that has an
> + * implementation.  Only instructions are rewritten; the shader's variable
> + * lists are not modified.
> + */
> +void
> +nir_lower_idiv(nir_shader *shader)
> +{
> +   nir_foreach_overload(shader, overload) {
> +      if (overload->impl)
> +         convert_impl(overload->impl);
> +   }
> +}
> --
> 2.1.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list