[PATCH] Add DEC Alpha sum_s16 fast path
Matt Turner
mattst88 at gmail.com
Thu Nov 5 14:25:16 PST 2009
The assembly loop is lifted from Appendix B of Compaq's Compiler Writer's
Guide for the Alpha 21264.
http://h18000.www1.hp.com/cpq-alphaserver/technology/literature/cmpwrgd.pdf
Signed-off-by: Matt Turner <mattst88 at gmail.com>
---
configure.ac | 1 +
liboil/Makefile.am | 9 +++++-
liboil/alpha/Makefile.am | 8 +++++
liboil/alpha/sum.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++
m4/as-host-defines.m4 | 5 +++
5 files changed, 100 insertions(+), 1 deletions(-)
create mode 100644 liboil/alpha/Makefile.am
create mode 100644 liboil/alpha/sum.c
diff --git a/configure.ac b/configure.ac
index 8145e64..d60da26 100644
--- a/configure.ac
+++ b/configure.ac
@@ -256,6 +256,7 @@ AC_CONFIG_FILES([
Makefile
doc/Makefile
liboil/Makefile
+liboil/alpha/Makefile
liboil/amd64/Makefile
liboil/3dnow/Makefile
liboil/c/Makefile
diff --git a/liboil/Makefile.am b/liboil/Makefile.am
index 4991a34..7367ecb 100644
--- a/liboil/Makefile.am
+++ b/liboil/Makefile.am
@@ -1,7 +1,7 @@
pkgincludedir = $(includedir)/liboil-@LIBOIL_MAJORMINOR@/liboil
-DIST_SUBDIRS = amd64 3dnow c colorspace conv copy dct fb i386 i386_amd64 jpeg math md5 mmx motovec powerpc powerpc_asm_blocks ref simdpack sse utf8 deprecated arm
+DIST_SUBDIRS = alpha amd64 3dnow c colorspace conv copy dct fb i386 i386_amd64 jpeg math md5 mmx motovec powerpc powerpc_asm_blocks ref simdpack sse utf8 deprecated arm
SUBDIRS = c colorspace conv copy dct jpeg math md5 ref simdpack utf8 deprecated
EXTRA_DIST = README
@@ -53,6 +53,13 @@ libs += arm/libarm.la
endif
endif
+if HAVE_ALPHA
+if HAVE_GCC_ASM
+SUBDIRS += alpha
+libs += alpha/libalpha.la
+endif
+endif
+
lib_LTLIBRARIES = liboil-@LIBOIL_MAJORMINOR@.la
if USE_NEW_ABI
diff --git a/liboil/alpha/Makefile.am b/liboil/alpha/Makefile.am
new file mode 100644
index 0000000..1a8f913
--- /dev/null
+++ b/liboil/alpha/Makefile.am
@@ -0,0 +1,8 @@
+
+noinst_LTLIBRARIES = libalpha.la
+
+libalpha_la_SOURCES = \
+ sum.c
+
+libalpha_la_CFLAGS = $(LIBOIL_CFLAGS)
+
diff --git a/liboil/alpha/sum.c b/liboil/alpha/sum.c
new file mode 100644
index 0000000..238b2b3
--- /dev/null
+++ b/liboil/alpha/sum.c
@@ -0,0 +1,78 @@
+
+#include <liboil/liboilfunction.h>
+#include <liboil/liboilclasses.h>
+
+void
+sum_s16_alpha (int16_t *d, int16_t *s, int n)
+{
+  /* Adds up the n int16_t values at s and stores the total, narrowed to
+   * int16_t, in *d.  The first (n & 15) values are summed in C; the rest are
+   * summed by the asm loop, 8 values (two quadwords) per pass.  Operand map:
+   * %7 = read pointer, %8 = values left, %2/%3 = quadwords in flight
+   * (HGFEDCBA, PONMLKJI), %4/%5 = sums of the even/odd 16-bit words kept in
+   * two 32-bit lanes each, %0/%1/%6 = scratch.
+   * NOTE(review): the last pass still loads the 16 bytes just past the data,
+   * and a low 32-bit lane could carry into the high lane for very large n. */
+
+  unsigned long tmp1, tmp2;  /* %0, %1: scratch for masked chunks */
+  unsigned long tmp3, tmp4;  /* %2, %3: quadwords currently being summed */
+  unsigned long tmp5, tmp6;  /* %4, %5: packed partial sums */
+  unsigned long tmp7;        /* %6: scratch for masked chunk */
+  unsigned int sum = 0;
+  /* Peel values off the front until the remaining count is a multiple of 16. */
+  while (n & 15) {
+    sum += s[0];
+    s++;
+    n--;
+  }
+
+  if (n > 0) {
+    /* Prime %2/%3 with the first two quadwords and zero both accumulators. */
+    asm volatile(
+      " ldq %2, 0(%7)\n"
+      " bis $31, $31, %4\n"
+      " ldq %3, 8(%7)\n"
+      " bis $31, $31, %5\n"
+
+      ".align 4 # Octaword alignment\n"
+      "1: zapnot %2, 0x33, %0 # U1 chunk 0: 00FE00BA\n"
+      " bis $31, $31, $31 # L NOP\n"
+      " zap %2, 0x33, %1 # U0 chunk 1: HG00DC00\n"
+      " ldq %2, 16(%7) # L1 load 2 ahead *p: HGFEDCBA\n"
+
+      " addq %4, %0, %4 # U1 accumulate 0\n"
+      " bis $31, $31, $31 # L NOP\n"
+      " srl %1, 16, %1 # U0 shift: 00HG00DC\n"
+      " lda %8, -8(%8) # L0 countdown\n"
+
+      " zapnot %3, 0x33, %0 # U1 chunk 3: 00NM00JI\n"
+      " bis $31, $31, $31 # L NOP\n"
+      " zap %3, 0x33, %6 # U0 chunk 4: PO00LK00\n"
+      " ldq %3, 24(%7) # L0 load 2 ahead *p: PONMLKJIL1\n"
+
+      " addq %4, %0, %4 # U1 accumulate 0\n"
+      " addq %5, %1, %5 # L0 accumulate 1\n"
+      " srl %6, 16, %6 # U0 shift: 00PO00LK\n"
+      " ldl $31, 512(%7) # L1 prefetch\n"
+
+      " lda %7, 16(%7) # U1 p++\n"
+      " addq %5, %6, %5 # L0 accumulate 1\n"
+      " bgt %8, 1b # U0 loop control\n"
+      " bis $31, $31, $31 # L NOP (replace with fall through)\n"
+      /* s and n are updated by the loop ("+r"); data is read via s ("memory"). */
+      : "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4),
+        "=&r" (tmp5), "=&r" (tmp6), "=&r" (tmp7), "+r" (s), "+r" (n)
+      : /* no input-only operands */ : "memory"
+    );
+    /* Each accumulator packs two 32-bit lane sums; split them and fold all four. */
+    tmp1 = tmp5 >> 32;
+    tmp2 = tmp5 & 0xffffffff;
+    tmp3 = tmp6 >> 32;
+    tmp4 = tmp6 & 0xffffffff;
+
+    sum += tmp1 + tmp2 + tmp3 + tmp4;
+  }
+
+  *d = sum;  /* narrowed to int16_t on assignment */
+}
+OIL_DEFINE_IMPL_FULL (sum_s16_alpha, sum_s16, OIL_IMPL_FLAG_ASM); /* presumably registers sum_s16_alpha as an asm-flagged implementation of the sum_s16 class -- confirm against liboilfunction.h */
diff --git a/m4/as-host-defines.m4 b/m4/as-host-defines.m4
index 8b813bb..8549971 100644
--- a/m4/as-host-defines.m4
+++ b/m4/as-host-defines.m4
@@ -27,12 +27,17 @@ AC_DEFUN([AS_HOST_DEFINES],
HAVE_ARM=yes
AC_DEFINE(HAVE_ARM, 1, [Defined if host is arm])
;;
+ xalpha)
+ HAVE_ALPHA=yes
+ AC_DEFINE(HAVE_ALPHA, 1, [Defined if host is alpha])
+ ;;
esac
AM_CONDITIONAL(HAVE_I386, test "x$HAVE_I386" = "xyes")
AM_CONDITIONAL(HAVE_AMD64, test "x$HAVE_AMD64" = "xyes")
AM_CONDITIONAL(HAVE_POWERPC, test "x$HAVE_POWERPC" = "xyes")
AM_CONDITIONAL(HAVE_ARM, test "x$HAVE_ARM" = "xyes")
+AM_CONDITIONAL(HAVE_ALPHA, test "x$HAVE_ALPHA" = "xyes")
case "${host_os}" in
mingw*)
--
1.6.4.4
More information about the xorg-devel
mailing list