SIMD-less render optimizations

Daniel Amelang daniel.amelang at gmail.com
Wed Apr 18 00:03:26 PDT 2007


On 4/17/07, Daniel Amelang <daniel.amelang at gmail.com> wrote:
> Long story short, I got a 1.9x speedup for the cairo-perf test
> paint_similar_rgba_over-512 using the xlib-rgb backend on the N800
> (which uses a 16-bit color framebuffer and runs xserver 1.1.99.3) and
> the patch is attached.

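You'll also need this additional patch for the code to work correctly
:) The first patch packed each pair of 16-bit destination pixels into a
32-bit word as (first << 16) | second, which is only the right layout on
big-endian machines, so I needed to make those parts of the code
endian-aware.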

Dan Amelang
-------------- next part --------------
From b06f2727b2e98edb18490096875dabf5bf5a36a1 Mon Sep 17 00:00:00 2001
From: Dan Amelang <dan at amelang.net>
Date: Wed, 18 Apr 2007 00:00:33 -0700
Subject: [PATCH] Fix fbCompositeSrc_8888x0565 optimizations to be endian-aware

---
 fb/fbpict.c |   21 +++++++++++++++++++++
 1 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/fb/fbpict.c b/fb/fbpict.c
index 1ad5c8b..cf4a555 100644
--- a/fb/fbpict.c
+++ b/fb/fbpict.c
@@ -618,8 +618,13 @@ fbCompositeSrc_8888x0565 (CARD8      op,
             if ((s1 & s2 & s3 & s4) > 0xfeffffff)
             {
                 /* In this case, we just perform a SOURCE for all 4 pixels */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
                 *dst_2px_wide++ = (cvt8888to0565 (s1) << 16) | cvt8888to0565 (s2);
                 *dst_2px_wide++ = (cvt8888to0565 (s3) << 16) | cvt8888to0565 (s4);
+#else
+                *dst_2px_wide++ = cvt8888to0565 (s1) | (cvt8888to0565 (s2) << 16);
+                *dst_2px_wide++ = cvt8888to0565 (s3) | (cvt8888to0565 (s4) << 16);
+#endif
             }
             /* Next, check if the next 4 pixels have any alpha in them at all */
             else if ((s1 | s2 | s3 | s4) > 0x00ffffff)
@@ -627,17 +632,33 @@ fbCompositeSrc_8888x0565 (CARD8      op,
                 /* In which case, we perform OVER on each one of them */
                 CARD32 d1, d2, d3, d4;
 
+#if X_BYTE_ORDER == X_BIG_ENDIAN
                 d1 = (*dst_2px_wide >> 16);
                 d2 = (*dst_2px_wide & 0xffff);
                 FbOverU_8888x565 (s1, d1);
                 FbOverU_8888x565 (s2, d2);
                 *dst_2px_wide++ = (d1 << 16) | d2;
+#else
+                d2 = (*dst_2px_wide >> 16);
+                d1 = (*dst_2px_wide & 0xffff);
+                FbOverU_8888x565 (s1, d1);
+                FbOverU_8888x565 (s2, d2);
+                *dst_2px_wide++ = d1 | (d2 << 16);
+#endif
 
+#if X_BYTE_ORDER == X_BIG_ENDIAN
                 d3 = (*dst_2px_wide >> 16);
                 d4 = (*dst_2px_wide & 0xffff);
                 FbOverU_8888x565 (s3, d3);
                 FbOverU_8888x565 (s4, d4);
                 *dst_2px_wide++ = (d3 << 16) | d4;
+#else
+                d4 = (*dst_2px_wide >> 16);
+                d3 = (*dst_2px_wide & 0xffff);
+                FbOverU_8888x565 (s3, d3);
+                FbOverU_8888x565 (s4, d4);
+                *dst_2px_wide++ = d3 | (d4 << 16);
+#endif
             }
             else
             {
-- 
1.4.4.2



More information about the xorg mailing list