[PATCH 1/6] make byte swapping macros less opaque to the optimizer

Siarhei Siamashka siarhei.siamashka at gmail.com
Fri Apr 29 04:46:23 PDT 2011


On Fri, Apr 29, 2011 at 1:33 AM, Matt Turner <mattst88 at gmail.com> wrote:
> On Thu, Apr 28, 2011 at 4:36 PM, Siarhei Siamashka
> <siarhei.siamashka at gmail.com> wrote:
>> On Thu, Apr 28, 2011 at 2:33 AM, Matt Turner <mattst88 at gmail.com> wrote:
>>> diff --git a/include/misc.h b/include/misc.h
>>> index 803f5ba..b7a3fd2 100644
>>> --- a/include/misc.h
>>> +++ b/include/misc.h
>>> @@ -240,32 +240,17 @@ xstrtokenize(const char *str, const char* separators);
>>>  #define SwapRestL(stuff) \
>>>     SwapLongs((CARD32 *)(stuff + 1), LengthRestL(stuff))
>>>
>>> -/* byte swap a 32-bit value */
>>> -#define swapl(x, n) { \
>>> -                n = ((char *) (x))[0];\
>>> -                ((char *) (x))[0] = ((char *) (x))[3];\
>>> -                ((char *) (x))[3] = n;\
>>> -                n = ((char *) (x))[1];\
>>> -                ((char *) (x))[1] = ((char *) (x))[2];\
>>> -                ((char *) (x))[2] = n; }
>>> -
>>> -/* byte swap a short */
>>> -#define swaps(x, n) { \
>>> -                n = ((char *) (x))[0];\
>>> -                ((char *) (x))[0] = ((char *) (x))[1];\
>>> -                ((char *) (x))[1] = n; }
>>> -
>>>  /* copy 32-bit value from src to dst byteswapping on the way */
>>> -#define cpswapl(src, dst) { \
>>> -                 ((char *)&(dst))[0] = ((char *) &(src))[3];\
>>> -                 ((char *)&(dst))[1] = ((char *) &(src))[2];\
>>> -                 ((char *)&(dst))[2] = ((char *) &(src))[1];\
>>> -                 ((char *)&(dst))[3] = ((char *) &(src))[0]; }
>>> +#define cpswapl(src, dst) (dst) = lswapl((src))
>>>
>>>  /* copy short from src to dst byteswapping on the way */
>>> -#define cpswaps(src, dst) { \
>>> -                ((char *) &(dst))[0] = ((char *) &(src))[1];\
>>> -                ((char *) &(dst))[1] = ((char *) &(src))[0]; }
>>> +#define cpswaps(src, dst) (dst) = lswaps((src))
>>> +
>>> +/* byte swap a 32-bit value */
>>> +#define swapl(x, n) (*(x)) = lswapl(*(x))
>>> +
>>> +/* byte swap a short */
>>> +#define swaps(x, n) (*(x)) = lswaps(*(x))
>>
>> This is not an equal replacement for the previous code. The difference
>> is that the older variant of 'swapl' could accept any pointer having
>> any alignment, but still do byteswap for exactly 4 bytes at that
>> memory location.
>>
>> Just to perform an additional sanity check, the following test code can be used:
>>
>> -#define swapl(x, n) (*(x)) = lswapl(*(x))
>> +#define swapl(x, n) do {                    \
>> +        char dummytmp1[4 - sizeof(*(x))];   \
>> +        char dummytmp2[sizeof(*(x)) - 4];   \
>> +        (*(x)) = lswapl(*(x));              \
>> +    } while (0)
>>
>>  /* byte swap a short */
>> -#define swaps(x, n) (*(x)) = lswaps(*(x))
>> +#define swaps(x, n) do {                    \
>> +        char dummytmp1[2 - sizeof(*(x))];   \
>> +        char dummytmp2[sizeof(*(x)) - 2];   \
>> +        (*(x)) = lswaps(*(x));              \
>> +    } while (0)
>>
>> And this extra guard check already spots at least one issue:
>>
>> swaprep.c:436:2: error: size of array 'dummytmp2' is too large
>>
>> http://cgit.freedesktop.org/xorg/xserver/tree/dix/swaprep.c?id=xorg-server-1.10.1#n423
>>
>> One more potential pitfall is data alignment. The older variant could
>> work correctly with any alignment, but the new variant will fail if
>> the pointer is somehow not naturally aligned. So I think the patch is
>> very dangerous and every affected line of code needs to be carefully
>> reviewed.
>>
>> --
>> Best regards,
>> Siarhei Siamashka
>
> Good call!
>
> I wonder if these misalignments are bugs in their own right?

I'm not even sure if there are any real problems related to alignment
here. Just the proposed patch now needs strict alignment and the older
code did not. It may be either fine or may potentially trigger some
alignment traps at runtime on the architectures which need strict
alignment if one of the uses for these macros happens to use unaligned
pointers. To be on a safe side, it is possible to use a trick with
__attribute__((packed)) when compiling with gcc like described in
http://lwn.net/Articles/259732/

void swapl_unaligned(void *x)
{
    struct dummy
    {
        uint32_t data;
    } __attribute__((packed));
    ((struct dummy *)x)->data = __builtin_bswap32(((struct dummy *)x)->data);
}

void swapl_aligned(uint32_t *x)
{
    *x = __builtin_bswap32(*x);
}

The above two functions are compiled into the following code on x86-64:

0000000000000000 <swapl_unaligned>:
   0:   8b 07                   mov    (%rdi),%eax
   2:   0f c8                   bswap  %eax
   4:   89 07                   mov    %eax,(%rdi)
   6:   c3                      retq

0000000000000007 <swapl_aligned>:
   7:   8b 07                   mov    (%rdi),%eax
   9:   0f c8                   bswap  %eax
   b:   89 07                   mov    %eax,(%rdi)
   d:   c3                      retq

And for armv4t it is:

00000000 <swapl_unaligned>:
   0:   e5d0c001        ldrb    ip, [r0, #1]
   4:   e5d02000        ldrb    r2, [r0]
   8:   e5d01002        ldrb    r1, [r0, #2]
   c:   e5d03003        ldrb    r3, [r0, #3]
  10:   e182240c        orr     r2, r2, ip, lsl #8
  14:   e1822801        orr     r2, r2, r1, lsl #16
  18:   e1822c03        orr     r2, r2, r3, lsl #24
  1c:   e0223862        eor     r3, r2, r2, ror #16
  20:   e1a03423        lsr     r3, r3, #8
  24:   e3c33cff        bic     r3, r3, #65280  ; 0xff00
  28:   e0233462        eor     r3, r3, r2, ror #8
  2c:   e1a0c423        lsr     ip, r3, #8
  30:   e1a01823        lsr     r1, r3, #16
  34:   e1a02c23        lsr     r2, r3, #24
  38:   e5c03000        strb    r3, [r0]
  3c:   e5c0c001        strb    ip, [r0, #1]
  40:   e5c01002        strb    r1, [r0, #2]
  44:   e5c02003        strb    r2, [r0, #3]
  48:   e12fff1e        bx      lr

0000004c <swapl_aligned>:
  4c:   e5902000        ldr     r2, [r0]
  50:   e0223862        eor     r3, r2, r2, ror #16
  54:   e1a03423        lsr     r3, r3, #8
  58:   e3c33cff        bic     r3, r3, #65280  ; 0xff00
  5c:   e0233462        eor     r3, r3, r2, ror #8
  60:   e5803000        str     r3, [r0]
  64:   e12fff1e        bx      lr

The compiler generates efficient code on x86, but still can work
safely with unaligned pointers on the other architectures (though this
'swapl_unaligned' is quite suboptimal).

And of course, if none of the swapl/swaps/cpswapl/cpswaps are supposed
to deal with unaligned pointers, then this is not an issue at all.

-- 
Best regards,
Siarhei Siamashka


More information about the xorg-devel mailing list