Commit 08f895c3, authored by Andi Kleen, committed by Linus Torvalds

[PATCH] Efficient swab64 for i386

Due to some bugs in byteorder/generic.h, Linux would always use the hand-coded
C swab64 for 64-bit ntohq or cpu_to_be64.  The C version is very inefficient
and expands to 30+ instructions of horrible code.

This hurts on filesystems that use on-disk big-endian data structures
with 64-bit data types.

This patch adds an assembly-optimized swab64 to fix it. Now swab64 is
4 instructions when your CPU supports bswap and 9 when it doesn't.
Tests were done with gcc 3.2; results may differ on older gcc versions.

This is good for ~600 bytes code size reduction in XFS (gcc 3.2):
Before:
 503199    3296    1682  508177   7c111 fs/xfs/xfs.o
After:
 502543    3296    1682  507521   7be81 fs/xfs/xfs.o

Also should be faster.

Also some minor cleanups in the file.
parent b4655acd
......@@ -24,21 +24,41 @@ static __inline__ __const__ __u32 ___arch__swab32(__u32 x)
return x;
}
/*
 * ___arch__swab16 - exchange the two bytes of a 16-bit value.
 * @x: value to convert
 *
 * Returns @x with its low and high bytes swapped.
 *
 * gcc should generate this for open coded C now too. May be worth switching
 * to it because inline assembly cannot be scheduled. -AK
 */
static __inline__ __const__ __u16 ___arch__swab16(__u16 x)
{
	/*
	 * Exactly one xchgb: issuing the exchange twice would swap the bytes
	 * back and return the input unchanged.  The "q" constraint asks for a
	 * register whose low (%b0) and high (%h0) byte halves can both be
	 * named; "0" ties the input to that same register.
	 */
	__asm__("xchgb %b0,%h0"		/* swap bytes */
		: "=q" (x)
		: "0" (x));
	return x;
}
/*
 * ___arch__swab64 - reverse the byte order of a 64-bit value.
 * @val: value to convert
 *
 * The value is viewed as two 32-bit halves through a union.  Each half is
 * byte-swapped and the two halves are then exchanged, which together
 * reverses all eight bytes.  Returns the byte-reversed value.
 *
 * Spelled with __inline__/__asm__ like the 16- and 32-bit helpers in this
 * file, so that it still compiles when the plain "inline"/"asm" keywords
 * are disabled by a strict ISO mode (e.g. gcc -ansi).
 */
static __inline__ __u64 ___arch__swab64(__u64 val)
{
	union {
		struct { __u32 a, b; } s;
		__u64 u;
	} v;

	v.u = val;
#ifdef CONFIG_X86_BSWAP
	/* Swap the bytes inside each half, then exchange the halves. */
	__asm__("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
		: "=r" (v.s.a), "=r" (v.s.b)
		: "0" (v.s.a), "1" (v.s.b));
#else
	/* No bswap configured: use the 32-bit helper on each half ... */
	v.s.a = ___arch__swab32(v.s.a);
	v.s.b = ___arch__swab32(v.s.b);
	/* ... then exchange the two halves. */
	__asm__("xchgl %0,%1"
		: "=r" (v.s.a), "=r" (v.s.b)
		: "0" (v.s.a), "1" (v.s.b));
#endif
	return v.u;
}
/*
 * Publish the inline helpers above under the __arch__swabNN names.
 * NOTE(review): presumably these are the hooks the generic byteorder
 * headers test for and call -- confirm against those headers.
 */
#define __arch__swab64(x) ___arch__swab64(x)
#define __arch__swab32(x) ___arch__swab32(x)
#define __arch__swab16(x) ___arch__swab16(x)
/*
 * Advertise 64-bit byte-swap support unconditionally.
 *
 * __SWAB_64_THRU_32__ is deliberately not defined here: this header supplies
 * its own __arch__swab64.
 * NOTE(review): the generic swab code is expected to build swab64 from two
 * 32-bit swaps when __SWAB_64_THRU_32__ is set, bypassing __arch__swab64 --
 * confirm against linux/byteorder/swab.h.
 */
#define __BYTEORDER_HAS_U64__
#endif /* __GNUC__ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment