Commit 821376bf authored by David Mosberger-Tang, committed by Tony Luck

[IA64] fix fls()

The ia64 version of fls() never worked as intended (the bit numbering
was off by 1 and fls(0) was undefined).  This patch fixes the problem
by using a popcnt-based fls(), which on McKinley-derived cores is
slightly faster than both ia64_fls() and generic_fls().  The resulting
code, however, is bigger (7-8 bundles instead of about 3 bundles).
Also switch ia64_popcnt() to __builtin_popcountl() for GCC v3.4 or
newer since the compiler can predicate that and schedule it better.

Thanks to Simon Derr and Matt Mackall for tracking down this bug.
Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
parent d8470b7c
...@@ -314,8 +314,8 @@ __ffs (unsigned long x) ...@@ -314,8 +314,8 @@ __ffs (unsigned long x)
#ifdef __KERNEL__ #ifdef __KERNEL__
/* /*
* find_last_zero_bit - find the last zero bit in a 64 bit quantity * Return bit number of last (most-significant) bit set. Undefined
* @x: The value to search * for x==0. Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3).
*/ */
static inline unsigned long static inline unsigned long
ia64_fls (unsigned long x) ia64_fls (unsigned long x)
...@@ -327,10 +327,23 @@ ia64_fls (unsigned long x) ...@@ -327,10 +327,23 @@ ia64_fls (unsigned long x)
return exp - 0xffff; return exp - 0xffff;
} }
/*
* Find the last (most significant) bit set. Returns 0 for x==0 and
* bits are numbered from 1..32 (e.g., fls(9) == 4).
*/
static inline int static inline int
fls (int x) fls (int t)
{ {
return ia64_fls((unsigned int) x); unsigned long x = t & 0xffffffffu;
if (!x)
return 0;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
return ia64_popcnt(x);
} }
/* /*
......
...@@ -133,13 +133,17 @@ register unsigned long ia64_r13 asm ("r13") __attribute_used__; ...@@ -133,13 +133,17 @@ register unsigned long ia64_r13 asm ("r13") __attribute_used__;
ia64_intri_res; \ ia64_intri_res; \
}) })
#define ia64_popcnt(x) \ #if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
({ \ # define ia64_popcnt(x) __builtin_popcountl(x)
#else
# define ia64_popcnt(x) \
({ \
__u64 ia64_intri_res; \ __u64 ia64_intri_res; \
asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \ asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \
\ \
ia64_intri_res; \ ia64_intri_res; \
}) })
#endif
#define ia64_getf_exp(x) \ #define ia64_getf_exp(x) \
({ \ ({ \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment