mirror of
https://github.com/openvswitch/ovs
synced 2025-08-31 14:25:26 +00:00
lib/util: More portable use of builtin popcnt.
- Use the GCC predefined macro __POPCNT__ to detect the availability of a fast __builtin_popcount() function. - Use portable preprocessor macros to detect a 64-bit build. - Only define the 32-bit parts when needed and declare count_1bits_8 at file scope to silence a warning. This time I have tested all code paths to make sure no warnings are generated. Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com> Reviewed-by: Simon Horman <horms@verge.net.au>
This commit is contained in:
64
lib/util.h
64
lib/util.h
@@ -371,49 +371,55 @@ log_2_ceil(uint64_t n)
|
||||
return log_2_floor(n) + !is_pow2(n);
|
||||
}
|
||||
|
||||
extern const uint8_t count_1bits_8[256];
|
||||
|
||||
/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive. */
|
||||
/* unsigned int count_1bits(uint64_t x):
|
||||
*
|
||||
* Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
|
||||
#if UINTPTR_MAX == UINT64_MAX
|
||||
static inline unsigned int
|
||||
count_1bits_32(uint32_t x)
|
||||
count_1bits(uint64_t x)
|
||||
{
|
||||
#if __GNUC__ >= 4 && defined(__corei7)
|
||||
/* __builtin_popcount() is fast only when supported by the CPU. */
|
||||
return __builtin_popcount(x);
|
||||
#if __GNUC__ >= 4 && __POPCNT__
|
||||
return __builtin_popcountll(x);
|
||||
#else
|
||||
/* This portable implementation is the fastest one we know of for 64
|
||||
* bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
|
||||
const uint64_t h55 = UINT64_C(0x5555555555555555);
|
||||
const uint64_t h33 = UINT64_C(0x3333333333333333);
|
||||
const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
|
||||
const uint64_t h01 = UINT64_C(0x0101010101010101);
|
||||
x -= (x >> 1) & h55; /* Count of each 2 bits in-place. */
|
||||
x = (x & h33) + ((x >> 2) & h33); /* Count of each 4 bits in-place. */
|
||||
x = (x + (x >> 4)) & h0F; /* Count of each 8 bits in-place. */
|
||||
return (x * h01) >> 56; /* Sum of all bytes. */
|
||||
#endif
|
||||
}
|
||||
#else /* Not 64-bit. */
|
||||
#if __GNUC__ >= 4 && __POPCNT__
|
||||
static inline unsigned int
|
||||
count_1bits_32__(uint32_t x)
|
||||
{
|
||||
return __builtin_popcount(x);
|
||||
}
|
||||
#else
|
||||
#define NEED_COUNT_1BITS_8 1
|
||||
extern const uint8_t count_1bits_8[256];
|
||||
static inline unsigned int
|
||||
count_1bits_32__(uint32_t x)
|
||||
{
|
||||
/* This portable implementation is the fastest one we know of for 32 bits,
|
||||
* and faster than GCC __builtin_popcount(). */
|
||||
return (count_1bits_8[x & 0xff] +
|
||||
count_1bits_8[(x >> 8) & 0xff] +
|
||||
count_1bits_8[(x >> 16) & 0xff] +
|
||||
count_1bits_8[x >> 24]);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
|
||||
#endif
|
||||
static inline unsigned int
|
||||
count_1bits(uint64_t x)
|
||||
{
|
||||
if (sizeof(void *) == 8) { /* 64-bit CPU */
|
||||
#if __GNUC__ >= 4 && defined(__corei7)
|
||||
/* __builtin_popcountll() is fast only when supported by the CPU. */
|
||||
return __builtin_popcountll(x);
|
||||
#else
|
||||
/* This portable implementation is the fastest one we know of for 64
|
||||
* bits, and about 3x faster than GCC 4.7 __builtin_popcountll(). */
|
||||
const uint64_t h55 = UINT64_C(0x5555555555555555);
|
||||
const uint64_t h33 = UINT64_C(0x3333333333333333);
|
||||
const uint64_t h0F = UINT64_C(0x0F0F0F0F0F0F0F0F);
|
||||
const uint64_t h01 = UINT64_C(0x0101010101010101);
|
||||
x -= (x >> 1) & h55; /* Count of each 2 bits in-place. */
|
||||
x = (x & h33) + ((x >> 2) & h33); /* Count of each 4 bits in-place. */
|
||||
x = (x + (x >> 4)) & h0F; /* Count of each 8 bits in-place. */
|
||||
return (x * h01) >> 56; /* Sum of all bytes. */
|
||||
#endif
|
||||
} else { /* 32-bit CPU */
|
||||
return count_1bits_32(x) + count_1bits_32(x >> 32);
|
||||
}
|
||||
return count_1bits_32__(x) + count_1bits_32__(x >> 32);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Returns the rightmost 1-bit in 'x' (e.g. 01011000 => 00001000), or 0 if 'x'
|
||||
* is 0. */
|
||||
|
Reference in New Issue
Block a user