2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-31 14:25:26 +00:00

lib/util: More portable use of builtin popcnt.

- Use the GCC predefined macro __POPCNT__ to detect the availability
  of the fast __builtin_popcount() function.
- Use portable preprocessor macros to detect 64-bit build.
- Only define the 32-bit parts when needed, and declare
  count_1bits_8 at file scope to silence a warning.

This time I have tested all code paths to make sure no warnings are
generated.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Reviewed-by: Simon Horman <horms@verge.net.au>
This commit is contained in:
Jarno Rajahalme
2013-12-12 08:27:41 -08:00
parent 4abb8608af
commit 381657b3ea
2 changed files with 36 additions and 30 deletions

View File

@@ -901,7 +901,7 @@ raw_clz64(uint64_t n)
}
#endif
#if !(__GNUC__ >= 4 && defined(__corei7))
#if NEED_COUNT_1BITS_8
#define INIT1(X) \
((((X) & (1 << 0)) != 0) + \
(((X) & (1 << 1)) != 0) + \

View File

@@ -371,32 +371,14 @@ log_2_ceil(uint64_t n)
return log_2_floor(n) + !is_pow2(n);
}
extern const uint8_t count_1bits_8[256];
/* Counts the bits that are set in the 32-bit value 'x'.
 *
 * The result is always in the range 0...32. */
static inline unsigned int
count_1bits_32(uint32_t x)
{
#if __GNUC__ >= 4 && defined(__corei7)
    /* The compiler builtin is only worth using when the target CPU supports
     * it directly; otherwise it is slower than the table lookup below. */
    return __builtin_popcount(x);
#else
    /* Portable path: look up each of the four bytes of 'x' in the
     * count_1bits_8[] table and add the results.  This is the fastest
     * 32-bit implementation we know of, and faster than GCC's
     * __builtin_popcount(). */
    const unsigned int byte0 = count_1bits_8[x & 0xff];
    const unsigned int byte1 = count_1bits_8[(x >> 8) & 0xff];
    const unsigned int byte2 = count_1bits_8[(x >> 16) & 0xff];
    const unsigned int byte3 = count_1bits_8[x >> 24];

    return byte0 + byte1 + byte2 + byte3;
#endif
}
/* Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
/* unsigned int count_1bits(uint64_t x):
*
* Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
#if UINTPTR_MAX == UINT64_MAX
static inline unsigned int
count_1bits(uint64_t x)
{
if (sizeof(void *) == 8) { /* 64-bit CPU */
#if __GNUC__ >= 4 && defined(__corei7)
/* __builtin_popcountll() is fast only when supported by the CPU. */
#if __GNUC__ >= 4 && __POPCNT__
return __builtin_popcountll(x);
#else
/* This portable implementation is the fastest one we know of for 64
@@ -410,10 +392,34 @@ count_1bits(uint64_t x)
x = (x + (x >> 4)) & h0F; /* Count of each 8 bits in-place. */
return (x * h01) >> 56; /* Sum of all bytes. */
#endif
} else { /* 32-bit CPU */
return count_1bits_32(x) + count_1bits_32(x >> 32);
}
}
#else /* Not 64-bit. */
#if __GNUC__ >= 4 && __POPCNT__
/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive.
 *
 * This variant is compiled only when the enclosing preprocessor test
 * (__GNUC__ >= 4 && __POPCNT__) holds, so it simply defers to the compiler's
 * population-count builtin. */
static inline unsigned int
count_1bits_32__(uint32_t x)
{
    const int n_set = __builtin_popcount(x);

    return (unsigned int) n_set;
}
#else
#define NEED_COUNT_1BITS_8 1
extern const uint8_t count_1bits_8[256];
/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive.
 *
 * Table-driven fallback: adds up the count_1bits_8[] entries for each of the
 * four bytes of 'x'.  This portable implementation is the fastest one we know
 * of for 32 bits, and faster than GCC __builtin_popcount(). */
static inline unsigned int
count_1bits_32__(uint32_t x)
{
    unsigned int total = 0;
    unsigned int shift;

    /* Visit the bytes from least to most significant. */
    for (shift = 0; shift < 32; shift += 8) {
        total += count_1bits_8[(x >> shift) & 0xff];
    }
    return total;
}
#endif
/* Returns the number of 1-bits in 'x', between 0 and 64 inclusive.
 *
 * This is the definition used when the build is not 64-bit: the value is
 * split into its two 32-bit halves, which are counted separately and
 * summed. */
static inline unsigned int
count_1bits(uint64_t x)
{
    const uint32_t low_half = (uint32_t) x;
    const uint32_t high_half = (uint32_t) (x >> 32);

    return count_1bits_32__(low_half) + count_1bits_32__(high_half);
}
#endif
/* Returns the rightmost 1-bit in 'x' (e.g. 01011000 => 00001000), or 0 if 'x'
* is 0. */