mirror of
https://github.com/openvswitch/ovs
synced 2025-08-31 06:15:47 +00:00
util: Better count_1bits().
Inline, use another well-known algorithm for 64-bit builds, and use builtins when they are known to be fast at compile time. A 32-bit version of the alternate algorithm is slower than the existing implementation, so the old one is used for 32-bit builds. Inline assembler would be a bit faster on a 32-bit i7 build, but we use the GCC builtin for portability. It should be stressed that builds for specific CPUs do not work on other CPUs, and that the OVS build system and runtime do not currently support CPU detection.
Speed improvement vs. existing implementation / GCC 4.7 __builtin_popcountll():
i386: 64% (inlining) / 380%
i386 on i7: 240% (inlining + builtin) / 820%
x86_64: 59% (inlining + different algorithm) / 190%
x86_64 on i7: 370% (inlining + builtin) / 0%
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
This commit is contained in:
29
lib/util.c
29
lib/util.c
@@ -901,14 +901,7 @@ raw_clz64(uint64_t n)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive. */
|
||||
static unsigned int
|
||||
count_1bits_32(uint32_t x)
|
||||
{
|
||||
/* In my testing, this implementation is over twice as fast as any other
|
||||
* portable implementation that I tried, including GCC 4.4
|
||||
* __builtin_popcount(), although nonportable asm("popcnt") was over 50%
|
||||
* faster. */
|
||||
#if !(__GNUC__ >= 4 && defined(__corei7))
|
||||
#define INIT1(X) \
|
||||
((((X) & (1 << 0)) != 0) + \
|
||||
(((X) & (1 << 1)) != 0) + \
|
||||
@@ -925,22 +918,10 @@ count_1bits_32(uint32_t x)
|
||||
#define INIT32(X) INIT16(X), INIT16((X) + 16)
|
||||
#define INIT64(X) INIT32(X), INIT32((X) + 32)
|
||||
|
||||
static const uint8_t count_1bits_8[256] = {
|
||||
INIT64(0), INIT64(64), INIT64(128), INIT64(192)
|
||||
};
|
||||
|
||||
return (count_1bits_8[x & 0xff] +
|
||||
count_1bits_8[(x >> 8) & 0xff] +
|
||||
count_1bits_8[(x >> 16) & 0xff] +
|
||||
count_1bits_8[x >> 24]);
|
||||
}
|
||||
|
||||
/* Returns the number of 1-bits in 'x', between 0 and 64 inclusive. */
|
||||
unsigned int
|
||||
count_1bits(uint64_t x)
|
||||
{
|
||||
return count_1bits_32(x) + count_1bits_32(x >> 32);
|
||||
}
|
||||
const uint8_t count_1bits_8[256] = {
|
||||
INIT64(0), INIT64(64), INIT64(128), INIT64(192)
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Returns true if the 'n' bytes starting at 'p' are zeros. */
|
||||
bool
|
||||
|
Reference in New Issue
Block a user