2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-31 06:15:47 +00:00

util: Better count_1bits().

Inline, use another well-known algorithm for 64-bit builds, and use
builtins when they are known to be fast at compile time.  A 32-bit
version of the alternate algorithm is slower than the existing
implementation, so the old one is used for 32-bit builds.  Inline
assembler would be a bit faster on a 32-bit i7 build, but we use the GCC
builtin for portability.

It should be stressed that builds for specific CPUs do not work on other
CPUs, and that the OVS build system or runtime does not currently support
CPU detection.

Speed improvement vs. the existing implementation / GCC 4.7
__builtin_popcountll():

i386:         64%  (inlining)                         / 380%
i386 on i7:   240% (inlining + builtin)               / 820%
x86_64:       59%  (inlining + different algorithm)   / 190%
x86_64 on i7: 370% (inlining + builtin)               / 0%

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
This commit is contained in:
Jarno Rajahalme
2013-11-27 12:58:46 -08:00
parent 52105b6787
commit c3cc4d2dd2
2 changed files with 47 additions and 25 deletions

View File

@@ -901,14 +901,7 @@ raw_clz64(uint64_t n)
}
#endif
/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive. */
static unsigned int
count_1bits_32(uint32_t x)
{
/* In my testing, this implementation is over twice as fast as any other
* portable implementation that I tried, including GCC 4.4
* __builtin_popcount(), although nonportable asm("popcnt") was over 50%
* faster. */
#if !(__GNUC__ >= 4 && defined(__corei7))
#define INIT1(X) \
((((X) & (1 << 0)) != 0) + \
(((X) & (1 << 1)) != 0) + \
@@ -925,22 +918,10 @@ count_1bits_32(uint32_t x)
#define INIT32(X) INIT16(X), INIT16((X) + 16)
#define INIT64(X) INIT32(X), INIT32((X) + 32)
static const uint8_t count_1bits_8[256] = {
INIT64(0), INIT64(64), INIT64(128), INIT64(192)
};
return (count_1bits_8[x & 0xff] +
count_1bits_8[(x >> 8) & 0xff] +
count_1bits_8[(x >> 16) & 0xff] +
count_1bits_8[x >> 24]);
}
/* Counts the bits set in the 64-bit value 'x'.
 *
 * The value is split into its low and high 32-bit halves, each half is
 * counted with count_1bits_32(), and the two counts are added, so the result
 * lies between 0 and 64 inclusive. */
unsigned int
count_1bits(uint64_t x)
{
    uint32_t low_half = (uint32_t) x;
    uint32_t high_half = (uint32_t) (x >> 32);

    return count_1bits_32(low_half) + count_1bits_32(high_half);
}
const uint8_t count_1bits_8[256] = {
INIT64(0), INIT64(64), INIT64(128), INIT64(192)
};
#endif
/* Returns true if the 'n' bytes starting at 'p' are zeros. */
bool