Commit a12edb8d authored by Robert Griesemer's avatar Robert Griesemer

math/bits: faster OnesCount, added respective benchmarks

Also: Changed Reverse/ReverseBytes implementations to use
the same (smaller) masks as OnesCount.

BenchmarkOnesCount-8          37.0          6.26          -83.08%
BenchmarkOnesCount8-8         7.24          1.99          -72.51%
BenchmarkOnesCount16-8        11.3          2.47          -78.14%
BenchmarkOnesCount32-8        18.4          3.02          -83.59%
BenchmarkOnesCount64-8        40.0          3.78          -90.55%
BenchmarkReverse-8            6.69          6.22          -7.03%
BenchmarkReverse8-8           1.64          1.64          +0.00%
BenchmarkReverse16-8          2.26          2.18          -3.54%
BenchmarkReverse32-8          2.88          2.87          -0.35%
BenchmarkReverse64-8          5.64          4.34          -23.05%
BenchmarkReverseBytes-8       2.48          2.17          -12.50%
BenchmarkReverseBytes16-8     0.63          0.95          +50.79%
BenchmarkReverseBytes32-8     1.13          1.24          +9.73%
BenchmarkReverseBytes64-8     2.50          2.16          -13.60%

OnesCount-8       37.0ns ± 0%   6.3ns ± 0%   ~             (p=1.000 n=1+1)
OnesCount8-8      7.24ns ± 0%  1.99ns ± 0%   ~             (p=1.000 n=1+1)
OnesCount16-8     11.3ns ± 0%   2.5ns ± 0%   ~             (p=1.000 n=1+1)
OnesCount32-8     18.4ns ± 0%   3.0ns ± 0%   ~             (p=1.000 n=1+1)
OnesCount64-8     40.0ns ± 0%   3.8ns ± 0%   ~             (p=1.000 n=1+1)
Reverse-8         6.69ns ± 0%  6.22ns ± 0%   ~             (p=1.000 n=1+1)
Reverse8-8        1.64ns ± 0%  1.64ns ± 0%   ~     (all samples are equal)
Reverse16-8       2.26ns ± 0%  2.18ns ± 0%   ~             (p=1.000 n=1+1)
Reverse32-8       2.88ns ± 0%  2.87ns ± 0%   ~             (p=1.000 n=1+1)
Reverse64-8       5.64ns ± 0%  4.34ns ± 0%   ~             (p=1.000 n=1+1)
ReverseBytes-8    2.48ns ± 0%  2.17ns ± 0%   ~             (p=1.000 n=1+1)
ReverseBytes16-8  0.63ns ± 0%  0.95ns ± 0%   ~             (p=1.000 n=1+1)
ReverseBytes32-8  1.13ns ± 0%  1.24ns ± 0%   ~             (p=1.000 n=1+1)
ReverseBytes64-8  2.50ns ± 0%  2.16ns ± 0%   ~             (p=1.000 n=1+1)

Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497
Reviewed-on: https://go-review.googlesource.com/37218Reviewed-by: default avatarMatthew Dempsky <mdempsky@google.com>
parent 21c71d77
......@@ -45,20 +45,58 @@ func TrailingZeros64(x uint64) int { return ntz64(x) }
// --- OnesCount ---
const m0 = 0x5555555555555555 // 01010101 ...
const m1 = 0x3333333333333333 // 00110011 ...
const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
const m3 = 0x00ff00ff00ff00ff // etc.
const m4 = 0x0000ffff0000ffff
const m5 = 0x00000000ffffffff
// OnesCount returns the number of one bits ("population count") in x.
func OnesCount(x uint) int { return pop(uint64(x)) }
func OnesCount(x uint) int {
if UintSize == 32 {
return OnesCount32(uint32(x))
}
return OnesCount64(uint64(x))
}
// OnesCount8 returns the number of one bits ("population count") in x.
func OnesCount8(x uint8) int { return pop(uint64(x)) }
func OnesCount8(x uint8) int {
const m = 1<<8 - 1
x = x>>1&(m0&m) + x&(m0&m)
x = x>>2&(m1&m) + x&(m1&m)
return int(x>>4 + x&(m2&m))
}
// OnesCount16 returns the number of one bits ("population count") in x.
func OnesCount16(x uint16) int { return pop(uint64(x)) }
func OnesCount16(x uint16) int {
const m = 1<<16 - 1
x = x>>1&(m0&m) + x&(m0&m)
x = x>>2&(m1&m) + x&(m1&m)
x = x>>4&(m2&m) + x&(m2&m)
return int(x>>8 + x&(m3&m))
}
// OnesCount32 returns the number of one bits ("population count") in x.
func OnesCount32(x uint32) int { return pop(uint64(x)) }
func OnesCount32(x uint32) int {
const m = 1<<32 - 1
x = x>>1&(m0&m) + x&(m0&m)
x = x>>2&(m1&m) + x&(m1&m)
x = x>>4&(m2&m) + x&(m2&m)
x = x>>8&(m3&m) + x&(m3&m)
return int(x>>16 + x&(m4&m))
}
// OnesCount64 returns the number of one bits ("population count") in x.
func OnesCount64(x uint64) int { return pop(uint64(x)) }
func OnesCount64(x uint64) int {
const m = 1<<64 - 1
x = x>>1&(m0&m) + x&(m0&m)
x = x>>2&(m1&m) + x&(m1&m)
x = x>>4&(m2&m) + x&(m2&m)
x = x>>8&(m3&m) + x&(m3&m)
x = x>>16&(m4&m) + x&(m4&m)
return int(x>>32 + x&(m5&m))
}
// --- RotateLeft ---
......@@ -96,12 +134,6 @@ func RotateRight64(x uint64, k int) uint64 { return uint64(rot(uint64(x), 64, 64
// --- Reverse ---
const m0 = 0xaaaaaaaaaaaaaaaa // 10101010 ...
const m1 = 0xcccccccccccccccc // 11001100 ...
const m2 = 0xf0f0f0f0f0f0f0f0 // 11110000 ...
const m3 = 0xff00ff00ff00ff00 // etc.
const m4 = 0xffff0000ffff0000
// Reverse returns the value of x with its bits in reversed order.
func Reverse(x uint) uint {
if UintSize == 32 {
......@@ -113,38 +145,38 @@ func Reverse(x uint) uint {
// Reverse8 returns the value of x with its bits in reversed order.
func Reverse8(x uint8) uint8 {
const m = 1<<8 - 1
x = x&(m0&m)>>1 | x<<1&(m0&m)
x = x&(m1&m)>>2 | x<<2&(m1&m)
x = x>>1&(m0&m) | x&(m0&m)<<1
x = x>>2&(m1&m) | x&(m1&m)<<2
return x>>4 | x<<4
}
// Reverse16 returns the value of x with its bits in reversed order.
func Reverse16(x uint16) uint16 {
const m = 1<<16 - 1
x = x&(m0&m)>>1 | x<<1&(m0&m)
x = x&(m1&m)>>2 | x<<2&(m1&m)
x = x&(m2&m)>>4 | x<<4&(m2&m)
x = x>>1&(m0&m) | x&(m0&m)<<1
x = x>>2&(m1&m) | x&(m1&m)<<2
x = x>>4&(m2&m) | x&(m2&m)<<4
return x>>8 | x<<8
}
// Reverse32 returns the value of x with its bits in reversed order.
func Reverse32(x uint32) uint32 {
const m = 1<<32 - 1
x = x&(m0&m)>>1 | x<<1&(m0&m)
x = x&(m1&m)>>2 | x<<2&(m1&m)
x = x&(m2&m)>>4 | x<<4&(m2&m)
x = x&(m3&m)>>8 | x<<8&(m3&m)
x = x>>1&(m0&m) | x&(m0&m)<<1
x = x>>2&(m1&m) | x&(m1&m)<<2
x = x>>4&(m2&m) | x&(m2&m)<<4
x = x>>8&(m3&m) | x&(m3&m)<<8
return x>>16 | x<<16
}
// Reverse64 returns the value of x with its bits in reversed order.
func Reverse64(x uint64) uint64 {
const m = 1<<64 - 1
x = x&(m0&m)>>1 | x<<1&(m0&m)
x = x&(m1&m)>>2 | x<<2&(m1&m)
x = x&(m2&m)>>4 | x<<4&(m2&m)
x = x&(m3&m)>>8 | x<<8&(m3&m)
x = x&(m4&m)>>16 | x<<16&(m4&m)
x = x>>1&(m0&m) | x&(m0&m)<<1
x = x>>2&(m1&m) | x&(m1&m)<<2
x = x>>4&(m2&m) | x&(m2&m)<<4
x = x>>8&(m3&m) | x&(m3&m)<<8
x = x>>16&(m4&m) | x&(m4&m)<<16
return x>>32 | x<<32
}
......@@ -166,15 +198,15 @@ func ReverseBytes16(x uint16) uint16 {
// ReverseBytes32 returns the value of x with its bytes in reversed order.
func ReverseBytes32(x uint32) uint32 {
const m = 1<<32 - 1
x = x&(m3&m)>>8 | x<<8&(m3&m)
x = x>>8&(m3&m) | x&(m3&m)<<8
return x>>16 | x<<16
}
// ReverseBytes64 returns the value of x with its bytes in reversed order.
func ReverseBytes64(x uint64) uint64 {
const m = 1<<64 - 1
x = x&(m3&m)>>8 | x<<8&(m3&m)
x = x&(m4&m)>>16 | x<<16&(m4&m)
x = x>>8&(m3&m) | x&(m3&m)<<8
x = x>>16&(m4&m) | x&(m4&m)<<16
return x>>32 | x<<32
}
......
......@@ -74,14 +74,6 @@ func ntz64(x uint64) int {
return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
}
func pop(x uint64) (n int) {
for x != 0 {
n++
x &= x - 1
}
return
}
func pos(k int) uint {
if k < 0 {
panic("negative rotation count")
......
......@@ -189,6 +189,56 @@ func TestOnesCount(t *testing.T) {
}
}
// Exported (global) variable to store function results
// during benchmarking to ensure side-effect free calls
// are not optimized away.
var Unused int
// Exported (global) variable serving as input for some
// of the benchmarks to ensure side-effect free calls
// are not optimized away.
var Input uint64 = deBruijn64
func BenchmarkOnesCount(b *testing.B) {
var s int
for i := 0; i < b.N; i++ {
s += OnesCount(uint(Input))
}
Unused = s
}
func BenchmarkOnesCount8(b *testing.B) {
var s int
for i := 0; i < b.N; i++ {
s += OnesCount8(uint8(Input))
}
Unused = s
}
func BenchmarkOnesCount16(b *testing.B) {
var s int
for i := 0; i < b.N; i++ {
s += OnesCount16(uint16(Input))
}
Unused = s
}
func BenchmarkOnesCount32(b *testing.B) {
var s int
for i := 0; i < b.N; i++ {
s += OnesCount32(uint32(Input))
}
Unused = s
}
func BenchmarkOnesCount64(b *testing.B) {
var s int
for i := 0; i < b.N; i++ {
s += OnesCount64(uint64(Input))
}
Unused = s
}
func TestRotateLeft(t *testing.T) {
var m uint64 = deBruijn64
......@@ -367,17 +417,12 @@ func testReverse(t *testing.T, x64, want64 uint64) {
}
}
// Exported (global) variable to store function results
// during benchmarking, to ensure side-effect free calls
// are not optimized away.
var Unused uint64
func BenchmarkReverse(b *testing.B) {
var s uint
for i := 0; i < b.N; i++ {
s += Reverse(uint(i))
}
Unused = uint64(s)
Unused = int(s)
}
func BenchmarkReverse8(b *testing.B) {
......@@ -385,7 +430,7 @@ func BenchmarkReverse8(b *testing.B) {
for i := 0; i < b.N; i++ {
s += Reverse8(uint8(i))
}
Unused = uint64(s)
Unused = int(s)
}
func BenchmarkReverse16(b *testing.B) {
......@@ -393,7 +438,7 @@ func BenchmarkReverse16(b *testing.B) {
for i := 0; i < b.N; i++ {
s += Reverse16(uint16(i))
}
Unused = uint64(s)
Unused = int(s)
}
func BenchmarkReverse32(b *testing.B) {
......@@ -401,7 +446,7 @@ func BenchmarkReverse32(b *testing.B) {
for i := 0; i < b.N; i++ {
s += Reverse32(uint32(i))
}
Unused = uint64(s)
Unused = int(s)
}
func BenchmarkReverse64(b *testing.B) {
......@@ -409,7 +454,7 @@ func BenchmarkReverse64(b *testing.B) {
for i := 0; i < b.N; i++ {
s += Reverse64(uint64(i))
}
Unused = s
Unused = int(s)
}
func TestReverseBytes(t *testing.T) {
......@@ -473,7 +518,7 @@ func BenchmarkReverseBytes(b *testing.B) {
for i := 0; i < b.N; i++ {
s += ReverseBytes(uint(i))
}
Unused = uint64(s)
Unused = int(s)
}
func BenchmarkReverseBytes16(b *testing.B) {
......@@ -481,7 +526,7 @@ func BenchmarkReverseBytes16(b *testing.B) {
for i := 0; i < b.N; i++ {
s += ReverseBytes16(uint16(i))
}
Unused = uint64(s)
Unused = int(s)
}
func BenchmarkReverseBytes32(b *testing.B) {
......@@ -489,7 +534,7 @@ func BenchmarkReverseBytes32(b *testing.B) {
for i := 0; i < b.N; i++ {
s += ReverseBytes32(uint32(i))
}
Unused = uint64(s)
Unused = int(s)
}
func BenchmarkReverseBytes64(b *testing.B) {
......@@ -497,7 +542,7 @@ func BenchmarkReverseBytes64(b *testing.B) {
for i := 0; i < b.N; i++ {
s += ReverseBytes64(uint64(i))
}
Unused = s
Unused = int(s)
}
func TestLen(t *testing.T) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment