Commit d7a659b1 authored by Robert Griesemer

math/bits: faster OnesCount using table lookups for sizes 8,16,32

For uint64, the existing algorithm is faster.

benchmark                  old ns/op     new ns/op     delta
BenchmarkOnesCount8-8      1.95          0.97          -50.26%
BenchmarkOnesCount16-8     2.54          1.39          -45.28%
BenchmarkOnesCount32-8     2.61          1.96          -24.90%

Measured on 2.3 GHz Intel Core i7 running macOS 10.12.3.

Change-Id: I6cc42882fef3d24694720464039161e339a9ae99
Reviewed-on: https://go-review.googlesource.com/37580
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent 064e44f2
...@@ -63,32 +63,17 @@ func OnesCount(x uint) int { ...@@ -63,32 +63,17 @@ func OnesCount(x uint) int {
// OnesCount8 returns the number of one bits ("population count") in x. // OnesCount8 returns the number of one bits ("population count") in x.
func OnesCount8(x uint8) int { func OnesCount8(x uint8) int {
const m = 1<<8 - 1 return int(pop8tab[x])
x = x>>1&(m0&m) + x&(m0&m)
x = x>>2&(m1&m) + x&(m1&m)
x += x >> 4
return int(x) & (1<<4 - 1)
} }
// OnesCount16 returns the number of one bits ("population count") in x. // OnesCount16 returns the number of one bits ("population count") in x.
func OnesCount16(x uint16) int { func OnesCount16(x uint16) int {
const m = 1<<16 - 1 return int(pop8tab[x>>8] + pop8tab[x&0xff])
x = x>>1&(m0&m) + x&(m0&m)
x = x>>2&(m1&m) + x&(m1&m)
x = (x>>4 + x) & (m2 & m)
x += x >> 8
return int(x) & (1<<5 - 1)
} }
// OnesCount32 returns the number of one bits ("population count") in x. // OnesCount32 returns the number of one bits ("population count") in x.
func OnesCount32(x uint32) int { func OnesCount32(x uint32) int {
const m = 1<<32 - 1 return int(pop8tab[x>>24] + pop8tab[x>>16&0xff] + pop8tab[x>>8&0xff] + pop8tab[x&0xff])
x = x>>1&(m0&m) + x&(m0&m)
x = x>>2&(m1&m) + x&(m1&m)
x = (x>>4 + x) & (m2 & m)
x += x >> 8
x += x >> 16
return int(x) & (1<<6 - 1)
} }
// OnesCount64 returns the number of one bits ("population count") in x. // OnesCount64 returns the number of one bits ("population count") in x.
......
...@@ -6,6 +6,25 @@ ...@@ -6,6 +6,25 @@
package bits package bits
// pop8tab is a lookup table indexed by a byte value: entry i is the number
// of one bits in i (for example pop8tab[0x03] == 2 and pop8tab[0xff] == 8).
var pop8tab = [256]uint8{
0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04,
0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05,
0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05,
0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05,
0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07,
0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05,
0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07,
0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07,
0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07,
0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08,
}
var rev8tab = [256]uint8{ var rev8tab = [256]uint8{
0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
......
...@@ -30,6 +30,7 @@ package bits ...@@ -30,6 +30,7 @@ package bits
func main() { func main() {
buf := bytes.NewBuffer(header) buf := bytes.NewBuffer(header)
gen(buf, "pop8tab", pop8)
gen(buf, "rev8tab", rev8) gen(buf, "rev8tab", rev8)
// add more tables as needed // add more tables as needed
...@@ -57,6 +58,14 @@ func gen(w io.Writer, name string, f func(uint8) uint8) { ...@@ -57,6 +58,14 @@ func gen(w io.Writer, name string, f func(uint8) uint8) {
fmt.Fprint(w, "\n}\n\n") fmt.Fprint(w, "\n}\n\n")
} }
// pop8 returns the number of one bits ("population count") in x.
func pop8(x uint8) (n uint8) {
	// Add the low-order bit to the running count, then shift it out;
	// stop as soon as no set bits remain.
	for ; x != 0; x >>= 1 {
		n += x & 1
	}
	return n
}
func rev8(x uint8) (r uint8) { func rev8(x uint8) (r uint8) {
for i := 8; i > 0; i-- { for i := 8; i > 0; i-- {
r = r<<1 | x&1 r = r<<1 | x&1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment