Commit 2fb75ea6 authored by Rick Hudson's avatar Rick Hudson

[dev.garbage] runtime: use sys.Ctz64 intrinsic

Our compilers now provide intrinsics including
sys.Ctz64 that support CTZ (count trailing zero)
instructions. This CL replaces the Go versions
of CTZ with the compiler intrinsic.

Count trailing zeros CTZ finds the least
significant 1 in a word and returns the number
of less significant 0s in the word.

Allocation uses the bitmap created by the garbage
collector to locate an unmarked object. The logic
takes a word of the bitmap, complements, and then
caches it. It then uses CTZ to locate an available
unmarked object. It then shifts marked bits out of
the bitmap word preparing it for the next search.
Once all the unmarked objects are used in the
cached work the bitmap gets another word and
repeats the process.

Change-Id: Id2fc42d1d4b9893efaa2e1bd01896985b7e42f82
Reviewed-on: https://go-review.googlesource.com/21366
Reviewed-by: default avatarAustin Clements <austin@google.com>
parent 2063d5d9
...@@ -491,14 +491,13 @@ var zerobase uintptr ...@@ -491,14 +491,13 @@ var zerobase uintptr
// Otherwise it returns 0. // Otherwise it returns 0.
func (c *mcache) nextFreeFast(sizeclass int8) gclinkptr { func (c *mcache) nextFreeFast(sizeclass int8) gclinkptr {
s := c.alloc[sizeclass] s := c.alloc[sizeclass]
ctzIndex := uint8(s.allocCache)
if ctzIndex != 0 { theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache?
theBit := uint64(ctzVals[ctzIndex]) if theBit < 64 {
freeidx := s.freeindex // help the pre ssa compiler out here with cse. result := s.freeindex + uintptr(theBit)
result := freeidx + uintptr(theBit)
if result < s.nelems { if result < s.nelems {
s.allocCache >>= (theBit + 1) s.allocCache >>= (theBit + 1)
freeidx = result + 1 freeidx := result + 1
if freeidx%64 == 0 && freeidx != s.nelems { if freeidx%64 == 0 && freeidx != s.nelems {
// We just incremented s.freeindex so it isn't 0 // We just incremented s.freeindex so it isn't 0
// so we are moving to the next aCache. // so we are moving to the next aCache.
......
...@@ -190,62 +190,7 @@ func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits { ...@@ -190,62 +190,7 @@ func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
return markBits{bytePtr, uint8(1 << whichBit), allocBitIndex} return markBits{bytePtr, uint8(1 << whichBit), allocBitIndex}
} }
// ctzVals contains the count of trailing zeros for the // refillaCache takes 8 bytes s.allocBits starting at whichByte
// index. 0 returns 8 indicating 8 zeros.
// ctzVals maps a byte value to the count of its trailing zero bits.
// Entry 0 is 8, signalling that all eight bits are zero (no set bit
// in the byte); every other entry is the index of the byte's least
// significant set bit. ctz64 uses this table to scan a 64 bit word
// one byte at a time.
var ctzVals = [256]int8{
	8, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	5, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	6, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	5, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	7, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	5, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	6, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	5, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0,
	3, 0, 1, 0, 2, 0, 1, 0}
// ctz64 counts the trailing zero bits in markBits, i.e. the number of
// zeros below the least significant set bit. A zero word returns 64
// (the width of the word), which guarantees callers' scan loops
// terminate. It is a temporary software stand in for the hardware
// count trailing zeros instruction (IA bsf, which only handles
// non-zero 64 bit words).
func ctz64(markBits uint64) uint64 {
	if markBits == 0 {
		return 64 // no set bit anywhere in the word
	}
	count := uint64(0)
	for {
		if z := ctzVals[markBits&0xff]; z != 8 {
			// The low byte contains the least significant set bit.
			return count + uint64(z)
		}
		// Low byte is all zeros: account for it and move on.
		count += 8
		markBits >>= 8
	}
}
// refillAllocCache takes 8 bytes s.allocBits starting at whichByte
// and negates them so that ctz (count trailing zeros) instructions // and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64 bit // can be used. It then places these 8 bytes into the cached 64 bit
// s.allocCache. // s.allocCache.
...@@ -278,7 +223,8 @@ func (s *mspan) nextFreeIndex() uintptr { ...@@ -278,7 +223,8 @@ func (s *mspan) nextFreeIndex() uintptr {
} }
aCache := s.allocCache aCache := s.allocCache
bitIndex := ctz64(aCache)
bitIndex := sys.Ctz64(aCache)
for bitIndex == 64 { for bitIndex == 64 {
// Move index to start of next cached bits. // Move index to start of next cached bits.
sfreeindex = (sfreeindex + 64) &^ (64 - 1) sfreeindex = (sfreeindex + 64) &^ (64 - 1)
...@@ -290,8 +236,9 @@ func (s *mspan) nextFreeIndex() uintptr { ...@@ -290,8 +236,9 @@ func (s *mspan) nextFreeIndex() uintptr {
// Refill s.allocCache with the next 64 alloc bits. // Refill s.allocCache with the next 64 alloc bits.
s.refillAllocCache(whichByte) s.refillAllocCache(whichByte)
aCache = s.allocCache aCache = s.allocCache
bitIndex = ctz64(aCache) bitIndex = sys.Ctz64(aCache)
// Nothing was available try again now allocCache has been refilled. // nothing available in cached bits
// grab the next 8 bytes and try again.
} }
result := sfreeindex + uintptr(bitIndex) result := sfreeindex + uintptr(bitIndex)
if result >= snelems { if result >= snelems {
......
...@@ -145,7 +145,7 @@ type mspan struct { ...@@ -145,7 +145,7 @@ type mspan struct {
// Cache of the allocBits at freeindex. allocCache is shifted // Cache of the allocBits at freeindex. allocCache is shifted
// such that the lowest bit corresponds to the bit freeindex. // such that the lowest bit corresponds to the bit freeindex.
// allocCache holds the complement of allocBits, thus allowing // allocCache holds the complement of allocBits, thus allowing
// ctz64 (count trailing zero) to use it directly. // ctz (count trailing zero) to use it directly.
// allocCache may contain bits beyond s.nelems; the caller must ignore // allocCache may contain bits beyond s.nelems; the caller must ignore
// these. // these.
allocCache uint64 allocCache uint64
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment