Commit 122384e4 authored by Rick Hudson's avatar Rick Hudson

runtime: Remove boundary bit logic.

This is an experiment to see if removing the boundary bit logic will
lead to fewer cache misses and improved performance. Instead of using
boundary bits we use the span information to get element size and use
some bit whacking to get the boundary without having to touch the
random heap bits which cause cache misses.

Furthermore once the boundary bit is removed we can either use that
bit for a simpler checkmark routine or we can reduce the number of
bits in the GC bitmap to 2 bits per pointer sized work. For example
the 2 bits at the boundary can be used for marking and pointer/scalar
differentiation. Since we don't need the mark bit except at the
boundary nibble of the object other nibbles can use this bit
as a noscan bit to indicate that there are no more pointers in
the object.

Currently the changed included in this CL slows down the garbage
benchmark. With the boundary bits garbage gives 5.78 and without
(this CL) it gives 5.88 which is a 2% slowdown.

Change-Id: Id68f831ad668176f7dc9f7b57b339e4ebb6dc4c2
Reviewed-on: https://go-review.googlesource.com/6665Reviewed-by: default avatarAustin Clements <austin@google.com>
parent 7be32d03
......@@ -32,7 +32,7 @@
// describe p and the high 4 bits describe p+ptrSize.
//
// The 4 bits for each word are:
// 0001 - bitBoundary: this is the start of an object
// 0001 - not used
// 0010 - bitMarked: this object has been marked by GC
// tt00 - word type bits, as in a type bitmap.
//
......@@ -77,7 +77,6 @@ const (
heapBitsWidth = 4
heapBitmapScale = ptrSize * (8 / heapBitsWidth) // number of data bytes per heap bitmap byte
bitBoundary = 1
bitMarked = 2
typeShift = 2
)
......@@ -151,30 +150,21 @@ func heapBitsForSpan(base uintptr) (hbits heapBits) {
// heapBitsForObject returns the base address for the heap object
// containing the address p, along with the heapBits for base.
// If p does not point into a heap object, heapBitsForObject returns base == 0.
func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits) {
// If p does not point into a heap object,
// return base == 0
// otherwise return the base of the object.
func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits, s *mspan) {
if p < mheap_.arena_start || p >= mheap_.arena_used {
return
}
// If heap bits for the pointer-sized word containing p have bitBoundary set,
// then we know this is the base of the object, and we can stop now.
// This handles the case where p is the base and, due to rounding
// when looking up the heap bits, also the case where p points beyond
// the base but still into the first pointer-sized word of the object.
hbits = heapBitsForAddr(p)
if hbits.isBoundary() {
base = p &^ (ptrSize - 1)
return
}
// Otherwise, p points into the middle of an object.
// p points into the heap, but possibly to the middle of an object.
// Consult the span table to find the block beginning.
// TODO(rsc): Factor this out.
k := p >> _PageShift
x := k
x -= mheap_.arena_start >> _PageShift
s := h_spans[x]
s = h_spans[x]
if s == nil || pageID(k) < s.start || p >= s.limit || s.state != mSpanInUse {
if s == nil || s.state == _MSpanStack {
// If s is nil, the virtual address has never been part of the heap.
......@@ -216,19 +206,16 @@ func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits) {
base += n * s.elemsize
}
if base == p {
print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), "\n")
throw("failed to find block beginning")
}
// Now that we know the actual base, compute heapBits to return to caller.
hbits = heapBitsForAddr(base)
if !hbits.isBoundary() {
throw("missing boundary at computed object start")
}
return
}
// prefetch the bits.
func (h heapBits) prefetch() {
prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
}
// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
......@@ -258,14 +245,6 @@ func (h heapBits) setMarkedNonAtomic() {
*h.bitp |= bitMarked << h.shift
}
// isBoundary reports whether the heap bits have the boundary bit set.
func (h heapBits) isBoundary() bool {
return *h.bitp&(bitBoundary<<h.shift) != 0
}
// Note that there is no setBoundary or setBoundaryNonAtomic.
// Boundaries are always in bulk, for the entire span.
// typeBits returns the heap bits' type bits.
func (h heapBits) typeBits() uint8 {
return (*h.bitp >> (h.shift + typeShift)) & typeMask
......@@ -299,60 +278,8 @@ func (h heapBits) setCheckmarked() {
// initSpan initializes the heap bitmap for a span.
func (h heapBits) initSpan(size, n, total uintptr) {
if size == ptrSize {
// Only possible on 64-bit system, since minimum size is 8.
// Set all nibbles to bitBoundary using uint64 writes.
nbyte := n * ptrSize / heapBitmapScale
nuint64 := nbyte / 8
bitp := subtractb(h.bitp, nbyte-1)
for i := uintptr(0); i < nuint64; i++ {
const boundary64 = bitBoundary |
bitBoundary<<4 |
bitBoundary<<8 |
bitBoundary<<12 |
bitBoundary<<16 |
bitBoundary<<20 |
bitBoundary<<24 |
bitBoundary<<28 |
bitBoundary<<32 |
bitBoundary<<36 |
bitBoundary<<40 |
bitBoundary<<44 |
bitBoundary<<48 |
bitBoundary<<52 |
bitBoundary<<56 |
bitBoundary<<60
*(*uint64)(unsafe.Pointer(bitp)) = boundary64
bitp = addb(bitp, 8)
}
return
}
if size*n < total {
// To detect end of object during GC object scan,
// add boundary just past end of last block.
// The object scan knows to stop when it reaches
// the end of the span, but in this case the object
// ends before the end of the span.
//
// TODO(rsc): If the bitmap bits were going to be typeDead
// otherwise, what's the point of this?
// Can we delete this logic?
n++
}
step := size / heapBitmapScale
bitp := h.bitp
for i := uintptr(0); i < n; i++ {
*bitp = bitBoundary
bitp = subtractb(bitp, step)
}
}
// clearSpan clears the heap bitmap bytes for the span.
func (h heapBits) clearSpan(size, n, total uintptr) {
if total%heapBitmapScale != 0 {
throw("clearSpan: unaligned length")
throw("initSpan: unaligned length")
}
nbyte := total / heapBitmapScale
memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
......@@ -371,9 +298,7 @@ func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
bitp := h.bitp
for i := uintptr(0); i < n; i += 2 {
x := int(*bitp)
if x&0x11 != 0x11 {
throw("missing bitBoundary")
}
if (x>>typeShift)&typeMask == typeDead {
x += (typeScalar - typeDead) << typeShift
}
......@@ -392,9 +317,6 @@ func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
bitp := h.bitp
step := size / heapBitmapScale
for i := uintptr(0); i < n; i++ {
if *bitp&bitBoundary == 0 {
throw("missing bitBoundary")
}
x := *bitp
if (x>>typeShift)&typeMask == typeDead {
x += (typeScalar - typeDead) << typeShift
......@@ -416,10 +338,6 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
bitp := h.bitp
for i := uintptr(0); i < n; i += 2 {
x := int(*bitp)
if x&(bitBoundary|bitBoundary<<4) != (bitBoundary | bitBoundary<<4) {
throw("missing bitBoundary")
}
switch typ := (x >> typeShift) & typeMask; typ {
case typeScalar:
x += (typeDead - typeScalar) << typeShift
......@@ -448,10 +366,6 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
step := size / heapBitmapScale
for i := uintptr(0); i < n; i++ {
x := int(*bitp)
if x&bitBoundary == 0 {
throw("missing bitBoundary")
}
switch typ := (x >> typeShift) & typeMask; {
case typ == typeScalarCheckmarked && (x>>(4+typeShift))&typeMask != typeDead:
x += (typeScalar - typeScalarCheckmarked) << typeShift
......@@ -503,7 +417,7 @@ func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
if x&bitMarked != 0 {
x &^= bitMarked
} else {
x = bitBoundary // clear marked bit, set type bits to typeDead
x = 0
f(base + i*size)
}
*bitp = uint8(x)
......@@ -522,10 +436,6 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// From here till marked label marking the object as allocated
// and storing type info in the GC bitmap.
h := heapBitsForAddr(x)
if debugMalloc && (*h.bitp>>h.shift)&0x0f != bitBoundary {
println("runtime: bits =", (*h.bitp>>h.shift)&0x0f)
throw("bad bits in markallocated")
}
var ti, te uintptr
var ptrmask *uint8
......@@ -572,7 +482,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
}
if size == 2*ptrSize {
*h.bitp = *ptrmask | bitBoundary
// h.shift is 0 for all sizes > ptrSize.
*h.bitp = *ptrmask
return
}
te = uintptr(typ.size) / ptrSize
......@@ -581,15 +492,17 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
te /= 2
}
// Copy pointer bitmask into the bitmap.
// TODO(rlh): add comment addressing the following concerns:
// If size > 2*ptrSize, is x guaranteed to be at least 2*ptrSize-aligned?
// And if type occupies and odd number of words, why are we only going through half
// of ptrmask and why don't we have to shift everything by 4 on odd iterations?
for i := uintptr(0); i < dataSize; i += 2 * ptrSize {
v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
ti++
if ti == te {
ti = 0
}
if i == 0 {
v |= bitBoundary
}
if i+ptrSize == dataSize {
v &^= typeMask << (4 + typeShift)
}
......@@ -783,12 +696,6 @@ func unrollgcproginplace_m(v unsafe.Pointer, typ *_type, size, size0 uintptr) {
// Mark first word as bitAllocated.
// Mark word after last as typeDead.
// TODO(rsc): Explain why we need to set this boundary.
// Aren't the boundaries always set for the whole span?
// Did unrollgcproc1 overwrite the boundary bit?
// Is that okay?
h := heapBitsForAddr(uintptr(v))
*h.bitp |= bitBoundary << h.shift
if size0 < size {
h := heapBitsForAddr(uintptr(v) + size0)
*h.bitp &^= typeMask << typeShift
......
......@@ -173,7 +173,7 @@ func mCentral_FreeSpan(c *mcentral, s *mspan, n int32, start gclinkptr, end gcli
s.needzero = 1
s.freelist = 0
unlock(&c.lock)
heapBitsForSpan(s.base()).clearSpan(s.layout())
heapBitsForSpan(s.base()).initSpan(s.layout())
mHeap_Free(&mheap_, s, 0)
return true
}
......
......@@ -404,7 +404,7 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
}
}
// Scan the object b of size n, adding pointers to wbuf.
// Scan the object b of size n bytes, adding pointers to wbuf.
// Return possibly new wbuf to use.
// If ptrmask != nil, it specifies where pointers are in b.
// If ptrmask == nil, the GC bitmap should be consulted.
......@@ -417,13 +417,16 @@ func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
// Find bits of the beginning of the object.
var hbits heapBits
if ptrmask == nil {
b, hbits = heapBitsForObject(b)
var s *mspan
b, hbits, s = heapBitsForObject(b)
if b == 0 {
return
}
n = s.elemsize
if n == 0 {
n = mheap_.arena_used - b
throw("scanobject n == 0")
}
}
for i := uintptr(0); i < n; i += ptrSize {
......@@ -433,15 +436,9 @@ func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
// dense mask (stack or data)
bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * typeBitsWidth)) & typeMask
} else {
// Check if we have reached end of span.
// n is an overestimate of the size of the object.
if (b+i)%_PageSize == 0 && h_spans[(b-arena_start)>>_PageShift] != h_spans[(b+i-arena_start)>>_PageShift] {
break
}
bits = uintptr(hbits.typeBits())
if i > 0 && (hbits.isBoundary() || bits == typeDead) {
break // reached beginning of the next object
if bits == typeDead {
break // no more pointers in this object
}
hbits = hbits.next()
}
......@@ -468,7 +465,7 @@ func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
}
// Mark the object.
if obj, hbits := heapBitsForObject(obj); obj != 0 {
if obj, hbits, _ := heapBitsForObject(obj); obj != 0 {
greyobject(obj, b, i, hbits, gcw)
}
}
......@@ -481,7 +478,7 @@ func shade(b uintptr) {
if !inheap(b) {
throw("shade: passed an address not in the heap")
}
if obj, hbits := heapBitsForObject(b); obj != 0 {
if obj, hbits, _ := heapBitsForObject(b); obj != 0 {
// TODO: this would be a great place to put a check to see
// if we are harvesting and if we are then we should
// figure out why there is a call to shade when the
......
......@@ -218,7 +218,7 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
if preserve {
throw("can't preserve large span")
}
heapBitsForSpan(p).clearSpan(s.layout())
heapBitsForSpan(p).initSpan(s.layout())
s.needzero = 1
// important to set sweepgen before returning it to heap
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment