Commit 0234dfd4 authored by Russ Cox

runtime: use 2-bit heap bitmap (in place of 4-bit)

Previous CLs changed the representation of the non-heap type bitmaps
to be 1-bit bitmaps (pointer or not). Before this CL, the heap bitmap
stored a 2-bit type for each word and a mark bit and checkmark bit
for the first word of the object. (There used to be additional per-word bits.)

Reduce heap bitmap to 2-bit, with 1 dedicated to pointer or not,
and the other used for mark, checkmark, and "keep scanning forward
to find pointers in this object." See comments for details.

This CL replaces heapBitsSetType with very slow but obviously correct code.
A followup CL will optimize it. (Spoiler: the new code is faster than Go 1.4 was.)

Change-Id: I999577a133f3cfecacebdec9cdc3573c235c7fb9
Reviewed-on: https://go-review.googlesource.com/9703
Reviewed-by: Rick Hudson <rlh@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
parent 6d8a147b
...@@ -10,6 +10,12 @@ import ( ...@@ -10,6 +10,12 @@ import (
"testing" "testing"
) )
const (
typeScalar = 0
typePointer = 1
typeDead = 255
)
// TestGCInfo tests that various objects in heap, data and bss receive correct GC pointer type info. // TestGCInfo tests that various objects in heap, data and bss receive correct GC pointer type info.
func TestGCInfo(t *testing.T) { func TestGCInfo(t *testing.T) {
verifyGCInfo(t, "bss ScalarPtr", &bssScalarPtr, infoScalarPtr) verifyGCInfo(t, "bss ScalarPtr", &bssScalarPtr, infoScalarPtr)
...@@ -37,7 +43,9 @@ func TestGCInfo(t *testing.T) { ...@@ -37,7 +43,9 @@ func TestGCInfo(t *testing.T) {
verifyGCInfo(t, "stack iface", new(Iface), nonStackInfo(infoIface)) verifyGCInfo(t, "stack iface", new(Iface), nonStackInfo(infoIface))
for i := 0; i < 10; i++ { for i := 0; i < 10; i++ {
verifyGCInfo(t, "heap PtrSlice", escape(&make([]*byte, 10)[0]), infoPtr10)
verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), infoScalarPtr) verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), infoScalarPtr)
verifyGCInfo(t, "heap ScalarPtrSlice", escape(&make([]ScalarPtr, 4)[0]), infoScalarPtr4)
verifyGCInfo(t, "heap PtrScalar", escape(new(PtrScalar)), infoPtrScalar) verifyGCInfo(t, "heap PtrScalar", escape(new(PtrScalar)), infoPtrScalar)
verifyGCInfo(t, "heap BigStruct", escape(new(BigStruct)), infoBigStruct()) verifyGCInfo(t, "heap BigStruct", escape(new(BigStruct)), infoBigStruct())
verifyGCInfo(t, "heap string", escape(new(string)), infoString) verifyGCInfo(t, "heap string", escape(new(string)), infoString)
...@@ -78,18 +86,7 @@ func escape(p interface{}) interface{} { ...@@ -78,18 +86,7 @@ func escape(p interface{}) interface{} {
return p return p
} }
const ( var infoPtr10 = []byte{typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer}
typeDead = iota
typeScalar
typePointer
)
const (
BitsString = iota // unused
BitsSlice // unused
BitsIface
BitsEface
)
type ScalarPtr struct { type ScalarPtr struct {
q int q int
...@@ -102,6 +99,8 @@ type ScalarPtr struct { ...@@ -102,6 +99,8 @@ type ScalarPtr struct {
var infoScalarPtr = []byte{typeScalar, typePointer, typeScalar, typePointer, typeScalar, typePointer} var infoScalarPtr = []byte{typeScalar, typePointer, typeScalar, typePointer, typeScalar, typePointer}
var infoScalarPtr4 = append(append(append(append([]byte(nil), infoScalarPtr...), infoScalarPtr...), infoScalarPtr...), infoScalarPtr...)
type PtrScalar struct { type PtrScalar struct {
q *int q *int
w int w int
......
...@@ -730,14 +730,13 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector { ...@@ -730,14 +730,13 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
i := uintptr(0) i := uintptr(0)
hbits := heapBitsForAddr(p) hbits := heapBitsForAddr(p)
for ; i < nptr; i++ { for ; i < nptr; i++ {
bits := hbits.typeBits() if i >= 2 && !hbits.isMarked() {
if bits == typeDead {
break // end of object break // end of object
} }
hbits = hbits.next() if hbits.isPointer() {
if bits == typePointer {
tmpbuf[i/8] |= 1 << (i % 8) tmpbuf[i/8] |= 1 << (i % 8)
} }
hbits = hbits.next()
} }
return bitvector{int32(i), &tmpbuf[0]} return bitvector{int32(i), &tmpbuf[0]}
} }
...@@ -6,48 +6,36 @@ ...@@ -6,48 +6,36 @@
// //
// Stack, data, and bss bitmaps // Stack, data, and bss bitmaps
// //
// Not handled in this file, but worth mentioning: stack frames and global data // Stack frames and global variables in the data and bss sections are described
// in the data and bss sections are described by 1-bit bitmaps in which 0 means // by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
// scalar or uninitialized or dead and 1 means pointer to visit during GC. // to be visited during GC.
//
// Comparing this 1-bit form with the 2-bit form described below, 0 represents
// both the 2-bit 00 and 01, while 1 represents the 2-bit 10.
// Therefore conversions between the two (until the 2-bit form is gone)
// can be done by x>>1 for 2-bit to 1-bit and x+1 for 1-bit to 2-bit.
//
// Type bitmaps
//
// Types that aren't too large
// record information about the layout of their memory words using a type bitmap.
// The bitmap holds two bits for each pointer-sized word. The two-bit values are:
//
// 00 - typeDead: not a pointer, and no pointers in the rest of the object
// 01 - typeScalar: not a pointer
// 10 - typePointer: a pointer that GC should trace
// 11 - unused
//
// typeDead only appears in type bitmaps in Go type descriptors
// and in type bitmaps embedded in the heap bitmap (see below).
// //
// Heap bitmap // Heap bitmap
// //
// The allocated heap comes from a subset of the memory in the range [start, used), // The allocated heap comes from a subset of the memory in the range [start, used),
// where start == mheap_.arena_start and used == mheap_.arena_used. // where start == mheap_.arena_start and used == mheap_.arena_used.
// The heap bitmap comprises 4 bits for each pointer-sized word in that range, // The heap bitmap comprises 2 bits for each pointer-sized word in that range,
// stored in bytes indexed backward in memory from start. // stored in bytes indexed backward in memory from start.
// That is, the byte at address start-1 holds the 4-bit entries for the two words // That is, the byte at address start-1 holds the 2-bit entries for the four words
// start, start+ptrSize, the byte at start-2 holds the entries for start+2*ptrSize, // start through start+3*ptrSize, the byte at start-2 holds the entries for
// start+3*ptrSize, and so on. // start+4*ptrSize through start+7*ptrSize, and so on.
// In the byte holding the entries for addresses p and p+ptrSize, the low 4 bits // In each byte, the low 2 bits describe the first word, the next 2 bits describe
// describe p and the high 4 bits describe p+ptrSize. // the next word, and so on.
// //
// The 4 bits for each word are: // In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// 0001 - not used // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
// 0010 - bitMarked: this object has been marked by GC // The meaning of the high bit depends on the position of the word being described
// tt00 - word type bits, as in a type bitmap. // in its allocated object. In the first word, the high bit is the GC ``marked'' bit.
// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
// In the third and later words, the high bit indicates that the object is still
// being described. In these words, if a bit pair with a high bit 0 is encountered,
// the low bit can also be assumed to be 0, and the object description is over.
// This 00 is called the ``dead'' encoding: it signals that the rest of the words
// in the object are uninteresting to the garbage collector.
// //
// The code makes use of the fact that the zero value for a heap bitmap nibble // The code makes use of the fact that the zero value for a heap bitmap
// has no boundary bit set, no marked bit set, and type bits == typeDead. // has no live pointer bit set and is (depending on position), not marked,
// not checkmarked, and is the dead encoding.
// These properties must be preserved when modifying the encoding. // These properties must be preserved when modifying the encoding.
// //
// Checkmarks // Checkmarks
...@@ -57,44 +45,32 @@ ...@@ -57,44 +45,32 @@
// collector implementation. As a sanity check, the GC has a 'checkmark' // collector implementation. As a sanity check, the GC has a 'checkmark'
// mode that retraverses the object graph with the world stopped, to make // mode that retraverses the object graph with the world stopped, to make
// sure that everything that should be marked is marked. // sure that everything that should be marked is marked.
// In checkmark mode, in the heap bitmap, the type bits for the first word // In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
// of an object are redefined: // for the second word of the object holds the checkmark bit.
// // When not in checkmark mode, this bit is set to 1.
// 00 - typeScalarCheckmarked // typeScalar, checkmarked
// 01 - typeScalar // typeScalar, not checkmarked
// 10 - typePointer // typePointer, not checkmarked
// 11 - typePointerCheckmarked // typePointer, checkmarked
// //
// That is, typeDead is redefined to be typeScalar + a checkmark, and the // The smallest possible allocation is 8 bytes. On a 32-bit machine, that
// previously unused 11 pattern is redefined to be typePointer + a checkmark. // means every allocated object has two words, so there is room for the
// To prepare for this mode, we must move any typeDead in the first word of // checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
// a multiword object to the second word. // just one word, so the second bit pair is not available for encoding the
// checkmark. However, because non-pointer allocations are combined
// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
// must be a pointer, so the type bit in the first word is not actually needed.
// It is still used in general, except in checkmark mode the type bit is repurposed
// as the checkmark bit and then reinitialized (to 1) as the type bit when
// finished.
package runtime package runtime
import "unsafe" import "unsafe"
const ( const (
typeDead = 0 bitPointer = 1
typeScalarCheckmarked = 0 bitMarked = 2
typeScalar = 1
typePointer = 2
typePointerCheckmarked = 3
typeBitsWidth = 2 // # of type bits per pointer-sized word
typeMask = 1<<typeBitsWidth - 1
heapBitsWidth = 4
heapBitmapScale = ptrSize * (8 / heapBitsWidth) // number of data bytes per heap bitmap byte
bitMarked = 2
typeShift = 2
)
// Information from the compiler about the layout of stack frames. heapBitsWidth = 2 // heap bitmap bits to describe one pointer
type bitvector struct { heapBitmapScale = ptrSize * (8 / heapBitsWidth) // number of data bytes described by one heap bitmap byte
n int32 // # of bits )
bytedata *uint8
}
// addb returns the byte pointer p+n. // addb returns the byte pointer p+n.
//go:nowritebarrier //go:nowritebarrier
...@@ -141,8 +117,9 @@ type heapBits struct { ...@@ -141,8 +117,9 @@ type heapBits struct {
// heapBitsForAddr returns the heapBits for the address addr. // heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used). // The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
func heapBitsForAddr(addr uintptr) heapBits { func heapBitsForAddr(addr uintptr) heapBits {
// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
off := (addr - mheap_.arena_start) / ptrSize off := (addr - mheap_.arena_start) / ptrSize
return heapBits{(*uint8)(unsafe.Pointer(mheap_.arena_start - off/2 - 1)), uint32(4 * (off & 1))} return heapBits{(*uint8)(unsafe.Pointer(mheap_.arena_start - off/4 - 1)), uint32(2 * (off & 3))}
} }
// heapBitsForSpan returns the heapBits for the span base address base. // heapBitsForSpan returns the heapBits for the span base address base.
...@@ -229,20 +206,39 @@ func (h heapBits) prefetch() { ...@@ -229,20 +206,39 @@ func (h heapBits) prefetch() {
// That is, if h describes address p, h.next() describes p+ptrSize. // That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result. // Note that next does not modify h. The caller must record the result.
func (h heapBits) next() heapBits { func (h heapBits) next() heapBits {
if h.shift == 0 { if h.shift < 8-heapBitsWidth {
return heapBits{h.bitp, 4} return heapBits{h.bitp, h.shift + heapBitsWidth}
} }
return heapBits{subtractb(h.bitp, 1), 0} return heapBits{subtractb(h.bitp, 1), 0}
} }
// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
func (h heapBits) forward(n uintptr) heapBits {
n += uintptr(h.shift) / heapBitsWidth
return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsWidth}
}
// bits returns the heap bits for the current word.
// The caller can test isMarked and isPointer by &-ing with bitMarked and bitPointer.
// The result includes in its higher bits the bits for subsequent words
// described by the same bitmap byte.
func (h heapBits) bits() uint32 {
return uint32(*h.bitp) >> h.shift
}
// isMarked reports whether the heap bits have the marked bit set. // isMarked reports whether the heap bits have the marked bit set.
// h must describe the initial word of the object.
func (h heapBits) isMarked() bool { func (h heapBits) isMarked() bool {
return *h.bitp&(bitMarked<<h.shift) != 0 return *h.bitp&(bitMarked<<h.shift) != 0
} }
// setMarked sets the marked bit in the heap bits, atomically. // setMarked sets the marked bit in the heap bits, atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarked() { func (h heapBits) setMarked() {
// Each byte of GC bitmap holds info for two words. // Each byte of GC bitmap holds info for four words.
// Might be racing with other updates, so use atomic update always. // Might be racing with other updates, so use atomic update always.
// We used to be clever here and use a non-atomic update in certain // We used to be clever here and use a non-atomic update in certain
// cases, but it's not worth the risk. // cases, but it's not worth the risk.
...@@ -250,31 +246,68 @@ func (h heapBits) setMarked() { ...@@ -250,31 +246,68 @@ func (h heapBits) setMarked() {
} }
// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically. // setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarkedNonAtomic() { func (h heapBits) setMarkedNonAtomic() {
*h.bitp |= bitMarked << h.shift *h.bitp |= bitMarked << h.shift
} }
// typeBits returns the heap bits' type bits. // isPointer reports whether the heap bits describe a pointer word.
func (h heapBits) typeBits() uint8 { // h must describe the initial word of the object.
return (*h.bitp >> (h.shift + typeShift)) & typeMask func (h heapBits) isPointer() bool {
return (*h.bitp>>h.shift)&bitPointer != 0
}
// hasPointers reports whether the given object has any pointers.
// It must be told how large the object at h is, so that it does not read too
// far into the bitmap.
// h must describe the initial word of the object.
func (h heapBits) hasPointers(size uintptr) bool {
if size == ptrSize { // 1-word objects are always pointers
return true
}
// Otherwise, at least a 2-word object, and at least 2-word aligned,
// so h.shift is either 0 or 4, so we know we can get the bits for the
// first two words out of *h.bitp.
// If either of the first two words is a pointer, not pointer free.
b := uint32(*h.bitp >> h.shift)
if b&(bitPointer|bitPointer<<heapBitsWidth) != 0 {
return true
}
if size == 2*ptrSize {
return false
}
// At least a 4-word object. Check scan bit (aka marked bit) in third word.
if h.shift == 0 {
// The third word's entry is in the same bitmap byte, two entries above the first.
return b&(bitMarked<<(2*heapBitsWidth)) != 0
}
// h.shift is 4 here, so the third word's entry is the lowest pair of the
// following bitmap byte, reached with subtractb.
return uint32(*subtractb(h.bitp, 1))&bitMarked != 0
}
// isCheckmarked reports whether the heap bits have the checkmarked bit set. // isCheckmarked reports whether the heap bits have the checkmarked bit set.
func (h heapBits) isCheckmarked() bool { // It must be told how large the object at h is, because the encoding of the
typ := h.typeBits() // checkmark bit varies by size.
return typ == typeScalarCheckmarked || typ == typePointerCheckmarked // h must describe the initial word of the object.
func (h heapBits) isCheckmarked(size uintptr) bool {
if size == ptrSize {
return (*h.bitp>>h.shift)&bitPointer != 0
}
// All multiword objects are 2-word aligned,
// so we know that the initial word's 2-bit pair
// and the second word's 2-bit pair are in the
// same heap bitmap byte, *h.bitp.
return (*h.bitp>>(heapBitsWidth+h.shift))&bitMarked != 0
} }
// setCheckmarked sets the checkmarked bit. // setCheckmarked sets the checkmarked bit.
func (h heapBits) setCheckmarked() { // It must be told how large the object at h is, because the encoding of the
typ := h.typeBits() // checkmark bit varies by size.
if typ == typeScalar { // h must describe the initial word of the object.
// Clear low type bit to turn 01 into 00. func (h heapBits) setCheckmarked(size uintptr) {
atomicand8(h.bitp, ^((1 << typeShift) << h.shift)) if size == ptrSize {
} else if typ == typePointer { atomicor8(h.bitp, bitPointer<<h.shift)
// Set low type bit to turn 10 into 11. return
atomicor8(h.bitp, (1<<typeShift)<<h.shift)
} }
atomicor8(h.bitp, bitMarked<<(heapBitsWidth+h.shift))
} }
// The methods operating on spans all require that h has been returned // The methods operating on spans all require that h has been returned
...@@ -295,95 +328,43 @@ func (h heapBits) initSpan(size, n, total uintptr) { ...@@ -295,95 +328,43 @@ func (h heapBits) initSpan(size, n, total uintptr) {
} }
// initCheckmarkSpan initializes a span for being checkmarked. // initCheckmarkSpan initializes a span for being checkmarked.
// This would be a no-op except that we need to rewrite any // It clears the checkmark bits, which are set to 1 in normal operation.
// typeDead bits in the first word of the object into typeScalar
// followed by a typeDead in the second word of the object.
func (h heapBits) initCheckmarkSpan(size, n, total uintptr) { func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
if size == ptrSize { // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
if ptrSize == 8 && size == ptrSize {
// Checkmark bit is type bit, bottom bit of every 2-bit entry.
// Only possible on 64-bit system, since minimum size is 8. // Only possible on 64-bit system, since minimum size is 8.
// Must update both top and bottom nibble of each byte. // Must clear type bit (checkmark bit) of every word.
// There is no second word in these objects, so all we have // The type bit is the lower of every two-bit pair.
// to do is rewrite typeDead to typeScalar by adding the 1<<typeShift bit.
bitp := h.bitp bitp := h.bitp
for i := uintptr(0); i < n; i += 2 { for i := uintptr(0); i < n; i += 4 {
x := int(*bitp) *bitp &^= bitPointer | bitPointer<<2 | bitPointer<<4 | bitPointer<<6
if (x>>typeShift)&typeMask == typeDead {
x += (typeScalar - typeDead) << typeShift
}
if (x>>(4+typeShift))&typeMask == typeDead {
x += (typeScalar - typeDead) << (4 + typeShift)
}
*bitp = uint8(x)
bitp = subtractb(bitp, 1) bitp = subtractb(bitp, 1)
} }
return return
} }
// Update bottom nibble for first word of each object.
// If the bottom nibble says typeDead, change to typeScalar
// and clear top nibble to mark as typeDead.
bitp := h.bitp
step := size / heapBitmapScale
for i := uintptr(0); i < n; i++ { for i := uintptr(0); i < n; i++ {
x := *bitp *h.bitp &^= bitMarked << (heapBitsWidth + h.shift)
if (x>>typeShift)&typeMask == typeDead { h = h.forward(size / ptrSize)
x += (typeScalar - typeDead) << typeShift
x &= 0x0f // clear top nibble to typeDead
}
bitp = subtractb(bitp, step)
} }
} }
// clearCheckmarkSpan removes all the checkmarks from a span. // clearCheckmarkSpan undoes all the checkmarking in a span.
// If it finds a multiword object starting with typeScalar typeDead, // The actual checkmark bits are ignored, so the only work to do
// it rewrites the heap bits to the simpler typeDead typeDead. // is to fix the pointer bits. (Pointer bits are ignored by scanobject
// but consulted by typedmemmove.)
func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
if size == ptrSize { // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
if ptrSize == 8 && size == ptrSize {
// Checkmark bit is type bit, bottom bit of every 2-bit entry.
// Only possible on 64-bit system, since minimum size is 8. // Only possible on 64-bit system, since minimum size is 8.
// Must update both top and bottom nibble of each byte. // Must set type bit (checkmark bit) of every word.
// typeScalarCheckmarked can be left as typeDead, // The type bit is the lower of every two-bit pair.
// but we want to change typeScalar back to typeDead.
bitp := h.bitp bitp := h.bitp
for i := uintptr(0); i < n; i += 2 { for i := uintptr(0); i < n; i += 4 {
x := int(*bitp) *bitp |= bitPointer | bitPointer<<2 | bitPointer<<4 | bitPointer<<6
switch typ := (x >> typeShift) & typeMask; typ {
case typeScalar:
x += (typeDead - typeScalar) << typeShift
case typePointerCheckmarked:
x += (typePointer - typePointerCheckmarked) << typeShift
}
switch typ := (x >> (4 + typeShift)) & typeMask; typ {
case typeScalar:
x += (typeDead - typeScalar) << (4 + typeShift)
case typePointerCheckmarked:
x += (typePointer - typePointerCheckmarked) << (4 + typeShift)
}
*bitp = uint8(x)
bitp = subtractb(bitp, 1) bitp = subtractb(bitp, 1)
} }
return
}
// Update bottom nibble for first word of each object.
// If the bottom nibble says typeScalarCheckmarked and the top is not typeDead,
// change to typeScalar. Otherwise leave, since typeScalarCheckmarked == typeDead.
// If the bottom nibble says typePointerCheckmarked, change to typePointer.
bitp := h.bitp
step := size / heapBitmapScale
for i := uintptr(0); i < n; i++ {
x := int(*bitp)
switch typ := (x >> typeShift) & typeMask; {
case typ == typeScalarCheckmarked && (x>>(4+typeShift))&typeMask != typeDead:
x += (typeScalar - typeScalarCheckmarked) << typeShift
case typ == typePointerCheckmarked:
x += (typePointer - typePointerCheckmarked) << typeShift
}
*bitp = uint8(x)
bitp = subtractb(bitp, step)
} }
} }
...@@ -393,44 +374,98 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { ...@@ -393,44 +374,98 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
// bits for the first two words (or one for single-word objects) to typeDead // bits for the first two words (or one for single-word objects) to typeDead
// and then calls f(p), where p is the object's base address. // and then calls f(p), where p is the object's base address.
// f is expected to add the object to a free list. // f is expected to add the object to a free list.
// For non-free objects, heapBitsSweepSpan turns off the marked bit.
func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) { func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
h := heapBitsForSpan(base) h := heapBitsForSpan(base)
if size == ptrSize { switch {
// Only possible on 64-bit system, since minimum size is 8. default:
// Must read and update both top and bottom nibble of each byte. throw("heapBitsSweepSpan")
case size == ptrSize:
// Consider mark bits in all four 2-bit entries of each bitmap byte.
bitp := h.bitp bitp := h.bitp
for i := uintptr(0); i < n; i += 2 { for i := uintptr(0); i < n; i += 4 {
x := int(*bitp) x := uint32(*bitp)
if x&bitMarked != 0 { if x&bitMarked != 0 {
x &^= bitMarked x &^= bitMarked
} else { } else {
x &^= typeMask << typeShift x &^= bitPointer
f(base + i*ptrSize) f(base + i*ptrSize)
} }
if x&(bitMarked<<2) != 0 {
x &^= bitMarked << 2
} else {
x &^= bitPointer << 2
f(base + (i+1)*ptrSize)
}
if x&(bitMarked<<4) != 0 { if x&(bitMarked<<4) != 0 {
x &^= bitMarked << 4 x &^= bitMarked << 4
} else { } else {
x &^= typeMask << (4 + typeShift) x &^= bitPointer << 4
f(base + (i+1)*ptrSize) f(base + (i+2)*ptrSize)
}
if x&(bitMarked<<6) != 0 {
x &^= bitMarked << 6
} else {
x &^= bitPointer << 6
f(base + (i+3)*ptrSize)
} }
*bitp = uint8(x) *bitp = uint8(x)
bitp = subtractb(bitp, 1) bitp = subtractb(bitp, 1)
} }
return
}
bitp := h.bitp case size%(4*ptrSize) == 0:
step := size / heapBitmapScale // Mark bit is in first word of each object.
for i := uintptr(0); i < n; i++ { // Each object starts at bit 0 of a heap bitmap byte.
x := int(*bitp) bitp := h.bitp
if x&bitMarked != 0 { step := size / heapBitmapScale
x &^= bitMarked for i := uintptr(0); i < n; i++ {
} else { x := uint32(*bitp)
x = 0 if x&bitMarked != 0 {
f(base + i*size) x &^= bitMarked
} else {
x = 0
f(base + i*size)
}
*bitp = uint8(x)
bitp = subtractb(bitp, step)
}
case size%(4*ptrSize) == 2*ptrSize:
// Mark bit is in first word of each object,
// but every other object starts halfway through a heap bitmap byte.
// Unroll loop 2x to handle alternating shift count and step size.
bitp := h.bitp
step := size / heapBitmapScale
var i uintptr
for i = uintptr(0); i < n; i += 2 {
x := uint32(*bitp)
if x&bitMarked != 0 {
x &^= bitMarked
} else {
x &^= 0x0f
f(base + i*size)
if size > 2*ptrSize {
x = 0
}
}
*bitp = uint8(x)
if i+1 >= n {
break
}
bitp = subtractb(bitp, step)
x = uint32(*bitp)
if x&(bitMarked<<4) != 0 {
x &^= bitMarked << 4
} else {
x &^= 0xf0
f(base + (i+1)*size)
if size > 2*ptrSize {
*subtractb(bitp, 1) = 0
}
}
*bitp = uint8(x)
bitp = subtractb(bitp, step+1)
} }
*bitp = uint8(x)
bitp = subtractb(bitp, step)
} }
} }
...@@ -456,7 +491,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { ...@@ -456,7 +491,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// when initializing the span, and then the atomicor8 here // when initializing the span, and then the atomicor8 here
// goes away - heapBitsSetType would be a no-op // goes away - heapBitsSetType would be a no-op
// in that case. // in that case.
atomicor8(h.bitp, typePointer<<(typeShift+h.shift)) atomicor8(h.bitp, bitPointer<<h.shift)
return return
} }
if typ.kind&kindGCProg != 0 { if typ.kind&kindGCProg != 0 {
...@@ -489,41 +524,28 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { ...@@ -489,41 +524,28 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
} }
// Copy from 1-bit ptrmask into 4-bit bitmap. // Copy from 1-bit ptrmask into 2-bit bitmap.
elemSize := typ.size // If size is a multiple of 4 words, then the bitmap bytes for the object
var v uint32 // pending byte of 4-bit bitmap; uint32 for better code gen // are not shared with any other object and can be written directly.
nv := 0 // number of bits added to v // On 64-bit systems, many sizes are only 16-byte aligned; half of
for i := uintptr(0); i < dataSize; i += elemSize { // those are not multiples of 4 words (for example, 48/8 = 6 words);
// At each word, b holds the pending bits from the 1-bit bitmap, // those share either the leading byte or the trailing byte of their bitmaps
// with a sentinel 1 bit above all the actual bits. // with another object.
// When b == 1, that means it is out of bits and needs to be refreshed. nptr := typ.size / ptrSize
// *(p+1) is the next byte to read. _ = nptr
p := ptrmask for i := uintptr(0); i < dataSize/ptrSize; i++ {
b := uint32(*p) | 0x100 atomicand8(h.bitp, ^((bitPointer | bitMarked) << h.shift))
for j := uintptr(0); j < elemSize; j += ptrSize { j := i % nptr
if b == 1 { if (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
p = addb(p, 1) atomicor8(h.bitp, bitPointer<<h.shift)
b = uint32(*p) | 0x100 }
} if i >= 2 {
// b&1 is 1 for pointer, 0 for scalar. atomicor8(h.bitp, bitMarked<<h.shift)
// We want typePointer (2) or typeScalar (1), so add 1.
v |= ((b & 1) + 1) << (uint(nv) + typeShift)
b >>= 1
if nv += heapBitsWidth; nv == 8 {
*h.bitp = uint8(v)
h.bitp = subtractb(h.bitp, 1)
v = 0
nv = 0
}
} }
h = h.next()
} }
if dataSize < size {
// Finish final byte of bitmap and mark next word (if any) with typeDead (0) atomicand8(h.bitp, ^((bitPointer | bitMarked) << h.shift))
if nv != 0 {
*h.bitp = uint8(v)
h.bitp = subtractb(h.bitp, 1)
} else if dataSize < size {
*h.bitp = 0
} }
} }
...@@ -600,7 +622,7 @@ const ( ...@@ -600,7 +622,7 @@ const (
// ppos is a pointer to position in mask, in bits. // ppos is a pointer to position in mask, in bits.
// sparse says to generate 4-bits per word mask for heap (1-bit for data/bss otherwise). // sparse says to generate 4-bits per word mask for heap (1-bit for data/bss otherwise).
//go:nowritebarrier //go:nowritebarrier
func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) *byte { func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace bool) *byte {
pos := *ppos pos := *ppos
mask := (*[1 << 30]byte)(unsafe.Pointer(maskp)) mask := (*[1 << 30]byte)(unsafe.Pointer(maskp))
for { for {
...@@ -616,6 +638,8 @@ func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) ...@@ -616,6 +638,8 @@ func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool)
for i := 0; i < siz; i++ { for i := 0; i < siz; i++ {
v := p[i/8] >> (uint(i) % 8) & 1 v := p[i/8] >> (uint(i) % 8) & 1
if inplace { if inplace {
throw("gc inplace")
const typeShift = 2
// Store directly into GC bitmap. // Store directly into GC bitmap.
h := heapBitsForAddr(uintptr(unsafe.Pointer(&mask[pos]))) h := heapBitsForAddr(uintptr(unsafe.Pointer(&mask[pos])))
if h.shift == 0 { if h.shift == 0 {
...@@ -624,12 +648,6 @@ func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) ...@@ -624,12 +648,6 @@ func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool)
*h.bitp |= v << (4 + typeShift) *h.bitp |= v << (4 + typeShift)
} }
pos += ptrSize pos += ptrSize
} else if sparse {
throw("sparse")
// 4-bits per word, type bits in high bits
v <<= (pos % 8) + typeShift
mask[pos/8] |= v
pos += heapBitsWidth
} else { } else {
// 1 bit per word, for data/bss bitmap // 1 bit per word, for data/bss bitmap
mask[pos/8] |= v << (pos % 8) mask[pos/8] |= v << (pos % 8)
...@@ -647,7 +665,7 @@ func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) ...@@ -647,7 +665,7 @@ func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool)
prog = (*byte)(add(unsafe.Pointer(prog), ptrSize)) prog = (*byte)(add(unsafe.Pointer(prog), ptrSize))
var prog1 *byte var prog1 *byte
for i := uintptr(0); i < siz; i++ { for i := uintptr(0); i < siz; i++ {
prog1 = unrollgcprog1(&mask[0], prog, &pos, inplace, sparse) prog1 = unrollgcprog1(&mask[0], prog, &pos, inplace)
} }
if *prog1 != insArrayEnd { if *prog1 != insArrayEnd {
throw("unrollgcprog: array does not end with insArrayEnd") throw("unrollgcprog: array does not end with insArrayEnd")
...@@ -667,7 +685,7 @@ func unrollglobgcprog(prog *byte, size uintptr) bitvector { ...@@ -667,7 +685,7 @@ func unrollglobgcprog(prog *byte, size uintptr) bitvector {
mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys)) mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys))
mask[masksize] = 0xa1 mask[masksize] = 0xa1
pos := uintptr(0) pos := uintptr(0)
prog = unrollgcprog1(&mask[0], prog, &pos, false, false) prog = unrollgcprog1(&mask[0], prog, &pos, false)
if pos != size/ptrSize { if pos != size/ptrSize {
print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize, "\n") print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize, "\n")
throw("unrollglobgcprog: bad program size") throw("unrollglobgcprog: bad program size")
...@@ -682,17 +700,21 @@ func unrollglobgcprog(prog *byte, size uintptr) bitvector { ...@@ -682,17 +700,21 @@ func unrollglobgcprog(prog *byte, size uintptr) bitvector {
} }
func unrollgcproginplace_m(v unsafe.Pointer, typ *_type, size, size0 uintptr) { func unrollgcproginplace_m(v unsafe.Pointer, typ *_type, size, size0 uintptr) {
throw("unrollinplace")
// TODO(rsc): Update for 1-bit bitmaps.
// TODO(rsc): Explain why these non-atomic updates are okay. // TODO(rsc): Explain why these non-atomic updates are okay.
pos := uintptr(0) pos := uintptr(0)
prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1]))) prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
for pos != size0 { for pos != size0 {
unrollgcprog1((*byte)(v), prog, &pos, true, true) unrollgcprog1((*byte)(v), prog, &pos, true)
} }
// Mark first word as bitAllocated. // Mark first word as bitAllocated.
// Mark word after last as typeDead. // Mark word after last as typeDead.
if size0 < size { if size0 < size {
h := heapBitsForAddr(uintptr(v) + size0) h := heapBitsForAddr(uintptr(v) + size0)
const typeMask = 0
const typeShift = 0
*h.bitp &^= typeMask << typeShift *h.bitp &^= typeMask << typeShift
} }
} }
...@@ -707,7 +729,7 @@ func unrollgcprog_m(typ *_type) { ...@@ -707,7 +729,7 @@ func unrollgcprog_m(typ *_type) {
if *mask == 0 { if *mask == 0 {
pos := uintptr(8) // skip the unroll flag pos := uintptr(8) // skip the unroll flag
prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1]))) prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
prog = unrollgcprog1(mask, prog, &pos, false, false) prog = unrollgcprog1(mask, prog, &pos, false)
if *prog != insEnd { if *prog != insEnd {
throw("unrollgcprog: program does not end with insEnd") throw("unrollgcprog: program does not end with insEnd")
} }
...@@ -737,26 +759,24 @@ func getgcmask(ep interface{}) (mask []byte) { ...@@ -737,26 +759,24 @@ func getgcmask(ep interface{}) (mask []byte) {
for datap := &firstmoduledata; datap != nil; datap = datap.next { for datap := &firstmoduledata; datap != nil; datap = datap.next {
// data // data
if datap.data <= uintptr(p) && uintptr(p) < datap.edata { if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
bitmap := datap.gcdatamask.bytedata
n := (*ptrtype)(unsafe.Pointer(t)).elem.size n := (*ptrtype)(unsafe.Pointer(t)).elem.size
mask = make([]byte, n/ptrSize) mask = make([]byte, n/ptrSize)
for i := uintptr(0); i < n; i += ptrSize { for i := uintptr(0); i < n; i += ptrSize {
off := (uintptr(p) + i - datap.data) / ptrSize off := (uintptr(p) + i - datap.data) / ptrSize
bits := (*addb(datap.gcdatamask.bytedata, off/8) >> (off % 8)) & 1 mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
bits += 1 // convert 1-bit to 2-bit
mask[i/ptrSize] = bits
} }
return return
} }
// bss // bss
if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss { if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
bitmap := datap.gcbssmask.bytedata
n := (*ptrtype)(unsafe.Pointer(t)).elem.size n := (*ptrtype)(unsafe.Pointer(t)).elem.size
mask = make([]byte, n/ptrSize) mask = make([]byte, n/ptrSize)
for i := uintptr(0); i < n; i += ptrSize { for i := uintptr(0); i < n; i += ptrSize {
off := (uintptr(p) + i - datap.bss) / ptrSize off := (uintptr(p) + i - datap.bss) / ptrSize
bits := (*addb(datap.gcbssmask.bytedata, off/8) >> (off % 8)) & 1 mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
bits += 1 // convert 1-bit to 2-bit
mask[i/ptrSize] = bits
} }
return return
} }
...@@ -768,8 +788,14 @@ func getgcmask(ep interface{}) (mask []byte) { ...@@ -768,8 +788,14 @@ func getgcmask(ep interface{}) (mask []byte) {
if mlookup(uintptr(p), &base, &n, nil) != 0 { if mlookup(uintptr(p), &base, &n, nil) != 0 {
mask = make([]byte, n/ptrSize) mask = make([]byte, n/ptrSize)
for i := uintptr(0); i < n; i += ptrSize { for i := uintptr(0); i < n; i += ptrSize {
bits := heapBitsForAddr(base + i).typeBits() hbits := heapBitsForAddr(base + i)
mask[i/ptrSize] = bits if hbits.isPointer() {
mask[i/ptrSize] = 1
}
if i >= 2*ptrSize && !hbits.isMarked() {
mask[i/ptrSize] = 255
break
}
} }
return return
} }
...@@ -801,10 +827,9 @@ func getgcmask(ep interface{}) (mask []byte) { ...@@ -801,10 +827,9 @@ func getgcmask(ep interface{}) (mask []byte) {
n := (*ptrtype)(unsafe.Pointer(t)).elem.size n := (*ptrtype)(unsafe.Pointer(t)).elem.size
mask = make([]byte, n/ptrSize) mask = make([]byte, n/ptrSize)
for i := uintptr(0); i < n; i += ptrSize { for i := uintptr(0); i < n; i += ptrSize {
bitmap := bv.bytedata
off := (uintptr(p) + i - frame.varp + size) / ptrSize off := (uintptr(p) + i - frame.varp + size) / ptrSize
bits := (*addb(bv.bytedata, off/8) >> (off % 8)) & 1 mask[i/ptrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
bits += 1 // convert 1-bit to 2-bit
mask[i/ptrSize] = bits
} }
} }
return return
......
...@@ -597,20 +597,19 @@ func scanobject(b uintptr, gcw *gcWork) { ...@@ -597,20 +597,19 @@ func scanobject(b uintptr, gcw *gcWork) {
// Avoid needless hbits.next() on last iteration. // Avoid needless hbits.next() on last iteration.
hbits = hbits.next() hbits = hbits.next()
} }
bits := uintptr(hbits.typeBits()) // During checkmarking, 1-word objects store the checkmark
if bits == typeDead { // in the type bit for the one word. The only one-word objects
break // no more pointers in this object // are pointers, or else they'd be merged with other non-pointer
} // data into larger allocations.
if n != 1 {
if bits <= typeScalar { // typeScalar, typeDead, typeScalarMarked b := hbits.bits()
continue if i >= 2*ptrSize && b&bitMarked == 0 {
} break // no more pointers in this object
}
if bits&typePointer != typePointer { if b&bitPointer == 0 {
print("gc useCheckmark=", useCheckmark, " b=", hex(b), "\n") continue // not a pointer
throw("unexpected garbage collection bits") }
} }
// Work here is duplicated in scanblock. // Work here is duplicated in scanblock.
// If you make changes here, make changes there too. // If you make changes here, make changes there too.
...@@ -673,11 +672,11 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork ...@@ -673,11 +672,11 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
throw("checkmark found unmarked object") throw("checkmark found unmarked object")
} }
if hbits.isCheckmarked() { if hbits.isCheckmarked(span.elemsize) {
return return
} }
hbits.setCheckmarked() hbits.setCheckmarked(span.elemsize)
if !hbits.isCheckmarked() { if !hbits.isCheckmarked(span.elemsize) {
throw("setCheckmarked and isCheckmarked disagree") throw("setCheckmarked and isCheckmarked disagree")
} }
} else { } else {
...@@ -685,12 +684,11 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork ...@@ -685,12 +684,11 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
if hbits.isMarked() { if hbits.isMarked() {
return return
} }
hbits.setMarked() hbits.setMarked()
// If this is a noscan object, fast-track it to black // If this is a noscan object, fast-track it to black
// instead of greying it. // instead of greying it.
if hbits.typeBits() == typeDead { if !hbits.hasPointers(span.elemsize) {
gcw.bytesMarked += uint64(span.elemsize) gcw.bytesMarked += uint64(span.elemsize)
return return
} }
......
...@@ -352,6 +352,12 @@ func adjustpointer(adjinfo *adjustinfo, vpp unsafe.Pointer) { ...@@ -352,6 +352,12 @@ func adjustpointer(adjinfo *adjustinfo, vpp unsafe.Pointer) {
} }
} }
// Information from the compiler about the layout of stack frames.
// A bitvector is a bit count (n) together with a pointer (bytedata)
// to the first byte of storage for those bits.
type bitvector struct {
n int32 // # of bits
bytedata *uint8 // first byte of the bit storage; NOTE(review): presumably packed 8 bits per byte, low bit first, matching how getgcmask indexes bitmaps (byte off/8, bit off%8) — confirm
}
type gobitvector struct { type gobitvector struct {
n uintptr n uintptr
bytedata []uint8 bytedata []uint8
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment