Commit 3479b065 authored by Rick Hudson's avatar Rick Hudson

[dev.garbage] runtime: allocate directly from GC mark bits

Instead of building a freelist from the mark bits generated
by the GC this CL allocates directly from the mark bits.

The approach moves the mark bits from the pointer/no pointer
heap structures into their own per span data structures. The
mark/allocation vectors consist of a single mark bit per
object. Two vectors are maintained, one for allocation and
one for the GC's mark phase. During the GC cycle's sweep
phase the interpretation of the vectors is swapped. The
mark vector becomes the allocation vector and the old
allocation vector is cleared and becomes the mark vector that
the next GC cycle will use.

Marked entries in the allocation vector indicate that the
object is not free. Each allocation vector maintains a boundary
between areas of the span already allocated from and areas
not yet allocated from. As objects are allocated this boundary
is moved until it reaches the end of the span. At this point
further allocations will be done from another span.

Since we no longer sweep a span inspecting each freed object
the responsibility for maintaining pointer/scalar bits in
the heapBitMap containing is now the responsibility of the
the routines doing the actual allocation.

This CL is functionally complete and ready for performance
tuning.

Change-Id: I336e0fc21eef1066e0b68c7067cc71b9f3d50e04
Reviewed-on: https://go-review.googlesource.com/19470Reviewed-by: default avatarAustin Clements <austin@google.com>
parent dc65a82e
...@@ -472,9 +472,13 @@ func dumpobjs() { ...@@ -472,9 +472,13 @@ func dumpobjs() {
if n > uintptr(len(freemark)) { if n > uintptr(len(freemark)) {
throw("freemark array doesn't have enough entries") throw("freemark array doesn't have enough entries")
} }
for l := s.freelist; l.ptr() != nil; l = l.ptr().next {
freemark[(uintptr(l)-p)/size] = true for freeIndex := s.freeindex; freeIndex < s.nelems; freeIndex++ {
if s.isFree(freeIndex) {
freemark[freeIndex] = true
}
} }
for j := uintptr(0); j < n; j, p = j+1, p+size { for j := uintptr(0); j < n; j, p = j+1, p+size {
if freemark[j] { if freemark[j] {
freemark[j] = false freemark[j] = false
...@@ -709,7 +713,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector { ...@@ -709,7 +713,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
i := uintptr(0) i := uintptr(0)
hbits := heapBitsForAddr(p) hbits := heapBitsForAddr(p)
for ; i < nptr; i++ { for ; i < nptr; i++ {
if i >= 2 && !hbits.isMarked() { if i >= 2 && !hbits.morePointers() {
break // end of object break // end of object
} }
if hbits.isPointer() { if hbits.isPointer() {
......
...@@ -502,23 +502,34 @@ const ( ...@@ -502,23 +502,34 @@ const (
// weight allocation. If it is a heavy weight allocation the caller must // weight allocation. If it is a heavy weight allocation the caller must
// determine whether a new GC cycle needs to be started or if the GC is active // determine whether a new GC cycle needs to be started or if the GC is active
// whether this goroutine needs to assist the GC. // whether this goroutine needs to assist the GC.
// https://golang.org/cl/5350 motivates why this routine should preform a
// prefetch.
func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, shouldhelpgc bool) { func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, shouldhelpgc bool) {
s := c.alloc[sizeclass] s := c.alloc[sizeclass]
v = s.freelist shouldhelpgc = false
if v.ptr() == nil { freeIndex := s.nextFreeIndex(s.freeindex)
if freeIndex == s.nelems {
// The span is full.
if uintptr(s.ref) != s.nelems {
throw("s.ref != s.nelems && freeIndex == s.nelems")
}
systemstack(func() { systemstack(func() {
c.refill(int32(sizeclass)) c.refill(int32(sizeclass))
}) })
shouldhelpgc = true shouldhelpgc = true
s = c.alloc[sizeclass] s = c.alloc[sizeclass]
v = s.freelist freeIndex = s.nextFreeIndex(s.freeindex)
}
if freeIndex >= s.nelems {
throw("freeIndex is not valid")
} }
s.freelist = v.ptr().next
v = gclinkptr(freeIndex*s.elemsize + s.base())
// Advance the freeIndex.
s.freeindex = freeIndex + 1
s.ref++ s.ref++
// prefetchnta offers best performance, see change list message. if uintptr(s.ref) > s.nelems {
prefetchnta(uintptr(v.ptr().next)) throw("s.ref > s.nelems")
}
return return
} }
...@@ -655,10 +666,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer { ...@@ -655,10 +666,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
v, shouldhelpgc = c.nextFree(sizeclass) v, shouldhelpgc = c.nextFree(sizeclass)
x = unsafe.Pointer(v) x = unsafe.Pointer(v)
if flags&flagNoZero == 0 { if flags&flagNoZero == 0 {
v.ptr().next = 0
if size > 2*sys.PtrSize && ((*[2]uintptr)(x))[1] != 0 {
memclr(unsafe.Pointer(v), size) memclr(unsafe.Pointer(v), size)
} // TODO:(rlh) Only clear if object is not known to be zeroed.
} }
} }
} else { } else {
...@@ -667,12 +676,13 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer { ...@@ -667,12 +676,13 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
systemstack(func() { systemstack(func() {
s = largeAlloc(size, flags) s = largeAlloc(size, flags)
}) })
s.freeindex = 1
x = unsafe.Pointer(uintptr(s.start << pageShift)) x = unsafe.Pointer(uintptr(s.start << pageShift))
size = s.elemsize size = s.elemsize
} }
if flags&flagNoScan != 0 { if flags&flagNoScan != 0 {
// All objects are pre-marked as noscan. Nothing to do. heapBitsSetTypeNoScan(uintptr(x), size)
} else { } else {
// If allocating a defer+arg block, now that we've picked a malloc size // If allocating a defer+arg block, now that we've picked a malloc size
// large enough to hold everything, cut the "asked for" size down to // large enough to hold everything, cut the "asked for" size down to
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
// In each 2-bit entry, the lower bit holds the same information as in the 1-bit // In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
// The meaning of the high bit depends on the position of the word being described // The meaning of the high bit depends on the position of the word being described
// in its allocated object. In the first word, the high bit is the GC ``marked'' bit. // in its allocated object. In the first word, the high bit is unused.
// In the second word, the high bit is the GC ``checkmarked'' bit (see below). // In the second word, the high bit is the GC ``checkmarked'' bit (see below).
// In the third and later words, the high bit indicates that the object is still // In the third and later words, the high bit indicates that the object is still
// being described. In these words, if a bit pair with a high bit 0 is encountered, // being described. In these words, if a bit pair with a high bit 0 is encountered,
...@@ -33,12 +33,13 @@ ...@@ -33,12 +33,13 @@
// in the object are uninteresting to the garbage collector. // in the object are uninteresting to the garbage collector.
// //
// The 2-bit entries are split when written into the byte, so that the top half // The 2-bit entries are split when written into the byte, so that the top half
// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits. // of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
// bits.
// This form allows a copy from the 1-bit to the 4-bit form to keep the // This form allows a copy from the 1-bit to the 4-bit form to keep the
// pointer bits contiguous, instead of having to space them out. // pointer bits contiguous, instead of having to space them out.
// //
// The code makes use of the fact that the zero value for a heap bitmap // The code makes use of the fact that the zero value for a heap bitmap
// has no live pointer bit set and is (depending on position), not marked, // has no live pointer bit set and is (depending on position), not used,
// not checkmarked, and is the dead encoding. // not checkmarked, and is the dead encoding.
// These properties must be preserved when modifying the encoding. // These properties must be preserved when modifying the encoding.
// //
...@@ -63,6 +64,7 @@ ...@@ -63,6 +64,7 @@
// It is still used in general, except in checkmark the type bit is repurposed // It is still used in general, except in checkmark the type bit is repurposed
// as the checkmark bit and then reinitialized (to 1) as the type bit when // as the checkmark bit and then reinitialized (to 1) as the type bit when
// finished. // finished.
//
package runtime package runtime
...@@ -254,16 +256,20 @@ func markBitsForAddr(p uintptr) markBits { ...@@ -254,16 +256,20 @@ func markBitsForAddr(p uintptr) markBits {
func (s *mspan) markBitsForAddr(p uintptr) markBits { func (s *mspan) markBitsForAddr(p uintptr) markBits {
byteOffset := p - s.base() byteOffset := p - s.base()
markBitIndex := byteOffset / s.elemsize // TODO if hot spot use fancy divide.... markBitIndex := uintptr(0)
return s.markBitsForIndex(markBitIndex) if byteOffset != 0 {
} // markBitIndex := (p - s.base()) / s.elemsize, using division by multiplication
markBitIndex = uintptr(uint64(byteOffset) >> s.divShift * uint64(s.divMul) >> s.divShift2)
func (s *mspan) markBitsForIndex(markBitIndex uintptr) markBits { }
whichByte := markBitIndex / 8 whichByte := markBitIndex / 8
whichBit := markBitIndex % 8 whichBit := markBitIndex % 8
return markBits{&s.gcmarkBits[whichByte], uint8(1 << whichBit), markBitIndex} return markBits{&s.gcmarkBits[whichByte], uint8(1 << whichBit), markBitIndex}
} }
func (s *mspan) markBitsForBase() markBits {
return markBits{&s.gcmarkBits[0], uint8(1), 0}
}
// isMarked reports whether mark bit m is set. // isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool { func (m markBits) isMarked() bool {
return *m.bytep&m.mask != 0 return *m.bytep&m.mask != 0
...@@ -307,6 +313,17 @@ func markBitsForSpan(base uintptr) (mbits markBits) { ...@@ -307,6 +313,17 @@ func markBitsForSpan(base uintptr) (mbits markBits) {
return mbits return mbits
} }
// advance advances the markBits to the next object in the span.
func (m *markBits) advance() {
if m.mask == 1<<7 {
m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
m.mask = 1
} else {
m.mask = m.mask << 1
}
m.index++
}
// heapBitsForAddr returns the heapBits for the address addr. // heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used). // The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
// //
...@@ -440,28 +457,13 @@ func (h heapBits) bits() uint32 { ...@@ -440,28 +457,13 @@ func (h heapBits) bits() uint32 {
return uint32(*h.bitp) >> (h.shift & 31) return uint32(*h.bitp) >> (h.shift & 31)
} }
// isMarked reports whether the heap bits have the marked bit set. // morePointers returns true if this word and all remaining words in this object
// h must describe the initial word of the object. // are scalars.
func (h heapBits) isMarked() bool { // h must not describe the first or second word of the object.
func (h heapBits) morePointers() bool {
return *h.bitp&(bitMarked<<h.shift) != 0 return *h.bitp&(bitMarked<<h.shift) != 0
} }
// setMarked sets the marked bit in the heap bits, atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarked() {
// Each byte of GC bitmap holds info for four words.
// Might be racing with other updates, so use atomic update always.
// We used to be clever here and use a non-atomic update in certain
// cases, but it's not worth the risk.
atomic.Or8(h.bitp, bitMarked<<h.shift)
}
// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarkedNonAtomic() {
*h.bitp |= bitMarked << h.shift
}
// isPointer reports whether the heap bits describe a pointer word. // isPointer reports whether the heap bits describe a pointer word.
// h must describe the initial word of the object. // h must describe the initial word of the object.
// //
...@@ -733,106 +735,134 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { ...@@ -733,106 +735,134 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
} }
} }
// heapBitsSweepSpan coordinates the sweeping of a span by reading // heapBitsSweepSpan coordinates the sweeping of a span and inspects
// and updating the corresponding heap bitmap entries. // each freed object. If objects are being traced or if msan is enabled
// For each free object in the span, heapBitsSweepSpan sets the type // then heapBitsSweepSpan calls f(p), where p is the object's base address.
// bits for the first four words (less for smaller objects) to scalar/dead // When not tracing and msan is not enabled heapBitsSweepSpan is lightweight.
// and then calls f(p), where p is the object's base address. // heapBitsSweepSpan never alters the pointer/scalar heapBit maps. HeapBit map
// f is expected to add the object to a free list. // maintenance is the responsibility of the allocation routines.
// For non-free objects, heapBitsSweepSpan turns off the marked bit. // TODO:(rlh) Deal with the checkmark bits but moving them
func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) { // out of heap bitmap thus enabling bulk clearing.
func heapBitsSweepSpan(s *mspan, f func(uintptr)) (nfree int) {
base := s.base()
size := s.elemsize
n := s.nelems
cl := s.sizeclass
doCall := debug.allocfreetrace != 0 || msanenabled || cl == 0
h := heapBitsForSpan(base) h := heapBitsForSpan(base)
switch { switch {
default: default:
throw("heapBitsSweepSpan") throw("heapBitsSweepSpan")
case sys.PtrSize == 8 && size == sys.PtrSize: case sys.PtrSize == 8 && size == sys.PtrSize:
// Consider mark bits in all four 2-bit entries of each bitmap byte. nfree = heapBitsSweep8BitPtrs(h, s, base, n, cl, doCall, f)
bitp := h.bitp case size%(4*sys.PtrSize) == 0:
nfree = heapBitsSweepMap(h, s, base, size, n, cl, doCall, f)
case size%(4*sys.PtrSize) == 2*sys.PtrSize:
nfree = heapBitsSweepMap(h, s, base, size, n, cl, doCall, f)
}
return
}
func heapBitsSweep8BitPtrs(h heapBits, s *mspan, base, n uintptr, cl uint8, doCall bool, f func(uintptr)) (nfree int) {
mbits := s.markBitsForBase()
for i := uintptr(0); i < n; i += 4 { for i := uintptr(0); i < n; i += 4 {
x := uint32(*bitp)
// Note that unlike the other size cases, we leave the pointer bits set here. // Note that unlike the other size cases, we leave the pointer bits set here.
// These are initialized during initSpan when the span is created and left // These are initialized during initSpan when the span is created and left
// in place the whole time the span is used for pointer-sized objects. // in place the whole time the span is used for pointer-sized objects.
// That lets heapBitsSetType avoid an atomic update to set the pointer bit // That lets heapBitsSetType avoid an atomic update to set the pointer bit
// during allocation. // during allocation.
if x&bitMarked != 0 { if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
x &^= bitMarked if doCall {
} else {
f(base + i*sys.PtrSize) f(base + i*sys.PtrSize)
} }
if x&(bitMarked<<heapBitsShift) != 0 { if cl != 0 {
x &^= bitMarked << heapBitsShift nfree++
} else { }
}
mbits.advance()
if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
if doCall {
f(base + (i+1)*sys.PtrSize) f(base + (i+1)*sys.PtrSize)
} }
if x&(bitMarked<<(2*heapBitsShift)) != 0 { if cl != 0 {
x &^= bitMarked << (2 * heapBitsShift) nfree++
} else { }
}
mbits.advance()
if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
if doCall {
f(base + (i+2)*sys.PtrSize) f(base + (i+2)*sys.PtrSize)
} }
if x&(bitMarked<<(3*heapBitsShift)) != 0 { if cl != 0 {
x &^= bitMarked << (3 * heapBitsShift) nfree++
} else { }
}
mbits.advance()
if !(mbits.isMarked() || mbits.index >= s.freeindex && s.allocBits[mbits.index/8]&mbits.mask == 0) {
if doCall {
f(base + (i+3)*sys.PtrSize) f(base + (i+3)*sys.PtrSize)
} }
*bitp = uint8(x) if cl != 0 {
bitp = subtract1(bitp) nfree++
} }
case size%(4*sys.PtrSize) == 0:
// Mark bit is in first word of each object.
// Each object starts at bit 0 of a heap bitmap byte.
bitp := h.bitp
step := size / heapBitmapScale
for i := uintptr(0); i < n; i++ {
x := uint32(*bitp)
if x&bitMarked != 0 {
x &^= bitMarked
} else {
x = 0
f(base + i*size)
} }
*bitp = uint8(x) mbits.advance()
bitp = subtractb(bitp, step)
} }
return
}
case size%(4*sys.PtrSize) == 2*sys.PtrSize: func (m *markBits) nextFreed(maxIndex uintptr, s *mspan) bool {
// Mark bit is in first word of each object, mByte := *m.bytep
// but every other object starts halfway through a heap bitmap byte. for {
// Unroll loop 2x to handle alternating shift count and step size. for mByte == 0xff {
bitp := h.bitp if m.index >= maxIndex {
step := size / heapBitmapScale return false
var i uintptr
for i = uintptr(0); i < n; i += 2 {
x := uint32(*bitp)
if x&bitMarked != 0 {
x &^= bitMarked
} else {
x &^= bitMarked | bitPointer | (bitMarked|bitPointer)<<heapBitsShift
f(base + i*size)
if size > 2*sys.PtrSize {
x = 0
} }
m.index = (m.index + 8) &^ (8 - 1)
m.mask = 1
m.bytep = add1(m.bytep)
mByte = *m.bytep
} }
*bitp = uint8(x) if m.index >= maxIndex {
if i+1 >= n { return false
break }
for m.index < maxIndex {
if m.mask&mByte == 0 {
if m.index < s.freeindex {
return true
}
if s.allocBits[m.index/8]&m.mask != 0 {
return true
} }
bitp = subtractb(bitp, step) }
x = uint32(*bitp) if m.mask == 1<<7 {
if x&(bitMarked<<(2*heapBitsShift)) != 0 { m.mask = 1
x &^= bitMarked << (2 * heapBitsShift) m.bytep = add1(m.bytep)
mByte = *m.bytep
m.index++
break
} else { } else {
x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift) m.mask = m.mask << 1
f(base + (i+1)*size) m.index++
if size > 2*sys.PtrSize {
*subtract1(bitp) = 0
} }
} }
*bitp = uint8(x)
bitp = subtractb(bitp, step+1)
} }
return false
}
func heapBitsSweepMap(h heapBits, s *mspan, base, size, n uintptr, cl uint8, doCall bool, f func(uintptr)) (nfree int) {
twobits := s.markBitsForBase()
for twobits.nextFreed(n, s) {
if doCall {
f(base + twobits.index*size)
}
if cl != 0 {
nfree++
}
twobits.advance()
} }
return
} }
// heapBitsSetType records that the new allocation [x, x+size) // heapBitsSetType records that the new allocation [x, x+size)
...@@ -862,7 +892,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { ...@@ -862,7 +892,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// size is sizeof(_defer{}) (at least 6 words) and dataSize may be // size is sizeof(_defer{}) (at least 6 words) and dataSize may be
// arbitrarily larger. // arbitrarily larger.
// //
// The checks for size == ptrSize and size == 2*ptrSize can therefore // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
// assume that dataSize == size without checking it explicitly. // assume that dataSize == size without checking it explicitly.
if sys.PtrSize == 8 && size == sys.PtrSize { if sys.PtrSize == 8 && size == sys.PtrSize {
...@@ -902,10 +932,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { ...@@ -902,10 +932,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// (In general the number of instances of typ being allocated is // (In general the number of instances of typ being allocated is
// dataSize/typ.size.) // dataSize/typ.size.)
if sys.PtrSize == 4 && dataSize == sys.PtrSize { if sys.PtrSize == 4 && dataSize == sys.PtrSize {
// 1 pointer. // 1 pointer object. On 32-bit machines clear the bit for the
// unused second word.
if gcphase == _GCoff { if gcphase == _GCoff {
*h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
*h.bitp |= bitPointer << h.shift *h.bitp |= bitPointer << h.shift
} else { } else {
atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
atomic.Or8(h.bitp, bitPointer<<h.shift) atomic.Or8(h.bitp, bitPointer<<h.shift)
} }
} else { } else {
...@@ -918,7 +951,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { ...@@ -918,7 +951,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
} }
return return
} }
// Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0. // Otherwise typ.size must be 2*sys.PtrSize,
// and typ.kind&kindGCProg == 0.
if doubleCheck { if doubleCheck {
if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 { if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n") print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
...@@ -928,8 +962,19 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { ...@@ -928,8 +962,19 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
b := uint32(*ptrmask) b := uint32(*ptrmask)
hb := b & 3 hb := b & 3
if gcphase == _GCoff { if gcphase == _GCoff {
// bitPointer == 1, bitMarked is 1 << 4, heapBitsShift is 1.
// 110011 is shifted h.shift and complemented.
// This clears out the bits that are about to be
// ored into *h.hbitp in the next instructions.
*h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
*h.bitp |= uint8(hb << h.shift) *h.bitp |= uint8(hb << h.shift)
} else { } else {
// TODO:(rlh) since the GC is not concurrently setting the
// mark bits in the heap map anymore and malloc
// owns the span we are allocating in why does this have
// to be atomic?
atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
atomic.Or8(h.bitp, uint8(hb<<h.shift)) atomic.Or8(h.bitp, uint8(hb<<h.shift))
} }
return return
...@@ -1043,8 +1088,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { ...@@ -1043,8 +1088,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// Replicate ptrmask to fill entire pbits uintptr. // Replicate ptrmask to fill entire pbits uintptr.
// Doubling and truncating is fewer steps than // Doubling and truncating is fewer steps than
// iterating by nb each time. (nb could be 1.) // iterating by nb each time. (nb could be 1.)
// Since we loaded typ.ptrdata/ptrSize bits // Since we loaded typ.ptrdata/sys.PtrSize bits
// but are pretending to have typ.size/ptrSize, // but are pretending to have typ.size/sys.PtrSize,
// there might be no replication necessary/possible. // there might be no replication necessary/possible.
pbits = b pbits = b
endnb = nb endnb = nb
...@@ -1135,13 +1180,15 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { ...@@ -1135,13 +1180,15 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// not with its mark bit. Since there is only one allocation // not with its mark bit. Since there is only one allocation
// from a given span at a time, we should be able to set // from a given span at a time, we should be able to set
// these bits non-atomically. Not worth the risk right now. // these bits non-atomically. Not worth the risk right now.
hb = (b & 3) << (2 * heapBitsShift) hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
b >>= 2 b >>= 2
nb -= 2 nb -= 2
// Note: no bitMarker in hb because the first two words don't get markers from us. // Note: no bitMarker in hb because the first two words don't get markers from us.
if gcphase == _GCoff { if gcphase == _GCoff {
*hbitp &^= uint8((bitPointer | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
*hbitp |= uint8(hb) *hbitp |= uint8(hb)
} else { } else {
atomic.And8(hbitp, ^(uint8(bitPointer|bitPointer<<heapBitsShift) << (2 * heapBitsShift)))
atomic.Or8(hbitp, uint8(hb)) atomic.Or8(hbitp, uint8(hb))
} }
hbitp = subtract1(hbitp) hbitp = subtract1(hbitp)
...@@ -1331,6 +1378,41 @@ Phase4: ...@@ -1331,6 +1378,41 @@ Phase4:
} }
} }
// heapBitsSetTypeNoScan marks x as noscan. For objects with 1 or 2
// words set their bitPointers to off (0).
// All other objects have the first 3 bitPointers set to
// off (0) and the scan word in the third word
// also set to off (0).
func heapBitsSetTypeNoScan(x, size uintptr) {
h := heapBitsForAddr(uintptr(x))
bitp := h.bitp
if sys.PtrSize == 8 && size == sys.PtrSize {
// If this is truely noScan the tinyAlloc logic should have noticed
// and combined such objects.
throw("noscan object is too small")
} else if size%(4*sys.PtrSize) == 0 {
*bitp &^= bitPointer | bitPointer<<heapBitsShift | (bitMarked|bitPointer)<<(2*heapBitsShift)
} else if size%(4*sys.PtrSize) == 2*sys.PtrSize {
if h.shift == 0 {
*bitp &^= (bitPointer | bitPointer<<heapBitsShift)
if size > 2*sys.PtrSize {
*bitp &^= (bitPointer | bitMarked) << (2 * heapBitsShift)
}
} else if h.shift == 2 {
*bitp &^= bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
if size > 2*sys.PtrSize {
bitp = subtract1(bitp)
*bitp &^= bitPointer | bitMarked
}
} else {
throw("Type has unrecognized size")
}
} else {
throw("Type has unrecognized size")
}
}
var debugPtrmask struct { var debugPtrmask struct {
lock mutex lock mutex
data *byte data *byte
...@@ -1424,7 +1506,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u ...@@ -1424,7 +1506,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u
// progToPointerMask returns the 1-bit pointer mask output by the GC program prog. // progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size the size of the region described by prog, in bytes. // size the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/ptrSize bits. // The resulting bitvector will have no more than size/sys.PtrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector { func progToPointerMask(prog *byte, size uintptr) bitvector {
n := (size/sys.PtrSize + 7) / 8 n := (size/sys.PtrSize + 7) / 8
x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1] x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
...@@ -1560,7 +1642,7 @@ Run: ...@@ -1560,7 +1642,7 @@ Run:
// into a register and use that register for the entire loop // into a register and use that register for the entire loop
// instead of repeatedly reading from memory. // instead of repeatedly reading from memory.
// Handling fewer than 8 bits here makes the general loop simpler. // Handling fewer than 8 bits here makes the general loop simpler.
// The cutoff is ptrSize*8 - 7 to guarantee that when we add // The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
// the pattern to a bit buffer holding at most 7 bits (a partial byte) // the pattern to a bit buffer holding at most 7 bits (a partial byte)
// it will not overflow. // it will not overflow.
src := dst src := dst
...@@ -1855,7 +1937,7 @@ func getgcmask(ep interface{}) (mask []byte) { ...@@ -1855,7 +1937,7 @@ func getgcmask(ep interface{}) (mask []byte) {
if hbits.isPointer() { if hbits.isPointer() {
mask[i/sys.PtrSize] = 1 mask[i/sys.PtrSize] = 1
} }
if i >= 2*sys.PtrSize && !hbits.isMarked() { if i >= 2*sys.PtrSize && !hbits.morePointers() {
mask = mask[:i/sys.PtrSize] mask = mask[:i/sys.PtrSize]
break break
} }
......
...@@ -108,9 +108,11 @@ func (c *mcache) refill(sizeclass int32) *mspan { ...@@ -108,9 +108,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
_g_.m.locks++ _g_.m.locks++
// Return the current cached span to the central lists. // Return the current cached span to the central lists.
s := c.alloc[sizeclass] s := c.alloc[sizeclass]
if s.freelist.ptr() != nil {
throw("refill on a nonempty span") if uintptr(s.ref) != s.nelems {
throw("refill of span with free space remaining")
} }
if s != &emptymspan { if s != &emptymspan {
s.incache = false s.incache = false
} }
...@@ -120,10 +122,11 @@ func (c *mcache) refill(sizeclass int32) *mspan { ...@@ -120,10 +122,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
if s == nil { if s == nil {
throw("out of memory") throw("out of memory")
} }
if s.freelist.ptr() == nil {
println(s.ref, (s.npages<<_PageShift)/s.elemsize) if uintptr(s.ref) == s.nelems {
throw("empty span") throw("span has no free space")
} }
c.alloc[sizeclass] = s c.alloc[sizeclass] = s
_g_.m.locks-- _g_.m.locks--
return s return s
......
...@@ -18,7 +18,7 @@ import "runtime/internal/atomic" ...@@ -18,7 +18,7 @@ import "runtime/internal/atomic"
type mcentral struct { type mcentral struct {
lock mutex lock mutex
sizeclass int32 sizeclass int32
nonempty mSpanList // list of spans with a free object nonempty mSpanList // list of spans with a free object, ie a nonempty free list
empty mSpanList // list of spans with no free objects (or cached in an mcache) empty mSpanList // list of spans with no free objects (or cached in an mcache)
} }
...@@ -67,7 +67,9 @@ retry: ...@@ -67,7 +67,9 @@ retry:
c.empty.insertBack(s) c.empty.insertBack(s)
unlock(&c.lock) unlock(&c.lock)
s.sweep(true) s.sweep(true)
if s.freelist.ptr() != nil { freeIndex := s.nextFreeIndex(0)
if freeIndex != s.nelems {
s.freeindex = freeIndex
goto havespan goto havespan
} }
lock(&c.lock) lock(&c.lock)
...@@ -115,9 +117,6 @@ havespan: ...@@ -115,9 +117,6 @@ havespan:
// heap_live changed. // heap_live changed.
gcController.revise() gcController.revise()
} }
if s.freelist.ptr() == nil {
throw("freelist empty")
}
s.incache = true s.incache = true
return s return s
} }
...@@ -150,15 +149,11 @@ func (c *mcentral) uncacheSpan(s *mspan) { ...@@ -150,15 +149,11 @@ func (c *mcentral) uncacheSpan(s *mspan) {
// the latest generation. // the latest generation.
// If preserve=true, don't return the span to heap nor relink in MCentral lists; // If preserve=true, don't return the span to heap nor relink in MCentral lists;
// caller takes care of it. // caller takes care of it.
func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool) bool { func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool, wasempty bool) bool {
if s.incache { if s.incache {
throw("freespan into cached span") throw("freeSpan given cached span")
} }
// Add the objects back to s's free list.
wasempty := s.freelist.ptr() == nil
end.ptr().next = s.freelist
s.freelist = start
s.ref -= uint16(n) s.ref -= uint16(n)
if preserve { if preserve {
...@@ -190,16 +185,14 @@ func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, p ...@@ -190,16 +185,14 @@ func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, p
return false return false
} }
// s is completely freed, return it to the heap.
c.nonempty.remove(s) c.nonempty.remove(s)
s.needzero = 1 s.needzero = 1
s.freelist = 0
unlock(&c.lock) unlock(&c.lock)
mheap_.freeSpan(s, 0) mheap_.freeSpan(s, 0)
return true return true
} }
// Fetch a new span from the heap and carve into objects for the free list. // grow allocates a new empty span from the heap and initializes it for c's size class.
func (c *mcentral) grow() *mspan { func (c *mcentral) grow() *mspan {
npages := uintptr(class_to_allocnpages[c.sizeclass]) npages := uintptr(class_to_allocnpages[c.sizeclass])
size := uintptr(class_to_size[c.sizeclass]) size := uintptr(class_to_size[c.sizeclass])
...@@ -212,19 +205,7 @@ func (c *mcentral) grow() *mspan { ...@@ -212,19 +205,7 @@ func (c *mcentral) grow() *mspan {
p := uintptr(s.start << _PageShift) p := uintptr(s.start << _PageShift)
s.limit = p + size*n s.limit = p + size*n
head := gclinkptr(p)
tail := gclinkptr(p)
// i==0 iteration already done
for i := uintptr(1); i < n; i++ {
p += size
tail.ptr().next = gclinkptr(p)
tail = gclinkptr(p)
}
if s.freelist.ptr() != nil {
throw("freelist not empty")
}
tail.ptr().next = 0
s.freelist = head
heapBitsForSpan(s.base()).initSpan(s) heapBitsForSpan(s.base()).initSpan(s)
return s return s
} }
...@@ -1044,9 +1044,9 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork ...@@ -1044,9 +1044,9 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
if obj&(sys.PtrSize-1) != 0 { if obj&(sys.PtrSize-1) != 0 {
throw("greyobject: obj not pointer-aligned") throw("greyobject: obj not pointer-aligned")
} }
mbits := span.markBitsForAddr(obj)
if useCheckmark { if useCheckmark {
if !hbits.isMarked() { if !mbits.isMarked() {
printlock() printlock()
print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n") print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n") print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
...@@ -1068,10 +1068,10 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork ...@@ -1068,10 +1068,10 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
} }
} else { } else {
// If marked we have nothing to do. // If marked we have nothing to do.
if hbits.isMarked() { if mbits.isMarked() {
return return
} }
hbits.setMarked() mbits.setMarked()
// If this is a noscan object, fast-track it to black // If this is a noscan object, fast-track it to black
// instead of greying it. // instead of greying it.
...@@ -1138,7 +1138,7 @@ func gcmarknewobject_m(obj, size uintptr) { ...@@ -1138,7 +1138,7 @@ func gcmarknewobject_m(obj, size uintptr) {
if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen. if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark") throw("gcmarknewobject called while doing checkmark")
} }
heapBitsForAddr(obj).setMarked() markBitsForAddr(obj).setMarked()
atomic.Xadd64(&work.bytesMarked, int64(size)) atomic.Xadd64(&work.bytesMarked, int64(size))
} }
......
...@@ -192,16 +192,13 @@ func (s *mspan) sweep(preserve bool) bool { ...@@ -192,16 +192,13 @@ func (s *mspan) sweep(preserve bool) bool {
c := _g_.m.mcache c := _g_.m.mcache
freeToHeap := false freeToHeap := false
// Mark any free objects in this span so we don't collect them. // The allocBits indicate which unmarked objects don't need to be
sstart := uintptr(s.start << _PageShift) // processed since they were free at the end of the last GC cycle
for link := s.freelist; link.ptr() != nil; link = link.ptr().next { // and were not allocated since then.
if uintptr(link) < sstart || s.limit <= uintptr(link) { // If the allocBits index is >= s.freeindex and the bit
// Free list is corrupted. // is not marked then the object remains unallocated
dumpFreeList(s) // since the last GC.
throw("free list corrupted") // This situation is analogous to being on a freelist.
}
heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
}
// Unlink & free special records for any objects we're about to free. // Unlink & free special records for any objects we're about to free.
// Two complications here: // Two complications here:
...@@ -216,8 +213,8 @@ func (s *mspan) sweep(preserve bool) bool { ...@@ -216,8 +213,8 @@ func (s *mspan) sweep(preserve bool) bool {
for special != nil { for special != nil {
// A finalizer can be set for an inner byte of an object, find object beginning. // A finalizer can be set for an inner byte of an object, find object beginning.
p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
hbits := heapBitsForAddr(p) mbits := s.markBitsForAddr(p)
if !hbits.isMarked() { if !mbits.isMarked() {
// This object is not marked and has at least one special record. // This object is not marked and has at least one special record.
// Pass 1: see if it has at least one finalizer. // Pass 1: see if it has at least one finalizer.
hasFin := false hasFin := false
...@@ -225,7 +222,7 @@ func (s *mspan) sweep(preserve bool) bool { ...@@ -225,7 +222,7 @@ func (s *mspan) sweep(preserve bool) bool {
for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next { for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
if tmp.kind == _KindSpecialFinalizer { if tmp.kind == _KindSpecialFinalizer {
// Stop freeing of object if it has a finalizer. // Stop freeing of object if it has a finalizer.
hbits.setMarkedNonAtomic() mbits.setMarkedNonAtomic()
hasFin = true hasFin = true
break break
} }
...@@ -259,8 +256,7 @@ func (s *mspan) sweep(preserve bool) bool { ...@@ -259,8 +256,7 @@ func (s *mspan) sweep(preserve bool) bool {
// This thread owns the span now, so it can manipulate // This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations. // the block bitmap without atomic operations.
size, n, _ := s.layout() nfree = heapBitsSweepSpan(s, func(p uintptr) {
heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
// At this point we know that we are looking at garbage object // At this point we know that we are looking at garbage object
// that needs to be collected. // that needs to be collected.
if debug.allocfreetrace != 0 { if debug.allocfreetrace != 0 {
...@@ -288,17 +284,18 @@ func (s *mspan) sweep(preserve bool) bool { ...@@ -288,17 +284,18 @@ func (s *mspan) sweep(preserve bool) bool {
} else if size > sys.PtrSize { } else if size > sys.PtrSize {
*(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = 0 *(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = 0
} }
if head.ptr() == nil {
head = gclinkptr(p)
} else {
end.ptr().next = gclinkptr(p)
}
end = gclinkptr(p)
end.ptr().next = gclinkptr(0x0bade5)
nfree++
} }
}) })
wasempty := s.nextFreeIndex(s.freeindex) == s.nelems
s.freeindex = 0 // reset allocation index to start of span.
// Swap role of allocBits with gcmarkBits
// Clear gcmarkBits in preparation for next GC
s.allocBits, s.gcmarkBits = s.gcmarkBits, s.allocBits
s.clearGCMarkBits() // prepare for next GC
// We need to set s.sweepgen = h.sweepgen only when all blocks are swept, // We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer. // because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation // But we need to set it before we make the span available for allocation
...@@ -311,11 +308,14 @@ func (s *mspan) sweep(preserve bool) bool { ...@@ -311,11 +308,14 @@ func (s *mspan) sweep(preserve bool) bool {
print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
throw("MSpan_Sweep: bad span state after sweep") throw("MSpan_Sweep: bad span state after sweep")
} }
// Serialization point.
// At this point the mark bits are cleared and allocation ready
// to go so release the span.
atomic.Store(&s.sweepgen, sweepgen) atomic.Store(&s.sweepgen, sweepgen)
} }
if nfree > 0 { if nfree > 0 {
c.local_nsmallfree[cl] += uintptr(nfree) c.local_nsmallfree[cl] += uintptr(nfree)
res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve) res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve, wasempty)
// MCentral_FreeSpan updates sweepgen // MCentral_FreeSpan updates sweepgen
} else if freeToHeap { } else if freeToHeap {
// Free large span to heap // Free large span to heap
...@@ -399,27 +399,3 @@ func reimburseSweepCredit(unusableBytes uintptr) { ...@@ -399,27 +399,3 @@ func reimburseSweepCredit(unusableBytes uintptr) {
throw("spanBytesAlloc underflow") throw("spanBytesAlloc underflow")
} }
} }
func dumpFreeList(s *mspan) {
printlock()
print("runtime: free list of span ", s, ":\n")
sstart := uintptr(s.start << _PageShift)
link := s.freelist
for i := 0; i < int(s.npages*_PageSize/s.elemsize); i++ {
if i != 0 {
print(" -> ")
}
print(hex(link))
if link.ptr() == nil {
break
}
if uintptr(link) < sstart || s.limit <= uintptr(link) {
// Bad link. Stop walking before we crash.
print(" (BAD)")
break
}
link = link.ptr().next
}
print("\n")
printunlock()
}
...@@ -119,8 +119,7 @@ type mspan struct { ...@@ -119,8 +119,7 @@ type mspan struct {
start pageID // starting page number start pageID // starting page number
npages uintptr // number of pages in span npages uintptr // number of pages in span
freelist gclinkptr // list of free objects for _MSpanInUse stackfreelist gclinkptr // list of free stacks, avoids overloading freelist
stackfreelist gclinkptr // list of free stacks, avoids overloading freelist for _MSpanStack
// freeindex is the slot index between 0 and nelems at which to begin scanning // freeindex is the slot index between 0 and nelems at which to begin scanning
// for the next free object in this span. // for the next free object in this span.
...@@ -472,7 +471,6 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan { ...@@ -472,7 +471,6 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
// able to map interior pointer to containing span. // able to map interior pointer to containing span.
atomic.Store(&s.sweepgen, h.sweepgen) atomic.Store(&s.sweepgen, h.sweepgen)
s.state = _MSpanInUse s.state = _MSpanInUse
s.freelist = 0
s.ref = 0 s.ref = 0
s.sizeclass = uint8(sizeclass) s.sizeclass = uint8(sizeclass)
if sizeclass == 0 { if sizeclass == 0 {
...@@ -914,7 +912,6 @@ func (span *mspan) init(start pageID, npages uintptr) { ...@@ -914,7 +912,6 @@ func (span *mspan) init(start pageID, npages uintptr) {
span.list = nil span.list = nil
span.start = start span.start = start
span.npages = npages span.npages = npages
span.freelist = 0
span.ref = 0 span.ref = 0
span.sizeclass = 0 span.sizeclass = 0
span.incache = false span.incache = false
...@@ -925,6 +922,17 @@ func (span *mspan) init(start pageID, npages uintptr) { ...@@ -925,6 +922,17 @@ func (span *mspan) init(start pageID, npages uintptr) {
span.speciallock.key = 0 span.speciallock.key = 0
span.specials = nil span.specials = nil
span.needzero = 0 span.needzero = 0
span.freeindex = 0
span.allocBits = &span.markbits1
span.gcmarkBits = &span.markbits2
// determine if this is actually needed. It is once / span so it
// isn't expensive. This is to be replaced by an arena
// based system where things can be cleared all at once so
// don't worry about optimizing this.
for i := 0; i < len(span.markbits1); i++ {
span.allocBits[i] = 0
span.gcmarkBits[i] = 0
}
} }
func (span *mspan) inList() bool { func (span *mspan) inList() bool {
......
...@@ -1137,7 +1137,6 @@ func freeStackSpans() { ...@@ -1137,7 +1137,6 @@ func freeStackSpans() {
next := s.next next := s.next
if s.ref == 0 { if s.ref == 0 {
list.remove(s) list.remove(s)
s.freelist = 0
s.stackfreelist = 0 s.stackfreelist = 0
mheap_.freeStack(s) mheap_.freeStack(s)
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment