Commit 56b54912 authored by Rick Hudson's avatar Rick Hudson

Merge remote-tracking branch 'origin/dev.garbage'

This commit moves the GC from free list allocation to
bit mark allocation. Instead of using the bitmaps
generated during the mark phases to generate free
lists and then using those free lists for allocation,
we allocate directly from the bitmaps.

The change in the garbage benchmark

name              old time/op  new time/op  delta
XBenchGarbage-12  2.22ms ± 1%  2.13ms ± 1%  -3.90%  (p=0.000 n=18+18)

Change-Id: I17f57233336f0ca5ef5404c3be4ecb443ab622aa
parents d8d33514 e9eaa181
Reviving dev.garbage branch for use in new garbage collection experiment.
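The heart of the change: instead of popping a linked free list, allocation now scans the span's allocation bitmap forward from a cached index. A minimal standalone sketch of that scan, with toy types and values rather than the runtime's:

package main

import "fmt"

// toySpan models just enough of a span for the bitmap scan:
// a 0 bit in allocBits at or after freeindex means that slot is free.
type toySpan struct {
	allocBits []uint8
	freeindex uint
	nelems    uint
}

// nextFree returns the index of the next free object at or after freeindex,
// or nelems if the span is full. Allocation sets no bit: advancing freeindex
// is what retires earlier slots until the next sweep installs a fresh bitmap.
func (s *toySpan) nextFree() uint {
	for i := s.freeindex; i < s.nelems; i++ {
		if s.allocBits[i/8]&(1<<(i%8)) == 0 {
			s.freeindex = i + 1
			return i
		}
	}
	s.freeindex = s.nelems
	return s.nelems
}

func main() {
	s := &toySpan{allocBits: []uint8{0b1011}, nelems: 8}
	fmt.Println(s.nextFree()) // 2: slots 0, 1 and 3 survived the last GC
	fmt.Println(s.nextFree()) // 4
}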
......@@ -529,7 +529,7 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) {
return
}
b, hbits, span := heapBitsForObject(uintptr(p), 0, 0)
b, hbits, span, _ := heapBitsForObject(uintptr(p), 0, 0)
base = b
if base == 0 {
return
......
......@@ -447,7 +447,7 @@ func dumproots() {
continue
}
spf := (*specialfinalizer)(unsafe.Pointer(sp))
p := unsafe.Pointer((uintptr(s.start) << _PageShift) + uintptr(spf.special.offset))
p := unsafe.Pointer(s.base() + uintptr(spf.special.offset))
dumpfinalizer(p, spf.fn, spf.fint, spf.ot)
}
}
......@@ -467,15 +467,19 @@ func dumpobjs() {
if s.state != _MSpanInUse {
continue
}
p := uintptr(s.start << _PageShift)
p := s.base()
size := s.elemsize
n := (s.npages << _PageShift) / size
if n > uintptr(len(freemark)) {
throw("freemark array doesn't have enough entries")
}
for l := s.freelist; l.ptr() != nil; l = l.ptr().next {
freemark[(uintptr(l)-p)/size] = true
for freeIndex := s.freeindex; freeIndex < s.nelems; freeIndex++ {
if s.isFree(freeIndex) {
freemark[freeIndex] = true
}
}
for j := uintptr(0); j < n; j, p = j+1, p+size {
if freemark[j] {
freemark[j] = false
......@@ -615,7 +619,7 @@ func dumpmemprof() {
continue
}
spp := (*specialprofile)(unsafe.Pointer(sp))
p := uintptr(s.start<<_PageShift) + uintptr(spp.special.offset)
p := s.base() + uintptr(spp.special.offset)
dumpint(tagAllocSample)
dumpint(uint64(p))
dumpint(uint64(uintptr(unsafe.Pointer(spp.b))))
......@@ -710,7 +714,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
i := uintptr(0)
hbits := heapBitsForAddr(p)
for ; i < nptr; i++ {
if i >= 2 && !hbits.isMarked() {
if i >= 2 && !hbits.morePointers() {
break // end of object
}
if hbits.isPointer() {
......
......@@ -94,6 +94,9 @@ const (
pageShift = _PageShift
pageSize = _PageSize
pageMask = _PageMask
// By construction, single page spans of the smallest object class
// have the most objects per span.
maxObjsPerSpan = pageSize / 8
mSpanInUse = _MSpanInUse
......@@ -167,9 +170,6 @@ const (
_MaxGcproc = 32
)
// Page number (address>>pageShift)
type pageID uintptr
const _MaxArena32 = 2 << 30
// OS-defined helpers:
......@@ -384,6 +384,10 @@ func sysReserveHigh(n uintptr, reserved *bool) unsafe.Pointer {
return sysReserve(nil, n, reserved)
}
// sysAlloc allocates the next n bytes from the heap arena. The
// returned pointer is always _PageSize aligned and between
// h.arena_start and h.arena_end. sysAlloc returns nil on failure.
// There is no corresponding free function.
func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
if n > h.arena_end-h.arena_used {
// We are in 32-bit mode, maybe we didn't use all possible address space yet.
......@@ -484,6 +488,65 @@ func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
// base address for all 0-byte allocations
var zerobase uintptr
// nextFreeFast returns the next free object if one is quickly available.
// Otherwise it returns 0.
func nextFreeFast(s *mspan) gclinkptr {
theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache?
if theBit < 64 {
result := s.freeindex + uintptr(theBit)
if result < s.nelems {
freeidx := result + 1
if freeidx%64 == 0 && freeidx != s.nelems {
return 0
}
s.allocCache >>= (theBit + 1)
s.freeindex = freeidx
v := gclinkptr(result*s.elemsize + s.base())
s.allocCount++
return v
}
}
return 0
}
// nextFree returns the next free object from the cached span if one is available.
// Otherwise it refills the cache with a span with an available object and
// returns that object along with a flag indicating that this was a heavyweight
// allocation. If it is a heavyweight allocation the caller must determine
// whether a new GC cycle needs to be started or, if the GC is active, whether
// this goroutine needs to assist the GC.
func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, s *mspan, shouldhelpgc bool) {
s = c.alloc[sizeclass]
shouldhelpgc = false
freeIndex := s.nextFreeIndex()
if freeIndex == s.nelems {
// The span is full.
if uintptr(s.allocCount) != s.nelems {
println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
throw("s.allocCount != s.nelems && freeIndex == s.nelems")
}
systemstack(func() {
c.refill(int32(sizeclass))
})
shouldhelpgc = true
s = c.alloc[sizeclass]
freeIndex = s.nextFreeIndex()
}
if freeIndex >= s.nelems {
throw("freeIndex is not valid")
}
v = gclinkptr(freeIndex*s.elemsize + s.base())
s.allocCount++
if uintptr(s.allocCount) > s.nelems {
println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
throw("s.allocCount > s.nelems")
}
return
}
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
......@@ -538,7 +601,6 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
shouldhelpgc := false
dataSize := size
c := gomcache()
var s *mspan
var x unsafe.Pointer
noscan := typ == nil || typ.kind&kindNoPointers != 0
if size <= maxSmallSize {
......@@ -591,20 +653,11 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
return x
}
// Allocate a new maxTinySize block.
s = c.alloc[tinySizeClass]
v := s.freelist
if v.ptr() == nil {
systemstack(func() {
c.refill(tinySizeClass)
})
shouldhelpgc = true
s = c.alloc[tinySizeClass]
v = s.freelist
span := c.alloc[tinySizeClass]
v := nextFreeFast(span)
if v == 0 {
v, _, shouldhelpgc = c.nextFree(tinySizeClass)
}
s.freelist = v.ptr().next
s.ref++
// prefetchnta offers best performance, see change list message.
prefetchnta(uintptr(v.ptr().next))
x = unsafe.Pointer(v)
(*[2]uint64)(x)[0] = 0
(*[2]uint64)(x)[1] = 0
......@@ -623,26 +676,14 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
sizeclass = size_to_class128[(size-1024+127)>>7]
}
size = uintptr(class_to_size[sizeclass])
s = c.alloc[sizeclass]
v := s.freelist
if v.ptr() == nil {
systemstack(func() {
c.refill(int32(sizeclass))
})
shouldhelpgc = true
s = c.alloc[sizeclass]
v = s.freelist
span := c.alloc[sizeclass]
v := nextFreeFast(span)
if v == 0 {
v, span, shouldhelpgc = c.nextFree(sizeclass)
}
s.freelist = v.ptr().next
s.ref++
// prefetchnta offers best performance, see change list message.
prefetchnta(uintptr(v.ptr().next))
x = unsafe.Pointer(v)
if needzero {
v.ptr().next = 0
if size > 2*sys.PtrSize && ((*[2]uintptr)(x))[1] != 0 {
memclr(unsafe.Pointer(v), size)
}
if needzero && span.needzero != 0 {
memclr(unsafe.Pointer(v), size)
}
}
} else {
......@@ -651,13 +692,15 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
systemstack(func() {
s = largeAlloc(size, needzero)
})
x = unsafe.Pointer(uintptr(s.start << pageShift))
s.freeindex = 1
s.allocCount = 1
x = unsafe.Pointer(s.base())
size = s.elemsize
}
var scanSize uintptr
if noscan {
// All objects are pre-marked as noscan. Nothing to do.
heapBitsSetTypeNoScan(uintptr(x), size)
} else {
// If allocating a defer+arg block, now that we've picked a malloc size
// large enough to hold everything, cut the "asked for" size down to
......@@ -701,6 +744,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if raceenabled {
racemalloc(x, size)
}
if msanenabled {
msanmalloc(x, size)
}
......@@ -755,8 +799,8 @@ func largeAlloc(size uintptr, needzero bool) *mspan {
if s == nil {
throw("out of memory")
}
s.limit = uintptr(s.start)<<_PageShift + size
heapBitsForSpan(s.base()).initSpan(s.layout())
s.limit = s.base() + size
heapBitsForSpan(s.base()).initSpan(s)
return s
}
......
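nextFreeFast above leans on s.allocCache, a 64-bit window holding the complement of the alloc bits at freeindex, so a single count-trailing-zeros finds the next free slot without scanning byte by byte. A rough illustration using math/bits in place of sys.Ctz64, with invented values:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// Alloc bits for the next 64 objects: 1 = allocated, 0 = free.
	allocBits := uint64(0b10111) // objects 0, 1, 2 and 4 are allocated

	// The cache stores the complement, so trailing zeros count straight
	// to the first free object.
	allocCache := ^allocBits
	theBit := bits.TrailingZeros64(allocCache)
	fmt.Println("next free object:", theBit) // 3

	// Consume it the way nextFreeFast does: shift the cache past the bit
	// just used so the following lookup starts after it.
	allocCache >>= uint(theBit + 1)
	fmt.Println("free object after that:", bits.TrailingZeros64(allocCache)+theBit+1) // 5
}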
......@@ -24,7 +24,7 @@
// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
// The meaning of the high bit depends on the position of the word being described
// in its allocated object. In the first word, the high bit is the GC ``marked'' bit.
// in its allocated object. In the first word, the high bit is unused.
// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
// In the third and later words, the high bit indicates that the object is still
// being described. In these words, if a bit pair with a high bit 0 is encountered,
......@@ -33,12 +33,13 @@
// in the object are uninteresting to the garbage collector.
//
// The 2-bit entries are split when written into the byte, so that the top half
// of the byte contains 4 mark bits and the bottom half contains 4 pointer bits.
// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
// bits.
// This form allows a copy from the 1-bit to the 4-bit form to keep the
// pointer bits contiguous, instead of having to space them out.
//
// The code makes use of the fact that the zero value for a heap bitmap
// has no live pointer bit set and is (depending on position), not marked,
// has no live pointer bit set and is (depending on position), not used,
// not checkmarked, and is the dead encoding.
// These properties must be preserved when modifying the encoding.
//
......@@ -63,6 +64,7 @@
// It is still used in general, except in checkmark the type bit is repurposed
// as the checkmark bit and then reinitialized (to 1) as the type bit when
// finished.
//
package runtime
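As the comment block above describes, each heap bitmap byte covers four words: the low nibble holds the four pointer bits and the high nibble the four high (scan/checkmark) bits. A small sketch of decoding one such byte; the function and values are illustrative only:

package main

import "fmt"

// decodeHeapByte pulls apart one heap bitmap byte covering four heap words:
// bit i of the low nibble is the pointer bit for word i, and bit 4+i is that
// word's high bit (whose meaning depends on the word's position in its object,
// as described above).
func decodeHeapByte(b uint8) {
	for i := uint(0); i < 4; i++ {
		ptr := b>>i&1 != 0
		high := b>>(4+i)&1 != 0
		fmt.Printf("word %d: pointer=%v high=%v\n", i, ptr, high)
	}
}

func main() {
	// Low nibble 0101: words 0 and 2 hold pointers.
	// High nibble 1100: words 2 and 3 have their high bit set.
	decodeHeapByte(0b1100_0101)
}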
......@@ -95,6 +97,8 @@ func addb(p *byte, n uintptr) *byte {
}
// subtractb returns the byte pointer p-n.
// subtractb is typically used when traversing the pointer tables referred to by hbits
// which are arranged in reverse order.
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
......@@ -115,6 +119,8 @@ func add1(p *byte) *byte {
}
// subtract1 returns the byte pointer p-1.
// subtract1 is typically used when traversing the pointer tables referred to by hbits
// which are arranged in reverse order.
//go:nowritebarrier
//
// nosplit because it is used during write barriers and must not be preempted.
......@@ -161,6 +167,193 @@ type heapBits struct {
shift uint32
}
// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
// to see if the bit has been set.
// *m.bytep&m.mask != 0 indicates the mark bit is set.
// index can be used along with span information to generate
// the address of the object in the heap.
// We maintain one set of mark bits for allocation and one for
// marking purposes.
type markBits struct {
bytep *uint8
mask uint8
index uintptr
}
//go:nosplit
func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
whichByte := allocBitIndex / 8
whichBit := allocBitIndex % 8
bytePtr := addb(s.allocBits, whichByte)
return markBits{bytePtr, uint8(1 << whichBit), allocBitIndex}
}
// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
// and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64 bit
// s.allocCache.
func (s *mspan) refillAllocCache(whichByte uintptr) {
bytes := (*[8]uint8)(unsafe.Pointer(addb(s.allocBits, whichByte)))
aCache := uint64(0)
aCache |= uint64(bytes[0])
aCache |= uint64(bytes[1]) << (1 * 8)
aCache |= uint64(bytes[2]) << (2 * 8)
aCache |= uint64(bytes[3]) << (3 * 8)
aCache |= uint64(bytes[4]) << (4 * 8)
aCache |= uint64(bytes[5]) << (5 * 8)
aCache |= uint64(bytes[6]) << (6 * 8)
aCache |= uint64(bytes[7]) << (7 * 8)
s.allocCache = ^aCache
}
// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
func (s *mspan) nextFreeIndex() uintptr {
sfreeindex := s.freeindex
snelems := s.nelems
if sfreeindex == snelems {
return sfreeindex
}
if sfreeindex > snelems {
throw("s.freeindex > s.nelems")
}
aCache := s.allocCache
bitIndex := sys.Ctz64(aCache)
for bitIndex == 64 {
// Move index to start of next cached bits.
sfreeindex = (sfreeindex + 64) &^ (64 - 1)
if sfreeindex >= snelems {
s.freeindex = snelems
return snelems
}
whichByte := sfreeindex / 8
// Refill s.allocCache with the next 64 alloc bits.
s.refillAllocCache(whichByte)
aCache = s.allocCache
bitIndex = sys.Ctz64(aCache)
// nothing available in cached bits
// grab the next 8 bytes and try again.
}
result := sfreeindex + uintptr(bitIndex)
if result >= snelems {
s.freeindex = snelems
return snelems
}
s.allocCache >>= (bitIndex + 1)
sfreeindex = result + 1
if sfreeindex%64 == 0 && sfreeindex != snelems {
// We just incremented s.freeindex so it isn't 0.
// As each 1 in s.allocCache was encountered and used for allocation
// it was shifted away. At this point s.allocCache contains all 0s.
// Refill s.allocCache so that it corresponds
// to the bits at s.allocBits starting at s.freeindex.
whichByte := sfreeindex / 8
s.refillAllocCache(whichByte)
}
s.freeindex = sfreeindex
return result
}
func (s *mspan) isFree(index uintptr) bool {
whichByte := index / 8
whichBit := index % 8
byteVal := *addb(s.allocBits, whichByte)
return byteVal&uint8(1<<whichBit) == 0
}
func (s *mspan) objIndex(p uintptr) uintptr {
byteOffset := p - s.base()
if byteOffset == 0 {
return 0
}
if s.baseMask != 0 {
// s.baseMask is not 0, elemsize is a power of two, so shift by s.divShift
return byteOffset >> s.divShift
}
return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2)
}
func markBitsForAddr(p uintptr) markBits {
s := spanOf(p)
objIndex := s.objIndex(p)
return s.markBitsForIndex(objIndex)
}
func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
whichByte := objIndex / 8
bitMask := uint8(1 << (objIndex % 8)) // low 3 bits hold the bit index
bytePtr := addb(s.gcmarkBits, whichByte)
return markBits{bytePtr, bitMask, objIndex}
}
func (s *mspan) markBitsForBase() markBits {
return markBits{s.gcmarkBits, uint8(1), 0}
}
// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
return *m.bytep&m.mask != 0
}
// setMarked sets the marked bit in the markbits, atomically. Some compilers
// are not able to inline the atomic.Or8 function, so if it appears as a hot spot consider
// inlining it manually.
func (m markBits) setMarked() {
// Might be racing with other updates, so use atomic update always.
// We used to be clever here and use a non-atomic update in certain
// cases, but it's not worth the risk.
atomic.Or8(m.bytep, m.mask)
}
// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
func (m markBits) setMarkedNonAtomic() {
*m.bytep |= m.mask
}
// clearMarked clears the marked bit in the markbits, atomically.
func (m markBits) clearMarked() {
// Might be racing with other updates, so use atomic update always.
// We used to be clever here and use a non-atomic update in certain
// cases, but it's not worth the risk.
atomic.And8(m.bytep, ^m.mask)
}
// clearMarkedNonAtomic clears the marked bit non-atomically.
func (m markBits) clearMarkedNonAtomic() {
*m.bytep ^= m.mask
}
// markBitsForSpan returns the markBits for the span base address base.
func markBitsForSpan(base uintptr) (mbits markBits) {
if base < mheap_.arena_start || base >= mheap_.arena_used {
throw("heapBitsForSpan: base out of range")
}
mbits = markBitsForAddr(base)
if mbits.mask != 1 {
throw("markBitsForSpan: unaligned start")
}
return mbits
}
// advance advances the markBits to the next object in the span.
func (m *markBits) advance() {
if m.mask == 1<<7 {
m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
m.mask = 1
} else {
m.mask = m.mask << 1
}
m.index++
}
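The markBits helpers above all reduce to the same addressing: the bit for object index i lives in byte i/8 under mask 1<<(i%8), and advance walks that mask across bytes. A compact standalone illustration using a plain slice instead of the runtime's arena-backed bitmaps:

package main

import "fmt"

func main() {
	// One mark bit per object for a 20-object span.
	marks := make([]uint8, (20+7)/8)

	// Mark objects 3 and 11 the way markBitsForIndex addresses them.
	for _, i := range []uint{3, 11} {
		marks[i/8] |= 1 << (i % 8)
	}

	// Walk the span with an advancing byte index and mask, mirroring
	// markBits.advance: bump the byte when the mask reaches bit 7.
	bytep, mask := 0, uint8(1)
	for i := 0; i < 20; i++ {
		if marks[bytep]&mask != 0 {
			fmt.Println("object", i, "is marked")
		}
		if mask == 1<<7 {
			bytep++
			mask = 1
		} else {
			mask <<= 1
		}
	}
}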
// heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
//
......@@ -177,15 +370,12 @@ func heapBitsForSpan(base uintptr) (hbits heapBits) {
if base < mheap_.arena_start || base >= mheap_.arena_used {
throw("heapBitsForSpan: base out of range")
}
hbits = heapBitsForAddr(base)
if hbits.shift != 0 {
throw("heapBitsForSpan: unaligned start")
}
return hbits
return heapBitsForAddr(base)
}
// heapBitsForObject returns the base address for the heap object
// containing the address p, along with the heapBits for base.
// containing the address p, the heapBits for base,
// the object's span, and of the index of the object in s.
// If p does not point into a heap object,
// return base == 0
// otherwise return the base of the object.
......@@ -193,7 +383,7 @@ func heapBitsForSpan(base uintptr) (hbits heapBits) {
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan) {
func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) {
arenaStart := mheap_.arena_start
if p < arenaStart || p >= mheap_.arena_used {
return
......@@ -202,9 +392,8 @@ func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits
idx := off >> _PageShift
// p points into the heap, but possibly to the middle of an object.
// Consult the span table to find the block beginning.
k := p >> _PageShift
s = h_spans[idx]
if s == nil || pageID(k) < s.start || p >= s.limit || s.state != mSpanInUse {
if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse {
if s == nil || s.state == _MSpanStack {
// If s is nil, the virtual address has never been part of the heap.
// This pointer may be to some mmap'd region, so we allow it.
......@@ -230,7 +419,7 @@ func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits
} else {
print(" to unused region of span")
}
print("idx=", hex(idx), " span.start=", hex(s.start<<_PageShift), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
print("idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
if refBase != 0 {
print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
gcDumpObject("object", refBase, refOff)
......@@ -245,6 +434,7 @@ func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits
// optimize for power of 2 sized objects.
base = s.base()
base = base + (p-base)&s.baseMask
objIndex = (base - s.base()) >> s.divShift
// base = p & s.baseMask is faster for small spans,
// but doesn't work for large spans.
// Overall, it's faster to use the more general computation above.
......@@ -252,8 +442,8 @@ func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits
base = s.base()
if p-base >= s.elemsize {
// n := (p - base) / s.elemsize, using division by multiplication
n := uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2)
base += n * s.elemsize
objIndex = uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2)
base += objIndex * s.elemsize
}
}
// Now that we know the actual base, compute heapBits to return to caller.
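The division-by-multiplication above replaces offset/elemsize with a shift, a multiply by a precomputed reciprocal, and a second shift. A standalone check of the idea for 48-byte objects; the constants are derived for this example rather than read from the runtime's size class tables:

package main

import "fmt"

func main() {
	const (
		elemsize  = 48
		divShift  = 4      // 48 = 3 << 4: strip the power-of-two factor first
		divMul    = 0xAAAB // ceil(2^17 / 3): fixed-point reciprocal of 3
		divShift2 = 17
	)

	for _, offset := range []uint64{0, 48, 96, 4080, 8160} {
		objIndex := offset >> divShift * divMul >> divShift2
		fmt.Println("offset", offset, "-> object", objIndex, "want", offset/elemsize)
	}
}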
......@@ -298,28 +488,13 @@ func (h heapBits) bits() uint32 {
return uint32(*h.bitp) >> (h.shift & 31)
}
// isMarked reports whether the heap bits have the marked bit set.
// h must describe the initial word of the object.
func (h heapBits) isMarked() bool {
// morePointers reports whether this word and later words in this object may
// still contain pointers; if it returns false, this word and all remaining
// words in the object are scalars.
// h must not describe the first or second word of the object.
func (h heapBits) morePointers() bool {
return *h.bitp&(bitMarked<<h.shift) != 0
}
// setMarked sets the marked bit in the heap bits, atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarked() {
// Each byte of GC bitmap holds info for four words.
// Might be racing with other updates, so use atomic update always.
// We used to be clever here and use a non-atomic update in certain
// cases, but it's not worth the risk.
atomic.Or8(h.bitp, bitMarked<<h.shift)
}
// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
// h must describe the initial word of the object.
func (h heapBits) setMarkedNonAtomic() {
*h.bitp |= bitMarked << h.shift
}
// isPointer reports whether the heap bits describe a pointer word.
// h must describe the initial word of the object.
//
......@@ -549,7 +724,19 @@ func typeBitsBulkBarrier(typ *_type, p, size uintptr) {
// If this is a span of pointer-sized objects, it initializes all
// words to pointer (and there are no dead bits).
// Otherwise, it initializes all words to scalar/dead.
func (h heapBits) initSpan(size, n, total uintptr) {
func (h heapBits) initSpan(s *mspan) {
size, n, total := s.layout()
// Init the markbit structures
s.freeindex = 0
s.allocCache = ^uint64(0) // all 1s indicating all free.
s.nelems = n
s.allocBits = nil
s.gcmarkBits = nil
s.gcmarkBits = newMarkBits(s.nelems)
s.allocBits = newAllocBits(s.nelems)
// Clear bits corresponding to objects.
if total%heapBitmapScale != 0 {
throw("initSpan: unaligned length")
}
......@@ -610,106 +797,60 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
}
}
// heapBitsSweepSpan coordinates the sweeping of a span by reading
// and updating the corresponding heap bitmap entries.
// For each free object in the span, heapBitsSweepSpan sets the type
// bits for the first four words (less for smaller objects) to scalar/dead
// and then calls f(p), where p is the object's base address.
// f is expected to add the object to a free list.
// For non-free objects, heapBitsSweepSpan turns off the marked bit.
func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
h := heapBitsForSpan(base)
switch {
default:
throw("heapBitsSweepSpan")
case sys.PtrSize == 8 && size == sys.PtrSize:
// Consider mark bits in all four 2-bit entries of each bitmap byte.
bitp := h.bitp
for i := uintptr(0); i < n; i += 4 {
x := uint32(*bitp)
// Note that unlike the other size cases, we leave the pointer bits set here.
// These are initialized during initSpan when the span is created and left
// in place the whole time the span is used for pointer-sized objects.
// That lets heapBitsSetType avoid an atomic update to set the pointer bit
// during allocation.
if x&bitMarked != 0 {
x &^= bitMarked
} else {
f(base + i*sys.PtrSize)
}
if x&(bitMarked<<heapBitsShift) != 0 {
x &^= bitMarked << heapBitsShift
} else {
f(base + (i+1)*sys.PtrSize)
}
if x&(bitMarked<<(2*heapBitsShift)) != 0 {
x &^= bitMarked << (2 * heapBitsShift)
} else {
f(base + (i+2)*sys.PtrSize)
}
if x&(bitMarked<<(3*heapBitsShift)) != 0 {
x &^= bitMarked << (3 * heapBitsShift)
} else {
f(base + (i+3)*sys.PtrSize)
}
*bitp = uint8(x)
bitp = subtract1(bitp)
}
case size%(4*sys.PtrSize) == 0:
// Mark bit is in first word of each object.
// Each object starts at bit 0 of a heap bitmap byte.
bitp := h.bitp
step := size / heapBitmapScale
for i := uintptr(0); i < n; i++ {
x := uint32(*bitp)
if x&bitMarked != 0 {
x &^= bitMarked
} else {
x = 0
f(base + i*size)
}
*bitp = uint8(x)
bitp = subtractb(bitp, step)
}
case size%(4*sys.PtrSize) == 2*sys.PtrSize:
// Mark bit is in first word of each object,
// but every other object starts halfway through a heap bitmap byte.
// Unroll loop 2x to handle alternating shift count and step size.
bitp := h.bitp
step := size / heapBitmapScale
var i uintptr
for i = uintptr(0); i < n; i += 2 {
x := uint32(*bitp)
if x&bitMarked != 0 {
x &^= bitMarked
} else {
x &^= bitMarked | bitPointer | (bitMarked|bitPointer)<<heapBitsShift
f(base + i*size)
if size > 2*sys.PtrSize {
x = 0
}
}
*bitp = uint8(x)
if i+1 >= n {
break
}
bitp = subtractb(bitp, step)
x = uint32(*bitp)
if x&(bitMarked<<(2*heapBitsShift)) != 0 {
x &^= bitMarked << (2 * heapBitsShift)
} else {
x &^= (bitMarked|bitPointer)<<(2*heapBitsShift) | (bitMarked|bitPointer)<<(3*heapBitsShift)
f(base + (i+1)*size)
if size > 2*sys.PtrSize {
*subtract1(bitp) = 0
}
}
*bitp = uint8(x)
bitp = subtractb(bitp, step+1)
}
// oneBitCount is indexed by byte and produces the
// number of 1 bits in that byte. For example 128 has 1 bit set
// and oneBitCount[128] holds 1.
var oneBitCount = [256]uint8{
0, 1, 1, 2, 1, 2, 2, 3,
1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7,
5, 6, 6, 7, 6, 7, 7, 8}
// countFree runs through the mark bits in a span and counts the number of free objects
// in the span.
// TODO:(rlh) Use popcount intrinsic.
func (s *mspan) countFree() int {
count := 0
maxIndex := s.nelems / 8
for i := uintptr(0); i < maxIndex; i++ {
mrkBits := *addb(s.gcmarkBits, i)
count += int(oneBitCount[mrkBits])
}
if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 {
mrkBits := *addb(s.gcmarkBits, maxIndex)
mask := uint8((1 << bitsInLastByte) - 1)
bits := mrkBits & mask
count += int(oneBitCount[bits])
}
return int(s.nelems) - count
}
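countFree's TODO asks for a popcount intrinsic; outside the runtime the same tally can be written with math/bits. A sketch over a toy mark bitmap, not runtime data:

package main

import (
	"fmt"
	"math/bits"
)

// countFree counts the zero bits among the first nelems mark bits,
// mirroring mspan.countFree: whole bytes first, then a masked last byte.
func countFree(gcmarkBits []uint8, nelems uint) uint {
	marked := uint(0)
	for i := uint(0); i < nelems/8; i++ {
		marked += uint(bits.OnesCount8(gcmarkBits[i]))
	}
	if rem := nelems % 8; rem != 0 {
		mask := uint8(1)<<rem - 1
		marked += uint(bits.OnesCount8(gcmarkBits[nelems/8] & mask))
	}
	return nelems - marked
}

func main() {
	// 20 objects; the GC marked objects 0, 9 and 17 as live.
	marks := []uint8{0b0000_0001, 0b0000_0010, 0b0000_0010}
	fmt.Println(countFree(marks, 20)) // 17
}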
// heapBitsSetType records that the new allocation [x, x+size)
......@@ -739,7 +880,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// size is sizeof(_defer{}) (at least 6 words) and dataSize may be
// arbitrarily larger.
//
// The checks for size == ptrSize and size == 2*ptrSize can therefore
// The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
// assume that dataSize == size without checking it explicitly.
if sys.PtrSize == 8 && size == sys.PtrSize {
......@@ -779,10 +920,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// (In general the number of instances of typ being allocated is
// dataSize/typ.size.)
if sys.PtrSize == 4 && dataSize == sys.PtrSize {
// 1 pointer.
// 1 pointer object. On 32-bit machines clear the bit for the
// unused second word.
if gcphase == _GCoff {
*h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
*h.bitp |= bitPointer << h.shift
} else {
atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
atomic.Or8(h.bitp, bitPointer<<h.shift)
}
} else {
......@@ -795,7 +939,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
}
return
}
// Otherwise typ.size must be 2*ptrSize, and typ.kind&kindGCProg == 0.
// Otherwise typ.size must be 2*sys.PtrSize,
// and typ.kind&kindGCProg == 0.
if doubleCheck {
if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
......@@ -805,8 +950,19 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
b := uint32(*ptrmask)
hb := b & 3
if gcphase == _GCoff {
// bitPointer == 1, bitMarked is 1 << 4, heapBitsShift is 1.
// 110011 is shifted by h.shift and complemented.
// This clears out the bits that are about to be
// ored into *h.hbitp in the next instructions.
*h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
*h.bitp |= uint8(hb << h.shift)
} else {
// TODO:(rlh) since the GC is not concurrently setting the
// mark bits in the heap map anymore and malloc
// owns the span we are allocating in why does this have
// to be atomic?
atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
atomic.Or8(h.bitp, uint8(hb<<h.shift))
}
return
......@@ -920,8 +1076,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// Replicate ptrmask to fill entire pbits uintptr.
// Doubling and truncating is fewer steps than
// iterating by nb each time. (nb could be 1.)
// Since we loaded typ.ptrdata/ptrSize bits
// but are pretending to have typ.size/ptrSize,
// Since we loaded typ.ptrdata/sys.PtrSize bits
// but are pretending to have typ.size/sys.PtrSize,
// there might be no replication necessary/possible.
pbits = b
endnb = nb
......@@ -1012,13 +1168,15 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// not with its mark bit. Since there is only one allocation
// from a given span at a time, we should be able to set
// these bits non-atomically. Not worth the risk right now.
hb = (b & 3) << (2 * heapBitsShift)
hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
b >>= 2
nb -= 2
// Note: no bitMarker in hb because the first two words don't get markers from us.
if gcphase == _GCoff {
*hbitp &^= uint8((bitPointer | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
*hbitp |= uint8(hb)
} else {
atomic.And8(hbitp, ^(uint8(bitPointer|bitPointer<<heapBitsShift) << (2 * heapBitsShift)))
atomic.Or8(hbitp, uint8(hb))
}
hbitp = subtract1(hbitp)
......@@ -1208,6 +1366,41 @@ Phase4:
}
}
// heapBitsSetTypeNoScan marks x as noscan. For objects of 1 or 2
// words it sets their pointer bits to off (0).
// For all other objects it sets the first 3 pointer bits to
// off (0) and also sets the scan bit in the third word
// to off (0).
func heapBitsSetTypeNoScan(x, size uintptr) {
h := heapBitsForAddr(uintptr(x))
bitp := h.bitp
if sys.PtrSize == 8 && size == sys.PtrSize {
// If this is truely noScan the tinyAlloc logic should have noticed
// and combined such objects.
throw("noscan object is too small")
} else if size%(4*sys.PtrSize) == 0 {
*bitp &^= bitPointer | bitPointer<<heapBitsShift | (bitMarked|bitPointer)<<(2*heapBitsShift)
} else if size%(4*sys.PtrSize) == 2*sys.PtrSize {
if h.shift == 0 {
*bitp &^= (bitPointer | bitPointer<<heapBitsShift)
if size > 2*sys.PtrSize {
*bitp &^= (bitPointer | bitMarked) << (2 * heapBitsShift)
}
} else if h.shift == 2 {
*bitp &^= bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
if size > 2*sys.PtrSize {
bitp = subtract1(bitp)
*bitp &^= bitPointer | bitMarked
}
} else {
throw("Type has unrecognized size")
}
} else {
throw("Type has unrecognized size")
}
}
var debugPtrmask struct {
lock mutex
data *byte
......@@ -1301,7 +1494,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u
// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size is the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/ptrSize bits.
// The resulting bitvector will have no more than size/sys.PtrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
n := (size/sys.PtrSize + 7) / 8
x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
......@@ -1437,7 +1630,7 @@ Run:
// into a register and use that register for the entire loop
// instead of repeatedly reading from memory.
// Handling fewer than 8 bits here makes the general loop simpler.
// The cutoff is ptrSize*8 - 7 to guarantee that when we add
// The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
// the pattern to a bit buffer holding at most 7 bits (a partial byte)
// it will not overflow.
src := dst
......@@ -1732,7 +1925,7 @@ func getgcmask(ep interface{}) (mask []byte) {
if hbits.isPointer() {
mask[i/sys.PtrSize] = 1
}
if i >= 2*sys.PtrSize && !hbits.isMarked() {
if i >= 2*sys.PtrSize && !hbits.morePointers() {
mask = mask[:i/sys.PtrSize]
break
}
......
......@@ -108,9 +108,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
_g_.m.locks++
// Return the current cached span to the central lists.
s := c.alloc[sizeclass]
if s.freelist.ptr() != nil {
throw("refill on a nonempty span")
if uintptr(s.allocCount) != s.nelems {
throw("refill of span with free space remaining")
}
if s != &emptymspan {
s.incache = false
}
......@@ -120,10 +122,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
if s == nil {
throw("out of memory")
}
if s.freelist.ptr() == nil {
println(s.ref, (s.npages<<_PageShift)/s.elemsize)
throw("empty span")
if uintptr(s.allocCount) == s.nelems {
throw("span has no free space")
}
c.alloc[sizeclass] = s
_g_.m.locks--
return s
......
......@@ -18,7 +18,7 @@ import "runtime/internal/atomic"
type mcentral struct {
lock mutex
sizeclass int32
nonempty mSpanList // list of spans with a free object
nonempty mSpanList // list of spans with a free object, i.e., a nonempty free list
empty mSpanList // list of spans with no free objects (or cached in an mcache)
}
......@@ -67,7 +67,9 @@ retry:
c.empty.insertBack(s)
unlock(&c.lock)
s.sweep(true)
if s.freelist.ptr() != nil {
freeIndex := s.nextFreeIndex()
if freeIndex != s.nelems {
s.freeindex = freeIndex
goto havespan
}
lock(&c.lock)
......@@ -98,11 +100,11 @@ retry:
// c is unlocked.
havespan:
cap := int32((s.npages << _PageShift) / s.elemsize)
n := cap - int32(s.ref)
if n == 0 {
throw("empty span")
n := cap - int32(s.allocCount)
if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {
throw("span has no free objects")
}
usedBytes := uintptr(s.ref) * s.elemsize
usedBytes := uintptr(s.allocCount) * s.elemsize
if usedBytes > 0 {
reimburseSweepCredit(usedBytes)
}
......@@ -115,10 +117,16 @@ havespan:
// heap_live changed.
gcController.revise()
}
if s.freelist.ptr() == nil {
throw("freelist empty")
}
s.incache = true
freeByteBase := s.freeindex &^ (64 - 1)
whichByte := freeByteBase / 8
// Init alloc bits cache.
s.refillAllocCache(whichByte)
// Adjust the allocCache so that s.freeindex corresponds to the low bit in
// s.allocCache.
s.allocCache >>= s.freeindex % 64
return s
}
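The alignment arithmetic at the end of cacheSpan above is easiest to see with concrete numbers; a tiny worked sketch with invented values:

package main

import "fmt"

func main() {
	// Suppose the cached span will resume allocating at object 70.
	freeindex := uint(70)

	// Refill the cache from the 64-bit group containing bit 70 of allocBits...
	freeByteBase := freeindex &^ (64 - 1) // 64
	whichByte := freeByteBase / 8         // byte 8 of allocBits covers objects 64..127

	// ...then shift so bit 0 of allocCache describes object 70 itself.
	shift := freeindex % 64 // 6

	fmt.Println(freeByteBase, whichByte, shift) // 64 8 6
}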
......@@ -128,12 +136,12 @@ func (c *mcentral) uncacheSpan(s *mspan) {
s.incache = false
if s.ref == 0 {
throw("uncaching full span")
if s.allocCount == 0 {
throw("uncaching span but s.allocCount == 0")
}
cap := int32((s.npages << _PageShift) / s.elemsize)
n := cap - int32(s.ref)
n := cap - int32(s.allocCount)
if n > 0 {
c.empty.remove(s)
c.nonempty.insert(s)
......@@ -144,22 +152,19 @@ func (c *mcentral) uncacheSpan(s *mspan) {
unlock(&c.lock)
}
// Free n objects from a span s back into the central free list c.
// Called during sweep.
// Returns true if the span was returned to heap. Sets sweepgen to
// the latest generation.
// If preserve=true, don't return the span to heap nor relink in MCentral lists;
// caller takes care of it.
func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool) bool {
// freeSpan updates c and s after sweeping s.
// It sets s's sweepgen to the latest generation,
// and, based on the number of free objects in s,
// moves s to the appropriate list of c or returns it
// to the heap.
// freeSpan returns true if s was returned to the heap.
// If preserve=true, it does not move s (the caller
// must take care of it).
func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool {
if s.incache {
throw("freespan into cached span")
throw("freeSpan given cached span")
}
// Add the objects back to s's free list.
wasempty := s.freelist.ptr() == nil
end.ptr().next = s.freelist
s.freelist = start
s.ref -= uint16(n)
s.needzero = 1
if preserve {
// preserve is set only when called from MCentral_CacheSpan above,
......@@ -185,21 +190,18 @@ func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, p
// lock of c above.)
atomic.Store(&s.sweepgen, mheap_.sweepgen)
if s.ref != 0 {
if s.allocCount != 0 {
unlock(&c.lock)
return false
}
// s is completely freed, return it to the heap.
c.nonempty.remove(s)
s.needzero = 1
s.freelist = 0
unlock(&c.lock)
mheap_.freeSpan(s, 0)
return true
}
// Fetch a new span from the heap and carve into objects for the free list.
// grow allocates a new empty span from the heap and initializes it for c's size class.
func (c *mcentral) grow() *mspan {
npages := uintptr(class_to_allocnpages[c.sizeclass])
size := uintptr(class_to_size[c.sizeclass])
......@@ -210,21 +212,9 @@ func (c *mcentral) grow() *mspan {
return nil
}
p := uintptr(s.start << _PageShift)
p := s.base()
s.limit = p + size*n
head := gclinkptr(p)
tail := gclinkptr(p)
// i==0 iteration already done
for i := uintptr(1); i < n; i++ {
p += size
tail.ptr().next = gclinkptr(p)
tail = gclinkptr(p)
}
if s.freelist.ptr() != nil {
throw("freelist not empty")
}
tail.ptr().next = 0
s.freelist = head
heapBitsForSpan(s.base()).initSpan(s.layout())
heapBitsForSpan(s.base()).initSpan(s)
return s
}
......@@ -402,7 +402,7 @@ func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) {
if s == nil {
return
}
x = unsafe.Pointer(uintptr(s.start) << pageShift)
x = unsafe.Pointer(s.base())
if uintptr(v) < uintptr(x) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != mSpanInUse {
s = nil
......
......@@ -360,7 +360,7 @@ func markrootSpans(gcw *gcWork, shard int) {
// retain everything it points to.
spf := (*specialfinalizer)(unsafe.Pointer(sp))
// A finalizer can be set for an inner byte of an object, find object beginning.
p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
// Mark everything that can be reached from
// the object (but *not* the object itself or
......@@ -962,7 +962,10 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
if blocking {
b = gcw.get()
} else {
b = gcw.tryGet()
b = gcw.tryGetFast()
if b == 0 {
b = gcw.tryGet()
}
}
if b == 0 {
// work barrier reached or tryGet failed.
......@@ -1025,7 +1028,11 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
// PREFETCH(wbuf->obj[wbuf.nobj - 3];
// }
//
b := gcw.tryGet()
b := gcw.tryGetFast()
if b == 0 {
b = gcw.tryGet()
}
if b == 0 {
break
}
......@@ -1075,8 +1082,8 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
// Same work as in scanobject; see comments there.
obj := *(*uintptr)(unsafe.Pointer(b + i))
if obj != 0 && arena_start <= obj && obj < arena_used {
if obj, hbits, span := heapBitsForObject(obj, b, i); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw)
if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw, objIndex)
}
}
}
......@@ -1141,8 +1148,8 @@ func scanobject(b uintptr, gcw *gcWork) {
// Check if it points into heap and not back at the current object.
if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
// Mark the object.
if obj, hbits, span := heapBitsForObject(obj, b, i); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw)
if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw, objIndex)
}
}
}
......@@ -1155,9 +1162,9 @@ func scanobject(b uintptr, gcw *gcWork) {
// Preemption must be disabled.
//go:nowritebarrier
func shade(b uintptr) {
if obj, hbits, span := heapBitsForObject(b, 0, 0); obj != 0 {
if obj, hbits, span, objIndex := heapBitsForObject(b, 0, 0); obj != 0 {
gcw := &getg().m.p.ptr().gcw
greyobject(obj, 0, 0, hbits, span, gcw)
greyobject(obj, 0, 0, hbits, span, gcw, objIndex)
if gcphase == _GCmarktermination || gcBlackenPromptly {
// Ps aren't allowed to cache work during mark
// termination.
......@@ -1170,14 +1177,15 @@ func shade(b uintptr) {
// If it isn't already marked, mark it and enqueue into gcw.
// base and off are for debugging only and could be removed.
//go:nowritebarrierrec
func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork) {
func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr) {
// obj should be start of allocation, and so must be at least pointer-aligned.
if obj&(sys.PtrSize-1) != 0 {
throw("greyobject: obj not pointer-aligned")
}
mbits := span.markBitsForIndex(objIndex)
if useCheckmark {
if !hbits.isMarked() {
if !mbits.isMarked() {
printlock()
print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
......@@ -1199,11 +1207,11 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
}
} else {
// If marked we have nothing to do.
if hbits.isMarked() {
if mbits.isMarked() {
return
}
hbits.setMarked()
// mbits.setMarked() // Avoid extra call overhead with manual inlining.
atomic.Or8(mbits.bytep, mbits.mask)
// If this is a noscan object, fast-track it to black
// instead of greying it.
if !hbits.hasPointers(span.elemsize) {
......@@ -1218,8 +1226,9 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
// Previously we put the obj in an 8 element buffer that is drained at a rate
// to give the PREFETCH time to do its work.
// Use of PREFETCHNTA might be more appropriate than PREFETCH
gcw.put(obj)
if !gcw.putFast(obj) {
gcw.put(obj)
}
}
// gcDumpObject dumps the contents of obj for debugging and marks the
......@@ -1238,7 +1247,7 @@ func gcDumpObject(label string, obj, off uintptr) {
print(" s=nil\n")
return
}
print(" s.start*_PageSize=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
skipped := false
for i := uintptr(0); i < s.elemsize; i += sys.PtrSize {
// For big objects, just print the beginning (because
......@@ -1274,7 +1283,7 @@ func gcmarknewobject(obj, size, scanSize uintptr) {
if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}
heapBitsForAddr(obj).setMarked()
markBitsForAddr(obj).setMarked()
gcw := &getg().m.p.ptr().gcw
gcw.bytesMarked += uint64(size)
gcw.scanWork += int64(scanSize)
......
......@@ -8,7 +8,6 @@ package runtime
import (
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
......@@ -52,6 +51,7 @@ func finishsweep_m(stw bool) {
}
}
}
nextMarkBitArenaEpoch()
}
func bgsweep(c chan int) {
......@@ -187,21 +187,16 @@ func (s *mspan) sweep(preserve bool) bool {
res := false
nfree := 0
var head, end gclinkptr
c := _g_.m.mcache
freeToHeap := false
// Mark any free objects in this span so we don't collect them.
sstart := uintptr(s.start << _PageShift)
for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
if uintptr(link) < sstart || s.limit <= uintptr(link) {
// Free list is corrupted.
dumpFreeList(s)
throw("free list corrupted")
}
heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
}
// The allocBits indicate which unmarked objects don't need to be
// processed since they were free at the end of the last GC cycle
// and were not allocated since then.
// If the allocBits index is >= s.freeindex and the bit
// is not marked then the object remains unallocated
// since the last GC.
// This situation is analogous to being on a freelist.
// Unlink & free special records for any objects we're about to free.
// Two complications here:
......@@ -215,17 +210,18 @@ func (s *mspan) sweep(preserve bool) bool {
special := *specialp
for special != nil {
// A finalizer can be set for an inner byte of an object, find object beginning.
p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
hbits := heapBitsForAddr(p)
if !hbits.isMarked() {
objIndex := uintptr(special.offset) / size
p := s.base() + objIndex*size
mbits := s.markBitsForIndex(objIndex)
if !mbits.isMarked() {
// This object is not marked and has at least one special record.
// Pass 1: see if it has at least one finalizer.
hasFin := false
endOffset := p - uintptr(s.start<<_PageShift) + size
endOffset := p - s.base() + size
for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
if tmp.kind == _KindSpecialFinalizer {
// Stop freeing of object if it has a finalizer.
hbits.setMarkedNonAtomic()
mbits.setMarkedNonAtomic()
hasFin = true
break
}
......@@ -234,7 +230,7 @@ func (s *mspan) sweep(preserve bool) bool {
for special != nil && uintptr(special.offset) < endOffset {
// Find the exact byte for which the special was setup
// (as opposed to object beginning).
p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
p := s.base() + uintptr(special.offset)
if special.kind == _KindSpecialFinalizer || !hasFin {
// Splice out special record.
y := special
......@@ -255,67 +251,67 @@ func (s *mspan) sweep(preserve bool) bool {
}
}
// Sweep through n objects of given size starting at p.
// This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations.
size, n, _ := s.layout()
heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
// At this point we know that we are looking at garbage object
// that needs to be collected.
if debug.allocfreetrace != 0 {
tracefree(unsafe.Pointer(p), size)
}
if msanenabled {
msanfree(unsafe.Pointer(p), size)
if debug.allocfreetrace != 0 {
// Find all newly freed objects. This doesn't have to be
// efficient; allocfreetrace has massive overhead.
mbits := s.markBitsForBase()
abits := s.allocBitsForIndex(0)
for i := uintptr(0); i < s.nelems; i++ {
if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) {
x := s.base() + i*s.elemsize
tracefree(unsafe.Pointer(x), size)
}
mbits.advance()
abits.advance()
}
}
// Reset to allocated+noscan.
if cl == 0 {
// Free large span.
if preserve {
throw("can't preserve large span")
}
s.needzero = 1
// Count the number of free objects in this span.
nfree = s.countFree()
if cl == 0 && nfree != 0 {
s.needzero = 1
freeToHeap = true
}
nalloc := uint16(s.nelems) - uint16(nfree)
nfreed := s.allocCount - nalloc
if nalloc > s.allocCount {
print("runtime: nelems=", s.nelems, " nfree=", nfree, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n")
throw("sweep increased allocation count")
}
// Free the span after heapBitsSweepSpan
// returns, since it's not done with the span.
freeToHeap = true
} else {
// Free small object.
if size > 2*sys.PtrSize {
*(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = uintptrMask & 0xdeaddeaddeaddead // mark as "needs to be zeroed"
} else if size > sys.PtrSize {
*(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = 0
}
if head.ptr() == nil {
head = gclinkptr(p)
} else {
end.ptr().next = gclinkptr(p)
}
end = gclinkptr(p)
end.ptr().next = gclinkptr(0x0bade5)
nfree++
}
})
s.allocCount = nalloc
wasempty := s.nextFreeIndex() == s.nelems
s.freeindex = 0 // reset allocation index to start of span.
// gcmarkBits becomes the allocBits.
// get a fresh cleared gcmarkBits in preparation for next GC
s.allocBits = s.gcmarkBits
s.gcmarkBits = newMarkBits(s.nelems)
// Initialize alloc bits cache.
s.refillAllocCache(0)
// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation
// (return it to heap or mcentral), because allocation code assumes that a
// span is already swept if available for allocation.
if freeToHeap || nfree == 0 {
if freeToHeap || nfreed == 0 {
// The span must be in our exclusive ownership until we update sweepgen,
// check for potential races.
if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
throw("MSpan_Sweep: bad span state after sweep")
}
// Serialization point.
// At this point the mark bits are cleared and allocation ready
// to go so release the span.
atomic.Store(&s.sweepgen, sweepgen)
}
if nfree > 0 {
c.local_nsmallfree[cl] += uintptr(nfree)
res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve)
if nfreed > 0 && cl != 0 {
c.local_nsmallfree[cl] += uintptr(nfreed)
res = mheap_.central[cl].mcentral.freeSpan(s, preserve, wasempty)
// MCentral_FreeSpan updates sweepgen
} else if freeToHeap {
// Free large span to heap
......@@ -336,7 +332,7 @@ func (s *mspan) sweep(preserve bool) bool {
// implement and then call some kind of MHeap_DeleteSpan.
if debug.efence > 0 {
s.limit = 0 // prevent mlookup from finding this span
sysFault(unsafe.Pointer(uintptr(s.start<<_PageShift)), size)
sysFault(unsafe.Pointer(s.base()), size)
} else {
mheap_.freeSpan(s, 1)
}
......@@ -399,27 +395,3 @@ func reimburseSweepCredit(unusableBytes uintptr) {
throw("spanBytesAlloc underflow")
}
}
func dumpFreeList(s *mspan) {
printlock()
print("runtime: free list of span ", s, ":\n")
sstart := uintptr(s.start << _PageShift)
link := s.freelist
for i := 0; i < int(s.npages*_PageSize/s.elemsize); i++ {
if i != 0 {
print(" -> ")
}
print(hex(link))
if link.ptr() == nil {
break
}
if uintptr(link) < sstart || s.limit <= uintptr(link) {
// Bad link. Stop walking before we crash.
print(" (BAD)")
break
}
link = link.ptr().next
}
print("\n")
printunlock()
}
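The pivotal step in the new sweep above is the bitmap swap: the survivors recorded in gcmarkBits become the next cycle's allocBits, and gcmarkBits is replaced with fresh zeroed memory. A schematic version using plain slices rather than the runtime's arena-backed bitmaps:

package main

import "fmt"

type span struct {
	allocBits  []uint8 // which slots are in use, as of the last sweep
	gcmarkBits []uint8 // which slots the current GC marked live
	freeindex  uint
	nelems     uint
}

// sweep adopts the mark bits as the new allocation bits, hands the GC a
// fresh zeroed mark bitmap, and restarts the allocation scan at slot 0.
func (s *span) sweep() {
	s.allocBits = s.gcmarkBits
	s.gcmarkBits = make([]uint8, (s.nelems+7)/8) // stand-in for newMarkBits(nelems)
	s.freeindex = 0
}

func main() {
	s := &span{
		allocBits:  []uint8{0b1111_1111}, // all 8 slots were allocated
		gcmarkBits: []uint8{0b0001_0001}, // the GC marked only slots 0 and 4
		nelems:     8,
	}
	s.sweep()
	// Slots 1-3 and 5-7 now read as free to the allocator.
	fmt.Printf("allocBits=%08b freeindex=%d\n", s.allocBits[0], s.freeindex)
}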
......@@ -116,6 +116,22 @@ func (w *gcWork) put(obj uintptr) {
wbuf.nobj++
}
// putFast does a put and returns true if it can be done quickly;
// otherwise it returns false and the caller needs to call put.
//go:nowritebarrier
func (w *gcWork) putFast(obj uintptr) bool {
wbuf := w.wbuf1.ptr()
if wbuf == nil {
return false
} else if wbuf.nobj == len(wbuf.obj) {
return false
}
wbuf.obj[wbuf.nobj] = obj
wbuf.nobj++
return true
}
// tryGet dequeues a pointer for the garbage collector to trace.
//
// If there are no pointers remaining in this gcWork or in the global
......@@ -147,6 +163,23 @@ func (w *gcWork) tryGet() uintptr {
return wbuf.obj[wbuf.nobj]
}
// tryGetFast dequeues a pointer for the garbage collector to trace
// if one is readily available. Otherwise it returns 0 and
// the caller is expected to call tryGet().
//go:nowritebarrier
func (w *gcWork) tryGetFast() uintptr {
wbuf := w.wbuf1.ptr()
if wbuf == nil {
return 0
}
if wbuf.nobj == 0 {
return 0
}
wbuf.nobj--
return wbuf.obj[wbuf.nobj]
}
// get dequeues a pointer for the garbage collector to trace, blocking
// if necessary to ensure all pointers from all queues and caches have
// been retrieved. get returns 0 if there are no pointers remaining.
......
......@@ -117,9 +117,63 @@ type mspan struct {
prev **mspan // previous span's next field, or list head's first field if none
list *mSpanList // For debugging. TODO: Remove.
start pageID // starting page number
npages uintptr // number of pages in span
freelist gclinkptr // list of free objects
startAddr uintptr // address of first byte of span aka s.base()
npages uintptr // number of pages in span
stackfreelist gclinkptr // list of free stacks, avoids overloading freelist
// freeindex is the slot index between 0 and nelems at which to begin scanning
// for the next free object in this span.
// Each allocation scans allocBits starting at freeindex until it encounters a 0
// indicating a free object. freeindex is then adjusted so that subsequent scans begin
// just past the newly discovered free object.
//
// If freeindex == nelems, this span has no free objects.
//
// allocBits is a bitmap of objects in this span.
// If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
// then object n is free;
// otherwise, object n is allocated. Bits starting at nelems are
// undefined and should never be referenced.
//
// Object n starts at address n*elemsize + (start << pageShift).
freeindex uintptr
// TODO: Look up nelems from sizeclass and remove this field if it
// helps performance.
nelems uintptr // number of objects in the span.
// Cache of the allocBits at freeindex. allocCache is shifted
// such that the lowest bit corresponds to the bit freeindex.
// allocCache holds the complement of allocBits, thus allowing
// ctz (count trailing zero) to use it directly.
// allocCache may contain bits beyond s.nelems; the caller must ignore
// these.
allocCache uint64
// allocBits and gcmarkBits hold pointers to a span's mark and
// allocation bits. The pointers are 8 byte aligned.
// There are three arenas where this data is held.
// free: Dirty arenas that are no longer accessed
// and can be reused.
// next: Holds information to be used in the next GC cycle.
// current: Information being used during this GC cycle.
// previous: Information being used during the last GC cycle.
// A new GC cycle starts with the call to finishsweep_m.
// finishsweep_m moves the previous arena to the free arena,
// the current arena to the previous arena, and
// the next arena to the current arena.
// The next arena is populated as the spans request
// memory to hold gcmarkBits for the next GC cycle as well
// as allocBits for newly allocated spans.
//
// The pointer arithmetic is done "by hand" instead of using
// arrays to avoid bounds checks along critical performance
// paths.
// The sweep will free the old allocBits and set allocBits to the
// gcmarkBits. The gcmarkBits are replaced with a fresh zeroed
// out memory.
allocBits *uint8
gcmarkBits *uint8
// sweep generation:
// if sweepgen == h->sweepgen - 2, the span needs sweeping
// if sweepgen == h->sweepgen - 1, the span is currently being swept
......@@ -128,7 +182,7 @@ type mspan struct {
sweepgen uint32
divMul uint32 // for divide by elemsize - divMagic.mul
ref uint16 // capacity - number of objects in freelist
allocCount uint16 // number of allocated objects
sizeclass uint8 // size class
incache bool // being used by an mcache
state uint8 // mspaninuse etc
......@@ -145,7 +199,7 @@ type mspan struct {
}
func (s *mspan) base() uintptr {
return uintptr(s.start << _PageShift)
return s.startAddr
}
func (s *mspan) layout() (size, n, total uintptr) {
......@@ -207,11 +261,8 @@ func inheap(b uintptr) bool {
return false
}
// Not a beginning of a block, consult span table to find the block beginning.
k := b >> _PageShift
x := k
x -= mheap_.arena_start >> _PageShift
s := h_spans[x]
if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse {
s := h_spans[(b-mheap_.arena_start)>>_PageShift]
if s == nil || b < s.base() || b >= s.limit || s.state != mSpanInUse {
return false
}
return true
......@@ -261,7 +312,7 @@ func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
return 0
}
p := uintptr(s.start) << _PageShift
p := s.base()
if s.sizeclass == 0 {
// Large object.
if base != nil {
......@@ -440,8 +491,7 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
// able to map interior pointer to containing span.
atomic.Store(&s.sweepgen, h.sweepgen)
s.state = _MSpanInUse
s.freelist = 0
s.ref = 0
s.allocCount = 0
s.sizeclass = uint8(sizeclass)
if sizeclass == 0 {
s.elemsize = s.npages << _PageShift
......@@ -504,7 +554,7 @@ func (h *mheap) alloc(npage uintptr, sizeclass int32, large bool, needzero bool)
if s != nil {
if needzero && s.needzero != 0 {
memclr(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
memclr(unsafe.Pointer(s.base()), s.npages<<_PageShift)
}
s.needzero = 0
}
......@@ -520,8 +570,8 @@ func (h *mheap) allocStack(npage uintptr) *mspan {
s := h.allocSpanLocked(npage)
if s != nil {
s.state = _MSpanStack
s.freelist = 0
s.ref = 0
s.stackfreelist = 0
s.allocCount = 0
memstats.stacks_inuse += uint64(s.npages << _PageShift)
}
......@@ -572,7 +622,7 @@ HaveSpan:
throw("still in list")
}
if s.npreleased > 0 {
sysUsed(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
sysUsed(unsafe.Pointer(s.base()), s.npages<<_PageShift)
memstats.heap_released -= uint64(s.npreleased << _PageShift)
s.npreleased = 0
}
......@@ -580,10 +630,9 @@ HaveSpan:
if s.npages > npage {
// Trim extra and put it back in the heap.
t := (*mspan)(h.spanalloc.alloc())
t.init(s.start+pageID(npage), s.npages-npage)
t.init(s.base()+npage<<_PageShift, s.npages-npage)
s.npages = npage
p := uintptr(t.start)
p -= (h.arena_start >> _PageShift)
p := (t.base() - h.arena_start) >> _PageShift
if p > 0 {
h_spans[p-1] = s
}
......@@ -597,8 +646,7 @@ HaveSpan:
}
s.unusedsince = 0
p := uintptr(s.start)
p -= (h.arena_start >> _PageShift)
p := (s.base() - h.arena_start) >> _PageShift
for n := uintptr(0); n < npage; n++ {
h_spans[p+n] = s
}
......@@ -626,7 +674,7 @@ func bestFit(list *mSpanList, npage uintptr, best *mspan) *mspan {
if s.npages < npage {
continue
}
if best == nil || s.npages < best.npages || (s.npages == best.npages && s.start < best.start) {
if best == nil || s.npages < best.npages || (s.npages == best.npages && s.base() < best.base()) {
best = s
}
}
......@@ -663,9 +711,8 @@ func (h *mheap) grow(npage uintptr) bool {
// Create a fake "in use" span and free it, so that the
// right coalescing happens.
s := (*mspan)(h.spanalloc.alloc())
s.init(pageID(uintptr(v)>>_PageShift), ask>>_PageShift)
p := uintptr(s.start)
p -= (h.arena_start >> _PageShift)
s.init(uintptr(v), ask>>_PageShift)
p := (s.base() - h.arena_start) >> _PageShift
for i := p; i < p+s.npages; i++ {
h_spans[i] = s
}
......@@ -696,11 +743,8 @@ func (h *mheap) lookupMaybe(v unsafe.Pointer) *mspan {
if uintptr(v) < h.arena_start || uintptr(v) >= h.arena_used {
return nil
}
p := uintptr(v) >> _PageShift
q := p
q -= h.arena_start >> _PageShift
s := h_spans[q]
if s == nil || p < uintptr(s.start) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse {
s := h_spans[(uintptr(v)-h.arena_start)>>_PageShift]
if s == nil || uintptr(v) < s.base() || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse {
return nil
}
return s
......@@ -715,6 +759,12 @@ func (h *mheap) freeSpan(s *mspan, acct int32) {
mp.mcache.local_scan = 0
memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
mp.mcache.local_tinyallocs = 0
if msanenabled {
// Tell msan that this entire span is no longer in use.
base := unsafe.Pointer(s.base())
bytes := s.npages << _PageShift
msanfree(base, bytes)
}
if acct != 0 {
memstats.heap_objects--
}
......@@ -743,12 +793,12 @@ func (h *mheap) freeStack(s *mspan) {
func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
switch s.state {
case _MSpanStack:
if s.ref != 0 {
if s.allocCount != 0 {
throw("MHeap_FreeSpanLocked - invalid stack free")
}
case _MSpanInUse:
if s.ref != 0 || s.sweepgen != h.sweepgen {
print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.start<<_PageShift), " ref ", s.ref, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
if s.allocCount != 0 || s.sweepgen != h.sweepgen {
print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
throw("MHeap_FreeSpanLocked - invalid free")
}
h.pagesInUse -= uint64(s.npages)
......@@ -776,12 +826,11 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
s.npreleased = 0
// Coalesce with earlier, later spans.
p := uintptr(s.start)
p -= h.arena_start >> _PageShift
p := (s.base() - h.arena_start) >> _PageShift
if p > 0 {
t := h_spans[p-1]
if t != nil && t.state == _MSpanFree {
s.start = t.start
s.startAddr = t.startAddr
s.npages += t.npages
s.npreleased = t.npreleased // absorb released pages
s.needzero |= t.needzero
......@@ -831,7 +880,7 @@ func scavengelist(list *mSpanList, now, limit uint64) uintptr {
var sumreleased uintptr
for s := list.first; s != nil; s = s.next {
if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
start := uintptr(s.start) << _PageShift
start := s.base()
end := start + s.npages<<_PageShift
if sys.PhysPageSize > _PageSize {
// We can only release pages in
......@@ -886,14 +935,13 @@ func runtime_debug_freeOSMemory() {
}
// Initialize a new span with the given start and npages.
func (span *mspan) init(start pageID, npages uintptr) {
func (span *mspan) init(base uintptr, npages uintptr) {
span.next = nil
span.prev = nil
span.list = nil
span.start = start
span.startAddr = base
span.npages = npages
span.freelist = 0
span.ref = 0
span.allocCount = 0
span.sizeclass = 0
span.incache = false
span.elemsize = 0
......@@ -903,6 +951,9 @@ func (span *mspan) init(start pageID, npages uintptr) {
span.speciallock.key = 0
span.specials = nil
span.needzero = 0
span.freeindex = 0
span.allocBits = nil
span.gcmarkBits = nil
}
func (span *mspan) inList() bool {
......@@ -917,7 +968,7 @@ func (list *mSpanList) init() {
func (list *mSpanList) remove(span *mspan) {
if span.prev == nil || span.list != list {
println("failed MSpanList_Remove", span, span.prev, span.list, list)
println("runtime: failed MSpanList_Remove", span, span.prev, span.list, list)
throw("MSpanList_Remove")
}
if span.next != nil {
......@@ -939,7 +990,7 @@ func (list *mSpanList) isEmpty() bool {
func (list *mSpanList) insert(span *mspan) {
if span.next != nil || span.prev != nil || span.list != nil {
println("failed MSpanList_Insert", span, span.next, span.prev, span.list)
println("runtime: failed MSpanList_Insert", span, span.next, span.prev, span.list)
throw("MSpanList_Insert")
}
span.next = list.first
......@@ -998,7 +1049,7 @@ func addspecial(p unsafe.Pointer, s *special) bool {
mp := acquirem()
span.ensureSwept()
offset := uintptr(p) - uintptr(span.start<<_PageShift)
offset := uintptr(p) - span.base()
kind := s.kind
lock(&span.speciallock)
......@@ -1046,7 +1097,7 @@ func removespecial(p unsafe.Pointer, kind uint8) *special {
mp := acquirem()
span.ensureSwept()
offset := uintptr(p) - uintptr(span.start<<_PageShift)
offset := uintptr(p) - span.base()
lock(&span.speciallock)
t := &span.specials
......@@ -1169,3 +1220,117 @@ func freespecial(s *special, p unsafe.Pointer, size uintptr) {
panic("not reached")
}
}
const gcBitsChunkBytes = uintptr(1 << 16)
const gcBitsHeaderBytes = unsafe.Sizeof(gcBitsHeader{})
type gcBitsHeader struct {
free uintptr // free is the index into bits of the next free byte.
next uintptr // *gcBits triggers recursive type bug. (issue 14620)
}
type gcBits struct {
// gcBitsHeader // side step recursive type bug (issue 14620) by including fields by hand.
free uintptr // free is the index into bits of the next free byte.
next *gcBits
bits [gcBitsChunkBytes - gcBitsHeaderBytes]uint8
}
var gcBitsArenas struct {
lock mutex
free *gcBits
next *gcBits
current *gcBits
previous *gcBits
}
// newMarkBits returns a pointer to 8 byte aligned bytes
// to be used for a span's mark bits.
func newMarkBits(nelems uintptr) *uint8 {
lock(&gcBitsArenas.lock)
blocksNeeded := uintptr((nelems + 63) / 64)
bytesNeeded := blocksNeeded * 8
if gcBitsArenas.next == nil ||
gcBitsArenas.next.free+bytesNeeded > uintptr(len(gcBits{}.bits)) {
// Allocate a new arena.
fresh := newArena()
fresh.next = gcBitsArenas.next
gcBitsArenas.next = fresh
}
if gcBitsArenas.next.free >= gcBitsChunkBytes {
println("runtime: gcBitsArenas.next.free=", gcBitsArenas.next.free, gcBitsChunkBytes)
throw("markBits overflow")
}
result := &gcBitsArenas.next.bits[gcBitsArenas.next.free]
gcBitsArenas.next.free += bytesNeeded
unlock(&gcBitsArenas.lock)
return result
}
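The byte count above rounds the object count up to whole 64-bit words, so every span reserves at least one mark bit per object and the result stays 8-byte aligned. A standalone sketch of the same arithmetic; markBitsBytes is an illustrative helper, not part of the change.

// markBitsBytes mirrors the rounding in newMarkBits: one bit per
// object, rounded up to whole 64-bit words, returned in bytes.
func markBitsBytes(nelems uintptr) uintptr {
	blocksNeeded := (nelems + 63) / 64
	return blocksNeeded * 8
}

// For example: markBitsBytes(1) == 8, markBitsBytes(64) == 8,
// markBitsBytes(65) == 16, markBitsBytes(1024) == 128.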
// newAllocBits returns a pointer to 8 byte aligned bytes
// to be used for this span's alloc bits.
// newAllocBits is used to provide newly initialized spans
// allocation bits. For spans not being initialized, the
// mark bits are repurposed as allocation bits when
// the span is swept.
func newAllocBits(nelems uintptr) *uint8 {
return newMarkBits(nelems)
}
// nextMarkBitArenaEpoch establishes a new epoch for the arenas
// holding the mark bits. The arenas are named relative to the
// current GC cycle, which is demarcated by the call to finishsweep_m.
//
// All current spans have been swept.
// During that sweep each span allocated room for its gcmarkBits in
// the gcBitsArenas.next block. gcBitsArenas.next becomes gcBitsArenas.current,
// where the GC will mark objects; after each span is swept, these bits
// will be used to allocate objects.
// gcBitsArenas.current becomes gcBitsArenas.previous, where the spans'
// allocBits live until all the spans have been swept during this GC cycle.
// Sweeping a span extinguishes its reference to gcBitsArenas.previous
// by pointing its allocBits into gcBitsArenas.current.
// gcBitsArenas.previous is then released to the gcBitsArenas.free list.
func nextMarkBitArenaEpoch() {
lock(&gcBitsArenas.lock)
if gcBitsArenas.previous != nil {
if gcBitsArenas.free == nil {
gcBitsArenas.free = gcBitsArenas.previous
} else {
// Find end of previous arenas.
last := gcBitsArenas.previous
for ; last.next != nil; last = last.next {
}
last.next = gcBitsArenas.free
gcBitsArenas.free = gcBitsArenas.previous
}
}
gcBitsArenas.previous = gcBitsArenas.current
gcBitsArenas.current = gcBitsArenas.next
gcBitsArenas.next = nil // newMarkBits calls newArena when needed
unlock(&gcBitsArenas.lock)
}
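Read as a pipeline, the rotation above hands each arena from next (filled while spans are swept) to current (written by the mark phase) to previous (still read while the following sweep repoints spans), and finally onto the free list. A toy model of that hand-off; arenaEpochs, sketchArena, and rotate are illustrative only, and previous is treated as a single node rather than the chain the real code walks to its end.

type sketchArena struct{ next *sketchArena }

type arenaEpochs struct {
	free, next, current, previous *sketchArena
}

func (a *arenaEpochs) rotate() {
	if a.previous != nil {
		// Prepend previous to the free list. The real code first walks
		// to the end of the previous chain, since it may hold several arenas.
		a.previous.next = a.free
		a.free = a.previous
	}
	a.previous = a.current
	a.current = a.next
	a.next = nil // a fresh arena is allocated lazily by newMarkBits
}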
// newArena allocates and zeroes a gcBits arena.
func newArena() *gcBits {
var result *gcBits
if gcBitsArenas.free == nil {
result = (*gcBits)(sysAlloc(gcBitsChunkBytes, &memstats.gc_sys))
if result == nil {
throw("runtime: cannot allocate memory")
}
} else {
result = gcBitsArenas.free
gcBitsArenas.free = gcBitsArenas.free.next
memclr(unsafe.Pointer(result), gcBitsChunkBytes)
}
result.next = nil
// If result.bits is not 8 byte aligned adjust index so
// that &result.bits[result.free] is 8 byte aligned.
if uintptr(unsafe.Offsetof(gcBits{}.bits))&7 == 0 {
result.free = 0
} else {
result.free = 8 - (uintptr(unsafe.Pointer(&result.bits[0])) & 7)
}
return result
}
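The final adjustment above is an align-up of the first usable byte: if the bits array does not begin on an 8-byte boundary, free is advanced so that &result.bits[result.free] does. A standalone sketch of that computation; alignedStart is an illustrative helper, not part of the change.

// alignedStart returns the smallest offset i such that base+i is a
// multiple of 8, mirroring the free-index adjustment in newArena.
func alignedStart(base uintptr) uintptr {
	if base&7 == 0 {
		return 0
	}
	return 8 - (base & 7)
}

// For example: alignedStart(0x1000) == 0, alignedStart(0x1003) == 5.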
......@@ -55,7 +55,7 @@ var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
func sizeToClass(size int32) int32 {
if size > _MaxSmallSize {
throw("SizeToClass - invalid size")
throw("invalid size")
}
if size > 1024-8 {
return int32(size_to_class128[(size-1024+127)>>7])
......@@ -79,7 +79,7 @@ func initSizes() {
}
}
if align&(align-1) != 0 {
throw("InitSizes - bug")
throw("incorrect alignment")
}
// Make the allocnpages big enough that
......@@ -106,10 +106,18 @@ func initSizes() {
sizeclass++
}
if sizeclass != _NumSizeClasses {
print("sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
throw("InitSizes - bad NumSizeClasses")
print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
throw("bad NumSizeClasses")
}
// Check that no size class puts more objects in a span than maxObjsPerSpan allows.
for i, size := range class_to_size {
if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan {
throw("span contains too many objects")
}
if size == 0 && i != 0 {
throw("size is 0 but class is not 0")
}
}
// Initialize the size_to_class tables.
nextsize := 0
for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
......@@ -128,12 +136,12 @@ func initSizes() {
for n := int32(0); n < _MaxSmallSize; n++ {
sizeclass := sizeToClass(n)
if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
print("incorrect SizeToClass\n")
goto dump
}
if sizeclass > 1 && class_to_size[sizeclass-1] >= n {
print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
print("SizeToClass too big\n")
goto dump
}
......@@ -155,18 +163,18 @@ func initSizes() {
dump:
if true {
print("NumSizeClasses=", _NumSizeClasses, "\n")
print("runtime: NumSizeClasses=", _NumSizeClasses, "\n")
print("runtime·class_to_size:")
for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ {
print(" ", class_to_size[sizeclass], "")
}
print("\n\n")
print("size_to_class8:")
print("runtime: size_to_class8:")
for i := 0; i < len(size_to_class8); i++ {
print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n")
}
print("\n")
print("size_to_class128:")
print("runtime: size_to_class128:")
for i := 0; i < len(size_to_class128); i++ {
print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n")
}
......
......@@ -295,9 +295,9 @@ func updatememstats(stats *gcstats) {
memstats.nmalloc++
memstats.alloc += uint64(s.elemsize)
} else {
memstats.nmalloc += uint64(s.ref)
memstats.by_size[s.sizeclass].nmalloc += uint64(s.ref)
memstats.alloc += uint64(s.ref) * uint64(s.elemsize)
memstats.nmalloc += uint64(s.allocCount)
memstats.by_size[s.sizeclass].nmalloc += uint64(s.allocCount)
memstats.alloc += uint64(s.allocCount) * uint64(s.elemsize)
}
}
unlock(&mheap_.lock)
......
......@@ -191,26 +191,26 @@ func stackpoolalloc(order uint8) gclinkptr {
if s == nil {
throw("out of memory")
}
if s.ref != 0 {
throw("bad ref")
if s.allocCount != 0 {
throw("bad allocCount")
}
if s.freelist.ptr() != nil {
throw("bad freelist")
if s.stackfreelist.ptr() != nil {
throw("bad stackfreelist")
}
for i := uintptr(0); i < _StackCacheSize; i += _FixedStack << order {
x := gclinkptr(uintptr(s.start)<<_PageShift + i)
x.ptr().next = s.freelist
s.freelist = x
x := gclinkptr(s.base() + i)
x.ptr().next = s.stackfreelist
s.stackfreelist = x
}
list.insert(s)
}
x := s.freelist
x := s.stackfreelist
if x.ptr() == nil {
throw("span has no free stacks")
}
s.freelist = x.ptr().next
s.ref++
if s.freelist.ptr() == nil {
s.stackfreelist = x.ptr().next
s.allocCount++
if s.stackfreelist.ptr() == nil {
// all stacks in s are allocated.
list.remove(s)
}
......@@ -223,14 +223,14 @@ func stackpoolfree(x gclinkptr, order uint8) {
if s.state != _MSpanStack {
throw("freeing stack not in a stack span")
}
if s.freelist.ptr() == nil {
if s.stackfreelist.ptr() == nil {
// s will now have a free stack
stackpool[order].insert(s)
}
x.ptr().next = s.freelist
s.freelist = x
s.ref--
if gcphase == _GCoff && s.ref == 0 {
x.ptr().next = s.stackfreelist
s.stackfreelist = x
s.allocCount--
if gcphase == _GCoff && s.allocCount == 0 {
// Span is completely free. Return it to the heap
// immediately if we're sweeping.
//
......@@ -247,7 +247,7 @@ func stackpoolfree(x gclinkptr, order uint8) {
//
// By not freeing, we prevent step #4 until GC is done.
stackpool[order].remove(s)
s.freelist = 0
s.stackfreelist = 0
mheap_.freeStack(s)
}
}
......@@ -391,7 +391,7 @@ func stackalloc(n uint32) (stack, []stkbar) {
throw("out of memory")
}
}
v = unsafe.Pointer(s.start << _PageShift)
v = unsafe.Pointer(s.base())
}
if raceenabled {
......@@ -456,7 +456,7 @@ func stackfree(stk stack, n uintptr) {
} else {
s := mheap_.lookup(v)
if s.state != _MSpanStack {
println(hex(s.start<<_PageShift), v)
println(hex(s.base()), v)
throw("bad span state")
}
if gcphase == _GCoff {
......@@ -1136,9 +1136,9 @@ func freeStackSpans() {
list := &stackpool[order]
for s := list.first; s != nil; {
next := s.next
if s.ref == 0 {
if s.allocCount == 0 {
list.remove(s)
s.freelist = 0
s.stackfreelist = 0
mheap_.freeStack(s)
}
s = next
......