Commit 3479b065 authored by Rick Hudson

[dev.garbage] runtime: allocate directly from GC mark bits

Instead of building a freelist from the mark bits generated
by the GC this CL allocates directly from the mark bits.

The approach moves the mark bits from the pointer/no pointer
heap structures into their own per span data structures. The
mark/allocation vectors consist of a single mark bit per
object. Two vectors are maintained, one for allocation and
one for the GC's mark phase. During the GC cycle's sweep
phase the interpretation of the vectors is swapped. The
mark vector becomes the allocation vector and the old
allocation vector is cleared and becomes the mark vector that
the next GC cycle will use.

Marked entries in the allocation vector indicate that the
object is not free. Each allocation vector maintains a boundary
between areas of the span already allocated from and areas
not yet allocated from. As objects are allocated this boundary
is moved until it reaches the end of the span. At this point
further allocations will be done from another span.

Since we no longer sweep a span inspecting each freed object,
maintaining the pointer/scalar bits in the heapBitMap is now
the responsibility of the routines doing the actual
allocation.

This CL is functionally complete and ready for performance
tuning.

Change-Id: I336e0fc21eef1066e0b68c7067cc71b9f3d50e04
Reviewed-on: https://go-review.googlesource.com/19470
Reviewed-by: Austin Clements <austin@google.com>
parent dc65a82e
......@@ -472,9 +472,13 @@ func dumpobjs() {
if n > uintptr(len(freemark)) {
throw("freemark array doesn't have enough entries")
}
for l := s.freelist; l.ptr() != nil; l = l.ptr().next {
freemark[(uintptr(l)-p)/size] = true
for freeIndex := s.freeindex; freeIndex < s.nelems; freeIndex++ {
if s.isFree(freeIndex) {
freemark[freeIndex] = true
}
}
for j := uintptr(0); j < n; j, p = j+1, p+size {
if freemark[j] {
freemark[j] = false
......@@ -709,7 +713,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
i := uintptr(0)
hbits := heapBitsForAddr(p)
for ; i < nptr; i++ {
if i >= 2 && !hbits.isMarked() {
if i >= 2 && !hbits.morePointers() {
break // end of object
}
if hbits.isPointer() {
......
......@@ -502,23 +502,34 @@ const (
// weight allocation. If it is a heavy weight allocation the caller must
// determine whether a new GC cycle needs to be started or if the GC is active
// whether this goroutine needs to assist the GC.
// https://golang.org/cl/5350 motivates why this routine should perform a
// prefetch.
func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, shouldhelpgc bool) {
s := c.alloc[sizeclass]
v = s.freelist
if v.ptr() == nil {
shouldhelpgc = false
freeIndex := s.nextFreeIndex(s.freeindex)
if freeIndex == s.nelems {
// The span is full.
if uintptr(s.ref) != s.nelems {
throw("s.ref != s.nelems && freeIndex == s.nelems")
}
systemstack(func() {
c.refill(int32(sizeclass))
})
shouldhelpgc = true
s = c.alloc[sizeclass]
v = s.freelist
freeIndex = s.nextFreeIndex(s.freeindex)
}
if freeIndex >= s.nelems {
throw("freeIndex is not valid")
}
s.freelist = v.ptr().next
v = gclinkptr(freeIndex*s.elemsize + s.base())
// Advance the freeIndex.
s.freeindex = freeIndex + 1
s.ref++
// prefetchnta offers best performance, see change list message.
prefetchnta(uintptr(v.ptr().next))
if uintptr(s.ref) > s.nelems {
throw("s.ref > s.nelems")
}
return
}
......@@ -655,10 +666,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
v, shouldhelpgc = c.nextFree(sizeclass)
x = unsafe.Pointer(v)
if flags&flagNoZero == 0 {
v.ptr().next = 0
if size > 2*sys.PtrSize && ((*[2]uintptr)(x))[1] != 0 {
memclr(unsafe.Pointer(v), size)
}
memclr(unsafe.Pointer(v), size)
// TODO:(rlh) Only clear if object is not known to be zeroed.
}
}
} else {
......@@ -667,12 +676,13 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
systemstack(func() {
s = largeAlloc(size, flags)
})
s.freeindex = 1
x = unsafe.Pointer(uintptr(s.start << pageShift))
size = s.elemsize
}
if flags&flagNoScan != 0 {
// All objects are pre-marked as noscan. Nothing to do.
heapBitsSetTypeNoScan(uintptr(x), size)
} else {
// If allocating a defer+arg block, now that we've picked a malloc size
// large enough to hold everything, cut the "asked for" size down to
......
This diff is collapsed.
......@@ -108,9 +108,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
_g_.m.locks++
// Return the current cached span to the central lists.
s := c.alloc[sizeclass]
if s.freelist.ptr() != nil {
throw("refill on a nonempty span")
if uintptr(s.ref) != s.nelems {
throw("refill of span with free space remaining")
}
if s != &emptymspan {
s.incache = false
}
......@@ -120,10 +122,11 @@ func (c *mcache) refill(sizeclass int32) *mspan {
if s == nil {
throw("out of memory")
}
if s.freelist.ptr() == nil {
println(s.ref, (s.npages<<_PageShift)/s.elemsize)
throw("empty span")
if uintptr(s.ref) == s.nelems {
throw("span has no free space")
}
c.alloc[sizeclass] = s
_g_.m.locks--
return s
......
......@@ -18,7 +18,7 @@ import "runtime/internal/atomic"
type mcentral struct {
lock mutex
sizeclass int32
nonempty mSpanList // list of spans with a free object
nonempty mSpanList // list of spans with a free object, ie a nonempty free list
empty mSpanList // list of spans with no free objects (or cached in an mcache)
}
......@@ -67,7 +67,9 @@ retry:
c.empty.insertBack(s)
unlock(&c.lock)
s.sweep(true)
if s.freelist.ptr() != nil {
freeIndex := s.nextFreeIndex(0)
if freeIndex != s.nelems {
s.freeindex = freeIndex
goto havespan
}
lock(&c.lock)
......@@ -115,9 +117,6 @@ havespan:
// heap_live changed.
gcController.revise()
}
if s.freelist.ptr() == nil {
throw("freelist empty")
}
s.incache = true
return s
}
......@@ -150,15 +149,11 @@ func (c *mcentral) uncacheSpan(s *mspan) {
// the latest generation.
// If preserve=true, don't return the span to heap nor relink in MCentral lists;
// caller takes care of it.
func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool) bool {
func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool, wasempty bool) bool {
if s.incache {
throw("freespan into cached span")
throw("freeSpan given cached span")
}
// Add the objects back to s's free list.
wasempty := s.freelist.ptr() == nil
end.ptr().next = s.freelist
s.freelist = start
s.ref -= uint16(n)
if preserve {
......@@ -190,16 +185,14 @@ func (c *mcentral) freeSpan(s *mspan, n int32, start gclinkptr, end gclinkptr, p
return false
}
// s is completely freed, return it to the heap.
c.nonempty.remove(s)
s.needzero = 1
s.freelist = 0
unlock(&c.lock)
mheap_.freeSpan(s, 0)
return true
}
// Fetch a new span from the heap and carve into objects for the free list.
// grow allocates a new empty span from the heap and initializes it for c's size class.
func (c *mcentral) grow() *mspan {
npages := uintptr(class_to_allocnpages[c.sizeclass])
size := uintptr(class_to_size[c.sizeclass])
......@@ -212,19 +205,7 @@ func (c *mcentral) grow() *mspan {
p := uintptr(s.start << _PageShift)
s.limit = p + size*n
head := gclinkptr(p)
tail := gclinkptr(p)
// i==0 iteration already done
for i := uintptr(1); i < n; i++ {
p += size
tail.ptr().next = gclinkptr(p)
tail = gclinkptr(p)
}
if s.freelist.ptr() != nil {
throw("freelist not empty")
}
tail.ptr().next = 0
s.freelist = head
heapBitsForSpan(s.base()).initSpan(s)
return s
}
......@@ -1044,9 +1044,9 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
if obj&(sys.PtrSize-1) != 0 {
throw("greyobject: obj not pointer-aligned")
}
mbits := span.markBitsForAddr(obj)
if useCheckmark {
if !hbits.isMarked() {
if !mbits.isMarked() {
printlock()
print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
......@@ -1068,10 +1068,10 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork
}
} else {
// If marked we have nothing to do.
if hbits.isMarked() {
if mbits.isMarked() {
return
}
hbits.setMarked()
mbits.setMarked()
// If this is a noscan object, fast-track it to black
// instead of greying it.
......@@ -1138,7 +1138,7 @@ func gcmarknewobject_m(obj, size uintptr) {
if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}
heapBitsForAddr(obj).setMarked()
markBitsForAddr(obj).setMarked()
atomic.Xadd64(&work.bytesMarked, int64(size))
}
......
......@@ -192,16 +192,13 @@ func (s *mspan) sweep(preserve bool) bool {
c := _g_.m.mcache
freeToHeap := false
// Mark any free objects in this span so we don't collect them.
sstart := uintptr(s.start << _PageShift)
for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
if uintptr(link) < sstart || s.limit <= uintptr(link) {
// Free list is corrupted.
dumpFreeList(s)
throw("free list corrupted")
}
heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
}
// The allocBits indicate which unmarked objects don't need to be
// processed since they were free at the end of the last GC cycle
// and were not allocated since then.
// If the allocBits index is >= s.freeindex and the bit
// is not marked then the object remains unallocated
// since the last GC.
// This situation is analogous to being on a freelist.
// Unlink & free special records for any objects we're about to free.
// Two complications here:
......@@ -216,8 +213,8 @@ func (s *mspan) sweep(preserve bool) bool {
for special != nil {
// A finalizer can be set for an inner byte of an object, find object beginning.
p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
hbits := heapBitsForAddr(p)
if !hbits.isMarked() {
mbits := s.markBitsForAddr(p)
if !mbits.isMarked() {
// This object is not marked and has at least one special record.
// Pass 1: see if it has at least one finalizer.
hasFin := false
......@@ -225,7 +222,7 @@ func (s *mspan) sweep(preserve bool) bool {
for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next {
if tmp.kind == _KindSpecialFinalizer {
// Stop freeing of object if it has a finalizer.
hbits.setMarkedNonAtomic()
mbits.setMarkedNonAtomic()
hasFin = true
break
}
......@@ -259,8 +256,7 @@ func (s *mspan) sweep(preserve bool) bool {
// This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations.
size, n, _ := s.layout()
heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
nfree = heapBitsSweepSpan(s, func(p uintptr) {
// At this point we know that we are looking at garbage object
// that needs to be collected.
if debug.allocfreetrace != 0 {
......@@ -288,17 +284,18 @@ func (s *mspan) sweep(preserve bool) bool {
} else if size > sys.PtrSize {
*(*uintptr)(unsafe.Pointer(p + sys.PtrSize)) = 0
}
if head.ptr() == nil {
head = gclinkptr(p)
} else {
end.ptr().next = gclinkptr(p)
}
end = gclinkptr(p)
end.ptr().next = gclinkptr(0x0bade5)
nfree++
}
})
wasempty := s.nextFreeIndex(s.freeindex) == s.nelems
s.freeindex = 0 // reset allocation index to start of span.
// Swap role of allocBits with gcmarkBits
// Clear gcmarkBits in preparation for next GC
s.allocBits, s.gcmarkBits = s.gcmarkBits, s.allocBits
s.clearGCMarkBits() // prepare for next GC
// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation
......@@ -311,11 +308,14 @@ func (s *mspan) sweep(preserve bool) bool {
print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
throw("MSpan_Sweep: bad span state after sweep")
}
// Serialization point.
// At this point the mark bits are cleared and allocation ready
// to go so release the span.
atomic.Store(&s.sweepgen, sweepgen)
}
if nfree > 0 {
c.local_nsmallfree[cl] += uintptr(nfree)
res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve)
res = mheap_.central[cl].mcentral.freeSpan(s, int32(nfree), head, end, preserve, wasempty)
// MCentral_FreeSpan updates sweepgen
} else if freeToHeap {
// Free large span to heap
......@@ -399,27 +399,3 @@ func reimburseSweepCredit(unusableBytes uintptr) {
throw("spanBytesAlloc underflow")
}
}
func dumpFreeList(s *mspan) {
printlock()
print("runtime: free list of span ", s, ":\n")
sstart := uintptr(s.start << _PageShift)
link := s.freelist
for i := 0; i < int(s.npages*_PageSize/s.elemsize); i++ {
if i != 0 {
print(" -> ")
}
print(hex(link))
if link.ptr() == nil {
break
}
if uintptr(link) < sstart || s.limit <= uintptr(link) {
// Bad link. Stop walking before we crash.
print(" (BAD)")
break
}
link = link.ptr().next
}
print("\n")
printunlock()
}
......@@ -119,8 +119,7 @@ type mspan struct {
start pageID // starting page number
npages uintptr // number of pages in span
freelist gclinkptr // list of free objects for _MSpanInUse
stackfreelist gclinkptr // list of free stacks, avoids overloading freelist for _MSpanStack
stackfreelist gclinkptr // list of free stacks, avoids overloading freelist
// freeindex is the slot index between 0 and nelems at which to begin scanning
// for the next free object in this span.
......@@ -472,7 +471,6 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
// able to map interior pointer to containing span.
atomic.Store(&s.sweepgen, h.sweepgen)
s.state = _MSpanInUse
s.freelist = 0
s.ref = 0
s.sizeclass = uint8(sizeclass)
if sizeclass == 0 {
......@@ -914,7 +912,6 @@ func (span *mspan) init(start pageID, npages uintptr) {
span.list = nil
span.start = start
span.npages = npages
span.freelist = 0
span.ref = 0
span.sizeclass = 0
span.incache = false
......@@ -925,6 +922,17 @@ func (span *mspan) init(start pageID, npages uintptr) {
span.speciallock.key = 0
span.specials = nil
span.needzero = 0
span.freeindex = 0
span.allocBits = &span.markbits1
span.gcmarkBits = &span.markbits2
// determine if this is actually needed. It is once / span so it
// isn't expensive. This is to be replaced by an arena
// based system where things can be cleared all at once so
// don't worry about optimizing this.
for i := 0; i < len(span.markbits1); i++ {
span.allocBits[i] = 0
span.gcmarkBits[i] = 0
}
}
func (span *mspan) inList() bool {
......
......@@ -1137,7 +1137,6 @@ func freeStackSpans() {
next := s.next
if s.ref == 0 {
list.remove(s)
s.freelist = 0
s.stackfreelist = 0
mheap_.freeStack(s)
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment