Commit c0392d2e authored by Austin Clements

runtime: make the heap bitmap sparse

This splits the heap bitmap into separate chunks for every 64MB of the
heap and introduces an index mapping from virtual address to metadata.
It modifies the heapBits abstraction to use this two-level structure.
Finally, it modifies heapBitsSetType to unroll the bitmap into the
object itself and then copy it out if the bitmap would span
discontiguous bitmap chunks.
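To make the new mapping concrete, here is a minimal standalone
sketch of the two-level lookup (the constants mirror the 64-bit
values introduced in this change; the program and the example
address are illustrative, not runtime code):

    package main

    import "fmt"

    const (
        ptrSize              = 8                                  // bytes per heap word
        heapArenaBytes       = 64 << 20                           // heap covered by one bitmap chunk
        heapArenaBitmapBytes = heapArenaBytes / (ptrSize * 8 / 2) // 2 bitmap bits per heap word
    )

    func main() {
        addr := uintptr(0xc420080010)                // hypothetical heap address
        arenaIdx := addr / heapArenaBytes            // level 1: index into the arena table
        word := addr / ptrSize                       // word index of addr
        byteIdx := (word / 4) % heapArenaBitmapBytes // level 2: byte within that arena's bitmap
        entry := word % 4                            // which of that byte's four 2-bit entries
        fmt.Println(arenaIdx, byteIdx, entry)
    }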

This is a step toward supporting general sparse heaps, which will
eliminate address space conflict failures as well as the limit on the
heap size.

It's also advantageous for 32-bit. 32-bit already supports
discontiguous heaps by always starting the arena at address 0.
However, as a result, with a contiguous bitmap, if the kernel chooses
a high address (near 2GB) for a heap mapping, the runtime is forced to
map up to 128MB of heap bitmap. Now the runtime can map sections of
the bitmap for just the parts of the address space used by the heap.
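Concretely, with the 32-bit values used in this change (4-byte
words, 2 bitmap bits per word, 4MB arenas), a rough calculation:

    2 bits of bitmap per 4-byte word        => 1 bitmap byte per 16 heap bytes
    contiguous bitmap to reach a 2GB mapping:  2GB / 16 = 128MB
    bitmap for a single 4MB arena:             4MB / 16 = 256KB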

Updates #10460.

This slightly slows down the x/garbage and compilebench benchmarks.
However, I think the slowdown is acceptably small.

name        old time/op     new time/op     delta
Template        178ms ± 1%      180ms ± 1%  +0.78%    (p=0.029 n=10+10)
Unicode        85.7ms ± 2%     86.5ms ± 2%    ~       (p=0.089 n=10+10)
GoTypes         594ms ± 0%      599ms ± 1%  +0.70%    (p=0.000 n=9+9)
Compiler        2.86s ± 0%      2.87s ± 0%  +0.40%    (p=0.001 n=9+9)
SSA             7.23s ± 2%      7.29s ± 2%  +0.94%    (p=0.029 n=10+10)
Flate           116ms ± 1%      117ms ± 1%  +0.99%    (p=0.000 n=9+9)
GoParser        146ms ± 1%      146ms ± 0%    ~       (p=0.193 n=10+7)
Reflect         399ms ± 0%      403ms ± 1%  +0.89%    (p=0.001 n=10+10)
Tar             173ms ± 1%      174ms ± 1%  +0.91%    (p=0.013 n=10+9)
XML             208ms ± 1%      210ms ± 1%  +0.93%    (p=0.000 n=10+10)
[Geo mean]      368ms           371ms       +0.79%

name                       old time/op  new time/op  delta
Garbage/benchmem-MB=64-12  2.17ms ± 1%  2.21ms ± 1%  +2.15%  (p=0.000 n=20+20)

Change-Id: I037fd283221976f4f61249119d6b97b100bcbc66
Reviewed-on: https://go-review.googlesource.com/85883
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
parent f61057c4
...@@ -154,6 +154,39 @@ const (
// since the arena starts at address 0.
_MaxMem = 1<<_MHeapMap_TotalBits - 1
// memLimitBits is the maximum number of bits in a heap address.
//
// On 64-bit platforms, we limit this to 48 bits because that
// is the maximum supported by Linux across all 64-bit
// architectures, with the exception of s390x.
// s390x supports full 64-bit addresses, but the allocator
// will panic in the unlikely event we exceed 48 bits.
//
// On 32-bit platforms, we accept the full 32-bit address
// space because doing so is cheap.
// mips32 only has access to the low 2GB of virtual memory, so
// we further limit it to 31 bits.
//
// The size of the arena index is proportional to
// 1<<memLimitBits, so it's important that this not be too
// large. 48 bits is about the threshold; above that we would
// need to go to a two level arena index.
memLimitBits = _64bit*48 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle))
// memLimit is one past the highest possible heap pointer value.
memLimit = 1 << memLimitBits
// heapArenaBytes is the size of a heap arena. The heap
// consists of mappings of size heapArenaBytes, aligned to
// heapArenaBytes. The initial heap mapping is one arena.
//
// TODO: Right now only the bitmap is divided into separate
// arenas, but shortly all of the heap will be.
heapArenaBytes = (64<<20)*_64bit + (4<<20)*(1-_64bit)
// heapArenaBitmapBytes is the size of each heap arena's bitmap.
heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2)
// Max number of threads to run garbage collection.
// 2, 3, and 4 are all plausible maximums depending
// on the hardware details of the machine. The garbage
...@@ -221,6 +254,12 @@ func mallocinit() {
testdefersizes()
if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 {
// heapBits expects modular arithmetic on bitmap
// addresses to work.
throw("heapArenaBitmapBytes not a power of 2")
}
// Copy class sizes out for statistics table.
for i := range class_to_size {
memstats.by_size[i].size = uint32(class_to_size[i])
...@@ -248,9 +287,6 @@ func mallocinit() {
// The spans array holds one *mspan per _PageSize of arena.
var spansSize uintptr = (_MaxMem + 1) / _PageSize * sys.PtrSize
spansSize = round(spansSize, _PageSize)
// The bitmap holds 2 bits per word of arena.
var bitmapSize uintptr = (_MaxMem + 1) / (sys.PtrSize * 8 / 2)
bitmapSize = round(bitmapSize, _PageSize)
// Set up the allocation arena, a contiguous area of memory where
// allocated data will be found.
...@@ -275,9 +311,6 @@ func mallocinit() {
// not collecting memory because some non-pointer block of memory
// had a bit pattern that matched a memory address.
//
// Actually we reserve 544 GB (because the bitmap ends up being 32 GB)
// but it hardly matters: e0 00 is not valid UTF-8 either.
//
// If this fails we fall back to the 32 bit memory mechanism
//
// However, on arm64, we ignore all this advice above and slam the
...@@ -285,7 +318,7 @@ func mallocinit() {
// translation buffers, the user address space is limited to 39 bits
// On darwin/arm64, the address space is even smaller.
arenaSize := round(_MaxMem, _PageSize)
pSize = bitmapSize + spansSize + arenaSize + _PageSize
pSize = spansSize + arenaSize + _PageSize
for i := 0; i <= 0x7f; i++ {
switch {
case GOARCH == "arm64" && GOOS == "darwin":
...@@ -344,7 +377,7 @@ func mallocinit() {
// away from the running binary image and then round up
// to a MB boundary.
p = round(firstmoduledata.end+(1<<18), 1<<20)
pSize = bitmapSize + spansSize + arenaSize + _PageSize
pSize = spansSize + arenaSize + _PageSize
if p <= procBrk && procBrk < p+pSize {
// Move the start above the brk,
// leaving some room for future brk
...@@ -369,8 +402,6 @@ func mallocinit() {
spansStart := p1
p1 += spansSize
mheap_.bitmap_start = p1
p1 += bitmapSize
if sys.PtrSize == 4 {
// Set arena_start such that we can accept memory
// reservations located anywhere in the 4GB virtual space.
...@@ -383,24 +414,18 @@ func mallocinit() {
mheap_.arena_alloc = p1
mheap_.arena_reserved = reserved
// Pre-compute the value heapBitsForAddr can use to directly
// map a heap address to a bitmap address. The obvious
// computation is:
//
// bitp = bitmap_start + (addr - arena_start)/ptrSize/4
//
// We can shuffle this to
//
// bitp = (bitmap_start - arena_start/ptrSize/4) + addr/ptrSize/4
//
// bitmap_delta is the value of the first term.
mheap_.bitmap_delta = mheap_.bitmap_start - mheap_.arena_start/heapBitmapScale
if mheap_.arena_start&(_PageSize-1) != 0 {
println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(_PageSize), "start", hex(mheap_.arena_start))
throw("misrounded allocation in mallocinit")
}
// Map the arena index. Most of this will never be touched.
var untracked uint64
mheap_.arenas = (*[memLimit / heapArenaBytes]*heapArena)(persistentalloc(unsafe.Sizeof(*mheap_.arenas), sys.PtrSize, &untracked))
if mheap_.arenas == nil {
throw("failed to allocate arena index")
}
// Initialize the rest of the allocator.
mheap_.init(spansStart, spansSize)
_g_ := getg()
......
...@@ -13,14 +13,12 @@
//
// Heap bitmap
//
// The allocated heap comes from a subset of the memory in the range [start, used),
// where start == mheap_.arena_start and used == mheap_.arena_used.
// The heap bitmap comprises 2 bits for each pointer-sized word in that range,
// stored in bytes indexed forward in memory from bitmap_start.
// That is, the byte at address bitmap holds the 2-bit entries for the
// four words start through start+3*ptrSize, the byte at
// bitmap_start+1 holds the entries for start+4*ptrSize through
// start+7*ptrSize, and so on.
// The heap bitmap comprises 2 bits for each pointer-sized word in the heap,
// stored in the heapArena metadata backing each heap arena.
// That is, if ha is the heapArena for the arena starting at start,
// then ha.bitmap[0] holds the 2-bit entries for the four words start
// through start+3*ptrSize, ha.bitmap[1] holds the entries for
// start+4*ptrSize through start+7*ptrSize, and so on.
//
// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
...@@ -87,7 +85,6 @@ const (
bitScan = 1 << 4
heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries
heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte
wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte
// all scan/pointer bits in a byte
...@@ -137,28 +134,6 @@ func subtract1(p *byte) *byte {
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}
// mapBits maps any additional bitmap memory needed for the new arena memory.
//
// Don't call this directly. Call mheap.setArenaUsed.
//
//go:nowritebarrier
func (h *mheap) mapBits(arena_used uintptr) {
// Caller has added extra mappings to the arena.
// Add extra mappings of bitmap words as needed.
// We allocate extra bitmap pieces in chunks of bitmapChunk.
const bitmapChunk = 8192
n := (arena_used - mheap_.arena_start) / heapBitmapScale
n = round(n, bitmapChunk)
n = round(n, physPageSize)
if h.bitmap_mapped >= n {
return
}
sysMap(unsafe.Pointer(h.bitmap_start+h.bitmap_mapped), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
h.bitmap_mapped = n
}
// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
...@@ -166,8 +141,14 @@ func (h *mheap) mapBits(arena_used uintptr) {
type heapBits struct {
bitp *uint8
shift uint32
arena uint32 // Index of heap arena containing bitp
last *uint8 // Last byte of arena's bitmap
}
// Make the compiler check that heapBits.arena is large enough to hold
// the maximum arena index.
var _ = heapBits{arena: memLimit / heapArenaBytes}
// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
...@@ -349,14 +330,26 @@ func (m *markBits) advance() {
}
// heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
// The caller must ensure addr is in an allocated span.
// In particular, be careful not to point past the end of an object.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func heapBitsForAddr(addr uintptr) heapBits {
// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
off := addr / sys.PtrSize
return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap_delta + off/4)), uint32(off & 3)}
arena := addr / heapArenaBytes
ha := mheap_.arenas[arena]
// The compiler uses a load for nil checking ha, but in this
// case we'll almost never hit that cache line again, so it
// makes more sense to do a value check.
if ha == nil {
// addr is not in the heap. Crash without inhibiting inlining.
_ = *ha
}
bitp := &ha.bitmap[(off/4)%heapArenaBitmapBytes]
last := &ha.bitmap[len(ha.bitmap)-1]
return heapBits{bitp, uint32(off & 3), uint32(arena), last}
}
// heapBitsForSpan returns the heapBits for the span base address base.
...@@ -446,9 +439,24 @@ func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex ui
//go:nosplit
func (h heapBits) next() heapBits {
if h.shift < 3*heapBitsShift {
return heapBits{h.bitp, h.shift + heapBitsShift}
h.shift += heapBitsShift
} else if h.bitp != h.last {
h.bitp, h.shift = add1(h.bitp), 0
} else {
// Move to the next arena.
h.arena++
a := mheap_.arenas[h.arena]
if a == nil {
// We just passed the end of the object, which
// was also the end of the heap. Poison h. It
// should never be dereferenced at this point.
h.bitp, h.last = nil, nil
} else {
h.bitp, h.shift = &a.bitmap[0], 0
h.last = &a.bitmap[len(a.bitmap)-1]
}
}
return heapBits{add1(h.bitp), 0}
return h
}
// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
...@@ -456,16 +464,37 @@ func (h heapBits) next() heapBits {
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
// bits returns the heap bits for the current word.
//go:nosplit
func (h heapBits) forward(n uintptr) heapBits {
n += uintptr(h.shift) / heapBitsShift
return heapBits{addb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4
h.shift = uint32(n%4) * heapBitsShift
if nbitp <= uintptr(unsafe.Pointer(h.last)) {
h.bitp = (*uint8)(unsafe.Pointer(nbitp))
return h
}
// We're in a new heap arena.
past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1)
h.arena += 1 + uint32(past/heapArenaBitmapBytes)
a := mheap_.arenas[h.arena]
if a == nil {
h.bitp, h.last = nil, nil
} else {
h.bitp = &a.bitmap[past%heapArenaBitmapBytes]
h.last = &a.bitmap[len(a.bitmap)-1]
}
return h
}
// forwardOrBoundary is like forward, but stops at boundaries between
// contiguous sections of the bitmap. It returns the number of words
// advanced over, which will be <= n.
func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) {
// The bitmap is contiguous right now, so this is just forward.
maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp)))
if n > maxn {
n = maxn
}
return h.forward(n), n
}
...@@ -951,6 +980,16 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// This is a lot of lines of code, but it compiles into relatively few
// machine instructions.
outOfPlace := false
if (x+size-1)/heapArenaBytes != uintptr(h.arena) {
// This object spans heap arenas, so the bitmap may be
// discontiguous. Unroll it into the object instead
// and then copy it out.
outOfPlace = true
h.bitp = (*uint8)(unsafe.Pointer(x))
h.last = nil
}
var (
// Ptrmask input.
p *byte // last ptrmask byte read
...@@ -989,9 +1028,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
}
ptrmask = debugPtrmask.data
runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
goto Phase4
}
return
goto Phase4
}
// Note about sizes:
...@@ -1109,7 +1147,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
nw = 2
}
// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4).
// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
// The leading byte is special because it contains the bits for word 1,
// which does not have the scan bit set.
// The leading half-byte is special because it's a half a byte,
...@@ -1280,9 +1318,81 @@ Phase3:
}
Phase4:
// Phase 4: all done, but perhaps double check.
// Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary.
if outOfPlace {
// TODO: We could probably make this faster by
// handling [x+dataSize, x+size) specially.
h := heapBitsForAddr(x)
// cnw is the number of heap words, or bit pairs
// remaining (like nw above).
cnw := size / sys.PtrSize
src := (*uint8)(unsafe.Pointer(x))
// We know the first and last byte of the bitmap are
// not the same, but it's still possible for small
// objects to span arenas, so they may share bitmap bytes
// with neighboring objects.
//
// Handle the first byte specially if it's shared. See
// Phase 1 for why this is the only special case we need.
if doubleCheck {
if !(h.shift == 0 || (sys.PtrSize == 8 && h.shift == 2)) {
print("x=", x, " size=", size, " cnw=", h.shift, "\n")
throw("bad start shift")
}
}
if sys.PtrSize == 8 && h.shift == 2 {
*hbitp = *hbitp&^((bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift)<<(2*heapBitsShift)) | *src
h = h.next().next()
cnw -= 2
src = addb(src, 1)
}
// We're now byte aligned. Copy out to per-arena
// bitmaps until the last byte (which may again be
// partial).
for cnw >= 4 {
hNext, words := h.forwardOrBoundary(cnw)
// n is the number of bitmap bytes to copy.
n := words / 4
memmove(unsafe.Pointer(h.bitp), unsafe.Pointer(src), n)
cnw -= words
h = hNext
src = addb(src, n)
}
// Handle the last byte if it's shared.
if cnw == 2 {
*h.bitp = *h.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | *src
src = addb(src, 1)
h = h.next().next()
}
if doubleCheck {
end := heapBitsForAddr(x + size)
if uintptr(unsafe.Pointer(src)) > x+size {
throw("copy exceeded object size")
}
if !(cnw == 0 || cnw == 2) {
print("x=", x, " size=", size, " cnw=", cnw, "\n")
throw("bad number of remaining words")
}
// Set up hbitp so doubleCheck code below can check it.
hbitp = h.bitp
}
// Zero the object where we wrote the bitmap.
memclrNoHeapPointers(unsafe.Pointer(x), uintptr(unsafe.Pointer(src))-x)
}
// Double check the whole bitmap.
if doubleCheck {
// x+size may not point to the heap, so back up one
// word and then call next().
end := heapBitsForAddr(x + size - sys.PtrSize).next()
if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[end.arena].bitmap[0])) {
// The unrolling code above walks hbitp just
// past the bitmap without moving to the next
// arena. Synthesize this for end.bitp.
end.bitp = addb(&mheap_.arenas[end.arena-1].bitmap[0], heapArenaBitmapBytes)
end.arena--
end.last = nil
}
if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
...@@ -1322,7 +1432,7 @@ Phase4:
if have != want {
println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
print("kindGCProg=", typ.kind&kindGCProg != 0, " outOfPlace=", outOfPlace, "\n")
print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
h0 := heapBitsForAddr(x)
print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
...@@ -1430,7 +1540,7 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u
totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize
}
endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4))
endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/heapBitmapScale))
endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/sys.PtrSize/wordsPerBitmapByte))
memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg))
}
......
...@@ -114,9 +114,6 @@ type mheap struct {
nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
// range of addresses we might see in the heap
bitmap_start uintptr // Points to first byte of bitmap
bitmap_mapped uintptr
bitmap_delta uintptr // Used to map heap address to bitmap address
// The arena_* fields indicate the addresses of the Go heap.
//
...@@ -143,6 +140,21 @@ type mheap struct {
// here and *must* clobber it to use it.
arena_reserved bool
// arenas is the heap arena index. arenas[va/heapArenaBytes]
// points to the metadata for the heap arena containing va.
//
// For regions of the address space that are not backed by the
// Go heap, the arena index contains nil.
//
// Modifications are protected by mheap_.lock. Reads can be
// performed without locking; however, a given entry can
// transition from nil to non-nil at any time when the lock
// isn't held. (Entries never transition back to nil.)
//
// This structure is fully mapped by mallocinit, so it's safe
// to probe any index.
arenas *[memLimit / heapArenaBytes]*heapArena
//_ uint32 // ensure 64-bit alignment
// central free lists for small size classes.
...@@ -167,6 +179,23 @@ type mheap struct {
var mheap_ mheap
// A heapArena stores metadata for a heap arena. heapArenas are stored
// outside of the Go heap and accessed via the mheap_.arenas index.
//
// This gets allocated directly from the OS, so ideally it should be a
// multiple of the system page size. For example, avoid adding small
// fields.
//
//go:notinheap
type heapArena struct {
// bitmap stores the pointer/scalar bitmap for the words in
// this arena. See mbitmap.go for a description. Use the
// heapBits type to access this.
bitmap [heapArenaBitmapBytes]byte
// TODO: Also store the spans map here.
}
// An MSpan is a run of pages.
//
// When a MSpan is in the heap free list, state == MSpanFree
...@@ -507,8 +536,21 @@ func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) {
// avoids faults when other threads try to access these regions immediately
// after observing the change to arena_used.
// Map the bitmap.
h.mapBits(arena_used)
// Allocate heap arena metadata.
for ri := h.arena_used / heapArenaBytes; ri < (arena_used+heapArenaBytes-1)/heapArenaBytes; ri++ {
if h.arenas[ri] != nil {
continue
}
r := (*heapArena)(persistentalloc(unsafe.Sizeof(heapArena{}), sys.PtrSize, &memstats.gc_sys))
if r == nil {
throw("runtime: out of memory allocating heap arena metadata")
}
// Store atomically just in case an object from the
// new heap arena becomes visible before the heap lock
// is released (which shouldn't happen, but there's
// little downside to this).
atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r))
}
// Map spans array.
h.mapSpans(arena_used)
......