Commit 9d78e75a authored by Michael Anthony Knyszek, committed by Michael Knyszek

runtime: track ranges of address space which are owned by the heap

This change adds a new inUse field to the allocator which tracks ranges
of addresses that are owned by the heap. It is updated on each heap
growth.

These ranges are tracked in an array which is kept sorted. In practice
this array shouldn't exceed its initial allocation except in rare cases
and thus should be small (ideally exactly 1 element in size).

In a hypothetical worst-case scenario wherein we have a 1 TiB heap and 4
MiB arenas (note that the address ranges will never be at a smaller
granularity than an arena, since arenas are always allocated
contiguously), inUse would use at most 4 MiB of memory if the heap
mappings were completely discontiguous (highly unlikely) with an
additional 2 MiB leaked from previous allocations. Furthermore, the
copies that are done to keep the inUse array sorted will copy at most 4
MiB of memory in such a scenario, which, assuming a conservative copying
rate of 5 GiB/s, amounts to about 800µs.

However, note that in practice:
1) Most 64-bit platforms have 64 MiB arenas.
2) The copies should incur little-to-no page faults, meaning a copy rate
   closer to 25-50 GiB/s is expected.
3) Go heaps are almost always mostly contiguous.
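Roughly, the arithmetic behind the worst-case figures above (an addrRange
is two uintptrs, i.e. 16 bytes on 64-bit):

    1 TiB heap / 4 MiB arenas    = 262,144 ranges if fully discontiguous
    262,144 ranges * 16 bytes    = 4 MiB for the inUse array
    2 MiB                        = the previous, pre-doubling array, which
                                   persistentalloc cannot free
    4 MiB copied at 5 GiB/s      ≈ 800µs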

Updates #35514.

Change-Id: I3ad07f1c2b5b9340acf59ecc3b9ae09e884814fe
Reviewed-on: https://go-review.googlesource.com/c/go/+/207757
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Austin Clements <austin@google.com>
parent ef3ef8fc
...@@ -734,6 +734,16 @@ func (p *PageAlloc) Scavenge(nbytes uintptr, locked bool) (r uintptr) {
})
return
}
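
// InUse returns a snapshot of the address ranges currently in use by
// the page allocator, for use in tests.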
func (p *PageAlloc) InUse() []AddrRange {
ranges := make([]AddrRange, 0, len(p.inUse.ranges))
for _, r := range p.inUse.ranges {
ranges = append(ranges, AddrRange{
Base: r.base,
Limit: r.limit,
})
}
return ranges
}
// Returns nil if the PallocData's L2 is missing.
func (p *PageAlloc) PallocData(i ChunkIdx) *PallocData {
...@@ -745,6 +755,12 @@ func (p *PageAlloc) PallocData(i ChunkIdx) *PallocData {
return (*PallocData)(&l2[ci.l2()])
}
// AddrRange represents a range over addresses.
// Specifically, it represents the range [Base, Limit).
type AddrRange struct {
Base, Limit uintptr
}
// BitRange represents a range over a bitmap.
type BitRange struct {
I, N uint // bit index and length in bits
...
...@@ -245,6 +245,19 @@ type pageAlloc struct {
// currently ready to use.
start, end chunkIdx
// inUse is a slice of ranges of address space which are
// known by the page allocator to be currently in-use (passed
// to grow).
//
// This field is currently unused on 32-bit architectures but
// is harmless to track. We care much more about having a
// contiguous heap in these cases and take additional measures
// to ensure that, so in nearly all cases this should have just
// 1 element.
//
// All access is protected by the mheapLock.
inUse addrRanges
// mheap_.lock. This level of indirection makes it possible
// to test pageAlloc independently of the runtime allocator.
mheapLock *mutex
...@@ -268,6 +281,9 @@ func (s *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
}
s.sysStat = sysStat
// Initialize s.inUse.
s.inUse.init(sysStat)
// System-dependent initialization.
s.sysInit()
...@@ -381,6 +397,10 @@ func (s *pageAlloc) grow(base, size uintptr) {
if end > s.end {
s.end = end
}
// Note that [base, limit) will never overlap with any existing
// range inUse because grow only ever adds never-used memory
// regions to the page allocator.
s.inUse.add(addrRange{base, limit})
// A grow operation is a lot like a free operation, so if our
// chunk ends up below the (linearized) s.searchAddr, update
...
...@@ -40,6 +40,119 @@ func checkPageAlloc(t *testing.T, want, got *PageAlloc) {
// TODO(mknyszek): Verify summaries too?
}
func TestPageAllocGrow(t *testing.T) {
tests := map[string]struct {
chunks []ChunkIdx
inUse []AddrRange
}{
"One": {
chunks: []ChunkIdx{
BaseChunkIdx,
},
inUse: []AddrRange{
{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
},
},
"Contiguous2": {
chunks: []ChunkIdx{
BaseChunkIdx,
BaseChunkIdx + 1,
},
inUse: []AddrRange{
{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)},
},
},
"Contiguous5": {
chunks: []ChunkIdx{
BaseChunkIdx,
BaseChunkIdx + 1,
BaseChunkIdx + 2,
BaseChunkIdx + 3,
BaseChunkIdx + 4,
},
inUse: []AddrRange{
{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+5, 0)},
},
},
"Discontiguous": {
chunks: []ChunkIdx{
BaseChunkIdx,
BaseChunkIdx + 2,
BaseChunkIdx + 4,
},
inUse: []AddrRange{
{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
{PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)},
{PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)},
},
},
"Mixed": {
chunks: []ChunkIdx{
BaseChunkIdx,
BaseChunkIdx + 1,
BaseChunkIdx + 2,
BaseChunkIdx + 4,
},
inUse: []AddrRange{
{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+3, 0)},
{PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)},
},
},
"WildlyDiscontiguous": {
chunks: []ChunkIdx{
BaseChunkIdx,
BaseChunkIdx + 1,
BaseChunkIdx + 0x10,
BaseChunkIdx + 0x21,
},
inUse: []AddrRange{
{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)},
{PageBase(BaseChunkIdx+0x10, 0), PageBase(BaseChunkIdx+0x11, 0)},
{PageBase(BaseChunkIdx+0x21, 0), PageBase(BaseChunkIdx+0x22, 0)},
},
},
}
for name, v := range tests {
v := v
t.Run(name, func(t *testing.T) {
// By creating a new pageAlloc, we will
// grow it for each chunk defined in x.
x := make(map[ChunkIdx][]BitRange)
for _, c := range v.chunks {
x[c] = []BitRange{}
}
b := NewPageAlloc(x, nil)
defer FreePageAlloc(b)
got := b.InUse()
want := v.inUse
// Check for mismatches.
if len(got) != len(want) {
t.Fail()
} else {
for i := range want {
if want[i] != got[i] {
t.Fail()
break
}
}
}
if t.Failed() {
t.Logf("found inUse mismatch")
t.Logf("got:")
for i, r := range got {
t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit)
}
t.Logf("want:")
for i, r := range want {
t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit)
}
}
})
}
}
func TestPageAllocAlloc(t *testing.T) {
type hit struct {
npages, base, scav uintptr
...
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Address range data structure.
//
// This file contains an implementation of a data structure which
// manages ordered address ranges.
package runtime
import (
"runtime/internal/sys"
"unsafe"
)
// addrRange represents a region of address space.
type addrRange struct {
// base and limit together represent the region of address space
// [base, limit). That is, base is inclusive, limit is exclusive.
base, limit uintptr
}
// addrRanges is a data structure holding a collection of ranges of
// address space.
//
// The ranges are coalesced eagerly to reduce the
// number of ranges it holds.
//
// The slice backing store for this field is persistentalloc'd
// and thus there is no way to free it.
//
// addrRanges is not thread-safe.
type addrRanges struct {
// ranges is a slice of ranges sorted by base.
ranges []addrRange
// sysStat is the stat to track allocations by this type
sysStat *uint64
}
func (a *addrRanges) init(sysStat *uint64) {
ranges := (*notInHeapSlice)(unsafe.Pointer(&a.ranges))
ranges.len = 0
ranges.cap = 16
ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, sysStat))
a.sysStat = sysStat
}
// findSucc returns the first index in a such that base is
// less than the base of the addrRange at that index.
func (a *addrRanges) findSucc(base uintptr) int {
// TODO(mknyszek): Consider a binary search for large arrays.
// While iterating over these ranges is potentially expensive,
// the expected number of ranges is small, ideally just 1,
// since Go heaps are usually mostly contiguous.
for i := range a.ranges {
if base < a.ranges[i].base {
return i
}
}
return len(a.ranges)
}
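
// As a sketch only, the binary-search variant suggested by the TODO
// above might look like the following; the linear scan is kept because
// the expected number of ranges is so small.
//
//	lo, hi := 0, len(a.ranges)
//	for lo < hi {
//		mid := lo + (hi-lo)/2
//		if a.ranges[mid].base <= base {
//			lo = mid + 1
//		} else {
//			hi = mid
//		}
//	}
//	return lo
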
// add inserts a new address range to a.
//
// r must not overlap with any address range in a.
func (a *addrRanges) add(r addrRange) {
// The copies in this function are potentially expensive, but this data
// structure is meant to represent the Go heap. At worst, copying this
// would take ~160µs assuming a conservative copying rate of 25 GiB/s (the
// copy will almost never trigger a page fault) for a 1 TiB heap with 4 MiB
// arenas which is completely discontiguous. ~160µs is still a lot, but in
// practice most platforms have 64 MiB arenas (which cuts this by a factor
// of 16) and Go heaps are usually mostly contiguous, so the chance that
// an addrRanges even grows to that size is extremely low.
// Because we assume r is not currently represented in a,
// findSucc gives us our insertion index.
i := a.findSucc(r.base)
coalescesDown := i > 0 && a.ranges[i-1].limit == r.base
coalescesUp := i < len(a.ranges) && r.limit == a.ranges[i].base
if coalescesUp && coalescesDown {
// We have neighbors and they both border us.
// Merge a.ranges[i-1], r, and a.ranges[i] together into a.ranges[i-1].
a.ranges[i-1].limit = a.ranges[i].limit
// Delete a.ranges[i].
copy(a.ranges[i:], a.ranges[i+1:])
a.ranges = a.ranges[:len(a.ranges)-1]
} else if coalescesDown {
// We have a neighbor at a lower address only and it borders us.
// Merge the new space into a.ranges[i-1].
a.ranges[i-1].limit = r.limit
} else if coalescesUp {
// We have a neighbor at a higher address only and it borders us.
// Merge the new space into a.ranges[i].
a.ranges[i].base = r.base
} else {
// We may or may not have neighbors which don't border us.
// Add the new range.
if len(a.ranges)+1 > cap(a.ranges) {
// Grow the array. Note that this leaks the old array, but since
// we're doubling we have at most 2x waste. For a 1 TiB heap and
// 4 MiB arenas which are all discontiguous (both very conservative
// assumptions), this would waste at most 4 MiB of memory.
oldRanges := a.ranges
ranges := (*notInHeapSlice)(unsafe.Pointer(&a.ranges))
ranges.len = len(oldRanges)
ranges.cap = cap(oldRanges) * 2
ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, a.sysStat))
// Copy in the old array, but make space for the new range.
copy(a.ranges[:i], oldRanges[:i])
copy(a.ranges[i+1:], oldRanges[i:])
} else {
a.ranges = a.ranges[:len(a.ranges)+1]
copy(a.ranges[i+1:], a.ranges[i:])
}
a.ranges[i] = r
}
}
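
As a small illustration of the eager coalescing in add (hypothetical
addresses, not taken from the change itself), starting from an empty
addrRanges a:

	a.add(addrRange{0x100000, 0x200000}) // ranges: [0x100000, 0x200000)
	a.add(addrRange{0x300000, 0x400000}) // ranges: [0x100000, 0x200000), [0x300000, 0x400000)
	a.add(addrRange{0x200000, 0x300000}) // borders both neighbors: [0x100000, 0x400000)

The last call takes the coalescesUp && coalescesDown path, so the array
shrinks back to a single element, the expected steady state for a mostly
contiguous heap.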