Commit 9d78e75a authored by Michael Anthony Knyszek, committed by Michael Knyszek

runtime: track ranges of address space which are owned by the heap

This change adds a new inUse field to the allocator which tracks ranges
of addresses that are owned by the heap. It is updated on each heap
growth.

These ranges are tracked in an array which is kept sorted. In practice
this array shouldn't exceed its initial allocation except in rare cases
and thus should be small (ideally exactly 1 element in size).

In a hypothetical worst-case scenario wherein we have a 1 TiB heap and 4
MiB arenas (note that the address ranges will never be at a smaller
granularity than an arena, since arenas are always allocated
contiguously), inUse would use at most 4 MiB of memory if the heap
mappings were completely discontiguous (highly unlikely) with an
additional 2 MiB leaked from previous allocations. Furthermore, the
copies that are done to keep the inUse array sorted will copy at most 4
MiB of memory in such a scenario, which, assuming a conservative copying
rate of 5 GiB/s, amounts to about 800µs.
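
For reference, a rough back-of-the-envelope sketch of those figures
(assuming 16-byte addrRange entries, i.e. two uintptrs on a 64-bit
platform):

    1 TiB heap / 4 MiB arenas      = 262144 ranges when fully discontiguous
    262144 ranges * 16 B per range = 4 MiB for the inUse array
    4 MiB copied / 5 GiB/s         ≈ 800µs for a worst-case copy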

However, note that in practice:
1) Most 64-bit platforms have 64 MiB arenas.
2) The copies should incur little-to-no page faults, meaning a copy rate
   closer to 25-50 GiB/s is expected.
3) Go heaps are almost always mostly contiguous.

Updates #35514.

Change-Id: I3ad07f1c2b5b9340acf59ecc3b9ae09e884814fe
Reviewed-on: https://go-review.googlesource.com/c/go/+/207757
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Austin Clements <austin@google.com>
parent ef3ef8fc
@@ -734,6 +734,16 @@ func (p *PageAlloc) Scavenge(nbytes uintptr, locked bool) (r uintptr) {
})
return
}
func (p *PageAlloc) InUse() []AddrRange {
	ranges := make([]AddrRange, 0, len(p.inUse.ranges))
	for _, r := range p.inUse.ranges {
		ranges = append(ranges, AddrRange{
			Base:  r.base,
			Limit: r.limit,
		})
	}
	return ranges
}
// Returns nil if the PallocData's L2 is missing.
func (p *PageAlloc) PallocData(i ChunkIdx) *PallocData {
@@ -745,6 +755,12 @@ func (p *PageAlloc) PallocData(i ChunkIdx) *PallocData {
	return (*PallocData)(&l2[ci.l2()])
}
// AddrRange represents a range over addresses.
// Specifically, it represents the range [Base, Limit).
type AddrRange struct {
	Base, Limit uintptr
}
// BitRange represents a range over a bitmap.
type BitRange struct {
	I, N uint // bit index and length in bits
@@ -245,6 +245,19 @@ type pageAlloc struct {
	// currently ready to use.
	start, end chunkIdx

	// inUse is a slice of ranges of address space which are
	// known by the page allocator to be currently in-use (passed
	// to grow).
	//
	// This field is currently unused on 32-bit architectures but
	// is harmless to track. We care much more about having a
	// contiguous heap in these cases and take additional measures
	// to ensure that, so in nearly all cases this should have just
	// 1 element.
	//
	// All access is protected by the mheapLock.
	inUse addrRanges

	// mheap_.lock. This level of indirection makes it possible
	// to test pageAlloc independently of the runtime allocator.
	mheapLock *mutex
@@ -268,6 +281,9 @@ func (s *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
	}
	s.sysStat = sysStat

	// Initialize s.inUse.
	s.inUse.init(sysStat)

	// System-dependent initialization.
	s.sysInit()
@@ -381,6 +397,10 @@ func (s *pageAlloc) grow(base, size uintptr) {
	if end > s.end {
		s.end = end
	}

	// Note that [base, limit) will never overlap with any existing
	// range in inUse because grow only ever adds never-used memory
	// regions to the page allocator.
	s.inUse.add(addrRange{base, limit})

	// A grow operation is a lot like a free operation, so if our
	// chunk ends up below the (linearized) s.searchAddr, update
@@ -40,6 +40,119 @@ func checkPageAlloc(t *testing.T, want, got *PageAlloc) {
	// TODO(mknyszek): Verify summaries too?
}
func TestPageAllocGrow(t *testing.T) {
	tests := map[string]struct {
		chunks []ChunkIdx
		inUse  []AddrRange
	}{
		"One": {
			chunks: []ChunkIdx{
				BaseChunkIdx,
			},
			inUse: []AddrRange{
				{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
			},
		},
		"Contiguous2": {
			chunks: []ChunkIdx{
				BaseChunkIdx,
				BaseChunkIdx + 1,
			},
			inUse: []AddrRange{
				{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)},
			},
		},
		"Contiguous5": {
			chunks: []ChunkIdx{
				BaseChunkIdx,
				BaseChunkIdx + 1,
				BaseChunkIdx + 2,
				BaseChunkIdx + 3,
				BaseChunkIdx + 4,
			},
			inUse: []AddrRange{
				{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+5, 0)},
			},
		},
		"Discontiguous": {
			chunks: []ChunkIdx{
				BaseChunkIdx,
				BaseChunkIdx + 2,
				BaseChunkIdx + 4,
			},
			inUse: []AddrRange{
				{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
				{PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)},
				{PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)},
			},
		},
		"Mixed": {
			chunks: []ChunkIdx{
				BaseChunkIdx,
				BaseChunkIdx + 1,
				BaseChunkIdx + 2,
				BaseChunkIdx + 4,
			},
			inUse: []AddrRange{
				{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+3, 0)},
				{PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)},
			},
		},
		"WildlyDiscontiguous": {
			chunks: []ChunkIdx{
				BaseChunkIdx,
				BaseChunkIdx + 1,
				BaseChunkIdx + 0x10,
				BaseChunkIdx + 0x21,
			},
			inUse: []AddrRange{
				{PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)},
				{PageBase(BaseChunkIdx+0x10, 0), PageBase(BaseChunkIdx+0x11, 0)},
				{PageBase(BaseChunkIdx+0x21, 0), PageBase(BaseChunkIdx+0x22, 0)},
			},
		},
	}
	for name, v := range tests {
		v := v
		t.Run(name, func(t *testing.T) {
			// By creating a new pageAlloc, we will
			// grow it for each chunk defined in x.
			x := make(map[ChunkIdx][]BitRange)
			for _, c := range v.chunks {
				x[c] = []BitRange{}
			}
			b := NewPageAlloc(x, nil)
			defer FreePageAlloc(b)

			got := b.InUse()
			want := v.inUse

			// Check for mismatches.
			if len(got) != len(want) {
				t.Fail()
			} else {
				for i := range want {
					if want[i] != got[i] {
						t.Fail()
						break
					}
				}
			}
			if t.Failed() {
				t.Logf("found inUse mismatch")
				t.Logf("got:")
				for i, r := range got {
					t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit)
				}
				t.Logf("want:")
				for i, r := range want {
					t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit)
				}
			}
		})
	}
}
func TestPageAllocAlloc(t *testing.T) {
	type hit struct {
		npages, base, scav uintptr
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Address range data structure.
//
// This file contains an implementation of a data structure which
// manages ordered address ranges.

package runtime

import (
	"runtime/internal/sys"
	"unsafe"
)
// addrRange represents a region of address space.
type addrRange struct {
	// base and limit together represent the region of address space
	// [base, limit). That is, base is inclusive, limit is exclusive.
	base, limit uintptr
}
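
// Not part of this change: the helpers below are only an illustrative
// sketch of how the half-open [base, limit) convention might be made
// concrete; the method names here are hypothetical.

// size returns the number of bytes spanned by the range.
func (a addrRange) size() uintptr {
	return a.limit - a.base
}

// contains reports whether addr lies within the range.
func (a addrRange) contains(addr uintptr) bool {
	return a.base <= addr && addr < a.limit
}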
// addrRanges is a data structure holding a collection of ranges of
// address space.
//
// The ranges are coalesced eagerly to reduce the
// number of ranges it holds.
//
// The slice backing store for this field is persistentalloc'd
// and thus there is no way to free it.
//
// addrRanges is not thread-safe.
type addrRanges struct {
	// ranges is a slice of ranges sorted by base.
	ranges []addrRange

	// sysStat is the stat to track allocations by this type.
	sysStat *uint64
}
func (a *addrRanges) init(sysStat *uint64) {
	ranges := (*notInHeapSlice)(unsafe.Pointer(&a.ranges))
	ranges.len = 0
	ranges.cap = 16
	ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, sysStat))
	a.sysStat = sysStat
}
// findSucc returns the first index in a such that base is
// less than the base of the addrRange at that index.
func (a *addrRanges) findSucc(base uintptr) int {
	// TODO(mknyszek): Consider a binary search for large arrays.
	// While iterating over these ranges is potentially expensive,
	// the expected number of ranges is small, ideally just 1,
	// since Go heaps are usually mostly contiguous.
	for i := range a.ranges {
		if base < a.ranges[i].base {
			return i
		}
	}
	return len(a.ranges)
}
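
// Not part of this change: a rough sketch of the binary-search variant the
// TODO above mentions. sort.Search is not usable inside the runtime, so the
// search is hand-rolled here; the name findSuccBinary is hypothetical.
func (a *addrRanges) findSuccBinary(base uintptr) int {
	// Invariant: the answer lies in [lo, hi].
	lo, hi := 0, len(a.ranges)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if base < a.ranges[mid].base {
			hi = mid
		} else {
			lo = mid + 1
		}
	}
	return lo
}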
// add inserts a new address range to a.
//
// r must not overlap with any address range in a.
func (a *addrRanges) add(r addrRange) {
	// The copies in this function are potentially expensive, but this data
	// structure is meant to represent the Go heap. At worst, copying this
	// would take ~160µs assuming a conservative copying rate of 25 GiB/s (the
	// copy will almost never trigger a page fault) for a 1 TiB heap with 4 MiB
	// arenas which is completely discontiguous. ~160µs is still a lot, but in
	// practice most platforms have 64 MiB arenas (which cuts this by a factor
	// of 16) and Go heaps are usually mostly contiguous, so the chance that
	// an addrRanges even grows to that size is extremely low.

	// Because we assume r is not currently represented in a,
	// findSucc gives us our insertion index.
	i := a.findSucc(r.base)
	coalescesDown := i > 0 && a.ranges[i-1].limit == r.base
	coalescesUp := i < len(a.ranges) && r.limit == a.ranges[i].base
	if coalescesUp && coalescesDown {
		// We have neighbors and they both border us.
		// Merge a.ranges[i-1], r, and a.ranges[i] together into a.ranges[i-1].
		a.ranges[i-1].limit = a.ranges[i].limit

		// Delete a.ranges[i].
		copy(a.ranges[i:], a.ranges[i+1:])
		a.ranges = a.ranges[:len(a.ranges)-1]
	} else if coalescesDown {
		// We have a neighbor at a lower address only and it borders us.
		// Merge the new space into a.ranges[i-1].
		a.ranges[i-1].limit = r.limit
	} else if coalescesUp {
		// We have a neighbor at a higher address only and it borders us.
		// Merge the new space into a.ranges[i].
		a.ranges[i].base = r.base
	} else {
		// We may or may not have neighbors which don't border us.
		// Add the new range.
		if len(a.ranges)+1 > cap(a.ranges) {
			// Grow the array. Note that this leaks the old array, but since
			// we're doubling we have at most 2x waste. For a 1 TiB heap and
			// 4 MiB arenas which are all discontiguous (both very conservative
			// assumptions), this would waste at most 4 MiB of memory.
			oldRanges := a.ranges
			ranges := (*notInHeapSlice)(unsafe.Pointer(&a.ranges))
			ranges.len = len(oldRanges) + 1
			ranges.cap = cap(oldRanges) * 2
			ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, a.sysStat))

			// Copy in the old array, but make space for the new range.
			copy(a.ranges[:i], oldRanges[:i])
			copy(a.ranges[i+1:], oldRanges[i:])
		} else {
			a.ranges = a.ranges[:len(a.ranges)+1]
			copy(a.ranges[i+1:], a.ranges[i:])
		}
		a.ranges[i] = r
	}
}
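
// Not part of this change: an illustrative trace of how add coalesces,
// assuming an arbitrary 0x1000-byte granularity. Starting from an empty
// addrRanges a:
//
//	a.add(addrRange{0x1000, 0x2000}) // no neighbors:        [0x1000, 0x2000)
//	a.add(addrRange{0x3000, 0x4000}) // no neighbors:        [0x1000, 0x2000) [0x3000, 0x4000)
//	a.add(addrRange{0x2000, 0x3000}) // coalesces both ways: [0x1000, 0x4000)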