Commit 1e2d2f09 authored by Russ Cox

[dev.cc] runtime: convert memory allocator and garbage collector to Go

The conversion was done with an automated tool and then
modified only as necessary to make it compile and run.

[This CL is part of the removal of C code from package runtime.
See golang.org/s/dev.cc for an overview.]

LGTM=r
R=r
CC=austin, dvyukov, golang-codereviews, iant, khr
https://golang.org/cl/167540043
parent d98553a7
@@ -26,7 +26,7 @@ func makechan(t *chantype, size int64) *hchan {
if hchanSize%maxAlign != 0 || elem.align > maxAlign {
gothrow("makechan: bad alignment")
}
if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (maxmem-hchanSize)/uintptr(elem.size)) {
if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (_MaxMem-hchanSize)/uintptr(elem.size)) {
panic("makechan: size out of range")
}
......
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Implementation of runtime/debug.WriteHeapDump. Writes all
// objects in the heap plus additional info (roots, threads,
// finalizers, etc.) to a file.
// The format of the dumped file is described at
// http://code.google.com/p/go-wiki/wiki/heapdump14
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "mgc0.h"
#include "type.h"
#include "typekind.h"
#include "funcdata.h"
#include "zaexperiment.h"
#include "textflag.h"
extern byte runtime·data[];
extern byte runtime·edata[];
extern byte runtime·bss[];
extern byte runtime·ebss[];
enum {
FieldKindEol = 0,
FieldKindPtr = 1,
FieldKindIface = 2,
FieldKindEface = 3,
TagEOF = 0,
TagObject = 1,
TagOtherRoot = 2,
TagType = 3,
TagGoRoutine = 4,
TagStackFrame = 5,
TagParams = 6,
TagFinalizer = 7,
TagItab = 8,
TagOSThread = 9,
TagMemStats = 10,
TagQueuedFinalizer = 11,
TagData = 12,
TagBss = 13,
TagDefer = 14,
TagPanic = 15,
TagMemProf = 16,
TagAllocSample = 17,
};
static uintptr* playgcprog(uintptr offset, uintptr *prog, void (*callback)(void*,uintptr,uintptr), void *arg);
static void dumpfields(BitVector bv);
static void dumpbvtypes(BitVector *bv, byte *base);
static BitVector makeheapobjbv(byte *p, uintptr size);
// fd to write the dump to.
static uintptr dumpfd;
#pragma dataflag NOPTR /* tmpbuf not a heap pointer at least */
static byte *tmpbuf;
static uintptr tmpbufsize;
// buffer of pending write data
enum {
BufSize = 4096,
};
#pragma dataflag NOPTR
static byte buf[BufSize];
static uintptr nbuf;
static void
write(byte *data, uintptr len)
{
if(len + nbuf <= BufSize) {
runtime·memmove(buf + nbuf, data, len);
nbuf += len;
return;
}
runtime·write(dumpfd, buf, nbuf);
if(len >= BufSize) {
runtime·write(dumpfd, data, len);
nbuf = 0;
} else {
runtime·memmove(buf, data, len);
nbuf = len;
}
}
static void
flush(void)
{
runtime·write(dumpfd, buf, nbuf);
nbuf = 0;
}
// Cache of types that have been serialized already.
// We use a type's hash field to pick a bucket.
// Inside a bucket, we keep a list of types that
// have been serialized so far, most recently used first.
// Note: when a bucket overflows we may end up
// serializing a type more than once. That's ok.
enum {
TypeCacheBuckets = 256, // must be a power of 2
TypeCacheAssoc = 4,
};
typedef struct TypeCacheBucket TypeCacheBucket;
struct TypeCacheBucket {
Type *t[TypeCacheAssoc];
};
#pragma dataflag NOPTR /* only initialized and used while world is stopped */
static TypeCacheBucket typecache[TypeCacheBuckets];
// dump a uint64 in a varint format parseable by encoding/binary
static void
dumpint(uint64 v)
{
byte buf[10];
int32 n;
n = 0;
while(v >= 0x80) {
buf[n++] = v | 0x80;
v >>= 7;
}
buf[n++] = v;
write(buf, n);
}
static void
dumpbool(bool b)
{
dumpint(b ? 1 : 0);
}
// dump varint uint64 length followed by memory contents
static void
dumpmemrange(byte *data, uintptr len)
{
dumpint(len);
write(data, len);
}
static void
dumpstr(String s)
{
dumpmemrange(s.str, s.len);
}
static void
dumpcstr(int8 *c)
{
dumpmemrange((byte*)c, runtime·findnull((byte*)c));
}
// dump information for a type
static void
dumptype(Type *t)
{
TypeCacheBucket *b;
int32 i, j;
if(t == nil) {
return;
}
// If we've definitely serialized the type before,
// no need to do it again.
b = &typecache[t->hash & (TypeCacheBuckets-1)];
if(t == b->t[0]) return;
for(i = 1; i < TypeCacheAssoc; i++) {
if(t == b->t[i]) {
// Move-to-front
for(j = i; j > 0; j--) {
b->t[j] = b->t[j-1];
}
b->t[0] = t;
return;
}
}
// Might not have been dumped yet. Dump it and
// remember we did so.
for(j = TypeCacheAssoc-1; j > 0; j--) {
b->t[j] = b->t[j-1];
}
b->t[0] = t;
// dump the type
dumpint(TagType);
dumpint((uintptr)t);
dumpint(t->size);
if(t->x == nil || t->x->pkgPath == nil || t->x->name == nil) {
dumpstr(*t->string);
} else {
dumpint(t->x->pkgPath->len + 1 + t->x->name->len);
write(t->x->pkgPath->str, t->x->pkgPath->len);
write((byte*)".", 1);
write(t->x->name->str, t->x->name->len);
}
dumpbool((t->kind & KindDirectIface) == 0 || (t->kind & KindNoPointers) == 0);
}
// dump an object
static void
dumpobj(byte *obj, uintptr size, BitVector bv)
{
dumpbvtypes(&bv, obj);
dumpint(TagObject);
dumpint((uintptr)obj);
dumpmemrange(obj, size);
dumpfields(bv);
}
static void
dumpotherroot(int8 *description, byte *to)
{
dumpint(TagOtherRoot);
dumpcstr(description);
dumpint((uintptr)to);
}
static void
dumpfinalizer(byte *obj, FuncVal *fn, Type* fint, PtrType *ot)
{
dumpint(TagFinalizer);
dumpint((uintptr)obj);
dumpint((uintptr)fn);
dumpint((uintptr)fn->fn);
dumpint((uintptr)fint);
dumpint((uintptr)ot);
}
typedef struct ChildInfo ChildInfo;
struct ChildInfo {
// Information passed up from the callee frame about
// the layout of the outargs region.
uintptr argoff; // where the arguments start in the frame
uintptr arglen; // size of args region
BitVector args; // if args.n >= 0, pointer map of args region
byte *sp; // callee sp
uintptr depth; // depth in call stack (0 == most recent)
};
// dump kinds & offsets of interesting fields in bv
static void
dumpbv(BitVector *bv, uintptr offset)
{
uintptr i;
for(i = 0; i < bv->n; i += BitsPerPointer) {
switch(bv->bytedata[i/8] >> i%8 & 3) {
case BitsDead:
// BitsDead has already been processed in makeheapobjbv.
// We should only see it in stack maps, in which case we should continue processing.
break;
case BitsScalar:
break;
case BitsPointer:
dumpint(FieldKindPtr);
dumpint(offset + i / BitsPerPointer * PtrSize);
break;
case BitsMultiWord:
switch(bv->bytedata[(i+BitsPerPointer)/8] >> (i+BitsPerPointer)%8 & 3) {
default:
runtime·throw("unexpected garbage collection bits");
case BitsIface:
dumpint(FieldKindIface);
dumpint(offset + i / BitsPerPointer * PtrSize);
i += BitsPerPointer;
break;
case BitsEface:
dumpint(FieldKindEface);
dumpint(offset + i / BitsPerPointer * PtrSize);
i += BitsPerPointer;
break;
}
}
}
}
static bool
dumpframe(Stkframe *s, void *arg)
{
Func *f;
ChildInfo *child;
uintptr pc, off, size;
int32 pcdata;
StackMap *stackmap;
int8 *name;
BitVector bv;
child = (ChildInfo*)arg;
f = s->fn;
// Figure out what we can about our stack map
pc = s->pc;
if(pc != f->entry)
pc--;
pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, pc);
if(pcdata == -1) {
// We do not have a valid pcdata value but there might be a
// stackmap for this function. It is likely that we are looking
// at the function prologue, assume so and hope for the best.
pcdata = 0;
}
stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
// Dump any types we will need to resolve Efaces.
if(child->args.n >= 0)
dumpbvtypes(&child->args, (byte*)s->sp + child->argoff);
if(stackmap != nil && stackmap->n > 0) {
bv = runtime·stackmapdata(stackmap, pcdata);
dumpbvtypes(&bv, (byte*)(s->varp - bv.n / BitsPerPointer * PtrSize));
} else {
bv.n = -1;
}
// Dump main body of stack frame.
dumpint(TagStackFrame);
dumpint(s->sp); // lowest address in frame
dumpint(child->depth); // # of frames deep on the stack
dumpint((uintptr)child->sp); // sp of child, or 0 if bottom of stack
dumpmemrange((byte*)s->sp, s->fp - s->sp); // frame contents
dumpint(f->entry);
dumpint(s->pc);
dumpint(s->continpc);
name = runtime·funcname(f);
if(name == nil)
name = "unknown function";
dumpcstr(name);
// Dump fields in the outargs section
if(child->args.n >= 0) {
dumpbv(&child->args, child->argoff);
} else {
// conservative - everything might be a pointer
for(off = child->argoff; off < child->argoff + child->arglen; off += PtrSize) {
dumpint(FieldKindPtr);
dumpint(off);
}
}
// Dump fields in the local vars section
if(stackmap == nil) {
// No locals information, dump everything.
for(off = child->arglen; off < s->varp - s->sp; off += PtrSize) {
dumpint(FieldKindPtr);
dumpint(off);
}
} else if(stackmap->n < 0) {
// Locals size information, dump just the locals.
size = -stackmap->n;
for(off = s->varp - size - s->sp; off < s->varp - s->sp; off += PtrSize) {
dumpint(FieldKindPtr);
dumpint(off);
}
} else if(stackmap->n > 0) {
// Locals bitmap information, scan just the pointers in
// locals.
dumpbv(&bv, s->varp - bv.n / BitsPerPointer * PtrSize - s->sp);
}
dumpint(FieldKindEol);
// Record arg info for parent.
child->argoff = s->argp - s->fp;
child->arglen = s->arglen;
child->sp = (byte*)s->sp;
child->depth++;
stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
if(stackmap != nil)
child->args = runtime·stackmapdata(stackmap, pcdata);
else
child->args.n = -1;
return true;
}
static void
dumpgoroutine(G *gp)
{
uintptr sp, pc, lr;
ChildInfo child;
Defer *d;
Panic *p;
bool (*fn)(Stkframe*, void*);
if(gp->syscallsp != (uintptr)nil) {
sp = gp->syscallsp;
pc = gp->syscallpc;
lr = 0;
} else {
sp = gp->sched.sp;
pc = gp->sched.pc;
lr = gp->sched.lr;
}
dumpint(TagGoRoutine);
dumpint((uintptr)gp);
dumpint((uintptr)sp);
dumpint(gp->goid);
dumpint(gp->gopc);
dumpint(runtime·readgstatus(gp));
dumpbool(gp->issystem);
dumpbool(false); // isbackground
dumpint(gp->waitsince);
dumpstr(gp->waitreason);
dumpint((uintptr)gp->sched.ctxt);
dumpint((uintptr)gp->m);
dumpint((uintptr)gp->defer);
dumpint((uintptr)gp->panic);
// dump stack
child.args.n = -1;
child.arglen = 0;
child.sp = nil;
child.depth = 0;
fn = dumpframe;
runtime·gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, &fn, &child, 0);
// dump defer & panic records
for(d = gp->defer; d != nil; d = d->link) {
dumpint(TagDefer);
dumpint((uintptr)d);
dumpint((uintptr)gp);
dumpint((uintptr)d->argp);
dumpint((uintptr)d->pc);
dumpint((uintptr)d->fn);
dumpint((uintptr)d->fn->fn);
dumpint((uintptr)d->link);
}
for (p = gp->panic; p != nil; p = p->link) {
dumpint(TagPanic);
dumpint((uintptr)p);
dumpint((uintptr)gp);
dumpint((uintptr)p->arg.type);
dumpint((uintptr)p->arg.data);
dumpint(0); // was p->defer, no longer recorded
dumpint((uintptr)p->link);
}
}
static void
dumpgs(void)
{
G *gp;
uint32 i;
uint32 status;
// goroutines & stacks
for(i = 0; i < runtime·allglen; i++) {
gp = runtime·allg[i];
status = runtime·readgstatus(gp); // The world is stopped so gp will not be in a scan state.
switch(status){
default:
runtime·printf("runtime: unexpected G.status %d\n", status);
runtime·throw("dumpgs in STW - bad status");
case Gdead:
break;
case Grunnable:
case Gsyscall:
case Gwaiting:
dumpgoroutine(gp);
break;
}
}
}
static void
finq_callback(FuncVal *fn, byte *obj, uintptr nret, Type *fint, PtrType *ot)
{
dumpint(TagQueuedFinalizer);
dumpint((uintptr)obj);
dumpint((uintptr)fn);
dumpint((uintptr)fn->fn);
dumpint((uintptr)fint);
dumpint((uintptr)ot);
USED(&nret);
}
static void
dumproots(void)
{
MSpan *s, **allspans;
uint32 spanidx;
Special *sp;
SpecialFinalizer *spf;
byte *p;
// data segment
dumpbvtypes(&runtime·gcdatamask, runtime·data);
dumpint(TagData);
dumpint((uintptr)runtime·data);
dumpmemrange(runtime·data, runtime·edata - runtime·data);
dumpfields(runtime·gcdatamask);
// bss segment
dumpbvtypes(&runtime·gcbssmask, runtime·bss);
dumpint(TagBss);
dumpint((uintptr)runtime·bss);
dumpmemrange(runtime·bss, runtime·ebss - runtime·bss);
dumpfields(runtime·gcbssmask);
// MSpan.types
allspans = runtime·mheap.allspans;
for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
s = allspans[spanidx];
if(s->state == MSpanInUse) {
// Finalizers
for(sp = s->specials; sp != nil; sp = sp->next) {
if(sp->kind != KindSpecialFinalizer)
continue;
spf = (SpecialFinalizer*)sp;
p = (byte*)((s->start << PageShift) + spf->special.offset);
dumpfinalizer(p, spf->fn, spf->fint, spf->ot);
}
}
}
// Finalizer queue
runtime·iterate_finq(finq_callback);
}
// Bit vector of free marks.
// Needs to be as big as the largest number of objects per span.
#pragma dataflag NOPTR
static byte free[PageSize/8];
static void
dumpobjs(void)
{
uintptr i, j, size, n;
MSpan *s;
MLink *l;
byte *p;
for(i = 0; i < runtime·mheap.nspan; i++) {
s = runtime·mheap.allspans[i];
if(s->state != MSpanInUse)
continue;
p = (byte*)(s->start << PageShift);
size = s->elemsize;
n = (s->npages << PageShift) / size;
if(n > nelem(free))
runtime·throw("free array doesn't have enough entries");
for(l = s->freelist; l != nil; l = l->next)
free[((byte*)l - p) / size] = true;
for(j = 0; j < n; j++, p += size) {
if(free[j]) {
free[j] = false;
continue;
}
dumpobj(p, size, makeheapobjbv(p, size));
}
}
}
static void
dumpparams(void)
{
byte *x;
dumpint(TagParams);
x = (byte*)1;
if(*(byte*)&x == 1)
dumpbool(false); // little-endian ptrs
else
dumpbool(true); // big-endian ptrs
dumpint(PtrSize);
dumpint((uintptr)runtime·mheap.arena_start);
dumpint((uintptr)runtime·mheap.arena_used);
dumpint(thechar);
dumpcstr(GOEXPERIMENT);
dumpint(runtime·ncpu);
}
static void
itab_callback(Itab *tab)
{
Type *t;
t = tab->type;
// Dump a map from itab* to the type of its data field.
// We want this map so we can deduce types of interface referents.
if((t->kind & KindDirectIface) == 0) {
// indirect - data slot is a pointer to t.
dumptype(t->ptrto);
dumpint(TagItab);
dumpint((uintptr)tab);
dumpint((uintptr)t->ptrto);
} else if((t->kind & KindNoPointers) == 0) {
// t is pointer-like - data slot is a t.
dumptype(t);
dumpint(TagItab);
dumpint((uintptr)tab);
dumpint((uintptr)t);
} else {
// Data slot is a scalar. Dump type just for fun.
// With pointer-only interfaces, this shouldn't happen.
dumptype(t);
dumpint(TagItab);
dumpint((uintptr)tab);
dumpint((uintptr)t);
}
}
static void
dumpitabs(void)
{
void (*fn)(Itab*);
fn = itab_callback;
runtime·iterate_itabs(&fn);
}
static void
dumpms(void)
{
M *mp;
for(mp = runtime·allm; mp != nil; mp = mp->alllink) {
dumpint(TagOSThread);
dumpint((uintptr)mp);
dumpint(mp->id);
dumpint(mp->procid);
}
}
static void
dumpmemstats(void)
{
int32 i;
dumpint(TagMemStats);
dumpint(mstats.alloc);
dumpint(mstats.total_alloc);
dumpint(mstats.sys);
dumpint(mstats.nlookup);
dumpint(mstats.nmalloc);
dumpint(mstats.nfree);
dumpint(mstats.heap_alloc);
dumpint(mstats.heap_sys);
dumpint(mstats.heap_idle);
dumpint(mstats.heap_inuse);
dumpint(mstats.heap_released);
dumpint(mstats.heap_objects);
dumpint(mstats.stacks_inuse);
dumpint(mstats.stacks_sys);
dumpint(mstats.mspan_inuse);
dumpint(mstats.mspan_sys);
dumpint(mstats.mcache_inuse);
dumpint(mstats.mcache_sys);
dumpint(mstats.buckhash_sys);
dumpint(mstats.gc_sys);
dumpint(mstats.other_sys);
dumpint(mstats.next_gc);
dumpint(mstats.last_gc);
dumpint(mstats.pause_total_ns);
for(i = 0; i < 256; i++)
dumpint(mstats.pause_ns[i]);
dumpint(mstats.numgc);
}
static void
dumpmemprof_callback(Bucket *b, uintptr nstk, uintptr *stk, uintptr size, uintptr allocs, uintptr frees)
{
uintptr i, pc;
Func *f;
byte buf[20];
String file;
int32 line;
dumpint(TagMemProf);
dumpint((uintptr)b);
dumpint(size);
dumpint(nstk);
for(i = 0; i < nstk; i++) {
pc = stk[i];
f = runtime·findfunc(pc);
if(f == nil) {
runtime·snprintf(buf, sizeof(buf), "%X", (uint64)pc);
dumpcstr((int8*)buf);
dumpcstr("?");
dumpint(0);
} else {
dumpcstr(runtime·funcname(f));
// TODO: Why do we need to back up to a call instruction here?
// Maybe profiler should do this.
if(i > 0 && pc > f->entry) {
if(thechar == '6' || thechar == '8')
pc--;
else
pc -= 4; // arm, etc
}
line = runtime·funcline(f, pc, &file);
dumpstr(file);
dumpint(line);
}
}
dumpint(allocs);
dumpint(frees);
}
static void
dumpmemprof(void)
{
MSpan *s, **allspans;
uint32 spanidx;
Special *sp;
SpecialProfile *spp;
byte *p;
void (*fn)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr);
fn = dumpmemprof_callback;
runtime·iterate_memprof(&fn);
allspans = runtime·mheap.allspans;
for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
s = allspans[spanidx];
if(s->state != MSpanInUse)
continue;
for(sp = s->specials; sp != nil; sp = sp->next) {
if(sp->kind != KindSpecialProfile)
continue;
spp = (SpecialProfile*)sp;
p = (byte*)((s->start << PageShift) + spp->special.offset);
dumpint(TagAllocSample);
dumpint((uintptr)p);
dumpint((uintptr)spp->b);
}
}
}
static void
mdump(void)
{
byte *hdr;
uintptr i;
MSpan *s;
// make sure we're done sweeping
for(i = 0; i < runtime·mheap.nspan; i++) {
s = runtime·mheap.allspans[i];
if(s->state == MSpanInUse)
runtime·MSpan_EnsureSwept(s);
}
runtime·memclr((byte*)&typecache[0], sizeof(typecache));
hdr = (byte*)"go1.4 heap dump\n";
write(hdr, runtime·findnull(hdr));
dumpparams();
dumpitabs();
dumpobjs();
dumpgs();
dumpms();
dumproots();
dumpmemstats();
dumpmemprof();
dumpint(TagEOF);
flush();
}
void
runtime·writeheapdump_m(void)
{
uintptr fd;
fd = g->m->scalararg[0];
g->m->scalararg[0] = 0;
runtime·casgstatus(g->m->curg, Grunning, Gwaiting);
g->waitreason = runtime·gostringnocopy((byte*)"dumping heap");
// Update stats so we can dump them.
// As a side effect, flushes all the MCaches so the MSpan.freelist
// lists contain all the free objects.
runtime·updatememstats(nil);
// Set dump file.
dumpfd = fd;
// Call dump routine.
mdump();
// Reset dump file.
dumpfd = 0;
if(tmpbuf != nil) {
runtime·SysFree(tmpbuf, tmpbufsize, &mstats.other_sys);
tmpbuf = nil;
tmpbufsize = 0;
}
runtime·casgstatus(g->m->curg, Gwaiting, Grunning);
}
// dumpint() the kind & offset of each field in an object.
static void
dumpfields(BitVector bv)
{
dumpbv(&bv, 0);
dumpint(FieldKindEol);
}
// The heap dump reader needs to be able to disambiguate
// Eface entries. So it needs to know every type that might
// appear in such an entry. The following routine accomplishes that.
// Dump all the types that appear in the type field of
// any Eface described by this bit vector.
static void
dumpbvtypes(BitVector *bv, byte *base)
{
uintptr i;
for(i = 0; i < bv->n; i += BitsPerPointer) {
if((bv->bytedata[i/8] >> i%8 & 3) != BitsMultiWord)
continue;
switch(bv->bytedata[(i+BitsPerPointer)/8] >> (i+BitsPerPointer)%8 & 3) {
default:
runtime·throw("unexpected garbage collection bits");
case BitsIface:
i += BitsPerPointer;
break;
case BitsEface:
dumptype(*(Type**)(base + i / BitsPerPointer * PtrSize));
i += BitsPerPointer;
break;
}
}
}
static BitVector
makeheapobjbv(byte *p, uintptr size)
{
uintptr off, nptr, i;
byte shift, *bitp, bits;
bool mw;
// Extend the temp buffer if necessary.
nptr = size/PtrSize;
if(tmpbufsize < nptr*BitsPerPointer/8+1) {
if(tmpbuf != nil)
runtime·SysFree(tmpbuf, tmpbufsize, &mstats.other_sys);
tmpbufsize = nptr*BitsPerPointer/8+1;
tmpbuf = runtime·sysAlloc(tmpbufsize, &mstats.other_sys);
if(tmpbuf == nil)
runtime·throw("heapdump: out of memory");
}
// Copy and compact the bitmap.
mw = false;
for(i = 0; i < nptr; i++) {
off = (uintptr*)(p + i*PtrSize) - (uintptr*)runtime·mheap.arena_start;
bitp = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
bits = (*bitp >> (shift + 2)) & BitsMask;
if(!mw && bits == BitsDead)
break; // end of heap object
mw = !mw && bits == BitsMultiWord;
tmpbuf[i*BitsPerPointer/8] &= ~(BitsMask<<((i*BitsPerPointer)%8));
tmpbuf[i*BitsPerPointer/8] |= bits<<((i*BitsPerPointer)%8);
}
return (BitVector){i*BitsPerPointer, tmpbuf};
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Implementation of runtime/debug.WriteHeapDump. Writes all
// objects in the heap plus additional info (roots, threads,
// finalizers, etc.) to a file.
// The format of the dumped file is described at
// http://code.google.com/p/go-wiki/wiki/heapdump14
package runtime
import "unsafe"
const (
fieldKindEol = 0
fieldKindPtr = 1
fieldKindIface = 2
fieldKindEface = 3
tagEOF = 0
tagObject = 1
tagOtherRoot = 2
tagType = 3
tagGoroutine = 4
tagStackFrame = 5
tagParams = 6
tagFinalizer = 7
tagItab = 8
tagOSThread = 9
tagMemStats = 10
tagQueuedFinalizer = 11
tagData = 12
tagBSS = 13
tagDefer = 14
tagPanic = 15
tagMemProf = 16
tagAllocSample = 17
)
var dumpfd uintptr // fd to write the dump to.
var tmpbuf []byte
// buffer of pending write data
const (
bufSize = 4096
)
var buf [bufSize]byte
var nbuf uintptr
func dwrite(data unsafe.Pointer, len uintptr) {
if len == 0 {
return
}
if nbuf+len <= bufSize {
copy(buf[nbuf:], (*[bufSize]byte)(data)[:len])
nbuf += len
return
}
write(dumpfd, (unsafe.Pointer)(&buf), int32(nbuf))
if len >= bufSize {
write(dumpfd, data, int32(len))
nbuf = 0
} else {
copy(buf[:], (*[bufSize]byte)(data)[:len])
nbuf = len
}
}
func dwritebyte(b byte) {
dwrite(unsafe.Pointer(&b), 1)
}
func flush() {
write(dumpfd, (unsafe.Pointer)(&buf), int32(nbuf))
nbuf = 0
}
// Cache of types that have been serialized already.
// We use a type's hash field to pick a bucket.
// Inside a bucket, we keep a list of types that
// have been serialized so far, most recently used first.
// Note: when a bucket overflows we may end up
// serializing a type more than once. That's ok.
const (
typeCacheBuckets = 256 // must be a power of 2
typeCacheAssoc = 4
)
type typeCacheBucket struct {
t [typeCacheAssoc]*_type
}
var typecache [typeCacheBuckets]typeCacheBucket
// dump a uint64 in a varint format parseable by encoding/binary
func dumpint(v uint64) {
var buf [10]byte
var n int
for v >= 0x80 {
buf[n] = byte(v | 0x80)
n++
v >>= 7
}
buf[n] = byte(v)
n++
dwrite(unsafe.Pointer(&buf), uintptr(n))
}
func dumpbool(b bool) {
if b {
dumpint(1)
} else {
dumpint(0)
}
}
// dump varint uint64 length followed by memory contents
func dumpmemrange(data unsafe.Pointer, len uintptr) {
dumpint(uint64(len))
dwrite(data, len)
}
func dumpslice(b []byte) {
dumpint(uint64(len(b)))
if len(b) > 0 {
dwrite(unsafe.Pointer(&b[0]), uintptr(len(b)))
}
}
func dumpstr(s string) {
sp := (*stringStruct)(unsafe.Pointer(&s))
dumpmemrange(sp.str, uintptr(sp.len))
}
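// Editorial sketch, not part of this CL: dumpint writes the standard
// base-128 "uvarint" encoding and dumpmemrange/dumpstr write a varint length
// followed by the raw bytes, so a hypothetical dump reader can decode both
// with the standard library. Assumes imports of "bufio", "encoding/binary"
// and "io"; the function names are illustrative, not runtime APIs.
func readDumpUint(r *bufio.Reader) (uint64, error) {
	// *bufio.Reader implements io.ByteReader, which ReadUvarint requires.
	return binary.ReadUvarint(r)
}
func readDumpString(r *bufio.Reader) (string, error) {
	n, err := readDumpUint(r) // varint length written by dumpmemrange
	if err != nil {
		return "", err
	}
	b := make([]byte, n)
	if _, err := io.ReadFull(r, b); err != nil { // raw bytes that follow
		return "", err
	}
	return string(b), nil
}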
// dump information for a type
func dumptype(t *_type) {
if t == nil {
return
}
// If we've definitely serialized the type before,
// no need to do it again.
b := &typecache[t.hash&(typeCacheBuckets-1)]
if t == b.t[0] {
return
}
for i := 1; i < typeCacheAssoc; i++ {
if t == b.t[i] {
// Move-to-front
for j := i; j > 0; j-- {
b.t[j] = b.t[j-1]
}
b.t[0] = t
return
}
}
// Might not have been dumped yet. Dump it and
// remember we did so.
for j := typeCacheAssoc - 1; j > 0; j-- {
b.t[j] = b.t[j-1]
}
b.t[0] = t
// dump the type
dumpint(tagType)
dumpint(uint64(uintptr(unsafe.Pointer(t))))
dumpint(uint64(t.size))
if t.x == nil || t.x.pkgpath == nil || t.x.name == nil {
dumpstr(*t._string)
} else {
pkgpath := (*stringStruct)(unsafe.Pointer(&t.x.pkgpath))
name := (*stringStruct)(unsafe.Pointer(&t.x.name))
dumpint(uint64(uintptr(pkgpath.len) + 1 + uintptr(name.len)))
dwrite(pkgpath.str, uintptr(pkgpath.len))
dwritebyte('.')
dwrite(name.str, uintptr(name.len))
}
dumpbool(t.kind&kindDirectIface == 0 || t.kind&kindNoPointers == 0)
}
// dump an object
func dumpobj(obj unsafe.Pointer, size uintptr, bv bitvector) {
dumpbvtypes(&bv, obj)
dumpint(tagObject)
dumpint(uint64(uintptr(obj)))
dumpmemrange(obj, size)
dumpfields(bv)
}
func dumpotherroot(description string, to unsafe.Pointer) {
dumpint(tagOtherRoot)
dumpstr(description)
dumpint(uint64(uintptr(to)))
}
func dumpfinalizer(obj unsafe.Pointer, fn *funcval, fint *_type, ot *ptrtype) {
dumpint(tagFinalizer)
dumpint(uint64(uintptr(obj)))
dumpint(uint64(uintptr(unsafe.Pointer(fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fn.fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fint))))
dumpint(uint64(uintptr(unsafe.Pointer(ot))))
}
type childInfo struct {
// Information passed up from the callee frame about
// the layout of the outargs region.
argoff uintptr // where the arguments start in the frame
arglen uintptr // size of args region
args bitvector // if args.n >= 0, pointer map of args region
sp *uint8 // callee sp
depth uintptr // depth in call stack (0 == most recent)
}
// dump kinds & offsets of interesting fields in bv
func dumpbv(cbv *bitvector, offset uintptr) {
bv := gobv(*cbv)
for i := uintptr(0); i < uintptr(bv.n); i += bitsPerPointer {
switch bv.bytedata[i/8] >> (i % 8) & 3 {
default:
gothrow("unexpected pointer bits")
case _BitsDead:
// BitsDead has already been processed in makeheapobjbv.
// We should only see it in stack maps, in which case we should continue processing.
case _BitsScalar:
// ok
case _BitsPointer:
dumpint(fieldKindPtr)
dumpint(uint64(offset + i/_BitsPerPointer*ptrSize))
}
}
}
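// Editorial sketch, not part of this CL: the bitmaps above store
// _BitsPerPointer (= 2) bits per pointer-sized word, so the entry at bit
// offset i lives in byte i/8, shifted right by i%8. The same arithmetic as a
// hypothetical standalone helper:
func pointerBits(bytedata []byte, i uintptr) uint8 {
	// i is a multiple of _BitsPerPointer; the result is one of
	// _BitsDead, _BitsScalar, _BitsPointer or _BitsMultiWord.
	return (bytedata[i/8] >> (i % 8)) & 3
}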
func dumpframe(s *stkframe, arg unsafe.Pointer) bool {
child := (*childInfo)(arg)
f := s.fn
// Figure out what we can about our stack map
pc := s.pc
if pc != f.entry {
pc--
}
pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, pc)
if pcdata == -1 {
// We do not have a valid pcdata value but there might be a
// stackmap for this function. It is likely that we are looking
// at the function prologue, assume so and hope for the best.
pcdata = 0
}
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
// Dump any types we will need to resolve Efaces.
if child.args.n >= 0 {
dumpbvtypes(&child.args, unsafe.Pointer(s.sp+child.argoff))
}
var bv bitvector
if stkmap != nil && stkmap.n > 0 {
bv = stackmapdata(stkmap, pcdata)
dumpbvtypes(&bv, unsafe.Pointer(s.varp-uintptr(bv.n/_BitsPerPointer*ptrSize)))
} else {
bv.n = -1
}
// Dump main body of stack frame.
dumpint(tagStackFrame)
dumpint(uint64(s.sp)) // lowest address in frame
dumpint(uint64(child.depth)) // # of frames deep on the stack
dumpint(uint64(uintptr(unsafe.Pointer(child.sp)))) // sp of child, or 0 if bottom of stack
dumpmemrange(unsafe.Pointer(s.sp), s.fp-s.sp) // frame contents
dumpint(uint64(f.entry))
dumpint(uint64(s.pc))
dumpint(uint64(s.continpc))
name := gofuncname(f)
if name == "" {
name = "unknown function"
}
dumpstr(name)
// Dump fields in the outargs section
if child.args.n >= 0 {
dumpbv(&child.args, child.argoff)
} else {
// conservative - everything might be a pointer
for off := child.argoff; off < child.argoff+child.arglen; off += ptrSize {
dumpint(fieldKindPtr)
dumpint(uint64(off))
}
}
// Dump fields in the local vars section
if stkmap == nil {
// No locals information, dump everything.
for off := child.arglen; off < s.varp-s.sp; off += ptrSize {
dumpint(fieldKindPtr)
dumpint(uint64(off))
}
} else if stkmap.n < 0 {
// Locals size information, dump just the locals.
size := uintptr(-stkmap.n)
for off := s.varp - size - s.sp; off < s.varp-s.sp; off += ptrSize {
dumpint(fieldKindPtr)
dumpint(uint64(off))
}
} else if stkmap.n > 0 {
// Locals bitmap information, scan just the pointers in
// locals.
dumpbv(&bv, s.varp-uintptr(bv.n)/_BitsPerPointer*ptrSize-s.sp)
}
dumpint(fieldKindEol)
// Record arg info for parent.
child.argoff = s.argp - s.fp
child.arglen = s.arglen
child.sp = (*uint8)(unsafe.Pointer(s.sp))
child.depth++
stkmap = (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
if stkmap != nil {
child.args = stackmapdata(stkmap, pcdata)
} else {
child.args.n = -1
}
return true
}
func dumpgoroutine(gp *g) {
var sp, pc, lr uintptr
if gp.syscallsp != 0 {
sp = gp.syscallsp
pc = gp.syscallpc
lr = 0
} else {
sp = gp.sched.sp
pc = gp.sched.pc
lr = gp.sched.lr
}
dumpint(tagGoroutine)
dumpint(uint64(uintptr(unsafe.Pointer(gp))))
dumpint(uint64(sp))
dumpint(uint64(gp.goid))
dumpint(uint64(gp.gopc))
dumpint(uint64(readgstatus(gp)))
dumpbool(gp.issystem)
dumpbool(false) // isbackground
dumpint(uint64(gp.waitsince))
dumpstr(gp.waitreason)
dumpint(uint64(uintptr(gp.sched.ctxt)))
dumpint(uint64(uintptr(unsafe.Pointer(gp.m))))
dumpint(uint64(uintptr(unsafe.Pointer(gp._defer))))
dumpint(uint64(uintptr(unsafe.Pointer(gp._panic))))
// dump stack
var child childInfo
child.args.n = -1
child.arglen = 0
child.sp = nil
child.depth = 0
gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, dumpframe, noescape(unsafe.Pointer(&child)), 0)
// dump defer & panic records
for d := gp._defer; d != nil; d = d.link {
dumpint(tagDefer)
dumpint(uint64(uintptr(unsafe.Pointer(d))))
dumpint(uint64(uintptr(unsafe.Pointer(gp))))
dumpint(uint64(d.argp))
dumpint(uint64(d.pc))
dumpint(uint64(uintptr(unsafe.Pointer(d.fn))))
dumpint(uint64(uintptr(unsafe.Pointer(d.fn.fn))))
dumpint(uint64(uintptr(unsafe.Pointer(d.link))))
}
for p := gp._panic; p != nil; p = p.link {
dumpint(tagPanic)
dumpint(uint64(uintptr(unsafe.Pointer(p))))
dumpint(uint64(uintptr(unsafe.Pointer(gp))))
eface := (*eface)(unsafe.Pointer(&p.arg))
dumpint(uint64(uintptr(unsafe.Pointer(eface._type))))
dumpint(uint64(uintptr(unsafe.Pointer(eface.data))))
dumpint(0) // was p->defer, no longer recorded
dumpint(uint64(uintptr(unsafe.Pointer(p.link))))
}
}
func dumpgs() {
// goroutines & stacks
for i := 0; uintptr(i) < allglen; i++ {
gp := allgs[i]
status := readgstatus(gp) // The world is stopped so gp will not be in a scan state.
switch status {
default:
print("runtime: unexpected G.status ", hex(status), "\n")
gothrow("dumpgs in STW - bad status")
case _Gdead:
// ok
case _Grunnable,
_Gsyscall,
_Gwaiting:
dumpgoroutine(gp)
}
}
}
func finq_callback(fn *funcval, obj unsafe.Pointer, nret uintptr, fint *_type, ot *ptrtype) {
dumpint(tagQueuedFinalizer)
dumpint(uint64(uintptr(obj)))
dumpint(uint64(uintptr(unsafe.Pointer(fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fn.fn))))
dumpint(uint64(uintptr(unsafe.Pointer(fint))))
dumpint(uint64(uintptr(unsafe.Pointer(ot))))
}
func dumproots() {
// data segment
dumpbvtypes(&gcdatamask, unsafe.Pointer(&data))
dumpint(tagData)
dumpint(uint64(uintptr(unsafe.Pointer(&data))))
dumpmemrange(unsafe.Pointer(&data), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)))
dumpfields(gcdatamask)
// bss segment
dumpbvtypes(&gcbssmask, unsafe.Pointer(&bss))
dumpint(tagBSS)
dumpint(uint64(uintptr(unsafe.Pointer(&bss))))
dumpmemrange(unsafe.Pointer(&bss), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)))
dumpfields(gcbssmask)
// MSpan.types
allspans := h_allspans
for spanidx := uint32(0); spanidx < mheap_.nspan; spanidx++ {
s := allspans[spanidx]
if s.state == _MSpanInUse {
// Finalizers
for sp := s.specials; sp != nil; sp = sp.next {
if sp.kind != _KindSpecialFinalizer {
continue
}
spf := (*specialfinalizer)(unsafe.Pointer(sp))
p := unsafe.Pointer((uintptr(s.start) << _PageShift) + uintptr(spf.special.offset))
dumpfinalizer(p, spf.fn, spf.fint, spf.ot)
}
}
}
// Finalizer queue
iterate_finq(finq_callback)
}
// Bit vector of free marks.
// Needs to be as big as the largest number of objects per span.
var freemark [_PageSize / 8]bool
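// Editorial note, not part of this CL: the smallest size class is 8 bytes,
// so a single 8 KB page can hold at most _PageSize/8 = 1024 objects, which
// is why freemark has exactly that many entries.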
func dumpobjs() {
for i := uintptr(0); i < uintptr(mheap_.nspan); i++ {
s := h_allspans[i]
if s.state != _MSpanInUse {
continue
}
p := uintptr(s.start << _PageShift)
size := s.elemsize
n := (s.npages << _PageShift) / size
if n > uintptr(len(freemark)) {
gothrow("freemark array doesn't have enough entries")
}
for l := s.freelist; l != nil; l = l.next {
freemark[(uintptr(unsafe.Pointer(l))-p)/size] = true
}
for j := uintptr(0); j < n; j, p = j+1, p+size {
if freemark[j] {
freemark[j] = false
continue
}
dumpobj(unsafe.Pointer(p), size, makeheapobjbv(p, size))
}
}
}
func dumpparams() {
dumpint(tagParams)
x := uintptr(1)
if *(*byte)(unsafe.Pointer(&x)) == 1 {
dumpbool(false) // little-endian ptrs
} else {
dumpbool(true) // big-endian ptrs
}
dumpint(ptrSize)
dumpint(uint64(mheap_.arena_start))
dumpint(uint64(mheap_.arena_used))
dumpint(thechar)
dumpstr(goexperiment)
dumpint(uint64(ncpu))
}
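// Editorial sketch, not part of this CL: the endianness probe above stores 1
// in a word and checks its first byte. The same test as a hypothetical
// standalone function (assumes "unsafe" is imported):
func isLittleEndian() bool {
	x := uintptr(1)
	return *(*byte)(unsafe.Pointer(&x)) == 1
}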
func itab_callback(tab *itab) {
t := tab._type
// Dump a map from itab* to the type of its data field.
// We want this map so we can deduce types of interface referents.
if t.kind&kindDirectIface == 0 {
// indirect - data slot is a pointer to t.
dumptype(t.ptrto)
dumpint(tagItab)
dumpint(uint64(uintptr(unsafe.Pointer(tab))))
dumpint(uint64(uintptr(unsafe.Pointer(t.ptrto))))
} else if t.kind&kindNoPointers == 0 {
// t is pointer-like - data slot is a t.
dumptype(t)
dumpint(tagItab)
dumpint(uint64(uintptr(unsafe.Pointer(tab))))
dumpint(uint64(uintptr(unsafe.Pointer(t))))
} else {
// Data slot is a scalar. Dump type just for fun.
// With pointer-only interfaces, this shouldn't happen.
dumptype(t)
dumpint(tagItab)
dumpint(uint64(uintptr(unsafe.Pointer(tab))))
dumpint(uint64(uintptr(unsafe.Pointer(t))))
}
}
func dumpitabs() {
iterate_itabs(itab_callback)
}
func dumpms() {
for mp := allm; mp != nil; mp = mp.alllink {
dumpint(tagOSThread)
dumpint(uint64(uintptr(unsafe.Pointer(mp))))
dumpint(uint64(mp.id))
dumpint(mp.procid)
}
}
func dumpmemstats() {
dumpint(tagMemStats)
dumpint(memstats.alloc)
dumpint(memstats.total_alloc)
dumpint(memstats.sys)
dumpint(memstats.nlookup)
dumpint(memstats.nmalloc)
dumpint(memstats.nfree)
dumpint(memstats.heap_alloc)
dumpint(memstats.heap_sys)
dumpint(memstats.heap_idle)
dumpint(memstats.heap_inuse)
dumpint(memstats.heap_released)
dumpint(memstats.heap_objects)
dumpint(memstats.stacks_inuse)
dumpint(memstats.stacks_sys)
dumpint(memstats.mspan_inuse)
dumpint(memstats.mspan_sys)
dumpint(memstats.mcache_inuse)
dumpint(memstats.mcache_sys)
dumpint(memstats.buckhash_sys)
dumpint(memstats.gc_sys)
dumpint(memstats.other_sys)
dumpint(memstats.next_gc)
dumpint(memstats.last_gc)
dumpint(memstats.pause_total_ns)
for i := 0; i < 256; i++ {
dumpint(memstats.pause_ns[i])
}
dumpint(uint64(memstats.numgc))
}
func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, frees uintptr) {
stk := (*[100000]uintptr)(unsafe.Pointer(pstk))
dumpint(tagMemProf)
dumpint(uint64(uintptr(unsafe.Pointer(b))))
dumpint(uint64(size))
dumpint(uint64(nstk))
for i := uintptr(0); i < nstk; i++ {
pc := stk[i]
f := findfunc(pc)
if f == nil {
var buf [64]byte
n := len(buf)
n--
buf[n] = ')'
if pc == 0 {
n--
buf[n] = '0'
} else {
for pc > 0 {
n--
buf[n] = "0123456789abcdef"[pc&15]
pc >>= 4
}
}
n--
buf[n] = 'x'
n--
buf[n] = '0'
n--
buf[n] = '('
dumpslice(buf[n:])
dumpstr("?")
dumpint(0)
} else {
dumpstr(gofuncname(f))
if i > 0 && pc > f.entry {
pc--
}
var file string
line := funcline(f, pc, &file)
dumpstr(file)
dumpint(uint64(line))
}
}
dumpint(uint64(allocs))
dumpint(uint64(frees))
}
func dumpmemprof() {
iterate_memprof(dumpmemprof_callback)
allspans := h_allspans
for spanidx := uint32(0); spanidx < mheap_.nspan; spanidx++ {
s := allspans[spanidx]
if s.state != _MSpanInUse {
continue
}
for sp := s.specials; sp != nil; sp = sp.next {
if sp.kind != _KindSpecialProfile {
continue
}
spp := (*specialprofile)(unsafe.Pointer(sp))
p := uintptr(s.start<<_PageShift) + uintptr(spp.special.offset)
dumpint(tagAllocSample)
dumpint(uint64(p))
dumpint(uint64(uintptr(unsafe.Pointer(spp.b))))
}
}
}
var dumphdr = []byte("go1.4 heap dump\n")
func mdump() {
// make sure we're done sweeping
for i := uintptr(0); i < uintptr(mheap_.nspan); i++ {
s := h_allspans[i]
if s.state == _MSpanInUse {
mSpan_EnsureSwept(s)
}
}
memclr(unsafe.Pointer(&typecache), unsafe.Sizeof(typecache))
dwrite(unsafe.Pointer(&dumphdr[0]), uintptr(len(dumphdr)))
dumpparams()
dumpitabs()
dumpobjs()
dumpgs()
dumpms()
dumproots()
dumpmemstats()
dumpmemprof()
dumpint(tagEOF)
flush()
}
func writeheapdump_m() {
_g_ := getg()
fd := _g_.m.scalararg[0]
_g_.m.scalararg[0] = 0
casgstatus(_g_.m.curg, _Grunning, _Gwaiting)
_g_.waitreason = "dumping heap"
// Update stats so we can dump them.
// As a side effect, flushes all the MCaches so the MSpan.freelist
// lists contain all the free objects.
updatememstats(nil)
// Set dump file.
dumpfd = fd
// Call dump routine.
mdump()
// Reset dump file.
dumpfd = 0
if tmpbuf != nil {
sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
tmpbuf = nil
}
casgstatus(_g_.m.curg, _Gwaiting, _Grunning)
}
// dumpint() the kind & offset of each field in an object.
func dumpfields(bv bitvector) {
dumpbv(&bv, 0)
dumpint(fieldKindEol)
}
// The heap dump reader needs to be able to disambiguate
// Eface entries. So it needs to know every type that might
// appear in such an entry. The following routine accomplishes that.
// TODO(rsc, khr): Delete - no longer possible.
// Dump all the types that appear in the type field of
// any Eface described by this bit vector.
func dumpbvtypes(bv *bitvector, base unsafe.Pointer) {
}
func makeheapobjbv(p uintptr, size uintptr) bitvector {
// Extend the temp buffer if necessary.
nptr := size / ptrSize
if uintptr(len(tmpbuf)) < nptr*_BitsPerPointer/8+1 {
if tmpbuf != nil {
sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
}
n := nptr*_BitsPerPointer/8 + 1
p := sysAlloc(n, &memstats.other_sys)
if p == nil {
gothrow("heapdump: out of memory")
}
tmpbuf = (*[1 << 30]byte)(p)[:n]
}
// Copy and compact the bitmap.
var i uintptr
for i = 0; i < nptr; i++ {
off := (p + i*ptrSize - mheap_.arena_start) / ptrSize
bitp := (*uint8)(unsafe.Pointer(mheap_.arena_start - off/wordsPerBitmapByte - 1))
shift := uint8((off % wordsPerBitmapByte) * gcBits)
bits := (*bitp >> (shift + 2)) & _BitsMask
if bits == _BitsDead {
break // end of heap object
}
tmpbuf[i*_BitsPerPointer/8] &^= (_BitsMask << ((i * _BitsPerPointer) % 8))
tmpbuf[i*_BitsPerPointer/8] |= bits << ((i * _BitsPerPointer) % 8)
}
return bitvector{int32(i * _BitsPerPointer), &tmpbuf[0]}
}
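// Editorial sketch, not part of this CL: the bitmap lookup in makeheapobjbv
// with the Go 1.4 values gcBits = 4 and wordsPerBitmapByte = 2 substituted.
// Heap word number off (counting up from arena_start) is described by a
// nibble of the byte at arena_start - off/2 - 1, and bits 2 and 3 of that
// nibble are the pointer-type bits extracted above (assumes "unsafe" is
// imported; heapWordBits is an illustrative name, not a runtime function):
func heapWordBits(arenaStart, off uintptr) uint8 {
	bitp := (*uint8)(unsafe.Pointer(arenaStart - off/2 - 1)) // two heap words per bitmap byte
	shift := uint8(off%2) * 4                                // four bitmap bits per heap word
	return (*bitp >> (shift + 2)) & 3                        // keep only the pointer-type bits
}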
@@ -28,10 +28,11 @@ const (
maxGCMask = _MaxGCMask
bitsDead = _BitsDead
bitsPointer = _BitsPointer
bitsScalar = _BitsScalar
mSpanInUse = _MSpanInUse
concurrentSweep = _ConcurrentSweep != 0
concurrentSweep = _ConcurrentSweep
)
// Page number (address>>pageShift)
@@ -142,10 +143,9 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
s = c.alloc[tinySizeClass]
v := s.freelist
if v == nil {
mp := acquirem()
mp.scalararg[0] = tinySizeClass
onM(mcacheRefill_m)
releasem(mp)
onM(func() {
mCache_Refill(c, tinySizeClass)
})
s = c.alloc[tinySizeClass]
v = s.freelist
}
@@ -173,10 +173,9 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
s = c.alloc[sizeclass]
v := s.freelist
if v == nil {
mp := acquirem()
mp.scalararg[0] = uintptr(sizeclass)
onM(mcacheRefill_m)
releasem(mp)
onM(func() {
mCache_Refill(c, int32(sizeclass))
})
s = c.alloc[sizeclass]
v = s.freelist
}
@@ -193,13 +192,10 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
}
c.local_cachealloc += intptr(size)
} else {
mp := acquirem()
mp.scalararg[0] = uintptr(size)
mp.scalararg[1] = uintptr(flags)
onM(largeAlloc_m)
s = (*mspan)(mp.ptrarg[0])
mp.ptrarg[0] = nil
releasem(mp)
var s *mspan
onM(func() {
s = largeAlloc(size, uint32(flags))
})
x = unsafe.Pointer(uintptr(s.start << pageShift))
size = uintptr(s.elemsize)
}
@@ -359,7 +355,7 @@ func newarray(typ *_type, n uintptr) unsafe.Pointer {
if typ.kind&kindNoPointers != 0 {
flags |= flagNoScan
}
if int(n) < 0 || (typ.size > 0 && n > maxmem/uintptr(typ.size)) {
if int(n) < 0 || (typ.size > 0 && n > _MaxMem/uintptr(typ.size)) {
panic("runtime: allocation size out of range")
}
return mallocgc(uintptr(typ.size)*n, typ, flags)
@@ -585,10 +581,9 @@ func SetFinalizer(obj interface{}, finalizer interface{}) {
ftyp := f._type
if ftyp == nil {
// switch to M stack and remove finalizer
mp := acquirem()
mp.ptrarg[0] = e.data
onM(removeFinalizer_m)
releasem(mp)
onM(func() {
removefinalizer(e.data)
})
return
}
@@ -633,18 +628,11 @@ okarg:
// make sure we have a finalizer goroutine
createfing()
// switch to M stack to add finalizer record
mp := acquirem()
mp.ptrarg[0] = f.data
mp.ptrarg[1] = e.data
mp.scalararg[0] = nret
mp.ptrarg[2] = unsafe.Pointer(fint)
mp.ptrarg[3] = unsafe.Pointer(ot)
onM(setFinalizer_m)
if mp.scalararg[0] != 1 {
gothrow("runtime.SetFinalizer: finalizer already set")
}
releasem(mp)
onM(func() {
if !addfinalizer(e.data, (*funcval)(f.data), nret, fint, ot) {
gothrow("runtime.SetFinalizer: finalizer already set")
}
})
}
// round n up to a multiple of a. a must be a power of 2.
......
@@ -6,143 +6,104 @@
//
// TODO(rsc): double-check stats.
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "type.h"
#include "typekind.h"
#include "race.h"
#include "stack.h"
#include "textflag.h"
// Mark mheap as 'no pointers', it does not contain interesting pointers but occupies ~45K.
#pragma dataflag NOPTR
MHeap runtime·mheap;
#pragma dataflag NOPTR
MStats runtime·memstats;
int32
runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
{
uintptr n, i;
byte *p;
MSpan *s;
g->m->mcache->local_nlookup++;
if (sizeof(void*) == 4 && g->m->mcache->local_nlookup >= (1<<30)) {
package runtime
import "unsafe"
const _MaxArena32 = 2 << 30
// For use by Go. If it were a C enum it would be made available automatically,
// but the value of MaxMem is too large for enum.
// XXX - uintptr runtime·maxmem = MaxMem;
func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
_g_ := getg()
_g_.m.mcache.local_nlookup++
if ptrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 {
// purge cache stats to prevent overflow
runtime·lock(&runtime·mheap.lock);
runtime·purgecachedstats(g->m->mcache);
runtime·unlock(&runtime·mheap.lock);
lock(&mheap_.lock)
purgecachedstats(_g_.m.mcache)
unlock(&mheap_.lock)
}
s = runtime·MHeap_LookupMaybe(&runtime·mheap, v);
if(sp)
*sp = s;
if(s == nil) {
if(base)
*base = nil;
if(size)
*size = 0;
return 0;
s := mHeap_LookupMaybe(&mheap_, unsafe.Pointer(v))
if sp != nil {
*sp = s
}
if s == nil {
if base != nil {
*base = 0
}
if size != nil {
*size = 0
}
return 0
}
p = (byte*)((uintptr)s->start<<PageShift);
if(s->sizeclass == 0) {
p := uintptr(s.start) << _PageShift
if s.sizeclass == 0 {
// Large object.
if(base)
*base = p;
if(size)
*size = s->npages<<PageShift;
return 1;
if base != nil {
*base = p
}
if size != nil {
*size = s.npages << _PageShift
}
return 1
}
n = s->elemsize;
if(base) {
i = ((byte*)v - p)/n;
*base = p + i*n;
n := s.elemsize
if base != nil {
i := (uintptr(v) - uintptr(p)) / n
*base = p + i*n
}
if size != nil {
*size = n
}
if(size)
*size = n;
return 1;
return 1
}
#pragma textflag NOSPLIT
void
runtime·purgecachedstats(MCache *c)
{
MHeap *h;
int32 i;
//go:nosplit
func purgecachedstats(c *mcache) {
// Protected by either heap or GC lock.
h = &runtime·mheap;
mstats.heap_alloc += c->local_cachealloc;
c->local_cachealloc = 0;
mstats.tinyallocs += c->local_tinyallocs;
c->local_tinyallocs = 0;
mstats.nlookup += c->local_nlookup;
c->local_nlookup = 0;
h->largefree += c->local_largefree;
c->local_largefree = 0;
h->nlargefree += c->local_nlargefree;
c->local_nlargefree = 0;
for(i=0; i<nelem(c->local_nsmallfree); i++) {
h->nsmallfree[i] += c->local_nsmallfree[i];
c->local_nsmallfree[i] = 0;
h := &mheap_
memstats.heap_alloc += uint64(c.local_cachealloc)
c.local_cachealloc = 0
memstats.tinyallocs += uint64(c.local_tinyallocs)
c.local_tinyallocs = 0
memstats.nlookup += uint64(c.local_nlookup)
c.local_nlookup = 0
h.largefree += uint64(c.local_largefree)
c.local_largefree = 0
h.nlargefree += uint64(c.local_nlargefree)
c.local_nlargefree = 0
for i := 0; i < len(c.local_nsmallfree); i++ {
h.nsmallfree[i] += uint64(c.local_nsmallfree[i])
c.local_nsmallfree[i] = 0
}
}
// Size of the trailing by_size array differs between Go and C,
// and all data after by_size is local to C, not exported to Go.
// NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
// sizeof_C_MStats is what C thinks about size of Go struct.
uintptr runtime·sizeof_C_MStats = offsetof(MStats, by_size[61]);
func mallocinit() {
initSizes()
if class_to_size[_TinySizeClass] != _TinySize {
gothrow("bad TinySizeClass")
}
#define MaxArena32 (2U<<30)
var p, arena_size, bitmap_size, spans_size, p_size, limit uintptr
var reserved bool
// For use by Go. If it were a C enum it would be made available automatically,
// but the value of MaxMem is too large for enum.
uintptr runtime·maxmem = MaxMem;
void
runtime·mallocinit(void)
{
byte *p, *p1;
uintptr arena_size, bitmap_size, spans_size, p_size;
extern byte runtime·end[];
uintptr limit;
uint64 i;
bool reserved;
p = nil;
p_size = 0;
arena_size = 0;
bitmap_size = 0;
spans_size = 0;
reserved = false;
// for 64-bit build
USED(p);
USED(p_size);
USED(arena_size);
USED(bitmap_size);
USED(spans_size);
runtime·InitSizes();
if(runtime·class_to_size[TinySizeClass] != TinySize)
runtime·throw("bad TinySizeClass");
// limit = runtime·memlimit();
// limit = runtime.memlimit();
// See https://code.google.com/p/go/issues/detail?id=5049
// TODO(rsc): Fix after 1.1.
limit = 0;
limit = 0
// Set up the allocation arena, a contiguous area of memory where
// allocated data will be found. The arena begins with a bitmap large
// enough to hold 4 bits per allocated word.
if(sizeof(void*) == 8 && (limit == 0 || limit > (1<<30))) {
if ptrSize == 8 && (limit == 0 || limit > 1<<30) {
// On a 64-bit machine, allocate from a single contiguous reservation.
// 128 GB (MaxMem) should be big enough for now.
//
@@ -153,7 +114,7 @@ runtime·mallocinit(void)
// in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
// UTF-8 sequences, and they are otherwise as far away from
// ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
// addresses. An earlier attempt to use 0x11f8 caused out of memory errors
// on OS X during thread allocations. 0x00c0 causes conflicts with
@@ -167,19 +128,21 @@ runtime·mallocinit(void)
// but it hardly matters: e0 00 is not valid UTF-8 either.
//
// If this fails we fall back to the 32 bit memory mechanism
arena_size = MaxMem;
bitmap_size = arena_size / (sizeof(void*)*8/4);
spans_size = arena_size / PageSize * sizeof(runtime·mheap.spans[0]);
spans_size = ROUND(spans_size, PageSize);
for(i = 0; i <= 0x7f; i++) {
p = (void*)(i<<40 | 0x00c0ULL<<32);
p_size = bitmap_size + spans_size + arena_size + PageSize;
p = runtime·SysReserve(p, p_size, &reserved);
if(p != nil)
break;
arena_size = round(_MaxMem, _PageSize)
bitmap_size = arena_size / (ptrSize * 8 / 4)
spans_size = arena_size / _PageSize * ptrSize
spans_size = round(spans_size, _PageSize)
for i := 0; i <= 0x7f; i++ {
p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
p_size = bitmap_size + spans_size + arena_size + _PageSize
p = uintptr(sysReserve(unsafe.Pointer(p), p_size, &reserved))
if p != 0 {
break
}
}
}
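// Worked example, editorial and not part of this CL: with the 64-bit sizes
// computed above, arena_size = round(_MaxMem, _PageSize) = 1<<37 bytes
// (128 GB), bitmap_size = arena_size/16 = 8 GB, and
// spans_size = round(arena_size/8192*8, _PageSize) = 128 MB, so each
// sysReserve attempt asks for roughly 136 GB of contiguous virtual address
// space (address space only; nothing is committed until sysMap).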
if (p == nil) {
if p == 0 {
// On a 32-bit machine, we can't typically get away
// with a giant virtual address space reservation.
// Instead we map the memory information bitmap
@@ -197,15 +160,15 @@ runtime·mallocinit(void)
// for 4GB of mappings, and then accept any memory the
// kernel threw at us, but normally that's a waste of 512 MB
// of address space, which is probably too much in a 32-bit world.
bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
arena_size = 512<<20;
spans_size = MaxArena32 / PageSize * sizeof(runtime·mheap.spans[0]);
if(limit > 0 && arena_size+bitmap_size+spans_size > limit) {
bitmap_size = (limit / 9) & ~((1<<PageShift) - 1);
arena_size = bitmap_size * 8;
spans_size = arena_size / PageSize * sizeof(runtime·mheap.spans[0]);
bitmap_size = _MaxArena32 / (ptrSize * 8 / 4)
arena_size = 512 << 20
spans_size = _MaxArena32 / _PageSize * ptrSize
if limit > 0 && arena_size+bitmap_size+spans_size > limit {
bitmap_size = (limit / 9) &^ ((1 << _PageShift) - 1)
arena_size = bitmap_size * 8
spans_size = arena_size / _PageSize * ptrSize
}
spans_size = ROUND(spans_size, PageSize);
spans_size = round(spans_size, _PageSize)
// SysReserve treats the address we ask for, end, as a hint,
// not as an absolute requirement. If we ask for the end
@@ -216,181 +179,140 @@ runtime·mallocinit(void)
// So adjust it upward a little bit ourselves: 1/4 MB to get
// away from the running binary image and then round up
// to a MB boundary.
p = (byte*)ROUND((uintptr)runtime·end + (1<<18), 1<<20);
p_size = bitmap_size + spans_size + arena_size + PageSize;
p = runtime·SysReserve(p, p_size, &reserved);
if(p == nil)
runtime·throw("runtime: cannot reserve arena virtual address space");
p = round(uintptr(unsafe.Pointer(&end))+(1<<18), 1<<20)
p_size = bitmap_size + spans_size + arena_size + _PageSize
p = uintptr(sysReserve(unsafe.Pointer(p), p_size, &reserved))
if p == 0 {
gothrow("runtime: cannot reserve arena virtual address space")
}
}
// PageSize can be larger than OS definition of page size,
// so SysReserve can give us a PageSize-unaligned pointer.
// To overcome this we ask for PageSize more and round up the pointer.
p1 = (byte*)ROUND((uintptr)p, PageSize);
runtime·mheap.spans = (MSpan**)p1;
runtime·mheap.bitmap = p1 + spans_size;
runtime·mheap.arena_start = p1 + spans_size + bitmap_size;
runtime·mheap.arena_used = runtime·mheap.arena_start;
runtime·mheap.arena_end = p + p_size;
runtime·mheap.arena_reserved = reserved;
if(((uintptr)runtime·mheap.arena_start & (PageSize-1)) != 0)
runtime·throw("misrounded allocation in mallocinit");
p1 := round(p, _PageSize)
mheap_.spans = (**mspan)(unsafe.Pointer(p1))
mheap_.bitmap = p1 + spans_size
mheap_.arena_start = p1 + (spans_size + bitmap_size)
mheap_.arena_used = mheap_.arena_start
mheap_.arena_end = p + p_size
mheap_.arena_reserved = reserved
if mheap_.arena_start&(_PageSize-1) != 0 {
println("bad pagesize", hex(p), hex(p1), hex(spans_size), hex(bitmap_size), hex(_PageSize), "start", hex(mheap_.arena_start))
gothrow("misrounded allocation in mallocinit")
}
// Initialize the rest of the allocator.
runtime·MHeap_Init(&runtime·mheap);
g->m->mcache = runtime·allocmcache();
// Initialize the rest of the allocator.
mHeap_Init(&mheap_, spans_size)
_g_ := getg()
_g_.m.mcache = allocmcache()
}
void*
runtime·MHeap_SysAlloc(MHeap *h, uintptr n)
{
byte *p, *p_end;
uintptr p_size;
bool reserved;
if(n > h->arena_end - h->arena_used) {
func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer {
if n > uintptr(h.arena_end)-uintptr(h.arena_used) {
// We are in 32-bit mode, maybe we didn't use all possible address space yet.
// Reserve some more space.
byte *new_end;
p_size = ROUND(n + PageSize, 256<<20);
new_end = h->arena_end + p_size;
if(new_end <= h->arena_start + MaxArena32) {
p_size := round(n+_PageSize, 256<<20)
new_end := h.arena_end + p_size
if new_end <= h.arena_start+_MaxArena32 {
// TODO: It would be bad if part of the arena
// is reserved and part is not.
p = runtime·SysReserve(h->arena_end, p_size, &reserved);
if(p == h->arena_end) {
h->arena_end = new_end;
h->arena_reserved = reserved;
}
else if(p+p_size <= h->arena_start + MaxArena32) {
var reserved bool
p := uintptr(sysReserve((unsafe.Pointer)(h.arena_end), p_size, &reserved))
if p == h.arena_end {
h.arena_end = new_end
h.arena_reserved = reserved
} else if p+p_size <= h.arena_start+_MaxArena32 {
// Keep everything page-aligned.
// Our pages are bigger than hardware pages.
h->arena_end = p+p_size;
h->arena_used = p + (-(uintptr)p&(PageSize-1));
h->arena_reserved = reserved;
h.arena_end = p + p_size
h.arena_used = p + (-uintptr(p) & (_PageSize - 1))
h.arena_reserved = reserved
} else {
uint64 stat;
stat = 0;
runtime·SysFree(p, p_size, &stat);
var stat uint64
sysFree((unsafe.Pointer)(p), p_size, &stat)
}
}
}
if(n <= h->arena_end - h->arena_used) {
if n <= uintptr(h.arena_end)-uintptr(h.arena_used) {
// Keep taking from our reservation.
p = h->arena_used;
runtime·SysMap(p, n, h->arena_reserved, &mstats.heap_sys);
h->arena_used += n;
runtime·MHeap_MapBits(h);
runtime·MHeap_MapSpans(h);
if(raceenabled)
runtime·racemapshadow(p, n);
if(((uintptr)p & (PageSize-1)) != 0)
runtime·throw("misrounded allocation in MHeap_SysAlloc");
return p;
p := h.arena_used
sysMap((unsafe.Pointer)(p), n, h.arena_reserved, &memstats.heap_sys)
h.arena_used += n
mHeap_MapBits(h)
mHeap_MapSpans(h)
if raceenabled {
racemapshadow((unsafe.Pointer)(p), n)
}
if uintptr(p)&(_PageSize-1) != 0 {
gothrow("misrounded allocation in MHeap_SysAlloc")
}
return (unsafe.Pointer)(p)
}
// If using 64-bit, our reservation is all we have.
if(h->arena_end - h->arena_start >= MaxArena32)
return nil;
if uintptr(h.arena_end)-uintptr(h.arena_start) >= _MaxArena32 {
return nil
}
// On 32-bit, once the reservation is gone we can
// try to get memory at a location chosen by the OS
// and hope that it is in the range we allocated bitmap for.
p_size = ROUND(n, PageSize) + PageSize;
p = runtime·sysAlloc(p_size, &mstats.heap_sys);
if(p == nil)
return nil;
if(p < h->arena_start || p+p_size - h->arena_start >= MaxArena32) {
runtime·printf("runtime: memory allocated by OS (%p) not in usable range [%p,%p)\n",
p, h->arena_start, h->arena_start+MaxArena32);
runtime·SysFree(p, p_size, &mstats.heap_sys);
return nil;
}
p_end = p + p_size;
p += -(uintptr)p & (PageSize-1);
if(p+n > h->arena_used) {
h->arena_used = p+n;
if(p_end > h->arena_end)
h->arena_end = p_end;
runtime·MHeap_MapBits(h);
runtime·MHeap_MapSpans(h);
if(raceenabled)
runtime·racemapshadow(p, n);
p_size := round(n, _PageSize) + _PageSize
p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
if p == 0 {
return nil
}
if(((uintptr)p & (PageSize-1)) != 0)
runtime·throw("misrounded allocation in MHeap_SysAlloc");
return p;
}
void
runtime·setFinalizer_m(void)
{
FuncVal *fn;
void *arg;
uintptr nret;
Type *fint;
PtrType *ot;
fn = g->m->ptrarg[0];
arg = g->m->ptrarg[1];
nret = g->m->scalararg[0];
fint = g->m->ptrarg[2];
ot = g->m->ptrarg[3];
g->m->ptrarg[0] = nil;
g->m->ptrarg[1] = nil;
g->m->ptrarg[2] = nil;
g->m->ptrarg[3] = nil;
g->m->scalararg[0] = runtime·addfinalizer(arg, fn, nret, fint, ot);
}
if p < h.arena_start || uintptr(p)+p_size-uintptr(h.arena_start) >= _MaxArena32 {
print("runtime: memory allocated by OS (", p, ") not in usable range [", hex(h.arena_start), ",", hex(h.arena_start+_MaxArena32), ")\n")
sysFree((unsafe.Pointer)(p), p_size, &memstats.heap_sys)
return nil
}
void
runtime·removeFinalizer_m(void)
{
void *p;
p_end := p + p_size
p += -p & (_PageSize - 1)
if uintptr(p)+n > uintptr(h.arena_used) {
h.arena_used = p + n
if p_end > h.arena_end {
h.arena_end = p_end
}
mHeap_MapBits(h)
mHeap_MapSpans(h)
if raceenabled {
racemapshadow((unsafe.Pointer)(p), n)
}
}
p = g->m->ptrarg[0];
g->m->ptrarg[0] = nil;
runtime·removefinalizer(p);
if uintptr(p)&(_PageSize-1) != 0 {
gothrow("misrounded allocation in MHeap_SysAlloc")
}
return (unsafe.Pointer)(p)
}
// mcallable cache refill
void
runtime·mcacheRefill_m(void)
{
runtime·MCache_Refill(g->m->mcache, (int32)g->m->scalararg[0]);
}
var end struct{}
func largeAlloc(size uintptr, flag uint32) *mspan {
// print("largeAlloc size=", size, "\n")
void
runtime·largeAlloc_m(void)
{
uintptr npages, size;
MSpan *s;
void *v;
int32 flag;
//runtime·printf("largeAlloc size=%D\n", g->m->scalararg[0]);
// Allocate directly from heap.
size = g->m->scalararg[0];
flag = (int32)g->m->scalararg[1];
if(size + PageSize < size)
runtime·throw("out of memory");
npages = size >> PageShift;
if((size & PageMask) != 0)
npages++;
s = runtime·MHeap_Alloc(&runtime·mheap, npages, 0, 1, !(flag & FlagNoZero));
if(s == nil)
runtime·throw("out of memory");
s->limit = (byte*)(s->start<<PageShift) + size;
v = (void*)(s->start << PageShift);
if size+_PageSize < size {
gothrow("out of memory")
}
npages := size >> _PageShift
if size&_PageMask != 0 {
npages++
}
s := mHeap_Alloc(&mheap_, npages, 0, true, flag&_FlagNoZero == 0)
if s == nil {
gothrow("out of memory")
}
s.limit = uintptr(s.start)<<_PageShift + size
v := unsafe.Pointer(uintptr(s.start) << _PageShift)
// setup for mark sweep
runtime·markspan(v, 0, 0, true);
g->m->ptrarg[0] = s;
markspan(v, 0, 0, true)
return s
}
@@ -2,6 +2,10 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import "unsafe"
// Memory allocator, based on tcmalloc.
// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
@@ -80,85 +84,64 @@
// This C code was written with an eye toward translating to Go
// in the future. Methods have the form Type_Method(Type *t, ...).
typedef struct MCentral MCentral;
typedef struct MHeap MHeap;
typedef struct MSpan MSpan;
typedef struct MStats MStats;
typedef struct MLink MLink;
typedef struct GCStats GCStats;
enum
{
PageShift = 13,
PageSize = 1<<PageShift,
PageMask = PageSize - 1,
};
typedef uintptr pageID; // address >> PageShift
enum
{
const (
_PageShift = 13
_PageSize = 1 << _PageShift
_PageMask = _PageSize - 1
)
const (
// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
_64bit = 1 << (^uintptr(0) >> 63) / 2
// Computed constant. The definition of MaxSmallSize and the
// algorithm in msize.c produce some number of different allocation
// size classes. NumSizeClasses is that number. It's needed here
// because there are static arrays of this length; when msize runs its
// size choosing algorithm it double-checks that NumSizeClasses agrees.
NumSizeClasses = 67,
_NumSizeClasses = 67
// Tunable constants.
MaxSmallSize = 32<<10,
_MaxSmallSize = 32 << 10
// Tiny allocator parameters, see "Tiny allocator" comment in malloc.goc.
TinySize = 16,
TinySizeClass = 2,
_TinySize = 16
_TinySizeClass = 2
FixAllocChunk = 16<<10, // Chunk size for FixAlloc
MaxMHeapList = 1<<(20 - PageShift), // Maximum page length for fixed-size list in MHeap.
HeapAllocChunk = 1<<20, // Chunk size for heap growth
_FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
_MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
_HeapAllocChunk = 1 << 20 // Chunk size for heap growth
// Per-P, per order stack segment cache size.
StackCacheSize = 32*1024,
_StackCacheSize = 32 * 1024
// Number of orders that get caching. Order 0 is FixedStack
// and each successive order is twice as large.
NumStackOrders = 3,
_NumStackOrders = 3
// Number of bits in page to span calculations (4k pages).
// On Windows 64-bit we limit the arena to 32GB or 35 bits (see below for reason).
// On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
#ifdef _64BIT
#ifdef GOOS_windows
// On Windows 64-bit we limit the arena to 32GB or 35 bits.
// Windows counts memory used by page table into committed memory
// of the process, so we can't reserve too much memory.
// See http://golang.org/issue/5402 and http://golang.org/issue/5236.
MHeapMap_Bits = 35 - PageShift,
#else
MHeapMap_Bits = 37 - PageShift,
#endif
#else
MHeapMap_Bits = 32 - PageShift,
#endif
// On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
_MHeapMap_TotalBits = (_64bit*_Windows)*35 + (_64bit*(1-_Windows))*37 + (1-_64bit)*32
_MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift
_MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)
// Max number of threads to run garbage collection.
// 2, 3, and 4 are all plausible maximums depending
// on the hardware details of the machine. The garbage
// collector scales well to 32 cpus.
MaxGcproc = 32,
};
// Maximum memory allocation size, a hint for callers.
// This must be a #define instead of an enum because it
// is so large.
#ifdef _64BIT
#define MaxMem (1ULL<<(MHeapMap_Bits+PageShift)) /* 128 GB or 32 GB */
#else
#define MaxMem ((uintptr)-1)
#endif
_MaxGcproc = 32
)
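The _MHeapMap_TotalBits expression above folds the old #ifdef ladder into arithmetic on the 0/1 flags _64bit and _Windows: exactly one of the three products is nonzero. A standalone sketch of the three cases, using local stand-ins for those flags (hypothetical names, not runtime code):

package main

import "fmt"

func main() {
	cases := []struct {
		name           string
		is64bit, isWin uint64 // stand-ins for _64bit and _Windows
	}{
		{"64-bit windows", 1, 1},
		{"64-bit other", 1, 0},
		{"32-bit", 0, 0}, // _Windows is irrelevant once _64bit == 0
	}
	for _, c := range cases {
		totalBits := (c.is64bit*c.isWin)*35 + (c.is64bit*(1-c.isWin))*37 + (1-c.is64bit)*32
		maxMem := uint64(1)<<totalBits - 1
		fmt.Printf("%-15s totalBits=%d arena limit ~%d GB\n", c.name, totalBits, (maxMem+1)>>30)
	}
}

This reproduces the 32 GB / 128 GB / 4 GB limits the comments above describe.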
// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
struct MLink
{
MLink *next;
};
type mlink struct {
next *mlink
}
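The parenthetical above is the important property: an mlink is not a separate list node, it is overlaid on the first word of the (larger) block it tracks, exactly as mCentral_Grow does further down in this change. A standalone sketch of that overlay with a stand-in type (not runtime code):

package main

import (
	"fmt"
	"unsafe"
)

// Stand-in for the runtime's mlink; the link lives inside the block itself.
type mlink struct {
	next *mlink
}

func main() {
	// Carve one chunk into fixed-size "blocks" and thread a free list
	// through them by writing the link into each block's first word.
	const blockSize = 64
	chunk := make([]byte, 4*blockSize)
	var free *mlink
	for off := len(chunk) - blockSize; off >= 0; off -= blockSize {
		v := (*mlink)(unsafe.Pointer(&chunk[off]))
		v.next = free
		free = v
	}
	n := 0
	for v := free; v != nil; v = v.next {
		n++
	}
	fmt.Println("blocks on free list:", n) // 4
}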
// sysAlloc obtains a large chunk of zeroed memory from the
// operating system, typically on the order of a hundred kilobytes
......@@ -196,14 +179,6 @@ struct MLink
// SysFault marks a (already sysAlloc'd) region to fault
// if accessed. Used only for debugging the runtime.
void* runtime·sysAlloc(uintptr nbytes, uint64 *stat);
void runtime·SysFree(void *v, uintptr nbytes, uint64 *stat);
void runtime·SysUnused(void *v, uintptr nbytes);
void runtime·SysUsed(void *v, uintptr nbytes);
void runtime·SysMap(void *v, uintptr nbytes, bool reserved, uint64 *stat);
void* runtime·SysReserve(void *v, uintptr nbytes, bool *reserved);
void runtime·SysFault(void *v, uintptr nbytes);
// FixAlloc is a simple free-list allocator for fixed size objects.
// Malloc uses a FixAlloc wrapped around sysAlloc to manage its
// MCache and MSpan objects.
......@@ -212,83 +187,72 @@ void runtime·SysFault(void *v, uintptr nbytes);
// The caller is responsible for locking around FixAlloc calls.
// Callers can keep state in the object but the first word is
// smashed by freeing and reallocating.
struct FixAlloc
{
uintptr size;
void (*first)(void *arg, byte *p); // called first time p is returned
void* arg;
MLink* list;
byte* chunk;
uint32 nchunk;
uintptr inuse; // in-use bytes now
uint64* stat;
};
void runtime·FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat);
void* runtime·FixAlloc_Alloc(FixAlloc *f);
void runtime·FixAlloc_Free(FixAlloc *f, void *p);
type fixalloc struct {
size uintptr
first unsafe.Pointer // go func(unsafe.pointer, unsafe.pointer); f(arg, p) called first time p is returned
arg unsafe.Pointer
list *mlink
chunk *byte
nchunk uint32
inuse uintptr // in-use bytes now
stat *uint64
}
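The "first word is smashed" caveat above follows directly from fixAlloc_Free later in this change, which stores the free-list link in the object itself. A hedged caller-side sketch (hypothetical type and helper, not part of the runtime) of what that implies:

// Any state kept in a fixalloc-managed object must be reinitialized after
// every allocation, because freeing wrote a free-list pointer over the
// first word.
type spanDesc struct {
	gen  uintptr // first word: clobbered while the object sits on the free list
	data [56]byte
}

func allocSpanDesc(f *fixalloc) *spanDesc {
	d := (*spanDesc)(fixAlloc_Alloc(f))
	d.gen = 0 // reset: may hold a stale free-list link written by fixAlloc_Free
	return d
}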
// Statistics.
// Shared with Go: if you edit this structure, also edit type MemStats in mem.go.
struct MStats
{
type mstats struct {
// General statistics.
uint64 alloc; // bytes allocated and still in use
uint64 total_alloc; // bytes allocated (even if freed)
uint64 sys; // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
uint64 nlookup; // number of pointer lookups
uint64 nmalloc; // number of mallocs
uint64 nfree; // number of frees
alloc uint64 // bytes allocated and still in use
total_alloc uint64 // bytes allocated (even if freed)
sys uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
nlookup uint64 // number of pointer lookups
nmalloc uint64 // number of mallocs
nfree uint64 // number of frees
// Statistics about malloc heap.
// protected by mheap.lock
uint64 heap_alloc; // bytes allocated and still in use
uint64 heap_sys; // bytes obtained from system
uint64 heap_idle; // bytes in idle spans
uint64 heap_inuse; // bytes in non-idle spans
uint64 heap_released; // bytes released to the OS
uint64 heap_objects; // total number of allocated objects
heap_alloc uint64 // bytes allocated and still in use
heap_sys uint64 // bytes obtained from system
heap_idle uint64 // bytes in idle spans
heap_inuse uint64 // bytes in non-idle spans
heap_released uint64 // bytes released to the os
heap_objects uint64 // total number of allocated objects
// Statistics about allocation of low-level fixed-size structures.
// Protected by FixAlloc locks.
uint64 stacks_inuse; // this number is included in heap_inuse above
uint64 stacks_sys; // always 0 in mstats
uint64 mspan_inuse; // MSpan structures
uint64 mspan_sys;
uint64 mcache_inuse; // MCache structures
uint64 mcache_sys;
uint64 buckhash_sys; // profiling bucket hash table
uint64 gc_sys;
uint64 other_sys;
stacks_inuse uint64 // this number is included in heap_inuse above
stacks_sys uint64 // always 0 in mstats
mspan_inuse uint64 // mspan structures
mspan_sys uint64
mcache_inuse uint64 // mcache structures
mcache_sys uint64
buckhash_sys uint64 // profiling bucket hash table
gc_sys uint64
other_sys uint64
// Statistics about garbage collector.
// Protected by mheap or stopping the world during GC.
uint64 next_gc; // next GC (in heap_alloc time)
uint64 last_gc; // last GC (in absolute time)
uint64 pause_total_ns;
uint64 pause_ns[256]; // circular buffer of recent GC pause lengths
uint64 pause_end[256]; // circular buffer of recent GC end times (nanoseconds since 1970)
uint32 numgc;
bool enablegc;
bool debuggc;
next_gc uint64 // next gc (in heap_alloc time)
last_gc uint64 // last gc (in absolute time)
pause_total_ns uint64
pause_ns [256]uint64 // circular buffer of recent gc pause lengths
pause_end [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
numgc uint32
enablegc bool
debuggc bool
// Statistics about allocation size classes.
struct MStatsBySize {
uint32 size;
uint64 nmalloc;
uint64 nfree;
} by_size[NumSizeClasses];
uint64 tinyallocs; // number of tiny allocations that didn't cause actual allocation; not exported to Go directly
};
#define mstats runtime·memstats
extern MStats mstats;
void runtime·updatememstats(GCStats *stats);
void runtime·ReadMemStats(MStats *stats);
by_size [_NumSizeClasses]struct {
size uint32
nmalloc uint64
nfree uint64
}
tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
}
var memstats mstats
// Size classes. Computed and initialized by InitSizes.
//
......@@ -300,322 +264,212 @@ void runtime·ReadMemStats(MStats *stats);
// class_to_allocnpages[i] = number of pages to allocate when
// making new objects in class i
int32 runtime·SizeToClass(int32);
uintptr runtime·roundupsize(uintptr);
extern int32 runtime·class_to_size[NumSizeClasses];
extern int32 runtime·class_to_allocnpages[NumSizeClasses];
extern int8 runtime·size_to_class8[1024/8 + 1];
extern int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
extern void runtime·InitSizes(void);
typedef struct MCacheList MCacheList;
struct MCacheList
{
MLink *list;
uint32 nlist;
};
typedef struct StackFreeList StackFreeList;
struct StackFreeList
{
MLink *list; // linked list of free stacks
uintptr size; // total size of stacks in list
};
typedef struct SudoG SudoG;
var class_to_size [_NumSizeClasses]int32
var class_to_allocnpages [_NumSizeClasses]int32
var size_to_class8 [1024/8 + 1]int8
var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
type mcachelist struct {
list *mlink
nlist uint32
}
type stackfreelist struct {
list *mlink // linked list of free stacks
size uintptr // total size of stacks in list
}
// Per-thread (in Go, per-P) cache for small objects.
// No locking needed because it is per-thread (per-P).
struct MCache
{
type mcache struct {
// The following members are accessed on every malloc,
// so they are grouped here for better caching.
int32 next_sample; // trigger heap sample after allocating this many bytes
intptr local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap
next_sample int32 // trigger heap sample after allocating this many bytes
local_cachealloc intptr // bytes allocated (or freed) from cache since last lock of heap
// Allocator cache for tiny objects w/o pointers.
// See "Tiny allocator" comment in malloc.goc.
byte* tiny;
uintptr tinysize;
uintptr local_tinyallocs; // number of tiny allocs not counted in other stats
tiny *byte
tinysize uintptr
local_tinyallocs uintptr // number of tiny allocs not counted in other stats
// The rest is not accessed on every malloc.
MSpan* alloc[NumSizeClasses]; // spans to allocate from
alloc [_NumSizeClasses]*mspan // spans to allocate from
StackFreeList stackcache[NumStackOrders];
stackcache [_NumStackOrders]stackfreelist
SudoG* sudogcache;
sudogcache *sudog
void* gcworkbuf;
gcworkbuf unsafe.Pointer
// Local allocator stats, flushed during GC.
uintptr local_nlookup; // number of pointer lookups
uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize)
uintptr local_nlargefree; // number of frees for large objects (>MaxSmallSize)
uintptr local_nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize)
};
MSpan* runtime·MCache_Refill(MCache *c, int32 sizeclass);
void runtime·MCache_ReleaseAll(MCache *c);
void runtime·stackcache_clear(MCache *c);
void runtime·gcworkbuffree(void *b);
enum
{
KindSpecialFinalizer = 1,
KindSpecialProfile = 2,
local_nlookup uintptr // number of pointer lookups
local_largefree uintptr // bytes freed for large objects (>maxsmallsize)
local_nlargefree uintptr // number of frees for large objects (>maxsmallsize)
local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
}
const (
_KindSpecialFinalizer = 1
_KindSpecialProfile = 2
// Note: The finalizer special must be first because if we're freeing
// an object, a finalizer special will cause the freeing operation
// to abort, and we want to keep the other special records around
// if that happens.
};
)
typedef struct Special Special;
struct Special
{
Special* next; // linked list in span
uint16 offset; // span offset of object
byte kind; // kind of Special
};
type special struct {
next *special // linked list in span
offset uint16 // span offset of object
kind byte // kind of special
}
// The described object has a finalizer set for it.
typedef struct SpecialFinalizer SpecialFinalizer;
struct SpecialFinalizer
{
Special special;
FuncVal* fn;
uintptr nret;
Type* fint;
PtrType* ot;
};
type specialfinalizer struct {
special special
fn *funcval
nret uintptr
fint *_type
ot *ptrtype
}
// The described object is being heap profiled.
typedef struct Bucket Bucket; // from mprof.h
typedef struct SpecialProfile SpecialProfile;
struct SpecialProfile
{
Special special;
Bucket* b;
};
type specialprofile struct {
special special
b *bucket
}
// An MSpan is a run of pages.
enum
{
MSpanInUse = 0, // allocated for garbage collected heap
MSpanStack, // allocated for use by stack allocator
MSpanFree,
MSpanListHead,
MSpanDead,
};
struct MSpan
{
MSpan *next; // in a span linked list
MSpan *prev; // in a span linked list
pageID start; // starting page number
uintptr npages; // number of pages in span
MLink *freelist; // list of free objects
const (
_MSpanInUse = iota // allocated for garbage collected heap
_MSpanStack // allocated for use by stack allocator
_MSpanFree
_MSpanListHead
_MSpanDead
)
type mspan struct {
next *mspan // in a span linked list
prev *mspan // in a span linked list
start pageID // starting page number
npages uintptr // number of pages in span
freelist *mlink // list of free objects
// sweep generation:
// if sweepgen == h->sweepgen - 2, the span needs sweeping
// if sweepgen == h->sweepgen - 1, the span is currently being swept
// if sweepgen == h->sweepgen, the span is swept and ready to use
// h->sweepgen is incremented by 2 after every GC
uint32 sweepgen;
uint16 ref; // capacity - number of objects in freelist
uint8 sizeclass; // size class
bool incache; // being used by an MCache
uint8 state; // MSpanInUse etc
uint8 needzero; // needs to be zeroed before allocation
uintptr elemsize; // computed from sizeclass or from npages
int64 unusedsince; // First time spotted by GC in MSpanFree state
uintptr npreleased; // number of pages released to the OS
byte *limit; // end of data in span
Mutex specialLock; // guards specials list
Special *specials; // linked list of special records sorted by offset.
};
void runtime·MSpan_Init(MSpan *span, pageID start, uintptr npages);
void runtime·MSpan_EnsureSwept(MSpan *span);
bool runtime·MSpan_Sweep(MSpan *span, bool preserve);
sweepgen uint32
ref uint16 // capacity - number of objects in freelist
sizeclass uint8 // size class
incache bool // being used by an mcache
state uint8 // mspaninuse etc
needzero uint8 // needs to be zeroed before allocation
elemsize uintptr // computed from sizeclass or from npages
unusedsince int64 // first time spotted by gc in mspanfree state
npreleased uintptr // number of pages released to the os
limit uintptr // end of data in span
speciallock mutex // guards specials list
specials *special // linked list of special records sorted by offset.
}
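The sweepgen comment above is a small three-state protocol keyed off the heap's sweepgen. A standalone sketch of that classification (stand-in structs and a hypothetical helper, not runtime code):

package main

import "fmt"

type span struct{ sweepgen uint32 } // stand-in for mspan.sweepgen
type heap struct{ sweepgen uint32 } // stand-in for mheap.sweepgen

func sweepState(h heap, s span) string {
	switch s.sweepgen {
	case h.sweepgen - 2:
		return "needs sweeping"
	case h.sweepgen - 1:
		return "being swept"
	case h.sweepgen:
		return "swept, ready to use"
	}
	return "invalid"
}

func main() {
	h := heap{sweepgen: 6} // incremented by 2 after every GC
	for _, s := range []span{{4}, {5}, {6}} {
		fmt.Println("span sweepgen", s.sweepgen, "->", sweepState(h, s))
	}
}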
// Every MSpan is in one doubly-linked list,
// either one of the MHeap's free lists or one of the
// MCentral's span lists. We use empty MSpan structures as list heads.
void runtime·MSpanList_Init(MSpan *list);
bool runtime·MSpanList_IsEmpty(MSpan *list);
void runtime·MSpanList_Insert(MSpan *list, MSpan *span);
void runtime·MSpanList_InsertBack(MSpan *list, MSpan *span);
void runtime·MSpanList_Remove(MSpan *span); // from whatever list it is in
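Using an empty MSpan as the list head means each list is circular with a sentinel: an empty list points at itself, and insert/remove never test for nil. A standalone sketch of that shape with a simplified node type (hypothetical helpers, not the runtime's MSpanList functions):

package main

import "fmt"

type node struct {
	next, prev *node
	id         int
}

func listInit(head *node)         { head.next, head.prev = head, head }
func listIsEmpty(head *node) bool { return head.next == head }

func listInsert(head, n *node) { // insert at the front, right after the sentinel
	n.next, n.prev = head.next, head
	head.next.prev = n
	head.next = n
}

func listRemove(n *node) { // from whatever list it is in
	n.prev.next = n.next
	n.next.prev = n.prev
	n.next, n.prev = nil, nil
}

func main() {
	var head node
	listInit(&head)
	a, b := &node{id: 1}, &node{id: 2}
	listInsert(&head, a)
	listInsert(&head, b)
	listRemove(a)
	fmt.Println("empty:", listIsEmpty(&head), "front id:", head.next.id)
}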
// Central list of free objects of a given size.
struct MCentral
{
Mutex lock;
int32 sizeclass;
MSpan nonempty; // list of spans with a free object
MSpan empty; // list of spans with no free objects (or cached in an MCache)
};
void runtime·MCentral_Init(MCentral *c, int32 sizeclass);
MSpan* runtime·MCentral_CacheSpan(MCentral *c);
void runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s);
bool runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve);
type mcentral struct {
lock mutex
sizeclass int32
nonempty mspan // list of spans with a free object
empty mspan // list of spans with no free objects (or cached in an mcache)
}
// Main malloc heap.
// The heap itself is the "free[]" and "large" arrays,
// but all the other global data is here too.
struct MHeap
{
Mutex lock;
MSpan free[MaxMHeapList]; // free lists of given length
MSpan freelarge; // free lists length >= MaxMHeapList
MSpan busy[MaxMHeapList]; // busy lists of large objects of given length
MSpan busylarge; // busy lists of large objects length >= MaxMHeapList
MSpan **allspans; // all spans out there
MSpan **gcspans; // copy of allspans referenced by GC marker or sweeper
uint32 nspan;
uint32 nspancap;
uint32 sweepgen; // sweep generation, see comment in MSpan
uint32 sweepdone; // all spans are swept
type mheap struct {
lock mutex
free [_MaxMHeapList]mspan // free lists of given length
freelarge mspan // free lists length >= _MaxMHeapList
busy [_MaxMHeapList]mspan // busy lists of large objects of given length
busylarge mspan // busy lists of large objects length >= _MaxMHeapList
allspans **mspan // all spans out there
gcspans **mspan // copy of allspans referenced by gc marker or sweeper
nspan uint32
sweepgen uint32 // sweep generation, see comment in mspan
sweepdone uint32 // all spans are swept
// span lookup
MSpan** spans;
uintptr spans_mapped;
spans **mspan
spans_mapped uintptr
// range of addresses we might see in the heap
byte *bitmap;
uintptr bitmap_mapped;
byte *arena_start;
byte *arena_used;
byte *arena_end;
bool arena_reserved;
bitmap uintptr
bitmap_mapped uintptr
arena_start uintptr
arena_used uintptr
arena_end uintptr
arena_reserved bool
// central free lists for small size classes.
// the padding makes sure that the MCentrals are
// spaced CacheLineSize bytes apart, so that each MCentral.lock
// gets its own cache line.
struct MHeapCentral {
MCentral mcentral;
byte pad[CacheLineSize];
} central[NumSizeClasses];
central [_NumSizeClasses]struct {
mcentral mcentral
pad [_CacheLineSize]byte
}
FixAlloc spanalloc; // allocator for Span*
FixAlloc cachealloc; // allocator for MCache*
FixAlloc specialfinalizeralloc; // allocator for SpecialFinalizer*
FixAlloc specialprofilealloc; // allocator for SpecialProfile*
Mutex speciallock; // lock for special record allocators.
spanalloc fixalloc // allocator for span*
cachealloc fixalloc // allocator for mcache*
specialfinalizeralloc fixalloc // allocator for specialfinalizer*
specialprofilealloc fixalloc // allocator for specialprofile*
speciallock mutex // lock for special record allocators.
// Malloc stats.
uint64 largefree; // bytes freed for large objects (>MaxSmallSize)
uint64 nlargefree; // number of frees for large objects (>MaxSmallSize)
uint64 nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize)
};
#define runtime·mheap runtime·mheap_
extern MHeap runtime·mheap;
void runtime·MHeap_Init(MHeap *h);
MSpan* runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero);
MSpan* runtime·MHeap_AllocStack(MHeap *h, uintptr npage);
void runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct);
void runtime·MHeap_FreeStack(MHeap *h, MSpan *s);
MSpan* runtime·MHeap_Lookup(MHeap *h, void *v);
MSpan* runtime·MHeap_LookupMaybe(MHeap *h, void *v);
void* runtime·MHeap_SysAlloc(MHeap *h, uintptr n);
void runtime·MHeap_MapBits(MHeap *h);
void runtime·MHeap_MapSpans(MHeap *h);
void runtime·MHeap_Scavenge(int32 k, uint64 now, uint64 limit);
void* runtime·persistentalloc(uintptr size, uintptr align, uint64 *stat);
int32 runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s);
uintptr runtime·sweepone(void);
void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover);
void runtime·unmarkspan(void *v, uintptr size);
void runtime·purgecachedstats(MCache*);
void runtime·tracealloc(void*, uintptr, Type*);
void runtime·tracefree(void*, uintptr);
void runtime·tracegc(void);
int32 runtime·gcpercent;
int32 runtime·readgogc(void);
void runtime·clearpools(void);
enum
{
largefree uint64 // bytes freed for large objects (>maxsmallsize)
nlargefree uint64 // number of frees for large objects (>maxsmallsize)
nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
}
var mheap_ mheap
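The padded central array above exists purely so that two adjacent mcentral locks never share a cache line. A standalone sketch of the same layout trick with stand-in types (hypothetical sizes, not the runtime's):

package main

import (
	"fmt"
	"unsafe"
)

const cacheLineSize = 64 // stand-in for _CacheLineSize

type mutex struct{ key uintptr }

type central struct {
	lock mutex
	// per-sizeclass span lists would live here
}

// Same trick as mheap.central: pad every element so neighbouring locks
// land in different cache lines.
var centrals [67]struct {
	c   central
	pad [cacheLineSize]byte
}

func main() {
	elem := unsafe.Sizeof(centrals[0])
	fmt.Println("element size:", elem, "covers a full cache line:", elem >= cacheLineSize)
}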
const (
// flags to malloc
FlagNoScan = 1<<0, // GC doesn't have to scan object
FlagNoZero = 1<<1, // don't zero memory
};
void runtime·mProf_Malloc(void*, uintptr);
void runtime·mProf_Free(Bucket*, uintptr, bool);
void runtime·mProf_GC(void);
void runtime·iterate_memprof(void (**callback)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr));
int32 runtime·gcprocs(void);
void runtime·helpgc(int32 nproc);
void runtime·gchelper(void);
void runtime·createfing(void);
G* runtime·wakefing(void);
void runtime·getgcmask(byte*, Type*, byte**, uintptr*);
_FlagNoScan = 1 << 0 // GC doesn't have to scan object
_FlagNoZero = 1 << 1 // don't zero memory
)
// NOTE: Layout known to queuefinalizer.
typedef struct Finalizer Finalizer;
struct Finalizer
{
FuncVal *fn; // function to call
void *arg; // ptr to object
uintptr nret; // bytes of return values from fn
Type *fint; // type of first argument of fn
PtrType *ot; // type of ptr to object
};
typedef struct FinBlock FinBlock;
struct FinBlock
{
FinBlock *alllink;
FinBlock *next;
int32 cnt;
int32 cap;
Finalizer fin[1];
};
extern Mutex runtime·finlock; // protects the following variables
extern G* runtime·fing;
extern bool runtime·fingwait;
extern bool runtime·fingwake;
extern FinBlock *runtime·finq; // list of finalizers that are to be executed
extern FinBlock *runtime·finc; // cache of free blocks
void runtime·setprofilebucket_m(void);
bool runtime·addfinalizer(void*, FuncVal *fn, uintptr, Type*, PtrType*);
void runtime·removefinalizer(void*);
void runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot);
bool runtime·freespecial(Special *s, void *p, uintptr size, bool freed);
type finalizer struct {
fn *funcval // function to call
arg unsafe.Pointer // ptr to object
nret uintptr // bytes of return values from fn
fint *_type // type of first argument of fn
ot *ptrtype // type of ptr to object
}
type finblock struct {
alllink *finblock
next *finblock
cnt int32
cap int32
fin [1]finalizer
}
// Information from the compiler about the layout of stack frames.
struct BitVector
{
int32 n; // # of bits
uint8 *bytedata;
};
typedef struct StackMap StackMap;
struct StackMap
{
int32 n; // number of bitmaps
int32 nbit; // number of bits in each bitmap
uint8 bytedata[]; // bitmaps, each starting on a 32-bit boundary
};
type bitvector struct {
n int32 // # of bits
bytedata *uint8
}
type stackmap struct {
n int32 // number of bitmaps
nbit int32 // number of bits in each bitmap
bytedata [0]byte // bitmaps, each starting on a 32-bit boundary
}
// Returns pointer map data for the given stackmap index
// (the index is encoded in PCDATA_StackMapIndex).
BitVector runtime·stackmapdata(StackMap *stackmap, int32 n);
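Given the note that every bitmap in stackmap.bytedata starts on a 32-bit boundary, indexing the n'th bitmap is plain offset arithmetic. A hedged sketch of how the declared stackmapdata helper could be written under that assumption (illustration only; it relies on the surrounding file's unsafe import and the runtime's add/gothrow helpers, and the real definition may differ in detail):

func stackmapdata(stkmap *stackmap, n int32) bitvector {
	if n < 0 || n >= stkmap.n {
		gothrow("stackmapdata: index out of range")
	}
	// Each bitmap holds nbit bits, rounded up to whole 32-bit words.
	words := uintptr((stkmap.nbit + 31) / 32)
	off := uintptr(n) * words * 4
	return bitvector{stkmap.nbit, (*uint8)(add(unsafe.Pointer(&stkmap.bytedata), off))}
}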
extern BitVector runtime·gcdatamask;
extern BitVector runtime·gcbssmask;
// defined in mgc0.go
void runtime·gc_m_ptr(Eface*);
void runtime·gc_g_ptr(Eface*);
void runtime·gc_itab_ptr(Eface*);
void runtime·setgcpercent_m(void);
// Value we use to mark dead pointers when GODEBUG=gcdead=1.
#define PoisonGC ((uintptr)0xf969696969696969ULL)
#define PoisonStack ((uintptr)0x6868686868686868ULL)
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Per-P malloc cache for small objects.
//
// See malloc.h for an overview.
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
extern volatile intgo runtime·MemProfileRate;
// dummy MSpan that contains no free objects.
MSpan runtime·emptymspan;
MCache*
runtime·allocmcache(void)
{
intgo rate;
MCache *c;
int32 i;
runtime·lock(&runtime·mheap.lock);
c = runtime·FixAlloc_Alloc(&runtime·mheap.cachealloc);
runtime·unlock(&runtime·mheap.lock);
runtime·memclr((byte*)c, sizeof(*c));
for(i = 0; i < NumSizeClasses; i++)
c->alloc[i] = &runtime·emptymspan;
// Set first allocation sample size.
rate = runtime·MemProfileRate;
if(rate > 0x3fffffff) // make 2*rate not overflow
rate = 0x3fffffff;
if(rate != 0)
c->next_sample = runtime·fastrand1() % (2*rate);
return c;
}
static void
freemcache(MCache *c)
{
runtime·MCache_ReleaseAll(c);
runtime·stackcache_clear(c);
runtime·gcworkbuffree(c->gcworkbuf);
runtime·lock(&runtime·mheap.lock);
runtime·purgecachedstats(c);
runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
runtime·unlock(&runtime·mheap.lock);
}
static void
freemcache_m(void)
{
MCache *c;
c = g->m->ptrarg[0];
g->m->ptrarg[0] = nil;
freemcache(c);
}
void
runtime·freemcache(MCache *c)
{
void (*fn)(void);
g->m->ptrarg[0] = c;
fn = freemcache_m;
runtime·onM(&fn);
}
// Gets a span that has a free object in it and assigns it
// to be the cached span for the given sizeclass. Returns this span.
MSpan*
runtime·MCache_Refill(MCache *c, int32 sizeclass)
{
MSpan *s;
g->m->locks++;
// Return the current cached span to the central lists.
s = c->alloc[sizeclass];
if(s->freelist != nil)
runtime·throw("refill on a nonempty span");
if(s != &runtime·emptymspan)
s->incache = false;
// Get a new cached span from the central lists.
s = runtime·MCentral_CacheSpan(&runtime·mheap.central[sizeclass].mcentral);
if(s == nil)
runtime·throw("out of memory");
if(s->freelist == nil) {
runtime·printf("%d %d\n", s->ref, (int32)((s->npages << PageShift) / s->elemsize));
runtime·throw("empty span");
}
c->alloc[sizeclass] = s;
g->m->locks--;
return s;
}
void
runtime·MCache_ReleaseAll(MCache *c)
{
int32 i;
MSpan *s;
for(i=0; i<NumSizeClasses; i++) {
s = c->alloc[i];
if(s != &runtime·emptymspan) {
runtime·MCentral_UncacheSpan(&runtime·mheap.central[i].mcentral, s);
c->alloc[i] = &runtime·emptymspan;
}
}
}
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Per-P malloc cache for small objects.
//
// See malloc.h for an overview.
package runtime
import "unsafe"
// dummy MSpan that contains no free objects.
var emptymspan mspan
func allocmcache() *mcache {
lock(&mheap_.lock)
c := (*mcache)(fixAlloc_Alloc(&mheap_.cachealloc))
unlock(&mheap_.lock)
memclr(unsafe.Pointer(c), unsafe.Sizeof(*c))
for i := 0; i < _NumSizeClasses; i++ {
c.alloc[i] = &emptymspan
}
// Set first allocation sample size.
rate := MemProfileRate
if rate > 0x3fffffff { // make 2*rate not overflow
rate = 0x3fffffff
}
if rate != 0 {
c.next_sample = int32(int(fastrand1()) % (2 * rate))
}
return c
}
func freemcache(c *mcache) {
onM(func() {
mCache_ReleaseAll(c)
stackcache_clear(c)
gcworkbuffree(c.gcworkbuf)
lock(&mheap_.lock)
purgecachedstats(c)
fixAlloc_Free(&mheap_.cachealloc, unsafe.Pointer(c))
unlock(&mheap_.lock)
})
}
// Gets a span that has a free object in it and assigns it
// to be the cached span for the given sizeclass. Returns this span.
func mCache_Refill(c *mcache, sizeclass int32) *mspan {
_g_ := getg()
_g_.m.locks++
// Return the current cached span to the central lists.
s := c.alloc[sizeclass]
if s.freelist != nil {
gothrow("refill on a nonempty span")
}
if s != &emptymspan {
s.incache = false
}
// Get a new cached span from the central lists.
s = mCentral_CacheSpan(&mheap_.central[sizeclass].mcentral)
if s == nil {
gothrow("out of memory")
}
if s.freelist == nil {
println(s.ref, (s.npages<<_PageShift)/s.elemsize)
gothrow("empty span")
}
c.alloc[sizeclass] = s
_g_.m.locks--
return s
}
func mCache_ReleaseAll(c *mcache) {
for i := 0; i < _NumSizeClasses; i++ {
s := c.alloc[i]
if s != &emptymspan {
mCentral_UncacheSpan(&mheap_.central[i].mcentral, s)
c.alloc[i] = &emptymspan
}
}
}
......@@ -10,118 +10,110 @@
// Each MCentral is two lists of MSpans: those with free objects (c->nonempty)
// and those that are completely allocated (c->empty).
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
package runtime
static MSpan* MCentral_Grow(MCentral *c);
import "unsafe"
// Initialize a single central free list.
void
runtime·MCentral_Init(MCentral *c, int32 sizeclass)
{
c->sizeclass = sizeclass;
runtime·MSpanList_Init(&c->nonempty);
runtime·MSpanList_Init(&c->empty);
func mCentral_Init(c *mcentral, sizeclass int32) {
c.sizeclass = sizeclass
mSpanList_Init(&c.nonempty)
mSpanList_Init(&c.empty)
}
// Allocate a span to use in an MCache.
MSpan*
runtime·MCentral_CacheSpan(MCentral *c)
{
MSpan *s;
int32 cap, n;
uint32 sg;
runtime·lock(&c->lock);
sg = runtime·mheap.sweepgen;
func mCentral_CacheSpan(c *mcentral) *mspan {
lock(&c.lock)
sg := mheap_.sweepgen
retry:
for(s = c->nonempty.next; s != &c->nonempty; s = s->next) {
if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
runtime·MSpanList_Remove(s);
runtime·MSpanList_InsertBack(&c->empty, s);
runtime·unlock(&c->lock);
runtime·MSpan_Sweep(s, true);
goto havespan;
var s *mspan
for s = c.nonempty.next; s != &c.nonempty; s = s.next {
if s.sweepgen == sg-2 && cas(&s.sweepgen, sg-2, sg-1) {
mSpanList_Remove(s)
mSpanList_InsertBack(&c.empty, s)
unlock(&c.lock)
mSpan_Sweep(s, true)
goto havespan
}
if(s->sweepgen == sg-1) {
if s.sweepgen == sg-1 {
// the span is being swept by background sweeper, skip
continue;
continue
}
// we have a nonempty span that does not require sweeping, allocate from it
runtime·MSpanList_Remove(s);
runtime·MSpanList_InsertBack(&c->empty, s);
runtime·unlock(&c->lock);
goto havespan;
mSpanList_Remove(s)
mSpanList_InsertBack(&c.empty, s)
unlock(&c.lock)
goto havespan
}
for(s = c->empty.next; s != &c->empty; s = s->next) {
if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
for s = c.empty.next; s != &c.empty; s = s.next {
if s.sweepgen == sg-2 && cas(&s.sweepgen, sg-2, sg-1) {
// we have an empty span that requires sweeping,
// sweep it and see if we can free some space in it
runtime·MSpanList_Remove(s);
mSpanList_Remove(s)
// swept spans are at the end of the list
runtime·MSpanList_InsertBack(&c->empty, s);
runtime·unlock(&c->lock);
runtime·MSpan_Sweep(s, true);
if(s->freelist != nil)
goto havespan;
runtime·lock(&c->lock);
mSpanList_InsertBack(&c.empty, s)
unlock(&c.lock)
mSpan_Sweep(s, true)
if s.freelist != nil {
goto havespan
}
lock(&c.lock)
// the span is still empty after sweep
// it is already in the empty list, so just retry
goto retry;
goto retry
}
if(s->sweepgen == sg-1) {
if s.sweepgen == sg-1 {
// the span is being swept by background sweeper, skip
continue;
continue
}
// already swept empty span,
// all subsequent ones must also be either swept or in process of sweeping
break;
break
}
runtime·unlock(&c->lock);
unlock(&c.lock)
// Replenish central list if empty.
s = MCentral_Grow(c);
if(s == nil)
return nil;
runtime·lock(&c->lock);
runtime·MSpanList_InsertBack(&c->empty, s);
runtime·unlock(&c->lock);
s = mCentral_Grow(c)
if s == nil {
return nil
}
lock(&c.lock)
mSpanList_InsertBack(&c.empty, s)
unlock(&c.lock)
havespan:
// At this point s is a non-empty span, queued at the end of the empty list,
// c is unlocked.
cap = (s->npages << PageShift) / s->elemsize;
n = cap - s->ref;
if(n == 0)
runtime·throw("empty span");
if(s->freelist == nil)
runtime·throw("freelist empty");
s->incache = true;
return s;
havespan:
cap := int32((s.npages << _PageShift) / s.elemsize)
n := cap - int32(s.ref)
if n == 0 {
gothrow("empty span")
}
if s.freelist == nil {
gothrow("freelist empty")
}
s.incache = true
return s
}
// Return span from an MCache.
void
runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s)
{
int32 cap, n;
runtime·lock(&c->lock);
func mCentral_UncacheSpan(c *mcentral, s *mspan) {
lock(&c.lock)
s->incache = false;
s.incache = false
if(s->ref == 0)
runtime·throw("uncaching full span");
if s.ref == 0 {
gothrow("uncaching full span")
}
cap = (s->npages << PageShift) / s->elemsize;
n = cap - s->ref;
if(n > 0) {
runtime·MSpanList_Remove(s);
runtime·MSpanList_Insert(&c->nonempty, s);
cap := int32((s.npages << _PageShift) / s.elemsize)
n := cap - int32(s.ref)
if n > 0 {
mSpanList_Remove(s)
mSpanList_Insert(&c.nonempty, s)
}
runtime·unlock(&c->lock);
unlock(&c.lock)
}
// Free n objects from a span s back into the central free list c.
......@@ -130,85 +122,78 @@ runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s)
// the latest generation.
// If preserve=true, don't return the span to heap nor relink in MCentral lists;
// caller takes care of it.
bool
runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve)
{
bool wasempty;
if(s->incache)
runtime·throw("freespan into cached span");
func mCentral_FreeSpan(c *mcentral, s *mspan, n int32, start *mlink, end *mlink, preserve bool) bool {
if s.incache {
gothrow("freespan into cached span")
}
// Add the objects back to s's free list.
wasempty = s->freelist == nil;
end->next = s->freelist;
s->freelist = start;
s->ref -= n;
wasempty := s.freelist == nil
end.next = s.freelist
s.freelist = start
s.ref -= uint16(n)
if(preserve) {
if preserve {
// preserve is set only when called from MCentral_CacheSpan above,
// the span must be in the empty list.
if(s->next == nil)
runtime·throw("can't preserve unlinked span");
runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
return false;
if s.next == nil {
gothrow("can't preserve unlinked span")
}
atomicstore(&s.sweepgen, mheap_.sweepgen)
return false
}
runtime·lock(&c->lock);
lock(&c.lock)
// Move to nonempty if necessary.
if(wasempty) {
runtime·MSpanList_Remove(s);
runtime·MSpanList_Insert(&c->nonempty, s);
if wasempty {
mSpanList_Remove(s)
mSpanList_Insert(&c.nonempty, s)
}
// delay updating sweepgen until here. This is the signal that
// the span may be used in an MCache, so it must come after the
// linked list operations above (actually, just after the
// lock of c above.)
runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
atomicstore(&s.sweepgen, mheap_.sweepgen)
if(s->ref != 0) {
runtime·unlock(&c->lock);
return false;
if s.ref != 0 {
unlock(&c.lock)
return false
}
// s is completely freed, return it to the heap.
runtime·MSpanList_Remove(s);
s->needzero = 1;
s->freelist = nil;
runtime·unlock(&c->lock);
runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
runtime·MHeap_Free(&runtime·mheap, s, 0);
return true;
mSpanList_Remove(s)
s.needzero = 1
s.freelist = nil
unlock(&c.lock)
unmarkspan(uintptr(s.start)<<_PageShift, s.npages<<_PageShift)
mHeap_Free(&mheap_, s, 0)
return true
}
// Fetch a new span from the heap and carve into objects for the free list.
static MSpan*
MCentral_Grow(MCentral *c)
{
uintptr size, npages, i, n;
MLink **tailp, *v;
byte *p;
MSpan *s;
npages = runtime·class_to_allocnpages[c->sizeclass];
size = runtime·class_to_size[c->sizeclass];
n = (npages << PageShift) / size;
s = runtime·MHeap_Alloc(&runtime·mheap, npages, c->sizeclass, 0, 1);
if(s == nil)
return nil;
func mCentral_Grow(c *mcentral) *mspan {
npages := uintptr(class_to_allocnpages[c.sizeclass])
size := uintptr(class_to_size[c.sizeclass])
n := (npages << _PageShift) / size
s := mHeap_Alloc(&mheap_, npages, c.sizeclass, false, true)
if s == nil {
return nil
}
// Carve span into sequence of blocks.
tailp = &s->freelist;
p = (byte*)(s->start << PageShift);
s->limit = p + size*n;
for(i=0; i<n; i++) {
v = (MLink*)p;
*tailp = v;
tailp = &v->next;
p += size;
tailp := &s.freelist
p := uintptr(s.start << _PageShift)
s.limit = p + size*n
for i := uintptr(0); i < n; i++ {
v := (*mlink)(unsafe.Pointer(p))
*tailp = v
tailp = &v.next
p += size
}
*tailp = nil;
runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
return s;
*tailp = nil
markspan(unsafe.Pointer(uintptr(s.start)<<_PageShift), size, n, size*n < s.npages<<_PageShift)
return s
}
......@@ -59,7 +59,11 @@ type MemStats struct {
}
}
var sizeof_C_MStats uintptr // filled in by malloc.goc
// Size of the trailing by_size array differs between Go and C,
// and all data after by_size is local to runtime, not exported.
// NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
// sizeof_C_MStats is the size of the Go struct as the C code sees it.
var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0])
func init() {
var memStats MemStats
......
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "runtime.h"
#include "arch_GOARCH.h"
#include "defs_GOOS_GOARCH.h"
#include "os_GOOS.h"
#include "malloc.h"
#include "textflag.h"
#pragma textflag NOSPLIT
void*
runtime·sysAlloc(uintptr n, uint64 *stat)
{
void *v;
v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
if(v < (void*)4096)
return nil;
runtime·xadd64(stat, n);
return v;
}
void
runtime·SysUnused(void *v, uintptr n)
{
// Linux's MADV_DONTNEED is like BSD's MADV_FREE.
runtime·madvise(v, n, MADV_FREE);
}
void
runtime·SysUsed(void *v, uintptr n)
{
USED(v);
USED(n);
}
void
runtime·SysFree(void *v, uintptr n, uint64 *stat)
{
runtime·xadd64(stat, -(uint64)n);
runtime·munmap(v, n);
}
void
runtime·SysFault(void *v, uintptr n)
{
runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
}
void*
runtime·SysReserve(void *v, uintptr n, bool *reserved)
{
void *p;
*reserved = true;
p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
if(p < (void*)4096)
return nil;
return p;
}
enum
{
ENOMEM = 12,
};
void
runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
{
void *p;
USED(reserved);
runtime·xadd64(stat, n);
p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
if(p == (void*)ENOMEM)
runtime·throw("runtime: out of memory");
if(p != v)
runtime·throw("runtime: cannot map pages in arena address space");
}
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import "unsafe"
//go:nosplit
func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer {
v := (unsafe.Pointer)(mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
if uintptr(v) < 4096 {
return nil
}
xadd64(stat, int64(n))
return v
}
func sysUnused(v unsafe.Pointer, n uintptr) {
// Linux's MADV_DONTNEED is like BSD's MADV_FREE.
madvise(v, n, _MADV_FREE)
}
func sysUsed(v unsafe.Pointer, n uintptr) {
}
func sysFree(v unsafe.Pointer, n uintptr, stat *uint64) {
xadd64(stat, -int64(n))
munmap(v, n)
}
func sysFault(v unsafe.Pointer, n uintptr) {
mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}
func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
*reserved = true
p := (unsafe.Pointer)(mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
if uintptr(p) < 4096 {
return nil
}
return p
}
const (
_ENOMEM = 12
)
func sysMap(v unsafe.Pointer, n uintptr, reserved bool, stat *uint64) {
xadd64(stat, int64(n))
p := (unsafe.Pointer)(mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0))
if uintptr(p) == _ENOMEM {
gothrow("runtime: out of memory")
}
if p != v {
gothrow("runtime: cannot map pages in arena address space")
}
}
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "runtime.h"
#include "arch_GOARCH.h"
#include "defs_GOOS_GOARCH.h"
#include "os_GOOS.h"
#include "malloc.h"
#include "textflag.h"
enum
{
_PAGE_SIZE = 4096,
EACCES = 13,
};
static int32
addrspace_free(void *v, uintptr n)
{
int32 errval;
uintptr chunk;
uintptr off;
// NOTE: vec must be just 1 byte long here.
// Mincore returns ENOMEM if any of the pages are unmapped,
// but we want to know that all of the pages are unmapped.
// To make these the same, we can only ask about one page
// at a time. See golang.org/issue/7476.
static byte vec[1];
for(off = 0; off < n; off += chunk) {
chunk = _PAGE_SIZE * sizeof vec;
if(chunk > (n - off))
chunk = n - off;
errval = runtime·mincore((int8*)v + off, chunk, vec);
// ENOMEM means unmapped, which is what we want.
// Anything else we assume means the pages are mapped.
if (errval != -ENOMEM)
return 0;
}
return 1;
}
static void *
mmap_fixed(byte *v, uintptr n, int32 prot, int32 flags, int32 fd, uint32 offset)
{
void *p;
p = runtime·mmap(v, n, prot, flags, fd, offset);
if(p != v && addrspace_free(v, n)) {
// On some systems, mmap ignores v without
// MAP_FIXED, so retry if the address space is free.
if(p > (void*)4096)
runtime·munmap(p, n);
p = runtime·mmap(v, n, prot, flags|MAP_FIXED, fd, offset);
}
return p;
}
#pragma textflag NOSPLIT
void*
runtime·sysAlloc(uintptr n, uint64 *stat)
{
void *p;
p = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
if(p < (void*)4096) {
if(p == (void*)EACCES) {
runtime·printf("runtime: mmap: access denied\n");
runtime·printf("if you're running SELinux, enable execmem for this process.\n");
runtime·exit(2);
}
if(p == (void*)EAGAIN) {
runtime·printf("runtime: mmap: too much locked memory (check 'ulimit -l').\n");
runtime·exit(2);
}
return nil;
}
runtime·xadd64(stat, n);
return p;
}
void
runtime·SysUnused(void *v, uintptr n)
{
runtime·madvise(v, n, MADV_DONTNEED);
}
void
runtime·SysUsed(void *v, uintptr n)
{
USED(v);
USED(n);
}
void
runtime·SysFree(void *v, uintptr n, uint64 *stat)
{
runtime·xadd64(stat, -(uint64)n);
runtime·munmap(v, n);
}
void
runtime·SysFault(void *v, uintptr n)
{
runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
}
void*
runtime·SysReserve(void *v, uintptr n, bool *reserved)
{
void *p;
// On 64-bit, people with ulimit -v set complain if we reserve too
// much address space. Instead, assume that the reservation is okay
// if we can reserve at least 64K and check the assumption in SysMap.
// Only user-mode Linux (UML) rejects these requests.
if(sizeof(void*) == 8 && n > 1LL<<32) {
p = mmap_fixed(v, 64<<10, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
if (p != v) {
if(p >= (void*)4096)
runtime·munmap(p, 64<<10);
return nil;
}
runtime·munmap(p, 64<<10);
*reserved = false;
return v;
}
p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
if((uintptr)p < 4096)
return nil;
*reserved = true;
return p;
}
void
runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
{
void *p;
runtime·xadd64(stat, n);
// On 64-bit, we don't actually have v reserved, so tread carefully.
if(!reserved) {
p = mmap_fixed(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
if(p == (void*)ENOMEM)
runtime·throw("runtime: out of memory");
if(p != v) {
runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
runtime·throw("runtime: address space conflict");
}
return;
}
p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
if(p == (void*)ENOMEM)
runtime·throw("runtime: out of memory");
if(p != v)
runtime·throw("runtime: cannot map pages in arena address space");
}
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import "unsafe"
const (
_PAGE_SIZE = 4096
_EACCES = 13
)
// NOTE: vec must be just 1 byte long here.
// Mincore returns ENOMEM if any of the pages are unmapped,
// but we want to know that all of the pages are unmapped.
// To make these the same, we can only ask about one page
// at a time. See golang.org/issue/7476.
var addrspace_vec [1]byte
func addrspace_free(v unsafe.Pointer, n uintptr) bool {
var chunk uintptr
for off := uintptr(0); off < n; off += chunk {
chunk = _PAGE_SIZE * uintptr(len(addrspace_vec))
if chunk > (n - off) {
chunk = n - off
}
errval := mincore(unsafe.Pointer(uintptr(v)+off), chunk, &addrspace_vec[0])
// ENOMEM means unmapped, which is what we want.
// Anything else we assume means the pages are mapped.
if errval != -_ENOMEM {
return false
}
}
return true
}
func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer {
p := mmap(v, n, prot, flags, fd, offset)
if p != v && addrspace_free(v, n) {
// On some systems, mmap ignores v without
// MAP_FIXED, so retry if the address space is free.
if uintptr(p) > 4096 {
munmap(p, n)
}
p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
}
return p
}
//go:nosplit
func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer {
p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
if uintptr(p) < 4096 {
if uintptr(p) == _EACCES {
print("runtime: mmap: access denied\n")
print("if you're running SELinux, enable execmem for this process.\n")
exit(2)
}
if uintptr(p) == _EAGAIN {
print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
exit(2)
}
return nil
}
xadd64(stat, int64(n))
return p
}
func sysUnused(v unsafe.Pointer, n uintptr) {
madvise(v, n, _MADV_DONTNEED)
}
func sysUsed(v unsafe.Pointer, n uintptr) {
}
func sysFree(v unsafe.Pointer, n uintptr, stat *uint64) {
xadd64(stat, -int64(n))
munmap(v, n)
}
func sysFault(v unsafe.Pointer, n uintptr) {
mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}
func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
// On 64-bit, people with ulimit -v set complain if we reserve too
// much address space. Instead, assume that the reservation is okay
// if we can reserve at least 64K and check the assumption in SysMap.
// Only user-mode Linux (UML) rejects these requests.
if ptrSize == 8 && uint64(n) > 1<<32 {
p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
if p != v {
if uintptr(p) >= 4096 {
munmap(p, 64<<10)
}
return nil
}
munmap(p, 64<<10)
*reserved = false
return v
}
p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
if uintptr(p) < 4096 {
return nil
}
*reserved = true
return p
}
func sysMap(v unsafe.Pointer, n uintptr, reserved bool, stat *uint64) {
xadd64(stat, int64(n))
// On 64-bit, we don't actually have v reserved, so tread carefully.
if !reserved {
p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
if uintptr(p) == _ENOMEM {
gothrow("runtime: out of memory")
}
if p != v {
print("runtime: address space conflict: map(", v, ") = ", p, "\n")
gothrow("runtime: address space conflict")
}
return
}
p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
if uintptr(p) == _ENOMEM {
gothrow("runtime: out of memory")
}
if p != v {
gothrow("runtime: cannot map pages in arena address space")
}
}
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Fixed-size object allocator. Returned memory is not zeroed.
//
// See malloc.h for overview.
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
// Initialize f to allocate objects of the given size,
// using the allocator to obtain chunks of memory.
void
runtime·FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat)
{
f->size = size;
f->first = first;
f->arg = arg;
f->list = nil;
f->chunk = nil;
f->nchunk = 0;
f->inuse = 0;
f->stat = stat;
}
void*
runtime·FixAlloc_Alloc(FixAlloc *f)
{
void *v;
if(f->size == 0) {
runtime·printf("runtime: use of FixAlloc_Alloc before FixAlloc_Init\n");
runtime·throw("runtime: internal error");
}
if(f->list) {
v = f->list;
f->list = *(void**)f->list;
f->inuse += f->size;
return v;
}
if(f->nchunk < f->size) {
f->chunk = runtime·persistentalloc(FixAllocChunk, 0, f->stat);
f->nchunk = FixAllocChunk;
}
v = f->chunk;
if(f->first)
f->first(f->arg, v);
f->chunk += f->size;
f->nchunk -= f->size;
f->inuse += f->size;
return v;
}
void
runtime·FixAlloc_Free(FixAlloc *f, void *p)
{
f->inuse -= f->size;
*(void**)p = f->list;
f->list = p;
}
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Fixed-size object allocator. Returned memory is not zeroed.
//
// See malloc.h for overview.
package runtime
import "unsafe"
// Initialize f to allocate objects of the given size,
// using the allocator to obtain chunks of memory.
func fixAlloc_Init(f *fixalloc, size uintptr, first func(unsafe.Pointer, unsafe.Pointer), arg unsafe.Pointer, stat *uint64) {
f.size = size
f.first = *(*unsafe.Pointer)(unsafe.Pointer(&first))
f.arg = arg
f.list = nil
f.chunk = nil
f.nchunk = 0
f.inuse = 0
f.stat = stat
}
func fixAlloc_Alloc(f *fixalloc) unsafe.Pointer {
if f.size == 0 {
print("runtime: use of FixAlloc_Alloc before FixAlloc_Init\n")
gothrow("runtime: internal error")
}
if f.list != nil {
v := unsafe.Pointer(f.list)
f.list = f.list.next
f.inuse += f.size
return v
}
if uintptr(f.nchunk) < f.size {
f.chunk = (*uint8)(persistentalloc(_FixAllocChunk, 0, f.stat))
f.nchunk = _FixAllocChunk
}
v := (unsafe.Pointer)(f.chunk)
if f.first != nil {
fn := *(*func(unsafe.Pointer, unsafe.Pointer))(unsafe.Pointer(&f.first))
fn(f.arg, v)
}
f.chunk = (*byte)(add(unsafe.Pointer(f.chunk), f.size))
f.nchunk -= uint32(f.size)
f.inuse += f.size
return v
}
func fixAlloc_Free(f *fixalloc, p unsafe.Pointer) {
f.inuse -= f.size
v := (*mlink)(p)
v.next = f.list
f.list = v
}
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// TODO(rsc): The code having to do with the heap bitmap needs very serious cleanup.
// It has gotten completely out of control.
// Garbage collector (GC).
//
// GC is:
// - mark&sweep
// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
// - parallel (up to MaxGcproc threads)
// - partially concurrent (mark is stop-the-world, while sweep is concurrent)
// - non-moving/non-compacting
// - full (non-partial)
//
// GC rate.
// Next GC is after we've allocated an extra amount of memory proportional to
// the amount already in use. The proportion is controlled by GOGC environment variable
// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
// (and also the amount of extra memory used).
//
// Concurrent sweep.
// The sweep phase proceeds concurrently with normal program execution.
// The heap is swept span-by-span both lazily (when a goroutine needs another span)
// and concurrently in a background goroutine (this helps programs that are not CPU bound).
// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
// and so next_gc calculation is tricky and happens as follows.
// At the end of the stop-the-world phase next_gc is conservatively set based on total
// heap size; all spans are marked as "needs sweeping".
// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
// The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
// closer to the target value. However, this is not enough to avoid over-allocating memory.
// Consider that a goroutine wants to allocate a new span for a large object and
// there are no free swept spans, but there are small-object unswept spans.
// If the goroutine naively allocates a new span, it can surpass the yet-unknown
// target next_gc value. In order to prevent such cases (1) when a goroutine needs
// to allocate a new small-object span, it sweeps small-object spans for the same
// object size until it frees at least one object; (2) when a goroutine needs to
// allocate a large-object span from the heap, it sweeps spans until it frees at least
// that many pages into the heap. Together these two measures ensure that we don't surpass
// target next_gc value by a large margin. There is an exception: if a goroutine sweeps
// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
// but there can still be other one-page unswept spans which could be combined into a two-page span.
// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
// The finalizer goroutine is kicked off only when all spans are swept.
// When the next GC starts, it sweeps all not-yet-swept spans (if any).
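The GC rate paragraph above boils down to arithmetic on GOGC: the next collection is triggered once the heap grows by GOGC percent over what was live after the previous one. A standalone sketch of that trigger computation (hypothetical helper mirroring the description, not the runtime's exact code):

package main

import "fmt"

// nextGC returns the heap size at which the next collection would start,
// per the description above; a negative setting is treated as GOGC=off.
func nextGC(liveHeap uint64, gogc int64) uint64 {
	if gogc < 0 {
		return ^uint64(0)
	}
	return liveHeap + liveHeap*uint64(gogc)/100
}

func main() {
	fmt.Println(nextGC(4<<20, 100)) // 4M live, GOGC=100 -> 8M (8388608)
	fmt.Println(nextGC(4<<20, 50))  // 4M live, GOGC=50  -> 6M (6291456)
}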
package runtime
import "unsafe"
const (
_DebugGC = 0
_DebugGCPtrs = false // if true, print trace of every pointer load during GC
_ConcurrentSweep = true
_WorkbufSize = 4 * 1024
_FinBlockSize = 4 * 1024
_RootData = 0
_RootBss = 1
_RootFinalizers = 2
_RootSpans = 3
_RootFlushCaches = 4
_RootCount = 5
)
// ptrmask for an allocation containing a single pointer.
var oneptr = [...]uint8{bitsPointer}
// Initialized from $GOGC. GOGC=off means no gc.
var gcpercent int32
// Holding worldsema grants an M the right to try to stop the world.
// The procedure is:
//
// semacquire(&worldsema);
// m.gcing = 1;
// stoptheworld();
//
// ... do stuff ...
//
// m.gcing = 0;
// semrelease(&worldsema);
// starttheworld();
//
var worldsema uint32 = 1
type workbuf struct {
node lfnode // must be first
nobj uintptr
obj [(_WorkbufSize - unsafe.Sizeof(lfnode{}) - ptrSize) / ptrSize]uintptr
}
var data, edata, bss, ebss, gcdata, gcbss struct{}
var finlock mutex // protects the following variables
var fing *g // goroutine that runs finalizers
var finq *finblock // list of finalizers that are to be executed
var finc *finblock // cache of free blocks
var finptrmask [_FinBlockSize / ptrSize / pointersPerByte]byte
var fingwait bool
var fingwake bool
var allfin *finblock // list of all blocks
var gcdatamask bitvector
var gcbssmask bitvector
var gclock mutex
var badblock [1024]uintptr
var nbadblock int32
type workdata struct {
full uint64 // lock-free list of full blocks
empty uint64 // lock-free list of empty blocks
pad0 [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
nproc uint32
tstart int64
nwait uint32
ndone uint32
alldone note
markfor *parfor
// Copy of mheap.allspans for marker or sweeper.
spans []*mspan
}
var work workdata
//go:linkname weak_cgo_allocate go.weak.runtime._cgo_allocate_internal
var weak_cgo_allocate byte
// Is _cgo_allocate linked into the binary?
func have_cgo_allocate() bool {
return &weak_cgo_allocate != nil
}
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
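The explicit work list described above replaces recursion with a queue of pending blocks that is drained in the function body. A standalone sketch of that control structure on a toy object graph (not the runtime's scanblock, just the same pattern):

package main

import "fmt"

type obj struct {
	id   int
	refs []*obj
}

// markAll visits everything reachable from roots using an explicit work
// list instead of recursion; a marked set keeps cycles from looping.
func markAll(roots []*obj) map[*obj]bool {
	marked := make(map[*obj]bool)
	work := append([]*obj(nil), roots...)
	for len(work) > 0 {
		o := work[len(work)-1]
		work = work[:len(work)-1]
		if o == nil || marked[o] {
			continue
		}
		marked[o] = true
		work = append(work, o.refs...) // queue children instead of recursing
	}
	return marked
}

func main() {
	c := &obj{id: 3}
	b := &obj{id: 2, refs: []*obj{c}}
	a := &obj{id: 1, refs: []*obj{b, c}}
	c.refs = []*obj{a} // a cycle is fine
	fmt.Println("marked objects:", len(markAll([]*obj{a}))) // 3
}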
func scanblock(b, n uintptr, ptrmask *uint8) {
// Cache memory arena parameters in local vars.
arena_start := mheap_.arena_start
arena_used := mheap_.arena_used
wbuf := getempty(nil)
nobj := wbuf.nobj
wp := &wbuf.obj[nobj]
keepworking := b == 0
var ptrbitp unsafe.Pointer
// ptrmask can have 2 possible values:
// 1. nil - obtain pointer mask from GC bitmap.
// 2. pointer to a compact mask (for stacks and data).
goto_scanobj := b != 0
for {
if goto_scanobj {
goto_scanobj = false
} else {
if nobj == 0 {
// Out of work in workbuf.
if !keepworking {
putempty(wbuf)
return
}
// Refill workbuf from global queue.
wbuf = getfull(wbuf)
if wbuf == nil {
return
}
nobj = wbuf.nobj
if nobj < uintptr(len(wbuf.obj)) {
wp = &wbuf.obj[nobj]
} else {
wp = nil
}
}
// If another proc wants a pointer, give it some.
if work.nwait > 0 && nobj > 4 && work.full == 0 {
wbuf.nobj = nobj
wbuf = handoff(wbuf)
nobj = wbuf.nobj
if nobj < uintptr(len(wbuf.obj)) {
wp = &wbuf.obj[nobj]
} else {
wp = nil
}
}
nobj--
wp = &wbuf.obj[nobj]
b = *wp
n = arena_used - uintptr(b)
ptrmask = nil // use GC bitmap for pointer info
}
if _DebugGCPtrs {
print("scanblock ", b, " +", hex(n), " ", ptrmask, "\n")
}
// Find bits of the beginning of the object.
if ptrmask == nil {
off := (uintptr(b) - arena_start) / ptrSize
ptrbitp = unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1)
}
var i uintptr
for i = 0; i < n; i += ptrSize {
// Find bits for this word.
var bits uintptr
if ptrmask == nil {
// Check if we have reached end of span.
if (uintptr(b)+i)%_PageSize == 0 &&
h_spans[(uintptr(b)-arena_start)>>_PageShift] != h_spans[(uintptr(b)+i-arena_start)>>_PageShift] {
break
}
// Consult GC bitmap.
bits = uintptr(*(*byte)(ptrbitp))
if wordsPerBitmapByte != 2 {
gothrow("alg doesn't work for wordsPerBitmapByte != 2")
}
j := (uintptr(b) + i) / ptrSize & 1
ptrbitp = add(ptrbitp, -j)
bits >>= gcBits * j
if bits&bitBoundary != 0 && i != 0 {
break // reached beginning of the next object
}
bits = (bits >> 2) & bitsMask
if bits == bitsDead {
break // reached no-scan part of the object
}
} else {
// dense mask (stack or data)
bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * bitsPerPointer)) & bitsMask
}
if bits <= _BitsScalar { // BitsScalar || BitsDead
continue
}
if bits != _BitsPointer {
gothrow("unexpected garbage collection bits")
}
obj := *(*uintptr)(unsafe.Pointer(b + i))
obj0 := obj
markobj:
var s *mspan
var off, bitp, shift, xbits uintptr
// At this point we have extracted the next potential pointer.
// Check if it points into heap.
if obj == 0 {
continue
}
if obj < arena_start || arena_used <= obj {
if uintptr(obj) < _PhysPageSize && invalidptr != 0 {
s = nil
goto badobj
}
continue
}
// Mark the object.
obj &^= ptrSize - 1
off = (obj - arena_start) / ptrSize
bitp = arena_start - off/wordsPerBitmapByte - 1
shift = (off % wordsPerBitmapByte) * gcBits
xbits = uintptr(*(*byte)(unsafe.Pointer(bitp)))
bits = (xbits >> shift) & bitMask
if (bits & bitBoundary) == 0 {
// Not a beginning of a block, consult span table to find the block beginning.
k := pageID(obj >> _PageShift)
x := k
x -= pageID(arena_start >> _PageShift)
s = h_spans[x]
if s == nil || k < s.start || s.limit <= obj || s.state != mSpanInUse {
// Stack pointers lie within the arena bounds but are not part of the GC heap.
// Ignore them.
if s != nil && s.state == _MSpanStack {
continue
}
goto badobj
}
p := uintptr(s.start) << _PageShift
if s.sizeclass != 0 {
size := s.elemsize
idx := (obj - p) / size
p = p + idx*size
}
if p == obj {
print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), "\n")
gothrow("failed to find block beginning")
}
obj = p
goto markobj
}
if _DebugGCPtrs {
print("scan *", hex(b+i), " = ", hex(obj0), " => base ", hex(obj), "\n")
}
if nbadblock > 0 && obj == badblock[nbadblock-1] {
// Running garbage collection again because
// we want to find the path from a root to a bad pointer.
// Found possible next step; extend or finish path.
for j := int32(0); j < nbadblock; j++ {
if badblock[j] == b {
goto AlreadyBad
}
}
print("runtime: found *(", hex(b), "+", hex(i), ") = ", hex(obj0), "+", hex(obj-obj0), "\n")
if ptrmask != nil {
gothrow("bad pointer")
}
if nbadblock >= int32(len(badblock)) {
gothrow("badblock trace too long")
}
badblock[nbadblock] = uintptr(b)
nbadblock++
AlreadyBad:
}
// Now we have bits, bitp, and shift correct for
// obj pointing at the base of the object.
// Only care about not marked objects.
if bits&bitMarked != 0 {
continue
}
// If the object size is greater than 8, then each byte of the GC bitmap
// contains info for at most one object. In that case we use a
// non-atomic byte store to mark the object. This can lead
// to a double enqueue of the object for scanning, but scanning
// is an idempotent operation, so it is OK. This cannot lead
// to bitmap corruption because the single marked bit is the
// only thing that can change in the byte.
// For 8-byte objects we use a non-atomic store if the other
// quadruple is already marked. Otherwise we resort to a CAS
// loop for marking.
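// Concretely (as the code below uses it): each heap word has a 4-bit nibble
// in the bitmap, two words per bitmap byte. The low 2 bits of a nibble hold
// bitBoundary and bitMarked, the high 2 bits hold the pointer-type bits
// (bitsDead/bitsScalar/bitsPointer), which is why the mark is ORed in at
// shift and the type bits are read at shift+2.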
if xbits&(bitMask|bitMask<<gcBits) != bitBoundary|bitBoundary<<gcBits || work.nproc == 1 {
*(*byte)(unsafe.Pointer(bitp)) = uint8(xbits | bitMarked<<shift)
} else {
atomicor8((*byte)(unsafe.Pointer(bitp)), bitMarked<<shift)
}
if (xbits>>(shift+2))&bitsMask == bitsDead {
continue // noscan object
}
// Queue the obj for scanning.
// TODO: PREFETCH here.
// If workbuf is full, obtain an empty one.
if nobj >= uintptr(len(wbuf.obj)) {
wbuf.nobj = nobj
wbuf = getempty(wbuf)
nobj = wbuf.nobj
wp = &wbuf.obj[nobj]
}
*wp = obj
nobj++
if nobj < uintptr(len(wbuf.obj)) {
wp = &wbuf.obj[nobj]
} else {
wp = nil
}
continue
badobj:
// If cgo_allocate is linked into the binary, it can allocate
// memory as []unsafe.Pointer that may not contain actual
// pointers and must be scanned conservatively.
// In this case alone, allow the bad pointer.
if have_cgo_allocate() && ptrmask == nil {
continue
}
// Anything else indicates a bug somewhere.
// If we're in the middle of chasing down a different bad pointer,
// don't confuse the trace by printing about this one.
if nbadblock > 0 {
continue
}
print("runtime: garbage collector found invalid heap pointer *(", hex(b), "+", hex(i), ")=", hex(obj))
if s == nil {
print(" s=nil\n")
} else {
print(" span=", uintptr(s.start)<<_PageShift, "-", s.limit, "-", (uintptr(s.start)+s.npages)<<_PageShift, " state=", s.state, "\n")
}
if ptrmask != nil {
gothrow("invalid heap pointer")
}
// Add to badblock list, which will cause the garbage collection
// to keep repeating until it has traced the chain of pointers
// leading to obj all the way back to a root.
if nbadblock == 0 {
badblock[nbadblock] = uintptr(b)
nbadblock++
}
}
if _DebugGCPtrs {
print("end scanblock ", hex(b), " +", hex(n), " ", ptrmask, "\n")
}
if _DebugGC > 0 && ptrmask == nil {
// For heap objects ensure that we did not overscan.
var p, n uintptr
if mlookup(b, &p, &n, nil) == 0 || b != p || i > n {
print("runtime: scanned (", hex(b), "+", hex(i), "), heap object (", hex(p), "+", hex(n), ")\n")
gothrow("scanblock: scanned invalid object")
}
}
}
}
func markroot(desc *parfor, i uint32) {
// Note: if you add a case here, please also update heapdump.c:dumproots.
switch i {
case _RootData:
scanblock(uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)), gcdatamask.bytedata)
case _RootBss:
scanblock(uintptr(unsafe.Pointer(&bss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)), gcbssmask.bytedata)
case _RootFinalizers:
for fb := allfin; fb != nil; fb = fb.alllink {
scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0])
}
case _RootSpans:
// mark MSpan.specials
sg := mheap_.sweepgen
for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ {
s := work.spans[spanidx]
if s.state != mSpanInUse {
continue
}
if s.sweepgen != sg {
print("sweep ", s.sweepgen, " ", sg, "\n")
gothrow("gc: unswept span")
}
for sp := s.specials; sp != nil; sp = sp.next {
if sp.kind != _KindSpecialFinalizer {
continue
}
// don't mark finalized object, but scan it so we
// retain everything it points to.
spf := (*specialfinalizer)(unsafe.Pointer(sp))
// A finalizer can be set for an inner byte of an object, find object beginning.
p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
scanblock(p, s.elemsize, nil)
scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0])
}
}
case _RootFlushCaches:
flushallmcaches()
default:
// the rest is scanning goroutine stacks
if uintptr(i-_RootCount) >= allglen {
gothrow("markroot: bad index")
}
gp := allgs[i-_RootCount]
// remember when we've first observed the G blocked
// needed only to output in traceback
status := readgstatus(gp)
if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
gp.waitsince = work.tstart
}
// Shrink a stack if not much of it is being used.
shrinkstack(gp)
if readgstatus(gp) == _Gdead {
gp.gcworkdone = true
} else {
gp.gcworkdone = false
}
restart := stopg(gp)
scanstack(gp)
if restart {
restartg(gp)
}
}
}
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
func getempty(b *workbuf) *workbuf {
_g_ := getg()
if b != nil {
lfstackpush(&work.full, &b.node)
}
b = nil
c := _g_.m.mcache
if c.gcworkbuf != nil {
b = (*workbuf)(c.gcworkbuf)
c.gcworkbuf = nil
}
if b == nil {
b = (*workbuf)(lfstackpop(&work.empty))
}
if b == nil {
b = (*workbuf)(persistentalloc(unsafe.Sizeof(*b), _CacheLineSize, &memstats.gc_sys))
}
b.nobj = 0
return b
}
func putempty(b *workbuf) {
_g_ := getg()
c := _g_.m.mcache
if c.gcworkbuf == nil {
c.gcworkbuf = (unsafe.Pointer)(b)
return
}
lfstackpush(&work.empty, &b.node)
}
func gcworkbuffree(b unsafe.Pointer) {
if b != nil {
putempty((*workbuf)(b))
}
}
// Get a full work buffer off the work.full list, or return nil.
func getfull(b *workbuf) *workbuf {
if b != nil {
lfstackpush(&work.empty, &b.node)
}
b = (*workbuf)(lfstackpop(&work.full))
if b != nil || work.nproc == 1 {
return b
}
xadd(&work.nwait, +1)
for i := 0; ; i++ {
if work.full != 0 {
xadd(&work.nwait, -1)
b = (*workbuf)(lfstackpop(&work.full))
if b != nil {
return b
}
xadd(&work.nwait, +1)
}
if work.nwait == work.nproc {
return nil
}
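// Still no work: back off gradually. Spin with procyield for the first
// iterations, then yield the OS thread, then sleep, so an idle GC helper
// does not burn a whole CPU while other procs still hold work.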
_g_ := getg()
if i < 10 {
_g_.m.gcstats.nprocyield++
procyield(20)
} else if i < 20 {
_g_.m.gcstats.nosyield++
osyield()
} else {
_g_.m.gcstats.nsleep++
usleep(100)
}
}
}
func handoff(b *workbuf) *workbuf {
// Make new buffer with half of b's pointers.
b1 := getempty(nil)
n := b.nobj / 2
b.nobj -= n
b1.nobj = n
memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), n*unsafe.Sizeof(b1.obj[0]))
_g_ := getg()
_g_.m.gcstats.nhandoff++
_g_.m.gcstats.nhandoffcnt += uint64(n)
// Put b on full list - let first half of b get stolen.
lfstackpush(&work.full, &b.node)
return b1
}
func stackmapdata(stkmap *stackmap, n int32) bitvector {
if n < 0 || n >= stkmap.n {
gothrow("stackmapdata: index out of range")
}
return bitvector{stkmap.nbit, (*byte)(add(unsafe.Pointer(&stkmap.bytedata), uintptr(n*((stkmap.nbit+31)/32*4))))}
}
// Scan a stack frame: local variables and function arguments/results.
func scanframe(frame *stkframe, unused unsafe.Pointer) bool {
f := frame.fn
targetpc := frame.continpc
if targetpc == 0 {
// Frame is dead.
return true
}
if _DebugGC > 1 {
print("scanframe ", gofuncname(f), "\n")
}
if targetpc != f.entry {
targetpc--
}
pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
if pcdata == -1 {
// We do not have a valid pcdata value but there might be a
// stackmap for this function. It is likely that we are looking
// at the function prologue, assume so and hope for the best.
pcdata = 0
}
// Scan local variables if stack frame has been allocated.
size := frame.varp - frame.sp
var minsize uintptr
if thechar != '6' && thechar != '8' {
minsize = ptrSize
} else {
minsize = 0
}
if size > minsize {
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
if stkmap == nil || stkmap.n <= 0 {
print("runtime: frame ", gofuncname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
gothrow("missing stackmap")
}
// Locals bitmap information, scan just the pointers in locals.
if pcdata < 0 || pcdata >= stkmap.n {
// don't know where we are
print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", gofuncname(f), " (targetpc=", targetpc, ")\n")
gothrow("scanframe: bad symbol table")
}
bv := stackmapdata(stkmap, pcdata)
size = (uintptr(bv.n) * ptrSize) / bitsPerPointer
scanblock(frame.varp-size, uintptr(bv.n)/bitsPerPointer*ptrSize, bv.bytedata)
}
// Scan arguments.
if frame.arglen > 0 {
var bv bitvector
if frame.argmap != nil {
bv = *frame.argmap
} else {
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
if stkmap == nil || stkmap.n <= 0 {
print("runtime: frame ", gofuncname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
gothrow("missing stackmap")
}
if pcdata < 0 || pcdata >= stkmap.n {
// don't know where we are
print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", gofuncname(f), " (targetpc=", targetpc, ")\n")
gothrow("scanframe: bad symbol table")
}
bv = stackmapdata(stkmap, pcdata)
}
scanblock(frame.argp, uintptr(bv.n)/bitsPerPointer*ptrSize, bv.bytedata)
}
return true
}
func scanstack(gp *g) {
// TODO(rsc): Due to a precedence error, this was never checked in the original C version.
// If you enable the check, the gothrow happens.
/*
if readgstatus(gp)&_Gscan == 0 {
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
gothrow("mark - bad status")
}
*/
switch readgstatus(gp) &^ _Gscan {
default:
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
gothrow("mark - bad status")
case _Gdead:
return
case _Grunning:
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
gothrow("mark - world not stopped")
case _Grunnable, _Gsyscall, _Gwaiting:
// ok
}
if gp == getg() {
gothrow("can't scan our own stack")
}
mp := gp.m
if mp != nil && mp.helpgc != 0 {
gothrow("can't scan gchelper stack")
}
gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
tracebackdefers(gp, scanframe, nil)
}
// The gp has been moved to a gc safepoint. If there is gcphase specific
// work it is done here.
func gcphasework(gp *g) {
switch gcphase {
default:
gothrow("gcphasework in bad gcphase")
case _GCoff, _GCquiesce, _GCstw, _GCsweep:
// No work for now.
case _GCmark:
// Disabled until concurrent GC is implemented
// but indicate the scan has been done.
// scanstack(gp);
}
gp.gcworkdone = true
}
var finalizer1 = [...]byte{
// Each Finalizer is 5 words, ptr ptr uintptr ptr ptr.
// Each byte describes 4 words.
// Need 4 Finalizers described by 5 bytes before pattern repeats:
// ptr ptr uintptr ptr ptr
// ptr ptr uintptr ptr ptr
// ptr ptr uintptr ptr ptr
// ptr ptr uintptr ptr ptr
// aka
// ptr ptr uintptr ptr
// ptr ptr ptr uintptr
// ptr ptr ptr ptr
// uintptr ptr ptr ptr
// ptr uintptr ptr ptr
// Assumptions about Finalizer layout checked below.
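// Worked out: lcm(5 words per Finalizer, 4 words per mask byte) = 20 words,
// i.e. 4 Finalizers take exactly 5 mask bytes, after which the bit pattern
// repeats from the start.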
bitsPointer | bitsPointer<<2 | bitsScalar<<4 | bitsPointer<<6,
bitsPointer | bitsPointer<<2 | bitsPointer<<4 | bitsScalar<<6,
bitsPointer | bitsPointer<<2 | bitsPointer<<4 | bitsPointer<<6,
bitsScalar | bitsPointer<<2 | bitsPointer<<4 | bitsPointer<<6,
bitsPointer | bitsScalar<<2 | bitsPointer<<4 | bitsPointer<<6,
}
func queuefinalizer(p unsafe.Pointer, fn *funcval, nret uintptr, fint *_type, ot *ptrtype) {
lock(&finlock)
if finq == nil || finq.cnt == finq.cap {
if finc == nil {
finc = (*finblock)(persistentalloc(_FinBlockSize, 0, &memstats.gc_sys))
finc.cap = int32((_FinBlockSize-unsafe.Sizeof(finblock{}))/unsafe.Sizeof(finalizer{}) + 1)
finc.alllink = allfin
allfin = finc
if finptrmask[0] == 0 {
// Build pointer mask for Finalizer array in block.
// Check assumptions made in finalizer1 array above.
if (unsafe.Sizeof(finalizer{}) != 5*ptrSize ||
unsafe.Offsetof(finalizer{}.fn) != 0 ||
unsafe.Offsetof(finalizer{}.arg) != ptrSize ||
unsafe.Offsetof(finalizer{}.nret) != 2*ptrSize ||
unsafe.Offsetof(finalizer{}.fint) != 3*ptrSize ||
unsafe.Offsetof(finalizer{}.ot) != 4*ptrSize ||
bitsPerPointer != 2) {
gothrow("finalizer out of sync")
}
for i := range finptrmask {
finptrmask[i] = finalizer1[i%len(finalizer1)]
}
}
}
block := finc
finc = block.next
block.next = finq
finq = block
}
f := (*finalizer)(add(unsafe.Pointer(&finq.fin[0]), uintptr(finq.cnt)*unsafe.Sizeof(finq.fin[0])))
finq.cnt++
f.fn = fn
f.nret = nret
f.fint = fint
f.ot = ot
f.arg = p
fingwake = true
unlock(&finlock)
}
func iterate_finq(callback func(*funcval, unsafe.Pointer, uintptr, *_type, *ptrtype)) {
for fb := allfin; fb != nil; fb = fb.alllink {
for i := int32(0); i < fb.cnt; i++ {
f := &fb.fin[i]
callback(f.fn, f.arg, f.nret, f.fint, f.ot)
}
}
}
func mSpan_EnsureSwept(s *mspan) {
// Caller must disable preemption.
// Otherwise when this function returns the span can become unswept again
// (if GC is triggered on another goroutine).
_g_ := getg()
if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
gothrow("MSpan_EnsureSwept: m is not locked")
}
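// Sweep generation protocol, relative to sg = mheap_.sweepgen (which gc
// bumps by 2 after each mark phase):
//	s.sweepgen == sg-2: the span needs sweeping
//	s.sweepgen == sg-1: the span is being swept by some goroutine
//	s.sweepgen == sg:   the span has already been swept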
sg := mheap_.sweepgen
if atomicload(&s.sweepgen) == sg {
return
}
if cas(&s.sweepgen, sg-2, sg-1) {
mSpan_Sweep(s, false)
return
}
// unfortunate condition, and we don't have efficient means to wait
for atomicload(&s.sweepgen) != sg {
osyield()
}
}
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
func mSpan_Sweep(s *mspan, preserve bool) bool {
// It's critical that we enter this function with preemption disabled,
// GC must not start while we are in the middle of this function.
_g_ := getg()
if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
gothrow("MSpan_Sweep: m is not locked")
}
sweepgen := mheap_.sweepgen
if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
gothrow("MSpan_Sweep: bad span state")
}
arena_start := mheap_.arena_start
cl := s.sizeclass
size := s.elemsize
var n int32
var npages int32
if cl == 0 {
n = 1
} else {
// Chunk full of small blocks.
npages = class_to_allocnpages[cl]
n = (npages << _PageShift) / int32(size)
}
res := false
nfree := 0
var head mlink
end := &head
c := _g_.m.mcache
sweepgenset := false
// Mark any free objects in this span so we don't collect them.
for link := s.freelist; link != nil; link = link.next {
off := (uintptr(unsafe.Pointer(link)) - arena_start) / ptrSize
bitp := arena_start - off/wordsPerBitmapByte - 1
shift := (off % wordsPerBitmapByte) * gcBits
*(*byte)(unsafe.Pointer(bitp)) |= bitMarked << shift
}
// Unlink & free special records for any objects we're about to free.
specialp := &s.specials
special := *specialp
for special != nil {
// A finalizer can be set for an inner byte of an object, find object beginning.
p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
off := (p - arena_start) / ptrSize
bitp := arena_start - off/wordsPerBitmapByte - 1
shift := (off % wordsPerBitmapByte) * gcBits
bits := (*(*byte)(unsafe.Pointer(bitp)) >> shift) & bitMask
if bits&bitMarked == 0 {
// Find the exact byte for which the special was setup
// (as opposed to object beginning).
p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
// about to free object: splice out special record
y := special
special = special.next
*specialp = special
if !freespecial(y, unsafe.Pointer(p), size, false) {
// stop freeing of object if it has a finalizer
*(*byte)(unsafe.Pointer(bitp)) |= bitMarked << shift
}
} else {
// object is still live: keep special record
specialp = &special.next
special = *specialp
}
}
// Sweep through n objects of given size starting at p.
// This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations.
p := uintptr(s.start << _PageShift)
off := (p - arena_start) / ptrSize
bitp := arena_start - off/wordsPerBitmapByte - 1
shift := uint(0)
step := size / (ptrSize * wordsPerBitmapByte)
// Rewind to the previous quadruple, since we advance to the next one
// at the beginning of the loop body below.
bitp += step
if step == 0 {
// 8-byte objects.
bitp++
shift = gcBits
}
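// For example, with ptrSize = 8 and wordsPerBitmapByte = 2, a 48-byte size
// class gives step = 48/16 = 3: consecutive objects' boundary nibbles sit 3
// bitmap bytes apart, so the loop below walks bitp down in steps of 3.
// step == 0 happens only for 8-byte objects, whose two nibbles share one
// bitmap byte and are selected by toggling shift between 0 and gcBits.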
for ; n > 0; n, p = n-1, p+size {
bitp -= step
if step == 0 {
if shift != 0 {
bitp--
}
shift = gcBits - shift
}
xbits := *(*byte)(unsafe.Pointer(bitp))
bits := (xbits >> shift) & bitMask
// Allocated and marked object, reset bits to allocated.
if bits&bitMarked != 0 {
*(*byte)(unsafe.Pointer(bitp)) &^= bitMarked << shift
continue
}
// At this point we know that we are looking at a garbage object
// that needs to be collected.
if debug.allocfreetrace != 0 {
tracefree(unsafe.Pointer(p), size)
}
// Reset to allocated+noscan.
*(*byte)(unsafe.Pointer(bitp)) = uint8(uintptr(xbits&^((bitMarked|bitsMask<<2)<<shift)) | uintptr(bitsDead)<<(shift+2))
if cl == 0 {
// Free large span.
if preserve {
gothrow("can't preserve large span")
}
unmarkspan(p, s.npages<<_PageShift)
s.needzero = 1
// important to set sweepgen before returning it to heap
atomicstore(&s.sweepgen, sweepgen)
sweepgenset = true
// NOTE(rsc,dvyukov): The original implementation of efence
// in CL 22060046 used SysFree instead of SysFault, so that
// the operating system would eventually give the memory
// back to us again, so that an efence program could run
// longer without running out of memory. Unfortunately,
// calling SysFree here without any kind of adjustment of the
// heap data structures means that when the memory does
// come back to us, we have the wrong metadata for it, either in
// the MSpan structures or in the garbage collection bitmap.
// Using SysFault here means that the program will run out of
// memory fairly quickly in efence mode, but at least it won't
// have mysterious crashes due to confused memory reuse.
// It should be possible to switch back to SysFree if we also
// implement and then call some kind of MHeap_DeleteSpan.
if debug.efence > 0 {
s.limit = 0 // prevent mlookup from finding this span
sysFault(unsafe.Pointer(p), size)
} else {
mHeap_Free(&mheap_, s, 1)
}
c.local_nlargefree++
c.local_largefree += size
xadd64(&memstats.next_gc, -int64(size)*int64(gcpercent+100)/100)
res = true
} else {
// Free small object.
if size > 2*ptrSize {
*(*uintptr)(unsafe.Pointer(p + ptrSize)) = uintptrMask & 0xdeaddeaddeaddead // mark as "needs to be zeroed"
} else if size > ptrSize {
*(*uintptr)(unsafe.Pointer(p + ptrSize)) = 0
}
end.next = (*mlink)(unsafe.Pointer(p))
end = end.next
nfree++
}
}
// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation
// (return it to heap or mcentral), because allocation code assumes that a
// span is already swept if available for allocation.
if !sweepgenset && nfree == 0 {
// The span must be in our exclusive ownership until we update sweepgen,
// check for potential races.
if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
gothrow("MSpan_Sweep: bad span state after sweep")
}
atomicstore(&s.sweepgen, sweepgen)
}
if nfree > 0 {
c.local_nsmallfree[cl] += uintptr(nfree)
c.local_cachealloc -= intptr(uintptr(nfree) * size)
xadd64(&memstats.next_gc, -int64(nfree)*int64(size)*int64(gcpercent+100)/100)
res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head.next, end, preserve)
// MCentral_FreeSpan updates sweepgen
}
return res
}
// State of background sweep.
// Protected by gclock.
type sweepdata struct {
g *g
parked bool
started bool
spanidx uint32 // background sweeper position
nbgsweep uint32
npausesweep uint32
}
var sweep sweepdata
// sweeps one span
// returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep
func sweepone() uintptr {
_g_ := getg()
// Increment locks to ensure that the goroutine is not preempted
// in the middle of the sweep, which would leave the span in an inconsistent state for the next GC.
_g_.m.locks++
sg := mheap_.sweepgen
for {
idx := xadd(&sweep.spanidx, 1) - 1
if idx >= uint32(len(work.spans)) {
mheap_.sweepdone = 1
_g_.m.locks--
return ^uintptr(0)
}
s := work.spans[idx]
if s.state != mSpanInUse {
s.sweepgen = sg
continue
}
if s.sweepgen != sg-2 || !cas(&s.sweepgen, sg-2, sg-1) {
continue
}
npages := s.npages
if !mSpan_Sweep(s, false) {
npages = 0
}
_g_.m.locks--
return npages
}
}
func gosweepone() uintptr {
var ret uintptr
onM(func() {
ret = sweepone()
})
return ret
}
func gosweepdone() bool {
return mheap_.sweepdone != 0
}
func gchelper() {
_g_ := getg()
_g_.m.traceback = 2
gchelperstart()
// parallel mark over the GC roots (work.markfor)
parfordo(work.markfor)
// help other threads scan secondary blocks
scanblock(0, 0, nil)
nproc := work.nproc // work.nproc can change right after we increment work.ndone
if xadd(&work.ndone, +1) == nproc-1 {
notewakeup(&work.alldone)
}
_g_.m.traceback = 0
}
func cachestats() {
for i := 0; ; i++ {
p := allp[i]
if p == nil {
break
}
c := p.mcache
if c == nil {
continue
}
purgecachedstats(c)
}
}
func flushallmcaches() {
for i := 0; ; i++ {
p := allp[i]
if p == nil {
break
}
c := p.mcache
if c == nil {
continue
}
mCache_ReleaseAll(c)
stackcache_clear(c)
}
}
func updatememstats(stats *gcstats) {
if stats != nil {
*stats = gcstats{}
}
for mp := allm; mp != nil; mp = mp.alllink {
if stats != nil {
src := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(&mp.gcstats))
dst := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(stats))
for i, v := range src {
dst[i] += v
}
mp.gcstats = gcstats{}
}
}
memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys
// Calculate memory allocator stats.
// During program execution we only count number of frees and amount of freed memory.
// Current number of alive objects in the heap and amount of alive heap memory
// are calculated by scanning all spans.
// Total number of mallocs is calculated as number of frees plus number of alive objects.
// Similarly, total amount of allocated memory is calculated as amount of freed memory
// plus amount of alive heap memory.
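// For example, if 10 MB have been freed over the program's lifetime and 4 MB
// of heap objects are currently alive, total_alloc works out to 14 MB and
// nmalloc = nfree + (number of alive objects).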
memstats.alloc = 0
memstats.total_alloc = 0
memstats.nmalloc = 0
memstats.nfree = 0
for i := 0; i < len(memstats.by_size); i++ {
memstats.by_size[i].nmalloc = 0
memstats.by_size[i].nfree = 0
}
// Flush MCaches to MCentral.
onM(flushallmcaches)
// Aggregate local stats.
cachestats()
// Scan all spans and count number of alive objects.
lock(&mheap_.lock)
for i := uint32(0); i < mheap_.nspan; i++ {
s := h_allspans[i]
if s.state != mSpanInUse {
continue
}
if s.sizeclass == 0 {
memstats.nmalloc++
memstats.alloc += uint64(s.elemsize)
} else {
memstats.nmalloc += uint64(s.ref)
memstats.by_size[s.sizeclass].nmalloc += uint64(s.ref)
memstats.alloc += uint64(s.ref) * uint64(s.elemsize)
}
}
unlock(&mheap_.lock)
// Aggregate by size class.
smallfree := uint64(0)
memstats.nfree = mheap_.nlargefree
for i := 0; i < len(memstats.by_size); i++ {
memstats.nfree += mheap_.nsmallfree[i]
memstats.by_size[i].nfree = mheap_.nsmallfree[i]
memstats.by_size[i].nmalloc += mheap_.nsmallfree[i]
smallfree += uint64(mheap_.nsmallfree[i]) * uint64(class_to_size[i])
}
memstats.nfree += memstats.tinyallocs
memstats.nmalloc += memstats.nfree
// Calculate derived stats.
memstats.total_alloc = uint64(memstats.alloc) + uint64(mheap_.largefree) + smallfree
memstats.heap_alloc = memstats.alloc
memstats.heap_objects = memstats.nmalloc - memstats.nfree
}
// Structure of arguments passed to function gc().
// This allows the arguments to be passed via mcall.
type gc_args struct {
start_time int64 // start time of GC in ns (just before stoptheworld)
eagersweep bool
}
func gcinit() {
if unsafe.Sizeof(workbuf{}) != _WorkbufSize {
gothrow("runtime: size of Workbuf is suboptimal")
}
work.markfor = parforalloc(_MaxGcproc)
gcpercent = readgogc()
gcdatamask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcdata)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)))
gcbssmask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcbss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)))
}
func gc_m() {
_g_ := getg()
gp := _g_.m.curg
casgstatus(gp, _Grunning, _Gwaiting)
gp.waitreason = "garbage collection"
var a gc_args
a.start_time = int64(_g_.m.scalararg[0]) | int64(uintptr(_g_.m.scalararg[1]))<<32
a.eagersweep = _g_.m.scalararg[2] != 0
gc(&a)
if nbadblock > 0 {
// Work out path from root to bad block.
for {
gc(&a)
if nbadblock >= int32(len(badblock)) {
gothrow("cannot find path to bad pointer")
}
}
}
casgstatus(gp, _Gwaiting, _Grunning)
}
func gc(args *gc_args) {
if _DebugGCPtrs {
print("GC start\n")
}
if debug.allocfreetrace > 0 {
tracegc()
}
_g_ := getg()
_g_.m.traceback = 2
t0 := args.start_time
work.tstart = args.start_time
var t1 int64
if debug.gctrace > 0 {
t1 = nanotime()
}
// Sweep whatever has not been swept by bgsweep.
for sweepone() != ^uintptr(0) {
sweep.npausesweep++
}
// Cache runtime.mheap_.allspans in work.spans to avoid conflicts with
// resizing/freeing allspans.
// New spans can be created while GC progresses, but they are not garbage for
// this round:
// - new stack spans can be created even while the world is stopped.
// - new malloc spans can be created during the concurrent sweep
// Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
lock(&mheap_.lock)
// Free the old cached sweep array if necessary.
if work.spans != nil && &work.spans[0] != &h_allspans[0] {
sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys)
}
// Cache the current array for marking.
mheap_.gcspans = mheap_.allspans
work.spans = h_allspans
unlock(&mheap_.lock)
work.nwait = 0
work.ndone = 0
work.nproc = uint32(gcprocs())
parforsetup(work.markfor, work.nproc, uint32(_RootCount+allglen), nil, false, markroot)
if work.nproc > 1 {
noteclear(&work.alldone)
helpgc(int32(work.nproc))
}
var t2 int64
if debug.gctrace > 0 {
t2 = nanotime()
}
gchelperstart()
parfordo(work.markfor)
scanblock(0, 0, nil)
var t3 int64
if debug.gctrace > 0 {
t3 = nanotime()
}
if work.nproc > 1 {
notesleep(&work.alldone)
}
shrinkfinish()
cachestats()
// next_gc calculation is tricky with concurrent sweep since we don't know the size of the live heap.
// Estimate what the live heap size was after the previous GC (for printing only).
heap0 := memstats.next_gc * 100 / (uint64(gcpercent) + 100)
// Conservatively set next_gc to a high value, assuming that everything is live;
// concurrent/lazy sweep will reduce this number as it discovers new garbage.
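// For example, with gcpercent = 100 (GOGC=100) and heap_alloc = 4 MB at this
// point, next_gc starts at 8 MB; sweeping then pulls it back down toward the
// value the true live heap would have produced.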
memstats.next_gc = memstats.heap_alloc + memstats.heap_alloc*uint64(gcpercent)/100
t4 := nanotime()
atomicstore64(&memstats.last_gc, uint64(unixnanotime())) // must be Unix time to make sense to user
memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(t4 - t0)
memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(t4)
memstats.pause_total_ns += uint64(t4 - t0)
memstats.numgc++
if memstats.debuggc {
print("pause ", t4-t0, "\n")
}
if debug.gctrace > 0 {
heap1 := memstats.heap_alloc
var stats gcstats
updatememstats(&stats)
if heap1 != memstats.heap_alloc {
print("runtime: mstats skew: heap=", heap1, "/", memstats.heap_alloc, "\n")
gothrow("mstats skew")
}
obj := memstats.nmalloc - memstats.nfree
stats.nprocyield += work.markfor.nprocyield
stats.nosyield += work.markfor.nosyield
stats.nsleep += work.markfor.nsleep
print("gc", memstats.numgc, "(", work.nproc, "): ",
(t1-t0)/1000, "+", (t2-t1)/1000, "+", (t3-t2)/1000, "+", (t4-t3)/1000, " us, ",
heap0>>20, " -> ", heap1>>20, " MB, ",
obj, " (", memstats.nmalloc, "-", memstats.nfree, ") objects, ",
gcount(), " goroutines, ",
len(work.spans), "/", sweep.nbgsweep, "/", sweep.npausesweep, " sweeps, ",
stats.nhandoff, "(", stats.nhandoffcnt, ") handoff, ",
work.markfor.nsteal, "(", work.markfor.nstealcnt, ") steal, ",
stats.nprocyield, "/", stats.nosyield, "/", stats.nsleep, " yields\n")
sweep.nbgsweep = 0
sweep.npausesweep = 0
}
// See the comment in the beginning of this function as to why we need the following.
// Even if this is still stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
lock(&mheap_.lock)
// Free the old cached mark array if necessary.
if work.spans != nil && &work.spans[0] != &h_allspans[0] {
sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys)
}
// Cache the current array for sweeping.
mheap_.gcspans = mheap_.allspans
mheap_.sweepgen += 2
mheap_.sweepdone = 0
work.spans = h_allspans
sweep.spanidx = 0
unlock(&mheap_.lock)
if _ConcurrentSweep && !args.eagersweep {
lock(&gclock)
if !sweep.started {
go bgsweep()
sweep.started = true
} else if sweep.parked {
sweep.parked = false
ready(sweep.g)
}
unlock(&gclock)
} else {
// Sweep all spans eagerly.
for sweepone() != ^uintptr(0) {
sweep.npausesweep++
}
// Do an additional mProf_GC, because all 'free' events are now real as well.
mProf_GC()
}
mProf_GC()
_g_.m.traceback = 0
if _DebugGCPtrs {
print("GC end\n")
}
}
func readmemstats_m() {
_g_ := getg()
stats := (*mstats)(_g_.m.ptrarg[0])
_g_.m.ptrarg[0] = nil
updatememstats(nil)
// Size of the trailing by_size array differs between Go and C:
// NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats)
// Stack numbers are part of the heap numbers, separate those out for user consumption
stats.stacks_sys = stats.stacks_inuse
stats.heap_inuse -= stats.stacks_inuse
stats.heap_sys -= stats.stacks_inuse
}
//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
onM(func() {
readGCStats_m(pauses)
})
}
func readGCStats_m(pauses *[]uint64) {
p := *pauses
// Calling code in runtime/debug should make the slice large enough.
if cap(p) < len(memstats.pause_ns)+3 {
gothrow("runtime: short slice passed to readGCStats")
}
// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
lock(&mheap_.lock)
n := memstats.numgc
if n > uint32(len(memstats.pause_ns)) {
n = uint32(len(memstats.pause_ns))
}
// The pause buffer is circular. The most recent pause is at
// pause_ns[(numgc-1)%len(pause_ns)]; earlier pauses are found by walking
// backward from there. We deliver the times most recent first (p[0] is
// the most recent).
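// The resulting layout of p, with n = min(numgc, len(pause_ns)), is:
//	p[0:n]    recent pause durations, most recent first
//	p[n:2n]   the matching pause end times
//	p[2n]     last_gc (absolute time)
//	p[2n+1]   numgc
//	p[2n+2]   pause_total_ns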
p = p[:cap(p)]
for i := uint32(0); i < n; i++ {
j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
p[i] = memstats.pause_ns[j]
p[n+i] = memstats.pause_end[j]
}
p[n+n] = memstats.last_gc
p[n+n+1] = uint64(memstats.numgc)
p[n+n+2] = memstats.pause_total_ns
unlock(&mheap_.lock)
*pauses = p[:n+n+3]
}
func setGCPercent(in int32) (out int32) {
lock(&mheap_.lock)
out = gcpercent
if in < 0 {
in = -1
}
gcpercent = in
unlock(&mheap_.lock)
return out
}
func gchelperstart() {
_g_ := getg()
if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc {
gothrow("gchelperstart: bad m->helpgc")
}
if _g_ != _g_.m.g0 {
gothrow("gchelper not running on g0 stack")
}
}
func wakefing() *g {
var res *g
lock(&finlock)
if fingwait && fingwake {
fingwait = false
fingwake = false
res = fing
}
unlock(&finlock)
return res
}
func addb(p *byte, n uintptr) *byte {
return (*byte)(add(unsafe.Pointer(p), n))
}
// Recursively unrolls GC program in prog.
// mask is where to store the result.
// ppos is a pointer to position in mask, in bits.
// sparse says to generate a 4-bit-per-word mask for the heap (otherwise a 2-bit-per-word mask for data/bss).
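//
// For example, a 4-word type laid out as (ptr, scalar, ptr, ptr) unrolls to
// the single dense mask byte
//	bitsPointer | bitsScalar<<2 | bitsPointer<<4 | bitsPointer<<6
// (2 bits per word, 4 words per byte), while in the sparse heap form the same
// four entries are spread over two bytes at bit offsets 2 and 6 of each byte,
// leaving the low 2 bits of every nibble free for the boundary/mark bits.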
func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) *byte {
arena_start := mheap_.arena_start
pos := *ppos
mask := (*[1 << 30]byte)(unsafe.Pointer(maskp))
for {
switch *prog {
default:
gothrow("unrollgcprog: unknown instruction")
case insData:
prog = addb(prog, 1)
siz := int(*prog)
prog = addb(prog, 1)
p := (*[1 << 30]byte)(unsafe.Pointer(prog))
for i := 0; i < siz; i++ {
v := p[i/_PointersPerByte]
v >>= (uint(i) % _PointersPerByte) * _BitsPerPointer
v &= _BitsMask
if inplace {
// Store directly into GC bitmap.
off := (uintptr(unsafe.Pointer(&mask[pos])) - arena_start) / ptrSize
bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
shift := (off % wordsPerBitmapByte) * gcBits
if shift == 0 {
*bitp = 0
}
*bitp |= v << (shift + 2)
pos += ptrSize
} else if sparse {
// 4-bits per word
v <<= (pos % 8) + 2
mask[pos/8] |= v
pos += gcBits
} else {
// 2-bits per word
v <<= pos % 8
mask[pos/8] |= v
pos += _BitsPerPointer
}
}
prog = addb(prog, round(uintptr(siz)*_BitsPerPointer, 8)/8)
case insArray:
prog = (*byte)(add(unsafe.Pointer(prog), 1))
siz := uintptr(0)
for i := uintptr(0); i < ptrSize; i++ {
siz = (siz << 8) + uintptr(*(*byte)(add(unsafe.Pointer(prog), ptrSize-i-1)))
}
prog = (*byte)(add(unsafe.Pointer(prog), ptrSize))
var prog1 *byte
for i := uintptr(0); i < siz; i++ {
prog1 = unrollgcprog1(&mask[0], prog, &pos, inplace, sparse)
}
if *prog1 != insArrayEnd {
gothrow("unrollgcprog: array does not end with insArrayEnd")
}
prog = (*byte)(add(unsafe.Pointer(prog1), 1))
case insArrayEnd, insEnd:
*ppos = pos
return prog
}
}
}
// Unrolls GC program prog for data/bss, returns dense GC mask.
func unrollglobgcprog(prog *byte, size uintptr) bitvector {
masksize := round(round(size, ptrSize)/ptrSize*bitsPerPointer, 8) / 8
mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys))
mask[masksize] = 0xa1
pos := uintptr(0)
prog = unrollgcprog1(&mask[0], prog, &pos, false, false)
if pos != size/ptrSize*bitsPerPointer {
print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize*bitsPerPointer, "\n")
gothrow("unrollglobgcprog: bad program size")
}
if *prog != insEnd {
gothrow("unrollglobgcprog: program does not end with insEnd")
}
if mask[masksize] != 0xa1 {
gothrow("unrollglobgcprog: overflow")
}
return bitvector{int32(masksize * 8), &mask[0]}
}
func unrollgcproginplace_m() {
_g_ := getg()
v := _g_.m.ptrarg[0]
typ := (*_type)(_g_.m.ptrarg[1])
size := _g_.m.scalararg[0]
size0 := _g_.m.scalararg[1]
_g_.m.ptrarg[0] = nil
_g_.m.ptrarg[1] = nil
pos := uintptr(0)
prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
for pos != size0 {
unrollgcprog1((*byte)(v), prog, &pos, true, true)
}
// Mark first word as allocated (bitBoundary).
arena_start := mheap_.arena_start
off := (uintptr(v) - arena_start) / ptrSize
bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
shift := (off % wordsPerBitmapByte) * gcBits
*bitp |= bitBoundary << shift
// Mark word after last as BitsDead.
if size0 < size {
off := (uintptr(v) + size0 - arena_start) / ptrSize
bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
shift := (off % wordsPerBitmapByte) * gcBits
*bitp &= uint8(^(bitPtrMask << shift) | uintptr(bitsDead)<<(shift+2))
}
}
var unroll mutex
// Unrolls GC program in typ.gc[1] into typ.gc[0]
func unrollgcprog_m() {
_g_ := getg()
typ := (*_type)(_g_.m.ptrarg[0])
_g_.m.ptrarg[0] = nil
lock(&unroll)
mask := (*byte)(unsafe.Pointer(uintptr(typ.gc[0])))
if *mask == 0 {
pos := uintptr(8) // skip the unroll flag
prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
prog = unrollgcprog1(mask, prog, &pos, false, true)
if *prog != insEnd {
gothrow("unrollgcprog: program does not end with insEnd")
}
if typ.size/ptrSize%2 != 0 {
// repeat the program
prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
unrollgcprog1(mask, prog, &pos, false, true)
}
// atomic way to say mask[0] = 1
x := *(*uintptr)(unsafe.Pointer(mask))
*(*byte)(unsafe.Pointer(&x)) = 1
atomicstoreuintptr((*uintptr)(unsafe.Pointer(mask)), x)
}
unlock(&unroll)
}
// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
func markspan(v unsafe.Pointer, size uintptr, n uintptr, leftover bool) {
if uintptr(v)+size*n > mheap_.arena_used || uintptr(v) < mheap_.arena_start {
gothrow("markspan: bad pointer")
}
// Find bits of the beginning of the span.
off := (uintptr(v) - uintptr(mheap_.arena_start)) / ptrSize
if off%wordsPerBitmapByte != 0 {
gothrow("markspan: unaligned length")
}
b := mheap_.arena_start - off/wordsPerBitmapByte - 1
// Okay to use non-atomic ops here, because we control
// the entire span, and each bitmap byte has bits for only
// one span, so no other goroutines are changing these bitmap words.
if size == ptrSize {
// Possible only on 64-bits (minimal size class is 8 bytes).
// Set memory to 0x11.
if (bitBoundary|bitsDead)<<gcBits|bitBoundary|bitsDead != 0x11 {
gothrow("markspan: bad bits")
}
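// That is, each bitmap byte covers two 8-byte words, the guard above
// guarantees that bitBoundary|bitsDead is 0x1 per nibble, and the 64-bit
// stores below therefore stamp "object boundary, no pointers" onto 16 words
// at a time.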
if n%(wordsPerBitmapByte*ptrSize) != 0 {
gothrow("markspan: unaligned length")
}
b = b - n/wordsPerBitmapByte + 1 // find first byte
if b%ptrSize != 0 {
gothrow("markspan: unaligned pointer")
}
for i := uintptr(0); i < n; i, b = i+wordsPerBitmapByte*ptrSize, b+ptrSize {
*(*uintptr)(unsafe.Pointer(b)) = uintptrMask & 0x1111111111111111 // bitBoundary | bitsDead, repeated
}
return
}
if leftover {
n++ // mark a boundary just past end of last block too
}
step := size / (ptrSize * wordsPerBitmapByte)
for i := uintptr(0); i < n; i, b = i+1, b-step {
*(*byte)(unsafe.Pointer(b)) = bitBoundary | bitsDead<<2
}
}
// unmark the span of memory at v of length n bytes.
func unmarkspan(v, n uintptr) {
if v+n > mheap_.arena_used || v < mheap_.arena_start {
gothrow("markspan: bad pointer")
}
off := (v - mheap_.arena_start) / ptrSize // word offset
if off%(ptrSize*wordsPerBitmapByte) != 0 {
gothrow("markspan: unaligned pointer")
}
b := mheap_.arena_start - off/wordsPerBitmapByte - 1
n /= ptrSize
if n%(ptrSize*wordsPerBitmapByte) != 0 {
gothrow("unmarkspan: unaligned length")
}
// Okay to use non-atomic ops here, because we control
// the entire span, and each bitmap word has bits for only
// one span, so no other goroutines are changing these
// bitmap words.
n /= wordsPerBitmapByte
memclr(unsafe.Pointer(b-n+1), n)
}
func mHeap_MapBits(h *mheap) {
// Caller has added extra mappings to the arena.
// Add extra mappings of bitmap words as needed.
// We allocate extra bitmap pieces in chunks of bitmapChunk.
const bitmapChunk = 8192
n := (h.arena_used - h.arena_start) / (ptrSize * wordsPerBitmapByte)
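// With ptrSize = 8 and wordsPerBitmapByte = 2 this is arena size / 16;
// e.g. a 1 GB arena needs 64 MB of bitmap, mapped in bitmapChunk-sized,
// page-aligned increments below.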
n = round(n, bitmapChunk)
n = round(n, _PhysPageSize)
if h.bitmap_mapped >= n {
return
}
sysMap(unsafe.Pointer(h.arena_start-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
h.bitmap_mapped = n
}
func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
target := (*stkframe)(ctxt)
if frame.sp <= target.sp && target.sp < frame.varp {
*target = *frame
return false
}
return true
}
// Returns GC type info for object p for testing.
func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) {
*mask = nil
*len = 0
// data
if uintptr(unsafe.Pointer(&data)) <= uintptr(p) && uintptr(p) < uintptr(unsafe.Pointer(&edata)) {
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
*len = n / ptrSize
*mask = &make([]byte, *len)[0]
for i := uintptr(0); i < n; i += ptrSize {
off := (uintptr(p) + i - uintptr(unsafe.Pointer(&data))) / ptrSize
bits := (*(*byte)(add(unsafe.Pointer(gcdatamask.bytedata), off/pointersPerByte)) >> ((off % pointersPerByte) * bitsPerPointer)) & bitsMask
*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
}
return
}
// bss
if uintptr(unsafe.Pointer(&bss)) <= uintptr(p) && uintptr(p) < uintptr(unsafe.Pointer(&ebss)) {
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
*len = n / ptrSize
*mask = &make([]byte, *len)[0]
for i := uintptr(0); i < n; i += ptrSize {
off := (uintptr(p) + i - uintptr(unsafe.Pointer(&bss))) / ptrSize
bits := (*(*byte)(add(unsafe.Pointer(gcbssmask.bytedata), off/pointersPerByte)) >> ((off % pointersPerByte) * bitsPerPointer)) & bitsMask
*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
}
return
}
// heap
var n uintptr
var base uintptr
if mlookup(uintptr(p), &base, &n, nil) != 0 {
*len = n / ptrSize
*mask = &make([]byte, *len)[0]
for i := uintptr(0); i < n; i += ptrSize {
off := (uintptr(base) + i - mheap_.arena_start) / ptrSize
b := mheap_.arena_start - off/wordsPerBitmapByte - 1
shift := (off % wordsPerBitmapByte) * gcBits
bits := (*(*byte)(unsafe.Pointer(b)) >> (shift + 2)) & bitsMask
*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
}
return
}
// stack
var frame stkframe
frame.sp = uintptr(p)
_g_ := getg()
gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
if frame.fn != nil {
f := frame.fn
targetpc := frame.continpc
if targetpc == 0 {
return
}
if targetpc != f.entry {
targetpc--
}
pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
if pcdata == -1 {
return
}
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
if stkmap == nil || stkmap.n <= 0 {
return
}
bv := stackmapdata(stkmap, pcdata)
size := uintptr(bv.n) / bitsPerPointer * ptrSize
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
*len = n / ptrSize
*mask = &make([]byte, *len)[0]
for i := uintptr(0); i < n; i += ptrSize {
off := (uintptr(p) + i - frame.varp + size) / ptrSize
bits := ((*(*byte)(add(unsafe.Pointer(bv.bytedata), off*bitsPerPointer/8))) >> ((off * bitsPerPointer) % 8)) & bitsMask
*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
}
}
}
func unixnanotime() int64 {
var now int64
gc_unixnanotime(&now)
return now
}
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Garbage collector (GC).
//
// GC is:
// - mark&sweep
// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
// - parallel (up to MaxGcproc threads)
// - partially concurrent (mark is stop-the-world, while sweep is concurrent)
// - non-moving/non-compacting
// - full (non-partial)
//
// GC rate.
// Next GC is after we've allocated an extra amount of memory proportional to
// the amount already in use. The proportion is controlled by GOGC environment variable
// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
// (and also the amount of extra memory used).
//
// Concurrent sweep.
// The sweep phase proceeds concurrently with normal program execution.
// The heap is swept span-by-span both lazily (when a goroutine needs another span)
// and concurrently in a background goroutine (this helps programs that are not CPU bound).
// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
// and so next_gc calculation is tricky and happens as follows.
// At the end of the stop-the-world phase next_gc is conservatively set based on total
// heap size; all spans are marked as "needs sweeping".
// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
// The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
// closer to the target value. However, this is not enough to avoid over-allocating memory.
// Consider that a goroutine wants to allocate a new span for a large object and
// there are no free swept spans, but there are small-object unswept spans.
// If the goroutine naively allocates a new span, it can surpass the yet-unknown
// target next_gc value. In order to prevent such cases (1) when a goroutine needs
// to allocate a new small-object span, it sweeps small-object spans for the same
// object size until it frees at least one object; (2) when a goroutine needs to
// allocate large-object span from heap, it sweeps spans until it frees at least
// that many pages into heap. Together these two measures ensure that we don't surpass
// target next_gc value by a large margin. There is an exception: if a goroutine sweeps
// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
// but there can still be other one-page unswept spans which could be combined into a two-page span.
// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
// The finalizer goroutine is kicked off only when all spans are swept.
// When the next GC starts, it sweeps all not-yet-swept spans (if any).
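// For example, with GOGC=100, sweeping a span that frees 1 MB subtracts
// 1 MB * (100+100)/100 = 2 MB from next_gc (see MSpan_Sweep), so by the time
// everything unswept has been processed next_gc has converged to
// live_heap*(1+GOGC/100), the value that could not be computed at the end of
// the mark phase.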
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "stack.h"
#include "mgc0.h"
#include "chan.h"
#include "race.h"
#include "type.h"
#include "typekind.h"
#include "funcdata.h"
#include "textflag.h"
enum {
Debug = 0,
DebugPtrs = 0, // if 1, print trace of every pointer load during GC
ConcurrentSweep = 1,
WorkbufSize = 4*1024,
FinBlockSize = 4*1024,
RootData = 0,
RootBss = 1,
RootFinalizers = 2,
RootSpans = 3,
RootFlushCaches = 4,
RootCount = 5,
};
// ptrmask for an allocation containing a single pointer.
static byte oneptr[] = {BitsPointer};
// Initialized from $GOGC. GOGC=off means no gc.
extern int32 runtime·gcpercent;
// Holding worldsema grants an M the right to try to stop the world.
// The procedure is:
//
// runtime·semacquire(&runtime·worldsema);
// m->gcing = 1;
// runtime·stoptheworld();
//
// ... do stuff ...
//
// m->gcing = 0;
// runtime·semrelease(&runtime·worldsema);
// runtime·starttheworld();
//
uint32 runtime·worldsema = 1;
typedef struct Workbuf Workbuf;
struct Workbuf
{
LFNode node; // must be first
uintptr nobj;
byte* obj[(WorkbufSize-sizeof(LFNode)-sizeof(uintptr))/PtrSize];
};
extern byte runtime·data[];
extern byte runtime·edata[];
extern byte runtime·bss[];
extern byte runtime·ebss[];
extern byte runtime·gcdata[];
extern byte runtime·gcbss[];
Mutex runtime·finlock; // protects the following variables
G* runtime·fing; // goroutine that runs finalizers
FinBlock* runtime·finq; // list of finalizers that are to be executed
FinBlock* runtime·finc; // cache of free blocks
static byte finptrmask[FinBlockSize/PtrSize/PointersPerByte];
bool runtime·fingwait;
bool runtime·fingwake;
FinBlock *runtime·allfin; // list of all blocks
BitVector runtime·gcdatamask;
BitVector runtime·gcbssmask;
Mutex runtime·gclock;
static uintptr badblock[1024];
static int32 nbadblock;
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);
static void gchelperstart(void);
static void flushallmcaches(void);
static bool scanframe(Stkframe *frame, void *unused);
static void scanstack(G *gp);
static BitVector unrollglobgcprog(byte *prog, uintptr size);
void runtime·bgsweep(void);
static FuncVal bgsweepv = {runtime·bgsweep};
typedef struct WorkData WorkData;
struct WorkData {
uint64 full; // lock-free list of full blocks
uint64 empty; // lock-free list of empty blocks
byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
uint32 nproc;
int64 tstart;
volatile uint32 nwait;
volatile uint32 ndone;
Note alldone;
ParFor* markfor;
// Copy of mheap.allspans for marker or sweeper.
MSpan** spans;
uint32 nspan;
};
WorkData runtime·work;
// Is _cgo_allocate linked into the binary?
static bool
have_cgo_allocate(void)
{
extern byte go·weak·runtime·_cgo_allocate_internal[1];
return go·weak·runtime·_cgo_allocate_internal != nil;
}
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
static void
scanblock(byte *b, uintptr n, byte *ptrmask)
{
byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
Workbuf *wbuf;
Iface *iface;
Eface *eface;
Type *typ;
MSpan *s;
pageID k;
bool keepworking;
// Cache memory arena parameters in local vars.
arena_start = runtime·mheap.arena_start;
arena_used = runtime·mheap.arena_used;
wbuf = getempty(nil);
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
keepworking = b == nil;
scanbufpos = 0;
for(i = 0; i < nelem(scanbuf); i++)
scanbuf[i] = nil;
ptrbitp = nil;
// ptrmask can have 2 possible values:
// 1. nil - obtain pointer mask from GC bitmap.
// 2. pointer to a compact mask (for stacks and data).
if(b != nil)
goto scanobj;
for(;;) {
if(nobj == 0) {
// Out of work in workbuf.
// First, see if there is any work in scanbuf.
for(i = 0; i < nelem(scanbuf); i++) {
b = scanbuf[scanbufpos];
scanbuf[scanbufpos++] = nil;
scanbufpos %= nelem(scanbuf);
if(b != nil) {
n = arena_used - b; // scan until bitBoundary or BitsDead
ptrmask = nil; // use GC bitmap for pointer info
goto scanobj;
}
}
if(!keepworking) {
putempty(wbuf);
return;
}
// Refill workbuf from global queue.
wbuf = getfull(wbuf);
if(wbuf == nil)
return;
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
}
// If another proc wants a pointer, give it some.
if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
wbuf->nobj = nobj;
wbuf = handoff(wbuf);
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
}
wp--;
nobj--;
b = *wp;
n = arena_used - b; // scan until next bitBoundary or BitsDead
ptrmask = nil; // use GC bitmap for pointer info
scanobj:
if(DebugPtrs)
runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask);
// Find bits of the beginning of the object.
if(ptrmask == nil) {
off = (uintptr*)b - (uintptr*)arena_start;
ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
}
for(i = 0; i < n; i += PtrSize) {
obj = nil;
// Find bits for this word.
if(ptrmask == nil) {
// Check if we have reached the end of the span.
if((((uintptr)b+i)%PageSize) == 0 &&
runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift])
break;
// Consult GC bitmap.
bits = *ptrbitp;
if(wordsPerBitmapByte != 2)
runtime·throw("alg doesn't work for wordsPerBitmapByte != 2");
j = ((uintptr)b+i)/PtrSize & 1;
ptrbitp -= j;
bits >>= gcBits*j;
if((bits&bitBoundary) != 0 && i != 0)
break; // reached beginning of the next object
bits = (bits>>2)&BitsMask;
if(bits == BitsDead)
break; // reached no-scan part of the object
} else // dense mask (stack or data)
bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask;
if(bits <= BitsScalar) // BitsScalar || BitsDead
continue;
if(bits == BitsPointer) {
obj = *(byte**)(b+i);
obj0 = obj;
goto markobj;
}
// With those three out of the way, must be multi-word.
if(Debug && bits != BitsMultiWord)
runtime·throw("unexpected garbage collection bits");
// Find the next pair of bits.
if(ptrmask == nil) {
bits = *ptrbitp;
j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
ptrbitp -= j;
bits >>= gcBits*j;
bits = (bits>>2)&BitsMask;
} else
bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
if(Debug && bits != BitsIface && bits != BitsEface)
runtime·throw("unexpected garbage collection bits");
if(bits == BitsIface) {
iface = (Iface*)(b+i);
if(iface->tab != nil) {
typ = iface->tab->type;
if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
obj = iface->data;
}
} else {
eface = (Eface*)(b+i);
typ = eface->type;
if(typ != nil) {
if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
obj = eface->data;
}
}
i += PtrSize;
obj0 = obj;
markobj:
// At this point we have extracted the next potential pointer.
// Check if it points into heap.
if(obj == nil)
continue;
if(obj < arena_start || obj >= arena_used) {
if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
s = nil;
goto badobj;
}
continue;
}
// Mark the object.
obj = (byte*)((uintptr)obj & ~(PtrSize-1));
off = (uintptr*)obj - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
xbits = *bitp;
bits = (xbits >> shift) & bitMask;
if((bits&bitBoundary) == 0) {
// Not a beginning of a block, consult span table to find the block beginning.
k = (uintptr)obj>>PageShift;
x = k;
x -= (uintptr)arena_start>>PageShift;
s = runtime·mheap.spans[x];
if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse) {
// Stack pointers lie within the arena bounds but are not part of the GC heap.
// Ignore them.
if(s != nil && s->state == MSpanStack)
continue;
badobj:
// If cgo_allocate is linked into the binary, it can allocate
// memory as []unsafe.Pointer that may not contain actual
// pointers and must be scanned conservatively.
// In this case alone, allow the bad pointer.
if(have_cgo_allocate() && ptrmask == nil)
continue;
// Anything else indicates a bug somewhere.
// If we're in the middle of chasing down a different bad pointer,
// don't confuse the trace by printing about this one.
if(nbadblock > 0)
continue;
runtime·printf("runtime: garbage collector found invalid heap pointer *(%p+%p)=%p", b, i, obj);
if(s == nil)
runtime·printf(" s=nil\n");
else
runtime·printf(" span=%p-%p-%p state=%d\n", (uintptr)s->start<<PageShift, s->limit, (uintptr)(s->start+s->npages)<<PageShift, s->state);
if(ptrmask != nil)
runtime·throw("invalid heap pointer");
// Add to badblock list, which will cause the garbage collection
// to keep repeating until it has traced the chain of pointers
// leading to obj all the way back to a root.
if(nbadblock == 0)
badblock[nbadblock++] = (uintptr)b;
continue;
}
p = (byte*)((uintptr)s->start<<PageShift);
if(s->sizeclass != 0) {
size = s->elemsize;
idx = ((byte*)obj - p)/size;
p = p+idx*size;
}
if(p == obj) {
runtime·printf("runtime: failed to find block beginning for %p s=%p s->limit=%p\n",
p, s->start*PageSize, s->limit);
runtime·throw("failed to find block beginning");
}
obj = p;
goto markobj;
}
if(DebugPtrs)
runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj);
if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
// Running garbage collection again because
// we want to find the path from a root to a bad pointer.
// Found possible next step; extend or finish path.
for(j=0; j<nbadblock; j++)
if(badblock[j] == (uintptr)b)
goto AlreadyBad;
runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0));
if(ptrmask != nil)
runtime·throw("bad pointer");
if(nbadblock >= nelem(badblock))
runtime·throw("badblock trace too long");
badblock[nbadblock++] = (uintptr)b;
AlreadyBad:;
}
// Now we have bits, bitp, and shift correct for
// obj pointing at the base of the object.
// Only care about not marked objects.
if((bits&bitMarked) != 0)
continue;
// If the object size is greater than 8 bytes, then each byte of the GC bitmap
// contains info for at most one object. In that case we use a
// non-atomic byte store to mark the object. This can lead
// to the object being enqueued for scanning twice, but scanning
// is an idempotent operation, so it is OK. It cannot lead
// to bitmap corruption because the single marked bit is the
// only thing that can change in the byte.
// For 8-byte objects we use a non-atomic store if the other
// quadruple in the byte is already marked. Otherwise we fall back
// to an atomic update for marking.
if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
runtime·work.nproc == 1)
*bitp = xbits | (bitMarked<<shift);
else
runtime·atomicor8(bitp, bitMarked<<shift);
if(((xbits>>(shift+2))&BitsMask) == BitsDead)
continue; // noscan object
// Queue the obj for scanning.
PREFETCH(obj);
p = scanbuf[scanbufpos];
scanbuf[scanbufpos++] = obj;
scanbufpos %= nelem(scanbuf);
if(p == nil)
continue;
// If workbuf is full, obtain an empty one.
if(nobj >= nelem(wbuf->obj)) {
wbuf->nobj = nobj;
wbuf = getempty(wbuf);
nobj = wbuf->nobj;
wp = &wbuf->obj[nobj];
}
*wp = p;
wp++;
nobj++;
}
if(DebugPtrs)
runtime·printf("end scanblock %p +%p %p\n", b, n, ptrmask);
if(Debug && ptrmask == nil) {
// For heap objects ensure that we did not overscan.
n = 0;
p = nil;
if(!runtime·mlookup(b, &p, &n, nil) || b != p || i > n) {
runtime·printf("runtime: scanned (%p,%p), heap object (%p,%p)\n", b, i, p, n);
runtime·throw("scanblock: scanned invalid object");
}
}
}
}
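// Illustrative example of the heap bitmap addressing used in scanblock
// (a sketch assuming a 64-bit system, so PtrSize = 8, gcBits = 4 and
// wordsPerBitmapByte = 2). The bitmap grows down from arena_start,
// one 4-bit nibble per heap word:
//
//	off   = (uintptr*)obj - (uintptr*)arena_start;    // word index of obj in the arena
//	bitp  = arena_start - off/wordsPerBitmapByte - 1; // bitmap byte holding obj's nibble
//	shift = (off % wordsPerBitmapByte) * gcBits;      // 0 for even words, 4 for odd words
//
// For example, the word at arena_start+0x30 has off = 6, so its nibble
// lives in the byte at arena_start-4 at shift 0; the word at
// arena_start+0x38 (off = 7) shares that byte, at shift 4.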
static void
markroot(ParFor *desc, uint32 i)
{
FinBlock *fb;
MSpan *s;
uint32 spanidx, sg;
G *gp;
void *p;
uint32 status;
bool restart;
USED(&desc);
// Note: if you add a case here, please also update heapdump.c:dumproots.
switch(i) {
case RootData:
scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
break;
case RootBss:
scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
break;
case RootFinalizers:
for(fb=runtime·allfin; fb; fb=fb->alllink)
scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask);
break;
case RootSpans:
// mark MSpan.specials
sg = runtime·mheap.sweepgen;
for(spanidx=0; spanidx<runtime·work.nspan; spanidx++) {
Special *sp;
SpecialFinalizer *spf;
s = runtime·work.spans[spanidx];
if(s->state != MSpanInUse)
continue;
if(s->sweepgen != sg) {
runtime·printf("sweep %d %d\n", s->sweepgen, sg);
runtime·throw("gc: unswept span");
}
for(sp = s->specials; sp != nil; sp = sp->next) {
if(sp->kind != KindSpecialFinalizer)
continue;
// don't mark finalized object, but scan it so we
// retain everything it points to.
spf = (SpecialFinalizer*)sp;
// A finalizer can be set for an inner byte of an object, find object beginning.
p = (void*)((s->start << PageShift) + spf->special.offset/s->elemsize*s->elemsize);
scanblock(p, s->elemsize, nil);
scanblock((void*)&spf->fn, PtrSize, oneptr);
}
}
break;
case RootFlushCaches:
flushallmcaches();
break;
default:
// the rest is scanning goroutine stacks
if(i - RootCount >= runtime·allglen)
runtime·throw("markroot: bad index");
gp = runtime·allg[i - RootCount];
// Remember when we first observed the G blocked;
// this is needed only for traceback output.
status = runtime·readgstatus(gp);
if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
gp->waitsince = runtime·work.tstart;
// Shrink a stack if not much of it is being used.
runtime·shrinkstack(gp);
if(runtime·readgstatus(gp) == Gdead)
gp->gcworkdone = true;
else
gp->gcworkdone = false;
restart = runtime·stopg(gp);
scanstack(gp);
if(restart)
runtime·restartg(gp);
break;
}
}
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
MCache *c;
if(b != nil)
runtime·lfstackpush(&runtime·work.full, &b->node);
b = nil;
c = g->m->mcache;
if(c->gcworkbuf != nil) {
b = c->gcworkbuf;
c->gcworkbuf = nil;
}
if(b == nil)
b = (Workbuf*)runtime·lfstackpop(&runtime·work.empty);
if(b == nil)
b = runtime·persistentalloc(sizeof(*b), CacheLineSize, &mstats.gc_sys);
b->nobj = 0;
return b;
}
static void
putempty(Workbuf *b)
{
MCache *c;
c = g->m->mcache;
if(c->gcworkbuf == nil) {
c->gcworkbuf = b;
return;
}
runtime·lfstackpush(&runtime·work.empty, &b->node);
}
void
runtime·gcworkbuffree(void *b)
{
if(b != nil)
putempty(b);
}
// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
int32 i;
if(b != nil)
runtime·lfstackpush(&runtime·work.empty, &b->node);
b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
if(b != nil || runtime·work.nproc == 1)
return b;
runtime·xadd(&runtime·work.nwait, +1);
for(i=0;; i++) {
if(runtime·work.full != 0) {
runtime·xadd(&runtime·work.nwait, -1);
b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
if(b != nil)
return b;
runtime·xadd(&runtime·work.nwait, +1);
}
if(runtime·work.nwait == runtime·work.nproc)
return nil;
if(i < 10) {
g->m->gcstats.nprocyield++;
runtime·procyield(20);
} else if(i < 20) {
g->m->gcstats.nosyield++;
runtime·osyield();
} else {
g->m->gcstats.nsleep++;
runtime·usleep(100);
}
}
}
static Workbuf*
handoff(Workbuf *b)
{
int32 n;
Workbuf *b1;
// Make new buffer with half of b's pointers.
b1 = getempty(nil);
n = b->nobj/2;
b->nobj -= n;
b1->nobj = n;
runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
g->m->gcstats.nhandoff++;
g->m->gcstats.nhandoffcnt += n;
// Put b on full list - let first half of b get stolen.
runtime·lfstackpush(&runtime·work.full, &b->node);
return b1;
}
BitVector
runtime·stackmapdata(StackMap *stackmap, int32 n)
{
if(n < 0 || n >= stackmap->n)
runtime·throw("stackmapdata: index out of range");
return (BitVector){stackmap->nbit, stackmap->bytedata + n*((stackmap->nbit+31)/32*4)};
}
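// Worked example of the stackmapdata layout (a sketch; the actual sizes
// depend on the function's bitmaps): each of the stackmap->n records is
// stackmap->nbit bits long, stored padded to a multiple of 32 bits, so a
// record occupies (nbit+31)/32*4 bytes. With nbit = 10, every record
// takes 4 bytes and record n starts at bytedata + n*4; with nbit = 40,
// every record takes 8 bytes and record n starts at bytedata + n*8.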
// Scan a stack frame: local variables and function arguments/results.
static bool
scanframe(Stkframe *frame, void *unused)
{
Func *f;
StackMap *stackmap;
BitVector bv;
uintptr size, minsize;
uintptr targetpc;
int32 pcdata;
USED(unused);
f = frame->fn;
targetpc = frame->continpc;
if(targetpc == 0) {
// Frame is dead.
return true;
}
if(Debug > 1)
runtime·printf("scanframe %s\n", runtime·funcname(f));
if(targetpc != f->entry)
targetpc--;
pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
if(pcdata == -1) {
// We do not have a valid pcdata value but there might be a
// stackmap for this function. It is likely that we are looking
// at the function prologue, assume so and hope for the best.
pcdata = 0;
}
// Scan local variables if stack frame has been allocated.
size = frame->varp - frame->sp;
if(thechar != '6' && thechar != '8')
minsize = sizeof(uintptr);
else
minsize = 0;
if(size > minsize) {
stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
if(stackmap == nil || stackmap->n <= 0) {
runtime·printf("runtime: frame %s untyped locals %p+%p\n", runtime·funcname(f), (byte*)(frame->varp-size), size);
runtime·throw("missing stackmap");
}
// Locals bitmap information, scan just the pointers in locals.
if(pcdata < 0 || pcdata >= stackmap->n) {
// don't know where we are
runtime·printf("runtime: pcdata is %d and %d locals stack map entries for %s (targetpc=%p)\n",
pcdata, stackmap->n, runtime·funcname(f), targetpc);
runtime·throw("scanframe: bad symbol table");
}
bv = runtime·stackmapdata(stackmap, pcdata);
size = (bv.n * PtrSize) / BitsPerPointer;
scanblock((byte*)(frame->varp - size), bv.n/BitsPerPointer*PtrSize, bv.bytedata);
}
// Scan arguments.
if(frame->arglen > 0) {
if(frame->argmap != nil)
bv = *frame->argmap;
else {
stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
if(stackmap == nil || stackmap->n <= 0) {
runtime·printf("runtime: frame %s untyped args %p+%p\n", runtime·funcname(f), frame->argp, (uintptr)frame->arglen);
runtime·throw("missing stackmap");
}
if(pcdata < 0 || pcdata >= stackmap->n) {
// don't know where we are
runtime·printf("runtime: pcdata is %d and %d args stack map entries for %s (targetpc=%p)\n",
pcdata, stackmap->n, runtime·funcname(f), targetpc);
runtime·throw("scanframe: bad symbol table");
}
bv = runtime·stackmapdata(stackmap, pcdata);
}
scanblock((byte*)frame->argp, bv.n/BitsPerPointer*PtrSize, bv.bytedata);
}
return true;
}
static void
scanstack(G *gp)
{
M *mp;
bool (*fn)(Stkframe*, void*);
if((runtime·readgstatus(gp)&Gscan) == 0) {
runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
runtime·throw("mark - bad status");
}
switch(runtime·readgstatus(gp)&~Gscan) {
default:
runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
runtime·throw("mark - bad status");
case Gdead:
return;
case Grunning:
runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
runtime·throw("mark - world not stopped");
case Grunnable:
case Gsyscall:
case Gwaiting:
break;
}
if(gp == g)
runtime·throw("can't scan our own stack");
if((mp = gp->m) != nil && mp->helpgc)
runtime·throw("can't scan gchelper stack");
fn = scanframe;
runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, 0x7fffffff, &fn, nil, 0);
runtime·tracebackdefers(gp, &fn, nil);
}
// The gp has been moved to a gc safepoint. If there is gcphase specific
// work it is done here.
void
runtime·gcphasework(G *gp)
{
switch(runtime·gcphase) {
default:
runtime·throw("gcphasework in bad gcphase");
case GCoff:
case GCquiesce:
case GCstw:
case GCsweep:
// No work for now.
break;
case GCmark:
// Disabled until concurrent GC is implemented
// but indicate the scan has been done.
// scanstack(gp);
break;
}
gp->gcworkdone = true;
}
#pragma dataflag NOPTR
static byte finalizer1[] = {
// Each Finalizer is 5 words, ptr ptr uintptr ptr ptr.
// Each byte describes 4 words.
// Need 4 Finalizers described by 5 bytes before pattern repeats:
// ptr ptr uintptr ptr ptr
// ptr ptr uintptr ptr ptr
// ptr ptr uintptr ptr ptr
// ptr ptr uintptr ptr ptr
// aka
// ptr ptr uintptr ptr
// ptr ptr ptr uintptr
// ptr ptr ptr ptr
// uintptr ptr ptr ptr
// ptr uintptr ptr ptr
// Assumptions about Finalizer layout checked below.
BitsPointer | BitsPointer<<2 | BitsScalar<<4 | BitsPointer<<6,
BitsPointer | BitsPointer<<2 | BitsPointer<<4 | BitsScalar<<6,
BitsPointer | BitsPointer<<2 | BitsPointer<<4 | BitsPointer<<6,
BitsScalar | BitsPointer<<2 | BitsPointer<<4 | BitsPointer<<6,
BitsPointer | BitsScalar<<2 | BitsPointer<<4 | BitsPointer<<6,
};
void
runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot)
{
FinBlock *block;
Finalizer *f;
int32 i;
runtime·lock(&runtime·finlock);
if(runtime·finq == nil || runtime·finq->cnt == runtime·finq->cap) {
if(runtime·finc == nil) {
runtime·finc = runtime·persistentalloc(FinBlockSize, 0, &mstats.gc_sys);
runtime·finc->cap = (FinBlockSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
runtime·finc->alllink = runtime·allfin;
runtime·allfin = runtime·finc;
if(finptrmask[0] == 0) {
// Build pointer mask for Finalizer array in block.
// Check assumptions made in finalizer1 array above.
if(sizeof(Finalizer) != 5*PtrSize ||
offsetof(Finalizer, fn) != 0 ||
offsetof(Finalizer, arg) != PtrSize ||
offsetof(Finalizer, nret) != 2*PtrSize ||
offsetof(Finalizer, fint) != 3*PtrSize ||
offsetof(Finalizer, ot) != 4*PtrSize ||
BitsPerPointer != 2) {
runtime·throw("finalizer out of sync");
}
for(i=0; i<nelem(finptrmask); i++)
finptrmask[i] = finalizer1[i%nelem(finalizer1)];
}
}
block = runtime·finc;
runtime·finc = block->next;
block->next = runtime·finq;
runtime·finq = block;
}
f = &runtime·finq->fin[runtime·finq->cnt];
runtime·finq->cnt++;
f->fn = fn;
f->nret = nret;
f->fint = fint;
f->ot = ot;
f->arg = p;
runtime·fingwake = true;
runtime·unlock(&runtime·finlock);
}
void
runtime·iterate_finq(void (*callback)(FuncVal*, byte*, uintptr, Type*, PtrType*))
{
FinBlock *fb;
Finalizer *f;
uintptr i;
for(fb = runtime·allfin; fb; fb = fb->alllink) {
for(i = 0; i < fb->cnt; i++) {
f = &fb->fin[i];
callback(f->fn, f->arg, f->nret, f->fint, f->ot);
}
}
}
void
runtime·MSpan_EnsureSwept(MSpan *s)
{
uint32 sg;
// Caller must disable preemption.
// Otherwise when this function returns the span can become unswept again
// (if GC is triggered on another goroutine).
if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
runtime·throw("MSpan_EnsureSwept: m is not locked");
sg = runtime·mheap.sweepgen;
if(runtime·atomicload(&s->sweepgen) == sg)
return;
if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
runtime·MSpan_Sweep(s, false);
return;
}
// unfortunate condition, and we don't have an efficient means to wait
while(runtime·atomicload(&s->sweepgen) != sg)
runtime·osyield();
}
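// Sketch of the sweepgen handshake above (derived from the code in this
// file; gc() below advances mheap.sweepgen by 2 per GC round):
//	s->sweepgen == h->sweepgen - 2: the span needs sweeping
//	s->sweepgen == h->sweepgen - 1: the span is being swept
//	s->sweepgen == h->sweepgen:     the span is swept
// MSpan_EnsureSwept tries to move sg-2 -> sg-1 with a CAS and sweep the
// span itself; if it loses that race it spins until the sweeper stores sg.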
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
bool
runtime·MSpan_Sweep(MSpan *s, bool preserve)
{
int32 cl, n, npages, nfree;
uintptr size, off, step;
uint32 sweepgen;
byte *p, *bitp, shift, xbits, bits;
MCache *c;
byte *arena_start;
MLink head, *end, *link;
Special *special, **specialp, *y;
bool res, sweepgenset;
// It's critical that we enter this function with preemption disabled;
// GC must not start while we are in the middle of this function.
if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
runtime·throw("MSpan_Sweep: m is not locked");
sweepgen = runtime·mheap.sweepgen;
if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
s->state, s->sweepgen, sweepgen);
runtime·throw("MSpan_Sweep: bad span state");
}
arena_start = runtime·mheap.arena_start;
cl = s->sizeclass;
size = s->elemsize;
if(cl == 0) {
n = 1;
} else {
// Chunk full of small blocks.
npages = runtime·class_to_allocnpages[cl];
n = (npages << PageShift) / size;
}
res = false;
nfree = 0;
end = &head;
c = g->m->mcache;
sweepgenset = false;
// Mark any free objects in this span so we don't collect them.
for(link = s->freelist; link != nil; link = link->next) {
off = (uintptr*)link - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
*bitp |= bitMarked<<shift;
}
// Unlink & free special records for any objects we're about to free.
specialp = &s->specials;
special = *specialp;
while(special != nil) {
// A finalizer can be set for an inner byte of an object, find object beginning.
p = (byte*)(s->start << PageShift) + special->offset/size*size;
off = (uintptr*)p - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
bits = (*bitp>>shift) & bitMask;
if((bits&bitMarked) == 0) {
// Find the exact byte for which the special was set up
// (as opposed to the object's beginning).
p = (byte*)(s->start << PageShift) + special->offset;
// about to free object: splice out special record
y = special;
special = special->next;
*specialp = special;
if(!runtime·freespecial(y, p, size, false)) {
// stop freeing of object if it has a finalizer
*bitp |= bitMarked << shift;
}
} else {
// object is still live: keep special record
specialp = &special->next;
special = *specialp;
}
}
// Sweep through n objects of given size starting at p.
// This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations.
p = (byte*)(s->start << PageShift);
// Find bits for the beginning of the span.
off = (uintptr*)p - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = 0;
step = size/(PtrSize*wordsPerBitmapByte);
// Rewind to the previous quadruple, because the loop below
// moves to the next one at the beginning of each iteration.
bitp += step;
if(step == 0) {
// 8-byte objects.
bitp++;
shift = gcBits;
}
for(; n > 0; n--, p += size) {
bitp -= step;
if(step == 0) {
if(shift != 0)
bitp--;
shift = gcBits - shift;
}
xbits = *bitp;
bits = (xbits>>shift) & bitMask;
// Allocated and marked object, reset bits to allocated.
if((bits&bitMarked) != 0) {
*bitp &= ~(bitMarked<<shift);
continue;
}
// At this point we know that we are looking at garbage object
// that needs to be collected.
if(runtime·debug.allocfreetrace)
runtime·tracefree(p, size);
// Reset to allocated+noscan.
*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
if(cl == 0) {
// Free large span.
if(preserve)
runtime·throw("can't preserve large span");
runtime·unmarkspan(p, s->npages<<PageShift);
s->needzero = 1;
// important to set sweepgen before returning it to heap
runtime·atomicstore(&s->sweepgen, sweepgen);
sweepgenset = true;
// NOTE(rsc,dvyukov): The original implementation of efence
// in CL 22060046 used SysFree instead of SysFault, so that
// the operating system would eventually give the memory
// back to us again, so that an efence program could run
// longer without running out of memory. Unfortunately,
// calling SysFree here without any kind of adjustment of the
// heap data structures means that when the memory does
// come back to us, we have the wrong metadata for it, either in
// the MSpan structures or in the garbage collection bitmap.
// Using SysFault here means that the program will run out of
// memory fairly quickly in efence mode, but at least it won't
// have mysterious crashes due to confused memory reuse.
// It should be possible to switch back to SysFree if we also
// implement and then call some kind of MHeap_DeleteSpan.
if(runtime·debug.efence) {
s->limit = nil; // prevent mlookup from finding this span
runtime·SysFault(p, size);
} else
runtime·MHeap_Free(&runtime·mheap, s, 1);
c->local_nlargefree++;
c->local_largefree += size;
runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
res = true;
} else {
// Free small object.
if(size > 2*sizeof(uintptr))
((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
else if(size > sizeof(uintptr))
((uintptr*)p)[1] = 0;
end->next = (MLink*)p;
end = (MLink*)p;
nfree++;
}
}
// We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation
// (return it to heap or mcentral), because allocation code assumes that a
// span is already swept if available for allocation.
if(!sweepgenset && nfree == 0) {
// The span must be in our exclusive ownership until we update sweepgen,
// check for potential races.
if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
s->state, s->sweepgen, sweepgen);
runtime·throw("MSpan_Sweep: bad span state after sweep");
}
runtime·atomicstore(&s->sweepgen, sweepgen);
}
if(nfree > 0) {
c->local_nsmallfree[cl] += nfree;
c->local_cachealloc -= nfree * size;
runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
// MCentral_FreeSpan updates sweepgen
}
return res;
}
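// Illustrative example of the bitmap stepping in the sweep loop above
// (a sketch assuming 64-bit, PtrSize = 8, wordsPerBitmapByte = 2):
// step = size/(PtrSize*wordsPerBitmapByte) is how many bitmap bytes
// separate consecutive objects. For size = 32 the step is 2, for
// size = 16 it is 1, and for size = 8 the step is 0, so consecutive
// objects alternate between the two nibbles of the same byte, which is
// why the loop flips shift between 0 and gcBits in that case.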
// State of background runtime·sweep.
// Protected by runtime·gclock.
typedef struct SweepData SweepData;
struct SweepData
{
G* g;
bool parked;
uint32 spanidx; // background sweeper position
uint32 nbgsweep;
uint32 npausesweep;
};
SweepData runtime·sweep;
// sweeps one span
// returns the number of pages returned to the heap, or -1 if there is nothing to sweep
uintptr
runtime·sweepone(void)
{
MSpan *s;
uint32 idx, sg;
uintptr npages;
// increment locks to ensure that the goroutine is not preempted
// in the middle of sweep, thus leaving the span in an inconsistent state for the next GC
g->m->locks++;
sg = runtime·mheap.sweepgen;
for(;;) {
idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
if(idx >= runtime·work.nspan) {
runtime·mheap.sweepdone = true;
g->m->locks--;
return -1;
}
s = runtime·work.spans[idx];
if(s->state != MSpanInUse) {
s->sweepgen = sg;
continue;
}
if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
continue;
npages = s->npages;
if(!runtime·MSpan_Sweep(s, false))
npages = 0;
g->m->locks--;
return npages;
}
}
static void
sweepone_m(void)
{
g->m->scalararg[0] = runtime·sweepone();
}
#pragma textflag NOSPLIT
uintptr
runtime·gosweepone(void)
{
void (*fn)(void);
fn = sweepone_m;
runtime·onM(&fn);
return g->m->scalararg[0];
}
#pragma textflag NOSPLIT
bool
runtime·gosweepdone(void)
{
return runtime·mheap.sweepdone;
}
void
runtime·gchelper(void)
{
uint32 nproc;
g->m->traceback = 2;
gchelperstart();
// parallel mark over GC roots
runtime·parfordo(runtime·work.markfor);
// help other threads scan secondary blocks
scanblock(nil, 0, nil);
nproc = runtime·work.nproc; // runtime·work.nproc can change right after we increment runtime·work.ndone
if(runtime·xadd(&runtime·work.ndone, +1) == nproc-1)
runtime·notewakeup(&runtime·work.alldone);
g->m->traceback = 0;
}
static void
cachestats(void)
{
MCache *c;
P *p, **pp;
for(pp=runtime·allp; p=*pp; pp++) {
c = p->mcache;
if(c==nil)
continue;
runtime·purgecachedstats(c);
}
}
static void
flushallmcaches(void)
{
P *p, **pp;
MCache *c;
// Flush MCache's to MCentral.
for(pp=runtime·allp; p=*pp; pp++) {
c = p->mcache;
if(c==nil)
continue;
runtime·MCache_ReleaseAll(c);
runtime·stackcache_clear(c);
}
}
static void
flushallmcaches_m(G *gp)
{
flushallmcaches();
runtime·gogo(&gp->sched);
}
void
runtime·updatememstats(GCStats *stats)
{
M *mp;
MSpan *s;
int32 i;
uint64 smallfree;
uint64 *src, *dst;
void (*fn)(G*);
if(stats)
runtime·memclr((byte*)stats, sizeof(*stats));
for(mp=runtime·allm; mp; mp=mp->alllink) {
if(stats) {
src = (uint64*)&mp->gcstats;
dst = (uint64*)stats;
for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
dst[i] += src[i];
runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
}
}
mstats.mcache_inuse = runtime·mheap.cachealloc.inuse;
mstats.mspan_inuse = runtime·mheap.spanalloc.inuse;
mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys;
// Calculate memory allocator stats.
// During program execution we only count number of frees and amount of freed memory.
// The current number of alive objects in the heap and the amount of alive heap memory
// are calculated by scanning all spans.
// Total number of mallocs is calculated as number of frees plus number of alive objects.
// Similarly, total amount of allocated memory is calculated as amount of freed memory
// plus amount of alive heap memory.
mstats.alloc = 0;
mstats.total_alloc = 0;
mstats.nmalloc = 0;
mstats.nfree = 0;
for(i = 0; i < nelem(mstats.by_size); i++) {
mstats.by_size[i].nmalloc = 0;
mstats.by_size[i].nfree = 0;
}
// Flush MCache's to MCentral.
if(g == g->m->g0)
flushallmcaches();
else {
fn = flushallmcaches_m;
runtime·mcall(&fn);
}
// Aggregate local stats.
cachestats();
// Scan all spans and count number of alive objects.
runtime·lock(&runtime·mheap.lock);
for(i = 0; i < runtime·mheap.nspan; i++) {
s = runtime·mheap.allspans[i];
if(s->state != MSpanInUse)
continue;
if(s->sizeclass == 0) {
mstats.nmalloc++;
mstats.alloc += s->elemsize;
} else {
mstats.nmalloc += s->ref;
mstats.by_size[s->sizeclass].nmalloc += s->ref;
mstats.alloc += s->ref*s->elemsize;
}
}
runtime·unlock(&runtime·mheap.lock);
// Aggregate by size class.
smallfree = 0;
mstats.nfree = runtime·mheap.nlargefree;
for(i = 0; i < nelem(mstats.by_size); i++) {
mstats.nfree += runtime·mheap.nsmallfree[i];
mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i];
mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i];
smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i];
}
mstats.nfree += mstats.tinyallocs;
mstats.nmalloc += mstats.nfree;
// Calculate derived stats.
mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree;
mstats.heap_alloc = mstats.alloc;
mstats.heap_objects = mstats.nmalloc - mstats.nfree;
}
// Structure of arguments passed to function gc().
// This allows the arguments to be passed via runtime·mcall.
struct gc_args
{
int64 start_time; // start time of GC in ns (just before stoptheworld)
bool eagersweep;
};
static void gc(struct gc_args *args);
int32
runtime·readgogc(void)
{
byte *p;
p = runtime·getenv("GOGC");
if(p == nil || p[0] == '\0')
return 100;
if(runtime·strcmp(p, (byte*)"off") == 0)
return -1;
return runtime·atoi(p);
}
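// For illustration: an unset or empty GOGC yields gcpercent = 100, GOGC=off
// disables collection (gcpercent = -1), and e.g. GOGC=200 lets the heap grow
// to roughly 3x the post-GC heap before the next collection, since gc() sets
// next_gc = heap_alloc + heap_alloc*gcpercent/100.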
void
runtime·gcinit(void)
{
if(sizeof(Workbuf) != WorkbufSize)
runtime·throw("runtime: size of Workbuf is suboptimal");
runtime·work.markfor = runtime·parforalloc(MaxGcproc);
runtime·gcpercent = runtime·readgogc();
runtime·gcdatamask = unrollglobgcprog(runtime·gcdata, runtime·edata - runtime·data);
runtime·gcbssmask = unrollglobgcprog(runtime·gcbss, runtime·ebss - runtime·bss);
}
void
runtime·gc_m(void)
{
struct gc_args a;
G *gp;
gp = g->m->curg;
runtime·casgstatus(gp, Grunning, Gwaiting);
gp->waitreason = runtime·gostringnocopy((byte*)"garbage collection");
a.start_time = (uint64)(g->m->scalararg[0]) | ((uint64)(g->m->scalararg[1]) << 32);
a.eagersweep = g->m->scalararg[2];
gc(&a);
if(nbadblock > 0) {
// Work out path from root to bad block.
for(;;) {
gc(&a);
if(nbadblock >= nelem(badblock))
runtime·throw("cannot find path to bad pointer");
}
}
runtime·casgstatus(gp, Gwaiting, Grunning);
}
static void
gc(struct gc_args *args)
{
int64 t0, t1, t2, t3, t4;
uint64 heap0, heap1, obj;
GCStats stats;
if(DebugPtrs)
runtime·printf("GC start\n");
if(runtime·debug.allocfreetrace)
runtime·tracegc();
g->m->traceback = 2;
t0 = args->start_time;
runtime·work.tstart = args->start_time;
t1 = 0;
if(runtime·debug.gctrace)
t1 = runtime·nanotime();
// Sweep whatever is not swept by bgsweep.
while(runtime·sweepone() != -1)
runtime·sweep.npausesweep++;
// Cache runtime.mheap.allspans in work.spans to avoid conflicts with
// resizing/freeing allspans.
// New spans can be created while GC progresses, but they are not garbage for
// this round:
// - new stack spans can be created even while the world is stopped.
// - new malloc spans can be created during the concurrent sweep.
// Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from the heap.
runtime·lock(&runtime·mheap.lock);
// Free the old cached sweep array if necessary.
if(runtime·work.spans != nil && runtime·work.spans != runtime·mheap.allspans)
runtime·SysFree(runtime·work.spans, runtime·work.nspan*sizeof(runtime·work.spans[0]), &mstats.other_sys);
// Cache the current array for marking.
runtime·mheap.gcspans = runtime·mheap.allspans;
runtime·work.spans = runtime·mheap.allspans;
runtime·work.nspan = runtime·mheap.nspan;
runtime·unlock(&runtime·mheap.lock);
runtime·work.nwait = 0;
runtime·work.ndone = 0;
runtime·work.nproc = runtime·gcprocs();
runtime·parforsetup(runtime·work.markfor, runtime·work.nproc, RootCount + runtime·allglen, nil, false, markroot);
if(runtime·work.nproc > 1) {
runtime·noteclear(&runtime·work.alldone);
runtime·helpgc(runtime·work.nproc);
}
t2 = 0;
if(runtime·debug.gctrace)
t2 = runtime·nanotime();
gchelperstart();
runtime·parfordo(runtime·work.markfor);
scanblock(nil, 0, nil);
t3 = 0;
if(runtime·debug.gctrace)
t3 = runtime·nanotime();
if(runtime·work.nproc > 1)
runtime·notesleep(&runtime·work.alldone);
runtime·shrinkfinish();
cachestats();
// The next_gc calculation is tricky with concurrent sweep since we don't know the size of the live heap.
// Estimate what the live heap size was after the previous GC (for tracing only).
heap0 = mstats.next_gc*100/(runtime·gcpercent+100);
// conservatively set next_gc to high value assuming that everything is live
// concurrent/lazy sweep will reduce this number while discovering new garbage
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*runtime·gcpercent/100;
t4 = runtime·nanotime();
runtime·atomicstore64(&mstats.last_gc, runtime·unixnanotime()); // must be Unix time to make sense to user
mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
mstats.pause_end[mstats.numgc%nelem(mstats.pause_end)] = t4;
mstats.pause_total_ns += t4 - t0;
mstats.numgc++;
if(mstats.debuggc)
runtime·printf("pause %D\n", t4-t0);
if(runtime·debug.gctrace) {
heap1 = mstats.heap_alloc;
runtime·updatememstats(&stats);
if(heap1 != mstats.heap_alloc) {
runtime·printf("runtime: mstats skew: heap=%D/%D\n", heap1, mstats.heap_alloc);
runtime·throw("mstats skew");
}
obj = mstats.nmalloc - mstats.nfree;
stats.nprocyield += runtime·work.markfor->nprocyield;
stats.nosyield += runtime·work.markfor->nosyield;
stats.nsleep += runtime·work.markfor->nsleep;
runtime·printf("gc%d(%d): %D+%D+%D+%D us, %D -> %D MB, %D (%D-%D) objects,"
" %d goroutines,"
" %d/%d/%d sweeps,"
" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
mstats.numgc, runtime·work.nproc, (t1-t0)/1000, (t2-t1)/1000, (t3-t2)/1000, (t4-t3)/1000,
heap0>>20, heap1>>20, obj,
mstats.nmalloc, mstats.nfree,
runtime·gcount(),
runtime·work.nspan, runtime·sweep.nbgsweep, runtime·sweep.npausesweep,
stats.nhandoff, stats.nhandoffcnt,
runtime·work.markfor->nsteal, runtime·work.markfor->nstealcnt,
stats.nprocyield, stats.nosyield, stats.nsleep);
runtime·sweep.nbgsweep = runtime·sweep.npausesweep = 0;
}
// See the comment in the beginning of this function as to why we need the following.
// Even if this is still stop-the-world, a concurrent exitsyscall can allocate a stack from the heap.
runtime·lock(&runtime·mheap.lock);
// Free the old cached mark array if necessary.
if(runtime·work.spans != nil && runtime·work.spans != runtime·mheap.allspans)
runtime·SysFree(runtime·work.spans, runtime·work.nspan*sizeof(runtime·work.spans[0]), &mstats.other_sys);
// Cache the current array for sweeping.
runtime·mheap.gcspans = runtime·mheap.allspans;
runtime·mheap.sweepgen += 2;
runtime·mheap.sweepdone = false;
runtime·work.spans = runtime·mheap.allspans;
runtime·work.nspan = runtime·mheap.nspan;
runtime·sweep.spanidx = 0;
runtime·unlock(&runtime·mheap.lock);
if(ConcurrentSweep && !args->eagersweep) {
runtime·lock(&runtime·gclock);
if(runtime·sweep.g == nil)
runtime·sweep.g = runtime·newproc1(&bgsweepv, nil, 0, 0, gc);
else if(runtime·sweep.parked) {
runtime·sweep.parked = false;
runtime·ready(runtime·sweep.g);
}
runtime·unlock(&runtime·gclock);
} else {
// Sweep all spans eagerly.
while(runtime·sweepone() != -1)
runtime·sweep.npausesweep++;
// Do an additional mProf_GC, because all 'free' events are now real as well.
runtime·mProf_GC();
}
runtime·mProf_GC();
g->m->traceback = 0;
if(DebugPtrs)
runtime·printf("GC end\n");
}
extern uintptr runtime·sizeof_C_MStats;
static void readmemstats_m(void);
void
runtime·readmemstats_m(void)
{
MStats *stats;
stats = g->m->ptrarg[0];
g->m->ptrarg[0] = nil;
runtime·updatememstats(nil);
// The size of the trailing by_size array differs between Go and C:
// NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
runtime·memmove(stats, &mstats, runtime·sizeof_C_MStats);
// Stack numbers are part of the heap numbers; separate them out for user consumption.
stats->stacks_sys = stats->stacks_inuse;
stats->heap_inuse -= stats->stacks_inuse;
stats->heap_sys -= stats->stacks_inuse;
}
static void readgcstats_m(void);
#pragma textflag NOSPLIT
void
runtime∕debug·readGCStats(Slice *pauses)
{
void (*fn)(void);
g->m->ptrarg[0] = pauses;
fn = readgcstats_m;
runtime·onM(&fn);
}
static void
readgcstats_m(void)
{
Slice *pauses;
uint64 *p;
uint32 i, j, n;
pauses = g->m->ptrarg[0];
g->m->ptrarg[0] = nil;
// Calling code in runtime/debug should make the slice large enough.
if(pauses->cap < nelem(mstats.pause_ns)+3)
runtime·throw("runtime: short slice passed to readGCStats");
// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
p = (uint64*)pauses->array;
runtime·lock(&runtime·mheap.lock);
n = mstats.numgc;
if(n > nelem(mstats.pause_ns))
n = nelem(mstats.pause_ns);
// The pause buffer is circular. The most recent pause is at
// pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
// from there to go back farther in time. We deliver the times
// most recent first (in p[0]).
for(i=0; i<n; i++) {
j = (mstats.numgc-1-i)%nelem(mstats.pause_ns);
p[i] = mstats.pause_ns[j];
p[n+i] = mstats.pause_end[j];
}
p[n+n] = mstats.last_gc;
p[n+n+1] = mstats.numgc;
p[n+n+2] = mstats.pause_total_ns;
runtime·unlock(&runtime·mheap.lock);
pauses->len = n+n+3;
}
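// Sketch of the resulting slice layout (directly from the code above):
// with n recorded pauses, p[0..n-1] hold the pause durations, most recent
// first, p[n..2n-1] hold the matching pause end times, and the tail is
// p[2n] = last_gc, p[2n+1] = numgc, p[2n+2] = pause_total_ns, for a total
// length of 2n+3. For example, with numgc = 3 (and a large enough ring),
// p[0] = pause_ns[2], p[1] = pause_ns[1], p[2] = pause_ns[0].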
void
runtime·setgcpercent_m(void)
{
int32 in;
int32 out;
in = (int32)(intptr)g->m->scalararg[0];
runtime·lock(&runtime·mheap.lock);
out = runtime·gcpercent;
if(in < 0)
in = -1;
runtime·gcpercent = in;
runtime·unlock(&runtime·mheap.lock);
g->m->scalararg[0] = (uintptr)(intptr)out;
}
static void
gchelperstart(void)
{
if(g->m->helpgc < 0 || g->m->helpgc >= MaxGcproc)
runtime·throw("gchelperstart: bad m->helpgc");
if(g != g->m->g0)
runtime·throw("gchelper not running on g0 stack");
}
G*
runtime·wakefing(void)
{
G *res;
res = nil;
runtime·lock(&runtime·finlock);
if(runtime·fingwait && runtime·fingwake) {
runtime·fingwait = false;
runtime·fingwake = false;
res = runtime·fing;
}
runtime·unlock(&runtime·finlock);
return res;
}
// Recursively unrolls GC program in prog.
// mask is where to store the result.
// ppos is a pointer to position in mask, in bits.
// sparse says to generate a 4-bit-per-word mask for the heap (otherwise a 2-bit-per-word mask for data/bss).
static byte*
unrollgcprog1(byte *mask, byte *prog, uintptr *ppos, bool inplace, bool sparse)
{
uintptr pos, siz, i, off;
byte *arena_start, *prog1, v, *bitp, shift;
arena_start = runtime·mheap.arena_start;
pos = *ppos;
for(;;) {
switch(prog[0]) {
case insData:
prog++;
siz = prog[0];
prog++;
for(i = 0; i < siz; i++) {
v = prog[i/PointersPerByte];
v >>= (i%PointersPerByte)*BitsPerPointer;
v &= BitsMask;
if(inplace) {
// Store directly into GC bitmap.
off = (uintptr*)(mask+pos) - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
if(shift==0)
*bitp = 0;
*bitp |= v<<(shift+2);
pos += PtrSize;
} else if(sparse) {
// 4-bits per word
v <<= (pos%8)+2;
mask[pos/8] |= v;
pos += gcBits;
} else {
// 2-bits per word
v <<= pos%8;
mask[pos/8] |= v;
pos += BitsPerPointer;
}
}
prog += ROUND(siz*BitsPerPointer, 8)/8;
break;
case insArray:
prog++;
siz = 0;
for(i = 0; i < PtrSize; i++)
siz = (siz<<8) + prog[PtrSize-i-1];
prog += PtrSize;
prog1 = nil;
for(i = 0; i < siz; i++)
prog1 = unrollgcprog1(mask, prog, &pos, inplace, sparse);
if(prog1[0] != insArrayEnd)
runtime·throw("unrollgcprog: array does not end with insArrayEnd");
prog = prog1+1;
break;
case insArrayEnd:
case insEnd:
*ppos = pos;
return prog;
default:
runtime·throw("unrollgcprog: unknown instruction");
}
}
}
// Unrolls GC program prog for data/bss, returns dense GC mask.
static BitVector
unrollglobgcprog(byte *prog, uintptr size)
{
byte *mask;
uintptr pos, masksize;
masksize = ROUND(ROUND(size, PtrSize)/PtrSize*BitsPerPointer, 8)/8;
mask = runtime·persistentalloc(masksize+1, 0, &mstats.gc_sys);
mask[masksize] = 0xa1;
pos = 0;
prog = unrollgcprog1(mask, prog, &pos, false, false);
if(pos != size/PtrSize*BitsPerPointer) {
runtime·printf("unrollglobgcprog: bad program size, got %D, expect %D\n",
(uint64)pos, (uint64)size/PtrSize*BitsPerPointer);
runtime·throw("unrollglobgcprog: bad program size");
}
if(prog[0] != insEnd)
runtime·throw("unrollglobgcprog: program does not end with insEnd");
if(mask[masksize] != 0xa1)
runtime·throw("unrollglobgcprog: overflow");
return (BitVector){masksize*8, mask};
}
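// Worked example of the mask sizing above (a sketch assuming 64-bit,
// PtrSize = 8, BitsPerPointer = 2): for a 40-byte global, ROUND(40, 8)/8
// gives 5 words, 5*2 = 10 bits, rounded up to 16 bits, so masksize is
// 2 bytes. One extra guard byte (0xa1) is allocated and checked afterwards
// to detect overflow while unrolling.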
void
runtime·unrollgcproginplace_m(void)
{
uintptr size, size0, pos, off;
byte *arena_start, *prog, *bitp, shift;
Type *typ;
void *v;
v = g->m->ptrarg[0];
typ = g->m->ptrarg[1];
size = g->m->scalararg[0];
size0 = g->m->scalararg[1];
g->m->ptrarg[0] = nil;
g->m->ptrarg[1] = nil;
pos = 0;
prog = (byte*)typ->gc[1];
while(pos != size0)
unrollgcprog1(v, prog, &pos, true, true);
// Mark first word as bitAllocated.
arena_start = runtime·mheap.arena_start;
off = (uintptr*)v - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
*bitp |= bitBoundary<<shift;
// Mark word after last as BitsDead.
if(size0 < size) {
off = (uintptr*)((byte*)v + size0) - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
*bitp &= ~(bitPtrMask<<shift) | ((uintptr)BitsDead<<(shift+2));
}
}
// Unrolls GC program in typ->gc[1] into typ->gc[0]
void
runtime·unrollgcprog_m(void)
{
static Mutex lock;
Type *typ;
byte *mask, *prog;
uintptr pos;
uint32 x;
typ = g->m->ptrarg[0];
g->m->ptrarg[0] = nil;
runtime·lock(&lock);
mask = (byte*)typ->gc[0];
if(mask[0] == 0) {
pos = 8; // skip the unroll flag
prog = (byte*)typ->gc[1];
prog = unrollgcprog1(mask, prog, &pos, false, true);
if(prog[0] != insEnd)
runtime·throw("unrollgcprog: program does not end with insEnd");
if(((typ->size/PtrSize)%2) != 0) {
// repeat the program twice
prog = (byte*)typ->gc[1];
unrollgcprog1(mask, prog, &pos, false, true);
}
// atomic way to say mask[0] = 1
x = ((uint32*)mask)[0];
runtime·atomicstore((uint32*)mask, x|1);
}
runtime·unlock(&lock);
}
// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
void
runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
{
uintptr i, off, step;
byte *b;
if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
runtime·throw("markspan: bad pointer");
// Find bits of the beginning of the span.
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
if((off%wordsPerBitmapByte) != 0)
runtime·throw("markspan: unaligned length");
// Okay to use non-atomic ops here, because we control
// the entire span, and each bitmap byte has bits for only
// one span, so no other goroutines are changing these bitmap words.
if(size == PtrSize) {
// Possible only on 64-bits (minimal size class is 8 bytes).
// Poor man's memset(0x11).
if(0x11 != ((bitBoundary+BitsDead)<<gcBits) + (bitBoundary+BitsDead))
runtime·throw("markspan: bad bits");
if((n%(wordsPerBitmapByte*PtrSize)) != 0)
runtime·throw("markspan: unaligned length");
b = b - n/wordsPerBitmapByte + 1; // find first byte
if(((uintptr)b%PtrSize) != 0)
runtime·throw("markspan: unaligned pointer");
for(i = 0; i != n; i += wordsPerBitmapByte*PtrSize, b += PtrSize)
*(uintptr*)b = (uintptr)0x1111111111111111ULL; // bitBoundary+BitsDead
return;
}
if(leftover)
n++; // mark a boundary just past end of last block too
step = size/(PtrSize*wordsPerBitmapByte);
for(i = 0; i != n; i++, b -= step)
*b = bitBoundary|(BitsDead<<2);
}
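// Worked example of the fast path above (a sketch for a 64-bit system):
// with bitBoundary = 1 and BitsDead = 0, each bitmap byte for two 8-byte
// objects becomes ((1+0)<<gcBits) + (1+0) = 0x11, so storing the uintptr
// 0x1111111111111111 initializes 8 bitmap bytes, i.e. 16 heap words, in
// one write. The general path instead writes bitBoundary|(BitsDead<<2)
// into one byte per block, stepping size/(PtrSize*wordsPerBitmapByte)
// bytes down the bitmap between blocks.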
// unmark the span of memory at v of length n bytes.
void
runtime·unmarkspan(void *v, uintptr n)
{
uintptr off;
byte *b;
if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
runtime·throw("markspan: bad pointer");
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
if((off % (PtrSize*wordsPerBitmapByte)) != 0)
runtime·throw("markspan: unaligned pointer");
b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
n /= PtrSize;
if(n%(PtrSize*wordsPerBitmapByte) != 0)
runtime·throw("unmarkspan: unaligned length");
// Okay to use non-atomic ops here, because we control
// the entire span, and each bitmap word has bits for only
// one span, so no other goroutines are changing these
// bitmap words.
n /= wordsPerBitmapByte;
runtime·memclr(b - n + 1, n);
}
void
runtime·MHeap_MapBits(MHeap *h)
{
// Caller has added extra mappings to the arena.
// Add extra mappings of bitmap words as needed.
// We allocate extra bitmap pieces in chunks of bitmapChunk.
enum {
bitmapChunk = 8192
};
uintptr n;
n = (h->arena_used - h->arena_start) / (PtrSize*wordsPerBitmapByte);
n = ROUND(n, bitmapChunk);
n = ROUND(n, PhysPageSize);
if(h->bitmap_mapped >= n)
return;
runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped, h->arena_reserved, &mstats.gc_sys);
h->bitmap_mapped = n;
}
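// For illustration (assuming 64-bit, PtrSize = 8, wordsPerBitmapByte = 2):
// the bitmap needs one byte per 16 bytes of arena, so n is 1/16 of the
// currently used arena, rounded up to bitmapChunk and to the physical page
// size; e.g. 64 MB of arena_used needs 4 MB of bitmap, mapped just below
// arena_start and growing downward as the arena grows.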
static bool
getgcmaskcb(Stkframe *frame, void *ctxt)
{
Stkframe *frame0;
frame0 = ctxt;
if(frame->sp <= frame0->sp && frame0->sp < frame->varp) {
*frame0 = *frame;
return false;
}
return true;
}
// Returns GC type info for object p for testing.
void
runtime·getgcmask(byte *p, Type *t, byte **mask, uintptr *len)
{
Stkframe frame;
uintptr i, n, off;
byte *base, bits, shift, *b;
bool (*cb)(Stkframe*, void*);
*mask = nil;
*len = 0;
// data
if(p >= runtime·data && p < runtime·edata) {
n = ((PtrType*)t)->elem->size;
*len = n/PtrSize;
*mask = runtime·mallocgc(*len, nil, FlagNoScan);
for(i = 0; i < n; i += PtrSize) {
off = (p+i-runtime·data)/PtrSize;
bits = (runtime·gcdatamask.bytedata[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
(*mask)[i/PtrSize] = bits;
}
return;
}
// bss
if(p >= runtime·bss && p < runtime·ebss) {
n = ((PtrType*)t)->elem->size;
*len = n/PtrSize;
*mask = runtime·mallocgc(*len, nil, FlagNoScan);
for(i = 0; i < n; i += PtrSize) {
off = (p+i-runtime·bss)/PtrSize;
bits = (runtime·gcbssmask.bytedata[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
(*mask)[i/PtrSize] = bits;
}
return;
}
// heap
if(runtime·mlookup(p, &base, &n, nil)) {
*len = n/PtrSize;
*mask = runtime·mallocgc(*len, nil, FlagNoScan);
for(i = 0; i < n; i += PtrSize) {
off = (uintptr*)(base+i) - (uintptr*)runtime·mheap.arena_start;
b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
bits = (*b >> (shift+2))&BitsMask;
(*mask)[i/PtrSize] = bits;
}
return;
}
// stack
frame.fn = nil;
frame.sp = (uintptr)p;
cb = getgcmaskcb;
runtime·gentraceback(g->m->curg->sched.pc, g->m->curg->sched.sp, 0, g->m->curg, 0, nil, 1000, &cb, &frame, 0);
if(frame.fn != nil) {
Func *f;
StackMap *stackmap;
BitVector bv;
uintptr size;
uintptr targetpc;
int32 pcdata;
f = frame.fn;
targetpc = frame.continpc;
if(targetpc == 0)
return;
if(targetpc != f->entry)
targetpc--;
pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
if(pcdata == -1)
return;
stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
if(stackmap == nil || stackmap->n <= 0)
return;
bv = runtime·stackmapdata(stackmap, pcdata);
size = bv.n/BitsPerPointer*PtrSize;
n = ((PtrType*)t)->elem->size;
*len = n/PtrSize;
*mask = runtime·mallocgc(*len, nil, FlagNoScan);
for(i = 0; i < n; i += PtrSize) {
off = (p+i-(byte*)frame.varp+size)/PtrSize;
bits = (bv.bytedata[off*BitsPerPointer/8] >> ((off*BitsPerPointer)%8))&BitsMask;
(*mask)[i/PtrSize] = bits;
}
}
}
void runtime·gc_unixnanotime(int64 *now);
int64
runtime·unixnanotime(void)
{
int64 now;
runtime·gc_unixnanotime(&now);
return now;
}
......@@ -60,10 +60,8 @@ func clearpools() {
}
}
func gosweepone() uintptr
func gosweepdone() bool
func bgsweep() {
sweep.g = getg()
getg().issystem = true
for {
for gosweepone() != ^uintptr(0) {
......
......@@ -4,11 +4,15 @@
// Garbage collector (GC)
enum {
package runtime
const (
// Four bits per word (see #defines below).
gcBits = 4,
wordsPerBitmapByte = 8/gcBits,
gcBits = 4
wordsPerBitmapByte = 8 / gcBits
)
const (
// GC type info programs.
// The programs allow the type info required for GC to be stored in a compact form.
// Most importantly, arrays take O(1) space instead of O(n).
......@@ -26,38 +30,33 @@ enum {
// For example, for type struct { x []byte; y [20]struct{ z int; w *byte }; }
// the program looks like:
//
// insData 3 (BitsMultiWord BitsSlice BitsScalar)
// insData 3 (BitsPointer BitsScalar BitsScalar)
// insArray 20 insData 2 (BitsScalar BitsPointer) insArrayEnd insEnd
//
// Total size of the program is 17 bytes (13 bytes on 32-bits).
// The corresponding GC mask would take 43 bytes (it would be repeated
// because the type has odd number of words).
insData = 1,
insArray,
insArrayEnd,
insEnd,
insData = 1 + iota
insArray
insArrayEnd
insEnd
)
const (
// Pointer map
BitsPerPointer = 2,
BitsMask = (1<<BitsPerPointer)-1,
PointersPerByte = 8/BitsPerPointer,
_BitsPerPointer = 2
_BitsMask = (1 << _BitsPerPointer) - 1
_PointersPerByte = 8 / _BitsPerPointer
// If you change these, also change scanblock.
// scanblock does "if(bits == BitsScalar || bits == BitsDead)" as "if(bits <= BitsScalar)".
BitsDead = 0,
BitsScalar = 1,
BitsPointer = 2,
BitsMultiWord = 3,
// BitsMultiWord will be set for the first word of a multi-word item.
// When it is set, one of the following will be set for the second word.
// NOT USED ANYMORE: BitsString = 0,
// NOT USED ANYMORE: BitsSlice = 1,
BitsIface = 2,
BitsEface = 3,
_BitsDead = 0
_BitsScalar = 1
_BitsPointer = 2
// 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively.
MaxGCMask = 64,
};
_MaxGCMask = 64
)
// Bits in per-word bitmap.
// #defines because we shift the values beyond 32 bits.
......@@ -70,9 +69,9 @@ enum {
// there. On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
// the only difference is that the divisor is 8.)
enum {
bitBoundary = 1, // boundary of an object
bitMarked = 2, // marked object
bitMask = bitBoundary | bitMarked,
bitPtrMask = BitsMask<<2,
};
const (
bitBoundary = 1 // boundary of an object
bitMarked = 2 // marked object
bitMask = bitBoundary | bitMarked
bitPtrMask = _BitsMask << 2
)
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Page heap.
//
// See malloc.h for overview.
//
// When a MSpan is in the heap free list, state == MSpanFree
// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
//
// When a MSpan is allocated, state == MSpanInUse or MSpanStack
// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
static MSpan *MHeap_AllocSpanLocked(MHeap*, uintptr);
static void MHeap_FreeSpanLocked(MHeap*, MSpan*, bool, bool);
static bool MHeap_Grow(MHeap*, uintptr);
static MSpan *MHeap_AllocLarge(MHeap*, uintptr);
static MSpan *BestFit(MSpan*, uintptr, MSpan*);
static void
RecordSpan(void *vh, byte *p)
{
MHeap *h;
MSpan *s;
MSpan **all;
uint32 cap;
h = vh;
s = (MSpan*)p;
if(h->nspan >= h->nspancap) {
cap = 64*1024/sizeof(all[0]);
if(cap < h->nspancap*3/2)
cap = h->nspancap*3/2;
all = (MSpan**)runtime·sysAlloc(cap*sizeof(all[0]), &mstats.other_sys);
if(all == nil)
runtime·throw("runtime: cannot allocate memory");
if(h->allspans) {
runtime·memmove(all, h->allspans, h->nspancap*sizeof(all[0]));
// Don't free the old array if it's referenced by sweep.
// See the comment in mgc0.c.
if(h->allspans != runtime·mheap.gcspans)
runtime·SysFree(h->allspans, h->nspancap*sizeof(all[0]), &mstats.other_sys);
}
h->allspans = all;
h->nspancap = cap;
}
h->allspans[h->nspan++] = s;
}
// Initialize the heap; fetch memory using alloc.
void
runtime·MHeap_Init(MHeap *h)
{
uint32 i;
runtime·FixAlloc_Init(&h->spanalloc, sizeof(MSpan), RecordSpan, h, &mstats.mspan_sys);
runtime·FixAlloc_Init(&h->cachealloc, sizeof(MCache), nil, nil, &mstats.mcache_sys);
runtime·FixAlloc_Init(&h->specialfinalizeralloc, sizeof(SpecialFinalizer), nil, nil, &mstats.other_sys);
runtime·FixAlloc_Init(&h->specialprofilealloc, sizeof(SpecialProfile), nil, nil, &mstats.other_sys);
// h->mapcache needs no init
for(i=0; i<nelem(h->free); i++) {
runtime·MSpanList_Init(&h->free[i]);
runtime·MSpanList_Init(&h->busy[i]);
}
runtime·MSpanList_Init(&h->freelarge);
runtime·MSpanList_Init(&h->busylarge);
for(i=0; i<nelem(h->central); i++)
runtime·MCentral_Init(&h->central[i].mcentral, i);
}
void
runtime·MHeap_MapSpans(MHeap *h)
{
uintptr n;
// Map spans array, PageSize at a time.
n = (uintptr)h->arena_used;
n -= (uintptr)h->arena_start;
n = n / PageSize * sizeof(h->spans[0]);
n = ROUND(n, PhysPageSize);
if(h->spans_mapped >= n)
return;
runtime·SysMap((byte*)h->spans + h->spans_mapped, n - h->spans_mapped, h->arena_reserved, &mstats.other_sys);
h->spans_mapped = n;
}
// Sweeps spans in list until it reclaims at least npages into the heap.
// Returns the actual number of pages reclaimed.
static uintptr
MHeap_ReclaimList(MHeap *h, MSpan *list, uintptr npages)
{
MSpan *s;
uintptr n;
uint32 sg;
n = 0;
sg = runtime·mheap.sweepgen;
retry:
for(s = list->next; s != list; s = s->next) {
if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
runtime·MSpanList_Remove(s);
// swept spans are at the end of the list
runtime·MSpanList_InsertBack(list, s);
runtime·unlock(&h->lock);
n += runtime·MSpan_Sweep(s, false);
runtime·lock(&h->lock);
if(n >= npages)
return n;
// the span could have been moved elsewhere
goto retry;
}
if(s->sweepgen == sg-1) {
// the span is being swept by the background sweeper, skip it
continue;
}
// already swept empty span;
// all subsequent ones must also be either swept or in the process of sweeping
break;
}
return n;
}
// Sweeps and reclaims at least npage pages into heap.
// Called before allocating npage pages.
static void
MHeap_Reclaim(MHeap *h, uintptr npage)
{
uintptr reclaimed, n;
// First try to sweep busy spans with large objects of size >= npage,
// this has good chances of reclaiming the necessary space.
for(n=npage; n < nelem(h->busy); n++) {
if(MHeap_ReclaimList(h, &h->busy[n], npage))
return; // Bingo!
}
// Then -- even larger objects.
if(MHeap_ReclaimList(h, &h->busylarge, npage))
return; // Bingo!
// Now try smaller objects.
// One such object is not enough, so we need to reclaim several of them.
reclaimed = 0;
for(n=0; n < npage && n < nelem(h->busy); n++) {
reclaimed += MHeap_ReclaimList(h, &h->busy[n], npage-reclaimed);
if(reclaimed >= npage)
return;
}
// Now sweep everything that is not yet swept.
runtime·unlock(&h->lock);
for(;;) {
n = runtime·sweepone();
if(n == -1) // all spans are swept
break;
reclaimed += n;
if(reclaimed >= npage)
break;
}
runtime·lock(&h->lock);
}
// Allocate a new span of npage pages from the heap for GC'd memory
// and record its size class in the HeapMap and HeapMapCache.
static MSpan*
mheap_alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large)
{
MSpan *s;
if(g != g->m->g0)
runtime·throw("mheap_alloc not on M stack");
runtime·lock(&h->lock);
// To prevent excessive heap growth, before allocating n pages
// we need to sweep and reclaim at least n pages.
if(!h->sweepdone)
MHeap_Reclaim(h, npage);
// transfer stats from cache to global
mstats.heap_alloc += g->m->mcache->local_cachealloc;
g->m->mcache->local_cachealloc = 0;
mstats.tinyallocs += g->m->mcache->local_tinyallocs;
g->m->mcache->local_tinyallocs = 0;
s = MHeap_AllocSpanLocked(h, npage);
if(s != nil) {
// Record span info, because gc needs to be
// able to map interior pointer to containing span.
runtime·atomicstore(&s->sweepgen, h->sweepgen);
s->state = MSpanInUse;
s->freelist = nil;
s->ref = 0;
s->sizeclass = sizeclass;
s->elemsize = (sizeclass==0 ? s->npages<<PageShift : runtime·class_to_size[sizeclass]);
// update stats, sweep lists
if(large) {
mstats.heap_objects++;
mstats.heap_alloc += npage<<PageShift;
// Swept spans are at the end of lists.
if(s->npages < nelem(h->free))
runtime·MSpanList_InsertBack(&h->busy[s->npages], s);
else
runtime·MSpanList_InsertBack(&h->busylarge, s);
}
}
runtime·unlock(&h->lock);
return s;
}
static void
mheap_alloc_m(G *gp)
{
MHeap *h;
MSpan *s;
h = g->m->ptrarg[0];
g->m->ptrarg[0] = nil;
s = mheap_alloc(h, g->m->scalararg[0], g->m->scalararg[1], g->m->scalararg[2]);
g->m->ptrarg[0] = s;
runtime·gogo(&gp->sched);
}
MSpan*
runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero)
{
MSpan *s;
void (*fn)(G*);
// Don't do any operations that lock the heap on the G stack.
// It might trigger stack growth, and the stack growth code needs
// to be able to allocate heap.
if(g == g->m->g0) {
s = mheap_alloc(h, npage, sizeclass, large);
} else {
g->m->ptrarg[0] = h;
g->m->scalararg[0] = npage;
g->m->scalararg[1] = sizeclass;
g->m->scalararg[2] = large;
fn = mheap_alloc_m;
runtime·mcall(&fn);
s = g->m->ptrarg[0];
g->m->ptrarg[0] = nil;
}
if(s != nil) {
if(needzero && s->needzero)
runtime·memclr((byte*)(s->start<<PageShift), s->npages<<PageShift);
s->needzero = 0;
}
return s;
}
MSpan*
runtime·MHeap_AllocStack(MHeap *h, uintptr npage)
{
MSpan *s;
if(g != g->m->g0)
runtime·throw("mheap_allocstack not on M stack");
runtime·lock(&h->lock);
s = MHeap_AllocSpanLocked(h, npage);
if(s != nil) {
s->state = MSpanStack;
s->freelist = nil;
s->ref = 0;
mstats.stacks_inuse += s->npages<<PageShift;
}
runtime·unlock(&h->lock);
return s;
}
// Allocates a span of the given size. h must be locked.
// The returned span has been removed from the
// free list, but its state is still MSpanFree.
static MSpan*
MHeap_AllocSpanLocked(MHeap *h, uintptr npage)
{
uintptr n;
MSpan *s, *t;
pageID p;
// Try in fixed-size lists up to max.
for(n=npage; n < nelem(h->free); n++) {
if(!runtime·MSpanList_IsEmpty(&h->free[n])) {
s = h->free[n].next;
goto HaveSpan;
}
}
// Best fit in list of large spans.
if((s = MHeap_AllocLarge(h, npage)) == nil) {
if(!MHeap_Grow(h, npage))
return nil;
if((s = MHeap_AllocLarge(h, npage)) == nil)
return nil;
}
HaveSpan:
// Mark span in use.
if(s->state != MSpanFree)
runtime·throw("MHeap_AllocLocked - MSpan not free");
if(s->npages < npage)
runtime·throw("MHeap_AllocLocked - bad npages");
runtime·MSpanList_Remove(s);
if(s->next != nil || s->prev != nil)
runtime·throw("still in list");
if(s->npreleased > 0) {
runtime·SysUsed((void*)(s->start<<PageShift), s->npages<<PageShift);
mstats.heap_released -= s->npreleased<<PageShift;
s->npreleased = 0;
}
if(s->npages > npage) {
// Trim extra and put it back in the heap.
t = runtime·FixAlloc_Alloc(&h->spanalloc);
runtime·MSpan_Init(t, s->start + npage, s->npages - npage);
s->npages = npage;
p = t->start;
p -= ((uintptr)h->arena_start>>PageShift);
if(p > 0)
h->spans[p-1] = s;
h->spans[p] = t;
h->spans[p+t->npages-1] = t;
t->needzero = s->needzero;
s->state = MSpanStack; // prevent coalescing with s
t->state = MSpanStack;
MHeap_FreeSpanLocked(h, t, false, false);
t->unusedsince = s->unusedsince; // preserve age (TODO: wrong: t is possibly merged and/or deallocated at this point)
s->state = MSpanFree;
}
s->unusedsince = 0;
p = s->start;
p -= ((uintptr)h->arena_start>>PageShift);
for(n=0; n<npage; n++)
h->spans[p+n] = s;
mstats.heap_inuse += npage<<PageShift;
mstats.heap_idle -= npage<<PageShift;
//runtime·printf("spanalloc %p\n", s->start << PageShift);
if(s->next != nil || s->prev != nil)
runtime·throw("still in list");
return s;
}
// Allocate a span of exactly npage pages from the list of large spans.
static MSpan*
MHeap_AllocLarge(MHeap *h, uintptr npage)
{
return BestFit(&h->freelarge, npage, nil);
}
// Search list for smallest span with >= npage pages.
// If there are multiple smallest spans, take the one
// with the earliest starting address.
static MSpan*
BestFit(MSpan *list, uintptr npage, MSpan *best)
{
MSpan *s;
for(s=list->next; s != list; s=s->next) {
if(s->npages < npage)
continue;
if(best == nil
|| s->npages < best->npages
|| (s->npages == best->npages && s->start < best->start))
best = s;
}
return best;
}
// Try to add at least npage pages of memory to the heap,
// returning whether it worked.
static bool
MHeap_Grow(MHeap *h, uintptr npage)
{
uintptr ask;
void *v;
MSpan *s;
pageID p;
// Ask for a big chunk, to reduce the number of mappings
// the operating system needs to track; also amortizes
// the overhead of an operating system mapping.
// Allocate a multiple of 64kB.
npage = ROUND(npage, (64<<10)/PageSize);
ask = npage<<PageShift;
if(ask < HeapAllocChunk)
ask = HeapAllocChunk;
v = runtime·MHeap_SysAlloc(h, ask);
if(v == nil) {
if(ask > (npage<<PageShift)) {
ask = npage<<PageShift;
v = runtime·MHeap_SysAlloc(h, ask);
}
if(v == nil) {
runtime·printf("runtime: out of memory: cannot allocate %D-byte block (%D in use)\n", (uint64)ask, mstats.heap_sys);
return false;
}
}
// Create a fake "in use" span and free it, so that the
// right coalescing happens.
s = runtime·FixAlloc_Alloc(&h->spanalloc);
runtime·MSpan_Init(s, (uintptr)v>>PageShift, ask>>PageShift);
p = s->start;
p -= ((uintptr)h->arena_start>>PageShift);
h->spans[p] = s;
h->spans[p + s->npages - 1] = s;
runtime·atomicstore(&s->sweepgen, h->sweepgen);
s->state = MSpanInUse;
MHeap_FreeSpanLocked(h, s, false, true);
return true;
}
// Look up the span at the given address.
// Address is guaranteed to be in map
// and is guaranteed to be start or end of span.
MSpan*
runtime·MHeap_Lookup(MHeap *h, void *v)
{
uintptr p;
p = (uintptr)v;
p -= (uintptr)h->arena_start;
return h->spans[p >> PageShift];
}
// Look up the span at the given address.
// Address is *not* guaranteed to be in map
// and may be anywhere in the span.
// Map entries for the middle of a span are only
// valid for allocated spans. Free spans may have
// other garbage in their middles, so we have to
// check for that.
MSpan*
runtime·MHeap_LookupMaybe(MHeap *h, void *v)
{
MSpan *s;
pageID p, q;
if((byte*)v < h->arena_start || (byte*)v >= h->arena_used)
return nil;
p = (uintptr)v>>PageShift;
q = p;
q -= (uintptr)h->arena_start >> PageShift;
s = h->spans[q];
if(s == nil || p < s->start || v >= s->limit || s->state != MSpanInUse)
return nil;
return s;
}
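// Sketch of the spans table lookup used above: every arena page has one
// entry, so an address v maps to h->spans[((uintptr)v - arena_start) >> PageShift].
// For an allocated span every page of the span points at the same MSpan
// (see MHeap_AllocSpanLocked), which is what lets LookupMaybe resolve
// interior pointers; for free spans only the first and last page entries
// are maintained, hence the extra start/limit/state checks above.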
// Free the span back into the heap.
static void
mheap_free(MHeap *h, MSpan *s, int32 acct)
{
if(g != g->m->g0)
runtime·throw("mheap_free not on M stack");
runtime·lock(&h->lock);
mstats.heap_alloc += g->m->mcache->local_cachealloc;
g->m->mcache->local_cachealloc = 0;
mstats.tinyallocs += g->m->mcache->local_tinyallocs;
g->m->mcache->local_tinyallocs = 0;
if(acct) {
mstats.heap_alloc -= s->npages<<PageShift;
mstats.heap_objects--;
}
MHeap_FreeSpanLocked(h, s, true, true);
runtime·unlock(&h->lock);
}
static void
mheap_free_m(G *gp)
{
MHeap *h;
MSpan *s;
h = g->m->ptrarg[0];
s = g->m->ptrarg[1];
g->m->ptrarg[0] = nil;
g->m->ptrarg[1] = nil;
mheap_free(h, s, g->m->scalararg[0]);
runtime·gogo(&gp->sched);
}
void
runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct)
{
void (*fn)(G*);
if(g == g->m->g0) {
mheap_free(h, s, acct);
} else {
g->m->ptrarg[0] = h;
g->m->ptrarg[1] = s;
g->m->scalararg[0] = acct;
fn = mheap_free_m;
runtime·mcall(&fn);
}
}
void
runtime·MHeap_FreeStack(MHeap *h, MSpan *s)
{
if(g != g->m->g0)
runtime·throw("mheap_freestack not on M stack");
s->needzero = 1;
runtime·lock(&h->lock);
mstats.stacks_inuse -= s->npages<<PageShift;
MHeap_FreeSpanLocked(h, s, true, true);
runtime·unlock(&h->lock);
}
static void
MHeap_FreeSpanLocked(MHeap *h, MSpan *s, bool acctinuse, bool acctidle)
{
MSpan *t;
pageID p;
switch(s->state) {
case MSpanStack:
if(s->ref != 0)
runtime·throw("MHeap_FreeSpanLocked - invalid stack free");
break;
case MSpanInUse:
if(s->ref != 0 || s->sweepgen != h->sweepgen) {
runtime·printf("MHeap_FreeSpanLocked - span %p ptr %p ref %d sweepgen %d/%d\n",
s, s->start<<PageShift, s->ref, s->sweepgen, h->sweepgen);
runtime·throw("MHeap_FreeSpanLocked - invalid free");
}
break;
default:
runtime·throw("MHeap_FreeSpanLocked - invalid span state");
break;
}
if(acctinuse)
mstats.heap_inuse -= s->npages<<PageShift;
if(acctidle)
mstats.heap_idle += s->npages<<PageShift;
s->state = MSpanFree;
runtime·MSpanList_Remove(s);
// Stamp newly unused spans. The scavenger will use that
// info to potentially give back some pages to the OS.
s->unusedsince = runtime·nanotime();
s->npreleased = 0;
// Coalesce with earlier, later spans.
p = s->start;
p -= (uintptr)h->arena_start >> PageShift;
if(p > 0 && (t = h->spans[p-1]) != nil && t->state != MSpanInUse && t->state != MSpanStack) {
s->start = t->start;
s->npages += t->npages;
s->npreleased = t->npreleased; // absorb released pages
s->needzero |= t->needzero;
p -= t->npages;
h->spans[p] = s;
runtime·MSpanList_Remove(t);
t->state = MSpanDead;
runtime·FixAlloc_Free(&h->spanalloc, t);
}
if((p+s->npages)*sizeof(h->spans[0]) < h->spans_mapped && (t = h->spans[p+s->npages]) != nil && t->state != MSpanInUse && t->state != MSpanStack) {
s->npages += t->npages;
s->npreleased += t->npreleased;
s->needzero |= t->needzero;
h->spans[p + s->npages - 1] = s;
runtime·MSpanList_Remove(t);
t->state = MSpanDead;
runtime·FixAlloc_Free(&h->spanalloc, t);
}
// Insert s into appropriate list.
if(s->npages < nelem(h->free))
runtime·MSpanList_Insert(&h->free[s->npages], s);
else
runtime·MSpanList_Insert(&h->freelarge, s);
}
static uintptr
scavengelist(MSpan *list, uint64 now, uint64 limit)
{
uintptr released, sumreleased;
MSpan *s;
if(runtime·MSpanList_IsEmpty(list))
return 0;
sumreleased = 0;
for(s=list->next; s != list; s=s->next) {
if((now - s->unusedsince) > limit && s->npreleased != s->npages) {
released = (s->npages - s->npreleased) << PageShift;
mstats.heap_released += released;
sumreleased += released;
s->npreleased = s->npages;
runtime·SysUnused((void*)(s->start << PageShift), s->npages << PageShift);
}
}
return sumreleased;
}
void
runtime·MHeap_Scavenge(int32 k, uint64 now, uint64 limit)
{
uint32 i;
uintptr sumreleased;
MHeap *h;
h = &runtime·mheap;
runtime·lock(&h->lock);
sumreleased = 0;
for(i=0; i < nelem(h->free); i++)
sumreleased += scavengelist(&h->free[i], now, limit);
sumreleased += scavengelist(&h->freelarge, now, limit);
runtime·unlock(&h->lock);
if(runtime·debug.gctrace > 0) {
if(sumreleased > 0)
runtime·printf("scvg%d: %D MB released\n", k, (uint64)sumreleased>>20);
// TODO(dvyukov): these stats are incorrect as we don't subtract stack usage from heap.
// But we can't call ReadMemStats on g0 holding locks.
runtime·printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n",
k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20,
mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20);
}
}
void
runtime·scavenge_m(void)
{
runtime·MHeap_Scavenge(-1, ~(uintptr)0, 0);
}
// Initialize a new span with the given start and npages.
void
runtime·MSpan_Init(MSpan *span, pageID start, uintptr npages)
{
span->next = nil;
span->prev = nil;
span->start = start;
span->npages = npages;
span->freelist = nil;
span->ref = 0;
span->sizeclass = 0;
span->incache = false;
span->elemsize = 0;
span->state = MSpanDead;
span->unusedsince = 0;
span->npreleased = 0;
span->specialLock.key = 0;
span->specials = nil;
span->needzero = 0;
}
// Initialize an empty doubly-linked list.
void
runtime·MSpanList_Init(MSpan *list)
{
list->state = MSpanListHead;
list->next = list;
list->prev = list;
}
void
runtime·MSpanList_Remove(MSpan *span)
{
if(span->prev == nil && span->next == nil)
return;
span->prev->next = span->next;
span->next->prev = span->prev;
span->prev = nil;
span->next = nil;
}
bool
runtime·MSpanList_IsEmpty(MSpan *list)
{
return list->next == list;
}
void
runtime·MSpanList_Insert(MSpan *list, MSpan *span)
{
if(span->next != nil || span->prev != nil) {
runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
runtime·throw("MSpanList_Insert");
}
span->next = list->next;
span->prev = list;
span->next->prev = span;
span->prev->next = span;
}
void
runtime·MSpanList_InsertBack(MSpan *list, MSpan *span)
{
if(span->next != nil || span->prev != nil) {
runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
runtime·throw("MSpanList_Insert");
}
span->next = list;
span->prev = list->prev;
span->next->prev = span;
span->prev->next = span;
}
// Adds the special record s to the list of special records for
// the object p. All fields of s should be filled in except for
// offset & next, which this routine will fill in.
// Returns true if the special was successfully added, false otherwise.
// (The add will fail only if a record with the same p and s->kind
// already exists.)
static bool
addspecial(void *p, Special *s)
{
MSpan *span;
Special **t, *x;
uintptr offset;
byte kind;
span = runtime·MHeap_LookupMaybe(&runtime·mheap, p);
if(span == nil)
runtime·throw("addspecial on invalid pointer");
// Ensure that the span is swept.
// GC accesses specials list w/o locks. And it's just much safer.
g->m->locks++;
runtime·MSpan_EnsureSwept(span);
offset = (uintptr)p - (span->start << PageShift);
kind = s->kind;
runtime·lock(&span->specialLock);
// Find splice point, check for existing record.
t = &span->specials;
while((x = *t) != nil) {
if(offset == x->offset && kind == x->kind) {
runtime·unlock(&span->specialLock);
g->m->locks--;
return false; // already exists
}
if(offset < x->offset || (offset == x->offset && kind < x->kind))
break;
t = &x->next;
}
// Splice in record, fill in offset.
s->offset = offset;
s->next = x;
*t = s;
runtime·unlock(&span->specialLock);
g->m->locks--;
return true;
}
// Removes the Special record of the given kind for the object p.
// Returns the record if the record existed, nil otherwise.
// The caller must FixAlloc_Free the result.
static Special*
removespecial(void *p, byte kind)
{
MSpan *span;
Special *s, **t;
uintptr offset;
span = runtime·MHeap_LookupMaybe(&runtime·mheap, p);
if(span == nil)
runtime·throw("removespecial on invalid pointer");
// Ensure that the span is swept.
// GC accesses specials list w/o locks. And it's just much safer.
g->m->locks++;
runtime·MSpan_EnsureSwept(span);
offset = (uintptr)p - (span->start << PageShift);
runtime·lock(&span->specialLock);
t = &span->specials;
while((s = *t) != nil) {
// This function is used for finalizers only, so we don't check for
// "interior" specials (p must be exactly equal to s->offset).
if(offset == s->offset && kind == s->kind) {
*t = s->next;
runtime·unlock(&span->specialLock);
g->m->locks--;
return s;
}
t = &s->next;
}
runtime·unlock(&span->specialLock);
g->m->locks--;
return nil;
}
// Adds a finalizer to the object p. Returns true if it succeeded.
bool
runtime·addfinalizer(void *p, FuncVal *f, uintptr nret, Type *fint, PtrType *ot)
{
SpecialFinalizer *s;
runtime·lock(&runtime·mheap.speciallock);
s = runtime·FixAlloc_Alloc(&runtime·mheap.specialfinalizeralloc);
runtime·unlock(&runtime·mheap.speciallock);
s->special.kind = KindSpecialFinalizer;
s->fn = f;
s->nret = nret;
s->fint = fint;
s->ot = ot;
if(addspecial(p, &s->special))
return true;
// There was an old finalizer
runtime·lock(&runtime·mheap.speciallock);
runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, s);
runtime·unlock(&runtime·mheap.speciallock);
return false;
}
// Removes the finalizer (if any) from the object p.
void
runtime·removefinalizer(void *p)
{
SpecialFinalizer *s;
s = (SpecialFinalizer*)removespecial(p, KindSpecialFinalizer);
if(s == nil)
return; // there wasn't a finalizer to remove
runtime·lock(&runtime·mheap.speciallock);
runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, s);
runtime·unlock(&runtime·mheap.speciallock);
}
// Set the heap profile bucket associated with addr to b.
void
runtime·setprofilebucket_m(void)
{
void *p;
Bucket *b;
SpecialProfile *s;
p = g->m->ptrarg[0];
b = g->m->ptrarg[1];
g->m->ptrarg[0] = nil;
g->m->ptrarg[1] = nil;
runtime·lock(&runtime·mheap.speciallock);
s = runtime·FixAlloc_Alloc(&runtime·mheap.specialprofilealloc);
runtime·unlock(&runtime·mheap.speciallock);
s->special.kind = KindSpecialProfile;
s->b = b;
if(!addspecial(p, &s->special))
runtime·throw("setprofilebucket: profile already set");
}
// Do whatever cleanup needs to be done to deallocate s. It has
// already been unlinked from the MSpan specials list.
// Returns true if we should keep working on deallocating p.
bool
runtime·freespecial(Special *s, void *p, uintptr size, bool freed)
{
SpecialFinalizer *sf;
SpecialProfile *sp;
switch(s->kind) {
case KindSpecialFinalizer:
sf = (SpecialFinalizer*)s;
runtime·queuefinalizer(p, sf->fn, sf->nret, sf->fint, sf->ot);
runtime·lock(&runtime·mheap.speciallock);
runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, sf);
runtime·unlock(&runtime·mheap.speciallock);
return false; // don't free p until finalizer is done
case KindSpecialProfile:
sp = (SpecialProfile*)s;
runtime·mProf_Free(sp->b, size, freed);
runtime·lock(&runtime·mheap.speciallock);
runtime·FixAlloc_Free(&runtime·mheap.specialprofilealloc, sp);
runtime·unlock(&runtime·mheap.speciallock);
return true;
default:
runtime·throw("bad special kind");
return true;
}
}
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Page heap.
//
// See malloc.h for overview.
//
// When a MSpan is in the heap free list, state == MSpanFree
// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
//
// When a MSpan is allocated, state == MSpanInUse or MSpanStack
// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
package runtime
import "unsafe"
var h_allspans []*mspan // TODO: make this h.allspans once mheap can be defined in Go
var h_spans []*mspan // TODO: make this h.spans once mheap can be defined in Go
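// recordspan is the callback registered with fixAlloc for h.spanalloc
// (see mHeap_Init): it appends each newly allocated mspan to h_allspans,
// growing the backing array with sysAlloc when it runs out of capacity.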
func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
h := (*mheap)(vh)
s := (*mspan)(p)
if len(h_allspans) >= cap(h_allspans) {
n := 64 * 1024 / ptrSize
if n < cap(h_allspans)*3/2 {
n = cap(h_allspans) * 3 / 2
}
var new []*mspan
sp := (*slice)(unsafe.Pointer(&new))
sp.array = (*byte)(sysAlloc(uintptr(n)*ptrSize, &memstats.other_sys))
if sp.array == nil {
gothrow("runtime: cannot allocate memory")
}
sp.len = uint(len(h_allspans))
sp.cap = uint(n)
if len(h_allspans) > 0 {
copy(new, h_allspans)
// Don't free the old array if it's referenced by sweep.
// See the comment in mgc0.c.
if h.allspans != mheap_.gcspans {
sysFree(unsafe.Pointer(h.allspans), uintptr(cap(h_allspans))*ptrSize, &memstats.other_sys)
}
}
h_allspans = new
h.allspans = (**mspan)(unsafe.Pointer(sp.array))
}
h_allspans = append(h_allspans, s)
h.nspan = uint32(len(h_allspans))
}
// Initialize the heap.
func mHeap_Init(h *mheap, spans_size uintptr) {
fixAlloc_Init(&h.spanalloc, unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
fixAlloc_Init(&h.cachealloc, unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
fixAlloc_Init(&h.specialfinalizeralloc, unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
fixAlloc_Init(&h.specialprofilealloc, unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
// h->mapcache needs no init
for i := range h.free {
mSpanList_Init(&h.free[i])
mSpanList_Init(&h.busy[i])
}
mSpanList_Init(&h.freelarge)
mSpanList_Init(&h.busylarge)
for i := range h.central {
mCentral_Init(&h.central[i].mcentral, int32(i))
}
sp := (*slice)(unsafe.Pointer(&h_spans))
sp.array = (*byte)(unsafe.Pointer(h.spans))
sp.len = uint(spans_size / ptrSize)
sp.cap = uint(spans_size / ptrSize)
}
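// mHeap_MapSpans maps as much of the h.spans lookup array as is needed to
// cover the arena currently in use (arena_start..arena_used), rounding the
// mapped size up to the physical page size.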
func mHeap_MapSpans(h *mheap) {
// Map spans array, PageSize at a time.
n := uintptr(unsafe.Pointer(h.arena_used))
n -= uintptr(unsafe.Pointer(h.arena_start))
n = n / _PageSize * ptrSize
n = round(n, _PhysPageSize)
if h.spans_mapped >= n {
return
}
sysMap(add(unsafe.Pointer(h.spans), h.spans_mapped), n-h.spans_mapped, h.arena_reserved, &memstats.other_sys)
h.spans_mapped = n
}
// Sweeps spans in list until it has reclaimed at least npages pages into the heap.
// Returns the actual number of pages reclaimed.
func mHeap_ReclaimList(h *mheap, list *mspan, npages uintptr) uintptr {
n := uintptr(0)
sg := mheap_.sweepgen
retry:
for s := list.next; s != list; s = s.next {
if s.sweepgen == sg-2 && cas(&s.sweepgen, sg-2, sg-1) {
mSpanList_Remove(s)
// swept spans are at the end of the list
mSpanList_InsertBack(list, s)
unlock(&h.lock)
if mSpan_Sweep(s, false) {
// TODO(rsc,dvyukov): This is probably wrong.
// It is undercounting the number of pages reclaimed.
// See golang.org/issue/9048.
// Note that if we want to add the true count of s's pages,
// we must record that before calling mSpan_Sweep,
// because if mSpan_Sweep returns true the span has been
// freed back to the heap, so s.npages can no longer be read safely.
n++
}
lock(&h.lock)
if n >= npages {
return n
}
// the span could have been moved elsewhere
goto retry
}
if s.sweepgen == sg-1 {
// the span is being swept by the background sweeper, skip
continue
}
// already swept empty span,
// all subsequent ones must also be either swept or in process of sweeping
break
}
return n
}
// Sweeps and reclaims at least npage pages into heap.
// Called before allocating npage pages.
func mHeap_Reclaim(h *mheap, npage uintptr) {
// First try to sweep busy spans with large objects of size >= npage,
// this has good chances of reclaiming the necessary space.
for i := int(npage); i < len(h.busy); i++ {
if mHeap_ReclaimList(h, &h.busy[i], npage) != 0 {
return // Bingo!
}
}
// Then -- even larger objects.
if mHeap_ReclaimList(h, &h.busylarge, npage) != 0 {
return // Bingo!
}
// Now try smaller objects.
// One such object is not enough, so we need to reclaim several of them.
reclaimed := uintptr(0)
for i := 0; i < int(npage) && i < len(h.busy); i++ {
reclaimed += mHeap_ReclaimList(h, &h.busy[i], npage-reclaimed)
if reclaimed >= npage {
return
}
}
// Now sweep everything that is not yet swept.
unlock(&h.lock)
for {
n := sweepone()
if n == ^uintptr(0) { // all spans are swept
break
}
reclaimed += n
if reclaimed >= npage {
break
}
}
lock(&h.lock)
}
// Allocate a new span of npage pages from the heap for GC'd memory
// and record its size class in the HeapMap and HeapMapCache.
func mHeap_Alloc_m(h *mheap, npage uintptr, sizeclass int32, large bool) *mspan {
_g_ := getg()
if _g_ != _g_.m.g0 {
gothrow("_mheap_alloc not on M stack")
}
lock(&h.lock)
// To prevent excessive heap growth, before allocating n pages
// we need to sweep and reclaim at least n pages.
if h.sweepdone == 0 {
mHeap_Reclaim(h, npage)
}
// transfer stats from cache to global
memstats.heap_alloc += uint64(_g_.m.mcache.local_cachealloc)
_g_.m.mcache.local_cachealloc = 0
memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
_g_.m.mcache.local_tinyallocs = 0
s := mHeap_AllocSpanLocked(h, npage)
if s != nil {
// Record span info, because gc needs to be
// able to map interior pointer to containing span.
atomicstore(&s.sweepgen, h.sweepgen)
s.state = _MSpanInUse
s.freelist = nil
s.ref = 0
s.sizeclass = uint8(sizeclass)
if sizeclass == 0 {
s.elemsize = s.npages << _PageShift
} else {
s.elemsize = uintptr(class_to_size[sizeclass])
}
// update stats, sweep lists
if large {
memstats.heap_objects++
memstats.heap_alloc += uint64(npage << _PageShift)
// Swept spans are at the end of lists.
if s.npages < uintptr(len(h.free)) {
mSpanList_InsertBack(&h.busy[s.npages], s)
} else {
mSpanList_InsertBack(&h.busylarge, s)
}
}
}
unlock(&h.lock)
return s
}
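// mHeap_Alloc allocates a span of npage pages. It switches to the M stack to
// do the locked work in mHeap_Alloc_m, then zeroes the span's memory if the
// caller asked for zeroed memory and the span still needs it.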
func mHeap_Alloc(h *mheap, npage uintptr, sizeclass int32, large bool, needzero bool) *mspan {
// Don't do any operations that lock the heap on the G stack.
// It might trigger stack growth, and the stack growth code needs
// to be able to allocate heap.
var s *mspan
onM(func() {
s = mHeap_Alloc_m(h, npage, sizeclass, large)
})
if s != nil {
if needzero && s.needzero != 0 {
memclr(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
}
s.needzero = 0
}
return s
}
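// mHeap_AllocStack allocates a span of npage pages to back goroutine stacks.
// It must run on the g0 stack because it locks the heap.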
func mHeap_AllocStack(h *mheap, npage uintptr) *mspan {
_g_ := getg()
if _g_ != _g_.m.g0 {
gothrow("mheap_allocstack not on M stack")
}
lock(&h.lock)
s := mHeap_AllocSpanLocked(h, npage)
if s != nil {
s.state = _MSpanStack
s.freelist = nil
s.ref = 0
memstats.stacks_inuse += uint64(s.npages << _PageShift)
}
unlock(&h.lock)
return s
}
// Allocates a span of the given size. h must be locked.
// The returned span has been removed from the
// free list, but its state is still MSpanFree.
func mHeap_AllocSpanLocked(h *mheap, npage uintptr) *mspan {
var s *mspan
// Try in fixed-size lists up to max.
for i := int(npage); i < len(h.free); i++ {
if !mSpanList_IsEmpty(&h.free[i]) {
s = h.free[i].next
goto HaveSpan
}
}
// Best fit in list of large spans.
s = mHeap_AllocLarge(h, npage)
if s == nil {
if !mHeap_Grow(h, npage) {
return nil
}
s = mHeap_AllocLarge(h, npage)
if s == nil {
return nil
}
}
HaveSpan:
// Mark span in use.
if s.state != _MSpanFree {
gothrow("MHeap_AllocLocked - MSpan not free")
}
if s.npages < npage {
gothrow("MHeap_AllocLocked - bad npages")
}
mSpanList_Remove(s)
if s.next != nil || s.prev != nil {
gothrow("still in list")
}
if s.npreleased > 0 {
sysUsed((unsafe.Pointer)(s.start<<_PageShift), s.npages<<_PageShift)
memstats.heap_released -= uint64(s.npreleased << _PageShift)
s.npreleased = 0
}
if s.npages > npage {
// Trim extra and put it back in the heap.
t := (*mspan)(fixAlloc_Alloc(&h.spanalloc))
mSpan_Init(t, s.start+pageID(npage), s.npages-npage)
s.npages = npage
p := uintptr(t.start)
p -= (uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift)
if p > 0 {
h_spans[p-1] = s
}
h_spans[p] = t
h_spans[p+t.npages-1] = t
t.needzero = s.needzero
s.state = _MSpanStack // prevent coalescing with s
t.state = _MSpanStack
mHeap_FreeSpanLocked(h, t, false, false)
t.unusedsince = s.unusedsince // preserve age (TODO: wrong: t is possibly merged and/or deallocated at this point)
s.state = _MSpanFree
}
s.unusedsince = 0
p := uintptr(s.start)
p -= (uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift)
for n := uintptr(0); n < npage; n++ {
h_spans[p+n] = s
}
memstats.heap_inuse += uint64(npage << _PageShift)
memstats.heap_idle -= uint64(npage << _PageShift)
//println("spanalloc", hex(s.start<<_PageShift))
if s.next != nil || s.prev != nil {
gothrow("still in list")
}
return s
}
// Allocate a span of exactly npage pages from the list of large spans.
func mHeap_AllocLarge(h *mheap, npage uintptr) *mspan {
return bestFit(&h.freelarge, npage, nil)
}
// Search list for smallest span with >= npage pages.
// If there are multiple smallest spans, take the one
// with the earliest starting address.
func bestFit(list *mspan, npage uintptr, best *mspan) *mspan {
for s := list.next; s != list; s = s.next {
if s.npages < npage {
continue
}
if best == nil || s.npages < best.npages || (s.npages == best.npages && s.start < best.start) {
best = s
}
}
return best
}
// Try to add at least npage pages of memory to the heap,
// returning whether it worked.
func mHeap_Grow(h *mheap, npage uintptr) bool {
// Ask for a big chunk, to reduce the number of mappings
// the operating system needs to track; also amortizes
// the overhead of an operating system mapping.
// Allocate a multiple of 64kB.
npage = round(npage, (64<<10)/_PageSize)
ask := npage << _PageShift
if ask < _HeapAllocChunk {
ask = _HeapAllocChunk
}
v := mHeap_SysAlloc(h, ask)
if v == nil {
if ask > npage<<_PageShift {
ask = npage << _PageShift
v = mHeap_SysAlloc(h, ask)
}
if v == nil {
print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
return false
}
}
// Create a fake "in use" span and free it, so that the
// right coalescing happens.
s := (*mspan)(fixAlloc_Alloc(&h.spanalloc))
mSpan_Init(s, pageID(uintptr(v)>>_PageShift), ask>>_PageShift)
p := uintptr(s.start)
p -= (uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift)
h_spans[p] = s
h_spans[p+s.npages-1] = s
atomicstore(&s.sweepgen, h.sweepgen)
s.state = _MSpanInUse
mHeap_FreeSpanLocked(h, s, false, true)
return true
}
// Look up the span at the given address.
// Address is guaranteed to be in map
// and is guaranteed to be start or end of span.
func mHeap_Lookup(h *mheap, v unsafe.Pointer) *mspan {
p := uintptr(v)
p -= uintptr(unsafe.Pointer(h.arena_start))
return h_spans[p>>_PageShift]
}
// Look up the span at the given address.
// Address is *not* guaranteed to be in map
// and may be anywhere in the span.
// Map entries for the middle of a span are only
// valid for allocated spans. Free spans may have
// other garbage in their middles, so we have to
// check for that.
func mHeap_LookupMaybe(h *mheap, v unsafe.Pointer) *mspan {
if uintptr(v) < uintptr(unsafe.Pointer(h.arena_start)) || uintptr(v) >= uintptr(unsafe.Pointer(h.arena_used)) {
return nil
}
p := uintptr(v) >> _PageShift
q := p
q -= uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift
s := h_spans[q]
if s == nil || p < uintptr(s.start) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse {
return nil
}
return s
}
// Free the span back into the heap.
func mHeap_Free(h *mheap, s *mspan, acct int32) {
onM(func() {
mp := getg().m
lock(&h.lock)
memstats.heap_alloc += uint64(mp.mcache.local_cachealloc)
mp.mcache.local_cachealloc = 0
memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
mp.mcache.local_tinyallocs = 0
if acct != 0 {
memstats.heap_alloc -= uint64(s.npages << _PageShift)
memstats.heap_objects--
}
mHeap_FreeSpanLocked(h, s, true, true)
unlock(&h.lock)
})
}
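// mHeap_FreeStack returns a stack span to the heap. It must run on the g0
// stack because it locks the heap, and it marks the span as needing zeroing
// before reuse.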
func mHeap_FreeStack(h *mheap, s *mspan) {
_g_ := getg()
if _g_ != _g_.m.g0 {
gothrow("mheap_freestack not on M stack")
}
s.needzero = 1
lock(&h.lock)
memstats.stacks_inuse -= uint64(s.npages << _PageShift)
mHeap_FreeSpanLocked(h, s, true, true)
unlock(&h.lock)
}
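// mHeap_FreeSpanLocked returns s to the heap free lists, optionally adjusting
// the heap_inuse and heap_idle accounting, and coalesces s with adjacent free
// spans. The heap must be locked.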
func mHeap_FreeSpanLocked(h *mheap, s *mspan, acctinuse, acctidle bool) {
switch s.state {
case _MSpanStack:
if s.ref != 0 {
gothrow("MHeap_FreeSpanLocked - invalid stack free")
}
case _MSpanInUse:
if s.ref != 0 || s.sweepgen != h.sweepgen {
print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.start<<_PageShift), " ref ", s.ref, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
gothrow("MHeap_FreeSpanLocked - invalid free")
}
default:
gothrow("MHeap_FreeSpanLocked - invalid span state")
}
if acctinuse {
memstats.heap_inuse -= uint64(s.npages << _PageShift)
}
if acctidle {
memstats.heap_idle += uint64(s.npages << _PageShift)
}
s.state = _MSpanFree
mSpanList_Remove(s)
// Stamp newly unused spans. The scavenger will use that
// info to potentially give back some pages to the OS.
s.unusedsince = nanotime()
s.npreleased = 0
// Coalesce with earlier, later spans.
p := uintptr(s.start)
p -= uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift
if p > 0 {
t := h_spans[p-1]
if t != nil && t.state != _MSpanInUse && t.state != _MSpanStack {
s.start = t.start
s.npages += t.npages
s.npreleased = t.npreleased // absorb released pages
s.needzero |= t.needzero
p -= t.npages
h_spans[p] = s
mSpanList_Remove(t)
t.state = _MSpanDead
fixAlloc_Free(&h.spanalloc, (unsafe.Pointer)(t))
}
}
if (p+s.npages)*ptrSize < h.spans_mapped {
t := h_spans[p+s.npages]
if t != nil && t.state != _MSpanInUse && t.state != _MSpanStack {
s.npages += t.npages
s.npreleased += t.npreleased
s.needzero |= t.needzero
h_spans[p+s.npages-1] = s
mSpanList_Remove(t)
t.state = _MSpanDead
fixAlloc_Free(&h.spanalloc, (unsafe.Pointer)(t))
}
}
// Insert s into appropriate list.
if s.npages < uintptr(len(h.free)) {
mSpanList_Insert(&h.free[s.npages], s)
} else {
mSpanList_Insert(&h.freelarge, s)
}
}
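// scavengelist releases to the OS the pages of every span in list that has
// been unused for more than limit nanoseconds (relative to now) and still has
// unreleased pages. It returns the total number of bytes released.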
func scavengelist(list *mspan, now, limit uint64) uintptr {
if mSpanList_IsEmpty(list) {
return 0
}
var sumreleased uintptr
for s := list.next; s != list; s = s.next {
if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
released := (s.npages - s.npreleased) << _PageShift
memstats.heap_released += uint64(released)
sumreleased += released
s.npreleased = s.npages
sysUnused((unsafe.Pointer)(s.start<<_PageShift), s.npages<<_PageShift)
}
}
return sumreleased
}
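// mHeap_Scavenge walks all free lists and releases long-unused pages back to
// the OS via scavengelist; k only labels the round in the gctrace output.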
func mHeap_Scavenge(k int32, now, limit uint64) {
h := &mheap_
lock(&h.lock)
var sumreleased uintptr
for i := 0; i < len(h.free); i++ {
sumreleased += scavengelist(&h.free[i], now, limit)
}
sumreleased += scavengelist(&h.freelarge, now, limit)
unlock(&h.lock)
if debug.gctrace > 0 {
if sumreleased > 0 {
print("scvg", k, ": ", sumreleased>>20, " MB released\n")
}
// TODO(dvyukov): these stats are incorrect as we don't subtract stack usage from heap.
// But we can't call ReadMemStats on g0 holding locks.
print("scvg", k, ": inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
}
}
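// scavenge_m forces an immediate scavenge of every free span by passing the
// largest possible "now" and a zero age limit.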
func scavenge_m() {
mHeap_Scavenge(-1, ^uint64(0), 0)
}
// Initialize a new span with the given start and npages.
func mSpan_Init(span *mspan, start pageID, npages uintptr) {
span.next = nil
span.prev = nil
span.start = start
span.npages = npages
span.freelist = nil
span.ref = 0
span.sizeclass = 0
span.incache = false
span.elemsize = 0
span.state = _MSpanDead
span.unusedsince = 0
span.npreleased = 0
span.speciallock.key = 0
span.specials = nil
span.needzero = 0
}
// Initialize an empty doubly-linked list.
func mSpanList_Init(list *mspan) {
list.state = _MSpanListHead
list.next = list
list.prev = list
}
func mSpanList_Remove(span *mspan) {
if span.prev == nil && span.next == nil {
return
}
span.prev.next = span.next
span.next.prev = span.prev
span.prev = nil
span.next = nil
}
func mSpanList_IsEmpty(list *mspan) bool {
return list.next == list
}
func mSpanList_Insert(list *mspan, span *mspan) {
if span.next != nil || span.prev != nil {
println("failed MSpanList_Insert", span, span.next, span.prev)
gothrow("MSpanList_Insert")
}
span.next = list.next
span.prev = list
span.next.prev = span
span.prev.next = span
}
func mSpanList_InsertBack(list *mspan, span *mspan) {
if span.next != nil || span.prev != nil {
println("failed MSpanList_InsertBack", span, span.next, span.prev)
gothrow("MSpanList_InsertBack")
}
span.next = list
span.prev = list.prev
span.next.prev = span
span.prev.next = span
}
// Adds the special record s to the list of special records for
// the object p. All fields of s should be filled in except for
// offset & next, which this routine will fill in.
// Returns true if the special was successfully added, false otherwise.
// (The add will fail only if a record with the same p and s->kind
// already exists.)
func addspecial(p unsafe.Pointer, s *special) bool {
span := mHeap_LookupMaybe(&mheap_, p)
if span == nil {
gothrow("addspecial on invalid pointer")
}
// Ensure that the span is swept.
// GC accesses specials list w/o locks. And it's just much safer.
mp := acquirem()
mSpan_EnsureSwept(span)
offset := uintptr(p) - uintptr(span.start<<_PageShift)
kind := s.kind
lock(&span.speciallock)
// Find splice point, check for existing record.
t := &span.specials
for {
x := *t
if x == nil {
break
}
if offset == uintptr(x.offset) && kind == x.kind {
unlock(&span.speciallock)
releasem(mp)
return false // already exists
}
if offset < uintptr(x.offset) || (offset == uintptr(x.offset) && kind < x.kind) {
break
}
t = &x.next
}
// Splice in record, fill in offset.
s.offset = uint16(offset)
s.next = *t
*t = s
unlock(&span.speciallock)
releasem(mp)
return true
}
// Removes the Special record of the given kind for the object p.
// Returns the record if the record existed, nil otherwise.
// The caller must FixAlloc_Free the result.
func removespecial(p unsafe.Pointer, kind uint8) *special {
span := mHeap_LookupMaybe(&mheap_, p)
if span == nil {
gothrow("removespecial on invalid pointer")
}
// Ensure that the span is swept.
// GC accesses specials list w/o locks. And it's just much safer.
mp := acquirem()
mSpan_EnsureSwept(span)
offset := uintptr(p) - uintptr(span.start<<_PageShift)
lock(&span.speciallock)
t := &span.specials
for {
s := *t
if s == nil {
break
}
// This function is used for finalizers only, so we don't check for
// "interior" specials (p must be exactly equal to s->offset).
if offset == uintptr(s.offset) && kind == s.kind {
*t = s.next
unlock(&span.speciallock)
releasem(mp)
return s
}
t = &s.next
}
unlock(&span.speciallock)
releasem(mp)
return nil
}
// Adds a finalizer to the object p. Returns true if it succeeded.
func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *ptrtype) bool {
lock(&mheap_.speciallock)
s := (*specialfinalizer)(fixAlloc_Alloc(&mheap_.specialfinalizeralloc))
unlock(&mheap_.speciallock)
s.special.kind = _KindSpecialFinalizer
s.fn = f
s.nret = nret
s.fint = fint
s.ot = ot
if addspecial(p, &s.special) {
return true
}
// There was an old finalizer
lock(&mheap_.speciallock)
fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(s))
unlock(&mheap_.speciallock)
return false
}
// Removes the finalizer (if any) from the object p.
func removefinalizer(p unsafe.Pointer) {
s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer)))
if s == nil {
return // there wasn't a finalizer to remove
}
lock(&mheap_.speciallock)
fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(s))
unlock(&mheap_.speciallock)
}
// Set the heap profile bucket associated with addr to b.
func setprofilebucket(p unsafe.Pointer, b *bucket) {
lock(&mheap_.speciallock)
s := (*specialprofile)(fixAlloc_Alloc(&mheap_.specialprofilealloc))
unlock(&mheap_.speciallock)
s.special.kind = _KindSpecialProfile
s.b = b
if !addspecial(p, &s.special) {
gothrow("setprofilebucket: profile already set")
}
}
// Do whatever cleanup needs to be done to deallocate s. It has
// already been unlinked from the MSpan specials list.
// Returns true if we should keep working on deallocating p.
func freespecial(s *special, p unsafe.Pointer, size uintptr, freed bool) bool {
switch s.kind {
case _KindSpecialFinalizer:
sf := (*specialfinalizer)(unsafe.Pointer(s))
queuefinalizer(p, sf.fn, sf.nret, sf.fint, sf.ot)
lock(&mheap_.speciallock)
fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(sf))
unlock(&mheap_.speciallock)
return false // don't free p until finalizer is done
case _KindSpecialProfile:
sp := (*specialprofile)(unsafe.Pointer(s))
mProf_Free(sp.b, size, freed)
lock(&mheap_.speciallock)
fixAlloc_Free(&mheap_.specialprofilealloc, (unsafe.Pointer)(sp))
unlock(&mheap_.speciallock)
return true
default:
gothrow("bad special kind")
panic("not reached")
}
}
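// How the pieces above fit together: runtime.SetFinalizer (not shown in this
// file) reaches addfinalizer; when the sweeper later finds the object dead it
// calls freespecial, which hands the finalizer to queuefinalizer and returns
// false so that the object is not freed until the finalizer has run.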
......@@ -190,8 +190,6 @@ func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket
return b
}
func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer
func eqslice(x, y []uintptr) bool {
if len(x) != len(y) {
return false
......@@ -246,16 +244,9 @@ func mProf_Malloc(p unsafe.Pointer, size uintptr) {
// This reduces potential contention and chances of deadlocks.
// Since the object must be alive during call to mProf_Malloc,
// it's fine to do this non-atomically.
setprofilebucket(p, b)
}
func setprofilebucket_m() // mheap.c
func setprofilebucket(p unsafe.Pointer, b *bucket) {
g := getg()
g.m.ptrarg[0] = p
g.m.ptrarg[1] = unsafe.Pointer(b)
onM(setprofilebucket_m)
onM(func() {
setprofilebucket(p, b)
})
}
// Called when freeing a profiled block.
......@@ -519,8 +510,6 @@ func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
return
}
var allgs []*g // proc.c
// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
// If len(p) < n, GoroutineProfile does not change p and returns n, false.
......
......@@ -25,15 +25,10 @@
//
// TODO(rsc): Compute max waste for any given size.
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "textflag.h"
package runtime
#pragma dataflag NOPTR
int32 runtime·class_to_size[NumSizeClasses];
#pragma dataflag NOPTR
int32 runtime·class_to_allocnpages[NumSizeClasses];
//var class_to_size [_NumSizeClasses]int32
//var class_to_allocnpages [_NumSizeClasses]int32
// The SizeToClass lookup is implemented using two arrays,
// one mapping sizes <= 1024 to their class and one mapping
......@@ -43,142 +38,137 @@ int32 runtime·class_to_allocnpages[NumSizeClasses];
// are 128-aligned, so the second array is indexed by the
// size divided by 128 (rounded up). The arrays are filled in
// by InitSizes.
//var size_to_class8 [1024/8 + 1]int8
//var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
#pragma dataflag NOPTR
int8 runtime·size_to_class8[1024/8 + 1];
#pragma dataflag NOPTR
int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
void runtime·testdefersizes(void);
int32
runtime·SizeToClass(int32 size)
{
if(size > MaxSmallSize)
runtime·throw("SizeToClass - invalid size");
if(size > 1024-8)
return runtime·size_to_class128[(size-1024+127) >> 7];
return runtime·size_to_class8[(size+7)>>3];
func sizeToClass(size int32) int32 {
if size > _MaxSmallSize {
gothrow("SizeToClass - invalid size")
}
if size > 1024-8 {
return int32(size_to_class128[(size-1024+127)>>7])
}
return int32(size_to_class8[(size+7)>>3])
}
void
runtime·InitSizes(void)
{
int32 align, sizeclass, size, nextsize, n;
uint32 i;
uintptr allocsize, npages;
func initSizes() {
// Initialize the runtime·class_to_size table (and choose class sizes in the process).
runtime·class_to_size[0] = 0;
sizeclass = 1; // 0 means no class
align = 8;
for(size = align; size <= MaxSmallSize; size += align) {
if((size&(size-1)) == 0) { // bump alignment once in a while
if(size >= 2048)
align = 256;
else if(size >= 128)
align = size / 8;
else if(size >= 16)
align = 16; // required for x86 SSE instructions, if we want to use them
class_to_size[0] = 0
sizeclass := 1 // 0 means no class
align := 8
for size := align; size <= _MaxSmallSize; size += align {
if size&(size-1) == 0 { // bump alignment once in a while
if size >= 2048 {
align = 256
} else if size >= 128 {
align = size / 8
} else if size >= 16 {
align = 16 // required for x86 SSE instructions, if we want to use them
}
}
if align&(align-1) != 0 {
gothrow("InitSizes - bug")
}
if((align&(align-1)) != 0)
runtime·throw("InitSizes - bug");
// Make the allocnpages big enough that
// the leftover is less than 1/8 of the total,
// so wasted space is at most 12.5%.
allocsize = PageSize;
while(allocsize%size > allocsize/8)
allocsize += PageSize;
npages = allocsize >> PageShift;
allocsize := _PageSize
for allocsize%size > allocsize/8 {
allocsize += _PageSize
}
npages := allocsize >> _PageShift
// If the previous sizeclass chose the same
// allocation size and fit the same number of
// objects into the page, we might as well
// use just this size instead of having two
// different sizes.
if(sizeclass > 1 &&
npages == runtime·class_to_allocnpages[sizeclass-1] &&
allocsize/size == allocsize/runtime·class_to_size[sizeclass-1]) {
runtime·class_to_size[sizeclass-1] = size;
continue;
if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) {
class_to_size[sizeclass-1] = int32(size)
continue
}
runtime·class_to_allocnpages[sizeclass] = npages;
runtime·class_to_size[sizeclass] = size;
sizeclass++;
class_to_allocnpages[sizeclass] = int32(npages)
class_to_size[sizeclass] = int32(size)
sizeclass++
}
if(sizeclass != NumSizeClasses) {
runtime·printf("sizeclass=%d NumSizeClasses=%d\n", sizeclass, NumSizeClasses);
runtime·throw("InitSizes - bad NumSizeClasses");
if sizeclass != _NumSizeClasses {
print("sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
gothrow("InitSizes - bad NumSizeClasses")
}
// Initialize the size_to_class tables.
nextsize = 0;
for (sizeclass = 1; sizeclass < NumSizeClasses; sizeclass++) {
for(; nextsize < 1024 && nextsize <= runtime·class_to_size[sizeclass]; nextsize+=8)
runtime·size_to_class8[nextsize/8] = sizeclass;
if(nextsize >= 1024)
for(; nextsize <= runtime·class_to_size[sizeclass]; nextsize += 128)
runtime·size_to_class128[(nextsize-1024)/128] = sizeclass;
nextsize := 0
for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 {
size_to_class8[nextsize/8] = int8(sizeclass)
}
if nextsize >= 1024 {
for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 {
size_to_class128[(nextsize-1024)/128] = int8(sizeclass)
}
}
}
// Double-check SizeToClass.
if(0) {
for(n=0; n < MaxSmallSize; n++) {
sizeclass = runtime·SizeToClass(n);
if(sizeclass < 1 || sizeclass >= NumSizeClasses || runtime·class_to_size[sizeclass] < n) {
runtime·printf("size=%d sizeclass=%d runtime·class_to_size=%d\n", n, sizeclass, runtime·class_to_size[sizeclass]);
runtime·printf("incorrect SizeToClass");
goto dump;
if false {
for n := int32(0); n < _MaxSmallSize; n++ {
sizeclass := sizeToClass(n)
if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
print("incorrect SizeToClass\n")
goto dump
}
if(sizeclass > 1 && runtime·class_to_size[sizeclass-1] >= n) {
runtime·printf("size=%d sizeclass=%d runtime·class_to_size=%d\n", n, sizeclass, runtime·class_to_size[sizeclass]);
runtime·printf("SizeToClass too big");
goto dump;
if sizeclass > 1 && class_to_size[sizeclass-1] >= n {
print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
print("SizeToClass too big\n")
goto dump
}
}
}
runtime·testdefersizes();
testdefersizes()
// Copy out for statistics table.
for(i=0; i<nelem(runtime·class_to_size); i++)
mstats.by_size[i].size = runtime·class_to_size[i];
return;
for i := 0; i < len(class_to_size); i++ {
memstats.by_size[i].size = uint32(class_to_size[i])
}
return
dump:
if(1){
runtime·printf("NumSizeClasses=%d\n", NumSizeClasses);
runtime·printf("runtime·class_to_size:");
for(sizeclass=0; sizeclass<NumSizeClasses; sizeclass++)
runtime·printf(" %d", runtime·class_to_size[sizeclass]);
runtime·printf("\n\n");
runtime·printf("size_to_class8:");
for(i=0; i<nelem(runtime·size_to_class8); i++)
runtime·printf(" %d=>%d(%d)\n", i*8, runtime·size_to_class8[i],
runtime·class_to_size[runtime·size_to_class8[i]]);
runtime·printf("\n");
runtime·printf("size_to_class128:");
for(i=0; i<nelem(runtime·size_to_class128); i++)
runtime·printf(" %d=>%d(%d)\n", i*128, runtime·size_to_class128[i],
runtime·class_to_size[runtime·size_to_class128[i]]);
runtime·printf("\n");
if true {
print("NumSizeClasses=", _NumSizeClasses, "\n")
print("runtime·class_to_size:")
for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ {
print(" ", class_to_size[sizeclass], "")
}
print("\n\n")
print("size_to_class8:")
for i := 0; i < len(size_to_class8); i++ {
print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n")
}
print("\n")
print("size_to_class128:")
for i := 0; i < len(size_to_class128); i++ {
print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n")
}
print("\n")
}
runtime·throw("InitSizes failed");
gothrow("InitSizes failed")
}
// Returns the size of the memory block that mallocgc will allocate if you ask for size bytes.
uintptr
runtime·roundupsize(uintptr size)
{
if(size < MaxSmallSize) {
if(size <= 1024-8)
return runtime·class_to_size[runtime·size_to_class8[(size+7)>>3]];
else
return runtime·class_to_size[runtime·size_to_class128[(size-1024+127) >> 7]];
func roundupsize(size uintptr) uintptr {
if size < _MaxSmallSize {
if size <= 1024-8 {
return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
} else {
return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
}
}
if size+_PageSize < size {
return size
}
if(size + PageSize < size)
return size;
return ROUND(size, PageSize);
return round(size, _PageSize)
}
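// Worked example (illustrative only; the numbers follow from the code above):
// for a 1500-byte request, roundupsize takes the size > 1024-8 branch, computes
// the index (1500-1024+127)>>7 == 4, looks up size_to_class128[4], and returns
// class_to_size of that class, which is the size mallocgc actually allocates.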
......@@ -22,11 +22,11 @@ func makeslice(t *slicetype, len64 int64, cap64 int64) sliceStruct {
// but since the cap is only being supplied implicitly, saying len is clearer.
// See issue 4085.
len := int(len64)
if len64 < 0 || int64(len) != len64 || t.elem.size > 0 && uintptr(len) > maxmem/uintptr(t.elem.size) {
if len64 < 0 || int64(len) != len64 || t.elem.size > 0 && uintptr(len) > _MaxMem/uintptr(t.elem.size) {
panic(errorString("makeslice: len out of range"))
}
cap := int(cap64)
if cap < len || int64(cap) != cap64 || t.elem.size > 0 && uintptr(cap) > maxmem/uintptr(t.elem.size) {
if cap < len || int64(cap) != cap64 || t.elem.size > 0 && uintptr(cap) > _MaxMem/uintptr(t.elem.size) {
panic(errorString("makeslice: cap out of range"))
}
p := newarray(t.elem, uintptr(cap))
......@@ -42,7 +42,7 @@ func growslice(t *slicetype, old sliceStruct, n int64) sliceStruct {
cap64 := int64(old.cap) + n
cap := int(cap64)
if int64(cap) != cap64 || cap < old.cap || t.elem.size > 0 && uintptr(cap) > maxmem/uintptr(t.elem.size) {
if int64(cap) != cap64 || cap < old.cap || t.elem.size > 0 && uintptr(cap) > _MaxMem/uintptr(t.elem.size) {
panic(errorString("growslice: cap out of range"))
}
......@@ -72,7 +72,7 @@ func growslice(t *slicetype, old sliceStruct, n int64) sliceStruct {
}
}
if uintptr(newcap) >= maxmem/uintptr(et.size) {
if uintptr(newcap) >= _MaxMem/uintptr(et.size) {
panic(errorString("growslice: cap out of range"))
}
lenmem := uintptr(old.len) * uintptr(et.size)
......
......@@ -225,7 +225,7 @@ func rawbyteslice(size int) (b []byte) {
// rawruneslice allocates a new rune slice. The rune slice is not zeroed.
func rawruneslice(size int) (b []rune) {
if uintptr(size) > maxmem/4 {
if uintptr(size) > _MaxMem/4 {
gothrow("out of memory")
}
mem := goroundupsize(uintptr(size) * 4)
......@@ -255,9 +255,6 @@ func gostringsize(n int) string {
return s
}
//go:noescape
func findnull(*byte) int
func gostring(p *byte) string {
l := findnull(p)
if l == 0 {
......@@ -296,3 +293,12 @@ func contains(s, t string) bool {
func hasprefix(s, t string) bool {
return len(s) >= len(t) && s[:len(t)] == t
}
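// goatoi converts a leading run of decimal digits to an int. It stops at the
// first non-digit and does no sign or overflow handling.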
func goatoi(s string) int {
n := 0
for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
n = n*10 + int(s[0]) - '0'
s = s[1:]
}
return n
}