// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "defs_GOOS_GOARCH.h"
#include "type.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf };  // profile types

// Per-call-stack profiling information.
24 25 26 27 28
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Generally unions can break precise GC, but
	// this one is fine because it does not contain pointers.
	union
	{
		struct  // typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;  // since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct  // typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
52 53 54 55 56 57 58 59
	uintptr	hash;
	uintptr	nstk;
	uintptr	stk[1];
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;  // memory profile buckets
static Bucket *bbuckets;  // blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime·SysAlloc(BuckHashSize*sizeof buckhash[0]);
		if(buckhash == nil)
			runtime·throw("runtime: cannot allocate memory");
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Hash stack.
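	// (This mixing is essentially Bob Jenkins' one-at-a-time hash of the
	// raw PCs, minus the final h += h<<15 avalanche step.)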
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->typ == typ && b->hash == h && b->nstk == nstk &&
		   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	b = runtime·persistentalloc(sizeof *b + nstk*sizeof stk[0], 0);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}

static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime·MProf_GC(void)
{
	runtime·lock(&proflock);
	MProf_GC();
	runtime·unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.
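//
// Concretely, with AddrHashShift=20 and AddrDenseBits=8 (see below),
// an address addr is split as:
//	addr>>20 (everything above the low 20 bits) - hashed to find an AddrHash
//	bits 12..19 of addr - index into AddrHash.dense
//	low 20 bits of addr (stored complemented) - matched against AddrEntry.addr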

typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

static AddrHash **addrhash;	// points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;
static uintptr addrmem;

// Multiplicative hash function:
// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth.  The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
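// (HashMultiplier below is 0x9E3779B9, the familiar golden-ratio
// constant floor(2^32/phi).)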
enum {
	HashMultiplier = 2654435769U
};

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = runtime·persistentalloc(sizeof *ah, 0);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
	if((e = addrfree) == nil) {
		e = runtime·persistentalloc(64*sizeof *e, 0);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}

// Called by malloc to record a profiled block.
void
runtime·MProf_Malloc(void *p, uintptr size)
{
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

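	// m->nomemprof > 0 means memory profiling is disabled on this m;
	// incrementing it below also keeps us from reentering the profiler.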
	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	nstk = runtime·callers(1, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime·unlock(&proflock);
	m->nomemprof--;
}

// Called when freeing a profiled block.
void
runtime·MProf_Free(void *p, uintptr size)
{
	Bucket *b;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	runtime·lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime·unlock(&proflock);
	m->nomemprof--;
}

int64 runtime·blockprofilerate;  // in CPU ticks

void
runtime·SetBlockProfileRate(intgo rate)
{
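	// The rate passed in is in nanoseconds of blocking per sample;
	// convert it to CPU ticks so blockevent can compare it directly
	// against the measured cycle count.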
	runtime·atomicstore64((uint64*)&runtime·blockprofilerate, rate * runtime·tickspersecond() / (1000*1000*1000));
}

void
runtime·blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	uintptr stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
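	// Always sample events that blocked for at least rate ticks;
	// sample shorter ones with probability roughly cycles/rate.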
	if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
		return;

	nstk = runtime·callers(skip, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime·unlock(&proflock);
}

// Go interface to profile data.  (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	int32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime·lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (Record*)p.array;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime·unlock(&proflock);
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime·lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (BRecord*)p.array;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; i<b->nstk && i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i];
			for(; i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime·unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;
	
	first = runtime·atomicloadp(&runtime·allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (TRecord*)p.array;
		for(mp=first; mp; mp=mp->alllink) {
			runtime·memmove(r->stk, mp->createstack, sizeof r->stk);
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	uintptr pc, sp;
	
	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);

	if(all) {
		runtime·semacquire(&runtime·worldsema);
		m->gcing = 1;
		runtime·stoptheworld();
	}

	if(b.len == 0)
		n = 0;
	else{
		g->writebuf = (byte*)b.array;
		g->writenbuf = b.len;
		runtime·goroutineheader(g);
		runtime·traceback(pc, sp, 0, g);
		if(all)
			runtime·tracebackothers(g);
		n = b.len - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}
	
	if(all) {
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

static void
saveg(uintptr pc, uintptr sp, G *gp, TRecord *r)
{
	int32 n;
	
	n = runtime·gentraceback((uintptr)pc, (uintptr)sp, 0, gp, 0, r->stk, nelem(r->stk), nil, nil);
	if(n < nelem(r->stk))
		r->stk[n] = 0;
}

func GoroutineProfile(b Slice) (n int, ok bool) {
	uintptr pc, sp;
	TRecord *r;
	G *gp;
	
	sp = runtime·getcallersp(&b);
	pc = (uintptr)runtime·getcallerpc(&b);
	
	ok = false;
	n = runtime·gcount();
	if(n <= b.len) {
		runtime·semacquire(&runtime·worldsema);
		m->gcing = 1;
		runtime·stoptheworld();

		n = runtime·gcount();
		if(n <= b.len) {
			ok = true;
			r = (TRecord*)b.array;
			saveg(pc, sp, g, r++);
			for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp->sched.pc, gp->sched.sp, gp, r++);
			}
		}
	
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

void
runtime·mprofinit(void)
{
	addrhash = runtime·persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0);
}