diff --git a/src/pkg/runtime/cgocall.c b/src/pkg/runtime/cgocall.c
index 6b2ae59ddc5214bb35078cc275d5770b48e12b3f..a6383feebdd0a765e5bd1411436483f70ea36c36 100644
--- a/src/pkg/runtime/cgocall.c
+++ b/src/pkg/runtime/cgocall.c
@@ -129,7 +129,6 @@ runtime路cgocall(void (*fn)(void*), void *arg)
 	d.link = g->defer;
 	d.argp = (void*)-1;  // unused because unlockm never recovers
 	d.special = true;
-	d.free = false;
 	g->defer = &d;
 
 	m->ncgo++;
@@ -285,7 +284,6 @@ runtime路cgocallbackg1(void)
 	d.link = g->defer;
 	d.argp = (void*)-1;  // unused because unwindm never recovers
 	d.special = true;
-	d.free = false;
 	g->defer = &d;
 
 	if(raceenabled && !m->racecall)
diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c
index 6a1d625a75a1a2e8f7663a102b7334c8626a9ac2..b959c90ed80609175cd00606193fb4be9e2afe71 100644
--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -60,16 +60,25 @@ sync路runtime_registerPool(void **p)
 static void
 clearpools(void)
 {
-	void **p, **next;
+	void **pool, **next;
+	P *p, **pp;
+	int32 i;
 
-	for(p = pools.head; p != nil; p = next) {
-		next = p[0];
-		p[0] = nil; // next
-		p[1] = nil; // slice
-		p[2] = nil;
-		p[3] = nil;
+	// Walk the list of sync.Pool's, unlinking each one and clearing its first four words.
+	for(pool = pools.head; pool != nil; pool = next) {
+		next = pool[0];
+		pool[0] = nil; // next
+		pool[1] = nil; // slice
+		pool[2] = nil;
+		pool[3] = nil;
 	}
 	pools.head = nil;
+
+	// Empty every P's defer pools; the walk stops at the first nil entry in allp.
+	for(pp=runtime路allp; p=*pp; pp++) {
+		for(i=0; i<nelem(p->deferpool); i++)
+			p->deferpool[i] = nil;
+	}
 }
 
 // Bits in per-word bitmap.
diff --git a/src/pkg/runtime/msize.c b/src/pkg/runtime/msize.c
index 50b372b6134a88eafa6e5f41bad06f2b8a0291e0..630cda2d43ed7145fd17984437a2a59554b2eb63 100644
--- a/src/pkg/runtime/msize.c
+++ b/src/pkg/runtime/msize.c
@@ -44,8 +44,10 @@ int32 runtime路class_to_allocnpages[NumSizeClasses];
 int8 runtime路size_to_class8[1024/8 + 1];
 int8 runtime路size_to_class128[(MaxSmallSize-1024)/128 + 1];
 
-static int32
-SizeToClass(int32 size)
+void runtime路testdefersizes(void);
+
+int32
+runtime路SizeToClass(int32 size)
 {
 	if(size > MaxSmallSize)
 		runtime路throw("SizeToClass - invalid size");
@@ -119,7 +121,7 @@ runtime路InitSizes(void)
 	// Double-check SizeToClass.
 	if(0) {
 		for(n=0; n < MaxSmallSize; n++) {
-			sizeclass = SizeToClass(n);
+			sizeclass = runtime路SizeToClass(n);
 			if(sizeclass < 1 || sizeclass >= NumSizeClasses || runtime路class_to_size[sizeclass] < n) {
 				runtime路printf("size=%d sizeclass=%d runtime路class_to_size=%d\n", n, sizeclass, runtime路class_to_size[sizeclass]);
 				runtime路printf("incorrect SizeToClass");
@@ -133,6 +135,8 @@ runtime路InitSizes(void)
 		}
 	}
 
+	runtime路testdefersizes();
+
 	// Copy out for statistics table.
 	for(i=0; i<nelem(runtime路class_to_size); i++)
 		mstats.by_size[i].size = runtime路class_to_size[i];
diff --git a/src/pkg/runtime/panic.c b/src/pkg/runtime/panic.c
index d85a8fefba7c9166d895bf5f5a5bd09646dcff94..7bd408aea8b64592c42c15bcbf8d9a1c730ab7e4 100644
--- a/src/pkg/runtime/panic.c
+++ b/src/pkg/runtime/panic.c
@@ -13,108 +13,63 @@
 uint32 runtime路panicking;
 static Lock paniclk;
 
-enum
-{
-	DeferChunkSize = 2048
-};
+// Each P holds a pool of defers, one list per class, for arg sizes up to 8, 24, 40, 56 and 72 bytes.
+// The memory block is 40 bytes (24 on 32-bit) larger because of the Defer header.
+// These block sizes map exactly onto malloc size classes (checked by testdefersizes).
+
+// Defer size class (index into P.deferpool) for arg size sz.
+#define DEFERCLASS(sz) (((sz)+7)>>4)
+// Total size of the memory block for a defer with arg size sz; sz is rounded up to whole words.
+#define TOTALSIZE(sz) (sizeof(Defer) - sizeof(((Defer*)nil)->args) + ROUND(sz, sizeof(uintptr)))
 
-// Allocate a Defer, usually as part of the larger frame of deferred functions.
-// Each defer must be released with both popdefer and freedefer.
+// Allocate a Defer with room for siz bytes of arguments, reusing one from m->p's pool when available.
+// The result is pushed onto g->defer; each defer must be released with freedefer.
 static Defer*
 newdefer(int32 siz)
 {
-	int32 total;
-	DeferChunk *c;
+	int32 total, sc;
 	Defer *d;
-	
-	c = g->dchunk;
-	total = sizeof(*d) + ROUND(siz, sizeof(uintptr)) - sizeof(d->args);
-	if(c == nil || total > DeferChunkSize - c->off) {
-		if(total > DeferChunkSize / 2) {
-			// Not worth putting in any chunk.
-			// Allocate a separate block.
-			d = runtime路malloc(total);
-			d->siz = siz;
-			d->special = 1;
-			d->free = 1;
-			d->link = g->defer;
-			g->defer = d;
-			return d;
-		}
-
-		// Cannot fit in current chunk.
-		// Switch to next chunk, allocating if necessary.
-		c = g->dchunknext;
-		if(c == nil)
-			c = runtime路malloc(DeferChunkSize);
-		c->prev = g->dchunk;
-		c->off = sizeof(*c);
-		g->dchunk = c;
-		g->dchunknext = nil;
+	P *p;
+
+	d = nil;
+	sc = DEFERCLASS(siz);
+	if(sc < nelem(p->deferpool)) { // NOTE(review): p is unassigned here; presumably nelem only uses the array type - confirm
+		p = m->p;
+		d = p->deferpool[sc];
+		if(d)
+			p->deferpool[sc] = d->link; // pop the head of this class's free list
+	}
+	if(d == nil) {
+		// Pool miss: this class's list was empty, or siz is too big for any pool class.
+		total = TOTALSIZE(siz);
+		d = runtime路malloc(total);
 	}
-
-	d = (Defer*)((byte*)c + c->off);
-	c->off += total;
 	d->siz = siz;
 	d->special = 0;
-	d->free = 0;
 	d->link = g->defer;
 	g->defer = d;
-	return d;	
-}
-
-// Pop the current defer from the defer stack.
-// Its contents are still valid until the goroutine begins executing again.
-// In particular it is safe to call reflect.call(d->fn, d->argp, d->siz) after
-// popdefer returns.
-static void
-popdefer(void)
-{
-	Defer *d;
-	DeferChunk *c;
-	int32 total;
-	
-	d = g->defer;
-	if(d == nil)
-		runtime路throw("runtime: popdefer nil");
-	g->defer = d->link;
-	if(d->special) {
-		// Nothing else to do.
-		return;
-	}
-	total = sizeof(*d) + ROUND(d->siz, sizeof(uintptr)) - sizeof(d->args);
-	c = g->dchunk;
-	if(c == nil || (byte*)d+total != (byte*)c+c->off)
-		runtime路throw("runtime: popdefer phase error");
-	c->off -= total;
-	if(c->off == sizeof(*c)) {
-		// Chunk now empty, so pop from stack.
-		// Save in dchunknext both to help with pingponging between frames
-		// and to make sure d is still valid on return.
-		if(g->dchunknext != nil)
-			runtime路free(g->dchunknext);
-		g->dchunknext = c;
-		g->dchunk = c->prev;
-	}
+	return d;
 }
 
 // Free the given defer.
-// For defers in the per-goroutine chunk this just clears the saved arguments.
-// For large defers allocated on the heap, this frees them.
 // The defer cannot be used after this call.
 static void
 freedefer(Defer *d)
 {
-	int32 total;
+	int32 sc;
+	P *p;
 
-	if(d->special) {
-		if(d->free)
-			runtime路free(d);
-	} else {
-		// Wipe out any possible pointers in argp/pc/fn/args.
-		total = sizeof(*d) + ROUND(d->siz, sizeof(uintptr)) - sizeof(d->args);
-		runtime路memclr((byte*)d, total);
-	}
+	if(d->special) // special defers are neither pooled nor freed here
+		return;
+	sc = DEFERCLASS(d->siz);
+	if(sc < nelem(p->deferpool)) { // NOTE(review): p is unassigned here; presumably nelem only uses the array type - confirm
+		p = m->p;
+		d->link = p->deferpool[sc];
+		p->deferpool[sc] = d;
+		// Pointers in argp/pc/fn/args are deliberately not wiped here,
+		// because the defer pools are emptied before GC (see clearpools in mgc0.c).
+	} else
+		runtime路free(d); // arg size too large for any pool class
 }
 
 // Create a new deferred function fn with siz bytes of arguments.
@@ -182,7 +137,7 @@ runtime路deferreturn(uintptr arg0)
 	m->locks++;
 	runtime路memmove(argp, d->args, d->siz);
 	fn = d->fn;
-	popdefer();
+	g->defer = d->link;
 	freedefer(d);
 	m->locks--;
 	if(m->locks == 0 && g->preempt)
@@ -190,6 +145,37 @@ runtime路deferreturn(uintptr arg0)
 	runtime路jmpdefer(fn, argp);
 }
 
+// Check, for every arg size that falls in a pooled defer class, that sizes sharing a
+// defer size class also share a malloc size class; throws "bad defer size class" if not.
+void
+runtime路testdefersizes(void)
+{
+	P *p;
+	int32 i, siz, defersc, mallocsc;
+	int32 map[nelem(p->deferpool)]; // malloc size class first seen for each defer class
+
+	for(i=0; i<nelem(p->deferpool); i++)
+		map[i] = -1; // -1 marks a defer class with no malloc class recorded yet
+	for(i=0;; i++) {
+		defersc = DEFERCLASS(i);
+		if(defersc >= nelem(p->deferpool))
+			break; // arg size i is past the last pooled class
+		siz = TOTALSIZE(i);
+		mallocsc = runtime路SizeToClass(siz);
+		siz = runtime路class_to_size[mallocsc]; // size recorded for that malloc class; only used in the messages below
+		// runtime路printf("defer class %d: arg size %d, block size %d(%d)\n", defersc, i, siz, mallocsc);
+		if(map[defersc] < 0) {
+			map[defersc] = mallocsc;
+			continue;
+		}
+		if(map[defersc] != mallocsc) {
+			runtime路printf("bad defer size class: i=%d siz=%d mallocsc=%d/%d\n",
+				i, siz, map[defersc], mallocsc);
+			runtime路throw("bad defer size class");
+		}
+	}
+}
+
 // Run all deferred functions for the current goroutine.
 static void
 rundefer(void)
@@ -197,7 +183,7 @@ rundefer(void)
 	Defer *d;
 
 	while((d = g->defer) != nil) {
-		popdefer();
+		g->defer = d->link;
 		reflect路call(d->fn, (byte*)d->args, d->siz);
 		freedefer(d);
 	}
@@ -239,7 +225,7 @@ runtime路panic(Eface e)
 		if(d == nil)
 			break;
 		// take defer off list in case of recursive panic
-		popdefer();
+		g->defer = d->link;
 		g->ispanic = true;	// rock for newstack, where reflect.newstackcall ends up
 		argp = d->argp;
 		pc = d->pc;
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c
index 693cacaa5885169385136c90f431fb5b28974783..47012ae550d2d7471dae78bb2b9b47aa128ff344 100644
--- a/src/pkg/runtime/proc.c
+++ b/src/pkg/runtime/proc.c
@@ -204,7 +204,6 @@ runtime路main(void)
 	d.link = g->defer;
 	d.argp = (void*)-1;
 	d.special = true;
-	d.free = false;
 	g->defer = &d;
 
 	if(m != &runtime路m0)
diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h
index 1945938402941325dd483ad46972bdfbbc4b1df7..578f2574a930957902c425e2878c198cfc89a5e4 100644
--- a/src/pkg/runtime/runtime.h
+++ b/src/pkg/runtime/runtime.h
@@ -70,7 +70,6 @@ typedef	struct	PtrType		PtrType;
 typedef	struct	ChanType		ChanType;
 typedef	struct	MapType		MapType;
 typedef	struct	Defer		Defer;
-typedef	struct	DeferChunk	DeferChunk;
 typedef	struct	Panic		Panic;
 typedef	struct	Hmap		Hmap;
 typedef	struct	Hchan		Hchan;
@@ -281,8 +280,6 @@ struct	G
 	int32	sig;
 	int32	writenbuf;
 	byte*	writebuf;
-	DeferChunk*	dchunk;
-	DeferChunk*	dchunknext;
 	uintptr	sigcode0;
 	uintptr	sigcode1;
 	uintptr	sigpc;
@@ -387,6 +384,7 @@ struct P
 	uint32	syscalltick;	// incremented on every system call
 	M*	m;		// back-link to associated M (nil if idle)
 	MCache*	mcache;
+	Defer*	deferpool[5];	// pool of available Defer structs of different sizes (see panic.c)
 
 	// Queue of runnable goroutines.
 	uint32	runqhead;
@@ -676,7 +674,6 @@ struct Defer
 {
 	int32	siz;
 	bool	special;	// not part of defer frame
-	bool	free;		// if special, free when done
 	byte*	argp;		// where args were copied from
 	byte*	pc;
 	FuncVal*	fn;
@@ -684,12 +681,6 @@ struct Defer
 	void*	args[1];	// padded to actual size
 };
 
-struct DeferChunk
-{
-	DeferChunk	*prev;
-	uintptr	off;
-};
-
 /*
  * panics
  */