runtime: fix bug in maps at the intersection of iterators, growing, and NaN keys

If an iterator is started while a map is in the middle of a grow, and the map has NaN keys, then those keys might get returned by the iterator more than once. This fix makes the evacuation decision deterministic and repeatable for NaN keys so each one gets returned only once. R=golang-dev, r, khr, iant CC=golang-dev https://golang.org/cl/14367043

runtime: fix bug in maps at the intersection of iterators, growing, and NaN keys
If an iterator is started while a map is in the middle of a grow, and the map has NaN keys, then those keys might get returned by the iterator more than once. This fix makes the evacuation decision deterministic and repeatable for NaN keys so each one gets returned only once. R=golang-dev, r, khr, iant CC=golang-dev https://golang.org/cl/14367043
869368a5 · Keith Randall · 15baf6b4 · 869368a5 · 869368a5 · 869368a5
Commit 869368a5 authored Oct 04, 2013 by Keith Randall
Showing with 91 additions and 15 deletions

src/pkg/runtime/export_test.go src/pkg/runtime/export_test.go +3 -0

src/pkg/runtime/hashmap.c src/pkg/runtime/hashmap.c +50 -15

src/pkg/runtime/map_test.go src/pkg/runtime/map_test.go +38 -0

No files found.
--- a/src/pkg/runtime/export_test.go
+++ b/src/pkg/runtime/export_test.go
@@ -81,3 +81,6 @@ var Int32Hash = int32Hash
 var Int64Hash = int64Hash

 func GogoBytes() int32
+
+var hashLoad float64 // declared in hashmap.c
+var HashLoad = &hashLoad
--- a/src/pkg/runtime/hashmap.c
+++ b/src/pkg/runtime/hashmap.c
@@ -288,6 +288,8 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
 	uintptr i;
 	byte *k, *v;
 	byte *xk, *yk, *xv, *yv;
+	uint8 top;
+	bool eq;

 	b = (Bucket*)(h->oldbuckets + oldbucket * h->bucketsize);
 	newbit = (uintptr)1 << (h->B - 1);
@@ -306,13 +308,38 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
 		yv = yk + h->keysize * BUCKETSIZE;
 		do {
 			for(i = 0, k = b->data, v = k + h->keysize * BUCKETSIZE; i < BUCKETSIZE; i++, k += h->keysize, v += h->valuesize) {
-				if(b->tophash[i] == 0)
+				top = b->tophash[i];
+				if(top == 0)
 					continue;
+
+				// Compute hash to make our evacuation decision (whether we need
+				// to send this key/value to bucket x or bucket y).
 				hash = h->hash0;
 				t->key->alg->hash(&hash, t->key->size, IK(h, k));
-				// NOTE: if key != key, then this hash could be (and probably will be)
-				// entirely different from the old hash.  We effectively only update
-				// the B'th bit of the hash in this case.
+				if((h->flags & Iterator) != 0) {
+					t->key->alg->equal(&eq, t->key->size, IK(h, k), IK(h, k));
+					if(!eq) {
+						// If key != key (NaNs), then the hash could be (and probably
+						// will be) entirely different from the old hash.  Moreover,
+						// it isn't reproducible.  Reproducibility is required in the
+						// presence of iterators, as our evacuation decision must
+						// match whatever decision the iterator made.
+						// Fortunately, we have the freedom to send these keys either
+						// way.  Also, tophash is meaningless for these kinds of keys.
+						// We let the low bit of tophash drive the evacuation decision.
+						// We recompute a new random tophash for the next level so
+						// these keys will get evenly distributed across all buckets
+						// after multiple grows.
+						if((top & 1) != 0)
+							hash |= newbit;
+						else
+							hash &= ~newbit;
+						top = hash >> (8*sizeof(uintptr)-8);
+						if(top == 0)
+							top = 1;
+					}
+				}
+
 				if((hash & newbit) == 0) {
 					if(xi == BUCKETSIZE) {
 						if(checkgc) mstats.next_gc = mstats.heap_alloc;
@@ -323,7 +350,7 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
 						xk = x->data;
 						xv = xk + h->keysize * BUCKETSIZE;
 					}
-					x->tophash[xi] = b->tophash[i];
+					x->tophash[xi] = top;
 					if((h->flags & IndirectKey) != 0) {
 						*(byte**)xk = *(byte**)k;               // copy pointer
 					} else {
@@ -347,7 +374,7 @@ evacuate(MapType *t, Hmap *h, uintptr oldbucket)
 						yk = y->data;
 						yv = yk + h->keysize * BUCKETSIZE;
 					}
-					y->tophash[yi] = b->tophash[i];
+					y->tophash[yi] = top;
 					if((h->flags & IndirectKey) != 0) {
 						*(byte**)yk = *(byte**)k;
 					} else {
@@ -838,18 +865,12 @@ next:
 			if(check_bucket >= 0) {
 				// Special case: iterator was started during a grow and the
 				// grow is not done yet.  We're working on a bucket whose
-				// oldbucket has not been evacuated yet.  So we iterate
+				// oldbucket has not been evacuated yet.  So we're iterating
 				// through the oldbucket, skipping any keys that will go
 				// to the other new bucket (each oldbucket expands to two
 				// buckets during a grow).
 				t->key->alg->equal(&eq, t->key->size, IK(h, k), IK(h, k));
-				if(!eq) {
-					// Hash is meaningless if k != k (NaNs).  Return all
-					// NaNs during the first of the two new buckets.
-					if(bucket >= ((uintptr)1 << (it->B - 1))) {
-						continue;
-					}
-				} else {
+				if(eq) {
 					// If the item in the oldbucket is not destined for
 					// the current new bucket in the iteration, skip it.
 					hash = h->hash0;
@@ -857,6 +878,14 @@ next:
 					if((hash & (((uintptr)1 << it->B) - 1)) != check_bucket) {
 						continue;
 					}
+				} else {
+					// Hash isn't repeatable if k != k (NaNs).  We need a
+					// repeatable and randomish choice of which direction
+					// to send NaNs during evacuation.  We'll use the low
+					// bit of tophash to decide which way NaNs go.
+					if(check_bucket >> (it->B - 1) != (b->tophash[i] & 1)) {
+						continue;
+					}
 				}
 			}
 			if(!evacuated(b)) {
@@ -1091,7 +1120,10 @@ runtime·mapassign(MapType *t, Hmap *h, byte *ak, byte *av)
 		runtime·prints("; key=");
 		t->key->alg->print(t->key->size, ak);
 		runtime·prints("; val=");
-		t->elem->alg->print(t->elem->size, av);
+		if(av)
+			t->elem->alg->print(t->elem->size, av);
+		else
+			runtime·prints("nil");
 		runtime·prints("\n");
 	}
 }
@@ -1330,3 +1362,6 @@ runtime·mapiter2(struct hash_iter *it, ...)
 		runtime·prints("\n");
 	}
 }
+
+// exported value for testing
+float64 runtime·hashLoad = LOAD;
--- a/src/pkg/runtime/map_test.go
+++ b/src/pkg/runtime/map_test.go
@@ -371,3 +371,41 @@ func testMapLookups(t *testing.T, m map[string]string) {
 		}
 	}
 }
+
+// Tests whether the iterator returns the right elements when
+// started in the middle of a grow, when the keys are NaNs.
+func TestMapNanGrowIterator(t *testing.T) {
+	m := make(map[float64]int)
+	nan := math.NaN()
+	const nBuckets = 16
+	// To fill nBuckets buckets takes LOAD * nBuckets keys.
+	nKeys := int(nBuckets * *runtime.HashLoad)
+
+	// Get map to full point with nan keys.
+	for i := 0; i < nKeys; i++ {
+		m[nan] = i
+	}
+	// Trigger grow
+	m[1.0] = 1
+	delete(m, 1.0)
+
+	// Run iterator
+	found := make(map[int]struct{})
+	for _, v := range m {
+		if v != -1 {
+			if _, repeat := found[v]; repeat {
+				t.Fatalf("repeat of value %d", v)
+			}
+			found[v] = struct{}{}
+		}
+		if len(found) == nKeys/2 {
+			// Halfway through iteration, finish grow.
+			for i := 0; i < nBuckets; i++ {
+				delete(m, 1.0)
+			}
+		}
+	}
+	if len(found) != nKeys {
+		t.Fatalf("missing value")
+	}
+}