hash/maphash: revise API to be more idiomatic

This CL makes these changes to the hash/maphash API to make it fit a bit more into the standard library: - Move some of the package doc onto type Hash, so that `go doc maphash.Hash` shows it. - Instead of having identical AddBytes and Write methods, standardize on Write, the usual name for this function. Similarly, AddString -> WriteString, AddByte -> WriteByte. - Instead of having identical Hash and Sum64 methods, standardize on Sum64 (for hash.Hash64). Dropping the "Hash" method also helps because Hash is usually reserved to mean the state of a hash function (hash.Hash etc), not the hash value itself. - Make an uninitialized hash.Hash auto-seed with a random seed. It is critical that users not use the same seed for all hash functions in their program, at least not accidentally. So the Hash implementation must either panic if uninitialized or initialize itself. Initializing itself is less work for users and can be done lazily. - Now that the zero hash.Hash is useful, drop maphash.New in favor of new(maphash.Hash) or simply declaring a maphash.Hash. - Add a [0]func()-typed field to the Hash so that Hashes cannot be compared. (I considered doing the same for Seed but comparing seeds seems OK.) - Drop the integer argument from MakeSeed, to match the original design in golang.org/issue/28322. There is no point to giving users control over the specific seed bits, since we want the interpretation of those bits to be different in every different process. The only thing users need is to be able to create a new random seed at each call. (Fixes a TODO in MakeSeed's public doc comment.) This API is new in Go 1.14, so these changes do not violate the compatibility promise. Fixes #35060. Fixes #35348. Change-Id: Ie6fecc441f3f5ef66388c6ead92e875c0871f805 Reviewed-on: https://go-review.googlesource.com/c/go/+/205069 Run-TryBot: Russ Cox <rsc@golang.org> Reviewed-by: Alan Donovan <adonovan@google.com> Reviewed-by: Keith Randall <khr@golang.org>

hash/maphash: revise API to be more idiomatic
This CL makes these changes to the hash/maphash API to make it fit a bit more into the standard library: - Move some of the package doc onto type Hash, so that `go doc maphash.Hash` shows it. - Instead of having identical AddBytes and Write methods, standardize on Write, the usual name for this function. Similarly, AddString -> WriteString, AddByte -> WriteByte. - Instead of having identical Hash and Sum64 methods, standardize on Sum64 (for hash.Hash64). Dropping the "Hash" method also helps because Hash is usually reserved to mean the state of a hash function (hash.Hash etc), not the hash value itself. - Make an uninitialized hash.Hash auto-seed with a random seed. It is critical that users not use the same seed for all hash functions in their program, at least not accidentally. So the Hash implementation must either panic if uninitialized or initialize itself. Initializing itself is less work for users and can be done lazily. - Now that the zero hash.Hash is useful, drop maphash.New in favor of new(maphash.Hash) or simply declaring a maphash.Hash. - Add a [0]func()-typed field to the Hash so that Hashes cannot be compared. (I considered doing the same for Seed but comparing seeds seems OK.) - Drop the integer argument from MakeSeed, to match the original design in golang.org/issue/28322. There is no point to giving users control over the specific seed bits, since we want the interpretation of those bits to be different in every different process. The only thing users need is to be able to create a new random seed at each call. (Fixes a TODO in MakeSeed's public doc comment.) This API is new in Go 1.14, so these changes do not violate the compatibility promise. Fixes #35060. Fixes #35348. Change-Id: Ie6fecc441f3f5ef66388c6ead92e875c0871f805 Reviewed-on: https://go-review.googlesource.com/c/go/+/205069 Run-TryBot: Russ Cox <rsc@golang.org> Reviewed-by: Alan Donovan <adonovan@google.com> Reviewed-by: Keith Randall <khr@golang.org>
5a7c571e · Russ Cox · 03aca99f · 5a7c571e · 5a7c571e · 5a7c571e
Commit 5a7c571e authored Nov 04, 2019 by Russ Cox
Showing with 168 additions and 125 deletions

src/hash/maphash/maphash.go src/hash/maphash/maphash.go +113 -82

src/hash/maphash/maphash_test.go src/hash/maphash/maphash_test.go +31 -23

src/hash/maphash/smhasher_test.go src/hash/maphash/smhasher_test.go +24 -20

No files found.
--- a/src/hash/maphash/maphash.go
+++ b/src/hash/maphash/maphash.go
--- a/src/hash/maphash/maphash_test.go
+++ b/src/hash/maphash/maphash_test.go
@@ -2,19 +2,18 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package maphash_test
+package maphash

 import (
 	"hash"
-	"hash/maphash"
 	"testing"
 )

 func TestUnseededHash(t *testing.T) {
 	m := map[uint64]struct{}{}
 	for i := 0; i < 1000; i++ {
-		h := maphash.New()
-		m[h.Hash()] = struct{}{}
+		h := new(Hash)
+		m[h.Sum64()] = struct{}{}
 	}
 	if len(m) < 900 {
 		t.Errorf("empty hash not sufficiently random: got %d, want 1000", len(m))
@@ -22,12 +21,12 @@ func TestUnseededHash(t *testing.T) {
 }

 func TestSeededHash(t *testing.T) {
-	s := maphash.MakeSeed(1234)
+	s := MakeSeed()
 	m := map[uint64]struct{}{}
 	for i := 0; i < 1000; i++ {
-		h := maphash.New()
+		h := new(Hash)
 		h.SetSeed(s)
-		m[h.Hash()] = struct{}{}
+		m[h.Sum64()] = struct{}{}
 	}
 	if len(m) != 1 {
 		t.Errorf("seeded hash is random: got %d, want 1", len(m))
@@ -36,14 +35,17 @@ func TestSeededHash(t *testing.T) {

 func TestHashGrouping(t *testing.T) {
 	b := []byte("foo")
-	h1 := maphash.New()
-	h2 := maphash.New()
+	h1 := new(Hash)
+	h2 := new(Hash)
 	h2.SetSeed(h1.Seed())
-	h1.AddBytes(b)
+	h1.Write(b)
 	for _, x := range b {
-		h2.AddByte(x)
+		err := h2.WriteByte(x)
+		if err != nil {
+			t.Fatalf("WriteByte: %v", err)
+		}
 	}
-	if h1.Hash() != h2.Hash() {
+	if h1.Sum64() != h2.Sum64() {
 		t.Errorf("hash of \"foo\" and \"f\",\"o\",\"o\" not identical")
 	}
 }
@@ -51,13 +53,19 @@ func TestHashGrouping(t *testing.T) {
 func TestHashBytesVsString(t *testing.T) {
 	s := "foo"
 	b := []byte(s)
-	h1 := maphash.New()
-	h2 := maphash.New()
+	h1 := new(Hash)
+	h2 := new(Hash)
 	h2.SetSeed(h1.Seed())
-	h1.AddString(s)
-	h2.AddBytes(b)
-	if h1.Hash() != h2.Hash() {
-		t.Errorf("hash of string and byts not identical")
+	n1, err1 := h1.WriteString(s)
+	if n1 != len(s) || err1 != nil {
+		t.Fatalf("WriteString(s) = %d, %v, want %d, nil", n1, err1, len(s))
+	}
+	n2, err2 := h2.Write(b)
+	if n2 != len(b) || err2 != nil {
+		t.Fatalf("Write(b) = %d, %v, want %d, nil", n2, err2, len(b))
+	}
+	if h1.Sum64() != h2.Sum64() {
+		t.Errorf("hash of string and bytes not identical")
 	}
 }

@@ -66,9 +74,9 @@ func TestHashHighBytes(t *testing.T) {
 	const N = 10
 	m := map[uint64]struct{}{}
 	for i := 0; i < N; i++ {
-		h := maphash.New()
-		h.AddString("foo")
-		m[h.Hash()>>32] = struct{}{}
+		h := new(Hash)
+		h.WriteString("foo")
+		m[h.Sum64()>>32] = struct{}{}
 	}
 	if len(m) < N/2 {
 		t.Errorf("from %d seeds, wanted at least %d different hashes; got %d", N, N/2, len(m))
@@ -76,5 +84,5 @@ func TestHashHighBytes(t *testing.T) {
 }

 // Make sure a Hash implements the hash.Hash and hash.Hash64 interfaces.
-var _ hash.Hash = &maphash.Hash{}
-var _ hash.Hash64 = &maphash.Hash{}
+var _ hash.Hash = &Hash{}
+var _ hash.Hash64 = &Hash{}
--- a/src/hash/maphash/smhasher_test.go
+++ b/src/hash/maphash/smhasher_test.go
@@ -2,11 +2,10 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package maphash_test
+package maphash

 import (
 	"fmt"
-	"hash/maphash"
 	"math"
 	"math/rand"
 	"runtime"
@@ -19,6 +18,8 @@ import (
 // https://code.google.com/p/smhasher/
 // This code is a port of some of the Smhasher tests to Go.

+var fixedSeed = MakeSeed()
+
 // Sanity checks.
 // hash should not depend on values outside key.
 // hash should not depend on alignment.
@@ -36,7 +37,7 @@ func TestSmhasherSanity(t *testing.T) {
 				randBytes(r, b[:])
 				randBytes(r, c[:])
 				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
-				if bytesHash(b[PAD:PAD+n], 0) != bytesHash(c[PAD+i:PAD+i+n], 0) {
+				if bytesHash(b[PAD:PAD+n]) != bytesHash(c[PAD+i:PAD+i+n]) {
 					t.Errorf("hash depends on bytes outside key")
 				}
 			}
@@ -44,17 +45,17 @@ func TestSmhasherSanity(t *testing.T) {
 	}
 }

-func bytesHash(b []byte, seed uint64) uint64 {
-	h := maphash.New()
-	h.SetSeed(maphash.MakeSeed(seed))
-	h.AddBytes(b)
-	return h.Hash()
+func bytesHash(b []byte) uint64 {
+	var h Hash
+	h.SetSeed(fixedSeed)
+	h.Write(b)
+	return h.Sum64()
 }
-func stringHash(s string, seed uint64) uint64 {
-	h := maphash.New()
-	h.SetSeed(maphash.MakeSeed(seed))
-	h.AddString(s)
-	return h.Hash()
+func stringHash(s string) uint64 {
+	var h Hash
+	h.SetSeed(fixedSeed)
+	h.WriteString(s)
+	return h.Sum64()
 }

 const hashSize = 64
@@ -77,13 +78,16 @@ func (s *hashSet) add(h uint64) {
 	s.n++
 }
 func (s *hashSet) addS(x string) {
-	s.add(stringHash(x, 0))
+	s.add(stringHash(x))
 }
 func (s *hashSet) addB(x []byte) {
-	s.add(bytesHash(x, 0))
+	s.add(bytesHash(x))
 }
-func (s *hashSet) addS_seed(x string, seed uint64) {
-	s.add(stringHash(x, seed))
+func (s *hashSet) addS_seed(x string, seed Seed) {
+	var h Hash
+	h.SetSeed(seed)
+	h.WriteString(x)
+	s.add(h.Sum64())
 }
 func (s *hashSet) check(t *testing.T) {
 	const SLOP = 10.0
@@ -312,7 +316,7 @@ func (k *bytesKey) flipBit(i int) {
 	k.b[i>>3] ^= byte(1 << uint(i&7))
 }
 func (k *bytesKey) hash() uint64 {
-	return bytesHash(k.b, 0)
+	return bytesHash(k.b)
 }
 func (k *bytesKey) name() string {
 	return fmt.Sprintf("bytes%d", len(k.b))
@@ -458,8 +462,8 @@ func TestSmhasherSeed(t *testing.T) {
 	const N = 100000
 	s := "hello"
 	for i := 0; i < N; i++ {
-		h.addS_seed(s, uint64(i))
-		h.addS_seed(s, uint64(i)<<32) // make sure high bits are used
+		h.addS_seed(s, Seed{s: uint64(i + 1)})
+		h.addS_seed(s, Seed{s: uint64(i+1) << 32}) // make sure high bits are used
 	}
 	h.check(t)
 }