Commit 5a7c571e authored by Russ Cox's avatar Russ Cox

hash/maphash: revise API to be more idiomatic

This CL makes these changes to the hash/maphash API to make it fit a bit
more into the standard library:

 - Move some of the package doc onto type Hash, so that `go doc maphash.Hash` shows it.

 - Instead of having identical AddBytes and Write methods,
   standardize on Write, the usual name for this function.
   Similarly, AddString -> WriteString, AddByte -> WriteByte.

 - Instead of having identical Hash and Sum64 methods,
   standardize on Sum64 (for hash.Hash64). Dropping the "Hash" method
   also helps because Hash is usually reserved to mean the state of a
   hash function (hash.Hash etc), not the hash value itself.

 - Make an uninitialized hash.Hash auto-seed with a random seed.
   It is critical that users not use the same seed for all hash functions
   in their program, at least not accidentally. So the Hash implementation
   must either panic if uninitialized or initialize itself.
   Initializing itself is less work for users and can be done lazily.

 - Now that the zero hash.Hash is useful, drop maphash.New in favor of
   new(maphash.Hash) or simply declaring a maphash.Hash.

 - Add a [0]func()-typed field to the Hash so that Hashes cannot be compared.
   (I considered doing the same for Seed but comparing seeds seems OK.)

 - Drop the integer argument from MakeSeed, to match the original design
   in golang.org/issue/28322. There is no point to giving users control
   over the specific seed bits, since we want the interpretation of those
   bits to be different in every different process. The only thing users
   need is to be able to create a new random seed at each call.
   (Fixes a TODO in MakeSeed's public doc comment.)

This API is new in Go 1.14, so these changes do not violate the compatibility promise.

Fixes #35060.
Fixes #35348.

Change-Id: Ie6fecc441f3f5ef66388c6ead92e875c0871f805
Reviewed-on: https://go-review.googlesource.com/c/go/+/205069
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: default avatarAlan Donovan <adonovan@google.com>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 03aca99f
This diff is collapsed.
......@@ -2,19 +2,18 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package maphash_test
package maphash
import (
"hash"
"hash/maphash"
"testing"
)
func TestUnseededHash(t *testing.T) {
m := map[uint64]struct{}{}
for i := 0; i < 1000; i++ {
h := maphash.New()
m[h.Hash()] = struct{}{}
h := new(Hash)
m[h.Sum64()] = struct{}{}
}
if len(m) < 900 {
t.Errorf("empty hash not sufficiently random: got %d, want 1000", len(m))
......@@ -22,12 +21,12 @@ func TestUnseededHash(t *testing.T) {
}
func TestSeededHash(t *testing.T) {
s := maphash.MakeSeed(1234)
s := MakeSeed()
m := map[uint64]struct{}{}
for i := 0; i < 1000; i++ {
h := maphash.New()
h := new(Hash)
h.SetSeed(s)
m[h.Hash()] = struct{}{}
m[h.Sum64()] = struct{}{}
}
if len(m) != 1 {
t.Errorf("seeded hash is random: got %d, want 1", len(m))
......@@ -36,14 +35,17 @@ func TestSeededHash(t *testing.T) {
func TestHashGrouping(t *testing.T) {
b := []byte("foo")
h1 := maphash.New()
h2 := maphash.New()
h1 := new(Hash)
h2 := new(Hash)
h2.SetSeed(h1.Seed())
h1.AddBytes(b)
h1.Write(b)
for _, x := range b {
h2.AddByte(x)
err := h2.WriteByte(x)
if err != nil {
t.Fatalf("WriteByte: %v", err)
}
}
if h1.Hash() != h2.Hash() {
if h1.Sum64() != h2.Sum64() {
t.Errorf("hash of \"foo\" and \"f\",\"o\",\"o\" not identical")
}
}
......@@ -51,13 +53,19 @@ func TestHashGrouping(t *testing.T) {
func TestHashBytesVsString(t *testing.T) {
s := "foo"
b := []byte(s)
h1 := maphash.New()
h2 := maphash.New()
h1 := new(Hash)
h2 := new(Hash)
h2.SetSeed(h1.Seed())
h1.AddString(s)
h2.AddBytes(b)
if h1.Hash() != h2.Hash() {
t.Errorf("hash of string and byts not identical")
n1, err1 := h1.WriteString(s)
if n1 != len(s) || err1 != nil {
t.Fatalf("WriteString(s) = %d, %v, want %d, nil", n1, err1, len(s))
}
n2, err2 := h2.Write(b)
if n2 != len(b) || err2 != nil {
t.Fatalf("Write(b) = %d, %v, want %d, nil", n2, err2, len(b))
}
if h1.Sum64() != h2.Sum64() {
t.Errorf("hash of string and bytes not identical")
}
}
......@@ -66,9 +74,9 @@ func TestHashHighBytes(t *testing.T) {
const N = 10
m := map[uint64]struct{}{}
for i := 0; i < N; i++ {
h := maphash.New()
h.AddString("foo")
m[h.Hash()>>32] = struct{}{}
h := new(Hash)
h.WriteString("foo")
m[h.Sum64()>>32] = struct{}{}
}
if len(m) < N/2 {
t.Errorf("from %d seeds, wanted at least %d different hashes; got %d", N, N/2, len(m))
......@@ -76,5 +84,5 @@ func TestHashHighBytes(t *testing.T) {
}
// Make sure a Hash implements the hash.Hash and hash.Hash64 interfaces.
var _ hash.Hash = &maphash.Hash{}
var _ hash.Hash64 = &maphash.Hash{}
var _ hash.Hash = &Hash{}
var _ hash.Hash64 = &Hash{}
......@@ -2,11 +2,10 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package maphash_test
package maphash
import (
"fmt"
"hash/maphash"
"math"
"math/rand"
"runtime"
......@@ -19,6 +18,8 @@ import (
// https://code.google.com/p/smhasher/
// This code is a port of some of the Smhasher tests to Go.
var fixedSeed = MakeSeed()
// Sanity checks.
// hash should not depend on values outside key.
// hash should not depend on alignment.
......@@ -36,7 +37,7 @@ func TestSmhasherSanity(t *testing.T) {
randBytes(r, b[:])
randBytes(r, c[:])
copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
if bytesHash(b[PAD:PAD+n], 0) != bytesHash(c[PAD+i:PAD+i+n], 0) {
if bytesHash(b[PAD:PAD+n]) != bytesHash(c[PAD+i:PAD+i+n]) {
t.Errorf("hash depends on bytes outside key")
}
}
......@@ -44,17 +45,17 @@ func TestSmhasherSanity(t *testing.T) {
}
}
func bytesHash(b []byte, seed uint64) uint64 {
h := maphash.New()
h.SetSeed(maphash.MakeSeed(seed))
h.AddBytes(b)
return h.Hash()
func bytesHash(b []byte) uint64 {
var h Hash
h.SetSeed(fixedSeed)
h.Write(b)
return h.Sum64()
}
func stringHash(s string, seed uint64) uint64 {
h := maphash.New()
h.SetSeed(maphash.MakeSeed(seed))
h.AddString(s)
return h.Hash()
func stringHash(s string) uint64 {
var h Hash
h.SetSeed(fixedSeed)
h.WriteString(s)
return h.Sum64()
}
const hashSize = 64
......@@ -77,13 +78,16 @@ func (s *hashSet) add(h uint64) {
s.n++
}
func (s *hashSet) addS(x string) {
s.add(stringHash(x, 0))
s.add(stringHash(x))
}
func (s *hashSet) addB(x []byte) {
s.add(bytesHash(x, 0))
s.add(bytesHash(x))
}
func (s *hashSet) addS_seed(x string, seed uint64) {
s.add(stringHash(x, seed))
func (s *hashSet) addS_seed(x string, seed Seed) {
var h Hash
h.SetSeed(seed)
h.WriteString(x)
s.add(h.Sum64())
}
func (s *hashSet) check(t *testing.T) {
const SLOP = 10.0
......@@ -312,7 +316,7 @@ func (k *bytesKey) flipBit(i int) {
k.b[i>>3] ^= byte(1 << uint(i&7))
}
func (k *bytesKey) hash() uint64 {
return bytesHash(k.b, 0)
return bytesHash(k.b)
}
func (k *bytesKey) name() string {
return fmt.Sprintf("bytes%d", len(k.b))
......@@ -458,8 +462,8 @@ func TestSmhasherSeed(t *testing.T) {
const N = 100000
s := "hello"
for i := 0; i < N; i++ {
h.addS_seed(s, uint64(i))
h.addS_seed(s, uint64(i)<<32) // make sure high bits are used
h.addS_seed(s, Seed{s: uint64(i + 1)})
h.addS_seed(s, Seed{s: uint64(i+1) << 32}) // make sure high bits are used
}
h.check(t)
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment