Commit bf36219c authored by Keith Randall's avatar Keith Randall Committed by Keith Randall

bytes/hash: add hashing package for bytes and strings

Fixes #28322

R=go1.14

RELNOTE=yes

Change-Id: Ic29f8b587c8c77472260836a5c3e13edaded13fa
Reviewed-on: https://go-review.googlesource.com/c/go/+/186877Reviewed-by: default avatarAlan Donovan <adonovan@google.com>
parent fbfb41e6
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bytes/hash provides hash functions on byte sequences. These
// hash functions are intended to be used to implement hash tables or
// other data structures that need to map arbitrary strings or byte
// sequences to a uniform distribution of integers. The hash functions
// are collision-resistant but are not cryptographically secure (use
// one of the hash functions in crypto/* if you need that).
//
// The produced hashes depend only on the sequence of bytes provided
// to the Hash object, not on the way in which they are provided. For
// example, the calls
// h.AddString("foo")
// h.AddBytes([]byte{'f','o','o'})
// h.AddByte('f'); h.AddByte('o'); h.AddByte('o')
// will all have the same effect.
//
// Two Hash instances in the same process using the same seed
// behave identically.
//
// Two Hash instances with the same seed in different processes are
// not guaranteed to behave identically, even if the processes share
// the same binary.
//
// Hashes are intended to be collision-resistant, even for situations
// where an adversary controls the byte sequences being hashed.
// All bits of the Hash result are close to uniformly and
// independently distributed, so can be safely restricted to a range
// using bit masking, shifting, or modular arithmetic.
package hash
import (
"unsafe"
)
// A Seed controls the behavior of a Hash. Two Hash objects with the
// same seed in the same process will behave identically. Two Hash
// objects with different seeds will very likely behave differently.
type Seed struct {
s uint64
}
// A Hash object is used to compute the hash of a byte sequence.
type Hash struct {
seed Seed // initial seed used for this hash
state Seed // current hash of all flushed bytes
buf [64]byte // unflushed byte buffer
n int // number of unflushed bytes
}
// AddByte adds b to the sequence of bytes hashed by h.
func (h *Hash) AddByte(b byte) {
if h.n == len(h.buf) {
h.flush()
}
h.buf[h.n] = b
h.n++
}
// AddBytes adds b to the sequence of bytes hashed by h.
func (h *Hash) AddBytes(b []byte) {
for h.n+len(b) > len(h.buf) {
k := copy(h.buf[h.n:], b)
h.n = len(h.buf)
b = b[k:]
h.flush()
}
h.n += copy(h.buf[h.n:], b)
}
// AddString adds the bytes of s to the sequence of bytes hashed by h.
func (h *Hash) AddString(s string) {
for h.n+len(s) > len(h.buf) {
k := copy(h.buf[h.n:], s)
h.n = len(h.buf)
s = s[k:]
h.flush()
}
h.n += copy(h.buf[h.n:], s)
}
// Seed returns the seed value specified in the most recent call to
// SetSeed, or the initial seed if SetSeed was never called.
func (h *Hash) Seed() Seed {
return h.seed
}
// SetSeed sets the seed used by h. Two Hash objects with the same
// seed in the same process will behave identically. Two Hash objects
// with different seeds will very likely behave differently. Any
// bytes added to h previous to this call will be discarded.
func (h *Hash) SetSeed(seed Seed) {
h.seed = seed
h.state = seed
h.n = 0
}
// Reset discards all bytes added to h.
// (The seed remains the same.)
func (h *Hash) Reset() {
h.state = h.seed
h.n = 0
}
// precondition: buffer is full.
func (h *Hash) flush() {
if h.n != len(h.buf) {
panic("flush of partially full buffer")
}
h.state.s = rthash(h.buf[:], h.state.s)
h.n = 0
}
// Hash returns a value which depends on h's seed and the sequence of
// bytes added to h (since the last call to Reset or SetSeed).
func (h *Hash) Hash() uint64 {
return rthash(h.buf[:h.n], h.state.s)
}
// MakeSeed returns a Seed initialized using the bits in s.
// Two seeds generated with the same s are guaranteed to be equal.
// Two seeds generated with different s are very likely to be different.
// TODO: disallow this? See Alan's comment in the issue.
func MakeSeed(s uint64) Seed {
return Seed{s: s}
}
// New returns a new Hash object. Different hash objects allocated by
// this function will very likely have different seeds.
func New() *Hash {
seed := Seed{s: uint64(runtime_fastrand())}
return &Hash{
seed: seed,
state: seed,
}
}
//go:linkname runtime_fastrand runtime.fastrand
func runtime_fastrand() uint32
func rthash(b []byte, seed uint64) uint64 {
if len(b) == 0 {
return seed
}
// The runtime hasher only works on uintptr. For 64-bit
// architectures, we use the hasher directly. Otherwise,
// we use two parallel hashers on the lower and upper 32 bits.
if unsafe.Sizeof(uintptr(0)) == 8 {
return uint64(runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b))))
}
lo := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b)))
hi := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed>>32), uintptr(len(b)))
// TODO: mix lo/hi? Get 64 bits some other way?
return uint64(hi)<<32 | uint64(lo)
}
//go:linkname runtime_memhash runtime.memhash
func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr
// Wrapper functions so that a bytes/hash.Hash implements
// the hash.Hash and hash.Hash64 interfaces.
func (h *Hash) Write(b []byte) (int, error) {
h.AddBytes(b)
return len(b), nil
}
func (h *Hash) Sum(b []byte) []byte {
x := h.Hash()
return append(b,
byte(x>>0),
byte(x>>8),
byte(x>>16),
byte(x>>24),
byte(x>>32),
byte(x>>40),
byte(x>>48),
byte(x>>56))
}
func (h *Hash) Sum64() uint64 {
return h.Hash()
}
func (h *Hash) Size() int { return 8 }
func (h *Hash) BlockSize() int { return len(h.buf) }
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hash_test
import (
"bytes/hash"
basehash "hash"
"testing"
)
func TestUnseededHash(t *testing.T) {
m := map[uint64]struct{}{}
for i := 0; i < 1000; i++ {
h := hash.New()
m[h.Hash()] = struct{}{}
}
if len(m) < 900 {
t.Errorf("empty hash not sufficiently random: got %d, want 1000", len(m))
}
}
func TestSeededHash(t *testing.T) {
s := hash.MakeSeed(1234)
m := map[uint64]struct{}{}
for i := 0; i < 1000; i++ {
h := hash.New()
h.SetSeed(s)
m[h.Hash()] = struct{}{}
}
if len(m) != 1 {
t.Errorf("seeded hash is random: got %d, want 1", len(m))
}
}
func TestHashGrouping(t *testing.T) {
b := []byte("foo")
h1 := hash.New()
h2 := hash.New()
h2.SetSeed(h1.Seed())
h1.AddBytes(b)
for _, x := range b {
h2.AddByte(x)
}
if h1.Hash() != h2.Hash() {
t.Errorf("hash of \"foo\" and \"f\",\"o\",\"o\" not identical")
}
}
func TestHashBytesVsString(t *testing.T) {
s := "foo"
b := []byte(s)
h1 := hash.New()
h2 := hash.New()
h2.SetSeed(h1.Seed())
h1.AddString(s)
h2.AddBytes(b)
if h1.Hash() != h2.Hash() {
t.Errorf("hash of string and byts not identical")
}
}
// Make sure a Hash implements the hash.Hash and hash.Hash64 interfaces.
var _ basehash.Hash = &hash.Hash{}
var _ basehash.Hash64 = &hash.Hash{}
This diff is collapsed.
......@@ -84,16 +84,18 @@ var pkgDeps = map[string][]string{
},
// L2 adds Unicode and strings processing.
"bufio": {"L0", "unicode/utf8", "bytes"},
"bytes": {"L0", "unicode", "unicode/utf8"},
"path": {"L0", "unicode/utf8", "strings"},
"strings": {"L0", "unicode", "unicode/utf8"},
"unicode": {},
"bufio": {"L0", "unicode/utf8", "bytes"},
"bytes": {"L0", "unicode", "unicode/utf8"},
"bytes/hash": {"L0"},
"path": {"L0", "unicode/utf8", "strings"},
"strings": {"L0", "unicode", "unicode/utf8"},
"unicode": {},
"L2": {
"L1",
"bufio",
"bytes",
"bytes/hash",
"path",
"strings",
"unicode",
......@@ -242,51 +244,51 @@ var pkgDeps = map[string][]string{
"go/types": {"L4", "GOPARSER", "container/heap", "go/constant"},
// One of a kind.
"archive/tar": {"L4", "OS", "syscall", "os/user"},
"archive/zip": {"L4", "OS", "compress/flate"},
"container/heap": {"sort"},
"compress/bzip2": {"L4"},
"compress/flate": {"L4"},
"compress/gzip": {"L4", "compress/flate"},
"compress/lzw": {"L4"},
"compress/zlib": {"L4", "compress/flate"},
"context": {"errors", "internal/reflectlite", "sync", "time"},
"database/sql": {"L4", "container/list", "context", "database/sql/driver", "database/sql/internal"},
"database/sql/driver": {"L4", "context", "time", "database/sql/internal"},
"debug/dwarf": {"L4"},
"debug/elf": {"L4", "OS", "debug/dwarf", "compress/zlib"},
"debug/gosym": {"L4"},
"debug/macho": {"L4", "OS", "debug/dwarf", "compress/zlib"},
"debug/pe": {"L4", "OS", "debug/dwarf", "compress/zlib"},
"debug/plan9obj": {"L4", "OS"},
"encoding": {"L4"},
"encoding/ascii85": {"L4"},
"encoding/asn1": {"L4", "math/big"},
"encoding/csv": {"L4"},
"encoding/gob": {"L4", "OS", "encoding"},
"encoding/hex": {"L4"},
"encoding/json": {"L4", "encoding"},
"encoding/pem": {"L4"},
"encoding/xml": {"L4", "encoding"},
"flag": {"L4", "OS"},
"go/build": {"L4", "OS", "GOPARSER", "internal/goroot", "internal/goversion"},
"html": {"L4"},
"image/draw": {"L4", "image/internal/imageutil"},
"image/gif": {"L4", "compress/lzw", "image/color/palette", "image/draw"},
"image/internal/imageutil": {"L4"},
"image/jpeg": {"L4", "image/internal/imageutil"},
"image/png": {"L4", "compress/zlib"},
"index/suffixarray": {"L4", "regexp"},
"internal/goroot": {"L4", "OS"},
"internal/singleflight": {"sync"},
"internal/trace": {"L4", "OS", "container/heap"},
"internal/xcoff": {"L4", "OS", "debug/dwarf"},
"math/big": {"L4"},
"mime": {"L4", "OS", "syscall", "internal/syscall/windows/registry"},
"mime/quotedprintable": {"L4"},
"net/internal/socktest": {"L4", "OS", "syscall", "internal/syscall/windows"},
"net/url": {"L4"},
"plugin": {"L0", "OS", "CGO"},
"archive/tar": {"L4", "OS", "syscall", "os/user"},
"archive/zip": {"L4", "OS", "compress/flate"},
"container/heap": {"sort"},
"compress/bzip2": {"L4"},
"compress/flate": {"L4"},
"compress/gzip": {"L4", "compress/flate"},
"compress/lzw": {"L4"},
"compress/zlib": {"L4", "compress/flate"},
"context": {"errors", "internal/reflectlite", "sync", "time"},
"database/sql": {"L4", "container/list", "context", "database/sql/driver", "database/sql/internal"},
"database/sql/driver": {"L4", "context", "time", "database/sql/internal"},
"debug/dwarf": {"L4"},
"debug/elf": {"L4", "OS", "debug/dwarf", "compress/zlib"},
"debug/gosym": {"L4"},
"debug/macho": {"L4", "OS", "debug/dwarf", "compress/zlib"},
"debug/pe": {"L4", "OS", "debug/dwarf", "compress/zlib"},
"debug/plan9obj": {"L4", "OS"},
"encoding": {"L4"},
"encoding/ascii85": {"L4"},
"encoding/asn1": {"L4", "math/big"},
"encoding/csv": {"L4"},
"encoding/gob": {"L4", "OS", "encoding"},
"encoding/hex": {"L4"},
"encoding/json": {"L4", "encoding"},
"encoding/pem": {"L4"},
"encoding/xml": {"L4", "encoding"},
"flag": {"L4", "OS"},
"go/build": {"L4", "OS", "GOPARSER", "internal/goroot", "internal/goversion"},
"html": {"L4"},
"image/draw": {"L4", "image/internal/imageutil"},
"image/gif": {"L4", "compress/lzw", "image/color/palette", "image/draw"},
"image/internal/imageutil": {"L4"},
"image/jpeg": {"L4", "image/internal/imageutil"},
"image/png": {"L4", "compress/zlib"},
"index/suffixarray": {"L4", "regexp"},
"internal/goroot": {"L4", "OS"},
"internal/singleflight": {"sync"},
"internal/trace": {"L4", "OS", "container/heap"},
"internal/xcoff": {"L4", "OS", "debug/dwarf"},
"math/big": {"L4"},
"mime": {"L4", "OS", "syscall", "internal/syscall/windows/registry"},
"mime/quotedprintable": {"L4"},
"net/internal/socktest": {"L4", "OS", "syscall", "internal/syscall/windows"},
"net/url": {"L4"},
"plugin": {"L0", "OS", "CGO"},
"runtime/pprof/internal/profile": {"L4", "OS", "compress/gzip", "regexp"},
"testing/internal/testdeps": {"L4", "internal/testlog", "runtime/pprof", "regexp"},
"text/scanner": {"L4", "OS"},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment