runtime: map hasher

2024-06-30 22:16:46 +08:00
parent 439e377111
commit 8d193ab39f
7 changed files with 412 additions and 1 deletions
--- a/internal/runtime/alg.go
+++ b/internal/runtime/alg.go
@@ -2,8 +2,199 @@ package runtime

 import (
 	"unsafe"
+
+	"github.com/goplus/llgo/internal/abi"
+	"github.com/goplus/llgo/internal/runtime/goarch"
 )

+// c0 and c1 are constants mixed into the float and interface hashes below.
+// The integer arithmetic selects the first operand when goarch.PtrSize is 4
+// ((8-4)/4 == 1 and (4-4)/4 == 0) and the second operand when it is 8
+// ((8-8)/4 == 0 and (8-4)/4 == 1).
+const (
+	c0 = uintptr((8-goarch.PtrSize)/4*2860486313 + (goarch.PtrSize-4)/4*33054211828000289)
+	c1 = uintptr((8-goarch.PtrSize)/4*3267000013 + (goarch.PtrSize-4)/4*23344194077549503)
+)
+
+// memhash0 is the hasher for zero-size values: it reads nothing from p
+// and returns the seed h unchanged.
+func memhash0(p unsafe.Pointer, h uintptr) uintptr {
+	return h
+}
+
+func memhash8(p unsafe.Pointer, h uintptr) uintptr {
+	return memhash(p, h, 1)
+}
+
+func memhash16(p unsafe.Pointer, h uintptr) uintptr {
+	return memhash(p, h, 2)
+}
+
+func memhash128(p unsafe.Pointer, h uintptr) uintptr {
+	return memhash(p, h, 16)
+}
+
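+// memhash_varlen is commented out (it calls getclosureptr). Its go:nosplit
+// directive is kept inside the comment so that it cannot attach to the
+// next real declaration.
+//
+// //go:nosplit
+// func memhash_varlen(p unsafe.Pointer, h uintptr) uintptr {
+// 	ptr := getclosureptr()
+// 	size := *(*uintptr)(unsafe.Pointer(ptr + unsafe.Sizeof(h)))
+// 	return memhash(p, h, size)
+// }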
+
+// Implemented in Go in hash32.go / hash64.go (selected by build tags):
+// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
+// func memhash32(p unsafe.Pointer, h uintptr) uintptr
+// func memhash64(p unsafe.Pointer, h uintptr) uintptr
+// strhash is implemented directly below.
+
+// strhash hashes the bytes of the String header found at a, seeded with h.
+func strhash(a unsafe.Pointer, h uintptr) uintptr {
+	str := (*String)(a)
+	n := uintptr(str.len)
+	return memhash(str.data, h, n)
+}
+
+// NOTE: Because NaN != NaN, a map can contain any
+// number of (mostly useless) entries keyed with NaNs.
+// To avoid long hash chains, we assign a random number
+// as the hash value for a NaN.
+
+// f32hash hashes the float32 stored at p, seeded with h.
+// Both zeros share one hash, each NaN gets a randomized hash, and every
+// other value is hashed as four raw bytes.
+func f32hash(p unsafe.Pointer, h uintptr) uintptr {
+	v := *(*float32)(p)
+	if v == 0 {
+		// +0 and -0 compare equal, so they must hash the same.
+		return c1 * (c0 ^ h)
+	}
+	if v != v {
+		// Any kind of NaN: mix in a random value.
+		return c1 * (c0 ^ h ^ uintptr(fastrand()))
+	}
+	return memhash(p, h, 4)
+}
+
+// f64hash hashes the float64 stored at p, seeded with h.
+// Both zeros share one hash, each NaN gets a randomized hash, and every
+// other value is hashed as eight raw bytes.
+func f64hash(p unsafe.Pointer, h uintptr) uintptr {
+	v := *(*float64)(p)
+	if v == 0 {
+		// +0 and -0 compare equal, so they must hash the same.
+		return c1 * (c0 ^ h)
+	}
+	if v != v {
+		// Any kind of NaN: mix in a random value.
+		return c1 * (c0 ^ h ^ uintptr(fastrand()))
+	}
+	return memhash(p, h, 8)
+}
+
+// c64hash hashes the complex64 at p by chaining f32hash over its two
+// float32 halves: element 0 first, then element 1.
+func c64hash(p unsafe.Pointer, h uintptr) uintptr {
+	parts := (*[2]float32)(p)
+	h = f32hash(unsafe.Pointer(&parts[0]), h)
+	return f32hash(unsafe.Pointer(&parts[1]), h)
+}
+
+// c128hash hashes the complex128 at p by chaining f64hash over its two
+// float64 halves: element 0 first, then element 1.
+func c128hash(p unsafe.Pointer, h uintptr) uintptr {
+	parts := (*[2]float64)(p)
+	h = f64hash(unsafe.Pointer(&parts[0]), h)
+	return f64hash(unsafe.Pointer(&parts[1]), h)
+}
+
+// interhash hashes the iface value stored at p, seeded with h.
+// An iface with a nil tab hashes to the seed itself. It panics when the
+// dynamic type has no Equal function.
+func interhash(p unsafe.Pointer, h uintptr) uintptr {
+	i := (*iface)(p)
+	if i.tab == nil {
+		return h
+	}
+	typ := i.tab._type
+	if typ.Equal == nil {
+		// Hashability is checked here rather than inside typehash so
+		// that the error names the topmost type (e.g. the struct that
+		// contains a slice field, not the slice).
+		panic(errorString("hash of unhashable type " + typ.String()))
+	}
+	// When isDirectIface reports true, the address of the data word is
+	// hashed; otherwise the data word is passed as the address.
+	data := i.data
+	if isDirectIface(typ) {
+		data = unsafe.Pointer(&i.data)
+	}
+	return c1 * typehash(typ, data, h^c0)
+}
+
+// nilinterhash hashes the eface value stored at p, seeded with h.
+// An eface with a nil type hashes to the seed itself. It panics when the
+// dynamic type has no Equal function.
+func nilinterhash(p unsafe.Pointer, h uintptr) uintptr {
+	e := (*eface)(p)
+	typ := e._type
+	if typ == nil {
+		return h
+	}
+	if typ.Equal == nil {
+		// Checked here, not in typehash, so the error names the
+		// topmost type.
+		panic(errorString("hash of unhashable type " + typ.String()))
+	}
+	// When isDirectIface reports true, the address of the data word is
+	// hashed; otherwise the data word is passed as the address.
+	data := e.data
+	if isDirectIface(typ) {
+		data = unsafe.Pointer(&e.data)
+	}
+	return c1 * typehash(typ, data, h^c0)
+}
+
+// typehash computes the hash of the object of type t at address p.
+// h is the seed.
+// This function is seldom used. Most maps use for hashing either
+// fixed functions (e.g. f32hash) or compiler-generated functions
+// (e.g. for a type like struct { x, y string }). This implementation
+// is slower but more general and is used for hashing interface types
+// (called from interhash or nilinterhash, above) or for hashing in
+// maps whose Hasher is built at run time (MapOf).
+// Note: this function must match the compiler generated
+// functions exactly. See issue 37716.
+//
+// It panics with "hash of unhashable type" when t's kind is not one of
+// the kinds handled below.
+func typehash(t *_type, p unsafe.Pointer, h uintptr) uintptr {
+	// The TFlagRegularMemory fast path below is disabled, so every kind
+	// that is hashed as raw memory (including pointers and channels,
+	// which are comparable and therefore valid map keys) must be listed
+	// explicitly in the switch.
+	// if t.TFlag&abi.TFlagRegularMemory != 0 {
+	// 	// Handle ptr sizes specially, see issue 37086.
+	// 	switch t.Size_ {
+	// 	case 4:
+	// 		return memhash32(p, h)
+	// 	case 8:
+	// 		return memhash64(p, h)
+	// 	default:
+	// 		return memhash(p, h, t.Size_)
+	// 	}
+	// }
+	switch t.Kind() {
+	case abi.Bool, abi.Int, abi.Int8, abi.Int16, abi.Int32, abi.Int64,
+		abi.Uint, abi.Uint8, abi.Uint16, abi.Uint32, abi.Uint64,
+		abi.Uintptr, abi.UnsafePointer, abi.Pointer, abi.Chan:
+		// Plain memory: hash the value's bytes, using the fixed-size
+		// helpers for 4- and 8-byte values.
+		switch t.Size_ {
+		case 4:
+			return memhash32(p, h)
+		case 8:
+			return memhash64(p, h)
+		default:
+			return memhash(p, h, t.Size_)
+		}
+	case abi.Float32:
+		return f32hash(p, h)
+	case abi.Float64:
+		return f64hash(p, h)
+	case abi.Complex64:
+		return c64hash(p, h)
+	case abi.Complex128:
+		return c128hash(p, h)
+	case abi.String:
+		return strhash(p, h)
+	case abi.Interface:
+		// Method-less interfaces are hashed as eface, all others as iface.
+		i := (*interfacetype)(unsafe.Pointer(t))
+		if len(i.Methods) == 0 {
+			return nilinterhash(p, h)
+		}
+		return interhash(p, h)
+	case abi.Array:
+		// Chain the element hashes in index order.
+		a := (*arraytype)(unsafe.Pointer(t))
+		for i := uintptr(0); i < a.Len; i++ {
+			h = typehash(a.Elem, add(p, i*a.Elem.Size_), h)
+		}
+		return h
+	case abi.Struct:
+		// Chain the field hashes in declaration order, skipping fields
+		// named "_".
+		s := (*structtype)(unsafe.Pointer(t))
+		for _, f := range s.Fields {
+			if f.Name_ == "_" {
+				continue
+			}
+			h = typehash(f.Typ, add(p, f.Offset), h)
+		}
+		return h
+	default:
+		// Should never happen, as typehash should only be called
+		// with comparable types.
+		panic(errorString("hash of unhashable type " + t.String()))
+	}
+}
+
 func ptrequal(p, q unsafe.Pointer) bool {
 	return p == q
 }
@@ -81,3 +272,51 @@ func ifaceeq(tab *itab, x, y unsafe.Pointer) bool {
 	}
 	return eq(x, y)
 }
+
+// Testing adapters for hash quality tests (see hash_test.go)
+
+// stringHash hashes s with the given seed by handing its header to strhash.
+func stringHash(s string, seed uintptr) uintptr {
+	hdr := noescape(unsafe.Pointer(&s))
+	return strhash(hdr, seed)
+}
+
+// bytesHash hashes the contents of b with the given seed via memhash.
+func bytesHash(b []byte, seed uintptr) uintptr {
+	hdr := (*slice)(unsafe.Pointer(&b))
+	data, n := hdr.array, uintptr(hdr.len)
+	return memhash(data, seed, n)
+}
+
+// int32Hash hashes the 32-bit value i with the given seed via memhash32.
+func int32Hash(i uint32, seed uintptr) uintptr {
+	ptr := noescape(unsafe.Pointer(&i))
+	return memhash32(ptr, seed)
+}
+
+// int64Hash hashes the 64-bit value i with the given seed via memhash64.
+func int64Hash(i uint64, seed uintptr) uintptr {
+	ptr := noescape(unsafe.Pointer(&i))
+	return memhash64(ptr, seed)
+}
+
+// efaceHash hashes the empty-interface value i with the given seed via
+// nilinterhash.
+func efaceHash(i any, seed uintptr) uintptr {
+	ptr := noescape(unsafe.Pointer(&i))
+	return nilinterhash(ptr, seed)
+}
+
+// ifaceHash hashes the non-empty interface value i with the given seed via
+// interhash.
+func ifaceHash(i interface{ F() }, seed uintptr) uintptr {
+	ptr := noescape(unsafe.Pointer(&i))
+	return interhash(ptr, seed)
+}
+
+// hashkey holds the key words that memhash, memhash32, memhash64 and mix32
+// fold into their results.
+// NOTE(review): nothing in this change assigns to it; unless it is seeded
+// elsewhere, every element stays zero and hashes are the same on every run.
+var hashkey [4]uintptr
+
+// Note: These routines perform the read with a native endianness.
+
+// readUnaligned32 assembles a uint32 from the four bytes at p one byte at
+// a time, in the byte order selected by goarch.BigEndian.
+func readUnaligned32(p unsafe.Pointer) uint32 {
+	b := (*[4]byte)(p)
+	b0, b1, b2, b3 := uint32(b[0]), uint32(b[1]), uint32(b[2]), uint32(b[3])
+	if goarch.BigEndian {
+		return b0<<24 | b1<<16 | b2<<8 | b3
+	}
+	return b3<<24 | b2<<16 | b1<<8 | b0
+}
+
+func readUnaligned64(p unsafe.Pointer) uint64 {
+	q := (*[8]byte)(p)
+	if goarch.BigEndian {
+		return uint64(q[7]) | uint64(q[6])<<8 | uint64(q[5])<<16 | uint64(q[4])<<24 |
+			uint64(q[3])<<32 | uint64(q[2])<<40 | uint64(q[1])<<48 | uint64(q[0])<<56
+	}
+	return uint64(q[0]) | uint64(q[1])<<8 | uint64(q[2])<<16 | uint64(q[3])<<24 | uint64(q[4])<<32 | uint64(q[5])<<40 | uint64(q[6])<<48 | uint64(q[7])<<56
+}
--- a/internal/runtime/goarch/endian_big.go
+++ b/internal/runtime/goarch/endian_big.go
@@ -0,0 +1,10 @@
+//go:build ppc64 || s390x || mips || mips64
+// +build ppc64 s390x mips mips64
+
+package goarch
+
+// BigEndian is true on the big-endian targets selected by this file's build tags.
+const BigEndian = true
+
+// LittleEndian is false on the big-endian targets selected by this file's build tags.
+const LittleEndian = false
--- a/internal/runtime/goarch/endian_little.go
+++ b/internal/runtime/goarch/endian_little.go
@@ -0,0 +1,10 @@
+//go:build 386 || amd64 || arm || arm64 || loong64 || ppc64le || mips64le || mipsle || riscv64 || wasm
+// +build 386 amd64 arm arm64 loong64 ppc64le mips64le mipsle riscv64 wasm
+
+package goarch
+
+// Byte-order constants for the little-endian targets selected by this file's build tags.
+const (
+	BigEndian    = false
+	LittleEndian = true
+)
--- a/internal/runtime/hash32.go
+++ b/internal/runtime/hash32.go
@@ -0,0 +1,61 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Hashing algorithm inspired by
+// wyhash: https://github.com/wangyi-fudan/wyhash/blob/ceb019b530e2c1c14d70b79bfa2bc49de7d95bc1/Modern%20Non-Cryptographic%20Hash%20Function%20and%20Pseudorandom%20Number%20Generator.pdf
+
+//go:build 386 || arm || mips || mipsle
+
+package runtime
+
+import "unsafe"
+
+// memhash32 hashes the four bytes at p, seeded with seed, using the
+// 32-bit mixing function.
+func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
+	word := readUnaligned32(p)
+	lo, hi := mix32(uint32(seed), uint32(4^hashkey[0]))
+	// The same word is folded into both halves of the state.
+	lo, hi = mix32(lo^word, hi^word)
+	lo, hi = mix32(lo, hi)
+	return uintptr(lo ^ hi)
+}
+
+// memhash64 hashes the eight bytes at p, seeded with seed, using the
+// 32-bit mixing function.
+func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
+	lo, hi := mix32(uint32(seed), uint32(8^hashkey[0]))
+	// First word goes into the low half, second word into the high half.
+	lo, hi = mix32(lo^readUnaligned32(p), hi^readUnaligned32(add(p, 4)))
+	lo, hi = mix32(lo, hi)
+	return uintptr(lo ^ hi)
+}
+
+// memhash hashes the s bytes starting at p, seeded with seed, using the
+// 32-bit mixing function.
+func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
+	lo, hi := mix32(uint32(seed), uint32(s^hashkey[0]))
+	if s == 0 {
+		return uintptr(lo ^ hi)
+	}
+	// Consume whole 8-byte groups while more than 8 bytes remain,
+	// leaving a tail of 1..8 bytes.
+	for s > 8 {
+		lo ^= readUnaligned32(p)
+		hi ^= readUnaligned32(add(p, 4))
+		lo, hi = mix32(lo, hi)
+		p = add(p, 8)
+		s -= 8
+	}
+	switch {
+	case s >= 4:
+		// 4..8 bytes: the first and the last four bytes (may overlap).
+		lo ^= readUnaligned32(p)
+		hi ^= readUnaligned32(add(p, s-4))
+	default:
+		// 1..3 bytes: pack the first, middle and last byte.
+		tail := uint32(*(*byte)(p)) |
+			uint32(*(*byte)(add(p, s>>1)))<<8 |
+			uint32(*(*byte)(add(p, s-1)))<<16
+		hi ^= tail
+	}
+	lo, hi = mix32(lo, hi)
+	lo, hi = mix32(lo, hi)
+	return uintptr(lo ^ hi)
+}
+
+// mix32 XORs a and b with hashkey[1] and hashkey[2], multiplies them as
+// 64-bit values and returns the low and high 32 bits of the product.
+func mix32(a, b uint32) (uint32, uint32) {
+	x := uint64(a ^ uint32(hashkey[1]))
+	y := uint64(b ^ uint32(hashkey[2]))
+	prod := x * y
+	return uint32(prod), uint32(prod >> 32)
+}
--- a/internal/runtime/hash64.go
+++ b/internal/runtime/hash64.go
@@ -0,0 +1,93 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Hashing algorithm inspired by
+// wyhash: https://github.com/wangyi-fudan/wyhash
+
+//go:build amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm
+
+package runtime
+
+import (
+	"unsafe"
+
+	"github.com/goplus/llgo/internal/runtime/math"
+)
+
+// m1 through m5 are the mixing constants XORed into the state by memhash,
+// memhash32 and memhash64 below.
+const (
+	m1 = 0xa0761d6478bd642f
+	m2 = 0xe7037ed1a0b428db
+	m3 = 0x8ebc6af09c88c6e3
+	m4 = 0x589965cc75374cc3
+	m5 = 0x1d8e4e27c47d124f
+)
+
+// memhash hashes the s bytes starting at p, seeded with seed.
+// Inputs of up to 16 bytes are loaded into a and b directly; longer inputs
+// are folded into seed 16 or 48 bytes at a time before the final mix.
+func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
+	var a, b uintptr
+	seed ^= hashkey[0] ^ m1
+	switch {
+	case s == 0:
+		return seed
+	case s < 4:
+		// 1..3 bytes: pack the first, middle and last byte into a; b stays 0.
+		a = uintptr(*(*byte)(p))
+		a |= uintptr(*(*byte)(add(p, s>>1))) << 8
+		a |= uintptr(*(*byte)(add(p, s-1))) << 16
+	case s == 4:
+		a = r4(p)
+		b = a
+	case s < 8:
+		// 5..7 bytes: first and last four bytes (overlapping).
+		a = r4(p)
+		b = r4(add(p, s-4))
+	case s == 8:
+		a = r8(p)
+		b = a
+	case s <= 16:
+		// 9..16 bytes: first and last eight bytes (overlapping unless s == 16).
+		a = r8(p)
+		b = r8(add(p, s-8))
+	default:
+		l := s
+		if l > 48 {
+			// Three independent lanes, each consuming 16 of every 48 bytes.
+			seed1 := seed
+			seed2 := seed
+			for ; l > 48; l -= 48 {
+				seed = mix(r8(p)^m2, r8(add(p, 8))^seed)
+				seed1 = mix(r8(add(p, 16))^m3, r8(add(p, 24))^seed1)
+				seed2 = mix(r8(add(p, 32))^m4, r8(add(p, 40))^seed2)
+				p = add(p, 48)
+			}
+			seed ^= seed1 ^ seed2
+		}
+		// Fold in the remaining whole 16-byte groups.
+		for ; l > 16; l -= 16 {
+			seed = mix(r8(p)^m2, r8(add(p, 8))^seed)
+			p = add(p, 16)
+		}
+		// Load the final 16 bytes of the input; with l <= 16 left, this
+		// window may reach back into bytes already consumed above.
+		a = r8(add(p, l-16))
+		b = r8(add(p, l-8))
+	}
+
+	return mix(m5^s, mix(a^m2, b^seed))
+}
+
+// memhash32 hashes the four bytes at p, seeded with seed.
+func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
+	word := r4(p)
+	seed ^= hashkey[0] ^ m1
+	return mix(m5^4, mix(word^m2, word^seed))
+}
+
+// memhash64 hashes the eight bytes at p, seeded with seed.
+func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
+	word := r8(p)
+	seed ^= hashkey[0] ^ m1
+	return mix(m5^8, mix(word^m2, word^seed))
+}
+
+// mix multiplies a and b with math.Mul64 and returns the XOR of the two
+// result words.
+func mix(a, b uintptr) uintptr {
+	high, low := math.Mul64(uint64(a), uint64(b))
+	folded := high ^ low
+	return uintptr(folded)
+}
+
+// r4 reads four bytes at p via readUnaligned32 and widens them to uintptr.
+func r4(p unsafe.Pointer) uintptr {
+	word := readUnaligned32(p)
+	return uintptr(word)
+}
+
+// r8 reads eight bytes at p via readUnaligned64 and converts them to uintptr.
+func r8(p unsafe.Pointer) uintptr {
+	word := readUnaligned64(p)
+	return uintptr(word)
+}
--- a/internal/runtime/z_map.go
+++ b/internal/runtime/z_map.go
@@ -25,6 +25,8 @@ import (
 // Map represents a Go map.
 type Map = hmap
 type maptype = abi.MapType
+// arraytype and structtype alias the abi descriptors that typehash casts a
+// *_type to when it walks array elements and struct fields.
+type arraytype = abi.ArrayType
+type structtype = abi.StructType

 type slice struct {
 	array unsafe.Pointer
--- a/internal/runtime/z_type.go
+++ b/internal/runtime/z_type.go
@@ -230,7 +230,7 @@ func MapOf(key, elem *Type, bucket *Type, flags int) *Type {
 		Flags:      uint32(flags),
 	}
 	ret.Hasher = func(p unsafe.Pointer, seed uintptr) uintptr {
-		return uintptr(p)
+		return typehash(key, p, seed)
 	}
 	return &ret.Type
 }