runtime: map hasher

This commit is contained in:
visualfc
2024-06-30 22:16:46 +08:00
parent 439e377111
commit 8d193ab39f
7 changed files with 412 additions and 1 deletions

View File

@@ -2,8 +2,199 @@ package runtime
import (
"unsafe"
"github.com/goplus/llgo/internal/abi"
"github.com/goplus/llgo/internal/runtime/goarch"
)
// c0 and c1 are multiplicative constants used by the float and interface
// hashers in this file. Each expression selects one of two literals by
// pointer size: (8-PtrSize)/4 is 1 and (PtrSize-4)/4 is 0 when PtrSize is 4,
// and the reverse when PtrSize is 8, so exactly one term survives.
// NOTE(review): assumes goarch.PtrSize is either 4 or 8.
const (
c0 = uintptr((8-goarch.PtrSize)/4*2860486313 + (goarch.PtrSize-4)/4*33054211828000289)
c1 = uintptr((8-goarch.PtrSize)/4*3267000013 + (goarch.PtrSize-4)/4*23344194077549503)
)
// memhash0 is the hasher for zero-size values: there are no bytes to mix
// in, so the seed h is returned unchanged and p is never dereferenced.
func memhash0(p unsafe.Pointer, h uintptr) uintptr {
	return h
}
// memhash8 hashes the single byte at p, using h as the seed.
func memhash8(p unsafe.Pointer, h uintptr) uintptr {
	const size = 1 // bytes occupied by an 8-bit value
	return memhash(p, h, size)
}
// memhash16 hashes the two bytes at p, using h as the seed.
func memhash16(p unsafe.Pointer, h uintptr) uintptr {
	const size = 2 // bytes occupied by a 16-bit value
	return memhash(p, h, size)
}
// memhash128 hashes the sixteen bytes at p, using h as the seed.
func memhash128(p unsafe.Pointer, h uintptr) uintptr {
	const size = 16 // bytes occupied by a 128-bit value
	return memhash(p, h, size)
}
//go:nosplit
// func memhash_varlen(p unsafe.Pointer, h uintptr) uintptr {
// ptr := getclosureptr()
// size := *(*uintptr)(unsafe.Pointer(ptr + unsafe.Sizeof(h)))
// return memhash(p, h, size)
// }
// in asm_*.s
// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// func strhash(p unsafe.Pointer, h uintptr) uintptr
// strhash hashes the string header that a points to: the bytes referenced
// by its data pointer, for its recorded length, seeded with h.
func strhash(a unsafe.Pointer, h uintptr) uintptr {
	s := (*String)(a)
	n := uintptr(s.len)
	return memhash(s.data, h, n)
}
// f32hash hashes the float32 stored at p, using h as the seed.
//
// NOTE: Because NaN != NaN, a map can contain any
// number of (mostly useless) entries keyed with NaNs.
// To avoid long hash chains, we assign a random number
// as the hash value for a NaN.
func f32hash(p unsafe.Pointer, h uintptr) uintptr {
	v := *(*float32)(p)
	if v == 0 {
		// +0 and -0 compare equal, so both take this seed-only path.
		return c1 * (c0 ^ h)
	}
	if v != v {
		// Any kind of NaN: mix in a fresh random value on every call.
		return c1 * (c0 ^ h ^ uintptr(fastrand()))
	}
	return memhash(p, h, 4)
}
// f64hash hashes the float64 stored at p, using h as the seed. Zero
// (either sign) hashes from the seed alone; NaN gets a random
// contribution on every call; every other value is hashed as 8 raw bytes.
func f64hash(p unsafe.Pointer, h uintptr) uintptr {
	v := *(*float64)(p)
	if v == 0 {
		// +0 and -0 compare equal, so both take this seed-only path.
		return c1 * (c0 ^ h)
	}
	if v != v {
		// Any kind of NaN: mix in a fresh random value on every call.
		return c1 * (c0 ^ h ^ uintptr(fastrand()))
	}
	return memhash(p, h, 8)
}
// c64hash hashes the complex64 at p by treating it as two consecutive
// float32 values: the first is hashed with seed h, and that result seeds
// the hash of the second.
func c64hash(p unsafe.Pointer, h uintptr) uintptr {
	parts := (*[2]float32)(p)
	h = f32hash(unsafe.Pointer(&parts[0]), h)
	return f32hash(unsafe.Pointer(&parts[1]), h)
}
// c128hash hashes the complex128 at p by treating it as two consecutive
// float64 values: the first is hashed with seed h, and that result seeds
// the hash of the second.
func c128hash(p unsafe.Pointer, h uintptr) uintptr {
	parts := (*[2]float64)(p)
	h = f64hash(unsafe.Pointer(&parts[0]), h)
	return f64hash(unsafe.Pointer(&parts[1]), h)
}
// interhash hashes the iface value stored at p, using h as the seed.
// An interface with a nil tab hashes to h itself. It panics with
// "hash of unhashable type ..." when the dynamic type has no Equal function.
func interhash(p unsafe.Pointer, h uintptr) uintptr {
	i := (*iface)(p)
	if i.tab == nil {
		return h
	}
	typ := i.tab._type
	if typ.Equal == nil {
		// Check hashability here. We could do this check inside
		// typehash, but we want to report the topmost type in
		// the error text (e.g. in a struct with a field of slice type
		// we want to report the struct, not the slice).
		panic(errorString("hash of unhashable type " + typ.String()))
	}
	// For direct interfaces the data word itself is hashed, so pass its
	// address; otherwise the data word is the address of the value.
	data := i.data
	if isDirectIface(typ) {
		data = unsafe.Pointer(&i.data)
	}
	return c1 * typehash(typ, data, h^c0)
}
// nilinterhash hashes the eface value stored at p, using h as the seed.
// An interface with a nil type hashes to h itself. It panics with
// "hash of unhashable type ..." when the dynamic type has no Equal function.
func nilinterhash(p unsafe.Pointer, h uintptr) uintptr {
	e := (*eface)(p)
	typ := e._type
	if typ == nil {
		return h
	}
	if typ.Equal == nil {
		// Reported here rather than in typehash so that the error text
		// names the topmost type.
		panic(errorString("hash of unhashable type " + typ.String()))
	}
	// For direct interfaces the data word itself is hashed, so pass its
	// address; otherwise the data word is the address of the value.
	data := e.data
	if isDirectIface(typ) {
		data = unsafe.Pointer(&e.data)
	}
	return c1 * typehash(typ, data, h^c0)
}
// typehash computes the hash of the object of type t at address p.
// h is the seed.
// This function is seldom used. Most maps use for hashing either
// fixed functions (e.g. f32hash) or compiler-generated functions
// (e.g. for a type like struct { x, y string }). This implementation
// is slower but more general and is used for hashing interface types
// (called from interhash or nilinterhash, above) or for hashing in
// maps generated by reflect.MapOf (reflect_typehash, below).
// Note: this function must match the compiler generated
// functions exactly. See issue 37716.
//
// It panics with "hash of unhashable type ..." for kinds that are not
// handled below.
func typehash(t *_type, p unsafe.Pointer, h uintptr) uintptr {
	// The regular-memory fast path is currently disabled:
	//
	// if t.TFlag&abi.TFlagRegularMemory != 0 {
	// 	// Handle ptr sizes specially, see issue 37086.
	// 	switch t.Size_ {
	// 	case 4:
	// 		return memhash32(p, h)
	// 	case 8:
	// 		return memhash64(p, h)
	// 	default:
	// 		return memhash(p, h, t.Size_)
	// 	}
	// }
	switch t.Kind() {
	case abi.Bool, abi.Int, abi.Int8, abi.Int16, abi.Int32, abi.Int64,
		abi.Uint, abi.Uint8, abi.Uint16, abi.Uint32, abi.Uint64,
		abi.Uintptr, abi.UnsafePointer,
		// Pointers and channels are comparable and are compared by
		// identity, so they are hashed as raw memory like the scalar
		// kinds above. Without these cases a pointer or channel stored
		// in an interface key would be rejected as unhashable.
		abi.Pointer, abi.Chan:
		switch t.Size_ {
		case 4:
			return memhash32(p, h)
		case 8:
			return memhash64(p, h)
		default:
			return memhash(p, h, t.Size_)
		}
	case abi.Float32:
		return f32hash(p, h)
	case abi.Float64:
		return f64hash(p, h)
	case abi.Complex64:
		return c64hash(p, h)
	case abi.Complex128:
		return c128hash(p, h)
	case abi.String:
		return strhash(p, h)
	case abi.Interface:
		i := (*interfacetype)(unsafe.Pointer(t))
		if len(i.Methods) == 0 {
			return nilinterhash(p, h)
		}
		return interhash(p, h)
	case abi.Array:
		// Chain the hash through every element in order.
		a := (*arraytype)(unsafe.Pointer(t))
		for i := uintptr(0); i < a.Len; i++ {
			h = typehash(a.Elem, add(p, i*a.Elem.Size_), h)
		}
		return h
	case abi.Struct:
		// Chain the hash through every field in order; fields named "_"
		// are skipped.
		s := (*structtype)(unsafe.Pointer(t))
		for _, f := range s.Fields {
			if f.Name_ == "_" {
				continue
			}
			h = typehash(f.Typ, add(p, f.Offset), h)
		}
		return h
	default:
		// Should never happen, as typehash should only be called
		// with comparable types.
		panic(errorString("hash of unhashable type " + t.String()))
	}
}
// ptrequal reports whether p and q hold the same address.
func ptrequal(p, q unsafe.Pointer) bool {
	return q == p
}
@@ -81,3 +272,51 @@ func ifaceeq(tab *itab, x, y unsafe.Pointer) bool {
}
return eq(x, y)
}
// Testing adapters for hash quality tests (see hash_test.go)
func stringHash(s string, seed uintptr) uintptr {
return strhash(noescape(unsafe.Pointer(&s)), seed)
}
// bytesHash returns the memhash of the contents of b with the given seed.
// It reads the slice header of b to obtain the backing array and length.
func bytesHash(b []byte, seed uintptr) uintptr {
	hdr := (*slice)(unsafe.Pointer(&b))
	n := uintptr(hdr.len)
	return memhash(hdr.array, seed, n)
}
func int32Hash(i uint32, seed uintptr) uintptr {
return memhash32(noescape(unsafe.Pointer(&i)), seed)
}
func int64Hash(i uint64, seed uintptr) uintptr {
return memhash64(noescape(unsafe.Pointer(&i)), seed)
}
func efaceHash(i any, seed uintptr) uintptr {
return nilinterhash(noescape(unsafe.Pointer(&i)), seed)
}
func ifaceHash(i interface {
F()
}, seed uintptr) uintptr {
return interhash(noescape(unsafe.Pointer(&i)), seed)
}
// hashkey holds key words that the memhash routines and mix32 fold into
// every hash (elements 0, 1 and 2 are read by the hashers).
// NOTE(review): no initialization is visible here, so the keys are zero
// unless set elsewhere — confirm where hashkey is populated.
var hashkey [4]uintptr
// Note: These routines perform the read with a native endianness.

// readUnaligned32 assembles a uint32 from the four bytes at p one byte at a
// time, so p does not need to be 4-byte aligned. The byte order is chosen
// by goarch.BigEndian.
func readUnaligned32(p unsafe.Pointer) uint32 {
	b := (*[4]byte)(p)
	if goarch.BigEndian {
		// First byte is the most significant.
		return uint32(b[0])<<24 |
			uint32(b[1])<<16 |
			uint32(b[2])<<8 |
			uint32(b[3])
	}
	// First byte is the least significant.
	return uint32(b[3])<<24 |
		uint32(b[2])<<16 |
		uint32(b[1])<<8 |
		uint32(b[0])
}
// readUnaligned64 assembles a uint64 from the eight bytes at p one byte at
// a time, so p does not need to be 8-byte aligned. The byte order is chosen
// by goarch.BigEndian.
func readUnaligned64(p unsafe.Pointer) uint64 {
	b := (*[8]byte)(p)
	if goarch.BigEndian {
		// First byte is the most significant.
		return uint64(b[0])<<56 |
			uint64(b[1])<<48 |
			uint64(b[2])<<40 |
			uint64(b[3])<<32 |
			uint64(b[4])<<24 |
			uint64(b[5])<<16 |
			uint64(b[6])<<8 |
			uint64(b[7])
	}
	// First byte is the least significant.
	return uint64(b[7])<<56 |
		uint64(b[6])<<48 |
		uint64(b[5])<<40 |
		uint64(b[4])<<32 |
		uint64(b[3])<<24 |
		uint64(b[2])<<16 |
		uint64(b[1])<<8 |
		uint64(b[0])
}

View File

@@ -0,0 +1,7 @@
//go:build 386 || amd64 || arm || arm64 || ppc64le || mips64le || mipsle || riscv64 || wasm
// +build 386 amd64 arm arm64 ppc64le mips64le mipsle riscv64 wasm
package goarch
// BigEndian reports whether the target stores the most significant byte
// first. Every architecture selected by this file's build constraint
// (386, amd64, arm, arm64, ppc64le, mips64le, mipsle, riscv64, wasm) is
// little-endian, so it is false here.
const BigEndian = false

// LittleEndian is the complement of BigEndian for the same targets.
const LittleEndian = true

View File

@@ -0,0 +1,9 @@
//go:build ppc64 || s390x || mips || mips64
// +build ppc64 s390x mips mips64
package goarch
// Byte-order constants for the big-endian targets selected by this file's
// build constraint (ppc64, s390x, mips, mips64).
const (
	// BigEndian reports whether the target stores the most significant
	// byte first; it is true for every architecture built from this file.
	BigEndian = true
	// LittleEndian is the complement of BigEndian.
	LittleEndian = false
)

View File

@@ -0,0 +1,61 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Hashing algorithm inspired by
// wyhash: https://github.com/wangyi-fudan/wyhash/blob/ceb019b530e2c1c14d70b79bfa2bc49de7d95bc1/Modern%20Non-Cryptographic%20Hash%20Function%20and%20Pseudorandom%20Number%20Generator.pdf
//go:build 386 || arm || mips || mipsle
package runtime
import "unsafe"
// memhash32 hashes the four bytes at p with the given seed. The state is
// initialised from the seed and hashkey[0] (with the length 4 folded in),
// the 32-bit word is XORed into both halves, and two mix32 rounds finish it.
func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
	lo, hi := mix32(uint32(seed), uint32(4^hashkey[0]))
	word := readUnaligned32(p)
	lo, hi = mix32(lo^word, hi^word)
	lo, hi = mix32(lo, hi)
	return uintptr(lo ^ hi)
}
// memhash64 hashes the eight bytes at p with the given seed. The state is
// initialised from the seed and hashkey[0] (with the length 8 folded in),
// the two 32-bit words are XORed into the two halves, and two mix32 rounds
// finish it.
func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
	lo, hi := mix32(uint32(seed), uint32(8^hashkey[0]))
	first := readUnaligned32(p)
	second := readUnaligned32(add(p, 4))
	lo, hi = mix32(lo^first, hi^second)
	lo, hi = mix32(lo, hi)
	return uintptr(lo ^ hi)
}
// memhash hashes the s bytes starting at p with the given seed, keeping a
// pair of 32-bit state words (a, b) that are combined by mix32.
// NOTE(review): assumes add(p, n) yields p advanced by n bytes — confirm
// against its definition.
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
// Initial state folds in the seed, the length and hashkey[0].
a, b := mix32(uint32(seed), uint32(s^hashkey[0]))
if s == 0 {
return uintptr(a ^ b)
}
// Consume 8 bytes per iteration while more than 8 remain.
for ; s > 8; s -= 8 {
a ^= readUnaligned32(p)
b ^= readUnaligned32(add(p, 4))
a, b = mix32(a, b)
p = add(p, 8)
}
if s >= 4 {
// 4..8 bytes left: the first four and the last four bytes, which
// overlap when fewer than 8 remain.
a ^= readUnaligned32(p)
b ^= readUnaligned32(add(p, s-4))
} else {
// 1..3 bytes left: pack the bytes at offsets 0, s>>1 and s-1.
t := uint32(*(*byte)(p))
t |= uint32(*(*byte)(add(p, s>>1))) << 8
t |= uint32(*(*byte)(add(p, s-1))) << 16
b ^= t
}
// Two final mixing rounds.
a, b = mix32(a, b)
a, b = mix32(a, b)
return uintptr(a ^ b)
}
// mix32 XORs a with hashkey[1] and b with hashkey[2], multiplies the two
// as 64-bit values, and returns the low and high 32 bits of the product,
// in that order.
func mix32(a, b uint32) (uint32, uint32) {
	x := uint64(a ^ uint32(hashkey[1]))
	y := uint64(b ^ uint32(hashkey[2]))
	prod := x * y
	return uint32(prod), uint32(prod >> 32)
}

View File

@@ -0,0 +1,93 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Hashing algorithm inspired by
// wyhash: https://github.com/wangyi-fudan/wyhash
//go:build amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm
package runtime
import (
"unsafe"
"github.com/goplus/llgo/internal/runtime/math"
)
// m1..m5 are the 64-bit constants XORed into the operands of mix by
// memhash, memhash32 and memhash64 below.
const (
m1 = 0xa0761d6478bd642f
m2 = 0xe7037ed1a0b428db
m3 = 0x8ebc6af09c88c6e3
m4 = 0x589965cc75374cc3
m5 = 0x1d8e4e27c47d124f
)
// memhash hashes the s bytes starting at p with the given seed. It picks
// two words a and b from the input according to its length, folding any
// earlier data into seed, and finishes with two mix rounds that also fold
// in the length.
// NOTE(review): assumes add(p, n) yields p advanced by n bytes — confirm
// against its definition.
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
var a, b uintptr
seed ^= hashkey[0] ^ m1
switch {
case s == 0:
return seed
case s < 4:
// 1..3 bytes: pack the bytes at offsets 0, s>>1 and s-1 into a; b stays 0.
a = uintptr(*(*byte)(p))
a |= uintptr(*(*byte)(add(p, s>>1))) << 8
a |= uintptr(*(*byte)(add(p, s-1))) << 16
case s == 4:
// Exactly 4 bytes: one 32-bit read feeds both words.
a = r4(p)
b = a
case s < 8:
// 5..7 bytes: first four and last four bytes (overlapping).
a = r4(p)
b = r4(add(p, s-4))
case s == 8:
// Exactly 8 bytes: one 64-bit read feeds both words.
a = r8(p)
b = a
case s <= 16:
// 9..16 bytes: first eight and last eight bytes (possibly overlapping).
a = r8(p)
b = r8(add(p, s-8))
default:
// More than 16 bytes.
l := s
if l > 48 {
// Three independent accumulators each absorb 16 of every 48 bytes.
seed1 := seed
seed2 := seed
for ; l > 48; l -= 48 {
seed = mix(r8(p)^m2, r8(add(p, 8))^seed)
seed1 = mix(r8(add(p, 16))^m3, r8(add(p, 24))^seed1)
seed2 = mix(r8(add(p, 32))^m4, r8(add(p, 40))^seed2)
p = add(p, 48)
}
seed ^= seed1 ^ seed2
}
// Absorb 16 bytes per iteration while more than 16 remain.
for ; l > 16; l -= 16 {
seed = mix(r8(p)^m2, r8(add(p, 8))^seed)
p = add(p, 16)
}
// a and b are the final 16 bytes of the input; when fewer than 16
// remain this re-reads bytes that were already absorbed.
a = r8(add(p, l-16))
b = r8(add(p, l-8))
}
// Final mixing folds in the original length s.
return mix(m5^s, mix(a^m2, b^seed))
}
// memhash32 hashes the four bytes at p with the given seed, using the same
// word for both operands of the inner mix and folding in the length 4.
func memhash32(p unsafe.Pointer, seed uintptr) uintptr {
	word := r4(p)
	inner := mix(word^m2, word^seed^hashkey[0]^m1)
	return mix(m5^4, inner)
}
// memhash64 hashes the eight bytes at p with the given seed, using the same
// word for both operands of the inner mix and folding in the length 8.
func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
	word := r8(p)
	inner := mix(word^m2, word^seed^hashkey[0]^m1)
	return mix(m5^8, inner)
}
// mix multiplies a and b with math.Mul64 and returns the XOR of the two
// result words.
func mix(a, b uintptr) uintptr {
	hi, lo := math.Mul64(uint64(a), uint64(b))
	folded := lo ^ hi
	return uintptr(folded)
}
// r4 reads four bytes at p via readUnaligned32 and widens them to uintptr.
func r4(p unsafe.Pointer) uintptr {
	word := readUnaligned32(p)
	return uintptr(word)
}
// r8 reads eight bytes at p via readUnaligned64 and converts them to uintptr.
func r8(p unsafe.Pointer) uintptr {
	word := readUnaligned64(p)
	return uintptr(word)
}

View File

@@ -25,6 +25,8 @@ import (
// Map represents a Go map.
type Map = hmap
// maptype is the package-local alias for abi.MapType.
type maptype = abi.MapType
// arraytype is the package-local alias for abi.ArrayType.
type arraytype = abi.ArrayType
// structtype is the package-local alias for abi.StructType.
type structtype = abi.StructType
type slice struct {
array unsafe.Pointer

View File

@@ -230,7 +230,7 @@ func MapOf(key, elem *Type, bucket *Type, flags int) *Type {
Flags: uint32(flags),
}
ret.Hasher = func(p unsafe.Pointer, seed uintptr) uintptr {
return uintptr(p)
return typehash(key, p, seed)
}
return &ret.Type
}