Initial commit: Go 1.23 release state

This commit is contained in:
Vorapol Rinsatitnon
2024-09-21 23:49:08 +10:00
commit 17cd57a668
13231 changed files with 3114330 additions and 0 deletions

110
src/strings/builder.go Normal file
View File

@@ -0,0 +1,110 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings
import (
"internal/abi"
"internal/bytealg"
"unicode/utf8"
"unsafe"
)
// A Builder is used to efficiently build a string using [Builder.Write] methods.
// It minimizes memory copying. The zero value is ready to use.
// Do not copy a non-zero Builder.
type Builder struct {
	// addr records the address of the receiver on first use, to detect
	// copies by value (see copyCheck).
	addr *Builder // of receiver, to detect copies by value

	// External users should never get direct access to this buffer, since
	// the slice at some point will be converted to a string using unsafe, also
	// data between len(buf) and cap(buf) might be uninitialized.
	buf []byte
}
// copyCheck panics if the Builder has been copied by value after first use.
// On the first call it records the receiver's address; on later calls a
// mismatched address means the methods are running on a copy.
func (b *Builder) copyCheck() {
	if b.addr == nil {
		// This hack works around a failing of Go's escape analysis
		// that was causing b to escape and be heap allocated.
		// See issue 23382.
		// TODO: once issue 7921 is fixed, this should be reverted to
		// just "b.addr = b".
		b.addr = (*Builder)(abi.NoEscape(unsafe.Pointer(b)))
	} else if b.addr != b {
		panic("strings: illegal use of non-zero Builder copied by value")
	}
}
// String returns the accumulated string.
func (b *Builder) String() string {
	// Zero-copy conversion: writes only ever append to buf (or replace it
	// wholesale in grow), so exposing the current bytes as an immutable
	// string is safe.
	return unsafe.String(unsafe.SliceData(b.buf), len(b.buf))
}
// Len returns the number of accumulated bytes; b.Len() == len(b.String()).
func (b *Builder) Len() int { return len(b.buf) }
// Cap returns the capacity of the builder's underlying byte slice. It is the
// total space allocated for the string being built and includes any bytes
// already written.
func (b *Builder) Cap() int { return cap(b.buf) }
// Reset resets the [Builder] to be empty.
// Clearing addr also re-arms the copy check, so a reset Builder may be
// copied again before its next use.
func (b *Builder) Reset() {
	b.addr = nil
	b.buf = nil
}
// grow copies the buffer to a new, larger buffer so that there are at least n
// bytes of capacity beyond len(b.buf).
func (b *Builder) grow(n int) {
	// Grow to 2*cap+n. MakeNoZero skips zeroing the new memory; that is
	// safe because bytes between len and cap are never exposed to callers.
	buf := bytealg.MakeNoZero(2*cap(b.buf) + n)[:len(b.buf)]
	copy(buf, b.buf)
	b.buf = buf
}
// Grow grows b's capacity, if necessary, to guarantee space for
// another n bytes. After Grow(n), at least n bytes can be written to b
// without another allocation. If n is negative, Grow panics.
func (b *Builder) Grow(n int) {
	b.copyCheck()
	if n < 0 {
		panic("strings.Builder.Grow: negative count")
	}
	// Reallocate only when the free space at the tail is insufficient.
	if free := cap(b.buf) - len(b.buf); free < n {
		b.grow(n)
	}
}
// Write appends the contents of p to b's buffer.
// Write always returns len(p), nil.
func (b *Builder) Write(p []byte) (int, error) {
	b.copyCheck()
	b.buf = append(b.buf, p...)
	return len(p), nil
}
// WriteByte appends the byte c to b's buffer.
// The returned error is always nil.
func (b *Builder) WriteByte(c byte) error {
	b.copyCheck()
	b.buf = append(b.buf, c)
	return nil
}
// WriteRune appends the UTF-8 encoding of Unicode code point r to b's buffer.
// It returns the length of r and a nil error.
func (b *Builder) WriteRune(r rune) (int, error) {
	b.copyCheck()
	// The number of bytes written is the growth of the buffer.
	before := len(b.buf)
	b.buf = utf8.AppendRune(b.buf, r)
	return len(b.buf) - before, nil
}
// WriteString appends the contents of s to b's buffer.
// It returns the length of s and a nil error.
func (b *Builder) WriteString(s string) (int, error) {
	b.copyCheck()
	b.buf = append(b.buf, s...)
	return len(s), nil
}

400
src/strings/builder_test.go Normal file
View File

@@ -0,0 +1,400 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings_test
import (
"bytes"
. "strings"
"testing"
"unicode/utf8"
)
func check(t *testing.T, b *Builder, want string) {
t.Helper()
got := b.String()
if got != want {
t.Errorf("String: got %#q; want %#q", got, want)
return
}
if n := b.Len(); n != len(got) {
t.Errorf("Len: got %d; but len(String()) is %d", n, len(got))
}
if n := b.Cap(); n < len(got) {
t.Errorf("Cap: got %d; but len(String()) is %d", n, len(got))
}
}
// TestBuilder exercises the basic WriteString/WriteByte sequence and
// verifies the accumulated contents after each step.
func TestBuilder(t *testing.T) {
	var b Builder
	check(t, &b, "")
	n, err := b.WriteString("hello")
	if err != nil || n != 5 {
		t.Errorf("WriteString: got %d,%s; want 5,nil", n, err)
	}
	check(t, &b, "hello")
	if err = b.WriteByte(' '); err != nil {
		t.Errorf("WriteByte: %s", err)
	}
	check(t, &b, "hello ")
	n, err = b.WriteString("world")
	if err != nil || n != 5 {
		t.Errorf("WriteString: got %d,%s; want 5,nil", n, err)
	}
	check(t, &b, "hello world")
}
// TestBuilderString verifies that strings returned by String are
// immutable snapshots: later writes must not change earlier results.
func TestBuilderString(t *testing.T) {
	var b Builder
	b.WriteString("alpha")
	check(t, &b, "alpha")
	s1 := b.String()
	b.WriteString("beta")
	check(t, &b, "alphabeta")
	s2 := b.String()
	b.WriteString("gamma")
	check(t, &b, "alphabetagamma")
	s3 := b.String()

	// Check that subsequent operations didn't change the returned strings.
	if want := "alpha"; s1 != want {
		t.Errorf("first String result is now %q; want %q", s1, want)
	}
	if want := "alphabeta"; s2 != want {
		t.Errorf("second String result is now %q; want %q", s2, want)
	}
	if want := "alphabetagamma"; s3 != want {
		t.Errorf("third String result is now %q; want %q", s3, want)
	}
}
// TestBuilderReset verifies that Reset empties the Builder and detaches
// it from previously returned strings.
func TestBuilderReset(t *testing.T) {
	var b Builder
	check(t, &b, "")
	b.WriteString("aaa")
	s := b.String()
	check(t, &b, "aaa")
	b.Reset()
	check(t, &b, "")

	// Ensure that writing after Reset doesn't alter
	// previously returned strings.
	b.WriteString("bbb")
	check(t, &b, "bbb")
	if want := "aaa"; s != want {
		t.Errorf("previous String result changed after Reset: got %q; want %q", s, want)
	}
}
// TestBuilderGrow verifies that Grow preallocates enough capacity (one
// allocation for positive sizes, zero for size 0) and that a negative
// argument panics.
func TestBuilderGrow(t *testing.T) {
	for _, growLen := range []int{0, 100, 1000, 10000, 100000} {
		p := bytes.Repeat([]byte{'a'}, growLen)
		allocs := testing.AllocsPerRun(100, func() {
			var b Builder
			b.Grow(growLen) // should be only alloc, when growLen > 0
			if b.Cap() < growLen {
				t.Fatalf("growLen=%d: Cap() is lower than growLen", growLen)
			}
			b.Write(p)
			if b.String() != string(p) {
				t.Fatalf("growLen=%d: bad data written after Grow", growLen)
			}
		})
		wantAllocs := 1
		if growLen == 0 {
			wantAllocs = 0
		}
		if g, w := int(allocs), wantAllocs; g != w {
			t.Errorf("growLen=%d: got %d allocs during Write; want %v", growLen, g, w)
		}
	}

	// when growLen < 0, should panic
	var a Builder
	n := -1
	// The deferred recover must be registered before the panicking call.
	defer func() {
		if r := recover(); r == nil {
			t.Errorf("a.Grow(%d) should panic()", n)
		}
	}()
	a.Grow(n)
}
// TestBuilderWrite2 runs each write method twice via a table and checks
// the reported byte count and accumulated contents after each call.
func TestBuilderWrite2(t *testing.T) {
	const s0 = "hello 世界"
	for _, tt := range []struct {
		name string
		fn   func(b *Builder) (int, error)
		n    int
		want string
	}{
		{
			"Write",
			func(b *Builder) (int, error) { return b.Write([]byte(s0)) },
			len(s0),
			s0,
		},
		{
			"WriteRune",
			func(b *Builder) (int, error) { return b.WriteRune('a') },
			1,
			"a",
		},
		{
			"WriteRuneWide",
			func(b *Builder) (int, error) { return b.WriteRune('世') },
			3,
			"世",
		},
		{
			"WriteString",
			func(b *Builder) (int, error) { return b.WriteString(s0) },
			len(s0),
			s0,
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			var b Builder
			n, err := tt.fn(&b)
			if err != nil {
				t.Fatalf("first call: got %s", err)
			}
			if n != tt.n {
				t.Errorf("first call: got n=%d; want %d", n, tt.n)
			}
			check(t, &b, tt.want)

			n, err = tt.fn(&b)
			if err != nil {
				t.Fatalf("second call: got %s", err)
			}
			if n != tt.n {
				t.Errorf("second call: got n=%d; want %d", n, tt.n)
			}
			check(t, &b, tt.want+tt.want)
		})
	}
}
// TestBuilderWriteByte exercises WriteByte with a printable byte and a
// NUL byte and verifies the accumulated contents.
func TestBuilderWriteByte(t *testing.T) {
	var b Builder
	for _, c := range []byte{'a', 0} {
		if err := b.WriteByte(c); err != nil {
			t.Error(err)
		}
	}
	check(t, &b, "a\x00")
}
// TestBuilderAllocs verifies that a Grow/WriteString/String sequence
// performs exactly one allocation.
func TestBuilderAllocs(t *testing.T) {
	// Issue 23382; verify that copyCheck doesn't force the
	// Builder to escape and be heap allocated.
	n := testing.AllocsPerRun(10000, func() {
		var b Builder
		b.Grow(5)
		b.WriteString("abcde")
		_ = b.String()
	})
	if n != 1 {
		t.Errorf("Builder allocs = %v; want 1", n)
	}
}
// TestBuilderCopyPanic verifies the copy-by-value check: read-only
// methods (String, Len, Cap) and Reset tolerate a copied Builder, while
// every write method must panic. Each case runs in its own goroutine so
// a panic can be observed via recover without failing the test binary.
func TestBuilderCopyPanic(t *testing.T) {
	tests := []struct {
		name      string
		fn        func()
		wantPanic bool
	}{
		{
			name:      "String",
			wantPanic: false,
			fn: func() {
				var a Builder
				a.WriteByte('x')
				b := a
				_ = b.String() // appease vet
			},
		},
		{
			name:      "Len",
			wantPanic: false,
			fn: func() {
				var a Builder
				a.WriteByte('x')
				b := a
				b.Len()
			},
		},
		{
			name:      "Cap",
			wantPanic: false,
			fn: func() {
				var a Builder
				a.WriteByte('x')
				b := a
				b.Cap()
			},
		},
		{
			name:      "Reset",
			wantPanic: false,
			fn: func() {
				var a Builder
				a.WriteByte('x')
				b := a
				b.Reset()
				b.WriteByte('y')
			},
		},
		{
			name:      "Write",
			wantPanic: true,
			fn: func() {
				var a Builder
				a.Write([]byte("x"))
				b := a
				b.Write([]byte("y"))
			},
		},
		{
			name:      "WriteByte",
			wantPanic: true,
			fn: func() {
				var a Builder
				a.WriteByte('x')
				b := a
				b.WriteByte('y')
			},
		},
		{
			name:      "WriteString",
			wantPanic: true,
			fn: func() {
				var a Builder
				a.WriteString("x")
				b := a
				b.WriteString("y")
			},
		},
		{
			name:      "WriteRune",
			wantPanic: true,
			fn: func() {
				var a Builder
				a.WriteRune('x')
				b := a
				b.WriteRune('y')
			},
		},
		{
			name:      "Grow",
			wantPanic: true,
			fn: func() {
				var a Builder
				a.Grow(1)
				b := a
				b.Grow(2)
			},
		},
	}
	for _, tt := range tests {
		// The unbuffered channel makes each iteration wait for its
		// goroutine, so the loop variable is never captured stale.
		didPanic := make(chan bool)
		go func() {
			defer func() { didPanic <- recover() != nil }()
			tt.fn()
		}()
		if got := <-didPanic; got != tt.wantPanic {
			t.Errorf("%s: panicked = %v; want %v", tt.name, got, tt.wantPanic)
		}
	}
}
// TestBuilderWriteInvalidRune verifies that out-of-range runes are
// encoded as the replacement character U+FFFD.
func TestBuilderWriteInvalidRune(t *testing.T) {
	// Invalid runes, including negative ones, should be written as
	// utf8.RuneError.
	for _, r := range []rune{-1, utf8.MaxRune + 1} {
		var b Builder
		b.WriteRune(r)
		check(t, &b, "\uFFFD")
	}
}
// someBytes is shared benchmark input; sinkS receives benchmark results
// so the compiler cannot eliminate the measured work.
var someBytes = []byte("some bytes sdljlk jsklj3lkjlk djlkjw")

var sinkS string
// benchmarkBuilder runs f under three scenarios: a single write, three
// writes without preallocation, and three writes after Grow.
func benchmarkBuilder(b *testing.B, f func(b *testing.B, numWrite int, grow bool)) {
	b.Run("1Write_NoGrow", func(b *testing.B) {
		b.ReportAllocs()
		f(b, 1, false)
	})
	b.Run("3Write_NoGrow", func(b *testing.B) {
		b.ReportAllocs()
		f(b, 3, false)
	})
	b.Run("3Write_Grow", func(b *testing.B) {
		b.ReportAllocs()
		f(b, 3, true)
	})
}
// BenchmarkBuildString_Builder measures building a string with
// Builder.Write, with and without preallocation via Grow.
func BenchmarkBuildString_Builder(b *testing.B) {
	benchmarkBuilder(b, func(b *testing.B, numWrite int, grow bool) {
		for i := 0; i < b.N; i++ {
			var buf Builder
			if grow {
				buf.Grow(len(someBytes) * numWrite)
			}
			for i := 0; i < numWrite; i++ {
				buf.Write(someBytes)
			}
			sinkS = buf.String()
		}
	})
}
// BenchmarkBuildString_WriteString is the WriteString counterpart of
// BenchmarkBuildString_Builder.
func BenchmarkBuildString_WriteString(b *testing.B) {
	someString := string(someBytes)
	benchmarkBuilder(b, func(b *testing.B, numWrite int, grow bool) {
		for i := 0; i < b.N; i++ {
			var buf Builder
			if grow {
				buf.Grow(len(someString) * numWrite)
			}
			for i := 0; i < numWrite; i++ {
				buf.WriteString(someString)
			}
			sinkS = buf.String()
		}
	})
}
// BenchmarkBuildString_ByteBuffer is a bytes.Buffer baseline for
// comparison against the Builder benchmarks.
func BenchmarkBuildString_ByteBuffer(b *testing.B) {
	benchmarkBuilder(b, func(b *testing.B, numWrite int, grow bool) {
		for i := 0; i < b.N; i++ {
			var buf bytes.Buffer
			if grow {
				buf.Grow(len(someBytes) * numWrite)
			}
			for i := 0; i < numWrite; i++ {
				buf.Write(someBytes)
			}
			sinkS = buf.String()
		}
	})
}
// TestBuilderGrowSizeclasses verifies that writing one byte more than
// the Grow request still costs at most one allocation (the grown buffer
// lands in a size class with spare capacity).
func TestBuilderGrowSizeclasses(t *testing.T) {
	s := Repeat("a", 19)
	allocs := testing.AllocsPerRun(100, func() {
		var b Builder
		b.Grow(18)
		b.WriteString(s)
		_ = b.String()
	})
	if allocs > 1 {
		t.Fatalf("unexpected amount of allocations: %v, want: 1", allocs)
	}
}

23
src/strings/clone.go Normal file
View File

@@ -0,0 +1,23 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings
import (
"internal/stringslite"
)
// Clone returns a fresh copy of s.
// It guarantees to make a copy of s into a new allocation,
// which can be important when retaining only a small substring
// of a much larger string. Using Clone can help such programs
// use less memory. Of course, since using Clone makes a copy,
// overuse of Clone can make programs use more memory.
// Clone should typically be used only rarely, and only when
// profiling indicates that it is needed.
// For strings of length zero the string "" will be returned
// and no allocation is made.
func Clone(s string) string {
	// Delegates to the implementation shared with the runtime.
	return stringslite.Clone(s)
}

45
src/strings/clone_test.go Normal file
View File

@@ -0,0 +1,45 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings_test
import (
"strings"
"testing"
"unsafe"
)
// emptyString provides the canonical backing pointer of "" for
// comparison in TestClone.
var emptyString string
// TestClone verifies that Clone preserves contents, detaches non-empty
// results from the input's backing memory, and allocates nothing for
// empty inputs.
func TestClone(t *testing.T) {
	var cloneTests = []string{
		"",
		strings.Clone(""),
		strings.Repeat("a", 42)[:0],
		"short",
		strings.Repeat("a", 42),
	}
	for _, input := range cloneTests {
		clone := strings.Clone(input)
		if clone != input {
			t.Errorf("Clone(%q) = %q; want %q", input, clone, input)
		}

		if len(input) != 0 && unsafe.StringData(clone) == unsafe.StringData(input) {
			t.Errorf("Clone(%q) return value should not reference inputs backing memory.", input)
		}

		if len(input) == 0 && unsafe.StringData(clone) != unsafe.StringData(emptyString) {
			t.Errorf("Clone(%#v) return value should be equal to empty string.", unsafe.StringData(input))
		}
	}
}
// BenchmarkClone measures the cost of cloning a 42-byte string; the
// result is stored in stringSink to keep the call live.
func BenchmarkClone(b *testing.B) {
	var str = strings.Repeat("a", 42)
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		stringSink = strings.Clone(str)
	}
}

17
src/strings/compare.go Normal file
View File

@@ -0,0 +1,17 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings
import "internal/bytealg"
// Compare returns an integer comparing two strings lexicographically.
// The result will be 0 if a == b, -1 if a < b, and +1 if a > b.
//
// Use Compare when you need to perform a three-way comparison (with
// [slices.SortFunc], for example). It is usually clearer and always faster
// to use the built-in string comparison operators ==, <, >, and so on.
func Compare(a, b string) int {
	// Delegates to the runtime-optimized comparison.
	return bytealg.CompareString(a, b)
}

119
src/strings/compare_test.go Normal file
View File

@@ -0,0 +1,119 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings_test
// Derived from bytes/compare_test.go.
// Benchmarks omitted since the underlying implementation is identical.
import (
"internal/testenv"
. "strings"
"testing"
"unsafe"
)
// compareTests lists string pairs with the expected Compare result i.
var compareTests = []struct {
	a, b string
	i    int
}{
	{"", "", 0},
	{"a", "", 1},
	{"", "a", -1},
	{"abc", "abc", 0},
	{"ab", "abc", -1},
	{"abc", "ab", 1},
	{"x", "ab", 1},
	{"ab", "x", -1},
	{"x", "a", 1},
	{"b", "x", -1},
	// test runtime·memeq's chunked implementation
	{"abcdefgh", "abcdefgh", 0},
	{"abcdefghi", "abcdefghi", 0},
	{"abcdefghi", "abcdefghj", -1},
}
// TestCompare checks Compare against the expected results in compareTests.
func TestCompare(t *testing.T) {
	for _, tt := range compareTests {
		if got := Compare(tt.a, tt.b); got != tt.i {
			t.Errorf(`Compare(%q, %q) = %v`, tt.a, tt.b, got)
		}
	}
}
func TestCompareIdenticalString(t *testing.T) {
var s = "Hello Gophers!"
if Compare(s, s) != 0 {
t.Error("s != s")
}
if Compare(s, s[:1]) != 1 {
t.Error("s > s[:1] failed")
}
}
// TestCompareStrings compares equal, shorter, and single-byte-different
// strings across a range of lengths, exercising the chunked comparison
// paths in the runtime.
//
// Fix: the per-length loop variable was named "len", shadowing the
// builtin len for the whole loop body; it is renamed to size. Behavior
// is unchanged.
func TestCompareStrings(t *testing.T) {
	// unsafeString converts a []byte to a string with no allocation.
	// The caller must not modify b while the result string is in use.
	unsafeString := func(b []byte) string {
		return unsafe.String(unsafe.SliceData(b), len(b))
	}

	lengths := make([]int, 0) // lengths to test in ascending order
	for i := 0; i <= 128; i++ {
		lengths = append(lengths, i)
	}
	lengths = append(lengths, 256, 512, 1024, 1333, 4095, 4096, 4097)

	if !testing.Short() || testenv.Builder() != "" {
		lengths = append(lengths, 65535, 65536, 65537, 99999)
	}

	n := lengths[len(lengths)-1]
	a := make([]byte, n+1)
	b := make([]byte, n+1)
	lastLen := 0
	for _, size := range lengths {
		// randomish but deterministic data. No 0 or 255.
		for i := 0; i < size; i++ {
			a[i] = byte(1 + 31*i%254)
			b[i] = byte(1 + 31*i%254)
		}
		// data past the end is different
		for i := size; i <= n; i++ {
			a[i] = 8
			b[i] = 9
		}
		sa, sb := unsafeString(a), unsafeString(b)
		cmp := Compare(sa[:size], sb[:size])
		if cmp != 0 {
			t.Errorf(`CompareIdentical(%d) = %d`, size, cmp)
		}
		if size > 0 {
			cmp = Compare(sa[:size-1], sb[:size])
			if cmp != -1 {
				t.Errorf(`CompareAshorter(%d) = %d`, size, cmp)
			}
			cmp = Compare(sa[:size], sb[:size-1])
			if cmp != 1 {
				t.Errorf(`CompareBshorter(%d) = %d`, size, cmp)
			}
		}
		// Perturb each byte added since the previous length and confirm
		// Compare orders the strings correctly in both directions.
		for k := lastLen; k < size; k++ {
			b[k] = a[k] - 1
			cmp = Compare(unsafeString(a[:size]), unsafeString(b[:size]))
			if cmp != 1 {
				t.Errorf(`CompareAbigger(%d,%d) = %d`, size, k, cmp)
			}
			b[k] = a[k] + 1
			cmp = Compare(unsafeString(a[:size]), unsafeString(b[:size]))
			if cmp != -1 {
				t.Errorf(`CompareBbigger(%d,%d) = %d`, size, k, cmp)
			}
			b[k] = a[k]
		}
		lastLen = size
	}
}

460
src/strings/example_test.go Normal file
View File

@@ -0,0 +1,460 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings_test
import (
"fmt"
"strings"
"unicode"
"unsafe"
)
// ExampleClone shows that a clone is equal in value but has distinct backing memory.
func ExampleClone() {
	s := "abc"
	clone := strings.Clone(s)
	fmt.Println(s == clone)
	fmt.Println(unsafe.StringData(s) == unsafe.StringData(clone))

	// Output:
	// true
	// false
}
// ExampleBuilder builds a string incrementally with Fprintf and WriteString.
func ExampleBuilder() {
	var b strings.Builder
	for i := 3; i >= 1; i-- {
		fmt.Fprintf(&b, "%d...", i)
	}
	b.WriteString("ignition")
	fmt.Println(b.String())

	// Output: 3...2...1...ignition
}
// ExampleCompare shows the three possible results of a three-way comparison.
func ExampleCompare() {
	fmt.Println(strings.Compare("a", "b"))
	fmt.Println(strings.Compare("a", "a"))
	fmt.Println(strings.Compare("b", "a"))
	// Output:
	// -1
	// 0
	// 1
}
// ExampleContains includes the empty-substring cases, which always report true.
func ExampleContains() {
	fmt.Println(strings.Contains("seafood", "foo"))
	fmt.Println(strings.Contains("seafood", "bar"))
	fmt.Println(strings.Contains("seafood", ""))
	fmt.Println(strings.Contains("", ""))
	// Output:
	// true
	// false
	// true
	// true
}
// ExampleContainsAny shows that an empty chars set never matches.
func ExampleContainsAny() {
	fmt.Println(strings.ContainsAny("team", "i"))
	fmt.Println(strings.ContainsAny("fail", "ui"))
	fmt.Println(strings.ContainsAny("ure", "ui"))
	fmt.Println(strings.ContainsAny("failure", "ui"))
	fmt.Println(strings.ContainsAny("foo", ""))
	fmt.Println(strings.ContainsAny("", ""))
	// Output:
	// false
	// true
	// true
	// true
	// false
	// false
}
// ExampleContainsRune searches for a rune given by its code point value.
func ExampleContainsRune() {
	// Finds whether a string contains a particular Unicode code point.
	// The code point for the lowercase letter "a", for example, is 97.
	fmt.Println(strings.ContainsRune("aardvark", 97))
	fmt.Println(strings.ContainsRune("timeout", 97))
	// Output:
	// true
	// false
}
// ExampleContainsFunc tests membership with a vowel predicate.
func ExampleContainsFunc() {
	f := func(r rune) bool {
		return r == 'a' || r == 'e' || r == 'i' || r == 'o' || r == 'u'
	}
	fmt.Println(strings.ContainsFunc("hello", f))
	fmt.Println(strings.ContainsFunc("rhythms", f))
	// Output:
	// true
	// false
}
// ExampleCount shows that an empty substring counts rune boundaries.
func ExampleCount() {
	fmt.Println(strings.Count("cheese", "e"))
	fmt.Println(strings.Count("five", "")) // before & after each rune
	// Output:
	// 3
	// 5
}
// ExampleCut splits around the first occurrence of a separator.
func ExampleCut() {
	show := func(s, sep string) {
		before, after, found := strings.Cut(s, sep)
		fmt.Printf("Cut(%q, %q) = %q, %q, %v\n", s, sep, before, after, found)
	}
	show("Gopher", "Go")
	show("Gopher", "ph")
	show("Gopher", "er")
	show("Gopher", "Badger")
	// Output:
	// Cut("Gopher", "Go") = "", "pher", true
	// Cut("Gopher", "ph") = "Go", "er", true
	// Cut("Gopher", "er") = "Goph", "", true
	// Cut("Gopher", "Badger") = "Gopher", "", false
}
// ExampleCutPrefix removes a leading separator if present.
func ExampleCutPrefix() {
	show := func(s, sep string) {
		after, found := strings.CutPrefix(s, sep)
		fmt.Printf("CutPrefix(%q, %q) = %q, %v\n", s, sep, after, found)
	}
	show("Gopher", "Go")
	show("Gopher", "ph")
	// Output:
	// CutPrefix("Gopher", "Go") = "pher", true
	// CutPrefix("Gopher", "ph") = "Gopher", false
}
// ExampleCutSuffix removes a trailing separator if present.
func ExampleCutSuffix() {
	show := func(s, sep string) {
		before, found := strings.CutSuffix(s, sep)
		fmt.Printf("CutSuffix(%q, %q) = %q, %v\n", s, sep, before, found)
	}
	show("Gopher", "Go")
	show("Gopher", "er")
	// Output:
	// CutSuffix("Gopher", "Go") = "Gopher", false
	// CutSuffix("Gopher", "er") = "Goph", true
}
// ExampleEqualFold demonstrates simple (not full) Unicode case-folding.
func ExampleEqualFold() {
	fmt.Println(strings.EqualFold("Go", "go"))
	fmt.Println(strings.EqualFold("AB", "ab")) // true because comparison uses simple case-folding
	fmt.Println(strings.EqualFold("ß", "ss"))  // false because comparison does not use full case-folding
	// Output:
	// true
	// true
	// false
}
// ExampleFields splits on runs of whitespace, dropping leading/trailing space.
func ExampleFields() {
	fmt.Printf("Fields are: %q", strings.Fields("  foo bar  baz   "))
	// Output: Fields are: ["foo" "bar" "baz"]
}
// ExampleFieldsFunc splits on any rune that is not a letter or number.
func ExampleFieldsFunc() {
	f := func(c rune) bool {
		return !unicode.IsLetter(c) && !unicode.IsNumber(c)
	}
	fmt.Printf("Fields are: %q", strings.FieldsFunc("  foo1;bar2,baz3...", f))
	// Output: Fields are: ["foo1" "bar2" "baz3"]
}
// ExampleHasPrefix shows that every string has the empty prefix.
func ExampleHasPrefix() {
	fmt.Println(strings.HasPrefix("Gopher", "Go"))
	fmt.Println(strings.HasPrefix("Gopher", "C"))
	fmt.Println(strings.HasPrefix("Gopher", ""))
	// Output:
	// true
	// false
	// true
}
// ExampleHasSuffix shows case sensitivity and the empty-suffix case.
func ExampleHasSuffix() {
	fmt.Println(strings.HasSuffix("Amigo", "go"))
	fmt.Println(strings.HasSuffix("Amigo", "O"))
	fmt.Println(strings.HasSuffix("Amigo", "Ami"))
	fmt.Println(strings.HasSuffix("Amigo", ""))
	// Output:
	// true
	// false
	// false
	// true
}
// ExampleIndex returns the byte index of the first match, or -1.
func ExampleIndex() {
	fmt.Println(strings.Index("chicken", "ken"))
	fmt.Println(strings.Index("chicken", "dmr"))
	// Output:
	// 4
	// -1
}
// ExampleIndexFunc locates the first rune satisfying a predicate.
func ExampleIndexFunc() {
	f := func(c rune) bool {
		return unicode.Is(unicode.Han, c)
	}
	fmt.Println(strings.IndexFunc("Hello, 世界", f))
	fmt.Println(strings.IndexFunc("Hello, world", f))
	// Output:
	// 7
	// -1
}
// ExampleIndexAny finds the first rune from a set of candidates.
func ExampleIndexAny() {
	fmt.Println(strings.IndexAny("chicken", "aeiouy"))
	fmt.Println(strings.IndexAny("crwth", "aeiouy"))
	// Output:
	// 2
	// -1
}
// ExampleIndexByte finds the first occurrence of a single byte.
func ExampleIndexByte() {
	fmt.Println(strings.IndexByte("golang", 'g'))
	fmt.Println(strings.IndexByte("gophers", 'h'))
	fmt.Println(strings.IndexByte("golang", 'x'))
	// Output:
	// 0
	// 3
	// -1
}
// ExampleIndexRune finds the first occurrence of a rune.
func ExampleIndexRune() {
	fmt.Println(strings.IndexRune("chicken", 'k'))
	fmt.Println(strings.IndexRune("chicken", 'd'))
	// Output:
	// 4
	// -1
}
// ExampleLastIndex contrasts Index (first match) with LastIndex (last match).
func ExampleLastIndex() {
	fmt.Println(strings.Index("go gopher", "go"))
	fmt.Println(strings.LastIndex("go gopher", "go"))
	fmt.Println(strings.LastIndex("go gopher", "rodent"))
	// Output:
	// 0
	// 3
	// -1
}
// ExampleLastIndexAny finds the last rune from a set of candidates.
func ExampleLastIndexAny() {
	fmt.Println(strings.LastIndexAny("go gopher", "go"))
	fmt.Println(strings.LastIndexAny("go gopher", "rodent"))
	fmt.Println(strings.LastIndexAny("go gopher", "fail"))
	// Output:
	// 4
	// 8
	// -1
}
// ExampleLastIndexByte finds the last occurrence of a single byte.
func ExampleLastIndexByte() {
	fmt.Println(strings.LastIndexByte("Hello, world", 'l'))
	fmt.Println(strings.LastIndexByte("Hello, world", 'o'))
	fmt.Println(strings.LastIndexByte("Hello, world", 'x'))
	// Output:
	// 10
	// 8
	// -1
}
// ExampleLastIndexFunc finds the last rune satisfying a predicate.
func ExampleLastIndexFunc() {
	fmt.Println(strings.LastIndexFunc("go 123", unicode.IsNumber))
	fmt.Println(strings.LastIndexFunc("123 go", unicode.IsNumber))
	fmt.Println(strings.LastIndexFunc("go", unicode.IsNumber))
	// Output:
	// 5
	// 2
	// -1
}
// ExampleJoin concatenates a slice with a separator between elements.
func ExampleJoin() {
	s := []string{"foo", "bar", "baz"}
	fmt.Println(strings.Join(s, ", "))
	// Output: foo, bar, baz
}
// ExampleRepeat concatenates count copies of a string.
func ExampleRepeat() {
	fmt.Println("ba" + strings.Repeat("na", 2))
	// Output: banana
}
// ExampleReplace shows bounded replacement and n = -1 for all occurrences.
func ExampleReplace() {
	fmt.Println(strings.Replace("oink oink oink", "k", "ky", 2))
	fmt.Println(strings.Replace("oink oink oink", "oink", "moo", -1))
	// Output:
	// oinky oinky oink
	// moo moo moo
}
// ExampleReplaceAll replaces every occurrence of the pattern.
func ExampleReplaceAll() {
	fmt.Println(strings.ReplaceAll("oink oink oink", "oink", "moo"))
	// Output:
	// moo moo moo
}
// ExampleSplit includes the empty-separator (split per rune) and empty-input cases.
func ExampleSplit() {
	fmt.Printf("%q\n", strings.Split("a,b,c", ","))
	fmt.Printf("%q\n", strings.Split("a man a plan a canal panama", "a "))
	fmt.Printf("%q\n", strings.Split(" xyz ", ""))
	fmt.Printf("%q\n", strings.Split("", "Bernardo O'Higgins"))
	// Output:
	// ["a" "b" "c"]
	// ["" "man " "plan " "canal panama"]
	// [" " "x" "y" "z" " "]
	// [""]
}
// ExampleSplitN shows bounded splitting; n = 0 yields a nil slice.
func ExampleSplitN() {
	fmt.Printf("%q\n", strings.SplitN("a,b,c", ",", 2))
	z := strings.SplitN("a,b,c", ",", 0)
	fmt.Printf("%q (nil = %v)\n", z, z == nil)
	// Output:
	// ["a" "b,c"]
	// [] (nil = true)
}
// ExampleSplitAfter keeps the separator at the end of each piece.
func ExampleSplitAfter() {
	fmt.Printf("%q\n", strings.SplitAfter("a,b,c", ","))
	// Output: ["a," "b," "c"]
}
// ExampleSplitAfterN bounds SplitAfter to n pieces.
func ExampleSplitAfterN() {
	fmt.Printf("%q\n", strings.SplitAfterN("a,b,c", ",", 2))
	// Output: ["a," "b,c"]
}
// ExampleTitle upper-cases the first letter of each word.
func ExampleTitle() {
	// Compare this example to the ToTitle example.
	fmt.Println(strings.Title("her royal highness"))
	fmt.Println(strings.Title("loud noises"))
	fmt.Println(strings.Title("хлеб"))
	// Output:
	// Her Royal Highness
	// Loud Noises
	// Хлеб
}
// ExampleToTitle maps every letter to its title case (all caps here).
func ExampleToTitle() {
	// Compare this example to the Title example.
	fmt.Println(strings.ToTitle("her royal highness"))
	fmt.Println(strings.ToTitle("loud noises"))
	fmt.Println(strings.ToTitle("хлеб"))
	// Output:
	// HER ROYAL HIGHNESS
	// LOUD NOISES
	// ХЛЕБ
}
// ExampleToTitleSpecial applies locale-specific (Turkish) title casing.
func ExampleToTitleSpecial() {
	fmt.Println(strings.ToTitleSpecial(unicode.TurkishCase, "dünyanın ilk borsa yapısı Aizonai kabul edilir"))
	// Output:
	// DÜNYANIN İLK BORSA YAPISI AİZONAİ KABUL EDİLİR
}
// ExampleMap applies a per-rune mapping (ROT13) to a string.
func ExampleMap() {
	rot13 := func(r rune) rune {
		switch {
		case r >= 'A' && r <= 'Z':
			return 'A' + (r-'A'+13)%26
		case r >= 'a' && r <= 'z':
			return 'a' + (r-'a'+13)%26
		}
		return r
	}
	fmt.Println(strings.Map(rot13, "'Twas brillig and the slithy gopher..."))
	// Output: 'Gjnf oevyyvt naq gur fyvgul tbcure...
}
// ExampleNewReplacer performs several replacements in a single pass.
func ExampleNewReplacer() {
	r := strings.NewReplacer("<", "&lt;", ">", "&gt;")
	fmt.Println(r.Replace("This is <b>HTML</b>!"))
	// Output: This is &lt;b&gt;HTML&lt;/b&gt;!
}
// ExampleToUpper maps a string to upper case.
func ExampleToUpper() {
	fmt.Println(strings.ToUpper("Gopher"))
	// Output: GOPHER
}
// ExampleToUpperSpecial upper-cases using Turkish casing rules.
func ExampleToUpperSpecial() {
	fmt.Println(strings.ToUpperSpecial(unicode.TurkishCase, "örnek iş"))
	// Output: ÖRNEK İŞ
}
// ExampleToLower maps a string to lower case.
func ExampleToLower() {
	fmt.Println(strings.ToLower("Gopher"))
	// Output: gopher
}
// ExampleToLowerSpecial lower-cases using Turkish casing rules.
func ExampleToLowerSpecial() {
	fmt.Println(strings.ToLowerSpecial(unicode.TurkishCase, "Önnek İş"))
	// Output: önnek iş
}
// ExampleTrim removes runes from the cutset on both ends.
func ExampleTrim() {
	fmt.Print(strings.Trim("¡¡¡Hello, Gophers!!!", "!¡"))
	// Output: Hello, Gophers
}
// ExampleTrimSpace removes all leading and trailing whitespace.
func ExampleTrimSpace() {
	fmt.Println(strings.TrimSpace(" \t\n Hello, Gophers \n\t\r\n"))
	// Output: Hello, Gophers
}
// ExampleTrimPrefix shows that a non-matching prefix leaves s unchanged.
func ExampleTrimPrefix() {
	var s = "¡¡¡Hello, Gophers!!!"
	s = strings.TrimPrefix(s, "¡¡¡Hello, ")
	s = strings.TrimPrefix(s, "¡¡¡Howdy, ")
	fmt.Print(s)
	// Output: Gophers!!!
}
// ExampleTrimSuffix shows that a non-matching suffix leaves s unchanged.
func ExampleTrimSuffix() {
	var s = "¡¡¡Hello, Gophers!!!"
	s = strings.TrimSuffix(s, ", Gophers!!!")
	s = strings.TrimSuffix(s, ", Marmots!!!")
	fmt.Print(s)
	// Output: ¡¡¡Hello
}
// ExampleTrimFunc trims both ends using a predicate.
func ExampleTrimFunc() {
	fmt.Print(strings.TrimFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
	}))
	// Output: Hello, Gophers
}
// ExampleTrimLeft trims only the leading cutset runes.
func ExampleTrimLeft() {
	fmt.Print(strings.TrimLeft("¡¡¡Hello, Gophers!!!", "!¡"))
	// Output: Hello, Gophers!!!
}
// ExampleTrimLeftFunc trims the leading runes matched by a predicate.
func ExampleTrimLeftFunc() {
	fmt.Print(strings.TrimLeftFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
	}))
	// Output: Hello, Gophers!!!
}
// ExampleTrimRight trims only the trailing cutset runes.
func ExampleTrimRight() {
	fmt.Print(strings.TrimRight("¡¡¡Hello, Gophers!!!", "!¡"))
	// Output: ¡¡¡Hello, Gophers
}
// ExampleTrimRightFunc trims the trailing runes matched by a predicate.
func ExampleTrimRightFunc() {
	fmt.Print(strings.TrimRightFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
	}))
	// Output: ¡¡¡Hello, Gophers
}
// ExampleToValidUTF8 replaces runs of invalid bytes with the replacement string.
func ExampleToValidUTF8() {
	fmt.Printf("%s\n", strings.ToValidUTF8("abc", "\uFFFD"))
	fmt.Printf("%s\n", strings.ToValidUTF8("a\xffb\xC0\xAFc\xff", ""))
	fmt.Printf("%s\n", strings.ToValidUTF8("\xed\xa0\x80", "abc"))
	// Output:
	// abc
	// abc
	// abc
}

View File

@@ -0,0 +1,47 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings
// Replacer exposes the lazily-built internal replacer for tests.
func (r *Replacer) Replacer() any {
	r.once.Do(r.buildOnce)
	return r.r
}
// PrintTrie renders the generic replacer's trie as text for tests.
// It assumes the built replacer is a *genericReplacer.
func (r *Replacer) PrintTrie() string {
	r.once.Do(r.buildOnce)
	gen := r.r.(*genericReplacer)
	return gen.printNode(&gen.root, 0)
}
// printNode recursively renders one trie node: "+" marks a node with a
// replacement (priority > 0), "-" a node without; children are indented
// one dot per byte of depth.
func (r *genericReplacer) printNode(t *trieNode, depth int) (s string) {
	if t.priority > 0 {
		s += "+"
	} else {
		s += "-"
	}
	s += "\n"

	if t.prefix != "" {
		// Prefix-compressed edge: print the whole prefix on one line.
		s += Repeat(".", depth) + t.prefix
		s += r.printNode(t.next, depth+len(t.prefix))
	} else if t.table != nil {
		// Table node: walk the byte->index mapping in byte order.
		for b, m := range r.mapping {
			if int(m) != r.tableSize && t.table[m] != nil {
				s += Repeat(".", depth) + string([]byte{byte(b)})
				s += r.printNode(t.table[m], depth+1)
			}
		}
	}
	return
}
// StringFind exposes the Boyer-Moore string finder for tests.
func StringFind(pattern, text string) int {
	return makeStringFinder(pattern).next(text)
}
// DumpTables exposes the finder's bad-character and good-suffix skip
// tables for tests.
func DumpTables(pattern string) ([]int, []int) {
	finder := makeStringFinder(pattern)
	return finder.badCharSkip[:], finder.goodSuffixSkip
}

160
src/strings/reader.go Normal file
View File

@@ -0,0 +1,160 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings
import (
"errors"
"io"
"unicode/utf8"
)
// A Reader implements the [io.Reader], [io.ReaderAt], [io.ByteReader], [io.ByteScanner],
// [io.RuneReader], [io.RuneScanner], [io.Seeker], and [io.WriterTo] interfaces by reading
// from a string.
// The zero value for Reader operates like a Reader of an empty string.
type Reader struct {
	s        string
	i        int64 // current reading index
	prevRune int   // index of previous rune; or < 0
}
// Len returns the number of bytes of the unread portion of the
// string.
func (r *Reader) Len() int {
	// Guard against an index seeked past the end of the string.
	if r.i >= int64(len(r.s)) {
		return 0
	}
	return int(int64(len(r.s)) - r.i)
}
// Size returns the original length of the underlying string.
// Size is the number of bytes available for reading via [Reader.ReadAt].
// The returned value is always the same and is not affected by calls
// to any other method.
func (r *Reader) Size() int64 { return int64(len(r.s)) }
// Read implements the [io.Reader] interface.
func (r *Reader) Read(b []byte) (n int, err error) {
	if r.i >= int64(len(r.s)) {
		return 0, io.EOF
	}
	// Any read invalidates a pending UnreadRune.
	r.prevRune = -1
	n = copy(b, r.s[r.i:])
	r.i += int64(n)
	return
}
// ReadAt implements the [io.ReaderAt] interface.
// It reads relative to the start of the string and leaves the Reader's
// own position and rune state untouched.
func (r *Reader) ReadAt(b []byte, off int64) (n int, err error) {
	// cannot modify state - see io.ReaderAt
	if off < 0 {
		return 0, errors.New("strings.Reader.ReadAt: negative offset")
	}
	if off >= int64(len(r.s)) {
		return 0, io.EOF
	}
	n = copy(b, r.s[off:])
	if n < len(b) {
		err = io.EOF
	}
	return
}
// ReadByte implements the [io.ByteReader] interface.
func (r *Reader) ReadByte() (byte, error) {
	// A byte read invalidates a pending UnreadRune.
	r.prevRune = -1
	if r.i >= int64(len(r.s)) {
		return 0, io.EOF
	}
	b := r.s[r.i]
	r.i++
	return b, nil
}
// UnreadByte implements the [io.ByteScanner] interface.
// It fails at the beginning of the string; otherwise it steps the index
// back one byte and invalidates any pending UnreadRune.
func (r *Reader) UnreadByte() error {
	if r.i <= 0 {
		return errors.New("strings.Reader.UnreadByte: at beginning of string")
	}
	r.prevRune = -1
	r.i--
	return nil
}
// ReadRune implements the [io.RuneReader] interface.
// It records the rune's start index so UnreadRune can step back.
func (r *Reader) ReadRune() (ch rune, size int, err error) {
	if r.i >= int64(len(r.s)) {
		r.prevRune = -1
		return 0, 0, io.EOF
	}
	r.prevRune = int(r.i)
	// Fast path for single-byte (ASCII) runes.
	if c := r.s[r.i]; c < utf8.RuneSelf {
		r.i++
		return rune(c), 1, nil
	}
	ch, size = utf8.DecodeRuneInString(r.s[r.i:])
	r.i += int64(size)
	return
}
// UnreadRune implements the [io.RuneScanner] interface.
// It is valid only immediately after a successful ReadRune.
func (r *Reader) UnreadRune() error {
	if r.i <= 0 {
		return errors.New("strings.Reader.UnreadRune: at beginning of string")
	}
	if r.prevRune < 0 {
		return errors.New("strings.Reader.UnreadRune: previous operation was not ReadRune")
	}
	r.i = int64(r.prevRune)
	r.prevRune = -1
	return nil
}
// Seek implements the [io.Seeker] interface.
// The new position may be past the end of the string; subsequent reads
// will simply return io.EOF.
func (r *Reader) Seek(offset int64, whence int) (int64, error) {
	r.prevRune = -1
	var pos int64
	switch whence {
	default:
		return 0, errors.New("strings.Reader.Seek: invalid whence")
	case io.SeekStart:
		pos = offset
	case io.SeekCurrent:
		pos = r.i + offset
	case io.SeekEnd:
		pos = int64(len(r.s)) + offset
	}
	if pos < 0 {
		return 0, errors.New("strings.Reader.Seek: negative position")
	}
	r.i = pos
	return pos, nil
}
// WriteTo implements the [io.WriterTo] interface.
// It writes the unread portion of the string to w in one call and
// advances the read index by the number of bytes written.
func (r *Reader) WriteTo(w io.Writer) (n int64, err error) {
	r.prevRune = -1
	if r.i >= int64(len(r.s)) {
		return 0, nil
	}
	s := r.s[r.i:]
	m, err := io.WriteString(w, s)
	// A writer claiming to have written more than it was given violates
	// the io.Writer contract.
	if m > len(s) {
		panic("strings.Reader.WriteTo: invalid WriteString count")
	}
	r.i += int64(m)
	n = int64(m)
	if m != len(s) && err == nil {
		err = io.ErrShortWrite
	}
	return
}
// Reset resets the [Reader] to be reading from s.
func (r *Reader) Reset(s string) { *r = Reader{s, 0, -1} }
// NewReader returns a new [Reader] reading from s.
// It is similar to [bytes.NewBufferString] but more efficient and non-writable.
func NewReader(s string) *Reader { return &Reader{s, 0, -1} }

233
src/strings/reader_test.go Normal file
View File

@@ -0,0 +1,233 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings_test
import (
"bytes"
"fmt"
"io"
"strings"
"sync"
"testing"
)
// TestReader exercises Seek/Read combinations on a single shared Reader.
// The cases run in order and are stateful: the io.SeekCurrent cases
// depend on the position left behind by the preceding case.
func TestReader(t *testing.T) {
	r := strings.NewReader("0123456789")
	tests := []struct {
		off     int64  // seek offset
		seek    int    // seek whence
		n       int    // read buffer size
		want    string // expected read result
		wantpos int64  // expected position after seek (0 = don't check)
		readerr error  // expected read error
		seekerr string // expected seek error message ("" = none)
	}{
		{seek: io.SeekStart, off: 0, n: 20, want: "0123456789"},
		{seek: io.SeekStart, off: 1, n: 1, want: "1"},
		{seek: io.SeekCurrent, off: 1, wantpos: 3, n: 2, want: "34"},
		{seek: io.SeekStart, off: -1, seekerr: "strings.Reader.Seek: negative position"},
		{seek: io.SeekStart, off: 1 << 33, wantpos: 1 << 33, readerr: io.EOF},
		{seek: io.SeekCurrent, off: 1, wantpos: 1<<33 + 1, readerr: io.EOF},
		{seek: io.SeekStart, n: 5, want: "01234"},
		{seek: io.SeekCurrent, n: 5, want: "56789"},
		{seek: io.SeekEnd, off: -1, n: 1, wantpos: 9, want: "9"},
	}
	for i, tt := range tests {
		pos, err := r.Seek(tt.off, tt.seek)
		if err == nil && tt.seekerr != "" {
			t.Errorf("%d. want seek error %q", i, tt.seekerr)
			continue
		}
		if err != nil && err.Error() != tt.seekerr {
			t.Errorf("%d. seek error = %q; want %q", i, err.Error(), tt.seekerr)
			continue
		}
		if tt.wantpos != 0 && tt.wantpos != pos {
			t.Errorf("%d. pos = %d, want %d", i, pos, tt.wantpos)
		}
		buf := make([]byte, tt.n)
		n, err := r.Read(buf)
		if err != tt.readerr {
			t.Errorf("%d. read = %v; want %v", i, err, tt.readerr)
			continue
		}
		got := string(buf[:n])
		if got != tt.want {
			t.Errorf("%d. got %q; want %q", i, got, tt.want)
		}
	}
}
// TestReadAfterBigSeek verifies that reading after seeking far past the
// end of the string cleanly yields io.EOF.
func TestReadAfterBigSeek(t *testing.T) {
	r := strings.NewReader("0123456789")
	_, err := r.Seek(1<<31+5, io.SeekStart)
	if err != nil {
		t.Fatal(err)
	}
	buf := make([]byte, 10)
	n, err := r.Read(buf)
	if n != 0 || err != io.EOF {
		t.Errorf("Read = %d, %v; want 0, EOF", n, err)
	}
}
// TestReaderAt verifies ReadAt at assorted offsets, including reads
// that run past the end (io.EOF) and a negative offset error.
func TestReaderAt(t *testing.T) {
	r := strings.NewReader("0123456789")
	tests := []struct {
		off     int64
		n       int
		want    string
		wanterr any // error value, or expected error message string
	}{
		{0, 10, "0123456789", nil},
		{1, 10, "123456789", io.EOF},
		{1, 9, "123456789", nil},
		{11, 10, "", io.EOF},
		{0, 0, "", nil},
		{-1, 0, "", "strings.Reader.ReadAt: negative offset"},
	}
	for i, tt := range tests {
		b := make([]byte, tt.n)
		rn, err := r.ReadAt(b, tt.off)
		got := string(b[:rn])
		if got != tt.want {
			t.Errorf("%d. got %q; want %q", i, got, tt.want)
		}
		// Compare via %v so an error value and a message string both work.
		if fmt.Sprintf("%v", err) != fmt.Sprintf("%v", tt.wanterr) {
			t.Errorf("%d. got error = %v; want %v", i, err, tt.wanterr)
		}
	}
}
// TestReaderAtConcurrent runs concurrent ReadAt calls; it is meaningful
// mainly under the race detector.
func TestReaderAtConcurrent(t *testing.T) {
	// Test for the race detector, to verify ReadAt doesn't mutate
	// any state.
	r := strings.NewReader("0123456789")
	var wg sync.WaitGroup
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			var buf [1]byte
			r.ReadAt(buf[:], int64(i)) // result deliberately ignored; only racing matters
		}(i)
	}
	wg.Wait()
}
// TestEmptyReaderConcurrent runs concurrent zero-byte Reads; it is
// meaningful mainly under the race detector.
func TestEmptyReaderConcurrent(t *testing.T) {
	// Test for the race detector, to verify a Read that doesn't yield any bytes
	// is okay to use from multiple goroutines. This was our historic behavior.
	// See golang.org/issue/7856
	r := strings.NewReader("")
	var wg sync.WaitGroup
	for i := 0; i < 5; i++ {
		wg.Add(2)
		go func() {
			defer wg.Done()
			var buf [1]byte
			r.Read(buf[:])
		}()
		go func() {
			defer wg.Done()
			r.Read(nil)
		}()
	}
	wg.Wait()
}
// TestWriteTo checks WriteTo against every suffix of the source string:
// it must write exactly the remaining bytes, leave the reader empty,
// and report a nil error.
func TestWriteTo(t *testing.T) {
	const str = "0123456789"
	for i := 0; i <= len(str); i++ {
		suffix := str[i:]
		r := strings.NewReader(suffix)
		var sink bytes.Buffer
		n, err := r.WriteTo(&sink)
		if want := int64(len(suffix)); n != want {
			t.Errorf("got %v; want %v", n, want)
		}
		if err != nil {
			t.Errorf("for length %d: got error = %v; want nil", len(suffix), err)
		}
		if got := sink.String(); got != suffix {
			t.Errorf("got string %q; want %q", got, suffix)
		}
		if r.Len() != 0 {
			t.Errorf("reader contains %v bytes; want 0", r.Len())
		}
	}
}
// TestReaderLenSize verifies that Len shrinks as bytes are consumed
// while Size stays fixed at the length of the original string.
func TestReaderLenSize(t *testing.T) {
	r := strings.NewReader("abc")
	io.CopyN(io.Discard, r, 1)
	if got := r.Len(); got != 2 {
		t.Errorf("Len = %d; want 2", got)
	}
	if got := r.Size(); got != 3 {
		t.Errorf("Size = %d; want 3", got)
	}
}
// TestReaderReset verifies that Reset replaces the content and clears
// the pending-rune state left behind by a prior ReadRune.
func TestReaderReset(t *testing.T) {
	r := strings.NewReader("世界")
	if _, _, err := r.ReadRune(); err != nil {
		t.Errorf("ReadRune: unexpected error: %v", err)
	}
	const want = "abcdef"
	r.Reset(want)
	// After Reset the ReadRune marker must be gone, so UnreadRune fails.
	if err := r.UnreadRune(); err == nil {
		t.Errorf("UnreadRune: expected error, got nil")
	}
	buf, err := io.ReadAll(r)
	if err != nil {
		t.Errorf("ReadAll: unexpected error: %v", err)
	}
	if got := string(buf); got != want {
		t.Errorf("ReadAll: got %q, want %q", got, want)
	}
}
// TestReaderZero verifies that every method of a zero-value Reader
// behaves sanely: reads report io.EOF, unreads fail, and Seek/Size
// succeed without panicking.
func TestReaderZero(t *testing.T) {
	if l := (&strings.Reader{}).Len(); l != 0 {
		t.Errorf("Len: got %d, want 0", l)
	}
	if n, err := (&strings.Reader{}).Read(nil); n != 0 || err != io.EOF {
		t.Errorf("Read: got %d, %v; want 0, io.EOF", n, err)
	}
	if n, err := (&strings.Reader{}).ReadAt(nil, 11); n != 0 || err != io.EOF {
		t.Errorf("ReadAt: got %d, %v; want 0, io.EOF", n, err)
	}
	if b, err := (&strings.Reader{}).ReadByte(); b != 0 || err != io.EOF {
		t.Errorf("ReadByte: got %d, %v; want 0, io.EOF", b, err)
	}
	if ch, size, err := (&strings.Reader{}).ReadRune(); ch != 0 || size != 0 || err != io.EOF {
		t.Errorf("ReadRune: got %d, %d, %v; want 0, 0, io.EOF", ch, size, err)
	}
	// Seeking past the end of an empty reader is legal.
	if offset, err := (&strings.Reader{}).Seek(11, io.SeekStart); offset != 11 || err != nil {
		t.Errorf("Seek: got %d, %v; want 11, nil", offset, err)
	}
	if s := (&strings.Reader{}).Size(); s != 0 {
		t.Errorf("Size: got %d, want 0", s)
	}
	if (&strings.Reader{}).UnreadByte() == nil {
		t.Errorf("UnreadByte: got nil, want error")
	}
	if (&strings.Reader{}).UnreadRune() == nil {
		t.Errorf("UnreadRune: got nil, want error")
	}
	if n, err := (&strings.Reader{}).WriteTo(io.Discard); n != 0 || err != nil {
		t.Errorf("WriteTo: got %d, %v; want 0, nil", n, err)
	}
}

578
src/strings/replace.go Normal file
View File

@@ -0,0 +1,578 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings
import (
"io"
"sync"
)
// Replacer replaces a list of strings with replacements.
// It is safe for concurrent use by multiple goroutines.
type Replacer struct {
	once   sync.Once // guards buildOnce method
	r      replacer  // the algorithm selected by build; set once by buildOnce
	oldnew []string  // flattened old/new pairs; released (nil) after build
}

// replacer is the interface that a replacement algorithm needs to implement.
// Replace returns a transformed copy of s; WriteString streams the
// transformed s to w, returning bytes written and any write error.
type replacer interface {
	Replace(s string) string
	WriteString(w io.Writer, s string) (n int, err error)
}
// NewReplacer returns a new [Replacer] from a list of old, new string
// pairs. Replacements are performed in the order they appear in the
// target string, without overlapping matches. The old string
// comparisons are done in argument order.
//
// NewReplacer panics if given an odd number of arguments.
func NewReplacer(oldnew ...string) *Replacer {
	if len(oldnew)%2 != 0 {
		panic("strings.NewReplacer: odd argument count")
	}
	// Copy the pairs so later mutation of the caller's slice cannot
	// affect this Replacer.
	pairs := make([]string, len(oldnew))
	copy(pairs, oldnew)
	return &Replacer{oldnew: pairs}
}
// buildOnce constructs the replacement algorithm on first use and
// drops the oldnew pairs so they can be garbage collected.
func (r *Replacer) buildOnce() {
	r.r = r.build()
	r.oldnew = nil
}
// build selects and constructs the fastest replacer implementation for
// the configured old/new pairs:
//   - exactly one pair with a multi-byte old string: Boyer-Moore search
//     (singleStringReplacer);
//   - all olds and news are single bytes: a 256-byte lookup table
//     (byteReplacer);
//   - all olds are single bytes but news vary in length:
//     byteStringReplacer;
//   - anything else: the generic trie-based replacer.
func (r *Replacer) build() replacer {
	oldnew := r.oldnew
	if len(oldnew) == 2 && len(oldnew[0]) > 1 {
		return makeSingleStringReplacer(oldnew[0], oldnew[1])
	}
	allNewBytes := true
	for i := 0; i < len(oldnew); i += 2 {
		if len(oldnew[i]) != 1 {
			return makeGenericReplacer(oldnew)
		}
		if len(oldnew[i+1]) != 1 {
			allNewBytes = false
		}
	}
	if allNewBytes {
		// Start from the identity mapping, then overlay the pairs.
		bb := byteReplacer{}
		for i := range bb {
			bb[i] = byte(i)
		}
		// Walk the pairs back to front so that the first occurrence of
		// an old byte takes precedence over later duplicates.
		for i := len(oldnew) - 2; i >= 0; i -= 2 {
			bb[oldnew[i][0]] = oldnew[i+1][0]
		}
		return &bb
	}
	bs := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)}
	// Back to front again: the first occurrence of an old byte wins.
	for i := len(oldnew) - 2; i >= 0; i -= 2 {
		o := oldnew[i][0]
		if bs.replacements[o] == nil {
			// Record each distinct old byte once, so repetitions are
			// not counted multiple times. string([]byte{o}) rather than
			// string(o): converting through rune would UTF-8 encode
			// values >= 0x80 into two bytes.
			bs.toReplace = append(bs.toReplace, string([]byte{o}))
		}
		bs.replacements[o] = []byte(oldnew[i+1])
	}
	return &bs
}
// Replace returns a copy of s with all replacements performed.
func (r *Replacer) Replace(s string) string {
	r.once.Do(r.buildOnce) // lazily select and build the algorithm
	return r.r.Replace(s)
}

// WriteString writes s to w with all replacements performed.
// It returns the number of bytes written and any error from w.
func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
	r.once.Do(r.buildOnce) // lazily select and build the algorithm
	return r.r.WriteString(w, s)
}
// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
// and values may be empty. For example, the trie containing keys "ax", "ay",
// "bcbc", "x" and "xy" could have eight nodes:
//
//	n0  -
//	n1  a-
//	n2  .x+
//	n3  .y+
//	n4  b-
//	n5  .cbc+
//	n6  x+
//	n7  .y+
//
// n0 is the root node, and its children are n1, n4 and n6; n1's children are
// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
// (marked with a trailing "+") are complete keys.
type trieNode struct {
	// value is the value of the trie node's key/value pair. It is empty if
	// this node is not a complete key.
	value string
	// priority is the priority (higher is more important) of the trie node's
	// key/value pair; keys are not necessarily matched shortest- or longest-
	// first. Priority is positive if this node is a complete key, and zero
	// otherwise. In the example above, positive/zero priorities are marked
	// with a trailing "+" or "-".
	priority int

	// A trie node may have zero, one or more child nodes:
	//  * if the remaining fields are zero, there are no children.
	//  * if prefix and next are non-zero, there is one child in next.
	//  * if table is non-zero, it defines all the children.
	//
	// Prefixes are preferred over tables when there is one child, but the
	// root node always uses a table for lookup efficiency.

	// prefix is the difference in keys between this trie node and the next.
	// In the example above, node n4 has prefix "cbc" and n4's next node is n5.
	// Node n5 has no children and so has zero prefix, next and table fields.
	prefix string
	next   *trieNode

	// table is a lookup table indexed by the next byte in the key, after
	// remapping that byte through genericReplacer.mapping to create a dense
	// index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
	// 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
	// genericReplacer.tableSize will be 5. Node n0's table will be
	// []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
	// 'a', 'b' and 'x'.
	table []*trieNode
}
// add inserts the key/value pair into the trie rooted at t, splitting
// prefix nodes or creating lookup tables as needed. priority breaks
// ties between duplicate keys: the first insertion at a node wins.
func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
	if key == "" {
		if t.priority == 0 {
			// Keep only the first (highest-priority) value for this key.
			t.value = val
			t.priority = priority
		}
		return
	}
	if t.prefix != "" {
		// Need to split the prefix among multiple nodes.
		var n int // length of the longest common prefix
		for ; n < len(t.prefix) && n < len(key); n++ {
			if t.prefix[n] != key[n] {
				break
			}
		}
		if n == len(t.prefix) {
			// key extends past the whole prefix; recurse into next.
			t.next.add(key[n:], val, priority, r)
		} else if n == 0 {
			// First byte differs, start a new lookup table here. Looking up
			// what is currently t.prefix[0] will lead to prefixNode, and
			// looking up key[0] will lead to keyNode.
			var prefixNode *trieNode
			if len(t.prefix) == 1 {
				prefixNode = t.next
			} else {
				prefixNode = &trieNode{
					prefix: t.prefix[1:],
					next:   t.next,
				}
			}
			keyNode := new(trieNode)
			t.table = make([]*trieNode, r.tableSize)
			t.table[r.mapping[t.prefix[0]]] = prefixNode
			t.table[r.mapping[key[0]]] = keyNode
			t.prefix = ""
			t.next = nil
			keyNode.add(key[1:], val, priority, r)
		} else {
			// Insert new node after the common section of the prefix.
			next := &trieNode{
				prefix: t.prefix[n:],
				next:   t.next,
			}
			t.prefix = t.prefix[:n]
			t.next = next
			next.add(key[n:], val, priority, r)
		}
	} else if t.table != nil {
		// Insert into existing table.
		m := r.mapping[key[0]]
		if t.table[m] == nil {
			t.table[m] = new(trieNode)
		}
		t.table[m].add(key[1:], val, priority, r)
	} else {
		// Leaf with no children yet: store the whole remaining key as
		// a prefix edge to a new node holding the value.
		t.prefix = key
		t.next = new(trieNode)
		t.next.add("", val, priority, r)
	}
}
// lookup finds the highest-priority key that is a prefix of s, returning
// its value, the key's length in bytes, and whether a match was found.
// When ignoreRoot is true, a zero-length match at the root (an empty
// key) is not reported.
func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
	// Iterate down the trie to the end, and grab the value and keylen with
	// the highest priority.
	bestPriority := 0
	node := &r.root
	n := 0 // bytes of s consumed so far
	for node != nil {
		if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
			bestPriority = node.priority
			val = node.value
			keylen = n
			found = true
		}
		if s == "" {
			break
		}
		if node.table != nil {
			index := r.mapping[s[0]]
			if int(index) == r.tableSize {
				// Byte never occurs in any key; no deeper match possible.
				break
			}
			node = node.table[index]
			s = s[1:]
			n++
		} else if node.prefix != "" && HasPrefix(s, node.prefix) {
			n += len(node.prefix)
			s = s[len(node.prefix):]
			node = node.next
		} else {
			break
		}
	}
	return
}
// genericReplacer is the fully generic algorithm.
// It's used as a fallback when nothing faster can be used.
type genericReplacer struct {
	root trieNode
	// tableSize is the size of a trie node's lookup table. It is the number
	// of unique key bytes.
	tableSize int
	// mapping maps from key bytes to a dense index for trieNode.table.
	// Bytes that appear in no key map to tableSize (an out-of-range index).
	mapping [256]byte
}
// makeGenericReplacer builds the trie-based replacer for arbitrary
// old/new pairs. Earlier pairs get higher priority.
func makeGenericReplacer(oldnew []string) *genericReplacer {
	r := new(genericReplacer)
	// Find each byte used, then assign them each an index.
	for i := 0; i < len(oldnew); i += 2 {
		key := oldnew[i]
		for j := 0; j < len(key); j++ {
			r.mapping[key[j]] = 1 // mark byte as used; real index assigned below
		}
	}
	for _, b := range r.mapping {
		r.tableSize += int(b)
	}
	// Replace the 0/1 marks with dense indices; unused bytes get the
	// out-of-range index tableSize.
	var index byte
	for i, b := range r.mapping {
		if b == 0 {
			r.mapping[i] = byte(r.tableSize)
		} else {
			r.mapping[i] = index
			index++
		}
	}
	// Ensure root node uses a lookup table (for performance).
	r.root.table = make([]*trieNode, r.tableSize)
	for i := 0; i < len(oldnew); i += 2 {
		// len(oldnew)-i makes earlier pairs higher priority.
		r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
	}
	return r
}
// appendSliceWriter is a growable byte buffer used by
// genericReplacer.Replace to collect output in memory.
type appendSliceWriter []byte

// Write writes to the buffer to satisfy [io.Writer].
func (w *appendSliceWriter) Write(p []byte) (int, error) {
	*w = append(*w, p...)
	return len(p), nil
}

// WriteString writes to the buffer without string->[]byte->string allocations.
func (w *appendSliceWriter) WriteString(s string) (int, error) {
	*w = append(*w, s...)
	return len(s), nil
}
type stringWriter struct {
w io.Writer
}
func (w stringWriter) WriteString(s string) (int, error) {
return w.w.Write([]byte(s))
}
func getStringWriter(w io.Writer) io.StringWriter {
sw, ok := w.(io.StringWriter)
if !ok {
sw = stringWriter{w}
}
return sw
}
// Replace applies the replacer to s, accumulating the result in memory.
func (r *genericReplacer) Replace(s string) string {
	buf := make(appendSliceWriter, 0, len(s)) // len(s) is a reasonable initial capacity
	r.WriteString(&buf, s)
	return string(buf)
}
// WriteString streams s to w, substituting the highest-priority trie
// match at each position. Unmatched spans [last:i) are flushed in bulk.
func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	sw := getStringWriter(w)
	var last, wn int
	var prevMatchEmpty bool
	for i := 0; i <= len(s); {
		// Fast path: s[i] is not a prefix of any pattern.
		if i != len(s) && r.root.priority == 0 {
			index := int(r.mapping[s[i]])
			if index == r.tableSize || r.root.table[index] == nil {
				i++
				continue
			}
		}
		// Ignore the empty match iff the previous loop found the empty match.
		// This prevents an empty pattern from matching twice at one position.
		val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
		prevMatchEmpty = match && keylen == 0
		if match {
			wn, err = sw.WriteString(s[last:i])
			n += wn
			if err != nil {
				return
			}
			wn, err = sw.WriteString(val)
			n += wn
			if err != nil {
				return
			}
			i += keylen
			last = i
			continue
		}
		i++
	}
	if last != len(s) {
		// Flush the trailing unmatched span.
		wn, err = sw.WriteString(s[last:])
		n += wn
	}
	return
}
// singleStringReplacer is the implementation that's used when there is only
// one string to replace (and that string has more than one byte).
type singleStringReplacer struct {
	finder *stringFinder // Boyer-Moore searcher for the pattern
	// value is the new string that replaces that pattern when it's found.
	value string
}

// makeSingleStringReplacer builds a replacer for the single pattern->value pair.
func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
	return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
}
// Replace returns s with every non-overlapping occurrence of the
// pattern replaced by r.value. When the pattern never occurs, s is
// returned unchanged without allocating.
func (r *singleStringReplacer) Replace(s string) string {
	var sb Builder
	matched := false
	i := 0
	for {
		m := r.finder.next(s[i:])
		if m == -1 {
			break
		}
		matched = true
		sb.Grow(m + len(r.value))
		sb.WriteString(s[i : i+m])
		sb.WriteString(r.value)
		i += m + len(r.finder.pattern)
	}
	if !matched {
		return s
	}
	sb.WriteString(s[i:])
	return sb.String()
}
// WriteString streams s to w with every non-overlapping occurrence of
// the pattern replaced by r.value.
func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	sw := getStringWriter(w)
	var i, wn int
	for {
		match := r.finder.next(s[i:])
		if match == -1 {
			break
		}
		// Write the unmatched prefix, then the replacement value.
		wn, err = sw.WriteString(s[i : i+match])
		n += wn
		if err != nil {
			return
		}
		wn, err = sw.WriteString(r.value)
		n += wn
		if err != nil {
			return
		}
		i += match + len(r.finder.pattern)
	}
	// Flush whatever follows the final match.
	wn, err = sw.WriteString(s[i:])
	n += wn
	return
}
// byteReplacer is the implementation that's used when all the "old"
// and "new" values are single ASCII bytes.
// The array contains replacement bytes indexed by old byte.
type byteReplacer [256]byte

// Replace returns s with each byte c rewritten to r[c]. When no byte
// actually changes, s itself is returned without allocating.
func (r *byteReplacer) Replace(s string) string {
	var out []byte // allocated lazily, on the first changed byte
	for i := 0; i < len(s); i++ {
		c := s[i]
		if repl := r[c]; repl != c {
			if out == nil {
				out = []byte(s)
			}
			out[i] = repl
		}
	}
	if out == nil {
		return s
	}
	return string(out)
}
// WriteString writes s to w with each byte b replaced by r[b],
// batching runs of unchanged bytes into single writes.
func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	sw := getStringWriter(w)
	last := 0
	for i := 0; i < len(s); i++ {
		b := s[i]
		if r[b] == b {
			continue
		}
		// Flush the run of unchanged bytes before this one.
		if last != i {
			wn, err := sw.WriteString(s[last:i])
			n += wn
			if err != nil {
				return n, err
			}
		}
		last = i + 1
		// Slice the replacement byte directly out of the table to
		// write it without allocating.
		nw, err := w.Write(r[b : int(b)+1])
		n += nw
		if err != nil {
			return n, err
		}
	}
	if last != len(s) {
		nw, err := sw.WriteString(s[last:])
		n += nw
		if err != nil {
			return n, err
		}
	}
	return n, nil
}
// byteStringReplacer is the implementation that's used when all the
// "old" values are single ASCII bytes but the "new" values vary in size.
type byteStringReplacer struct {
	// replacements contains replacement byte slices indexed by old byte.
	// A nil []byte means that the old byte should not be replaced.
	replacements [256][]byte
	// toReplace keeps a list of bytes to replace. Depending on length of toReplace
	// and length of target string it may be faster to use Count, or a plain loop.
	// We store single byte as a string, because Count takes a string.
	toReplace []string
}
// countCutOff controls the ratio of a string length to a number of replacements
// at which (*byteStringReplacer).Replace switches algorithms.
// For strings with a higher ratio of length to replacements than that value,
// we call Count, for each replacement from toReplace.
// For strings with a lower ratio we use a simple loop, because of Count overhead.
// countCutOff is an empirically determined overhead multiplier.
// TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
const countCutOff = 8
// Replace returns s with each replaceable byte expanded to its
// replacement. It first computes the exact output size (so the result
// is built with a single allocation), then fills it in one pass.
func (r *byteStringReplacer) Replace(s string) string {
	newSize := len(s)
	anyChanges := false
	// Is it faster to use Count?
	if len(r.toReplace)*countCutOff <= len(s) {
		for _, x := range r.toReplace {
			if c := Count(s, x); c != 0 {
				// The -1 is because we are replacing 1 byte with len(replacements[b]) bytes.
				newSize += c * (len(r.replacements[x[0]]) - 1)
				anyChanges = true
			}
		}
	} else {
		for i := 0; i < len(s); i++ {
			b := s[i]
			if r.replacements[b] != nil {
				// See above for explanation of -1
				newSize += len(r.replacements[b]) - 1
				anyChanges = true
			}
		}
	}
	if !anyChanges {
		return s
	}
	// Second pass: copy, expanding each replaceable byte.
	buf := make([]byte, newSize)
	j := 0
	for i := 0; i < len(s); i++ {
		b := s[i]
		if r.replacements[b] != nil {
			j += copy(buf[j:], r.replacements[b])
		} else {
			buf[j] = b
			j++
		}
	}
	return string(buf)
}
// WriteString streams s to w, expanding each replaceable byte and
// batching runs of unchanged bytes into single writes.
func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	sw := getStringWriter(w)
	last := 0
	for i := 0; i < len(s); i++ {
		b := s[i]
		if r.replacements[b] == nil {
			continue
		}
		// Flush the run of unchanged bytes before this one.
		if last != i {
			nw, err := sw.WriteString(s[last:i])
			n += nw
			if err != nil {
				return n, err
			}
		}
		last = i + 1
		nw, err := w.Write(r.replacements[b])
		n += nw
		if err != nil {
			return n, err
		}
	}
	if last != len(s) {
		var nw int
		nw, err = sw.WriteString(s[last:])
		n += nw
	}
	return
}

583
src/strings/replace_test.go Normal file
View File

@@ -0,0 +1,583 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings_test
import (
"bytes"
"fmt"
. "strings"
"testing"
)
// htmlEscaper maps the five HTML-special characters to their entities;
// its 1-byte olds / multi-byte news exercise the byteStringReplacer path.
var htmlEscaper = NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	`"`, "&quot;",
	"'", "&apos;",
)

// htmlUnescaper is the inverse mapping; its multi-byte olds exercise
// the generic trie-based replacer path.
var htmlUnescaper = NewReplacer(
	"&amp;", "&",
	"&lt;", "<",
	"&gt;", ">",
	"&quot;", `"`,
	"&apos;", "'",
)
// oldHTMLEscape is the http package's old HTML escaping function, kept
// as a baseline for the Replacer benchmarks.
func oldHTMLEscape(s string) string {
	// "&" is escaped first so the ampersands introduced by the later
	// replacements are not escaped again.
	for _, pair := range [...][2]string{
		{"&", "&amp;"},
		{"<", "&lt;"},
		{">", "&gt;"},
		{`"`, "&quot;"},
		{"'", "&apos;"},
	} {
		s = Replace(s, pair[0], pair[1], -1)
	}
	return s
}
// capitalLetters has 1-byte olds and news, exercising the byteReplacer path.
var capitalLetters = NewReplacer("a", "A", "b", "B")
// TestReplacer tests the replacer implementations.
// It builds one table of (Replacer, input, want) cases spanning every
// algorithm NewReplacer can pick, then checks both Replace and
// WriteString (including the reported byte count) for each.
func TestReplacer(t *testing.T) {
	type testCase struct {
		r       *Replacer
		in, out string
	}
	var testCases []testCase
	// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
	str := func(b byte) string {
		return string([]byte{b})
	}
	var s []string
	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
	s = nil
	for i := 0; i < 256; i++ {
		s = append(s, str(byte(i)), str(byte(i+1)))
	}
	inc := NewReplacer(s...)
	// Test cases with 1-byte old strings, 1-byte new strings.
	testCases = append(testCases,
		testCase{capitalLetters, "brad", "BrAd"},
		testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
		testCase{capitalLetters, "", ""},
		testCase{inc, "brad", "csbe"},
		testCase{inc, "\x00\xff", "\x01\x00"},
		testCase{inc, "", ""},
		testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
	)
	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
	s = nil
	for i := 0; i < 256; i++ {
		n := i + 1 - 'a'
		if n < 1 {
			n = 1
		}
		s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
	}
	repeat := NewReplacer(s...)
	// Test cases with 1-byte old strings, variable length new strings.
	testCases = append(testCases,
		testCase{htmlEscaper, "No changes", "No changes"},
		testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
		testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
		testCase{htmlEscaper, "", ""},
		testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
		testCase{repeat, "abba", "abbbba"},
		testCase{repeat, "", ""},
		testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
	)
	// The remaining test cases have variable length old strings.
	testCases = append(testCases,
		testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
		testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
		testCase{htmlUnescaper, "", ""},
		testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
		testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
		testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
	)
	// gen1 has multiple old strings of variable length. There is no
	// overall non-empty common prefix, but some pairwise common prefixes.
	gen1 := NewReplacer(
		"aaa", "3[aaa]",
		"aa", "2[aa]",
		"a", "1[a]",
		"i", "i",
		"longerst", "most long",
		"longer", "medium",
		"long", "short",
		"xx", "xx",
		"x", "X",
		"X", "Y",
		"Y", "Z",
	)
	testCases = append(testCases,
		testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
		testCase{gen1, "long, longerst, longer", "short, most long, medium"},
		testCase{gen1, "xxxxx", "xxxxX"},
		testCase{gen1, "XiX", "YiY"},
		testCase{gen1, "", ""},
	)
	// gen2 has multiple old strings with no pairwise common prefix.
	gen2 := NewReplacer(
		"roses", "red",
		"violets", "blue",
		"sugar", "sweet",
	)
	testCases = append(testCases,
		testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
		testCase{gen2, "", ""},
	)
	// gen3 has multiple old strings with an overall common prefix.
	gen3 := NewReplacer(
		"abracadabra", "poof",
		"abracadabrakazam", "splat",
		"abraham", "lincoln",
		"abrasion", "scrape",
		"abraham", "isaac",
	)
	testCases = append(testCases,
		testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
		testCase{gen3, "abrasion abracad", "scrape abracad"},
		testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
		testCase{gen3, "", ""},
	)
	// foo{1,2,3,4} have multiple old strings with an overall common prefix
	// and 1- or 2- byte extensions from the common prefix.
	foo1 := NewReplacer(
		"foo1", "A",
		"foo2", "B",
		"foo3", "C",
	)
	foo2 := NewReplacer(
		"foo1", "A",
		"foo2", "B",
		"foo31", "C",
		"foo32", "D",
	)
	foo3 := NewReplacer(
		"foo11", "A",
		"foo12", "B",
		"foo31", "C",
		"foo32", "D",
	)
	foo4 := NewReplacer(
		"foo12", "B",
		"foo32", "D",
	)
	testCases = append(testCases,
		testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
		testCase{foo1, "", ""},
		testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
		testCase{foo2, "", ""},
		testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
		testCase{foo3, "", ""},
		testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
		testCase{foo4, "", ""},
	)
	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
	allBytes := make([]byte, 256)
	for i := range allBytes {
		allBytes[i] = byte(i)
	}
	allString := string(allBytes)
	genAll := NewReplacer(
		allString, "[all]",
		"\xff", "[ff]",
		"\x00", "[00]",
	)
	testCases = append(testCases,
		testCase{genAll, allString, "[all]"},
		testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
		testCase{genAll, "", ""},
	)
	// Test cases with empty old strings.
	blankToX1 := NewReplacer("", "X")
	blankToX2 := NewReplacer("", "X", "", "")
	blankHighPriority := NewReplacer("", "X", "o", "O")
	blankLowPriority := NewReplacer("o", "O", "", "X")
	blankNoOp1 := NewReplacer("", "")
	blankNoOp2 := NewReplacer("", "", "", "A")
	blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
	testCases = append(testCases,
		testCase{blankToX1, "foo", "XfXoXoX"},
		testCase{blankToX1, "", "X"},
		testCase{blankToX2, "foo", "XfXoXoX"},
		testCase{blankToX2, "", "X"},
		testCase{blankHighPriority, "oo", "XOXOX"},
		testCase{blankHighPriority, "ii", "XiXiX"},
		testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
		testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
		testCase{blankHighPriority, "", "X"},
		testCase{blankLowPriority, "oo", "OOX"},
		testCase{blankLowPriority, "ii", "XiXiX"},
		testCase{blankLowPriority, "oiio", "OXiXiOX"},
		testCase{blankLowPriority, "iooi", "XiOOXiX"},
		testCase{blankLowPriority, "", "X"},
		testCase{blankNoOp1, "foo", "foo"},
		testCase{blankNoOp1, "", ""},
		testCase{blankNoOp2, "foo", "foo"},
		testCase{blankNoOp2, "", ""},
		testCase{blankFoo, "foobarfoobaz", "XRXZX"},
		testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
		testCase{blankFoo, "", "X"},
	)
	// single string replacer
	abcMatcher := NewReplacer("abc", "[match]")
	testCases = append(testCases,
		testCase{abcMatcher, "", ""},
		testCase{abcMatcher, "ab", "ab"},
		testCase{abcMatcher, "abc", "[match]"},
		testCase{abcMatcher, "abcd", "[match]d"},
		testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
	)
	// Issue 6659 cases (more single string replacer)
	noHello := NewReplacer("Hello", "")
	testCases = append(testCases,
		testCase{noHello, "Hello", ""},
		testCase{noHello, "Hellox", "x"},
		testCase{noHello, "xHello", "x"},
		testCase{noHello, "xHellox", "xx"},
	)
	// No-arg test cases.
	nop := NewReplacer()
	testCases = append(testCases,
		testCase{nop, "abc", "abc"},
		testCase{nop, "", ""},
	)
	// Run the test cases.
	for i, tc := range testCases {
		if s := tc.r.Replace(tc.in); s != tc.out {
			t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
		}
		var buf bytes.Buffer
		n, err := tc.r.WriteString(&buf, tc.in)
		if err != nil {
			t.Errorf("%d. WriteString: %v", i, err)
			continue
		}
		got := buf.String()
		if got != tc.out {
			t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
			continue
		}
		if n != len(tc.out) {
			t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
				i, tc.in, n, len(tc.out), tc.out)
		}
	}
}
// algorithmTestCases pairs a Replacer with the concrete type name of the
// implementation NewReplacer is expected to select for it.
var algorithmTestCases = []struct {
	r    *Replacer
	want string
}{
	{capitalLetters, "*strings.byteReplacer"},
	{htmlEscaper, "*strings.byteStringReplacer"},
	{NewReplacer("12", "123"), "*strings.singleStringReplacer"},
	{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
	{NewReplacer("", "X"), "*strings.genericReplacer"},
	{NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
}

// TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
// NOTE(review): Replacer() is a test-only accessor, presumably exported
// via export_test.go — not visible in this file.
func TestPickAlgorithm(t *testing.T) {
	for i, tc := range algorithmTestCases {
		got := fmt.Sprintf("%T", tc.r.Replacer())
		if got != tc.want {
			t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
		}
	}
}
// errWriter always fails, so WriteString's error propagation can be tested.
type errWriter struct{}

func (errWriter) Write(p []byte) (n int, err error) {
	return 0, fmt.Errorf("unwritable")
}

// TestWriteStringError tests that WriteString returns an error
// received from the underlying io.Writer, for every algorithm.
func TestWriteStringError(t *testing.T) {
	for i, tc := range algorithmTestCases {
		n, err := tc.r.WriteString(errWriter{}, "abc")
		if n != 0 || err == nil || err.Error() != "unwritable" {
			t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err)
		}
	}
}
// TestGenericTrieBuilding verifies the structure of the generated trie. There
// is one node per line, and the key ending with the current line is in the
// trie if it ends with a "+".
// NOTE(review): PrintTrie() is a test-only accessor, presumably exported
// via export_test.go — not visible in this file.
func TestGenericTrieBuilding(t *testing.T) {
	testCases := []struct{ in, out string }{
		{"abc;abdef;abdefgh;xx;xy;z", `-
			a-
			.b-
			..c+
			..d-
			...ef+
			.....gh+
			x-
			.x+
			.y+
			z+
			`},
		{"abracadabra;abracadabrakazam;abraham;abrasion", `-
			a-
			.bra-
			....c-
			.....adabra+
			...........kazam+
			....h-
			.....am+
			....s-
			.....ion+
			`},
		{"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
			X+
			Y+
			a+
			.a+
			..a+
			i+
			l-
			.ong+
			....er+
			......st+
			x+
			.x+
			`},
		{"foo;;foo;foo1", `+
			f-
			.oo+
			...1+
			`},
	}
	for _, tc := range testCases {
		keys := Split(tc.in, ";")
		args := make([]string, len(keys)*2)
		for i, key := range keys {
			args[i*2] = key // new strings are all "", only trie shape matters
		}
		got := NewReplacer(args...).PrintTrie()
		// Remove tabs from tc.out
		wantbuf := make([]byte, 0, len(tc.out))
		for i := 0; i < len(tc.out); i++ {
			if tc.out[i] != '\t' {
				wantbuf = append(wantbuf, tc.out[i])
			}
		}
		want := string(wantbuf)
		if got != want {
			t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
		}
	}
}
func BenchmarkGenericNoMatch(b *testing.B) {
str := Repeat("A", 100) + Repeat("B", 100)
generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
for i := 0; i < b.N; i++ {
generic.Replace(str)
}
}
func BenchmarkGenericMatch1(b *testing.B) {
str := Repeat("a", 100) + Repeat("b", 100)
generic := NewReplacer("a", "A", "b", "B", "12", "123")
for i := 0; i < b.N; i++ {
generic.Replace(str)
}
}
func BenchmarkGenericMatch2(b *testing.B) {
str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
for i := 0; i < b.N; i++ {
htmlUnescaper.Replace(str)
}
}
func benchmarkSingleString(b *testing.B, pattern, text string) {
r := NewReplacer(pattern, "[match]")
b.SetBytes(int64(len(text)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
r.Replace(text)
}
}
func BenchmarkSingleMaxSkipping(b *testing.B) {
benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
}
func BenchmarkSingleLongSuffixFail(b *testing.B) {
benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
}
func BenchmarkSingleMatch(b *testing.B) {
benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
}
func BenchmarkByteByteNoMatch(b *testing.B) {
str := Repeat("A", 100) + Repeat("B", 100)
for i := 0; i < b.N; i++ {
capitalLetters.Replace(str)
}
}
func BenchmarkByteByteMatch(b *testing.B) {
str := Repeat("a", 100) + Repeat("b", 100)
for i := 0; i < b.N; i++ {
capitalLetters.Replace(str)
}
}
func BenchmarkByteStringMatch(b *testing.B) {
str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
for i := 0; i < b.N; i++ {
htmlEscaper.Replace(str)
}
}
func BenchmarkHTMLEscapeNew(b *testing.B) {
str := "I <3 to escape HTML & other text too."
for i := 0; i < b.N; i++ {
htmlEscaper.Replace(str)
}
}
func BenchmarkHTMLEscapeOld(b *testing.B) {
str := "I <3 to escape HTML & other text too."
for i := 0; i < b.N; i++ {
oldHTMLEscape(str)
}
}
func BenchmarkByteStringReplacerWriteString(b *testing.B) {
str := Repeat("I <3 to escape HTML & other text too.", 100)
buf := new(bytes.Buffer)
for i := 0; i < b.N; i++ {
htmlEscaper.WriteString(buf, str)
buf.Reset()
}
}
func BenchmarkByteReplacerWriteString(b *testing.B) {
str := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
buf := new(bytes.Buffer)
for i := 0; i < b.N; i++ {
capitalLetters.WriteString(buf, str)
buf.Reset()
}
}
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
func BenchmarkByteByteReplaces(b *testing.B) {
	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		s := Replace(input, "a", "A", -1)
		Replace(s, "b", "B", -1)
	}
}
// BenchmarkByteByteMap compares byteByteImpl against Map.
func BenchmarkByteByteMap(b *testing.B) {
	input := Repeat("a", 100) + Repeat("b", 100)
	// Mapping function equivalent to the capitalLetters replacer.
	upper := func(r rune) rune {
		if r == 'a' {
			return 'A'
		}
		if r == 'b' {
			return 'B'
		}
		return r
	}
	for n := 0; n < b.N; n++ {
		Map(upper, input)
	}
}
// mapdata holds the inputs for BenchmarkMap: an all-ASCII string and a
// non-ASCII (Greek, multi-byte UTF-8) string.
var mapdata = []struct{ name, data string }{
	{"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
	{"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
}
// BenchmarkMap measures Map over ASCII and non-ASCII inputs, both with
// a mapping that leaves every rune unchanged ("identity") and one that
// upper-cases Latin and Greek letters ("change").
func BenchmarkMap(b *testing.B) {
	identity := func(r rune) rune { return r }
	b.Run("identity", func(b *testing.B) {
		for _, md := range mapdata {
			b.Run(md.name, func(b *testing.B) {
				for n := 0; n < b.N; n++ {
					Map(identity, md.data)
				}
			})
		}
	})
	toUpper := func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z':
			return r + 'A' - 'a'
		case 'α' <= r && r <= 'ω':
			return r + 'Α' - 'α'
		}
		return r
	}
	b.Run("change", func(b *testing.B) {
		for _, md := range mapdata {
			b.Run(md.name, func(b *testing.B) {
				for n := 0; n < b.N; n++ {
					Map(toUpper, md.data)
				}
			})
		}
	})
}

117
src/strings/search.go Normal file
View File

@@ -0,0 +1,117 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings
// stringFinder efficiently finds strings in a source text. It's implemented
// using the Boyer-Moore string search algorithm:
// https://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
// https://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged
// document uses 1-based indexing)
//
// A stringFinder is constructed by makeStringFinder, which precomputes
// both skip tables below; use next to search a text.
type stringFinder struct {
	// pattern is the string that we are searching for in the text.
	pattern string

	// badCharSkip[b] contains the distance between the last byte of pattern
	// and the rightmost occurrence of b in pattern. If b is not in pattern,
	// badCharSkip[b] is len(pattern).
	//
	// Whenever a mismatch is found with byte b in the text, we can safely
	// shift the matching frame at least badCharSkip[b] until the next time
	// the matching char could be in alignment.
	badCharSkip [256]int

	// goodSuffixSkip[i] defines how far we can shift the matching frame given
	// that the suffix pattern[i+1:] matches, but the byte pattern[i] does
	// not. There are two cases to consider:
	//
	// 1. The matched suffix occurs elsewhere in pattern (with a different
	// byte preceding it that we might possibly match). In this case, we can
	// shift the matching frame to align with the next suffix chunk. For
	// example, the pattern "mississi" has the suffix "issi" next occurring
	// (in right-to-left order) at index 1, so goodSuffixSkip[3] ==
	// shift+len(suffix) == 3+4 == 7.
	//
	// 2. If the matched suffix does not occur elsewhere in pattern, then the
	// matching frame may share part of its prefix with the end of the
	// matching suffix. In this case, goodSuffixSkip[i] will contain how far
	// to shift the frame to align this portion of the prefix to the
	// suffix. For example, in the pattern "abcxxxabc", when the first
	// mismatch from the back is found to be in position 3, the matching
	// suffix "xxabc" is not found elsewhere in the pattern. However, its
	// rightmost "abc" (at position 6) is a prefix of the whole pattern, so
	// goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11.
	goodSuffixSkip []int
}
// makeStringFinder precomputes the Boyer-Moore skip tables (bad-character
// and good-suffix) for pattern and returns a stringFinder ready for use
// with next.
func makeStringFinder(pattern string) *stringFinder {
	f := &stringFinder{
		pattern:        pattern,
		goodSuffixSkip: make([]int, len(pattern)),
	}
	// last is the index of the last character in the pattern.
	last := len(pattern) - 1

	// Build bad character table.
	// Bytes not in the pattern can skip one pattern's length.
	for i := range f.badCharSkip {
		f.badCharSkip[i] = len(pattern)
	}
	// The loop condition is < instead of <= so that the last byte does not
	// have a zero distance to itself. Finding this byte out of place implies
	// that it is not in the last position.
	for i := 0; i < last; i++ {
		f.badCharSkip[pattern[i]] = last - i
	}

	// Build good suffix table.
	// First pass: set each value to the next index which starts a prefix of
	// pattern.
	lastPrefix := last
	for i := last; i >= 0; i-- {
		if HasPrefix(pattern, pattern[i+1:]) {
			lastPrefix = i + 1
		}
		// lastPrefix is the shift, and (last-i) is len(suffix).
		f.goodSuffixSkip[i] = lastPrefix + last - i
	}
	// Second pass: find repeats of pattern's suffix starting from the front.
	for i := 0; i < last; i++ {
		lenSuffix := longestCommonSuffix(pattern, pattern[1:i+1])
		if pattern[i-lenSuffix] != pattern[last-lenSuffix] {
			// (last-i) is the shift, and lenSuffix is len(suffix).
			f.goodSuffixSkip[last-lenSuffix] = lenSuffix + last - i
		}
	}
	return f
}
// longestCommonSuffix returns the length of the longest suffix shared by
// a and b, comparing bytes from the ends of both strings.
func longestCommonSuffix(a, b string) (i int) {
	for i < len(a) && i < len(b) && a[len(a)-1-i] == b[len(b)-1-i] {
		i++
	}
	return
}
// next returns the index in text of the first occurrence of the pattern. If
// the pattern is not found, it returns -1.
func (f *stringFinder) next(text string) int {
	// i is the position in text aligned with the last byte of the pattern.
	i := len(f.pattern) - 1
	for i < len(text) {
		// Compare backwards from the end until the first unmatching character.
		j := len(f.pattern) - 1
		for j >= 0 && text[i] == f.pattern[j] {
			i--
			j--
		}
		if j < 0 {
			return i + 1 // match
		}
		// Both skip rules are safe underestimates of the true shift, so
		// advance by whichever allows the larger jump.
		i += max(f.badCharSkip[text[i]], f.goodSuffixSkip[j])
	}
	return -1
}

View File

@@ -0,0 +1,90 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings_test
import (
"reflect"
. "strings"
"testing"
)
// TestFinderNext checks the Boyer-Moore searcher against a table of
// pattern/text pairs, including empty strings and near-miss suffixes.
func TestFinderNext(t *testing.T) {
	tests := []struct {
		pat, text string
		index     int
	}{
		{"", "", 0},
		{"", "abc", 0},
		{"abc", "", -1},
		{"abc", "abc", 0},
		{"d", "abcdefg", 3},
		{"nan", "banana", 2},
		{"pan", "anpanman", 2},
		{"nnaaman", "anpanmanam", -1},
		{"abcd", "abc", -1},
		{"abcd", "bcd", -1},
		{"bcd", "abcd", 1},
		{"abc", "acca", -1},
		{"aa", "aaa", 0},
		{"baa", "aaaaa", -1},
		{"at that", "which finally halts. at that point", 22},
	}

	for _, test := range tests {
		if got, want := StringFind(test.pat, test.text), test.index; got != want {
			t.Errorf("stringFind(%q, %q) got %d, want %d\n", test.pat, test.text, got, want)
		}
	}
}
// TestFinderCreation checks the precomputed bad-character and
// good-suffix tables for several patterns. A zero entry in the expected
// bad-character table stands for len(pattern) (byte absent from pattern).
func TestFinderCreation(t *testing.T) {
	cases := []struct {
		pattern string
		bad     [256]int
		suf     []int
	}{
		{
			"abc",
			[256]int{'a': 2, 'b': 1, 'c': 3},
			[]int{5, 4, 1},
		},
		{
			"mississi",
			[256]int{'i': 3, 'm': 7, 's': 1},
			[]int{15, 14, 13, 7, 11, 10, 7, 1},
		},
		// From https://www.cs.utexas.edu/~moore/publications/fstrpos.pdf
		{
			"abcxxxabc",
			[256]int{'a': 2, 'b': 1, 'c': 6, 'x': 3},
			[]int{14, 13, 12, 11, 10, 9, 11, 10, 1},
		},
		{
			"abyxcdeyx",
			[256]int{'a': 8, 'b': 7, 'c': 4, 'd': 3, 'e': 2, 'y': 1, 'x': 5},
			[]int{17, 16, 15, 14, 13, 12, 7, 10, 1},
		},
	}

	for _, c := range cases {
		bad, good := DumpTables(c.pattern)
		for i, got := range bad {
			want := c.bad[i]
			if want == 0 {
				// Bytes not present in the pattern skip its full length.
				want = len(c.pattern)
			}
			if got != want {
				t.Errorf("boyerMoore(%q) bad['%c']: got %d want %d", c.pattern, i, got, want)
			}
		}
		if !reflect.DeepEqual(good, c.suf) {
			t.Errorf("boyerMoore(%q) got %v want %v", c.pattern, good, c.suf)
		}
	}
}

1246
src/strings/strings.go Normal file

File diff suppressed because it is too large Load Diff

2088
src/strings/strings_test.go Normal file

File diff suppressed because it is too large Load Diff