Initial commit: Go 1.23 release state
This commit is contained in:
110
src/strings/builder.go
Normal file
110
src/strings/builder.go
Normal file
@@ -0,0 +1,110 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings
|
||||
|
||||
import (
|
||||
"internal/abi"
|
||||
"internal/bytealg"
|
||||
"unicode/utf8"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Builder accumulates bytes through its [Builder.Write] family of methods
// and hands them back as a string while keeping memory copying to a minimum.
// A zero Builder needs no initialization.
// A Builder that is no longer zero must not be copied.
type Builder struct {
	// addr holds the address of the receiver; copyCheck compares against
	// it to detect a Builder that was copied by value.
	addr *Builder

	// buf holds the accumulated bytes. It must never be handed to
	// external users: String converts it to a string using unsafe, and
	// the bytes between len(buf) and cap(buf) may be uninitialized.
	buf []byte
}
|
||||
|
||||
func (b *Builder) copyCheck() {
|
||||
if b.addr == nil {
|
||||
// This hack works around a failing of Go's escape analysis
|
||||
// that was causing b to escape and be heap allocated.
|
||||
// See issue 23382.
|
||||
// TODO: once issue 7921 is fixed, this should be reverted to
|
||||
// just "b.addr = b".
|
||||
b.addr = (*Builder)(abi.NoEscape(unsafe.Pointer(b)))
|
||||
} else if b.addr != b {
|
||||
panic("strings: illegal use of non-zero Builder copied by value")
|
||||
}
|
||||
}
|
||||
|
||||
// String returns the accumulated string.
|
||||
func (b *Builder) String() string {
|
||||
return unsafe.String(unsafe.SliceData(b.buf), len(b.buf))
|
||||
}
|
||||
|
||||
// Len returns the number of accumulated bytes; b.Len() == len(b.String()).
|
||||
func (b *Builder) Len() int { return len(b.buf) }
|
||||
|
||||
// Cap returns the capacity of the builder's underlying byte slice. It is the
|
||||
// total space allocated for the string being built and includes any bytes
|
||||
// already written.
|
||||
func (b *Builder) Cap() int { return cap(b.buf) }
|
||||
|
||||
// Reset resets the [Builder] to be empty.
|
||||
func (b *Builder) Reset() {
|
||||
b.addr = nil
|
||||
b.buf = nil
|
||||
}
|
||||
|
||||
// grow copies the buffer to a new, larger buffer so that there are at least n
|
||||
// bytes of capacity beyond len(b.buf).
|
||||
func (b *Builder) grow(n int) {
|
||||
buf := bytealg.MakeNoZero(2*cap(b.buf) + n)[:len(b.buf)]
|
||||
copy(buf, b.buf)
|
||||
b.buf = buf
|
||||
}
|
||||
|
||||
// Grow grows b's capacity, if necessary, to guarantee space for
|
||||
// another n bytes. After Grow(n), at least n bytes can be written to b
|
||||
// without another allocation. If n is negative, Grow panics.
|
||||
func (b *Builder) Grow(n int) {
|
||||
b.copyCheck()
|
||||
if n < 0 {
|
||||
panic("strings.Builder.Grow: negative count")
|
||||
}
|
||||
if cap(b.buf)-len(b.buf) < n {
|
||||
b.grow(n)
|
||||
}
|
||||
}
|
||||
|
||||
// Write appends the contents of p to b's buffer.
|
||||
// Write always returns len(p), nil.
|
||||
func (b *Builder) Write(p []byte) (int, error) {
|
||||
b.copyCheck()
|
||||
b.buf = append(b.buf, p...)
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
// WriteByte appends the byte c to b's buffer.
|
||||
// The returned error is always nil.
|
||||
func (b *Builder) WriteByte(c byte) error {
|
||||
b.copyCheck()
|
||||
b.buf = append(b.buf, c)
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteRune appends the UTF-8 encoding of Unicode code point r to b's buffer.
|
||||
// It returns the length of r and a nil error.
|
||||
func (b *Builder) WriteRune(r rune) (int, error) {
|
||||
b.copyCheck()
|
||||
n := len(b.buf)
|
||||
b.buf = utf8.AppendRune(b.buf, r)
|
||||
return len(b.buf) - n, nil
|
||||
}
|
||||
|
||||
// WriteString appends the contents of s to b's buffer.
|
||||
// It returns the length of s and a nil error.
|
||||
func (b *Builder) WriteString(s string) (int, error) {
|
||||
b.copyCheck()
|
||||
b.buf = append(b.buf, s...)
|
||||
return len(s), nil
|
||||
}
|
||||
400
src/strings/builder_test.go
Normal file
400
src/strings/builder_test.go
Normal file
@@ -0,0 +1,400 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
. "strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// check verifies that b currently holds want. When the contents match it
// additionally verifies that Len agrees with the string length and that Cap
// is not smaller than it.
func check(t *testing.T, b *Builder, want string) {
	t.Helper()
	s := b.String()
	if s != want {
		t.Errorf("String: got %#q; want %#q", s, want)
		return
	}
	size := len(s)
	if l := b.Len(); l != size {
		t.Errorf("Len: got %d; but len(String()) is %d", l, size)
	}
	if c := b.Cap(); c < size {
		t.Errorf("Cap: got %d; but len(String()) is %d", c, size)
	}
}
|
||||
|
||||
func TestBuilder(t *testing.T) {
|
||||
var b Builder
|
||||
check(t, &b, "")
|
||||
n, err := b.WriteString("hello")
|
||||
if err != nil || n != 5 {
|
||||
t.Errorf("WriteString: got %d,%s; want 5,nil", n, err)
|
||||
}
|
||||
check(t, &b, "hello")
|
||||
if err = b.WriteByte(' '); err != nil {
|
||||
t.Errorf("WriteByte: %s", err)
|
||||
}
|
||||
check(t, &b, "hello ")
|
||||
n, err = b.WriteString("world")
|
||||
if err != nil || n != 5 {
|
||||
t.Errorf("WriteString: got %d,%s; want 5,nil", n, err)
|
||||
}
|
||||
check(t, &b, "hello world")
|
||||
}
|
||||
|
||||
func TestBuilderString(t *testing.T) {
|
||||
var b Builder
|
||||
b.WriteString("alpha")
|
||||
check(t, &b, "alpha")
|
||||
s1 := b.String()
|
||||
b.WriteString("beta")
|
||||
check(t, &b, "alphabeta")
|
||||
s2 := b.String()
|
||||
b.WriteString("gamma")
|
||||
check(t, &b, "alphabetagamma")
|
||||
s3 := b.String()
|
||||
|
||||
// Check that subsequent operations didn't change the returned strings.
|
||||
if want := "alpha"; s1 != want {
|
||||
t.Errorf("first String result is now %q; want %q", s1, want)
|
||||
}
|
||||
if want := "alphabeta"; s2 != want {
|
||||
t.Errorf("second String result is now %q; want %q", s2, want)
|
||||
}
|
||||
if want := "alphabetagamma"; s3 != want {
|
||||
t.Errorf("third String result is now %q; want %q", s3, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuilderReset(t *testing.T) {
|
||||
var b Builder
|
||||
check(t, &b, "")
|
||||
b.WriteString("aaa")
|
||||
s := b.String()
|
||||
check(t, &b, "aaa")
|
||||
b.Reset()
|
||||
check(t, &b, "")
|
||||
|
||||
// Ensure that writing after Reset doesn't alter
|
||||
// previously returned strings.
|
||||
b.WriteString("bbb")
|
||||
check(t, &b, "bbb")
|
||||
if want := "aaa"; s != want {
|
||||
t.Errorf("previous String result changed after Reset: got %q; want %q", s, want)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuilderGrow verifies that Grow reserves enough capacity for a
// following Write to need no further allocation, and that a negative
// argument makes Grow panic.
func TestBuilderGrow(t *testing.T) {
	sizes := []int{0, 100, 1000, 10000, 100000}
	for _, growLen := range sizes {
		payload := bytes.Repeat([]byte{'a'}, growLen)
		run := func() {
			var b Builder
			b.Grow(growLen) // should be only alloc, when growLen > 0
			if b.Cap() < growLen {
				t.Fatalf("growLen=%d: Cap() is lower than growLen", growLen)
			}
			b.Write(payload)
			if b.String() != string(payload) {
				t.Fatalf("growLen=%d: bad data written after Grow", growLen)
			}
		}
		got := int(testing.AllocsPerRun(100, run))
		want := 0
		if growLen > 0 {
			want = 1
		}
		if got != want {
			t.Errorf("growLen=%d: got %d allocs during Write; want %v", growLen, got, want)
		}
	}

	// A negative count must make Grow panic.
	var a Builder
	n := -1
	defer func() {
		if recover() == nil {
			t.Errorf("a.Grow(%d) should panic()", n)
		}
	}()
	a.Grow(n)
}
|
||||
|
||||
func TestBuilderWrite2(t *testing.T) {
|
||||
const s0 = "hello 世界"
|
||||
for _, tt := range []struct {
|
||||
name string
|
||||
fn func(b *Builder) (int, error)
|
||||
n int
|
||||
want string
|
||||
}{
|
||||
{
|
||||
"Write",
|
||||
func(b *Builder) (int, error) { return b.Write([]byte(s0)) },
|
||||
len(s0),
|
||||
s0,
|
||||
},
|
||||
{
|
||||
"WriteRune",
|
||||
func(b *Builder) (int, error) { return b.WriteRune('a') },
|
||||
1,
|
||||
"a",
|
||||
},
|
||||
{
|
||||
"WriteRuneWide",
|
||||
func(b *Builder) (int, error) { return b.WriteRune('世') },
|
||||
3,
|
||||
"世",
|
||||
},
|
||||
{
|
||||
"WriteString",
|
||||
func(b *Builder) (int, error) { return b.WriteString(s0) },
|
||||
len(s0),
|
||||
s0,
|
||||
},
|
||||
} {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
var b Builder
|
||||
n, err := tt.fn(&b)
|
||||
if err != nil {
|
||||
t.Fatalf("first call: got %s", err)
|
||||
}
|
||||
if n != tt.n {
|
||||
t.Errorf("first call: got n=%d; want %d", n, tt.n)
|
||||
}
|
||||
check(t, &b, tt.want)
|
||||
|
||||
n, err = tt.fn(&b)
|
||||
if err != nil {
|
||||
t.Fatalf("second call: got %s", err)
|
||||
}
|
||||
if n != tt.n {
|
||||
t.Errorf("second call: got n=%d; want %d", n, tt.n)
|
||||
}
|
||||
check(t, &b, tt.want+tt.want)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuilderWriteByte(t *testing.T) {
|
||||
var b Builder
|
||||
if err := b.WriteByte('a'); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if err := b.WriteByte(0); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
check(t, &b, "a\x00")
|
||||
}
|
||||
|
||||
// TestBuilderAllocs guards against issue 23382: copyCheck must not force
// the Builder to escape and be heap allocated, so building a short string
// costs exactly one allocation.
func TestBuilderAllocs(t *testing.T) {
	build := func() {
		var b Builder
		b.Grow(5)
		b.WriteString("abcde")
		_ = b.String()
	}
	if allocs := testing.AllocsPerRun(10000, build); allocs != 1 {
		t.Errorf("Builder allocs = %v; want 1", allocs)
	}
}
|
||||
|
||||
// TestBuilderCopyPanic copies a used Builder by value and verifies which
// methods panic when called on the copy: String, Len, Cap and Reset must
// not, while Write, WriteByte, WriteString, WriteRune and Grow must.
func TestBuilderCopyPanic(t *testing.T) {
	cases := []struct {
		name      string
		wantPanic bool
		fn        func()
	}{
		{"String", false, func() {
			var a Builder
			a.WriteByte('x')
			b := a
			_ = b.String() // appease vet
		}},
		{"Len", false, func() {
			var a Builder
			a.WriteByte('x')
			b := a
			b.Len()
		}},
		{"Cap", false, func() {
			var a Builder
			a.WriteByte('x')
			b := a
			b.Cap()
		}},
		{"Reset", false, func() {
			var a Builder
			a.WriteByte('x')
			b := a
			b.Reset()
			b.WriteByte('y')
		}},
		{"Write", true, func() {
			var a Builder
			a.Write([]byte("x"))
			b := a
			b.Write([]byte("y"))
		}},
		{"WriteByte", true, func() {
			var a Builder
			a.WriteByte('x')
			b := a
			b.WriteByte('y')
		}},
		{"WriteString", true, func() {
			var a Builder
			a.WriteString("x")
			b := a
			b.WriteString("y")
		}},
		{"WriteRune", true, func() {
			var a Builder
			a.WriteRune('x')
			b := a
			b.WriteRune('y')
		}},
		{"Grow", true, func() {
			var a Builder
			a.Grow(1)
			b := a
			b.Grow(2)
		}},
	}

	// panicked runs fn on its own goroutine and reports whether it
	// panicked.
	panicked := func(fn func()) bool {
		result := make(chan bool)
		go func() {
			defer func() { result <- recover() != nil }()
			fn()
		}()
		return <-result
	}

	for _, tc := range cases {
		if got := panicked(tc.fn); got != tc.wantPanic {
			t.Errorf("%s: panicked = %v; want %v", tc.name, got, tc.wantPanic)
		}
	}
}
|
||||
|
||||
func TestBuilderWriteInvalidRune(t *testing.T) {
|
||||
// Invalid runes, including negative ones, should be written as
|
||||
// utf8.RuneError.
|
||||
for _, r := range []rune{-1, utf8.MaxRune + 1} {
|
||||
var b Builder
|
||||
b.WriteRune(r)
|
||||
check(t, &b, "\uFFFD")
|
||||
}
|
||||
}
|
||||
|
||||
var (
	// someBytes is the payload the string-building benchmarks write.
	someBytes = []byte("some bytes sdljlk jsklj3lkjlk djlkjw")

	// sinkS is assigned the string produced by each benchmark iteration.
	sinkS string
)
|
||||
|
||||
// benchmarkBuilder runs f as three sub-benchmarks, each with allocation
// reporting enabled: one write without Grow, three writes without Grow and
// three writes with Grow.
func benchmarkBuilder(b *testing.B, f func(b *testing.B, numWrite int, grow bool)) {
	configs := []struct {
		name     string
		numWrite int
		grow     bool
	}{
		{"1Write_NoGrow", 1, false},
		{"3Write_NoGrow", 3, false},
		{"3Write_Grow", 3, true},
	}
	for i := range configs {
		cfg := configs[i]
		b.Run(cfg.name, func(b *testing.B) {
			b.ReportAllocs()
			f(b, cfg.numWrite, cfg.grow)
		})
	}
}
|
||||
|
||||
func BenchmarkBuildString_Builder(b *testing.B) {
|
||||
benchmarkBuilder(b, func(b *testing.B, numWrite int, grow bool) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
var buf Builder
|
||||
if grow {
|
||||
buf.Grow(len(someBytes) * numWrite)
|
||||
}
|
||||
for i := 0; i < numWrite; i++ {
|
||||
buf.Write(someBytes)
|
||||
}
|
||||
sinkS = buf.String()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkBuildString_WriteString(b *testing.B) {
|
||||
someString := string(someBytes)
|
||||
benchmarkBuilder(b, func(b *testing.B, numWrite int, grow bool) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
var buf Builder
|
||||
if grow {
|
||||
buf.Grow(len(someString) * numWrite)
|
||||
}
|
||||
for i := 0; i < numWrite; i++ {
|
||||
buf.WriteString(someString)
|
||||
}
|
||||
sinkS = buf.String()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkBuildString_ByteBuffer(b *testing.B) {
|
||||
benchmarkBuilder(b, func(b *testing.B, numWrite int, grow bool) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
var buf bytes.Buffer
|
||||
if grow {
|
||||
buf.Grow(len(someBytes) * numWrite)
|
||||
}
|
||||
for i := 0; i < numWrite; i++ {
|
||||
buf.Write(someBytes)
|
||||
}
|
||||
sinkS = buf.String()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestBuilderGrowSizeclasses verifies that writing 19 bytes after Grow(18)
// costs no more than one allocation.
func TestBuilderGrowSizeclasses(t *testing.T) {
	payload := Repeat("a", 19)
	build := func() {
		var b Builder
		b.Grow(18)
		b.WriteString(payload)
		_ = b.String()
	}
	if allocs := testing.AllocsPerRun(100, build); allocs > 1 {
		t.Fatalf("unexpected amount of allocations: %v, want: 1", allocs)
	}
}
|
||||
23
src/strings/clone.go
Normal file
23
src/strings/clone.go
Normal file
@@ -0,0 +1,23 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings
|
||||
|
||||
import (
|
||||
"internal/stringslite"
|
||||
)
|
||||
|
||||
// Clone returns a fresh copy of s.
|
||||
// It guarantees to make a copy of s into a new allocation,
|
||||
// which can be important when retaining only a small substring
|
||||
// of a much larger string. Using Clone can help such programs
|
||||
// use less memory. Of course, since using Clone makes a copy,
|
||||
// overuse of Clone can make programs use more memory.
|
||||
// Clone should typically be used only rarely, and only when
|
||||
// profiling indicates that it is needed.
|
||||
// For strings of length zero the string "" will be returned
|
||||
// and no allocation is made.
|
||||
func Clone(s string) string {
|
||||
return stringslite.Clone(s)
|
||||
}
|
||||
45
src/strings/clone_test.go
Normal file
45
src/strings/clone_test.go
Normal file
@@ -0,0 +1,45 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings_test
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// emptyString is a zero-value string; TestClone compares its data pointer
// with that of clones of empty inputs.
var emptyString string

// TestClone verifies that Clone returns an equal string, that a clone of a
// non-empty input does not share the input's backing memory, and that a
// clone of an empty input has the same data pointer as emptyString.
func TestClone(t *testing.T) {
	inputs := []string{
		"",
		strings.Clone(""),
		strings.Repeat("a", 42)[:0],
		"short",
		strings.Repeat("a", 42),
	}
	for _, in := range inputs {
		out := strings.Clone(in)
		if out != in {
			t.Errorf("Clone(%q) = %q; want %q", in, out, in)
		}

		inData, outData := unsafe.StringData(in), unsafe.StringData(out)
		if len(in) == 0 {
			if outData != unsafe.StringData(emptyString) {
				t.Errorf("Clone(%#v) return value should be equal to empty string.", inData)
			}
			continue
		}
		if outData == inData {
			t.Errorf("Clone(%q) return value should not reference inputs backing memory.", in)
		}
	}
}
|
||||
|
||||
func BenchmarkClone(b *testing.B) {
|
||||
var str = strings.Repeat("a", 42)
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
stringSink = strings.Clone(str)
|
||||
}
|
||||
}
|
||||
17
src/strings/compare.go
Normal file
17
src/strings/compare.go
Normal file
@@ -0,0 +1,17 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings
|
||||
|
||||
import "internal/bytealg"
|
||||
|
||||
// Compare returns an integer comparing two strings lexicographically.
|
||||
// The result will be 0 if a == b, -1 if a < b, and +1 if a > b.
|
||||
//
|
||||
// Use Compare when you need to perform a three-way comparison (with
|
||||
// [slices.SortFunc], for example). It is usually clearer and always faster
|
||||
// to use the built-in string comparison operators ==, <, >, and so on.
|
||||
func Compare(a, b string) int {
|
||||
return bytealg.CompareString(a, b)
|
||||
}
|
||||
119
src/strings/compare_test.go
Normal file
119
src/strings/compare_test.go
Normal file
@@ -0,0 +1,119 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings_test
|
||||
|
||||
// Derived from bytes/compare_test.go.
|
||||
// Benchmarks omitted since the underlying implementation is identical.
|
||||
|
||||
import (
|
||||
"internal/testenv"
|
||||
. "strings"
|
||||
"testing"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// compareTests lists pairs of strings together with the result Compare is
// expected to return for them.
var compareTests = []struct {
	a, b string
	i    int
}{
	{"", "", 0},
	{"a", "", 1},
	{"", "a", -1},
	{"abc", "abc", 0},
	{"ab", "abc", -1},
	{"abc", "ab", 1},
	{"x", "ab", 1},
	{"ab", "x", -1},
	{"x", "a", 1},
	{"b", "x", -1},
	// test runtime·memeq's chunked implementation
	{"abcdefgh", "abcdefgh", 0},
	{"abcdefghi", "abcdefghi", 0},
	{"abcdefghi", "abcdefghj", -1},
}

// TestCompare runs Compare over every entry of compareTests.
func TestCompare(t *testing.T) {
	for i := range compareTests {
		tc := &compareTests[i]
		if got := Compare(tc.a, tc.b); got != tc.i {
			t.Errorf(`Compare(%q, %q) = %v`, tc.a, tc.b, got)
		}
	}
}
|
||||
|
||||
// TestCompareIdenticalString compares a string with itself and with a
// one-byte prefix of itself.
func TestCompareIdenticalString(t *testing.T) {
	s := "Hello Gophers!"
	if got := Compare(s, s); got != 0 {
		t.Error("s != s")
	}
	prefix := s[:1]
	if got := Compare(s, prefix); got != 1 {
		t.Error("s > s[:1] failed")
	}
}
|
||||
|
||||
func TestCompareStrings(t *testing.T) {
|
||||
// unsafeString converts a []byte to a string with no allocation.
|
||||
// The caller must not modify b while the result string is in use.
|
||||
unsafeString := func(b []byte) string {
|
||||
return unsafe.String(unsafe.SliceData(b), len(b))
|
||||
}
|
||||
|
||||
lengths := make([]int, 0) // lengths to test in ascending order
|
||||
for i := 0; i <= 128; i++ {
|
||||
lengths = append(lengths, i)
|
||||
}
|
||||
lengths = append(lengths, 256, 512, 1024, 1333, 4095, 4096, 4097)
|
||||
|
||||
if !testing.Short() || testenv.Builder() != "" {
|
||||
lengths = append(lengths, 65535, 65536, 65537, 99999)
|
||||
}
|
||||
|
||||
n := lengths[len(lengths)-1]
|
||||
a := make([]byte, n+1)
|
||||
b := make([]byte, n+1)
|
||||
lastLen := 0
|
||||
for _, len := range lengths {
|
||||
// randomish but deterministic data. No 0 or 255.
|
||||
for i := 0; i < len; i++ {
|
||||
a[i] = byte(1 + 31*i%254)
|
||||
b[i] = byte(1 + 31*i%254)
|
||||
}
|
||||
// data past the end is different
|
||||
for i := len; i <= n; i++ {
|
||||
a[i] = 8
|
||||
b[i] = 9
|
||||
}
|
||||
|
||||
sa, sb := unsafeString(a), unsafeString(b)
|
||||
cmp := Compare(sa[:len], sb[:len])
|
||||
if cmp != 0 {
|
||||
t.Errorf(`CompareIdentical(%d) = %d`, len, cmp)
|
||||
}
|
||||
if len > 0 {
|
||||
cmp = Compare(sa[:len-1], sb[:len])
|
||||
if cmp != -1 {
|
||||
t.Errorf(`CompareAshorter(%d) = %d`, len, cmp)
|
||||
}
|
||||
cmp = Compare(sa[:len], sb[:len-1])
|
||||
if cmp != 1 {
|
||||
t.Errorf(`CompareBshorter(%d) = %d`, len, cmp)
|
||||
}
|
||||
}
|
||||
for k := lastLen; k < len; k++ {
|
||||
b[k] = a[k] - 1
|
||||
cmp = Compare(unsafeString(a[:len]), unsafeString(b[:len]))
|
||||
if cmp != 1 {
|
||||
t.Errorf(`CompareAbigger(%d,%d) = %d`, len, k, cmp)
|
||||
}
|
||||
b[k] = a[k] + 1
|
||||
cmp = Compare(unsafeString(a[:len]), unsafeString(b[:len]))
|
||||
if cmp != -1 {
|
||||
t.Errorf(`CompareBbigger(%d,%d) = %d`, len, k, cmp)
|
||||
}
|
||||
b[k] = a[k]
|
||||
}
|
||||
lastLen = len
|
||||
}
|
||||
}
|
||||
460
src/strings/example_test.go
Normal file
460
src/strings/example_test.go
Normal file
@@ -0,0 +1,460 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// ExampleClone shows that a clone equals its source but has a different
// data pointer.
func ExampleClone() {
	original := "abc"
	copied := strings.Clone(original)
	sameContents := original == copied
	sameStorage := unsafe.StringData(original) == unsafe.StringData(copied)
	fmt.Println(sameContents)
	fmt.Println(sameStorage)
	// Output:
	// true
	// false
}
|
||||
|
||||
// ExampleBuilder assembles a countdown with Fprintf and WriteString.
func ExampleBuilder() {
	var countdown strings.Builder
	for _, n := range []int{3, 2, 1} {
		fmt.Fprintf(&countdown, "%d...", n)
	}
	countdown.WriteString("ignition")
	fmt.Println(countdown.String())

	// Output: 3...2...1...ignition
}
|
||||
|
||||
// ExampleCompare prints the three possible results of Compare.
func ExampleCompare() {
	pairs := [][2]string{{"a", "b"}, {"a", "a"}, {"b", "a"}}
	for _, p := range pairs {
		fmt.Println(strings.Compare(p[0], p[1]))
	}
	// Output:
	// -1
	// 0
	// 1
}
|
||||
|
||||
// ExampleContains looks for several substrings, the empty one included.
func ExampleContains() {
	for _, substr := range []string{"foo", "bar", ""} {
		fmt.Println(strings.Contains("seafood", substr))
	}
	fmt.Println(strings.Contains("", ""))
	// Output:
	// true
	// false
	// true
	// true
}
|
||||
|
||||
// ExampleContainsAny checks several strings against sets of characters.
func ExampleContainsAny() {
	queries := []struct{ s, chars string }{
		{"team", "i"},
		{"fail", "ui"},
		{"ure", "ui"},
		{"failure", "ui"},
		{"foo", ""},
		{"", ""},
	}
	for _, q := range queries {
		fmt.Println(strings.ContainsAny(q.s, q.chars))
	}
	// Output:
	// false
	// true
	// true
	// true
	// false
	// false
}
|
||||
|
||||
// ExampleContainsRune looks for a code point given by its numeric value.
func ExampleContainsRune() {
	// Finds whether a string contains a particular Unicode code point.
	// The code point for the lowercase letter "a", for example, is 97.
	for _, word := range []string{"aardvark", "timeout"} {
		fmt.Println(strings.ContainsRune(word, 97))
	}
	// Output:
	// true
	// false
}
|
||||
|
||||
// ExampleContainsFunc looks for a lowercase vowel in two words.
func ExampleContainsFunc() {
	isVowel := func(r rune) bool {
		switch r {
		case 'a', 'e', 'i', 'o', 'u':
			return true
		}
		return false
	}
	for _, word := range []string{"hello", "rhythms"} {
		fmt.Println(strings.ContainsFunc(word, isVowel))
	}
	// Output:
	// true
	// false
}
|
||||
|
||||
// ExampleCount counts a one-letter substring and the empty substring.
func ExampleCount() {
	letters := strings.Count("cheese", "e")
	gaps := strings.Count("five", "") // before & after each rune
	fmt.Println(letters)
	fmt.Println(gaps)
	// Output:
	// 3
	// 5
}
|
||||
|
||||
// ExampleCut cuts "Gopher" around several separators.
func ExampleCut() {
	s := "Gopher"
	for _, sep := range []string{"Go", "ph", "er", "Badger"} {
		before, after, found := strings.Cut(s, sep)
		fmt.Printf("Cut(%q, %q) = %q, %q, %v\n", s, sep, before, after, found)
	}
	// Output:
	// Cut("Gopher", "Go") = "", "pher", true
	// Cut("Gopher", "ph") = "Go", "er", true
	// Cut("Gopher", "er") = "Goph", "", true
	// Cut("Gopher", "Badger") = "Gopher", "", false
}
|
||||
|
||||
// ExampleCutPrefix tries to cut a matching and a non-matching prefix.
func ExampleCutPrefix() {
	s := "Gopher"
	for _, prefix := range []string{"Go", "ph"} {
		after, found := strings.CutPrefix(s, prefix)
		fmt.Printf("CutPrefix(%q, %q) = %q, %v\n", s, prefix, after, found)
	}
	// Output:
	// CutPrefix("Gopher", "Go") = "pher", true
	// CutPrefix("Gopher", "ph") = "Gopher", false
}
|
||||
|
||||
// ExampleCutSuffix tries to cut a non-matching and a matching suffix.
func ExampleCutSuffix() {
	s := "Gopher"
	for _, suffix := range []string{"Go", "er"} {
		before, found := strings.CutSuffix(s, suffix)
		fmt.Printf("CutSuffix(%q, %q) = %q, %v\n", s, suffix, before, found)
	}
	// Output:
	// CutSuffix("Gopher", "Go") = "Gopher", false
	// CutSuffix("Gopher", "er") = "Goph", true
}
|
||||
|
||||
// ExampleEqualFold compares pairs of strings under case folding.
func ExampleEqualFold() {
	pairs := [][2]string{
		{"Go", "go"},
		{"AB", "ab"}, // true because comparison uses simple case-folding
		{"ß", "ss"},  // false because comparison does not use full case-folding
	}
	for _, p := range pairs {
		fmt.Println(strings.EqualFold(p[0], p[1]))
	}
	// Output:
	// true
	// true
	// false
}
|
||||
|
||||
// ExampleFields splits a string around its white space.
func ExampleFields() {
	fields := strings.Fields(" foo bar baz ")
	fmt.Printf("Fields are: %q", fields)
	// Output: Fields are: ["foo" "bar" "baz"]
}
|
||||
|
||||
// ExampleFieldsFunc splits a string at every rune that is neither a letter
// nor a number.
func ExampleFieldsFunc() {
	isSeparator := func(c rune) bool {
		return !(unicode.IsLetter(c) || unicode.IsNumber(c))
	}
	fields := strings.FieldsFunc(" foo1;bar2,baz3...", isSeparator)
	fmt.Printf("Fields are: %q", fields)
	// Output: Fields are: ["foo1" "bar2" "baz3"]
}
|
||||
|
||||
// ExampleHasPrefix tests "Gopher" against several prefixes.
func ExampleHasPrefix() {
	for _, prefix := range []string{"Go", "C", ""} {
		fmt.Println(strings.HasPrefix("Gopher", prefix))
	}
	// Output:
	// true
	// false
	// true
}
|
||||
|
||||
// ExampleHasSuffix tests "Amigo" against several suffixes.
func ExampleHasSuffix() {
	for _, suffix := range []string{"go", "O", "Ami", ""} {
		fmt.Println(strings.HasSuffix("Amigo", suffix))
	}
	// Output:
	// true
	// false
	// false
	// true
}
|
||||
|
||||
// ExampleIndex searches for a present and an absent substring.
func ExampleIndex() {
	for _, substr := range []string{"ken", "dmr"} {
		fmt.Println(strings.Index("chicken", substr))
	}
	// Output:
	// 4
	// -1
}
|
||||
|
||||
// ExampleIndexFunc finds the first Han character in two strings.
func ExampleIndexFunc() {
	isHan := func(c rune) bool {
		return unicode.Is(unicode.Han, c)
	}
	for _, s := range []string{"Hello, 世界", "Hello, world"} {
		fmt.Println(strings.IndexFunc(s, isHan))
	}
	// Output:
	// 7
	// -1
}
|
||||
|
||||
// ExampleIndexAny finds the first of a set of characters in two words.
func ExampleIndexAny() {
	const vowels = "aeiouy"
	for _, word := range []string{"chicken", "crwth"} {
		fmt.Println(strings.IndexAny(word, vowels))
	}
	// Output:
	// 2
	// -1
}
|
||||
|
||||
// ExampleIndexByte searches for single bytes.
func ExampleIndexByte() {
	queries := []struct {
		s string
		c byte
	}{
		{"golang", 'g'},
		{"gophers", 'h'},
		{"golang", 'x'},
	}
	for _, q := range queries {
		fmt.Println(strings.IndexByte(q.s, q.c))
	}
	// Output:
	// 0
	// 3
	// -1
}
|
||||
// ExampleIndexRune searches for a present and an absent rune.
func ExampleIndexRune() {
	for _, r := range []rune{'k', 'd'} {
		fmt.Println(strings.IndexRune("chicken", r))
	}
	// Output:
	// 4
	// -1
}
|
||||
|
||||
// ExampleLastIndex contrasts Index with LastIndex on the same string.
func ExampleLastIndex() {
	s := "go gopher"
	first := strings.Index(s, "go")
	last := strings.LastIndex(s, "go")
	missing := strings.LastIndex(s, "rodent")
	fmt.Println(first)
	fmt.Println(last)
	fmt.Println(missing)
	// Output:
	// 0
	// 3
	// -1
}
|
||||
|
||||
// ExampleLastIndexAny finds the last occurrence of any character from
// several sets.
func ExampleLastIndexAny() {
	for _, chars := range []string{"go", "rodent", "fail"} {
		fmt.Println(strings.LastIndexAny("go gopher", chars))
	}
	// Output:
	// 4
	// 8
	// -1
}
|
||||
|
||||
// ExampleLastIndexByte finds the last occurrence of several bytes.
func ExampleLastIndexByte() {
	for _, c := range []byte{'l', 'o', 'x'} {
		fmt.Println(strings.LastIndexByte("Hello, world", c))
	}
	// Output:
	// 10
	// 8
	// -1
}
|
||||
|
||||
// ExampleLastIndexFunc finds the last number rune in several strings.
func ExampleLastIndexFunc() {
	for _, s := range []string{"go 123", "123 go", "go"} {
		fmt.Println(strings.LastIndexFunc(s, unicode.IsNumber))
	}
	// Output:
	// 5
	// 2
	// -1
}
|
||||
|
||||
// ExampleJoin joins three words with a comma and a space.
func ExampleJoin() {
	joined := strings.Join([]string{"foo", "bar", "baz"}, ", ")
	fmt.Println(joined)
	// Output: foo, bar, baz
}
|
||||
|
||||
// ExampleRepeat builds "banana" from a repeated syllable.
func ExampleRepeat() {
	tail := strings.Repeat("na", 2)
	fmt.Println("ba" + tail)
	// Output: banana
}
|
||||
|
||||
// ExampleReplace replaces a limited and an unlimited number of matches.
func ExampleReplace() {
	s := "oink oink oink"
	firstTwo := strings.Replace(s, "k", "ky", 2)
	every := strings.Replace(s, "oink", "moo", -1)
	fmt.Println(firstTwo)
	fmt.Println(every)
	// Output:
	// oinky oinky oink
	// moo moo moo
}
|
||||
|
||||
// ExampleReplaceAll replaces every occurrence of a word.
func ExampleReplaceAll() {
	replaced := strings.ReplaceAll("oink oink oink", "oink", "moo")
	fmt.Println(replaced)
	// Output:
	// moo moo moo
}
|
||||
|
||||
// ExampleSplit splits several strings, including around an empty separator
// and with an empty input.
func ExampleSplit() {
	inputs := []struct{ s, sep string }{
		{"a,b,c", ","},
		{"a man a plan a canal panama", "a "},
		{" xyz ", ""},
		{"", "Bernardo O'Higgins"},
	}
	for _, in := range inputs {
		fmt.Printf("%q\n", strings.Split(in.s, in.sep))
	}
	// Output:
	// ["a" "b" "c"]
	// ["" "man " "plan " "canal panama"]
	// [" " "x" "y" "z" " "]
	// [""]
}
|
||||
|
||||
// ExampleSplitN splits with a limit of two and with a limit of zero.
func ExampleSplitN() {
	const s, sep = "a,b,c", ","
	fmt.Printf("%q\n", strings.SplitN(s, sep, 2))
	none := strings.SplitN(s, sep, 0)
	fmt.Printf("%q (nil = %v)\n", none, none == nil)
	// Output:
	// ["a" "b,c"]
	// [] (nil = true)
}
|
||||
|
||||
// ExampleSplitAfter splits after each comma.
func ExampleSplitAfter() {
	parts := strings.SplitAfter("a,b,c", ",")
	fmt.Printf("%q\n", parts)
	// Output: ["a," "b," "c"]
}
|
||||
|
||||
// ExampleSplitAfterN splits after commas with a limit of two.
func ExampleSplitAfterN() {
	parts := strings.SplitAfterN("a,b,c", ",", 2)
	fmt.Printf("%q\n", parts)
	// Output: ["a," "b,c"]
}
|
||||
|
||||
// ExampleTitle applies Title to three phrases.
func ExampleTitle() {
	// Compare this example to the ToTitle example.
	for _, phrase := range []string{"her royal highness", "loud noises", "хлеб"} {
		fmt.Println(strings.Title(phrase))
	}
	// Output:
	// Her Royal Highness
	// Loud Noises
	// Хлеб
}
|
||||
|
||||
// ExampleToTitle applies ToTitle to three phrases.
func ExampleToTitle() {
	// Compare this example to the Title example.
	for _, phrase := range []string{"her royal highness", "loud noises", "хлеб"} {
		fmt.Println(strings.ToTitle(phrase))
	}
	// Output:
	// HER ROYAL HIGHNESS
	// LOUD NOISES
	// ХЛЕБ
}
|
||||
|
||||
// ExampleToTitleSpecial title-cases a sentence using unicode.TurkishCase.
func ExampleToTitleSpecial() {
	sentence := "dünyanın ilk borsa yapısı Aizonai kabul edilir"
	fmt.Println(strings.ToTitleSpecial(unicode.TurkishCase, sentence))
	// Output:
	// DÜNYANIN İLK BORSA YAPISI AİZONAİ KABUL EDİLİR
}
|
||||
|
||||
func ExampleMap() {
	// rotate shifts r by 13 places within the 26-letter range starting at base.
	rotate := func(r, base rune) rune {
		return base + (r-base+13)%26
	}
	// rot13 rotates ASCII letters and leaves every other rune untouched.
	rot13 := func(r rune) rune {
		if 'A' <= r && r <= 'Z' {
			return rotate(r, 'A')
		}
		if 'a' <= r && r <= 'z' {
			return rotate(r, 'a')
		}
		return r
	}
	fmt.Println(strings.Map(rot13, "'Twas brillig and the slithy gopher..."))
	// Output: 'Gjnf oevyyvt naq gur fyvgul tbcure...
}
|
||||
|
||||
func ExampleNewReplacer() {
	// Replace the angle brackets with their HTML entities so the markup is
	// shown literally. The pairs are given as old, new, old, new.
	r := strings.NewReplacer("<", "&lt;", ">", "&gt;")
	fmt.Println(r.Replace("This is <b>HTML</b>!"))
	// Output: This is &lt;b&gt;HTML&lt;/b&gt;!
}
|
||||
|
||||
func ExampleToUpper() {
	upper := strings.ToUpper("Gopher")
	fmt.Println(upper)
	// Output: GOPHER
}
|
||||
|
||||
func ExampleToUpperSpecial() {
	// The Turkish case mapping upper-cases "i" to the dotted capital "İ".
	upper := strings.ToUpperSpecial(unicode.TurkishCase, "örnek iş")
	fmt.Println(upper)
	// Output: ÖRNEK İŞ
}
|
||||
|
||||
func ExampleToLower() {
	lower := strings.ToLower("Gopher")
	fmt.Println(lower)
	// Output: gopher
}
|
||||
|
||||
func ExampleToLowerSpecial() {
	// The Turkish case mapping lower-cases the dotted capital "İ" to "i".
	lower := strings.ToLowerSpecial(unicode.TurkishCase, "Önnek İş")
	fmt.Println(lower)
	// Output: önnek iş
}
|
||||
|
||||
func ExampleTrim() {
	// The cutset lists the runes to strip from both ends.
	const cutset = "!¡"
	fmt.Print(strings.Trim("¡¡¡Hello, Gophers!!!", cutset))
	// Output: Hello, Gophers
}
|
||||
|
||||
func ExampleTrimSpace() {
	const padded = " \t\n Hello, Gophers \n\t\r\n"
	fmt.Println(strings.TrimSpace(padded))
	// Output: Hello, Gophers
}
|
||||
|
||||
func ExampleTrimPrefix() {
	s := "¡¡¡Hello, Gophers!!!"
	// Prefixes are tried in order; the second no longer matches and leaves s as is.
	for _, prefix := range []string{"¡¡¡Hello, ", "¡¡¡Howdy, "} {
		s = strings.TrimPrefix(s, prefix)
	}
	fmt.Print(s)
	// Output: Gophers!!!
}
|
||||
|
||||
func ExampleTrimSuffix() {
	s := "¡¡¡Hello, Gophers!!!"
	// Suffixes are tried in order; the second does not match and leaves s as is.
	for _, suffix := range []string{", Gophers!!!", ", Marmots!!!"} {
		s = strings.TrimSuffix(s, suffix)
	}
	fmt.Print(s)
	// Output: ¡¡¡Hello
}
|
||||
|
||||
func ExampleTrimFunc() {
	// notAlnum reports whether r is neither a letter nor a number.
	notAlnum := func(r rune) bool {
		return !(unicode.IsLetter(r) || unicode.IsNumber(r))
	}
	fmt.Print(strings.TrimFunc("¡¡¡Hello, Gophers!!!", notAlnum))
	// Output: Hello, Gophers
}
|
||||
|
||||
func ExampleTrimLeft() {
	// Only the leading runes found in the cutset are removed.
	const cutset = "!¡"
	fmt.Print(strings.TrimLeft("¡¡¡Hello, Gophers!!!", cutset))
	// Output: Hello, Gophers!!!
}
|
||||
|
||||
func ExampleTrimLeftFunc() {
	// notAlnum reports whether r is neither a letter nor a number.
	notAlnum := func(r rune) bool {
		return !(unicode.IsLetter(r) || unicode.IsNumber(r))
	}
	fmt.Print(strings.TrimLeftFunc("¡¡¡Hello, Gophers!!!", notAlnum))
	// Output: Hello, Gophers!!!
}
|
||||
|
||||
func ExampleTrimRight() {
	// Only the trailing runes found in the cutset are removed.
	const cutset = "!¡"
	fmt.Print(strings.TrimRight("¡¡¡Hello, Gophers!!!", cutset))
	// Output: ¡¡¡Hello, Gophers
}
|
||||
|
||||
func ExampleTrimRightFunc() {
	// notAlnum reports whether r is neither a letter nor a number.
	notAlnum := func(r rune) bool {
		return !(unicode.IsLetter(r) || unicode.IsNumber(r))
	}
	fmt.Print(strings.TrimRightFunc("¡¡¡Hello, Gophers!!!", notAlnum))
	// Output: ¡¡¡Hello, Gophers
}
|
||||
|
||||
func ExampleToValidUTF8() {
	// Each entry pairs an input with the replacement used for invalid bytes.
	cases := []struct{ s, replacement string }{
		{"abc", "\uFFFD"},
		{"a\xffb\xC0\xAFc\xff", ""},
		{"\xed\xa0\x80", "abc"},
	}
	for _, c := range cases {
		fmt.Printf("%s\n", strings.ToValidUTF8(c.s, c.replacement))
	}
	// Output:
	// abc
	// abc
	// abc
}
|
||||
47
src/strings/export_test.go
Normal file
47
src/strings/export_test.go
Normal file
@@ -0,0 +1,47 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings
|
||||
|
||||
func (r *Replacer) Replacer() any {
|
||||
r.once.Do(r.buildOnce)
|
||||
return r.r
|
||||
}
|
||||
|
||||
func (r *Replacer) PrintTrie() string {
|
||||
r.once.Do(r.buildOnce)
|
||||
gen := r.r.(*genericReplacer)
|
||||
return gen.printNode(&gen.root, 0)
|
||||
}
|
||||
|
||||
func (r *genericReplacer) printNode(t *trieNode, depth int) (s string) {
|
||||
if t.priority > 0 {
|
||||
s += "+"
|
||||
} else {
|
||||
s += "-"
|
||||
}
|
||||
s += "\n"
|
||||
|
||||
if t.prefix != "" {
|
||||
s += Repeat(".", depth) + t.prefix
|
||||
s += r.printNode(t.next, depth+len(t.prefix))
|
||||
} else if t.table != nil {
|
||||
for b, m := range r.mapping {
|
||||
if int(m) != r.tableSize && t.table[m] != nil {
|
||||
s += Repeat(".", depth) + string([]byte{byte(b)})
|
||||
s += r.printNode(t.table[m], depth+1)
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func StringFind(pattern, text string) int {
|
||||
return makeStringFinder(pattern).next(text)
|
||||
}
|
||||
|
||||
func DumpTables(pattern string) ([]int, []int) {
|
||||
finder := makeStringFinder(pattern)
|
||||
return finder.badCharSkip[:], finder.goodSuffixSkip
|
||||
}
|
||||
160
src/strings/reader.go
Normal file
160
src/strings/reader.go
Normal file
@@ -0,0 +1,160 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// A Reader implements the [io.Reader], [io.ReaderAt], [io.ByteReader], [io.ByteScanner],
// [io.RuneReader], [io.RuneScanner], [io.Seeker], and [io.WriterTo] interfaces by reading
// from a string.
// The zero value for Reader operates like a Reader of an empty string.
type Reader struct {
	s        string // string being read from
	i        int64  // current reading index
	prevRune int    // index of previous rune; or < 0
}
|
||||
|
||||
// Len returns the number of bytes of the unread portion of the
|
||||
// string.
|
||||
func (r *Reader) Len() int {
|
||||
if r.i >= int64(len(r.s)) {
|
||||
return 0
|
||||
}
|
||||
return int(int64(len(r.s)) - r.i)
|
||||
}
|
||||
|
||||
// Size returns the original length of the underlying string.
|
||||
// Size is the number of bytes available for reading via [Reader.ReadAt].
|
||||
// The returned value is always the same and is not affected by calls
|
||||
// to any other method.
|
||||
func (r *Reader) Size() int64 { return int64(len(r.s)) }
|
||||
|
||||
// Read implements the [io.Reader] interface.
|
||||
func (r *Reader) Read(b []byte) (n int, err error) {
|
||||
if r.i >= int64(len(r.s)) {
|
||||
return 0, io.EOF
|
||||
}
|
||||
r.prevRune = -1
|
||||
n = copy(b, r.s[r.i:])
|
||||
r.i += int64(n)
|
||||
return
|
||||
}
|
||||
|
||||
// ReadAt implements the [io.ReaderAt] interface.
|
||||
func (r *Reader) ReadAt(b []byte, off int64) (n int, err error) {
|
||||
// cannot modify state - see io.ReaderAt
|
||||
if off < 0 {
|
||||
return 0, errors.New("strings.Reader.ReadAt: negative offset")
|
||||
}
|
||||
if off >= int64(len(r.s)) {
|
||||
return 0, io.EOF
|
||||
}
|
||||
n = copy(b, r.s[off:])
|
||||
if n < len(b) {
|
||||
err = io.EOF
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// ReadByte implements the [io.ByteReader] interface.
|
||||
func (r *Reader) ReadByte() (byte, error) {
|
||||
r.prevRune = -1
|
||||
if r.i >= int64(len(r.s)) {
|
||||
return 0, io.EOF
|
||||
}
|
||||
b := r.s[r.i]
|
||||
r.i++
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// UnreadByte implements the [io.ByteScanner] interface.
|
||||
func (r *Reader) UnreadByte() error {
|
||||
if r.i <= 0 {
|
||||
return errors.New("strings.Reader.UnreadByte: at beginning of string")
|
||||
}
|
||||
r.prevRune = -1
|
||||
r.i--
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReadRune implements the [io.RuneReader] interface.
|
||||
func (r *Reader) ReadRune() (ch rune, size int, err error) {
|
||||
if r.i >= int64(len(r.s)) {
|
||||
r.prevRune = -1
|
||||
return 0, 0, io.EOF
|
||||
}
|
||||
r.prevRune = int(r.i)
|
||||
if c := r.s[r.i]; c < utf8.RuneSelf {
|
||||
r.i++
|
||||
return rune(c), 1, nil
|
||||
}
|
||||
ch, size = utf8.DecodeRuneInString(r.s[r.i:])
|
||||
r.i += int64(size)
|
||||
return
|
||||
}
|
||||
|
||||
// UnreadRune implements the [io.RuneScanner] interface.
|
||||
func (r *Reader) UnreadRune() error {
|
||||
if r.i <= 0 {
|
||||
return errors.New("strings.Reader.UnreadRune: at beginning of string")
|
||||
}
|
||||
if r.prevRune < 0 {
|
||||
return errors.New("strings.Reader.UnreadRune: previous operation was not ReadRune")
|
||||
}
|
||||
r.i = int64(r.prevRune)
|
||||
r.prevRune = -1
|
||||
return nil
|
||||
}
|
||||
|
||||
// Seek implements the [io.Seeker] interface.
|
||||
func (r *Reader) Seek(offset int64, whence int) (int64, error) {
|
||||
r.prevRune = -1
|
||||
var abs int64
|
||||
switch whence {
|
||||
case io.SeekStart:
|
||||
abs = offset
|
||||
case io.SeekCurrent:
|
||||
abs = r.i + offset
|
||||
case io.SeekEnd:
|
||||
abs = int64(len(r.s)) + offset
|
||||
default:
|
||||
return 0, errors.New("strings.Reader.Seek: invalid whence")
|
||||
}
|
||||
if abs < 0 {
|
||||
return 0, errors.New("strings.Reader.Seek: negative position")
|
||||
}
|
||||
r.i = abs
|
||||
return abs, nil
|
||||
}
|
||||
|
||||
// WriteTo implements the [io.WriterTo] interface.
|
||||
func (r *Reader) WriteTo(w io.Writer) (n int64, err error) {
|
||||
r.prevRune = -1
|
||||
if r.i >= int64(len(r.s)) {
|
||||
return 0, nil
|
||||
}
|
||||
s := r.s[r.i:]
|
||||
m, err := io.WriteString(w, s)
|
||||
if m > len(s) {
|
||||
panic("strings.Reader.WriteTo: invalid WriteString count")
|
||||
}
|
||||
r.i += int64(m)
|
||||
n = int64(m)
|
||||
if m != len(s) && err == nil {
|
||||
err = io.ErrShortWrite
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Reset resets the [Reader] to be reading from s.
|
||||
func (r *Reader) Reset(s string) { *r = Reader{s, 0, -1} }
|
||||
|
||||
// NewReader returns a new [Reader] reading from s.
|
||||
// It is similar to [bytes.NewBufferString] but more efficient and non-writable.
|
||||
func NewReader(s string) *Reader { return &Reader{s, 0, -1} }
|
||||
233
src/strings/reader_test.go
Normal file
233
src/strings/reader_test.go
Normal file
@@ -0,0 +1,233 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestReader runs a sequence of Seek+Read steps against a single Reader;
// the steps share state, so their order matters.
func TestReader(t *testing.T) {
	type step struct {
		off     int64
		seek    int
		n       int
		want    string
		wantpos int64
		readerr error
		seekerr string
	}
	steps := []step{
		{seek: io.SeekStart, off: 0, n: 20, want: "0123456789"},
		{seek: io.SeekStart, off: 1, n: 1, want: "1"},
		{seek: io.SeekCurrent, off: 1, wantpos: 3, n: 2, want: "34"},
		{seek: io.SeekStart, off: -1, seekerr: "strings.Reader.Seek: negative position"},
		{seek: io.SeekStart, off: 1 << 33, wantpos: 1 << 33, readerr: io.EOF},
		{seek: io.SeekCurrent, off: 1, wantpos: 1<<33 + 1, readerr: io.EOF},
		{seek: io.SeekStart, n: 5, want: "01234"},
		{seek: io.SeekCurrent, n: 5, want: "56789"},
		{seek: io.SeekEnd, off: -1, n: 1, wantpos: 9, want: "9"},
	}

	r := strings.NewReader("0123456789")
	for i, s := range steps {
		pos, err := r.Seek(s.off, s.seek)
		switch {
		case err == nil && s.seekerr != "":
			t.Errorf("%d. want seek error %q", i, s.seekerr)
			continue
		case err != nil && err.Error() != s.seekerr:
			t.Errorf("%d. seek error = %q; want %q", i, err.Error(), s.seekerr)
			continue
		}
		// A zero wantpos means the step does not check the position.
		if s.wantpos != 0 && s.wantpos != pos {
			t.Errorf("%d. pos = %d, want %d", i, pos, s.wantpos)
		}
		buf := make([]byte, s.n)
		n, err := r.Read(buf)
		if err != s.readerr {
			t.Errorf("%d. read = %v; want %v", i, err, s.readerr)
			continue
		}
		if got := string(buf[:n]); got != s.want {
			t.Errorf("%d. got %q; want %q", i, got, s.want)
		}
	}
}
|
||||
|
||||
// TestReadAfterBigSeek checks that reading after seeking far past the end
// of the string reports EOF.
func TestReadAfterBigSeek(t *testing.T) {
	r := strings.NewReader("0123456789")
	_, err := r.Seek(1<<31+5, io.SeekStart)
	if err != nil {
		t.Fatal(err)
	}
	buf := make([]byte, 10)
	n, err := r.Read(buf)
	if n != 0 || err != io.EOF {
		t.Errorf("Read = %d, %v; want 0, EOF", n, err)
	}
}
|
||||
|
||||
// TestReaderAt checks the data and error returned by ReadAt for a range
// of offsets and buffer sizes. Errors are compared by their printed form.
func TestReaderAt(t *testing.T) {
	type readAtCase struct {
		off     int64
		n       int
		want    string
		wanterr any
	}
	cases := []readAtCase{
		{off: 0, n: 10, want: "0123456789", wanterr: nil},
		{off: 1, n: 10, want: "123456789", wanterr: io.EOF},
		{off: 1, n: 9, want: "123456789", wanterr: nil},
		{off: 11, n: 10, want: "", wanterr: io.EOF},
		{off: 0, n: 0, want: "", wanterr: nil},
		{off: -1, n: 0, want: "", wanterr: "strings.Reader.ReadAt: negative offset"},
	}

	r := strings.NewReader("0123456789")
	for i, c := range cases {
		buf := make([]byte, c.n)
		rn, err := r.ReadAt(buf, c.off)
		if got := string(buf[:rn]); got != c.want {
			t.Errorf("%d. got %q; want %q", i, got, c.want)
		}
		gotErr, wantErr := fmt.Sprint(err), fmt.Sprint(c.wanterr)
		if gotErr != wantErr {
			t.Errorf("%d. got error = %v; want %v", i, err, c.wanterr)
		}
	}
}
|
||||
|
||||
func TestReaderAtConcurrent(t *testing.T) {
	// Test for the race detector, to verify ReadAt doesn't mutate
	// any state.
	const goroutines = 5
	r := strings.NewReader("0123456789")
	var wg sync.WaitGroup
	wg.Add(goroutines)
	for i := 0; i < goroutines; i++ {
		go func(off int64) {
			defer wg.Done()
			var buf [1]byte
			r.ReadAt(buf[:], off)
		}(int64(i))
	}
	wg.Wait()
}
|
||||
|
||||
func TestEmptyReaderConcurrent(t *testing.T) {
	// Test for the race detector, to verify a Read that doesn't yield any bytes
	// is okay to use from multiple goroutines. This was our historic behavior.
	// See golang.org/issue/7856
	const rounds = 5
	r := strings.NewReader("")
	var wg sync.WaitGroup

	// readInto performs one Read into b and signals completion.
	readInto := func(b []byte) {
		defer wg.Done()
		r.Read(b)
	}

	wg.Add(2 * rounds)
	for i := 0; i < rounds; i++ {
		go func() {
			var buf [1]byte
			readInto(buf[:])
		}()
		go readInto(nil)
	}
	wg.Wait()
}
|
||||
|
||||
// TestWriteTo checks WriteTo on every suffix of a fixed string: the byte
// count, the error, the data written, and that the reader ends up drained.
func TestWriteTo(t *testing.T) {
	const str = "0123456789"
	for start := 0; start <= len(str); start++ {
		suffix := str[start:]
		r := strings.NewReader(suffix)
		var out bytes.Buffer
		n, err := r.WriteTo(&out)
		if want := int64(len(suffix)); n != want {
			t.Errorf("got %v; want %v", n, want)
		}
		if err != nil {
			t.Errorf("for length %d: got error = %v; want nil", len(suffix), err)
		}
		if got := out.String(); got != suffix {
			t.Errorf("got string %q; want %q", got, suffix)
		}
		if left := r.Len(); left != 0 {
			t.Errorf("reader contains %v bytes; want 0", left)
		}
	}
}
|
||||
|
||||
// TestReaderLenSize tests that Len is affected by reads, but Size is not.
func TestReaderLenSize(t *testing.T) {
	r := strings.NewReader("abc")
	// Consume one byte.
	io.CopyN(io.Discard, r, 1)
	if got := r.Len(); got != 2 {
		t.Errorf("Len = %d; want 2", got)
	}
	if got := r.Size(); got != 3 {
		t.Errorf("Size = %d; want 3", got)
	}
}
|
||||
|
||||
// TestReaderReset checks that Reset discards the previous-rune state and
// that the reader then yields the new string in full.
func TestReaderReset(t *testing.T) {
	const want = "abcdef"

	r := strings.NewReader("世界")
	_, _, err := r.ReadRune()
	if err != nil {
		t.Errorf("ReadRune: unexpected error: %v", err)
	}

	r.Reset(want)
	if r.UnreadRune() == nil {
		t.Errorf("UnreadRune: expected error, got nil")
	}
	data, err := io.ReadAll(r)
	if err != nil {
		t.Errorf("ReadAll: unexpected error: %v", err)
	}
	if got := string(data); got != want {
		t.Errorf("ReadAll: got %q, want %q", got, want)
	}
}
|
||||
|
||||
// TestReaderZero checks every method on a zero-value Reader.
func TestReaderZero(t *testing.T) {
	// Every check starts from its own zero-value Reader.
	zero := func() *strings.Reader { return new(strings.Reader) }

	if l := zero().Len(); l != 0 {
		t.Errorf("Len: got %d, want 0", l)
	}

	if n, err := zero().Read(nil); n != 0 || err != io.EOF {
		t.Errorf("Read: got %d, %v; want 0, io.EOF", n, err)
	}

	if n, err := zero().ReadAt(nil, 11); n != 0 || err != io.EOF {
		t.Errorf("ReadAt: got %d, %v; want 0, io.EOF", n, err)
	}

	if b, err := zero().ReadByte(); b != 0 || err != io.EOF {
		t.Errorf("ReadByte: got %d, %v; want 0, io.EOF", b, err)
	}

	if ch, size, err := zero().ReadRune(); ch != 0 || size != 0 || err != io.EOF {
		t.Errorf("ReadRune: got %d, %d, %v; want 0, 0, io.EOF", ch, size, err)
	}

	if offset, err := zero().Seek(11, io.SeekStart); offset != 11 || err != nil {
		t.Errorf("Seek: got %d, %v; want 11, nil", offset, err)
	}

	if s := zero().Size(); s != 0 {
		t.Errorf("Size: got %d, want 0", s)
	}

	if err := zero().UnreadByte(); err == nil {
		t.Errorf("UnreadByte: got nil, want error")
	}

	if err := zero().UnreadRune(); err == nil {
		t.Errorf("UnreadRune: got nil, want error")
	}

	if n, err := zero().WriteTo(io.Discard); n != 0 || err != nil {
		t.Errorf("WriteTo: got %d, %v; want 0, nil", n, err)
	}
}
|
||||
578
src/strings/replace.go
Normal file
578
src/strings/replace.go
Normal file
@@ -0,0 +1,578 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings
|
||||
|
||||
import (
|
||||
"io"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Replacer replaces a list of strings with replacements.
// It is safe for concurrent use by multiple goroutines.
type Replacer struct {
	once   sync.Once // guards buildOnce method
	r      replacer  // replacement algorithm; assigned by buildOnce
	oldnew []string  // old, new argument pairs; set to nil by buildOnce
}
|
||||
|
||||
// replacer is the interface that a replacement algorithm needs to implement.
type replacer interface {
	// Replace returns s with the algorithm's replacements applied.
	Replace(s string) string
	// WriteString writes s to w with the algorithm's replacements applied.
	WriteString(w io.Writer, s string) (n int, err error)
}
|
||||
|
||||
// NewReplacer returns a new [Replacer] from a list of old, new string
|
||||
// pairs. Replacements are performed in the order they appear in the
|
||||
// target string, without overlapping matches. The old string
|
||||
// comparisons are done in argument order.
|
||||
//
|
||||
// NewReplacer panics if given an odd number of arguments.
|
||||
func NewReplacer(oldnew ...string) *Replacer {
|
||||
if len(oldnew)%2 == 1 {
|
||||
panic("strings.NewReplacer: odd argument count")
|
||||
}
|
||||
return &Replacer{oldnew: append([]string(nil), oldnew...)}
|
||||
}
|
||||
|
||||
func (r *Replacer) buildOnce() {
|
||||
r.r = r.build()
|
||||
r.oldnew = nil
|
||||
}
|
||||
|
||||
func (b *Replacer) build() replacer {
|
||||
oldnew := b.oldnew
|
||||
if len(oldnew) == 2 && len(oldnew[0]) > 1 {
|
||||
return makeSingleStringReplacer(oldnew[0], oldnew[1])
|
||||
}
|
||||
|
||||
allNewBytes := true
|
||||
for i := 0; i < len(oldnew); i += 2 {
|
||||
if len(oldnew[i]) != 1 {
|
||||
return makeGenericReplacer(oldnew)
|
||||
}
|
||||
if len(oldnew[i+1]) != 1 {
|
||||
allNewBytes = false
|
||||
}
|
||||
}
|
||||
|
||||
if allNewBytes {
|
||||
r := byteReplacer{}
|
||||
for i := range r {
|
||||
r[i] = byte(i)
|
||||
}
|
||||
// The first occurrence of old->new map takes precedence
|
||||
// over the others with the same old string.
|
||||
for i := len(oldnew) - 2; i >= 0; i -= 2 {
|
||||
o := oldnew[i][0]
|
||||
n := oldnew[i+1][0]
|
||||
r[o] = n
|
||||
}
|
||||
return &r
|
||||
}
|
||||
|
||||
r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)}
|
||||
// The first occurrence of old->new map takes precedence
|
||||
// over the others with the same old string.
|
||||
for i := len(oldnew) - 2; i >= 0; i -= 2 {
|
||||
o := oldnew[i][0]
|
||||
n := oldnew[i+1]
|
||||
// To avoid counting repetitions multiple times.
|
||||
if r.replacements[o] == nil {
|
||||
// We need to use string([]byte{o}) instead of string(o),
|
||||
// to avoid utf8 encoding of o.
|
||||
// E. g. byte(150) produces string of length 2.
|
||||
r.toReplace = append(r.toReplace, string([]byte{o}))
|
||||
}
|
||||
r.replacements[o] = []byte(n)
|
||||
|
||||
}
|
||||
return &r
|
||||
}
|
||||
|
||||
// Replace returns a copy of s with all replacements performed.
|
||||
func (r *Replacer) Replace(s string) string {
|
||||
r.once.Do(r.buildOnce)
|
||||
return r.r.Replace(s)
|
||||
}
|
||||
|
||||
// WriteString writes s to w with all replacements performed.
|
||||
func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
|
||||
r.once.Do(r.buildOnce)
|
||||
return r.r.WriteString(w, s)
|
||||
}
|
||||
|
||||
// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
// and values may be empty. For example, the trie containing keys "ax", "ay",
// "bcbc", "x" and "xy" could have eight nodes:
//
//	n0  -
//	n1  a-
//	n2  .x+
//	n3  .y+
//	n4  b-
//	n5  .cbc+
//	n6  x+
//	n7  .y+
//
// n0 is the root node, and its children are n1, n4 and n6; n1's children are
// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
// (marked with a trailing "+") are complete keys.
type trieNode struct {
	// value is the value of the trie node's key/value pair. It is empty if
	// this node is not a complete key.
	value string
	// priority is the priority (higher is more important) of the trie node's
	// key/value pair; keys are not necessarily matched shortest- or longest-
	// first. Priority is positive if this node is a complete key, and zero
	// otherwise. In the example above, positive/zero priorities are marked
	// with a trailing "+" or "-".
	priority int

	// A trie node may have zero, one or more child nodes:
	//  * if the remaining fields are zero, there are no children.
	//  * if prefix and next are non-zero, there is one child in next.
	//  * if table is non-zero, it defines all the children.
	//
	// Prefixes are preferred over tables when there is one child, but the
	// root node always uses a table for lookup efficiency.

	// prefix is the difference in keys between this trie node and the next.
	// In the example above, node n4 has prefix "cbc" and n4's next node is n5.
	// Node n5 has no children and so has zero prefix, next and table fields.
	prefix string
	next   *trieNode

	// table is a lookup table indexed by the next byte in the key, after
	// remapping that byte through genericReplacer.mapping to create a dense
	// index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
	// 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
	// genericReplacer.tableSize will be 5. Node n0's table will be
	// []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
	// 'a', 'b' and 'x'.
	table []*trieNode
}
|
||||
|
||||
func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
|
||||
if key == "" {
|
||||
if t.priority == 0 {
|
||||
t.value = val
|
||||
t.priority = priority
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if t.prefix != "" {
|
||||
// Need to split the prefix among multiple nodes.
|
||||
var n int // length of the longest common prefix
|
||||
for ; n < len(t.prefix) && n < len(key); n++ {
|
||||
if t.prefix[n] != key[n] {
|
||||
break
|
||||
}
|
||||
}
|
||||
if n == len(t.prefix) {
|
||||
t.next.add(key[n:], val, priority, r)
|
||||
} else if n == 0 {
|
||||
// First byte differs, start a new lookup table here. Looking up
|
||||
// what is currently t.prefix[0] will lead to prefixNode, and
|
||||
// looking up key[0] will lead to keyNode.
|
||||
var prefixNode *trieNode
|
||||
if len(t.prefix) == 1 {
|
||||
prefixNode = t.next
|
||||
} else {
|
||||
prefixNode = &trieNode{
|
||||
prefix: t.prefix[1:],
|
||||
next: t.next,
|
||||
}
|
||||
}
|
||||
keyNode := new(trieNode)
|
||||
t.table = make([]*trieNode, r.tableSize)
|
||||
t.table[r.mapping[t.prefix[0]]] = prefixNode
|
||||
t.table[r.mapping[key[0]]] = keyNode
|
||||
t.prefix = ""
|
||||
t.next = nil
|
||||
keyNode.add(key[1:], val, priority, r)
|
||||
} else {
|
||||
// Insert new node after the common section of the prefix.
|
||||
next := &trieNode{
|
||||
prefix: t.prefix[n:],
|
||||
next: t.next,
|
||||
}
|
||||
t.prefix = t.prefix[:n]
|
||||
t.next = next
|
||||
next.add(key[n:], val, priority, r)
|
||||
}
|
||||
} else if t.table != nil {
|
||||
// Insert into existing table.
|
||||
m := r.mapping[key[0]]
|
||||
if t.table[m] == nil {
|
||||
t.table[m] = new(trieNode)
|
||||
}
|
||||
t.table[m].add(key[1:], val, priority, r)
|
||||
} else {
|
||||
t.prefix = key
|
||||
t.next = new(trieNode)
|
||||
t.next.add("", val, priority, r)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
|
||||
// Iterate down the trie to the end, and grab the value and keylen with
|
||||
// the highest priority.
|
||||
bestPriority := 0
|
||||
node := &r.root
|
||||
n := 0
|
||||
for node != nil {
|
||||
if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
|
||||
bestPriority = node.priority
|
||||
val = node.value
|
||||
keylen = n
|
||||
found = true
|
||||
}
|
||||
|
||||
if s == "" {
|
||||
break
|
||||
}
|
||||
if node.table != nil {
|
||||
index := r.mapping[s[0]]
|
||||
if int(index) == r.tableSize {
|
||||
break
|
||||
}
|
||||
node = node.table[index]
|
||||
s = s[1:]
|
||||
n++
|
||||
} else if node.prefix != "" && HasPrefix(s, node.prefix) {
|
||||
n += len(node.prefix)
|
||||
s = s[len(node.prefix):]
|
||||
node = node.next
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// genericReplacer is the fully generic algorithm.
// It's used as a fallback when nothing faster can be used.
type genericReplacer struct {
	// root is the root node of the lookup trie holding the keys.
	root trieNode
	// tableSize is the size of a trie node's lookup table. It is the number
	// of unique key bytes.
	tableSize int
	// mapping maps from key bytes to a dense index for trieNode.table.
	mapping [256]byte
}
|
||||
|
||||
func makeGenericReplacer(oldnew []string) *genericReplacer {
|
||||
r := new(genericReplacer)
|
||||
// Find each byte used, then assign them each an index.
|
||||
for i := 0; i < len(oldnew); i += 2 {
|
||||
key := oldnew[i]
|
||||
for j := 0; j < len(key); j++ {
|
||||
r.mapping[key[j]] = 1
|
||||
}
|
||||
}
|
||||
|
||||
for _, b := range r.mapping {
|
||||
r.tableSize += int(b)
|
||||
}
|
||||
|
||||
var index byte
|
||||
for i, b := range r.mapping {
|
||||
if b == 0 {
|
||||
r.mapping[i] = byte(r.tableSize)
|
||||
} else {
|
||||
r.mapping[i] = index
|
||||
index++
|
||||
}
|
||||
}
|
||||
// Ensure root node uses a lookup table (for performance).
|
||||
r.root.table = make([]*trieNode, r.tableSize)
|
||||
|
||||
for i := 0; i < len(oldnew); i += 2 {
|
||||
r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// appendSliceWriter is a byte slice that grows by appending whatever is
// written to it.
type appendSliceWriter []byte

// Write writes to the buffer to satisfy [io.Writer].
// It always reports len(p) bytes written and a nil error.
func (w *appendSliceWriter) Write(p []byte) (int, error) {
	grown := append(*w, p...)
	*w = grown
	return len(p), nil
}

// WriteString writes to the buffer without string->[]byte->string allocations.
// It always reports len(s) bytes written and a nil error.
func (w *appendSliceWriter) WriteString(s string) (int, error) {
	grown := append(*w, s...)
	*w = grown
	return len(s), nil
}
|
||||
|
||||
// stringWriter wraps an [io.Writer] and gives it a WriteString method.
type stringWriter struct {
	w io.Writer
}

// WriteString converts s to a byte slice and passes it to the wrapped
// writer's Write method.
func (w stringWriter) WriteString(s string) (int, error) {
	data := []byte(s)
	return w.w.Write(data)
}
|
||||
|
||||
func getStringWriter(w io.Writer) io.StringWriter {
|
||||
sw, ok := w.(io.StringWriter)
|
||||
if !ok {
|
||||
sw = stringWriter{w}
|
||||
}
|
||||
return sw
|
||||
}
|
||||
|
||||
func (r *genericReplacer) Replace(s string) string {
|
||||
buf := make(appendSliceWriter, 0, len(s))
|
||||
r.WriteString(&buf, s)
|
||||
return string(buf)
|
||||
}
|
||||
|
||||
func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
|
||||
sw := getStringWriter(w)
|
||||
var last, wn int
|
||||
var prevMatchEmpty bool
|
||||
for i := 0; i <= len(s); {
|
||||
// Fast path: s[i] is not a prefix of any pattern.
|
||||
if i != len(s) && r.root.priority == 0 {
|
||||
index := int(r.mapping[s[i]])
|
||||
if index == r.tableSize || r.root.table[index] == nil {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Ignore the empty match iff the previous loop found the empty match.
|
||||
val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
|
||||
prevMatchEmpty = match && keylen == 0
|
||||
if match {
|
||||
wn, err = sw.WriteString(s[last:i])
|
||||
n += wn
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
wn, err = sw.WriteString(val)
|
||||
n += wn
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
i += keylen
|
||||
last = i
|
||||
continue
|
||||
}
|
||||
i++
|
||||
}
|
||||
if last != len(s) {
|
||||
wn, err = sw.WriteString(s[last:])
|
||||
n += wn
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// singleStringReplacer is the implementation that's used when there is only
// one string to replace (and that string has more than one byte).
type singleStringReplacer struct {
	// finder is the string finder built from the pattern to replace.
	finder *stringFinder
	// value is the new string that replaces that pattern when it's found.
	value string
}
|
||||
|
||||
func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
|
||||
return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
|
||||
}
|
||||
|
||||
func (r *singleStringReplacer) Replace(s string) string {
|
||||
var buf Builder
|
||||
i, matched := 0, false
|
||||
for {
|
||||
match := r.finder.next(s[i:])
|
||||
if match == -1 {
|
||||
break
|
||||
}
|
||||
matched = true
|
||||
buf.Grow(match + len(r.value))
|
||||
buf.WriteString(s[i : i+match])
|
||||
buf.WriteString(r.value)
|
||||
i += match + len(r.finder.pattern)
|
||||
}
|
||||
if !matched {
|
||||
return s
|
||||
}
|
||||
buf.WriteString(s[i:])
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
|
||||
sw := getStringWriter(w)
|
||||
var i, wn int
|
||||
for {
|
||||
match := r.finder.next(s[i:])
|
||||
if match == -1 {
|
||||
break
|
||||
}
|
||||
wn, err = sw.WriteString(s[i : i+match])
|
||||
n += wn
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
wn, err = sw.WriteString(r.value)
|
||||
n += wn
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
i += match + len(r.finder.pattern)
|
||||
}
|
||||
wn, err = sw.WriteString(s[i:])
|
||||
n += wn
|
||||
return
|
||||
}
|
||||
|
||||
// byteReplacer is the implementation that's used when all the "old"
// and "new" values are single ASCII bytes.
// The array contains replacement bytes indexed by old byte.
type byteReplacer [256]byte

// Replace returns s with every byte mapped through r. When no byte of s
// changes, s itself is returned and nothing is allocated.
func (r *byteReplacer) Replace(s string) string {
	// Find the first byte that the table actually changes.
	first := 0
	for first < len(s) && r[s[first]] == s[first] {
		first++
	}
	if first == len(s) {
		return s
	}
	out := []byte(s)
	for i := first; i < len(out); i++ {
		out[i] = r[out[i]]
	}
	return string(out)
}
|
||||
|
||||
func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
|
||||
sw := getStringWriter(w)
|
||||
last := 0
|
||||
for i := 0; i < len(s); i++ {
|
||||
b := s[i]
|
||||
if r[b] == b {
|
||||
continue
|
||||
}
|
||||
if last != i {
|
||||
wn, err := sw.WriteString(s[last:i])
|
||||
n += wn
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
last = i + 1
|
||||
nw, err := w.Write(r[b : int(b)+1])
|
||||
n += nw
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
if last != len(s) {
|
||||
nw, err := sw.WriteString(s[last:])
|
||||
n += nw
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// byteStringReplacer is the implementation that's used when all the
// "old" values are single ASCII bytes but the "new" values vary in size.
type byteStringReplacer struct {
	// replacements contains replacement byte slices indexed by old byte.
	// A nil []byte means that the old byte should not be replaced.
	replacements [256][]byte
	// toReplace keeps a list of bytes to replace. Depending on length of toReplace
	// and length of target string it may be faster to use Count, or a plain loop.
	// We store single byte as a string, because Count takes a string.
	toReplace []string
}

// countCutOff controls the ratio of a string length to a number of replacements
// at which (*byteStringReplacer).Replace switches algorithms.
// For strings with a higher ratio of length to replacements than that value,
// we call Count for each replacement from toReplace.
// For strings with a lower ratio we use a simple loop, because of Count overhead.
// countCutOff is an empirically determined overhead multiplier.
// TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
const countCutOff = 8

// Replace returns a copy of s with every byte that has a non-nil entry in
// r.replacements replaced by that entry. It first computes the exact output
// size (via Count or a plain loop, chosen by countCutOff), so the result
// buffer is allocated once. If no byte needs replacing, s itself is returned.
func (r *byteStringReplacer) Replace(s string) string {
	newSize := len(s)
	anyChanges := false
	// Is it faster to use Count?
	if len(r.toReplace)*countCutOff <= len(s) {
		for _, x := range r.toReplace {
			if c := Count(s, x); c != 0 {
				// The -1 is because we are replacing 1 byte with len(replacements[b]) bytes.
				newSize += c * (len(r.replacements[x[0]]) - 1)
				anyChanges = true
			}
		}
	} else {
		for i := 0; i < len(s); i++ {
			b := s[i]
			if r.replacements[b] != nil {
				// See above for explanation of -1
				newSize += len(r.replacements[b]) - 1
				anyChanges = true
			}
		}
	}
	if !anyChanges {
		return s
	}
	buf := make([]byte, newSize)
	// j is the write position in buf.
	j := 0
	for i := 0; i < len(s); i++ {
		b := s[i]
		if r.replacements[b] != nil {
			j += copy(buf[j:], r.replacements[b])
		} else {
			buf[j] = b
			j++
		}
	}
	return string(buf)
}
|
||||
|
||||
func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
|
||||
sw := getStringWriter(w)
|
||||
last := 0
|
||||
for i := 0; i < len(s); i++ {
|
||||
b := s[i]
|
||||
if r.replacements[b] == nil {
|
||||
continue
|
||||
}
|
||||
if last != i {
|
||||
nw, err := sw.WriteString(s[last:i])
|
||||
n += nw
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
last = i + 1
|
||||
nw, err := w.Write(r.replacements[b])
|
||||
n += nw
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
if last != len(s) {
|
||||
var nw int
|
||||
nw, err = sw.WriteString(s[last:])
|
||||
n += nw
|
||||
}
|
||||
return
|
||||
}
|
||||
583
src/strings/replace_test.go
Normal file
583
src/strings/replace_test.go
Normal file
@@ -0,0 +1,583 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
. "strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// htmlEscaper maps the five HTML-special ASCII bytes to entity references.
// Every old string is a single byte and the new strings vary in length.
var htmlEscaper = NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	`"`, "&quot;",
	"'", "&apos;",
)

// htmlUnescaper is the inverse mapping of htmlEscaper: multi-byte entity
// references back to single bytes.
var htmlUnescaper = NewReplacer(
	"&amp;", "&",
	"&lt;", "<",
	"&gt;", ">",
	"&quot;", `"`,
	"&apos;", "'",
)

// The http package's old HTML escaping function.
func oldHTMLEscape(s string) string {
	s = Replace(s, "&", "&amp;", -1)
	s = Replace(s, "<", "&lt;", -1)
	s = Replace(s, ">", "&gt;", -1)
	s = Replace(s, `"`, "&quot;", -1)
	s = Replace(s, "'", "&apos;", -1)
	return s
}

// capitalLetters upper-cases 'a' and 'b' only; both old and new values are
// single bytes.
var capitalLetters = NewReplacer("a", "A", "b", "B")

// TestReplacer tests the replacer implementations. Each case is run through
// both Replace and WriteString, and the byte count reported by WriteString is
// checked against the length of the expected output.
func TestReplacer(t *testing.T) {
	type testCase struct {
		r       *Replacer
		in, out string
	}
	var testCases []testCase

	// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
	str := func(b byte) string {
		return string([]byte{b})
	}
	var s []string

	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
	s = nil
	for i := 0; i < 256; i++ {
		s = append(s, str(byte(i)), str(byte(i+1)))
	}
	inc := NewReplacer(s...)

	// Test cases with 1-byte old strings, 1-byte new strings.
	testCases = append(testCases,
		testCase{capitalLetters, "brad", "BrAd"},
		testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
		testCase{capitalLetters, "", ""},

		testCase{inc, "brad", "csbe"},
		testCase{inc, "\x00\xff", "\x01\x00"},
		testCase{inc, "", ""},

		testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
	)

	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
	s = nil
	for i := 0; i < 256; i++ {
		n := i + 1 - 'a'
		if n < 1 {
			n = 1
		}
		s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
	}
	repeat := NewReplacer(s...)

	// Test cases with 1-byte old strings, variable length new strings.
	testCases = append(testCases,
		testCase{htmlEscaper, "No changes", "No changes"},
		testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
		testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
		testCase{htmlEscaper, "", ""},

		testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
		testCase{repeat, "abba", "abbbba"},
		testCase{repeat, "", ""},

		testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
	)

	// The remaining test cases have variable length old strings.

	testCases = append(testCases,
		testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
		testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
		testCase{htmlUnescaper, "", ""},

		testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},

		testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},

		testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
	)

	// gen1 has multiple old strings of variable length. There is no
	// overall non-empty common prefix, but some pairwise common prefixes.
	gen1 := NewReplacer(
		"aaa", "3[aaa]",
		"aa", "2[aa]",
		"a", "1[a]",
		"i", "i",
		"longerst", "most long",
		"longer", "medium",
		"long", "short",
		"xx", "xx",
		"x", "X",
		"X", "Y",
		"Y", "Z",
	)
	testCases = append(testCases,
		testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
		testCase{gen1, "long, longerst, longer", "short, most long, medium"},
		testCase{gen1, "xxxxx", "xxxxX"},
		testCase{gen1, "XiX", "YiY"},
		testCase{gen1, "", ""},
	)

	// gen2 has multiple old strings with no pairwise common prefix.
	gen2 := NewReplacer(
		"roses", "red",
		"violets", "blue",
		"sugar", "sweet",
	)
	testCases = append(testCases,
		testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
		testCase{gen2, "", ""},
	)

	// gen3 has multiple old strings with an overall common prefix.
	gen3 := NewReplacer(
		"abracadabra", "poof",
		"abracadabrakazam", "splat",
		"abraham", "lincoln",
		"abrasion", "scrape",
		"abraham", "isaac",
	)
	testCases = append(testCases,
		testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
		testCase{gen3, "abrasion abracad", "scrape abracad"},
		testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
		testCase{gen3, "", ""},
	)

	// foo{1,2,3,4} have multiple old strings with an overall common prefix
	// and 1- or 2- byte extensions from the common prefix.
	foo1 := NewReplacer(
		"foo1", "A",
		"foo2", "B",
		"foo3", "C",
	)
	foo2 := NewReplacer(
		"foo1", "A",
		"foo2", "B",
		"foo31", "C",
		"foo32", "D",
	)
	foo3 := NewReplacer(
		"foo11", "A",
		"foo12", "B",
		"foo31", "C",
		"foo32", "D",
	)
	foo4 := NewReplacer(
		"foo12", "B",
		"foo32", "D",
	)
	testCases = append(testCases,
		testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
		testCase{foo1, "", ""},

		testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
		testCase{foo2, "", ""},

		testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
		testCase{foo3, "", ""},

		testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
		testCase{foo4, "", ""},
	)

	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
	allBytes := make([]byte, 256)
	for i := range allBytes {
		allBytes[i] = byte(i)
	}
	allString := string(allBytes)
	genAll := NewReplacer(
		allString, "[all]",
		"\xff", "[ff]",
		"\x00", "[00]",
	)
	testCases = append(testCases,
		testCase{genAll, allString, "[all]"},
		testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
		testCase{genAll, "", ""},
	)

	// Test cases with empty old strings.

	blankToX1 := NewReplacer("", "X")
	blankToX2 := NewReplacer("", "X", "", "")
	blankHighPriority := NewReplacer("", "X", "o", "O")
	blankLowPriority := NewReplacer("o", "O", "", "X")
	blankNoOp1 := NewReplacer("", "")
	blankNoOp2 := NewReplacer("", "", "", "A")
	blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
	testCases = append(testCases,
		testCase{blankToX1, "foo", "XfXoXoX"},
		testCase{blankToX1, "", "X"},

		testCase{blankToX2, "foo", "XfXoXoX"},
		testCase{blankToX2, "", "X"},

		testCase{blankHighPriority, "oo", "XOXOX"},
		testCase{blankHighPriority, "ii", "XiXiX"},
		testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
		testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
		testCase{blankHighPriority, "", "X"},

		testCase{blankLowPriority, "oo", "OOX"},
		testCase{blankLowPriority, "ii", "XiXiX"},
		testCase{blankLowPriority, "oiio", "OXiXiOX"},
		testCase{blankLowPriority, "iooi", "XiOOXiX"},
		testCase{blankLowPriority, "", "X"},

		testCase{blankNoOp1, "foo", "foo"},
		testCase{blankNoOp1, "", ""},

		testCase{blankNoOp2, "foo", "foo"},
		testCase{blankNoOp2, "", ""},

		testCase{blankFoo, "foobarfoobaz", "XRXZX"},
		testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
		testCase{blankFoo, "", "X"},
	)

	// single string replacer

	abcMatcher := NewReplacer("abc", "[match]")

	testCases = append(testCases,
		testCase{abcMatcher, "", ""},
		testCase{abcMatcher, "ab", "ab"},
		testCase{abcMatcher, "abc", "[match]"},
		testCase{abcMatcher, "abcd", "[match]d"},
		testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
	)

	// Issue 6659 cases (more single string replacer)

	noHello := NewReplacer("Hello", "")
	testCases = append(testCases,
		testCase{noHello, "Hello", ""},
		testCase{noHello, "Hellox", "x"},
		testCase{noHello, "xHello", "x"},
		testCase{noHello, "xHellox", "xx"},
	)

	// No-arg test cases.

	nop := NewReplacer()
	testCases = append(testCases,
		testCase{nop, "abc", "abc"},
		testCase{nop, "", ""},
	)

	// Run the test cases.

	for i, tc := range testCases {
		if s := tc.r.Replace(tc.in); s != tc.out {
			t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
		}
		var buf bytes.Buffer
		n, err := tc.r.WriteString(&buf, tc.in)
		if err != nil {
			t.Errorf("%d. WriteString: %v", i, err)
			continue
		}
		got := buf.String()
		if got != tc.out {
			t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
			continue
		}
		if n != len(tc.out) {
			t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
				i, tc.in, n, len(tc.out), tc.out)
		}
	}
}
|
||||
|
||||
var algorithmTestCases = []struct {
|
||||
r *Replacer
|
||||
want string
|
||||
}{
|
||||
{capitalLetters, "*strings.byteReplacer"},
|
||||
{htmlEscaper, "*strings.byteStringReplacer"},
|
||||
{NewReplacer("12", "123"), "*strings.singleStringReplacer"},
|
||||
{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
|
||||
{NewReplacer("", "X"), "*strings.genericReplacer"},
|
||||
{NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
|
||||
}
|
||||
|
||||
// TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
|
||||
func TestPickAlgorithm(t *testing.T) {
|
||||
for i, tc := range algorithmTestCases {
|
||||
got := fmt.Sprintf("%T", tc.r.Replacer())
|
||||
if got != tc.want {
|
||||
t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// errWriter is a writer whose Write always fails.
type errWriter struct{}

// Write ignores p, writes nothing, and returns the error "unwritable".
func (errWriter) Write(p []byte) (n int, err error) {
	return 0, fmt.Errorf("unwritable")
}
|
||||
|
||||
// TestWriteStringError tests that WriteString returns an error
|
||||
// received from the underlying io.Writer.
|
||||
func TestWriteStringError(t *testing.T) {
|
||||
for i, tc := range algorithmTestCases {
|
||||
n, err := tc.r.WriteString(errWriter{}, "abc")
|
||||
if n != 0 || err == nil || err.Error() != "unwritable" {
|
||||
t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestGenericTrieBuilding verifies the structure of the generated trie. There
|
||||
// is one node per line, and the key ending with the current line is in the
|
||||
// trie if it ends with a "+".
|
||||
func TestGenericTrieBuilding(t *testing.T) {
|
||||
testCases := []struct{ in, out string }{
|
||||
{"abc;abdef;abdefgh;xx;xy;z", `-
|
||||
a-
|
||||
.b-
|
||||
..c+
|
||||
..d-
|
||||
...ef+
|
||||
.....gh+
|
||||
x-
|
||||
.x+
|
||||
.y+
|
||||
z+
|
||||
`},
|
||||
{"abracadabra;abracadabrakazam;abraham;abrasion", `-
|
||||
a-
|
||||
.bra-
|
||||
....c-
|
||||
.....adabra+
|
||||
...........kazam+
|
||||
....h-
|
||||
.....am+
|
||||
....s-
|
||||
.....ion+
|
||||
`},
|
||||
{"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
|
||||
X+
|
||||
Y+
|
||||
a+
|
||||
.a+
|
||||
..a+
|
||||
i+
|
||||
l-
|
||||
.ong+
|
||||
....er+
|
||||
......st+
|
||||
x+
|
||||
.x+
|
||||
`},
|
||||
{"foo;;foo;foo1", `+
|
||||
f-
|
||||
.oo+
|
||||
...1+
|
||||
`},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
keys := Split(tc.in, ";")
|
||||
args := make([]string, len(keys)*2)
|
||||
for i, key := range keys {
|
||||
args[i*2] = key
|
||||
}
|
||||
|
||||
got := NewReplacer(args...).PrintTrie()
|
||||
// Remove tabs from tc.out
|
||||
wantbuf := make([]byte, 0, len(tc.out))
|
||||
for i := 0; i < len(tc.out); i++ {
|
||||
if tc.out[i] != '\t' {
|
||||
wantbuf = append(wantbuf, tc.out[i])
|
||||
}
|
||||
}
|
||||
want := string(wantbuf)
|
||||
|
||||
if got != want {
|
||||
t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkGenericNoMatch measures Replace on a 200-byte input that contains
// none of the replacer's old strings.
func BenchmarkGenericNoMatch(b *testing.B) {
	str := Repeat("A", 100) + Repeat("B", 100)
	generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
	for i := 0; i < b.N; i++ {
		generic.Replace(str)
	}
}
|
||||
|
||||
// BenchmarkGenericMatch1 measures Replace on a 200-byte input in which every
// byte matches one of the replacer's single-byte old strings.
func BenchmarkGenericMatch1(b *testing.B) {
	str := Repeat("a", 100) + Repeat("b", 100)
	generic := NewReplacer("a", "A", "b", "B", "12", "123")
	for i := 0; i < b.N; i++ {
		generic.Replace(str)
	}
}
|
||||
|
||||
func BenchmarkGenericMatch2(b *testing.B) {
|
||||
str := Repeat("It's <b>HTML</b>!", 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlUnescaper.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
// benchmarkSingleString measures Replace for a replacer built from a single
// pattern, reporting throughput in bytes of text. The timer is reset after
// the replacer is constructed so that setup is not measured.
func benchmarkSingleString(b *testing.B, pattern, text string) {
	r := NewReplacer(pattern, "[match]")
	b.SetBytes(int64(len(text)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		r.Replace(text)
	}
}

// BenchmarkSingleMaxSkipping searches for a pattern none of whose bytes
// occur in the text.
func BenchmarkSingleMaxSkipping(b *testing.B) {
	benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
}

// BenchmarkSingleLongSuffixFail searches for a pattern whose long suffix
// matches the text everywhere but whose first byte never does.
func BenchmarkSingleLongSuffixFail(b *testing.B) {
	benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
}

// BenchmarkSingleMatch searches text in which the pattern occurs repeatedly.
func BenchmarkSingleMatch(b *testing.B) {
	benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
}
|
||||
|
||||
func BenchmarkByteByteNoMatch(b *testing.B) {
|
||||
str := Repeat("A", 100) + Repeat("B", 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
capitalLetters.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteByteMatch(b *testing.B) {
|
||||
str := Repeat("a", 100) + Repeat("b", 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
capitalLetters.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteStringMatch(b *testing.B) {
|
||||
str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlEscaper.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHTMLEscapeNew(b *testing.B) {
|
||||
str := "I <3 to escape HTML & other text too."
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlEscaper.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHTMLEscapeOld(b *testing.B) {
|
||||
str := "I <3 to escape HTML & other text too."
|
||||
for i := 0; i < b.N; i++ {
|
||||
oldHTMLEscape(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteStringReplacerWriteString(b *testing.B) {
|
||||
str := Repeat("I <3 to escape HTML & other text too.", 100)
|
||||
buf := new(bytes.Buffer)
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlEscaper.WriteString(buf, str)
|
||||
buf.Reset()
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteReplacerWriteString(b *testing.B) {
|
||||
str := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
|
||||
buf := new(bytes.Buffer)
|
||||
for i := 0; i < b.N; i++ {
|
||||
capitalLetters.WriteString(buf, str)
|
||||
buf.Reset()
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
// It performs the same a->A, b->B mapping with two nested Replace calls.
func BenchmarkByteByteReplaces(b *testing.B) {
	str := Repeat("a", 100) + Repeat("b", 100)
	for i := 0; i < b.N; i++ {
		Replace(Replace(str, "a", "A", -1), "b", "B", -1)
	}
}
|
||||
|
||||
// BenchmarkByteByteMap compares byteByteImpl against Map.
// It performs the same a->A, b->B mapping with a rune-mapping function.
func BenchmarkByteByteMap(b *testing.B) {
	str := Repeat("a", 100) + Repeat("b", 100)
	fn := func(r rune) rune {
		switch r {
		case 'a':
			return 'A'
		case 'b':
			return 'B'
		}
		return r
	}
	for i := 0; i < b.N; i++ {
		Map(fn, str)
	}
}
|
||||
|
||||
// mapdata holds the named inputs for BenchmarkMap: one that is pure ASCII and
// one made of multi-byte Greek letters.
var mapdata = []struct{ name, data string }{
	{"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
	{"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
}
|
||||
|
||||
func BenchmarkMap(b *testing.B) {
|
||||
mapidentity := func(r rune) rune {
|
||||
return r
|
||||
}
|
||||
|
||||
b.Run("identity", func(b *testing.B) {
|
||||
for _, md := range mapdata {
|
||||
b.Run(md.name, func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
Map(mapidentity, md.data)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
mapchange := func(r rune) rune {
|
||||
if 'a' <= r && r <= 'z' {
|
||||
return r + 'A' - 'a'
|
||||
}
|
||||
if 'α' <= r && r <= 'ω' {
|
||||
return r + 'Α' - 'α'
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
b.Run("change", func(b *testing.B) {
|
||||
for _, md := range mapdata {
|
||||
b.Run(md.name, func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
Map(mapchange, md.data)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
117
src/strings/search.go
Normal file
117
src/strings/search.go
Normal file
@@ -0,0 +1,117 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings
|
||||
|
||||
// stringFinder efficiently finds strings in a source text. It's implemented
|
||||
// using the Boyer-Moore string search algorithm:
|
||||
// https://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
|
||||
// https://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged
|
||||
// document uses 1-based indexing)
|
||||
type stringFinder struct {
|
||||
// pattern is the string that we are searching for in the text.
|
||||
pattern string
|
||||
|
||||
// badCharSkip[b] contains the distance between the last byte of pattern
|
||||
// and the rightmost occurrence of b in pattern. If b is not in pattern,
|
||||
// badCharSkip[b] is len(pattern).
|
||||
//
|
||||
// Whenever a mismatch is found with byte b in the text, we can safely
|
||||
// shift the matching frame at least badCharSkip[b] until the next time
|
||||
// the matching char could be in alignment.
|
||||
badCharSkip [256]int
|
||||
|
||||
// goodSuffixSkip[i] defines how far we can shift the matching frame given
|
||||
// that the suffix pattern[i+1:] matches, but the byte pattern[i] does
|
||||
// not. There are two cases to consider:
|
||||
//
|
||||
// 1. The matched suffix occurs elsewhere in pattern (with a different
|
||||
// byte preceding it that we might possibly match). In this case, we can
|
||||
// shift the matching frame to align with the next suffix chunk. For
|
||||
// example, the pattern "mississi" has the suffix "issi" next occurring
|
||||
// (in right-to-left order) at index 1, so goodSuffixSkip[3] ==
|
||||
// shift+len(suffix) == 3+4 == 7.
|
||||
//
|
||||
// 2. If the matched suffix does not occur elsewhere in pattern, then the
|
||||
// matching frame may share part of its prefix with the end of the
|
||||
// matching suffix. In this case, goodSuffixSkip[i] will contain how far
|
||||
// to shift the frame to align this portion of the prefix to the
|
||||
// suffix. For example, in the pattern "abcxxxabc", when the first
|
||||
// mismatch from the back is found to be in position 3, the matching
|
||||
// suffix "xxabc" is not found elsewhere in the pattern. However, its
|
||||
// rightmost "abc" (at position 6) is a prefix of the whole pattern, so
|
||||
// goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11.
|
||||
goodSuffixSkip []int
|
||||
}
|
||||
|
||||
func makeStringFinder(pattern string) *stringFinder {
|
||||
f := &stringFinder{
|
||||
pattern: pattern,
|
||||
goodSuffixSkip: make([]int, len(pattern)),
|
||||
}
|
||||
// last is the index of the last character in the pattern.
|
||||
last := len(pattern) - 1
|
||||
|
||||
// Build bad character table.
|
||||
// Bytes not in the pattern can skip one pattern's length.
|
||||
for i := range f.badCharSkip {
|
||||
f.badCharSkip[i] = len(pattern)
|
||||
}
|
||||
// The loop condition is < instead of <= so that the last byte does not
|
||||
// have a zero distance to itself. Finding this byte out of place implies
|
||||
// that it is not in the last position.
|
||||
for i := 0; i < last; i++ {
|
||||
f.badCharSkip[pattern[i]] = last - i
|
||||
}
|
||||
|
||||
// Build good suffix table.
|
||||
// First pass: set each value to the next index which starts a prefix of
|
||||
// pattern.
|
||||
lastPrefix := last
|
||||
for i := last; i >= 0; i-- {
|
||||
if HasPrefix(pattern, pattern[i+1:]) {
|
||||
lastPrefix = i + 1
|
||||
}
|
||||
// lastPrefix is the shift, and (last-i) is len(suffix).
|
||||
f.goodSuffixSkip[i] = lastPrefix + last - i
|
||||
}
|
||||
// Second pass: find repeats of pattern's suffix starting from the front.
|
||||
for i := 0; i < last; i++ {
|
||||
lenSuffix := longestCommonSuffix(pattern, pattern[1:i+1])
|
||||
if pattern[i-lenSuffix] != pattern[last-lenSuffix] {
|
||||
// (last-i) is the shift, and lenSuffix is len(suffix).
|
||||
f.goodSuffixSkip[last-lenSuffix] = lenSuffix + last - i
|
||||
}
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func longestCommonSuffix(a, b string) (i int) {
|
||||
for ; i < len(a) && i < len(b); i++ {
|
||||
if a[len(a)-1-i] != b[len(b)-1-i] {
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// next returns the index in text of the first occurrence of the pattern. If
|
||||
// the pattern is not found, it returns -1.
|
||||
func (f *stringFinder) next(text string) int {
|
||||
i := len(f.pattern) - 1
|
||||
for i < len(text) {
|
||||
// Compare backwards from the end until the first unmatching character.
|
||||
j := len(f.pattern) - 1
|
||||
for j >= 0 && text[i] == f.pattern[j] {
|
||||
i--
|
||||
j--
|
||||
}
|
||||
if j < 0 {
|
||||
return i + 1 // match
|
||||
}
|
||||
i += max(f.badCharSkip[text[i]], f.goodSuffixSkip[j])
|
||||
}
|
||||
return -1
|
||||
}
|
||||
90
src/strings/search_test.go
Normal file
90
src/strings/search_test.go
Normal file
@@ -0,0 +1,90 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strings_test
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
. "strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFinderNext(t *testing.T) {
|
||||
testCases := []struct {
|
||||
pat, text string
|
||||
index int
|
||||
}{
|
||||
{"", "", 0},
|
||||
{"", "abc", 0},
|
||||
{"abc", "", -1},
|
||||
{"abc", "abc", 0},
|
||||
{"d", "abcdefg", 3},
|
||||
{"nan", "banana", 2},
|
||||
{"pan", "anpanman", 2},
|
||||
{"nnaaman", "anpanmanam", -1},
|
||||
{"abcd", "abc", -1},
|
||||
{"abcd", "bcd", -1},
|
||||
{"bcd", "abcd", 1},
|
||||
{"abc", "acca", -1},
|
||||
{"aa", "aaa", 0},
|
||||
{"baa", "aaaaa", -1},
|
||||
{"at that", "which finally halts. at that point", 22},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
got := StringFind(tc.pat, tc.text)
|
||||
want := tc.index
|
||||
if got != want {
|
||||
t.Errorf("stringFind(%q, %q) got %d, want %d\n", tc.pat, tc.text, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFinderCreation(t *testing.T) {
|
||||
testCases := []struct {
|
||||
pattern string
|
||||
bad [256]int
|
||||
suf []int
|
||||
}{
|
||||
{
|
||||
"abc",
|
||||
[256]int{'a': 2, 'b': 1, 'c': 3},
|
||||
[]int{5, 4, 1},
|
||||
},
|
||||
{
|
||||
"mississi",
|
||||
[256]int{'i': 3, 'm': 7, 's': 1},
|
||||
[]int{15, 14, 13, 7, 11, 10, 7, 1},
|
||||
},
|
||||
// From https://www.cs.utexas.edu/~moore/publications/fstrpos.pdf
|
||||
{
|
||||
"abcxxxabc",
|
||||
[256]int{'a': 2, 'b': 1, 'c': 6, 'x': 3},
|
||||
[]int{14, 13, 12, 11, 10, 9, 11, 10, 1},
|
||||
},
|
||||
{
|
||||
"abyxcdeyx",
|
||||
[256]int{'a': 8, 'b': 7, 'c': 4, 'd': 3, 'e': 2, 'y': 1, 'x': 5},
|
||||
[]int{17, 16, 15, 14, 13, 12, 7, 10, 1},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
bad, good := DumpTables(tc.pattern)
|
||||
|
||||
for i, got := range bad {
|
||||
want := tc.bad[i]
|
||||
if want == 0 {
|
||||
want = len(tc.pattern)
|
||||
}
|
||||
if got != want {
|
||||
t.Errorf("boyerMoore(%q) bad['%c']: got %d want %d", tc.pattern, i, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(good, tc.suf) {
|
||||
t.Errorf("boyerMoore(%q) got %v want %v", tc.pattern, good, tc.suf)
|
||||
}
|
||||
}
|
||||
}
|
||||
1246
src/strings/strings.go
Normal file
1246
src/strings/strings.go
Normal file
File diff suppressed because it is too large
Load Diff
2088
src/strings/strings_test.go
Normal file
2088
src/strings/strings_test.go
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user