Initial commit: Go 1.23 release state

2024-09-21 23:49:08 +10:00
commit 17cd57a668
13231 changed files with 3114330 additions and 0 deletions
--- a/test/codegen/README
+++ b/test/codegen/README
@@ -0,0 +1,152 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+The codegen directory contains code generation tests for the gc
+compiler.
+
+
+- Introduction
+
+The test harness compiles Go code inside files in this directory and
+matches the generated assembly (the output of `go tool compile -S`)
+against a set of regexps to be specified in comments that follow a
+special syntax (described below). The test driver is implemented as
+an action within the GOROOT/test test suite, called "asmcheck".
+
+The codegen harness is part of the all.bash test suite, but for
+performance reasons only the codegen tests for the host machine's
+GOARCH are enabled by default, and only on GOOS=linux.
+
+To perform comprehensive tests for all the supported architectures
+(even on a non-Linux system), one can run the following command:
+
+  $ ../../bin/go test cmd/internal/testdir -run='Test/codegen' -all_codegen -v
+
+This is recommended after any change that affects the compiler's code.
+
+The test harness compiles the tests with the same go toolchain that is
+used to run the test. After writing tests for a newly added codegen
+transformation, it can be useful to first run the test harness with a
+toolchain from a released Go version (and verify that the new tests
+fail), and then re-run the tests using the devel toolchain.
+
+
+- Regexps comments syntax
+
+Instructions to match are specified inside plain comments that start
+with an architecture tag, followed by a colon and a quoted Go-style
+regexp to be matched. For example, the following test:
+
+  func Sqrt(x float64) float64 {
+  	   // amd64:"SQRTSD"
+  	   // arm64:"FSQRTD"
+  	   return math.Sqrt(x)
+  }
+
+verifies that math.Sqrt calls are intrinsified to a SQRTSD instruction
+on amd64, and to a FSQRTD instruction on arm64.
+
+It is possible to put multiple architecture checks into the same
+line, as:
+
+  // amd64:"SQRTSD" arm64:"FSQRTD"
+
+although this form should be avoided when doing so would make the
+regexps line excessively long and difficult to read.
+
+Comments that are on their own line will be matched against the first
+subsequent non-comment line. Inline comments are also supported; the
+regexp will be matched against the code found on the same line:
+
+  func Sqrt(x float64) float64 {
+  	   return math.Sqrt(x) // arm:"SQRTD"
+  }
+
+It's possible to specify a comma-separated list of regexps to be
+matched. For example, the following test:
+
+  func TZ8(n uint8) int {
+  	   // amd64:"BSFQ","ORQ\t\\$256"
+  	   return bits.TrailingZeros8(n)
+  }
+
+verifies that the code generated for a bits.TrailingZeros8 call on
+amd64 contains both a "BSFQ" instruction and an "ORQ $256".
+
+Note how the ORQ regex includes a tab char (\t). In the Go assembly
+syntax, operands are separated from opcodes by a tab character.
+
+Regexps can be quoted using either " or `. Special characters must be
+escaped accordingly. Both of these are accepted, and equivalent:
+
+  // amd64:"ADDQ\t\\$3"
+  // amd64:`ADDQ\t\$3`
+
+and they'll match this assembly line:
+
+  ADDQ	$3
+
+Negative matches can be specified using a - before the quoted regexp.
+For example:
+
+  func MoveSmall() {
+  	   x := [...]byte{1, 2, 3, 4, 5, 6, 7}
+  	   copy(x[1:], x[:]) // arm64:-".*memmove"
+  }
+
+verifies that NO memmove call is present in the assembly generated for
+the copy() line.
+
+
+- Architecture specifiers
+
+There are three different ways to specify on which architecture a test
+should be run:
+
+* Specify only the architecture (eg: "amd64"). This indicates that the
+  check should be run on all the supported architecture variants. For
+  instance, arm checks will be run against all supported GOARM
+  variations (5,6,7).
+* Specify both the architecture and a variant, separated by a slash
+  (eg: "arm/7"). This means that the check will be run only on that
+  specific variant.
+* Specify the operating system, the architecture and the variant,
+  separated by slashes (eg: "plan9/386/sse2", "plan9/amd64/"). This is
+  needed in the rare case that you need to do a codegen test affected
+  by a specific operating system; by default, tests are compiled only
+  targeting linux.
+
+
+- Remarks and Caveats
+
+-- Write small test functions
+
+As a general guideline, test functions should be small, to avoid
+possible interactions between unrelated lines of code that may be
+introduced, for example, by the compiler's optimization passes.
+
+Any given line of Go code could get assigned more instructions than
+may appear from reading the source. In particular, matching all MOV
+instructions should be avoided; the compiler may add them for
+unrelated reasons and this may render the test ineffective.
+
+-- Line matching logic
+
+Regexps are always matched from the start of the instructions line.
+This means, for example, that the "MULQ" regexp is equivalent to
+"^MULQ" (^ representing the start of the line), and it will NOT match
+the following assembly line:
+
+  IMULQ	$99, AX
+
+To force a match at any point of the line, ".*MULQ" should be used.
+
+For the same reason, a negative regexp like -"memmove" is not enough
+to make sure that no memmove call is included in the assembly. A
+memmove call looks like this:
+
+  CALL	runtime.memmove(SB)
+
+To make sure that the "memmove" symbol does not appear anywhere in the
+assembly, the negative regexp to be used is -".*memmove".
--- a/test/codegen/addrcalc.go
+++ b/test/codegen/addrcalc.go
@@ -0,0 +1,14 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Make sure we use ADDQ instead of LEAQ when we can.
+
+func f(p *[4][2]int, x int) *int {
+	// amd64:"ADDQ",-"LEAQ"
+	return &p[x][0]
+}
--- a/test/codegen/alloc.go
+++ b/test/codegen/alloc.go
@@ -0,0 +1,34 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// These tests check that allocating a 0-size object does not
+// introduce a call to runtime.newobject.
+
+package codegen
+
+func zeroAllocNew1() *struct{} {
+	// 386:-`CALL\truntime\.newobject`
+	// amd64:-`CALL\truntime\.newobject`
+	// arm:-`CALL\truntime\.newobject`
+	// arm64:-`CALL\truntime\.newobject`
+	return new(struct{})
+}
+
+func zeroAllocNew2() *[0]int {
+	// 386:-`CALL\truntime\.newobject`
+	// amd64:-`CALL\truntime\.newobject`
+	// arm:-`CALL\truntime\.newobject`
+	// arm64:-`CALL\truntime\.newobject`
+	return new([0]int)
+}
+
+func zeroAllocSliceLit() []int {
+	// 386:-`CALL\truntime\.newobject`
+	// amd64:-`CALL\truntime\.newobject`
+	// arm:-`CALL\truntime\.newobject`
+	// arm64:-`CALL\truntime\.newobject`
+	return []int{}
+}
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -0,0 +1,631 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains codegen tests related to arithmetic
+// simplifications and optimizations on integer types.
+// For codegen tests on float types, see floats.go.
+
+// ----------------- //
+//    Addition       //
+// ----------------- //
+
+func AddLargeConst(a uint64, out []uint64) {
+	// ppc64x/power10:"ADD\t[$]4294967296,"
+	// ppc64x/power9:"MOVD\t[$]1", "SLD\t[$]32", "ADD\tR[0-9]*"
+	// ppc64x/power8:"MOVD\t[$]1", "SLD\t[$]32", "ADD\tR[0-9]*"
+	out[0] = a + 0x100000000
+	// ppc64x/power10:"ADD\t[$]-8589934592,"
+	// ppc64x/power9:"MOVD\t[$]-1", "SLD\t[$]33", "ADD\tR[0-9]*"
+	// ppc64x/power8:"MOVD\t[$]-1", "SLD\t[$]33", "ADD\tR[0-9]*"
+	out[1] = a + 0xFFFFFFFE00000000
+	// ppc64x/power10:"ADD\t[$]1234567,"
+	// ppc64x/power9:"ADDIS\t[$]19,", "ADD\t[$]-10617,"
+	// ppc64x/power8:"ADDIS\t[$]19,", "ADD\t[$]-10617,"
+	out[2] = a + 1234567
+	// ppc64x/power10:"ADD\t[$]-1234567,"
+	// ppc64x/power9:"ADDIS\t[$]-19,", "ADD\t[$]10617,"
+	// ppc64x/power8:"ADDIS\t[$]-19,", "ADD\t[$]10617,"
+	out[3] = a - 1234567
+	// ppc64x/power10:"ADD\t[$]2147450879,"
+	// ppc64x/power9:"ADDIS\t[$]32767,", "ADD\t[$]32767,"
+	// ppc64x/power8:"ADDIS\t[$]32767,", "ADD\t[$]32767,"
+	out[4] = a + 0x7FFF7FFF
+	// ppc64x/power10:"ADD\t[$]-2147483647,"
+	// ppc64x/power9:"ADDIS\t[$]-32768,", "ADD\t[$]1,"
+	// ppc64x/power8:"ADDIS\t[$]-32768,", "ADD\t[$]1,"
+	out[5] = a - 2147483647
+	// ppc64x:"ADDIS\t[$]-32768,", -"ADD\t"
+	out[6] = a - 2147483648
+	// ppc64x:"ADD\t[$]2147450880,", -"ADDIS\t"
+	out[7] = a + 0x7FFF8000
+	// ppc64x:"ADD\t[$]-32768,", -"ADDIS\t"
+	out[8] = a - 32768
+	// ppc64x/power10:"ADD\t[$]-32769,"
+	// ppc64x/power9:"ADDIS\t[$]-1,", "ADD\t[$]32767,"
+	// ppc64x/power8:"ADDIS\t[$]-1,", "ADD\t[$]32767,"
+	out[9] = a - 32769
+}
+
+// ----------------- //
+//    Subtraction    //
+// ----------------- //
+
+var ef int
+
+func SubMem(arr []int, b, c, d int) int {
+	// 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
+	// amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
+	arr[2] -= b
+	// 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
+	// amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
+	arr[3] -= b
+	// 386:`DECL\s16\([A-Z]+\)`
+	arr[4]--
+	// 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
+	arr[5] -= 20
+	// 386:`SUBL\s\([A-Z]+\)\([A-Z]+\*4\),\s[A-Z]+`
+	ef -= arr[b]
+	// 386:`SUBL\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+\*4\)`
+	arr[c] -= b
+	// 386:`ADDL\s[$]-15,\s\([A-Z]+\)\([A-Z]+\*4\)`
+	arr[d] -= 15
+	// 386:`DECL\s\([A-Z]+\)\([A-Z]+\*4\)`
+	arr[b]--
+	// amd64:`DECQ\s64\([A-Z]+\)`
+	arr[8]--
+	// 386:"SUBL\t4"
+	// amd64:"SUBQ\t8"
+	return arr[0] - arr[1]
+}
+
+func SubFromConst(a int) int {
+	// ppc64x: `SUBC\tR[0-9]+,\s[$]40,\sR`
+	b := 40 - a
+	return b
+}
+
+func SubFromConstNeg(a int) int {
+	// ppc64x: `ADD\t[$]40,\sR[0-9]+,\sR`
+	c := 40 - (-a)
+	return c
+}
+
+func SubSubFromConst(a int) int {
+	// ppc64x: `ADD\t[$]20,\sR[0-9]+,\sR`
+	c := 40 - (20 - a)
+	return c
+}
+
+func AddSubFromConst(a int) int {
+	// ppc64x: `SUBC\tR[0-9]+,\s[$]60,\sR`
+	c := 40 + (20 - a)
+	return c
+}
+
+func NegSubFromConst(a int) int {
+	// ppc64x: `ADD\t[$]-20,\sR[0-9]+,\sR`
+	c := -(20 - a)
+	return c
+}
+
+func NegAddFromConstNeg(a int) int {
+	// ppc64x: `SUBC\tR[0-9]+,\s[$]40,\sR`
+	c := -(-40 + a)
+	return c
+}
+
+func SubSubNegSimplify(a, b int) int {
+	// amd64:"NEGQ"
+	// ppc64x:"NEG"
+	r := (a - b) - a
+	return r
+}
+
+func SubAddSimplify(a, b int) int {
+	// amd64:-"SUBQ",-"ADDQ"
+	// ppc64x:-"SUB",-"ADD"
+	r := a + (b - a)
+	return r
+}
+
+func SubAddSimplify2(a, b, c int) (int, int, int, int, int, int) {
+	// amd64:-"ADDQ"
+	r := (a + b) - (a + c)
+	// amd64:-"ADDQ"
+	r1 := (a + b) - (c + a)
+	// amd64:-"ADDQ"
+	r2 := (b + a) - (a + c)
+	// amd64:-"ADDQ"
+	r3 := (b + a) - (c + a)
+	// amd64:-"SUBQ"
+	r4 := (a - c) + (c + b)
+	// amd64:-"SUBQ"
+	r5 := (a - c) + (b + c)
+	return r, r1, r2, r3, r4, r5
+}
+
+func SubAddNegSimplify(a, b int) int {
+	// amd64:"NEGQ",-"ADDQ",-"SUBQ"
+	// ppc64x:"NEG",-"ADD",-"SUB"
+	r := a - (b + a)
+	return r
+}
+
+func AddAddSubSimplify(a, b, c int) int {
+	// amd64:-"SUBQ"
+	// ppc64x:-"SUB"
+	r := a + (b + (c - a))
+	return r
+}
+
+// -------------------- //
+//    Multiplication    //
+// -------------------- //
+
+func Pow2Muls(n1, n2 int) (int, int) {
+	// amd64:"SHLQ\t[$]5",-"IMULQ"
+	// 386:"SHLL\t[$]5",-"IMULL"
+	// arm:"SLL\t[$]5",-"MUL"
+	// arm64:"LSL\t[$]5",-"MUL"
+	// ppc64x:"SLD\t[$]5",-"MUL"
+	a := n1 * 32
+
+	// amd64:"SHLQ\t[$]6",-"IMULQ"
+	// 386:"SHLL\t[$]6",-"IMULL"
+	// arm:"SLL\t[$]6",-"MUL"
+	// arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
+	// ppc64x:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
+	b := -64 * n2
+
+	return a, b
+}
+
+func Mul_96(n int) int {
+	// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ`
+	// 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
+	// arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
+	// arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
+	// s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
+	return n * 96
+}
+
+func Mul_n120(n int) int {
+	// s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
+	return n * -120
+}
+
+func MulMemSrc(a []uint32, b []float32) {
+	// 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
+	a[0] *= a[1]
+	// 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
+	// amd64:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
+	b[0] *= b[1]
+}
+
+// Multiplications merging tests
+
+func MergeMuls1(n int) int {
+	// amd64:"IMUL3Q\t[$]46"
+	// 386:"IMUL3L\t[$]46"
+	// ppc64x:"MULLD\t[$]46"
+	return 15*n + 31*n // 46n
+}
+
+func MergeMuls2(n int) int {
+	// amd64:"IMUL3Q\t[$]23","(ADDQ\t[$]29)|(LEAQ\t29)"
+	// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
+	// ppc64x/power9:"MADDLD",-"MULLD\t[$]23",-"ADD\t[$]29"
+	// ppc64x/power8:"MULLD\t[$]23","ADD\t[$]29"
+	return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
+}
+
+func MergeMuls3(a, n int) int {
+	// amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
+	// 386:"ADDL\t[$]19",-"IMULL\t[$]19"
+	// ppc64x:"ADD\t[$]19",-"MULLD\t[$]19"
+	return a*n + 19*n // (a+19)n
+}
+
+func MergeMuls4(n int) int {
+	// amd64:"IMUL3Q\t[$]14"
+	// 386:"IMUL3L\t[$]14"
+	// ppc64x:"MULLD\t[$]14"
+	return 23*n - 9*n // 14n
+}
+
+func MergeMuls5(a, n int) int {
+	// amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
+	// 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
+	// ppc64x:"ADD\t[$]-19",-"MULLD\t[$]19"
+	return a*n - 19*n // (a-19)n
+}
+
+// -------------- //
+//    Division    //
+// -------------- //
+
+func DivMemSrc(a []float64) {
+	// 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
+	// amd64:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
+	a[0] /= a[1]
+}
+
+func Pow2Divs(n1 uint, n2 int) (uint, int) {
+	// 386:"SHRL\t[$]5",-"DIVL"
+	// amd64:"SHRQ\t[$]5",-"DIVQ"
+	// arm:"SRL\t[$]5",-".*udiv"
+	// arm64:"LSR\t[$]5",-"UDIV"
+	// ppc64x:"SRD"
+	a := n1 / 32 // unsigned
+
+	// amd64:"SARQ\t[$]6",-"IDIVQ"
+	// 386:"SARL\t[$]6",-"IDIVL"
+	// arm:"SRA\t[$]6",-".*udiv"
+	// arm64:"ASR\t[$]6",-"SDIV"
+	// ppc64x:"SRAD"
+	b := n2 / 64 // signed
+
+	return a, b
+}
+
+// Check that constant divisions get turned into MULs
+func ConstDivs(n1 uint, n2 int) (uint, int) {
+	// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
+	// 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
+	// arm64:`MOVD`,`UMULH`,-`DIV`
+	// arm:`MOVW`,`MUL`,-`.*udiv`
+	a := n1 / 17 // unsigned
+
+	// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
+	// 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
+	// arm64:`SMULH`,-`DIV`
+	// arm:`MOVW`,`MUL`,-`.*udiv`
+	b := n2 / 17 // signed
+
+	return a, b
+}
+
+func FloatDivs(a []float32) float32 {
+	// amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
+	// 386/sse2:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
+	return a[1] / a[2]
+}
+
+func Pow2Mods(n1 uint, n2 int) (uint, int) {
+	// 386:"ANDL\t[$]31",-"DIVL"
+	// amd64:"ANDL\t[$]31",-"DIVQ"
+	// arm:"AND\t[$]31",-".*udiv"
+	// arm64:"AND\t[$]31",-"UDIV"
+	// ppc64x:"RLDICL"
+	a := n1 % 32 // unsigned
+
+	// 386:"SHRL",-"IDIVL"
+	// amd64:"SHRQ",-"IDIVQ"
+	// arm:"SRA",-".*udiv"
+	// arm64:"ASR",-"REM"
+	// ppc64x:"SRAD"
+	b := n2 % 64 // signed
+
+	return a, b
+}
+
+// Check that signed divisibility checks get converted to AND on low bits
+func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
+	// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
+	// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
+	// arm:"AND\t[$]63",-".*udiv",-"SRA"
+	// arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
+	// ppc64x:"ANDCC",-"RLDICL",-"SRAD",-"CMP"
+	a := n1%64 == 0 // signed divisible
+
+	// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
+	// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
+	// arm:"AND\t[$]63",-".*udiv",-"SRA"
+	// arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
+	// ppc64x:"ANDCC",-"RLDICL",-"SRAD",-"CMP"
+	b := n2%64 != 0 // signed indivisible
+
+	return a, b
+}
+
+// Check that constant modulo divs get turned into MULs
+func ConstMods(n1 uint, n2 int) (uint, int) {
+	// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
+	// 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
+	// arm64:`MOVD`,`UMULH`,-`DIV`
+	// arm:`MOVW`,`MUL`,-`.*udiv`
+	a := n1 % 17 // unsigned
+
+	// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
+	// 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
+	// arm64:`SMULH`,-`DIV`
+	// arm:`MOVW`,`MUL`,-`.*udiv`
+	b := n2 % 17 // signed
+
+	return a, b
+}
+
+// Check that divisibility checks x%c==0 are converted to MULs and rotates
+func DivisibleU(n uint) (bool, bool) {
+	// amd64:"MOVQ\t[$]-6148914691236517205","IMULQ","ROLQ\t[$]63",-"DIVQ"
+	// 386:"IMUL3L\t[$]-1431655765","ROLL\t[$]31",-"DIVL"
+	// arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ROR",-"DIV"
+	// arm:"MUL","CMP\t[$]715827882",-".*udiv"
+	// ppc64x:"MULLD","ROTL\t[$]63"
+	even := n%6 == 0
+
+	// amd64:"MOVQ\t[$]-8737931403336103397","IMULQ",-"ROLQ",-"DIVQ"
+	// 386:"IMUL3L\t[$]678152731",-"ROLL",-"DIVL"
+	// arm64:"MOVD\t[$]-8737931403336103397","MUL",-"ROR",-"DIV"
+	// arm:"MUL","CMP\t[$]226050910",-".*udiv"
+	// ppc64x:"MULLD",-"ROTL"
+	odd := n%19 == 0
+
+	return even, odd
+}
+
+func Divisible(n int) (bool, bool) {
+	// amd64:"IMULQ","ADD","ROLQ\t[$]63",-"DIVQ"
+	// 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"IDIVL"
+	// arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ADD\tR","ROR",-"DIV"
+	// arm:"MUL","ADD\t[$]715827882",-".*udiv"
+	// ppc64x/power8:"MULLD","ADD","ROTL\t[$]63"
+	// ppc64x/power9:"MADDLD","ROTL\t[$]63"
+	even := n%6 == 0
+
+	// amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
+	// 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"IDIVL"
+	// arm64:"MUL","MOVD\t[$]485440633518672410","ADD",-"ROR",-"DIV"
+	// arm:"MUL","ADD\t[$]113025455",-".*udiv"
+	// ppc64x/power8:"MULLD","ADD",-"ROTL"
+	// ppc64x/power9:"MADDLD",-"ROTL"
+	odd := n%19 == 0
+
+	return even, odd
+}
+
+// Check that fix-up code is not generated for divisions where it has been proven
+// that the divisor is not -1 or that the dividend is > MinIntNN.
+func NoFix64A(divr int64) (int64, int64) {
+	var d int64 = 42
+	var e int64 = 84
+	if divr > 5 {
+		d /= divr // amd64:-"JMP"
+		e %= divr // amd64:-"JMP"
+		// The following statement is to avoid conflict between the above check
+		// and the normal JMP generated at the end of the block.
+		d += e
+	}
+	return d, e
+}
+
+func NoFix64B(divd int64) (int64, int64) {
+	var d int64
+	var e int64
+	var divr int64 = -1
+	if divd > -9223372036854775808 {
+		d = divd / divr // amd64:-"JMP"
+		e = divd % divr // amd64:-"JMP"
+		d += e
+	}
+	return d, e
+}
+
+func NoFix32A(divr int32) (int32, int32) {
+	var d int32 = 42
+	var e int32 = 84
+	if divr > 5 {
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		d /= divr
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		e %= divr
+		d += e
+	}
+	return d, e
+}
+
+func NoFix32B(divd int32) (int32, int32) {
+	var d int32
+	var e int32
+	var divr int32 = -1
+	if divd > -2147483648 {
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		d = divd / divr
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		e = divd % divr
+		d += e
+	}
+	return d, e
+}
+
+func NoFix16A(divr int16) (int16, int16) {
+	var d int16 = 42
+	var e int16 = 84
+	if divr > 5 {
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		d /= divr
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		e %= divr
+		d += e
+	}
+	return d, e
+}
+
+func NoFix16B(divd int16) (int16, int16) {
+	var d int16
+	var e int16
+	var divr int16 = -1
+	if divd > -32768 {
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		d = divd / divr
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		e = divd % divr
+		d += e
+	}
+	return d, e
+}
+
+// Check that len() and cap() calls divided by powers of two are
+// optimized into shifts and ands
+
+func LenDiv1(a []int) int {
+	// 386:"SHRL\t[$]10"
+	// amd64:"SHRQ\t[$]10"
+	// arm64:"LSR\t[$]10",-"SDIV"
+	// arm:"SRL\t[$]10",-".*udiv"
+	// ppc64x:"SRD\t[$]10"
+	return len(a) / 1024
+}
+
+func LenDiv2(s string) int {
+	// 386:"SHRL\t[$]11"
+	// amd64:"SHRQ\t[$]11"
+	// arm64:"LSR\t[$]11",-"SDIV"
+	// arm:"SRL\t[$]11",-".*udiv"
+	// ppc64x:"SRD\t[$]11"
+	return len(s) / (4097 >> 1)
+}
+
+func LenMod1(a []int) int {
+	// 386:"ANDL\t[$]1023"
+	// amd64:"ANDL\t[$]1023"
+	// arm64:"AND\t[$]1023",-"SDIV"
+	// arm/6:"AND",-".*udiv"
+	// arm/7:"BFC",-".*udiv",-"AND"
+	// ppc64x:"RLDICL"
+	return len(a) % 1024
+}
+
+func LenMod2(s string) int {
+	// 386:"ANDL\t[$]2047"
+	// amd64:"ANDL\t[$]2047"
+	// arm64:"AND\t[$]2047",-"SDIV"
+	// arm/6:"AND",-".*udiv"
+	// arm/7:"BFC",-".*udiv",-"AND"
+	// ppc64x:"RLDICL"
+	return len(s) % (4097 >> 1)
+}
+
+func CapDiv(a []int) int {
+	// 386:"SHRL\t[$]12"
+	// amd64:"SHRQ\t[$]12"
+	// arm64:"LSR\t[$]12",-"SDIV"
+	// arm:"SRL\t[$]12",-".*udiv"
+	// ppc64x:"SRD\t[$]12"
+	return cap(a) / ((1 << 11) + 2048)
+}
+
+func CapMod(a []int) int {
+	// 386:"ANDL\t[$]4095"
+	// amd64:"ANDL\t[$]4095"
+	// arm64:"AND\t[$]4095",-"SDIV"
+	// arm/6:"AND",-".*udiv"
+	// arm/7:"BFC",-".*udiv",-"AND"
+	// ppc64x:"RLDICL"
+	return cap(a) % ((1 << 11) + 2048)
+}
+
+func AddMul(x int) int {
+	// amd64:"LEAQ\t1"
+	return 2*x + 1
+}
+
+func MULA(a, b, c uint32) (uint32, uint32, uint32) {
+	// arm:`MULA`,-`MUL\s`
+	// arm64:`MADDW`,-`MULW`
+	r0 := a*b + c
+	// arm:`MULA`,-`MUL\s`
+	// arm64:`MADDW`,-`MULW`
+	r1 := c*79 + a
+	// arm:`ADD`,-`MULA`,-`MUL\s`
+	// arm64:`ADD`,-`MADD`,-`MULW`
+	// ppc64x:`ADD`,-`MULLD`
+	r2 := b*64 + c
+	return r0, r1, r2
+}
+
+func MULS(a, b, c uint32) (uint32, uint32, uint32) {
+	// arm/7:`MULS`,-`MUL\s`
+	// arm/6:`SUB`,`MUL\s`,-`MULS`
+	// arm64:`MSUBW`,-`MULW`
+	r0 := c - a*b
+	// arm/7:`MULS`,-`MUL\s`
+	// arm/6:`SUB`,`MUL\s`,-`MULS`
+	// arm64:`MSUBW`,-`MULW`
+	r1 := a - c*79
+	// arm/7:`SUB`,-`MULS`,-`MUL\s`
+	// arm64:`SUB`,-`MSUBW`,-`MULW`
+	// ppc64x:`SUB`,-`MULLD`
+	r2 := c - b*64
+	return r0, r1, r2
+}
+
+func addSpecial(a, b, c uint32) (uint32, uint32, uint32) {
+	// amd64:`INCL`
+	a++
+	// amd64:`DECL`
+	b--
+	// amd64:`SUBL.*-128`
+	c += 128
+	return a, b, c
+}
+
+// Divide -> shift rules usually require fixup for negative inputs.
+// If the input is non-negative, make sure the fixup is eliminated.
+func divInt(v int64) int64 {
+	if v < 0 {
+		return 0
+	}
+	// amd64:-`.*SARQ.*63,`, -".*SHRQ", ".*SARQ.*[$]9,"
+	return v / 512
+}
+
+// The reassociate rules "x - (z + C) -> (x - z) - C" and
+// "(z + C) -x -> C + (z - x)" can optimize the following cases.
+func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) {
+	// arm64:"SUB","ADD\t[$]2"
+	// ppc64x:"SUB","ADD\t[$]2"
+	r0 := (i0 + 3) - (j0 + 1)
+	// arm64:"SUB","SUB\t[$]4"
+	// ppc64x:"SUB","ADD\t[$]-4"
+	r1 := (i1 - 3) - (j1 + 1)
+	// arm64:"SUB","ADD\t[$]4"
+	// ppc64x:"SUB","ADD\t[$]4"
+	r2 := (i2 + 3) - (j2 - 1)
+	// arm64:"SUB","SUB\t[$]2"
+	// ppc64x:"SUB","ADD\t[$]-2"
+	r3 := (i3 - 3) - (j3 - 1)
+	return r0, r1, r2, r3
+}
+
+// The reassociate rules "x - (z + C) -> (x - z) - C" and
+// "(C - z) - x -> C - (z + x)" can optimize the following cases.
+func constantFold2(i0, j0, i1, j1 int) (int, int) {
+	// arm64:"ADD","MOVD\t[$]2","SUB"
+	// ppc64x: `SUBC\tR[0-9]+,\s[$]2,\sR`
+	r0 := (3 - i0) - (j0 + 1)
+	// arm64:"ADD","MOVD\t[$]4","SUB"
+	// ppc64x: `SUBC\tR[0-9]+,\s[$]4,\sR`
+	r1 := (3 - i1) - (j1 - 1)
+	return r0, r1
+}
+
+func constantFold3(i, j int) int {
+	// arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL"
+	// ppc64x:"MULLD\t[$]30","MULLD"
+	r := (5 * i) * (6 * j)
+	return r
+}
--- a/test/codegen/atomics.go
+++ b/test/codegen/atomics.go
@@ -0,0 +1,27 @@
+// asmcheck
+
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// These tests check that atomic instructions without dynamic checks are
+// generated for architectures that support them
+
+package codegen
+
+import "sync/atomic"
+
+type Counter struct {
+	count int32
+}
+
+func (c *Counter) Increment() {
+	// Check that ARM64 v8.0 has both an atomic instruction (LDADDALW) and a dynamic check
+	// (for arm64HasATOMICS), while ARM64 v8.1 has only atomic and no dynamic check.
+	// arm64/v8.0:"LDADDALW"
+	// arm64/v8.1:"LDADDALW"
+	// arm64/v8.0:".*arm64HasATOMICS"
+	// arm64/v8.1:-".*arm64HasATOMICS"
+	atomic.AddInt32(&c.count, 1)
+}
+
--- a/test/codegen/bitfield.go
+++ b/test/codegen/bitfield.go
@@ -0,0 +1,368 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains codegen tests related to bit field
+// insertion/extraction simplifications/optimizations.
+
+func extr1(x, x2 uint64) uint64 {
+	return x<<7 + x2>>57 // arm64:"EXTR\t[$]57,"
+}
+
+func extr2(x, x2 uint64) uint64 {
+	return x<<7 | x2>>57 // arm64:"EXTR\t[$]57,"
+}
+
+func extr3(x, x2 uint64) uint64 {
+	return x<<7 ^ x2>>57 // arm64:"EXTR\t[$]57,"
+}
+
+func extr4(x, x2 uint32) uint32 {
+	return x<<7 + x2>>25 // arm64:"EXTRW\t[$]25,"
+}
+
+func extr5(x, x2 uint32) uint32 {
+	return x<<7 | x2>>25 // arm64:"EXTRW\t[$]25,"
+}
+
+func extr6(x, x2 uint32) uint32 {
+	return x<<7 ^ x2>>25 // arm64:"EXTRW\t[$]25,"
+}
+
+// check 32-bit shift masking
+func mask32(x uint32) uint32 {
+	return (x << 29) >> 29 // arm64:"AND\t[$]7, R[0-9]+",-"LSR",-"LSL"
+}
+
+// check 16-bit shift masking
+func mask16(x uint16) uint16 {
+	return (x << 14) >> 14 // arm64:"AND\t[$]3, R[0-9]+",-"LSR",-"LSL"
+}
+
+// check 8-bit shift masking
+func mask8(x uint8) uint8 {
+	return (x << 7) >> 7 // arm64:"AND\t[$]1, R[0-9]+",-"LSR",-"LSL"
+}
+
+func maskshift(x uint64) uint64 {
+	// arm64:"AND\t[$]4095, R[0-9]+",-"LSL",-"LSR",-"UBFIZ",-"UBFX"
+	return ((x << 5) & (0xfff << 5)) >> 5
+}
+
+// bitfield ops
+// bfi
+func bfi1(x, y uint64) uint64 {
+	// arm64:"BFI\t[$]4, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+	return ((x & 0xfff) << 4) | (y & 0xffffffffffff000f)
+}
+
+func bfi2(x, y uint64) uint64 {
+	// arm64:"BFI\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND"
+	return (x << 24 >> 12) | (y & 0xfff0000000000fff)
+}
+
+// bfxil
+func bfxil1(x, y uint64) uint64 {
+	// arm64:"BFXIL\t[$]5, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+	return ((x >> 5) & 0xfff) | (y & 0xfffffffffffff000)
+}
+
+func bfxil2(x, y uint64) uint64 {
+	// arm64:"BFXIL\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND"
+	return (x << 12 >> 24) | (y & 0xffffff0000000000)
+}
+
+// sbfiz
+// merge shifts into sbfiz: (x << lc) >> rc && lc > rc.
+func sbfiz1(x int64) int64 {
+	// arm64:"SBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR"
+	return (x << 4) >> 3
+}
+
+// merge shift and sign-extension into sbfiz.
+func sbfiz2(x int32) int64 {
+	return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]29",-"LSL"
+}
+
+func sbfiz3(x int16) int64 {
+	return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]13",-"LSL"
+}
+
+func sbfiz4(x int8) int64 {
+	return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]5",-"LSL"
+}
+
+// sbfiz combinations.
+// merge shift with sbfiz into sbfiz.
+func sbfiz5(x int32) int32 {
+	// arm64:"SBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR"
+	return (x << 4) >> 3
+}
+
+func sbfiz6(x int16) int64 {
+	return int64(x+1) << 3 // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]16",-"LSL"
+}
+
+func sbfiz7(x int8) int64 {
+	return int64(x+1) << 62 // arm64:"SBFIZ\t[$]62, R[0-9]+, [$]2",-"LSL"
+}
+
+func sbfiz8(x int32) int64 {
+	return int64(x+1) << 40 // arm64:"SBFIZ\t[$]40, R[0-9]+, [$]24",-"LSL"
+}
+
+// sbfx
+// merge shifts into sbfx: (x << lc) >> rc && lc <= rc.
+func sbfx1(x int64) int64 {
+	return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR"
+}
+
+func sbfx2(x int64) int64 {
+	return (x << 60) >> 60 // arm64:"SBFX\t[$]0, R[0-9]+, [$]4",-"LSL",-"ASR"
+}
+
+// merge shift and sign-extension into sbfx.
+func sbfx3(x int32) int64 {
+	return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]29",-"ASR"
+}
+
+func sbfx4(x int16) int64 {
+	return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]13",-"ASR"
+}
+
+func sbfx5(x int8) int64 {
+	return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]5",-"ASR"
+}
+
+func sbfx6(x int32) int64 {
+	return int64(x >> 30) // arm64:"SBFX\t[$]30, R[0-9]+, [$]2"
+}
+
+func sbfx7(x int16) int64 {
+	return int64(x >> 10) // arm64:"SBFX\t[$]10, R[0-9]+, [$]6"
+}
+
+func sbfx8(x int8) int64 {
+	return int64(x >> 5) // arm64:"SBFX\t[$]5, R[0-9]+, [$]3"
+}
+
+// sbfx combinations.
+// merge shifts with sbfiz into sbfx.
+func sbfx9(x int32) int32 {
+	return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR"
+}
+
+// merge sbfx and sign-extension into sbfx.
+func sbfx10(x int32) int64 {
+	c := x + 5
+	return int64(c >> 20) // arm64:"SBFX\t[$]20, R[0-9]+, [$]12",-"MOVW\tR[0-9]+, R[0-9]+"
+}
+
+// ubfiz
+// merge shifts into ubfiz: (x<<lc)>>rc && lc>rc
+func ubfiz1(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"LSR"
+	// s390x:"RISBGZ\t[$]3, [$]62, [$]1, ",-"SLD",-"SRD"
+	return (x << 4) >> 3
+}
+
+// merge shift and zero-extension into ubfiz.
+func ubfiz2(x uint32) uint64 {
+	return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]32",-"LSL"
+}
+
+func ubfiz3(x uint16) uint64 {
+	return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]16",-"LSL"
+}
+
+func ubfiz4(x uint8) uint64 {
+	return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]8",-"LSL"
+}
+
+func ubfiz5(x uint8) uint64 {
+	return uint64(x) << 60 // arm64:"UBFIZ\t[$]60, R[0-9]+, [$]4",-"LSL"
+}
+
+func ubfiz6(x uint32) uint64 {
+	return uint64(x << 30) // arm64:"UBFIZ\t[$]30, R[0-9]+, [$]2"
+}
+
+func ubfiz7(x uint16) uint64 {
+	return uint64(x << 10) // arm64:"UBFIZ\t[$]10, R[0-9]+, [$]6"
+}
+
+func ubfiz8(x uint8) uint64 {
+	return uint64(x << 7) // arm64:"UBFIZ\t[$]7, R[0-9]+, [$]1"
+}
+
+// merge ANDconst into ubfiz.
+func ubfiz9(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]3, R[0-9]+, [$]12",-"LSL",-"AND"
+	// s390x:"RISBGZ\t[$]49, [$]60, [$]3,",-"SLD",-"AND"
+	return (x & 0xfff) << 3
+}
+
+func ubfiz10(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]4, R[0-9]+, [$]12",-"LSL",-"AND"
+	// s390x:"RISBGZ\t[$]48, [$]59, [$]4,",-"SLD",-"AND"
+	return (x << 4) & 0xfff0
+}
+
+// ubfiz combinations
+func ubfiz11(x uint32) uint32 {
+	// arm64:"UBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"LSR"
+	return (x << 4) >> 3
+}
+
+func ubfiz12(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]1, R[0-9]+, [$]20",-"LSL",-"LSR"
+	// s390x:"RISBGZ\t[$]43, [$]62, [$]1, ",-"SLD",-"SRD",-"AND"
+	return ((x & 0xfffff) << 4) >> 3
+}
+
+func ubfiz13(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]5, R[0-9]+, [$]13",-"LSL",-"LSR",-"AND"
+	return ((x << 3) & 0xffff) << 2
+}
+
+func ubfiz14(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]7, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+	// s390x:"RISBGZ\t[$]45, [$]56, [$]7, ",-"SLD",-"SRD",-"AND"
+	return ((x << 5) & (0xfff << 5)) << 2
+}
+
+// ubfx
+// merge shifts into ubfx: (x<<lc)>>rc && lc<rc
+func ubfx1(x uint64) uint64 {
+	// arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR"
+	// s390x:"RISBGZ\t[$]2, [$]63, [$]63,",-"SLD",-"SRD"
+	return (x << 1) >> 2
+}
+
+// merge shift and zero-extension into ubfx.
+func ubfx2(x uint32) uint64 {
+	return uint64(x >> 15) // arm64:"UBFX\t[$]15, R[0-9]+, [$]17",-"LSR"
+}
+
+func ubfx3(x uint16) uint64 {
+	return uint64(x >> 9) // arm64:"UBFX\t[$]9, R[0-9]+, [$]7",-"LSR"
+}
+
+func ubfx4(x uint8) uint64 {
+	return uint64(x >> 3) // arm64:"UBFX\t[$]3, R[0-9]+, [$]5",-"LSR"
+}
+
+func ubfx5(x uint32) uint64 {
+	return uint64(x) >> 30 // arm64:"UBFX\t[$]30, R[0-9]+, [$]2"
+}
+
+func ubfx6(x uint16) uint64 {
+	return uint64(x) >> 10 // arm64:"UBFX\t[$]10, R[0-9]+, [$]6"
+}
+
+func ubfx7(x uint8) uint64 {
+	return uint64(x) >> 3 // arm64:"UBFX\t[$]3, R[0-9]+, [$]5"
+}
+
+// merge ANDconst into ubfx.
+func ubfx8(x uint64) uint64 {
+	// arm64:"UBFX\t[$]25, R[0-9]+, [$]10",-"LSR",-"AND"
+	// s390x:"RISBGZ\t[$]54, [$]63, [$]39, ",-"SRD",-"AND"
+	return (x >> 25) & 1023
+}
+
+func ubfx9(x uint64) uint64 {
+	// arm64:"UBFX\t[$]4, R[0-9]+, [$]8",-"LSR",-"AND"
+	// s390x:"RISBGZ\t[$]56, [$]63, [$]60, ",-"SRD",-"AND"
+	return (x & 0x0ff0) >> 4
+}
+
+// ubfx combinations.
+func ubfx10(x uint32) uint32 {
+	// arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR"
+	return (x << 1) >> 2
+}
+
+func ubfx11(x uint64) uint64 {
+	// arm64:"UBFX\t[$]1, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+	// s390x:"RISBGZ\t[$]52, [$]63, [$]63,",-"SLD",-"SRD",-"AND"
+	return ((x << 1) >> 2) & 0xfff
+}
+
+func ubfx12(x uint64) uint64 {
+	// arm64:"UBFX\t[$]4, R[0-9]+, [$]11",-"LSL",-"LSR",-"AND"
+	// s390x:"RISBGZ\t[$]53, [$]63, [$]60, ",-"SLD",-"SRD",-"AND"
+	return ((x >> 3) & 0xfff) >> 1
+}
+
+func ubfx13(x uint64) uint64 {
+	// arm64:"UBFX\t[$]5, R[0-9]+, [$]56",-"LSL",-"LSR"
+	// s390x:"RISBGZ\t[$]8, [$]63, [$]59, ",-"SLD",-"SRD"
+	return ((x >> 2) << 5) >> 8
+}
+
+func ubfx14(x uint64) uint64 {
+	// arm64:"UBFX\t[$]1, R[0-9]+, [$]19",-"LSL",-"LSR"
+	// s390x:"RISBGZ\t[$]45, [$]63, [$]63, ",-"SLD",-"SRD",-"AND"
+	return ((x & 0xfffff) << 3) >> 4
+}
+
+// merge ubfx and zero-extension into ubfx.
+func ubfx15(x uint64) bool {
+	midr := x + 10
+	part_num := uint16((midr >> 4) & 0xfff)
+	if part_num == 0xd0c { // arm64:"UBFX\t[$]4, R[0-9]+, [$]12",-"MOVHU\tR[0-9]+, R[0-9]+"
+		return true
+	}
+	return false
+}
+
+// merge ANDconst and ubfx into ubfx
+func ubfx16(x uint64) uint64 {
+	// arm64:"UBFX\t[$]4, R[0-9]+, [$]6",-"AND\t[$]63"
+	return ((x >> 3) & 0xfff) >> 1 & 0x3f
+}
+
+// Check that we don't emit comparisons for constant shifts.
+//
+//go:nosplit
+func shift_no_cmp(x int) int {
+	// arm64:`LSL\t[$]17`,-`CMP`
+	// mips64:`SLLV\t[$]17`,-`SGT`
+	return x << 17
+}
+
+func rev16(c uint64) (uint64, uint64, uint64) {
+	// arm64:`REV16`,-`AND`,-`LSR`,-`AND`,-`ORR\tR[0-9]+<<8`
+	b1 := ((c & 0xff00ff00ff00ff00) >> 8) | ((c & 0x00ff00ff00ff00ff) << 8)
+	// arm64:-`ADD\tR[0-9]+<<8`
+	b2 := ((c & 0xff00ff00ff00ff00) >> 8) + ((c & 0x00ff00ff00ff00ff) << 8)
+	// arm64:-`EOR\tR[0-9]+<<8`
+	b3 := ((c & 0xff00ff00ff00ff00) >> 8) ^ ((c & 0x00ff00ff00ff00ff) << 8)
+	return b1, b2, b3
+}
+
+func rev16w(c uint32) (uint32, uint32, uint32) {
+	// arm64:`REV16W`,-`AND`,-`UBFX`,-`AND`,-`ORR\tR[0-9]+<<8`
+	b1 := ((c & 0xff00ff00) >> 8) | ((c & 0x00ff00ff) << 8)
+	// arm64:-`ADD\tR[0-9]+<<8`
+	b2 := ((c & 0xff00ff00) >> 8) + ((c & 0x00ff00ff) << 8)
+	// arm64:-`EOR\tR[0-9]+<<8`
+	b3 := ((c & 0xff00ff00) >> 8) ^ ((c & 0x00ff00ff) << 8)
+	return b1, b2, b3
+}
+
+func shift(x uint32, y uint16, z uint8) uint64 {
+	// arm64:-`MOVWU`,-`LSR\t[$]32`
+	a := uint64(x) >> 32
+	// arm64:-`MOVHU`
+	b := uint64(y) >> 16
+	// arm64:-`MOVBU`
+	c := uint64(z) >> 8
+	// arm64:`MOVD\tZR`,-`ADD\tR[0-9]+>>16`,-`ADD\tR[0-9]+>>8`
+	return a + b + c
+}
--- a/test/codegen/bits.go
+++ b/test/codegen/bits.go
@@ -0,0 +1,423 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math/bits"
+
+/************************************
+ * 64-bit instructions
+ ************************************/
+
+// bitcheck64_constleft tests single bits of a using masks built from a
+// constant left shift (bits 63, 60 and 0). The amd64 checks expect BTQ for
+// the two high bits and BTL for bit 0.
+func bitcheck64_constleft(a uint64) (n int) {
+	// amd64:"BTQ\t[$]63"
+	if a&(1<<63) != 0 {
+		return 1
+	}
+	// amd64:"BTQ\t[$]60"
+	if a&(1<<60) != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if a&(1<<0) != 0 {
+		return 1
+	}
+	return 0
+}
+
+// bitcheck64_constright tests single bits of 64-bit array elements that are
+// isolated with a constant right shift. The amd64 checks expect BTQ for bits
+// 63 and 60 and BTL for the low bits. The last case shifts by 5 and masks
+// with 4, which the check expects to become a test of bit 7.
+func bitcheck64_constright(a [8]uint64) (n int) {
+	// amd64:"BTQ\t[$]63"
+	if (a[0]>>63)&1 != 0 {
+		return 1
+	}
+	// amd64:"BTQ\t[$]63"
+	if a[1]>>63 != 0 {
+		return 1
+	}
+	// amd64:"BTQ\t[$]63"
+	if a[2]>>63 == 0 {
+		return 1
+	}
+	// amd64:"BTQ\t[$]60"
+	if (a[3]>>60)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]1"
+	if (a[4]>>1)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if (a[5]>>0)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]7"
+	if (a[6]>>5)&4 == 0 {
+		return 1
+	}
+	return 0
+}
+
+// bitcheck64_var tests a bit selected by a variable index masked to 0..63,
+// once with a shifted mask and once with a shifted value. The amd64 checks
+// expect BTQ in both cases; the second also rejects a bit test of constant
+// bit 0.
+func bitcheck64_var(a, b uint64) (n int) {
+	// amd64:"BTQ"
+	if a&(1<<(b&63)) != 0 {
+		return 1
+	}
+	// amd64:"BTQ",-"BT.\t[$]0"
+	if (b>>(a&63))&1 != 0 {
+		return 1
+	}
+	return 0
+}
+
+// bitcheck64_mask tests single bits of a using literal masks (bits 63, 59
+// and 0). The amd64 checks expect BTQ for the two high bits and BTL for
+// bit 0.
+func bitcheck64_mask(a uint64) (n int) {
+	// amd64:"BTQ\t[$]63"
+	if a&0x8000000000000000 != 0 {
+		return 1
+	}
+	// amd64:"BTQ\t[$]59"
+	if a&0x800000000000000 != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if a&0x1 != 0 {
+		return 1
+	}
+	return 0
+}
+
+// biton64 sets one bit of a 64-bit value with OR and accumulates the results
+// in n. The amd64 checks expect BTSQ for a variable bit and for constant bits
+// 63 and 60, and a plain ORQ with immediate 1 for bit 0.
+func biton64(a, b uint64) (n uint64) {
+	// amd64:"BTSQ"
+	n += b | (1 << (a & 63))
+
+	// amd64:"BTSQ\t[$]63"
+	n += a | (1 << 63)
+
+	// amd64:"BTSQ\t[$]60"
+	n += a | (1 << 60)
+
+	// amd64:"ORQ\t[$]1"
+	n += a | (1 << 0)
+
+	return n
+}
+
+// bitoff64 clears one bit of a 64-bit value with AND NOT and accumulates the
+// results in n. The amd64 checks expect BTRQ for a variable bit and for
+// constant bits 63 and 60, and a plain ANDQ with immediate -2 for bit 0.
+func bitoff64(a, b uint64) (n uint64) {
+	// amd64:"BTRQ"
+	n += b &^ (1 << (a & 63))
+
+	// amd64:"BTRQ\t[$]63"
+	n += a &^ (1 << 63)
+
+	// amd64:"BTRQ\t[$]60"
+	n += a &^ (1 << 60)
+
+	// amd64:"ANDQ\t[$]-2"
+	n += a &^ (1 << 0)
+
+	return n
+}
+
+// bitcompl64 toggles one bit of a 64-bit value with XOR and accumulates the
+// results in n. The amd64 checks expect BTCQ for a variable bit and for
+// constant bits 63 and 60, and a plain XORQ with immediate 1 for bit 0.
+func bitcompl64(a, b uint64) (n uint64) {
+	// amd64:"BTCQ"
+	n += b ^ (1 << (a & 63))
+
+	// amd64:"BTCQ\t[$]63"
+	n += a ^ (1 << 63)
+
+	// amd64:"BTCQ\t[$]60"
+	n += a ^ (1 << 60)
+
+	// amd64:"XORQ\t[$]1"
+	n += a ^ (1 << 0)
+
+	return n
+}
+
+/************************************
+ * 32-bit instructions
+ ************************************/
+
+// bitcheck32_constleft tests single bits of a using masks built from a
+// constant left shift (bits 31, 28 and 0). The amd64 checks expect BTL with
+// the matching bit number in each case.
+func bitcheck32_constleft(a uint32) (n int) {
+	// amd64:"BTL\t[$]31"
+	if a&(1<<31) != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]28"
+	if a&(1<<28) != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if a&(1<<0) != 0 {
+		return 1
+	}
+	return 0
+}
+
+// bitcheck32_constright tests single bits of 32-bit array elements that are
+// isolated with a constant right shift. The amd64 checks expect BTL with the
+// matching bit number. The last case shifts by 5 and masks with 4, which the
+// check expects to become a test of bit 7.
+func bitcheck32_constright(a [8]uint32) (n int) {
+	// amd64:"BTL\t[$]31"
+	if (a[0]>>31)&1 != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]31"
+	if a[1]>>31 != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]31"
+	if a[2]>>31 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]28"
+	if (a[3]>>28)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]1"
+	if (a[4]>>1)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if (a[5]>>0)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]7"
+	if (a[6]>>5)&4 == 0 {
+		return 1
+	}
+	return 0
+}
+
+// bitcheck32_var tests a bit selected by a variable index masked to 0..31,
+// once with a shifted mask and once with a shifted value. The amd64 checks
+// expect BTL in both cases; the second also rejects a bit test of constant
+// bit 0.
+func bitcheck32_var(a, b uint32) (n int) {
+	// amd64:"BTL"
+	if a&(1<<(b&31)) != 0 {
+		return 1
+	}
+	// amd64:"BTL",-"BT.\t[$]0"
+	if (b>>(a&31))&1 != 0 {
+		return 1
+	}
+	return 0
+}
+
+// bitcheck32_mask tests single bits of a using literal masks (bits 31, 27
+// and 0). The amd64 checks expect BTL with the matching bit number.
+func bitcheck32_mask(a uint32) (n int) {
+	// amd64:"BTL\t[$]31"
+	if a&0x80000000 != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]27"
+	if a&0x8000000 != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if a&0x1 != 0 {
+		return 1
+	}
+	return 0
+}
+
+// biton32 sets one bit of a 32-bit value with OR and accumulates the results
+// in n. The amd64 checks expect BTSL only for the variable bit; constant bits
+// 31, 28 and 0 are expected to use ORL with an immediate.
+func biton32(a, b uint32) (n uint32) {
+	// amd64:"BTSL"
+	n += b | (1 << (a & 31))
+
+	// amd64:"ORL\t[$]-2147483648"
+	n += a | (1 << 31)
+
+	// amd64:"ORL\t[$]268435456"
+	n += a | (1 << 28)
+
+	// amd64:"ORL\t[$]1"
+	n += a | (1 << 0)
+
+	return n
+}
+
+// bitoff32 clears one bit of a 32-bit value with AND NOT and accumulates the
+// results in n. The amd64 checks expect BTRL only for the variable bit;
+// constant bits 31, 28 and 0 are expected to use ANDL with an immediate.
+func bitoff32(a, b uint32) (n uint32) {
+	// amd64:"BTRL"
+	n += b &^ (1 << (a & 31))
+
+	// amd64:"ANDL\t[$]2147483647"
+	n += a &^ (1 << 31)
+
+	// amd64:"ANDL\t[$]-268435457"
+	n += a &^ (1 << 28)
+
+	// amd64:"ANDL\t[$]-2"
+	n += a &^ (1 << 0)
+
+	return n
+}
+
+// bitcompl32 toggles one bit of a 32-bit value with XOR and accumulates the
+// results in n. The amd64 checks expect BTCL only for the variable bit;
+// constant bits 31, 28 and 0 are expected to use XORL with an immediate.
+func bitcompl32(a, b uint32) (n uint32) {
+	// amd64:"BTCL"
+	n += b ^ (1 << (a & 31))
+
+	// amd64:"XORL\t[$]-2147483648"
+	n += a ^ (1 << 31)
+
+	// amd64:"XORL\t[$]268435456"
+	n += a ^ (1 << 28)
+
+	// amd64:"XORL\t[$]1"
+	n += a ^ (1 << 0)
+
+	return n
+}
+
+// bitOpOnMem checks direct operation on memory with constant sources. The
+// amd64 checks expect ANDL, ORL and XORL to take an immediate and a memory
+// operand at offsets 0, 4 and 8 from the slice base. The b, c and d
+// parameters are not used by the body.
+func bitOpOnMem(a []uint32, b, c, d uint32) {
+	// amd64:`ANDL\s[$]200,\s\([A-Z][A-Z0-9]+\)`
+	a[0] &= 200
+	// amd64:`ORL\s[$]220,\s4\([A-Z][A-Z0-9]+\)`
+	a[1] |= 220
+	// amd64:`XORL\s[$]240,\s8\([A-Z][A-Z0-9]+\)`
+	a[2] ^= 240
+}
+
+// bitcheckMostNegative reports whether the top bit of b is set. The amd64
+// check expects the comparison to be done with TESTB.
+func bitcheckMostNegative(b uint8) bool {
+	// amd64:"TESTB"
+	return b&0x80 == 0x80
+}
+
+// Check AND masking on arm64 (Issue #19857)
+
+// and_mask_1 clears bit 63 of a by masking with every lower bit set. The
+// arm64 check expects an AND instruction.
+func and_mask_1(a uint64) uint64 {
+	// arm64:`AND\t`
+	return a & ((1 << 63) - 1)
+}
+
+// and_mask_2 keeps only bit 63 of a. The arm64 check expects an AND
+// instruction.
+func and_mask_2(a uint64) uint64 {
+	// arm64:`AND\t`
+	return a & (1 << 63)
+}
+
+// and_mask_3 masks two 32-bit values with constants. On arm/7 the checks
+// expect BIC instead of AND for 0xffffaaaa, and BFC instead of AND or BIC for
+// 0xffc003ff.
+func and_mask_3(a, b uint32) (uint32, uint32) {
+	// arm/7:`BIC`,-`AND`
+	a &= 0xffffaaaa
+	// arm/7:`BFC`,-`AND`,-`BIC`
+	b &= 0xffc003ff
+	return a, b
+}
+
+// Check generation of arm64 BIC/EON/ORN instructions
+
+// op_bic computes x AND NOT y. The arm64 check expects BIC and rejects AND.
+func op_bic(x, y uint32) uint32 {
+	// arm64:`BIC\t`,-`AND`
+	return x &^ y
+}
+
+// op_eon exercises four spellings of exclusive-or with a complemented
+// operand: XOR against an all-ones constant (32 and 64 bit), the complement
+// of an XOR, and XOR with an explicitly complemented value. The arm64 checks
+// expect EON each time and reject a separate EOR; three of them also reject
+// MVN.
+func op_eon(x, y, z uint32, a []uint32, n, m uint64) uint64 {
+	// arm64:`EON\t`,-`EOR`,-`MVN`
+	a[0] = x ^ (y ^ 0xffffffff)
+
+	// arm64:`EON\t`,-`EOR`,-`MVN`
+	a[1] = ^(y ^ z)
+
+	// arm64:`EON\t`,-`EOR`
+	a[2] = x ^ ^z
+
+	// arm64:`EON\t`,-`EOR`,-`MVN`
+	return n ^ (m ^ 0xffffffffffffffff)
+}
+
+// op_orn computes x OR NOT y. The arm64 check expects ORN and rejects ORR.
+func op_orn(x, y uint32) uint32 {
+	// arm64:`ORN\t`,-`ORR`
+	return x | ^y
+}
+
+// bitSetPowerOf2Test checks a bitset membership test against a single-bit
+// mask. The amd64 check expects the compare of x&8 with 8 to become a BTL of
+// bit 3.
+func bitSetPowerOf2Test(x int) bool {
+	// amd64:"BTL\t[$]3"
+	return x&8 == 8
+}
+
+// bitSetTest checks a bitset membership test against a two-bit mask. The
+// amd64 checks expect an ANDL with 9 into AX and a CMPQ of AX against 9.
+func bitSetTest(x int) bool {
+	// amd64:"ANDL\t[$]9, AX"
+	// amd64:"CMPQ\tAX, [$]9"
+	return x&9 == 9
+}
+
+// cont1Mask64U masks x with a constant whose one bits are contiguous. The
+// s390x check expects a RISBGZ with no rotation.
+func cont1Mask64U(x uint64) uint64 {
+	// s390x:"RISBGZ\t[$]16, [$]47, [$]0,"
+	return x & 0x0000ffffffff0000
+}
+
+// cont0Mask64U masks x with a constant whose zero bits are contiguous. The
+// s390x check expects a RISBGZ with no rotation.
+func cont0Mask64U(x uint64) uint64 {
+	// s390x:"RISBGZ\t[$]48, [$]15, [$]0,"
+	return x & 0xffff00000000ffff
+}
+
+// issue44228a tests bit i of a bitmap stored as 64-bit words. The amd64 check
+// expects BTQ and rejects SHL.
+func issue44228a(a []int64, i int) bool {
+	// amd64: "BTQ", -"SHL"
+	return a[i>>6]&(1<<(i&63)) != 0
+}
+// issue44228b tests bit i of a bitmap stored as 32-bit words. The amd64 check
+// expects BTL and rejects SHL.
+func issue44228b(a []int32, i int) bool {
+	// amd64: "BTL", -"SHL"
+	return a[i>>5]&(1<<(i&31)) != 0
+}
+
+// issue48467 subtracts y from x with bits.Sub64 and then masks the difference
+// with the negated borrow. The arm64 check rejects a NEG on the Sub64 line.
+func issue48467(x, y uint64) uint64 {
+	// arm64: -"NEG"
+	d, borrow := bits.Sub64(x, y, 0)
+	return x - d&(-borrow)
+}
+
+// foldConst adds the constant 7 to x with bits.Add64 and combines the carry
+// with the sum. The arm64 check expects 7 to be folded into ADDS as an
+// immediate and rejects loading it with MOVD. The y parameter is not used.
+func foldConst(x, y uint64) uint64 {
+	// arm64: "ADDS\t[$]7",-"MOVD\t[$]7"
+	d, b := bits.Add64(x, 7, 0)
+	return b & d
+}
+
+// foldConstOutOfRange adds 0x1234568 (19088744) to a. The arm64 check expects
+// the constant to be loaded with MOVD and rejects an ADD that carries it as
+// an immediate.
+func foldConstOutOfRange(a uint64) uint64 {
+	// arm64: "MOVD\t[$]19088744",-"ADD\t[$]19088744"
+	return a + 0x1234568
+}
+
+// signextendAndMask8to64 verifies sign-extended values are not zero-extended
+// under a bit mask (#61297). Masking the sign-extended int8 with 0x3F7 must
+// keep the MOVB and the full mask (1015) on ppc64x. Masking the value after
+// an explicit uint8 conversion is expected to drop the MOVB and narrow the
+// mask to 247.
+func signextendAndMask8to64(a int8) (s, z uint64) {
+	// ppc64x: "MOVB", "ANDCC\t[$]1015,"
+	s = uint64(a) & 0x3F7
+	// ppc64x: -"MOVB", "ANDCC\t[$]247,"
+	z = uint64(uint8(a)) & 0x3F7
+	return
+}
+
+// zeroextendAndMask8to64 verifies zero-extended values are not sign-extended
+// under a bit mask (#61297). Masking a signed 8-bit or 16-bit value with
+// exactly its own width is expected to become a zero-extending move on ppc64x
+// (MOVBZ or MOVHZ), with no sign-extending move and no ANDCC.
+func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) {
+	// ppc64x: -"MOVB\t", -"ANDCC", "MOVBZ"
+	x = uint64(a) & 0xFF
+	// ppc64x: -"MOVH\t", -"ANDCC", "MOVHZ"
+	y = uint64(b) & 0xFFFF
+	return
+}
+
+// bitRotateAndMask verifies rotate and mask instructions on ppc64x, and
+// further simplified instructions for small types. 64-bit masks with
+// contiguous ones are expected to use RLDICR or RLDICL, also when combined
+// with a right shift. 32-bit masks are expected to use RLWNM, including masks
+// that wrap around. The mask 0xFFFF0002 is expected to fall back to loading
+// the constant and using AND. Masks converted from a wider constant to
+// uint16 or uint8 are expected to be truncated to 22136 and 120.
+func bitRotateAndMask(io64 [8]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) {
+	// ppc64x: "RLDICR\t[$]0, R[0-9]*, [$]47, R"
+	io64[0] = io64[0] & 0xFFFFFFFFFFFF0000
+	// ppc64x: "RLDICL\t[$]0, R[0-9]*, [$]16, R"
+	io64[1] = io64[1] & 0x0000FFFFFFFFFFFF
+	// ppc64x: -"SRD", -"AND", "RLDICL\t[$]60, R[0-9]*, [$]16, R"
+	io64[2] = (io64[2] >> 4) & 0x0000FFFFFFFFFFFF
+	// ppc64x: -"SRD", -"AND", "RLDICL\t[$]36, R[0-9]*, [$]28, R"
+	io64[3] = (io64[3] >> 28) & 0x0000FFFFFFFFFFFF
+
+	// ppc64x: "MOVWZ", "RLWNM\t[$]1, R[0-9]*, [$]28, [$]3, R"
+	io64[4] = uint64(bits.RotateLeft32(io32[0], 1) & 0xF000000F)
+
+	// ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]4, [$]19, R"
+	io32[0] = io32[0] & 0x0FFFF000
+	// ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]20, [$]3, R"
+	io32[1] = io32[1] & 0xF0000FFF
+	// ppc64x: -"RLWNM", "MOVD", "AND"
+	io32[2] = io32[2] & 0xFFFF0002
+
+	var bigc uint32 = 0x12345678
+	// ppc64x: "ANDCC\t[$]22136"
+	io16[0] = io16[0] & uint16(bigc)
+
+	// ppc64x: "ANDCC\t[$]120"
+	io8[0] = io8[0] & uint8(bigc)
+}
--- a/test/codegen/bmi.go
+++ b/test/codegen/bmi.go
@@ -0,0 +1,209 @@
+// asmcheck
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// andn64 computes x AND NOT y on 64-bit values. The amd64/v3 check expects
+// ANDNQ.
+func andn64(x, y int64) int64 {
+	// amd64/v3:"ANDNQ"
+	return x &^ y
+}
+
+// andn32 computes x AND NOT y on 32-bit values. The amd64/v3 check expects
+// ANDNL.
+func andn32(x, y int32) int32 {
+	// amd64/v3:"ANDNL"
+	return x &^ y
+}
+
+// blsi64 isolates the lowest set bit of x by ANDing it with its negation.
+// The amd64/v3 check expects BLSIQ.
+func blsi64(x int64) int64 {
+	// amd64/v3:"BLSIQ"
+	return x & -x
+}
+
+// blsi32 isolates the lowest set bit of x by ANDing it with its negation.
+// The amd64/v3 check expects BLSIL.
+func blsi32(x int32) int32 {
+	// amd64/v3:"BLSIL"
+	return x & -x
+}
+
+// blsmsk64 XORs x with x-1, which yields a mask up to and including the
+// lowest set bit. The amd64/v3 check expects BLSMSKQ.
+func blsmsk64(x int64) int64 {
+	// amd64/v3:"BLSMSKQ"
+	return x ^ (x - 1)
+}
+
+// blsmsk32 XORs x with x-1, which yields a mask up to and including the
+// lowest set bit. The amd64/v3 check expects BLSMSKL.
+func blsmsk32(x int32) int32 {
+	// amd64/v3:"BLSMSKL"
+	return x ^ (x - 1)
+}
+
+// blsr64 clears the lowest set bit of x by ANDing it with x-1. The amd64/v3
+// check expects BLSRQ.
+func blsr64(x int64) int64 {
+	// amd64/v3:"BLSRQ"
+	return x & (x - 1)
+}
+
+// blsr32 clears the lowest set bit of x by ANDing it with x-1. The amd64/v3
+// check expects BLSRL.
+func blsr32(x int32) int32 {
+	// amd64/v3:"BLSRL"
+	return x & (x - 1)
+}
+
+// isPowerOfTwo64 reports whether clearing the lowest set bit of x leaves
+// zero, which is also true for x == 0. The amd64/v3 check expects BLSRQ and
+// rejects a separate TESTQ and a CALL to blsr64.
+func isPowerOfTwo64(x int64) bool {
+	// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+	return blsr64(x) == 0
+}
+
+// isPowerOfTwo32 reports whether clearing the lowest set bit of x leaves
+// zero, which is also true for x == 0. The amd64/v3 check expects BLSRL and
+// rejects a separate TESTL and a CALL to blsr32.
+func isPowerOfTwo32(x int32) bool {
+	// amd64/v3:"BLSRL",-"TESTL",-"CALL"
+	return blsr32(x) == 0
+}
+
+// isPowerOfTwoSelect64 picks a or b depending on isPowerOfTwo64(x) and
+// returns twice the chosen value. The amd64/v3 checks expect BLSRQ for the
+// condition and CMOVQEQ for the selection, with no TESTQ and no CALL.
+func isPowerOfTwoSelect64(x, a, b int64) int64 {
+	var r int64
+	// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+	if isPowerOfTwo64(x) {
+		r = a
+	} else {
+		r = b
+	}
+	// amd64/v3:"CMOVQEQ",-"TESTQ",-"CALL"
+	return r * 2 // force return blocks joining
+}
+
+// isPowerOfTwoSelect32 picks a or b depending on isPowerOfTwo32(x) and
+// returns twice the chosen value. The amd64/v3 checks expect BLSRL for the
+// condition and CMOVLEQ for the selection, with no TESTL and no CALL.
+func isPowerOfTwoSelect32(x, a, b int32) int32 {
+	var r int32
+	// amd64/v3:"BLSRL",-"TESTL",-"CALL"
+	if isPowerOfTwo32(x) {
+		r = a
+	} else {
+		r = b
+	}
+	// amd64/v3:"CMOVLEQ",-"TESTL",-"CALL"
+	return r * 2 // force return blocks joining
+}
+
+// isPowerOfTwoBranch64 calls a or b depending on isPowerOfTwo64(x). The
+// amd64/v3 check on the condition expects BLSRQ with no TESTQ and no CALL.
+func isPowerOfTwoBranch64(x int64, a func(bool), b func(string)) {
+	// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+	if isPowerOfTwo64(x) {
+		a(true)
+	} else {
+		b("false")
+	}
+}
+
+// isPowerOfTwoBranch32 calls a or b depending on isPowerOfTwo32(x). The
+// amd64/v3 check on the condition expects BLSRL with no TESTL and no CALL.
+func isPowerOfTwoBranch32(x int32, a func(bool), b func(string)) {
+	// amd64/v3:"BLSRL",-"TESTL",-"CALL"
+	if isPowerOfTwo32(x) {
+		a(true)
+	} else {
+		b("false")
+	}
+}
+
+// isNotPowerOfTwo64 reports whether x still has a bit set after its lowest
+// set bit is cleared. The amd64/v3 check expects BLSRQ and rejects a separate
+// TESTQ and a CALL to blsr64.
+func isNotPowerOfTwo64(x int64) bool {
+	// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+	return blsr64(x) != 0
+}
+
+// isNotPowerOfTwo32 reports whether x still has a bit set after its lowest
+// set bit is cleared. The amd64/v3 check expects BLSRL and rejects a separate
+// TESTL and a CALL to blsr32.
+func isNotPowerOfTwo32(x int32) bool {
+	// amd64/v3:"BLSRL",-"TESTL",-"CALL"
+	return blsr32(x) != 0
+}
+
+// isNotPowerOfTwoSelect64 picks a or b depending on isNotPowerOfTwo64(x) and
+// returns twice the chosen value. The amd64/v3 checks expect BLSRQ for the
+// condition and CMOVQNE for the selection, with no TESTQ and no CALL.
+func isNotPowerOfTwoSelect64(x, a, b int64) int64 {
+	var r int64
+	// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+	if isNotPowerOfTwo64(x) {
+		r = a
+	} else {
+		r = b
+	}
+	// amd64/v3:"CMOVQNE",-"TESTQ",-"CALL"
+	return r * 2 // force return blocks joining
+}
+
+// isNotPowerOfTwoSelect32 picks a or b depending on isNotPowerOfTwo32(x) and
+// returns twice the chosen value. The amd64/v3 checks expect BLSRL for the
+// condition and CMOVLNE for the selection, with no TESTL and no CALL.
+func isNotPowerOfTwoSelect32(x, a, b int32) int32 {
+	var r int32
+	// amd64/v3:"BLSRL",-"TESTL",-"CALL"
+	if isNotPowerOfTwo32(x) {
+		r = a
+	} else {
+		r = b
+	}
+	// amd64/v3:"CMOVLNE",-"TESTL",-"CALL"
+	return r * 2 // force return blocks joining
+}
+
+// isNotPowerOfTwoBranch64 calls a or b depending on isNotPowerOfTwo64(x). The
+// amd64/v3 check on the condition expects BLSRQ with no TESTQ and no CALL.
+func isNotPowerOfTwoBranch64(x int64, a func(bool), b func(string)) {
+	// amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+	if isNotPowerOfTwo64(x) {
+		a(true)
+	} else {
+		b("false")
+	}
+}
+
+// isNotPowerOfTwoBranch32 calls a or b depending on isNotPowerOfTwo32(x). The
+// amd64/v3 check on the condition expects BLSRL with no TESTL and no CALL.
+func isNotPowerOfTwoBranch32(x int32, a func(bool), b func(string)) {
+	// amd64/v3:"BLSRL",-"TESTL",-"CALL"
+	if isNotPowerOfTwo32(x) {
+		a(true)
+	} else {
+		b("false")
+	}
+}
+
+// sarx64 shifts a signed 64-bit value right by a variable amount. The
+// amd64/v3 check expects SARXQ.
+func sarx64(x, y int64) int64 {
+	// amd64/v3:"SARXQ"
+	return x >> y
+}
+
+// sarx32 shifts a signed 32-bit value right by a variable amount. The
+// amd64/v3 check expects SARXL.
+func sarx32(x, y int32) int32 {
+	// amd64/v3:"SARXL"
+	return x >> y
+}
+
+// sarx64_load shifts slice elements right by a masked variable amount. The
+// amd64/v3 checks expect each element load to be folded into SARXQ as an
+// indexed memory operand, at offset 0 for x[i] and offset 8 for x[i+1].
+func sarx64_load(x []int64, i int) int64 {
+	// amd64/v3: `SARXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s := x[i] >> (i & 63)
+	// amd64/v3: `SARXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s = x[i+1] >> (s & 63)
+	return s
+}
+
+// sarx32_load shifts slice elements right by a masked variable amount. The
+// amd64/v3 checks expect each element load to be folded into SARXL as an
+// indexed memory operand, at offset 0 for x[i] and offset 4 for x[i+1].
+func sarx32_load(x []int32, i int) int32 {
+	// amd64/v3: `SARXL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s := x[i] >> (i & 63)
+	// amd64/v3: `SARXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s = x[i+1] >> (s & 63)
+	return s
+}
+
+// shlrx64 shifts an unsigned 64-bit value right and then left by the same
+// variable amount. The amd64/v3 checks expect SHRXQ and SHLXQ.
+func shlrx64(x, y uint64) uint64 {
+	// amd64/v3:"SHRXQ"
+	s := x >> y
+	// amd64/v3:"SHLXQ"
+	s = s << y
+	return s
+}
+
+// shlrx32 shifts an unsigned 32-bit value right and then left by the same
+// variable amount. The amd64/v3 checks expect SHRXL and SHLXL.
+func shlrx32(x, y uint32) uint32 {
+	// amd64/v3:"SHRXL"
+	s := x >> y
+	// amd64/v3:"SHLXL"
+	s = s << y
+	return s
+}
+
+// shlrx64_load shifts slice elements by a variable amount. The amd64/v3
+// checks expect each element load to be folded into SHRXQ or SHLXQ as an
+// indexed memory operand. The incoming value of s is overwritten before it
+// is read.
+func shlrx64_load(x []uint64, i int, s uint64) uint64 {
+	// amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s = x[i] >> i
+	// amd64/v3: `SHLXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s = x[i+1] << s
+	return s
+}
+
+// shlrx32_load shifts slice elements by a variable amount. The amd64/v3
+// checks expect each element load to be folded into SHRXL or SHLXL as an
+// indexed memory operand. The incoming value of s is overwritten before it
+// is read.
+func shlrx32_load(x []uint32, i int, s uint32) uint32 {
+	// amd64/v3: `SHRXL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s = x[i] >> i
+	// amd64/v3: `SHLXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s = x[i+1] << s
+	return s
+}
--- a/test/codegen/bool.go
+++ b/test/codegen/bool.go
@@ -0,0 +1,276 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import (
+	"math/bits"
+)
+
+// This file contains codegen tests related to boolean simplifications/optimizations.
+
+// convertNeq0B turns the low bit of a uint8 into a bool and ANDs it with c.
+// The amd64 check expects ANDL with 1 and no SETNE; the ppc64x check expects
+// RLDICL with no CMPW or ISEL.
+func convertNeq0B(x uint8, c bool) bool {
+	// amd64:"ANDL\t[$]1",-"SETNE"
+	// ppc64x:"RLDICL",-"CMPW",-"ISEL"
+	b := x&1 != 0
+	return c && b
+}
+
+// convertNeq0W turns the low bit of a uint16 into a bool and ANDs it with c.
+// The amd64 check expects ANDL with 1 and no SETNE; the ppc64x check expects
+// RLDICL with no CMPW or ISEL.
+func convertNeq0W(x uint16, c bool) bool {
+	// amd64:"ANDL\t[$]1",-"SETNE"
+	// ppc64x:"RLDICL",-"CMPW",-"ISEL"
+	b := x&1 != 0
+	return c && b
+}
+
+// convertNeq0L turns the low bit of a uint32 into a bool and ANDs it with c.
+// The amd64 check expects ANDL with 1 and no SETB; the ppc64x check expects
+// RLDICL with no CMPW or ISEL.
+func convertNeq0L(x uint32, c bool) bool {
+	// amd64:"ANDL\t[$]1",-"SETB"
+	// ppc64x:"RLDICL",-"CMPW",-"ISEL"
+	b := x&1 != 0
+	return c && b
+}
+
+// convertNeq0Q turns the low bit of a uint64 into a bool and ANDs it with c.
+// The amd64 check expects ANDL with 1 and no SETB; the ppc64x check expects
+// RLDICL with no CMP or ISEL.
+func convertNeq0Q(x uint64, c bool) bool {
+	// amd64:"ANDL\t[$]1",-"SETB"
+	// ppc64x:"RLDICL",-"CMP",-"ISEL"
+	b := x&1 != 0
+	return c && b
+}
+
+// convertNeqBool32 reports whether the low bit of a uint32 is set. The ppc64x
+// check expects RLDICL with no CMPW or ISEL.
+func convertNeqBool32(x uint32) bool {
+	// ppc64x:"RLDICL",-"CMPW",-"ISEL"
+	return x&1 != 0
+}
+
+// convertEqBool32 reports whether the low bit of a uint32 is clear. The
+// ppc64x check expects RLDICL and XOR with no CMPW or ISEL.
+func convertEqBool32(x uint32) bool {
+	// ppc64x:"RLDICL",-"CMPW","XOR",-"ISEL"
+	return x&1 == 0
+}
+
+// convertNeqBool64 reports whether the low bit of a uint64 is set. The ppc64x
+// check expects RLDICL with no CMP or ISEL.
+func convertNeqBool64(x uint64) bool {
+	// ppc64x:"RLDICL",-"CMP",-"ISEL"
+	return x&1 != 0
+}
+
+// convertEqBool64 reports whether the low bit of a uint64 is clear. The
+// ppc64x check expects RLDICL and XOR with no CMP or ISEL.
+func convertEqBool64(x uint64) bool {
+	// ppc64x:"RLDICL","XOR",-"CMP",-"ISEL"
+	return x&1 == 0
+}
+
+// TestSetEq64 checks how x == y is materialized as a bool on ppc64x. power10
+// expects SETBC on CR0EQ and no ISEL; power9 and power8 expect CMP and ISEL
+// and no SETBC on CR0EQ.
+func TestSetEq64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBC\tCR0EQ",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0EQ"
+	// ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0EQ"
+	b := x == y
+	return b
+}
+// TestSetNeq64 checks how x != y is materialized as a bool on ppc64x. power10
+// expects SETBCR on CR0EQ and no ISEL; power9 and power8 expect CMP and ISEL
+// and no SETBCR on CR0EQ.
+func TestSetNeq64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBCR\tCR0EQ",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0EQ"
+	// ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0EQ"
+	b := x != y
+	return b
+}
+// TestSetLt64 checks how x < y is materialized as a bool on ppc64x. power10
+// expects SETBC on CR0GT and no ISEL; power9 and power8 expect CMP and ISEL
+// and no SETBC on CR0GT.
+func TestSetLt64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBC\tCR0GT",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0GT"
+	// ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0GT"
+	b := x < y
+	return b
+}
+// TestSetLe64 checks how x <= y is materialized as a bool on ppc64x. power10
+// expects SETBCR on CR0LT and no ISEL; power9 and power8 expect CMP and ISEL
+// and no SETBCR on CR0LT.
+func TestSetLe64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBCR\tCR0LT",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0LT"
+	// ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0LT"
+	b := x <= y
+	return b
+}
+// TestSetGt64 checks how x > y is materialized as a bool on ppc64x. power10
+// expects SETBC on CR0LT and no ISEL; power9 and power8 expect CMP and ISEL
+// and no SETBC on CR0LT.
+func TestSetGt64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0LT"
+	// ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0LT"
+	b := x > y
+	return b
+}
+// TestSetGe64 checks how x >= y is materialized as a bool on ppc64x. power10
+// expects SETBCR on CR0GT and no ISEL; power9 and power8 expect CMP and ISEL
+// and no SETBCR on CR0GT.
+func TestSetGe64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBCR\tCR0GT",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0GT"
+	// ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0GT"
+	b := x >= y
+	return b
+}
+// TestSetLtFp64 checks how a float64 x < y is materialized as a bool on
+// ppc64x. power10 expects SETBC on CR0LT and no ISEL; power9 and power8
+// expect FCMP and ISEL and no SETBC on CR0LT.
+func TestSetLtFp64(x float64, y float64) bool {
+	// ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+	// ppc64x/power9:"FCMP","ISEL",-"SETBC\tCR0LT"
+	// ppc64x/power8:"FCMP","ISEL",-"SETBC\tCR0LT"
+	b := x < y
+	return b
+}
+// TestSetLeFp64 checks how a float64 x <= y is materialized as a bool on
+// ppc64x. power10 expects SETBC on both CR0LT and CR0EQ combined with OR and
+// no ISEL; power9 and power8 expect ISEL and OR and no SETBC on either bit.
+func TestSetLeFp64(x float64, y float64) bool {
+	// ppc64x/power10:"SETBC\tCR0LT","SETBC\tCR0EQ","OR",-"ISEL",-"ISEL"
+	// ppc64x/power9:"ISEL","ISEL",-"SETBC\tCR0LT",-"SETBC\tCR0EQ","OR"
+	// ppc64x/power8:"ISEL","ISEL",-"SETBC\tCR0LT",-"SETBC\tCR0EQ","OR"
+	b := x <= y
+	return b
+}
+// TestSetGtFp64 checks how a float64 x > y is materialized as a bool on
+// ppc64x. power10 expects SETBC on CR0LT and no ISEL; power9 and power8
+// expect FCMP and ISEL and no SETBC on CR0LT.
+func TestSetGtFp64(x float64, y float64) bool {
+	// ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+	// ppc64x/power9:"FCMP","ISEL",-"SETBC\tCR0LT"
+	// ppc64x/power8:"FCMP","ISEL",-"SETBC\tCR0LT"
+	b := x > y
+	return b
+}
+// TestSetGeFp64 checks how a float64 x >= y is materialized as a bool on
+// ppc64x. power10 expects SETBC on both CR0LT and CR0EQ combined with OR and
+// no ISEL; power9 and power8 expect ISEL and OR and no SETBC on either bit.
+func TestSetGeFp64(x float64, y float64) bool {
+	// ppc64x/power10:"SETBC\tCR0LT","SETBC\tCR0EQ","OR",-"ISEL",-"ISEL"
+	// ppc64x/power9:"ISEL","ISEL",-"SETBC\tCR0LT",-"SETBC\tCR0EQ","OR"
+	// ppc64x/power8:"ISEL","ISEL",-"SETBC\tCR0LT",-"SETBC\tCR0EQ","OR"
+	b := x >= y
+	return b
+}
+// TestSetInvEq64 checks how !(x == y) is materialized as a bool on ppc64x.
+// power10 expects SETBCR on CR0EQ and no ISEL; power9 and power8 expect CMP
+// and ISEL and no SETBCR on CR0EQ.
+func TestSetInvEq64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBCR\tCR0EQ",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0EQ"
+	// ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0EQ"
+	b := !(x == y)
+	return b
+}
+// TestSetInvNeq64 checks how !(x != y) is materialized as a bool on ppc64x.
+// power10 expects SETBC on CR0EQ and no ISEL; power9 and power8 expect CMP
+// and ISEL and no SETBC on CR0EQ.
+func TestSetInvNeq64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBC\tCR0EQ",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0EQ"
+	// ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0EQ"
+	b := !(x != y)
+	return b
+}
+// TestSetInvLt64 checks how !(x < y) is materialized as a bool on ppc64x.
+// power10 expects SETBCR on CR0GT and no ISEL; power9 and power8 expect CMP
+// and ISEL and no SETBCR on CR0GT.
+func TestSetInvLt64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBCR\tCR0GT",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0GT"
+	// ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0GT"
+	b := !(x < y)
+	return b
+}
+// TestSetInvLe64 checks how !(x <= y) is materialized as a bool on ppc64x.
+// power10 expects SETBC on CR0LT and no ISEL; power9 and power8 expect CMP
+// and ISEL and no SETBC on CR0LT.
+func TestSetInvLe64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0LT"
+	// ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0LT"
+	b := !(x <= y)
+	return b
+}
+// TestSetInvGt64 checks how !(x > y) is materialized as a bool on ppc64x.
+// power10 expects SETBCR on CR0LT and no ISEL; power9 and power8 expect CMP
+// and ISEL and no SETBCR on CR0LT.
+func TestSetInvGt64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBCR\tCR0LT",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0LT"
+	// ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0LT"
+	b := !(x > y)
+	return b
+}
+// TestSetInvGe64 checks how !(x >= y) is materialized as a bool on ppc64x.
+// power10 expects SETBC on CR0GT and no ISEL; power9 and power8 expect CMP
+// and ISEL and no SETBC on CR0GT.
+func TestSetInvGe64(x uint64, y uint64) bool {
+	// ppc64x/power10:"SETBC\tCR0GT",-"ISEL"
+	// ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0GT"
+	// ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0GT"
+	b := !(x >= y)
+	return b
+}
+
+// TestSetInvEqFp64 checks how a float64 !(x == y) is materialized as a bool
+// on ppc64x. power10 expects SETBCR on CR0EQ and no ISEL; power9 and power8
+// expect FCMP and ISEL and no SETBCR on CR0EQ.
+func TestSetInvEqFp64(x float64, y float64) bool {
+	// ppc64x/power10:"SETBCR\tCR0EQ",-"ISEL"
+	// ppc64x/power9:"FCMP","ISEL",-"SETBCR\tCR0EQ"
+	// ppc64x/power8:"FCMP","ISEL",-"SETBCR\tCR0EQ"
+	b := !(x == y)
+	return b
+}
+// TestSetInvNeqFp64 checks how a float64 !(x != y) is materialized as a bool
+// on ppc64x. power10 expects SETBC on CR0EQ and no ISEL; power9 and power8
+// expect FCMP and ISEL and no SETBC on CR0EQ.
+func TestSetInvNeqFp64(x float64, y float64) bool {
+	// ppc64x/power10:"SETBC\tCR0EQ",-"ISEL"
+	// ppc64x/power9:"FCMP","ISEL",-"SETBC\tCR0EQ"
+	// ppc64x/power8:"FCMP","ISEL",-"SETBC\tCR0EQ"
+	b := !(x != y)
+	return b
+}
+// TestSetInvLtFp64 checks how a float64 !(x < y) is materialized as a bool
+// on ppc64x. power10 expects SETBCR on CR0LT and no ISEL; power9 and power8
+// expect FCMP and ISEL and no SETBCR on CR0LT.
+func TestSetInvLtFp64(x float64, y float64) bool {
+	// ppc64x/power10:"SETBCR\tCR0LT",-"ISEL"
+	// ppc64x/power9:"FCMP","ISEL",-"SETBCR\tCR0LT"
+	// ppc64x/power8:"FCMP","ISEL",-"SETBCR\tCR0LT"
+	b := !(x < y)
+	return b
+}
+// TestSetInvLeFp64 checks how a float64 !(x <= y) is materialized as a bool
+// on ppc64x. power10 expects SETBC on CR0LT and no ISEL; power9 and power8
+// expect FCMP and ISEL and no SETBC on CR0LT.
+func TestSetInvLeFp64(x float64, y float64) bool {
+	// ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+	// ppc64x/power9:"FCMP","ISEL",-"SETBC\tCR0LT"
+	// ppc64x/power8:"FCMP","ISEL",-"SETBC\tCR0LT"
+	b := !(x <= y)
+	return b
+}
+// TestSetInvGtFp64 checks how a float64 !(x > y) is materialized as a bool
+// on ppc64x. power10 expects SETBCR on CR0LT and no ISEL; power9 and power8
+// expect FCMP and ISEL and no SETBCR on CR0LT.
+func TestSetInvGtFp64(x float64, y float64) bool {
+	// ppc64x/power10:"SETBCR\tCR0LT",-"ISEL"
+	// ppc64x/power9:"FCMP","ISEL",-"SETBCR\tCR0LT"
+	// ppc64x/power8:"FCMP","ISEL",-"SETBCR\tCR0LT"
+	b := !(x > y)
+	return b
+}
+// TestSetInvGeFp64 checks how a float64 !(x >= y) is materialized as a bool
+// on ppc64x. power10 expects SETBC on CR0LT and no ISEL; power9 and power8
+// expect FCMP and ISEL and no SETBC on CR0LT.
+func TestSetInvGeFp64(x float64, y float64) bool {
+	// ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+	// ppc64x/power9:"FCMP","ISEL",-"SETBC\tCR0LT"
+	// ppc64x/power8:"FCMP","ISEL",-"SETBC\tCR0LT"
+	b := !(x >= y)
+	return b
+}
+// TestLogicalCompareZero checks that on ppc64x an arithmetic or logical
+// operation whose result is then compared against zero uses the CC-suffixed
+// variant of the instruction (ANDCC, ANDNCC, ORCC, SUBCC, NORCC, XORCC,
+// ADDCC, NEGCC, CNTLZDCC and ADDCCC). Each result is stored back into x only
+// when the comparison succeeds.
+//
+// NOTE(review): the second item on most check lines is written with a
+// leading caret rather than the leading minus sign the harness uses for
+// negative checks, so it looks like the harness never evaluates it. Switching
+// to a minus sign would also need a tighter pattern, since for example AND is
+// a substring of ANDCC - confirm the intent before changing the checks.
+func TestLogicalCompareZero(x *[64]uint64) {
+	// ppc64x:"ANDCC",^"AND"
+	b := x[0] & 3
+	if b != 0 {
+		x[0] = b
+	}
+	// ppc64x:"ANDCC",^"AND"
+	b = x[1] & x[2]
+	if b != 0 {
+		x[1] = b
+	}
+	// ppc64x:"ANDNCC",^"ANDN"
+	b = x[1] &^ x[2]
+	if b != 0 {
+		x[1] = b
+	}
+	// ppc64x:"ORCC",^"OR"
+	b = x[3] | x[4]
+	if b != 0 {
+		x[3] = b
+	}
+	// ppc64x:"SUBCC",^"SUB"
+	b = x[5] - x[6]
+	if b != 0 {
+		x[5] = b
+	}
+	// ppc64x:"NORCC",^"NOR"
+	b = ^(x[5] | x[6])
+	if b != 0 {
+		x[5] = b
+	}
+	// ppc64x:"XORCC",^"XOR"
+	b = x[7] ^ x[8]
+	if b != 0 {
+		x[7] = b
+	}
+	// ppc64x:"ADDCC",^"ADD"
+	b = x[9] + x[10]
+	if b != 0 {
+		x[9] = b
+	}
+	// ppc64x:"NEGCC",^"NEG"
+	b = -x[11]
+	if b != 0 {
+		x[11] = b
+	}
+	// ppc64x:"CNTLZDCC",^"CNTLZD"
+	b = uint64(bits.LeadingZeros64(x[12]))
+	if b != 0 {
+		x[12] = b
+	}
+
+	// ppc64x:"ADDCCC\t[$]4,"
+	c := int64(x[12]) + 4
+	if c <= 0 {
+		x[12] = uint64(c)
+	}
+
+}
--- a/test/codegen/clobberdead.go
+++ b/test/codegen/clobberdead.go
@@ -0,0 +1,35 @@
+// asmcheck -gcflags=-clobberdead
+
+//go:build amd64 || arm64
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// T is the value type moved around by F. It is chosen so that locals of this
+// type live on the stack rather than in registers.
+type T [2]*int // contains pointers, not SSA-able (so locals are not registerized)
+
+// p1, p2 and p3 are the package-level sources that F copies into its locals.
+var p1, p2, p3 T
+
+// F exercises the -clobberdead compiler flag this file is built with. It
+// copies three globals into locals and checks, on amd64 and arm64, which
+// stack slots are overwritten with 0xdeaddead at function entry and before
+// each call.
+func F() {
+	// 3735936685 is 0xdeaddead. On ARM64 R27 is REGTMP.
+	// clobber x, y at entry. not clobber z (stack object).
+	// amd64:`MOVL\t\$3735936685, command-line-arguments\.x`, `MOVL\t\$3735936685, command-line-arguments\.y`, -`MOVL\t\$3735936685, command-line-arguments\.z`
+	// arm64:`MOVW\tR27, command-line-arguments\.x`, `MOVW\tR27, command-line-arguments\.y`, -`MOVW\tR27, command-line-arguments\.z`
+	x, y, z := p1, p2, p3
+	addrTaken(&z)
+	// x is dead at the call (the value of x is loaded before the CALL), y is not
+	// amd64:`MOVL\t\$3735936685, command-line-arguments\.x`, -`MOVL\t\$3735936685, command-line-arguments\.y`
+	// arm64:`MOVW\tR27, command-line-arguments\.x`, -`MOVW\tR27, command-line-arguments\.y`
+	use(x)
+	// amd64:`MOVL\t\$3735936685, command-line-arguments\.x`, `MOVL\t\$3735936685, command-line-arguments\.y`
+	// arm64:`MOVW\tR27, command-line-arguments\.x`, `MOVW\tR27, command-line-arguments\.y`
+	use(y)
+}
+
+// use accepts a T by value and does nothing. It is marked noinline so that
+// calls to it in F remain real calls.
+//
+//go:noinline
+func use(T) {}
+
+// addrTaken accepts a pointer to a T and does nothing. It is marked noinline
+// so that F really passes the address of its local.
+//
+//go:noinline
+func addrTaken(*T) {}
--- a/test/codegen/clobberdeadreg.go
+++ b/test/codegen/clobberdeadreg.go
@@ -0,0 +1,33 @@
+// asmcheck -gcflags=-clobberdeadreg
+
+//go:build amd64
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// S is a struct of six ints that F receives and passes on to RegArgsCall.
+type S struct {
+	a, b, c, d, e, f int
+}
+
+// F exercises the -clobberdeadreg compiler flag this file is built with. On
+// amd64 it checks which registers are overwritten with 0xdeaddeaddeaddead
+// before a call that takes its argument on the stack and before a call that
+// takes its arguments in registers.
+func F(a, b, c int, d S) {
+	// -2401018187971961171 is 0xdeaddeaddeaddead
+	// amd64:`MOVQ\t\$-2401018187971961171, AX`, `MOVQ\t\$-2401018187971961171, BX`, `MOVQ\t\$-2401018187971961171, CX`
+	// amd64:`MOVQ\t\$-2401018187971961171, DX`, `MOVQ\t\$-2401018187971961171, SI`, `MOVQ\t\$-2401018187971961171, DI`
+	// amd64:`MOVQ\t\$-2401018187971961171, R8`, `MOVQ\t\$-2401018187971961171, R9`, `MOVQ\t\$-2401018187971961171, R10`
+	// amd64:`MOVQ\t\$-2401018187971961171, R11`, `MOVQ\t\$-2401018187971961171, R12`, `MOVQ\t\$-2401018187971961171, R13`
+	// amd64:-`MOVQ\t\$-2401018187971961171, BP` // frame pointer is not clobbered
+	StackArgsCall([10]int{a, b, c})
+	// amd64:`MOVQ\t\$-2401018187971961171, R12`, `MOVQ\t\$-2401018187971961171, R13`, `MOVQ\t\$-2401018187971961171, DX`
+	// amd64:-`MOVQ\t\$-2401018187971961171, AX`, -`MOVQ\t\$-2401018187971961171, R11` // register args are not clobbered
+	RegArgsCall(a, b, c, d)
+}
+
+// StackArgsCall accepts a ten-element int array and does nothing. It is
+// marked noinline so that the call in F remains a real call.
+// NOTE(review): the name suggests the array is passed on the stack - confirm
+// against the ABI.
+//
+//go:noinline
+func StackArgsCall([10]int) {}
+
+// RegArgsCall accepts three ints and an S and does nothing. It is marked
+// noinline and registerparams; the checks in F expect its argument registers
+// not to be clobbered before the call.
+//
+//go:noinline
+//go:registerparams
+func RegArgsCall(int, int, int, S) {}
--- a/test/codegen/compare_and_branch.go
+++ b/test/codegen/compare_and_branch.go
@@ -0,0 +1,244 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// dummy does nothing. It is marked noinline and serves as the loop body in
+// the compare-and-branch tests in this file.
+//
+//go:noinline
+func dummy() {}
+
+// si64 checks signed 64-bit compare-and-branch. For a less-than loop
+// condition s390x expects CGRJ on two registers; for an equality condition
+// either CGRJ or CLGRJ is accepted.
+func si64(x, y chan int64) {
+	// s390x:"CGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+	for <-x < <-y {
+		dummy()
+	}
+
+	// s390x:"CL?GRJ\t[$]8, R[0-9]+, R[0-9]+, "
+	for <-x == <-y {
+		dummy()
+	}
+}
+
+// si64x8 checks signed 64-bit compare-and-branch with an 8-bit immediate.
+// Each loop bound sits just outside the signed 8-bit range (128 or -129), and
+// s390x is expected to emit CGIJ against the adjacent in-range value (127 or
+// -128).
+func si64x8(doNotOptimize int64) {
+	// take in doNotOptimize as an argument to avoid the loops being rewritten to count down
+	// s390x:"CGIJ\t[$]12, R[0-9]+, [$]127, "
+	for i := doNotOptimize; i < 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CGIJ\t[$]10, R[0-9]+, [$]-128, "
+	for i := doNotOptimize; i > -129; i-- {
+		dummy()
+	}
+
+	// s390x:"CGIJ\t[$]2, R[0-9]+, [$]127, "
+	for i := doNotOptimize; i >= 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CGIJ\t[$]4, R[0-9]+, [$]-128, "
+	for i := doNotOptimize; i <= -129; i-- {
+		dummy()
+	}
+}
+
+// ui64 checks unsigned 64-bit compare-and-branch. For a greater-than loop
+// condition s390x expects CLGRJ on two registers; for an inequality condition
+// either CGRJ or CLGRJ is accepted.
+func ui64(x, y chan uint64) {
+	// s390x:"CLGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+	for <-x > <-y {
+		dummy()
+	}
+
+	// s390x:"CL?GRJ\t[$]6, R[0-9]+, R[0-9]+, "
+	for <-x != <-y {
+		dummy()
+	}
+}
+
+// ui64x8 checks unsigned 64-bit comparison with an 8-bit immediate. The loops
+// compare a uint64 counter against 128, 256 and 0, and s390x is expected to
+// emit CLGIJ with an immediate of 128, 255 or 0.
+func ui64x8() {
+	// s390x:"CLGIJ\t[$]4, R[0-9]+, [$]128, "
+	for i := uint64(0); i < 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255, "
+	for i := uint64(0); i < 256; i++ {
+		dummy()
+	}
+
+	// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255, "
+	for i := uint64(257); i >= 256; i-- {
+		dummy()
+	}
+
+	// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]0, "
+	for i := uint64(1024); i > 0; i-- {
+		dummy()
+	}
+}
+
+// si32 checks signed 32-bit compare-and-branch. For a less-than loop
+// condition s390x expects CRJ on two registers; for an equality condition
+// either CRJ or CLRJ is accepted.
+func si32(x, y chan int32) {
+	// s390x:"CRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+	for <-x < <-y {
+		dummy()
+	}
+
+	// s390x:"CL?RJ\t[$]8, R[0-9]+, R[0-9]+, "
+	for <-x == <-y {
+		dummy()
+	}
+}
+
+// si32x8 checks signed 32-bit compare-and-branch with an 8-bit immediate.
+// Each loop bound sits just outside the signed 8-bit range (128 or -129), and
+// s390x is expected to emit CIJ against the adjacent in-range value (127 or
+// -128).
+func si32x8(doNotOptimize int32) {
+	// take in doNotOptimize as an argument to avoid the loops being rewritten to count down
+	// s390x:"CIJ\t[$]12, R[0-9]+, [$]127, "
+	for i := doNotOptimize; i < 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CIJ\t[$]10, R[0-9]+, [$]-128, "
+	for i := doNotOptimize; i > -129; i-- {
+		dummy()
+	}
+
+	// s390x:"CIJ\t[$]2, R[0-9]+, [$]127, "
+	for i := doNotOptimize; i >= 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CIJ\t[$]4, R[0-9]+, [$]-128, "
+	for i := doNotOptimize; i <= -129; i-- {
+		dummy()
+	}
+}
+
+// ui32 checks unsigned 32-bit compare-and-branch. For a greater-than loop
+// condition s390x expects CLRJ on two registers; for an inequality condition
+// either CRJ or CLRJ is accepted.
+func ui32(x, y chan uint32) {
+	// s390x:"CLRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+	for <-x > <-y {
+		dummy()
+	}
+
+	// s390x:"CL?RJ\t[$]6, R[0-9]+, R[0-9]+, "
+	for <-x != <-y {
+		dummy()
+	}
+}
+
+// ui32x8 checks unsigned 32-bit comparison with an 8-bit immediate. The loops
+// compare a uint32 counter against 128, 256 and 0, and s390x is expected to
+// emit CLIJ with an immediate of 128, 255 or 0.
+func ui32x8() {
+	// s390x:"CLIJ\t[$]4, R[0-9]+, [$]128, "
+	for i := uint32(0); i < 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255, "
+	for i := uint32(0); i < 256; i++ {
+		dummy()
+	}
+
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255, "
+	for i := uint32(257); i >= 256; i-- {
+		dummy()
+	}
+
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]0, "
+	for i := uint32(1024); i > 0; i-- {
+		dummy()
+	}
+}
+
+// si64xu8 checks signed 64-bit comparison with an unsigned 8-bit immediate.
+// For equality and inequality tests of an int64 against 128 and 255, s390x is
+// expected to use the unsigned CLGIJ form.
+func si64xu8(x chan int64) {
+	// s390x:"CLGIJ\t[$]8, R[0-9]+, [$]128, "
+	for <-x == 128 {
+		dummy()
+	}
+
+	// s390x:"CLGIJ\t[$]6, R[0-9]+, [$]255, "
+	for <-x != 255 {
+		dummy()
+	}
+}
+
+// si32xu8 checks signed 32-bit comparison with an unsigned 8-bit immediate.
+// For equality and inequality tests of an int32 against 255 and 128, s390x is
+// expected to use the unsigned CLIJ form.
+func si32xu8(x chan int32) {
+	// s390x:"CLIJ\t[$]8, R[0-9]+, [$]255, "
+	for <-x == 255 {
+		dummy()
+	}
+
+	// s390x:"CLIJ\t[$]6, R[0-9]+, [$]128, "
+	for <-x != 128 {
+		dummy()
+	}
+}
+
+// ui64xu8 checks unsigned 64-bit comparison with a signed 8-bit immediate.
+// For equality and inequality tests of a uint64 against ^0 and ^127, s390x is
+// expected to use the signed CGIJ form with immediates -1 and -128.
+func ui64xu8(x chan uint64) {
+	// s390x:"CGIJ\t[$]8, R[0-9]+, [$]-1, "
+	for <-x == ^uint64(0) {
+		dummy()
+	}
+
+	// s390x:"CGIJ\t[$]6, R[0-9]+, [$]-128, "
+	for <-x != ^uint64(127) {
+		dummy()
+	}
+}
+
+// ui32xu8 checks unsigned 32-bit comparison with a signed 8-bit immediate.
+// For equality and inequality tests of a uint32 against ^127 and ^0, s390x is
+// expected to use the signed CIJ form with immediates -128 and -1.
+func ui32xu8(x chan uint32) {
+	// s390x:"CIJ\t[$]8, R[0-9]+, [$]-128, "
+	for <-x == ^uint32(127) {
+		dummy()
+	}
+
+	// s390x:"CIJ\t[$]6, R[0-9]+, [$]-1, "
+	for <-x != ^uint32(0) {
+		dummy()
+	}
+}
+
+// si64x0 checks that signed 64-bit comparisons with 1 or -1 are rewritten as
+// comparisons with 0. On riscv64 the four loop conditions are expected to
+// use BGTZ, BLEZ, BLTZ and BGEZ respectively.
+func si64x0(x chan int64) {
+	// riscv64:"BGTZ"
+	for <-x >= 1 {
+		dummy()
+	}
+
+	// riscv64:"BLEZ"
+	for <-x < 1 {
+		dummy()
+	}
+
+	// riscv64:"BLTZ"
+	for <-x <= -1 {
+		dummy()
+	}
+
+	// riscv64:"BGEZ"
+	for <-x > -1 {
+		dummy()
+	}
+}
+
+// ui64x0 checks that unsigned 64-bit comparisons with 1 are rewritten as
+// comparisons with 0. On riscv64 the two loop conditions are expected to use
+// BNEZ and BEQZ respectively.
+func ui64x0(x chan uint64) {
+	// riscv64:"BNEZ"
+	for <-x >= 1 {
+		dummy()
+	}
+
+	// riscv64:"BEQZ"
+	for <-x < 1 {
+		dummy()
+	}
+}
--- a/test/codegen/comparisons.go
+++ b/test/codegen/comparisons.go
@@ -0,0 +1,827 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import (
+	"cmp"
+	"unsafe"
+)
+
+// This file contains code generation tests related to the comparison
+// operators.
+
+// -------------- //
+//    Equality    //
+// -------------- //
+
+// Check that comparisons with constant strings use 2/4/8 byte compares
+
+// CompareString1 reports whether s equals a 2-byte constant string. The
+// per-architecture checks expect a 2-byte load and/or compare instruction.
+func CompareString1(s string) bool {
+	// amd64:`CMPW\t\(.*\), [$]`
+	// arm64:`MOVHU\t\(.*\), [R]`,`MOVD\t[$]`,`CMPW\tR`
+	// ppc64le:`MOVHZ\t\(.*\), [R]`,`CMPW\t.*, [$]`
+	// s390x:`MOVHBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
+	return s == "xx"
+}
+
+// CompareString2 reports whether s equals a 4-byte constant string. The
+// per-architecture checks expect a 4-byte load and/or compare instruction.
+func CompareString2(s string) bool {
+	// amd64:`CMPL\t\(.*\), [$]`
+	// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
+	// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
+	// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
+	return s == "xxxx"
+}
+
+// CompareString3 reports whether s equals an 8-byte constant string. amd64
+// is expected to use CMPQ against memory; arm64, ppc64x and s390x must not
+// emit a CMPW for the comparison.
+func CompareString3(s string) bool {
+	// amd64:`CMPQ\t\(.*\), [A-Z]`
+	// arm64:-`CMPW\t`
+	// ppc64x:-`CMPW\t`
+	// s390x:-`CMPW\t`
+	return s == "xxxxxxxx"
+}
+
+// Check that array comparisons use 2/4/8 byte compares
+
+// CompareArray1 compares two 2-byte arrays. amd64 is expected to use a
+// single CMPW on the stack argument; arm64, ppc64le and s390x must not
+// emit byte-sized loads.
+func CompareArray1(a, b [2]byte) bool {
+	// amd64:`CMPW\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	// arm64:-`MOVBU\t`
+	// ppc64le:-`MOVBZ\t`
+	// s390x:-`MOVBZ\t`
+	return a == b
+}
+
+// CompareArray2 compares two [3]uint16 arrays (6 bytes). amd64 is expected
+// to emit both a CMPL and a CMPW on the stack arguments.
+func CompareArray2(a, b [3]uint16) bool {
+	// amd64:`CMPL\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	// amd64:`CMPW\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	return a == b
+}
+
+// CompareArray3 is the signed-element counterpart of CompareArray2: it
+// compares two [3]int16 arrays and expects a CMPL plus a CMPW on amd64.
+func CompareArray3(a, b [3]int16) bool {
+	// amd64:`CMPL\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	// amd64:`CMPW\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	return a == b
+}
+
+// CompareArray4 compares two [12]int8 arrays. amd64 is expected to emit
+// both a CMPQ and a CMPL on the stack arguments.
+func CompareArray4(a, b [12]int8) bool {
+	// amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	// amd64:`CMPL\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	return a == b
+}
+
+// CompareArray5 compares two [15]byte arrays. amd64 is expected to emit a
+// CMPQ on the stack arguments.
+func CompareArray5(a, b [15]byte) bool {
+	// amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	return a == b
+}
+
+// This was a TODO in mapaccess1_faststr
+//
+// CompareArray6 reinterprets a and b as pointers to 4-byte arrays and
+// reports whether the pointed-to arrays differ. The checks expect a 4-byte
+// load and/or compare on each listed architecture.
+func CompareArray6(a, b unsafe.Pointer) bool {
+	// amd64:`CMPL\t\(.*\), [A-Z]`
+	// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
+	// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
+	// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [R]`
+	return *((*[4]byte)(a)) != *((*[4]byte)(b))
+}
+
+// Check that some structs generate 2/4/8 byte compares.
+
+// T1 is a struct wrapping a single 8-byte array.
+type T1 struct {
+	a [8]byte
+}
+
+// CompareStruct1 compares two T1 values. amd64 is expected to use CMPQ on
+// the stack arguments and must not emit a CALL.
+func CompareStruct1(s1, s2 T1) bool {
+	// amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	// amd64:-`CALL`
+	return s1 == s2
+}
+
+// T2 is a struct wrapping a single 16-byte array.
+type T2 struct {
+	a [16]byte
+}
+
+// CompareStruct2 compares two T2 values. amd64 is expected to use CMPQ on
+// the stack arguments and must not emit a CALL.
+func CompareStruct2(s1, s2 T2) bool {
+	// amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	// amd64:-`CALL`
+	return s1 == s2
+}
+
+// Assert that a memequal call is still generated when
+// inlining would increase binary size too much.
+
+// T3 is a struct wrapping a single 24-byte array.
+type T3 struct {
+	a [24]byte
+}
+
+// CompareStruct3 compares two T3 values. Unlike the smaller structs, amd64
+// is expected to emit a CALL and no CMPQ on the stack arguments.
+func CompareStruct3(s1, s2 T3) bool {
+	// amd64:-`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	// amd64:`CALL`
+	return s1 == s2
+}
+
+// T4 is a struct wrapping a single 32-byte array.
+type T4 struct {
+	a [32]byte
+}
+
+// CompareStruct4 compares two T4 values. amd64 is expected to emit a CALL
+// and no CMPQ on the stack arguments.
+func CompareStruct4(s1, s2 T4) bool {
+	// amd64:-`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+	// amd64:`CALL`
+	return s1 == s2
+}
+
+// -------------- //
+//    Ordering    //
+// -------------- //
+
+// Test that LEAQ/ADDQconst are folded into SETx ops
+
+// r is the package-level destination written by CmpFold.
+var r bool
+
+// CmpFold stores the result of x > 4 into the global r. amd64 is expected
+// to emit a SETHI that writes directly to an SB-relative address.
+func CmpFold(x uint32) {
+	// amd64:`SETHI\t.*\(SB\)`
+	r = x > 4
+}
+
+// Test that direct comparisons with memory are generated when
+// possible
+
+// CmpMem1 compares p with the value loaded through q. amd64 is expected to
+// use a CMPQ with a memory operand.
+func CmpMem1(p int, q *int) bool {
+	// amd64:`CMPQ\t\(.*\), [A-Z]`
+	return p < *q
+}
+
+// CmpMem2 compares the value loaded through p with q. amd64 is expected to
+// use a CMPQ with a memory operand.
+func CmpMem2(p *int, q int) bool {
+	// amd64:`CMPQ\t\(.*\), [A-Z]`
+	return *p < q
+}
+
+// CmpMem3 compares the value loaded through p with the constant 7. amd64
+// is expected to use a CMPQ of memory against the immediate 7.
+func CmpMem3(p *int) bool {
+	// amd64:`CMPQ\t\(.*\), [$]7`
+	return *p < 7
+}
+
+// CmpMem4 is CmpMem3 with the operands swapped (constant on the left); the
+// same memory-against-immediate CMPQ is expected on amd64.
+func CmpMem4(p *int) bool {
+	// amd64:`CMPQ\t\(.*\), [$]7`
+	return 7 < *p
+}
+
+// CmpMem5 stores nil through p. On amd64 the store is expected to include
+// a CMPL of runtime.writeBarrier in memory against the immediate 0.
+func CmpMem5(p **int) {
+	// amd64:`CMPL\truntime.writeBarrier\(SB\), [$]0`
+	*p = nil
+}
+
+// CmpMem6 returns 1 if a[1] > a[2] and 2 otherwise. The checks expect the
+// comparison to use a memory operand at offset 8 on 386 and offset 16 on
+// amd64.
+func CmpMem6(a []int) int {
+	// 386:`CMPL\s8\([A-Z]+\),`
+	// amd64:`CMPQ\s16\([A-Z]+\),`
+	if a[1] > a[2] {
+		return 1
+	} else {
+		return 2
+	}
+}
+
+// Check tbz/tbnz are generated when comparing against zero on arm64
+
+// CmpZero1 zeroes *ptr when the int32 a is negative. arm64 is expected to
+// branch with TBZ.
+func CmpZero1(a int32, ptr *int) {
+	if a < 0 { // arm64:"TBZ"
+		*ptr = 0
+	}
+}
+
+// CmpZero2 zeroes *ptr when the int64 a is negative. arm64 is expected to
+// branch with TBZ.
+func CmpZero2(a int64, ptr *int) {
+	if a < 0 { // arm64:"TBZ"
+		*ptr = 0
+	}
+}
+
+// CmpZero3 zeroes *ptr when the int32 a is non-negative. arm64 is expected
+// to branch with TBNZ.
+func CmpZero3(a int32, ptr *int) {
+	if a >= 0 { // arm64:"TBNZ"
+		*ptr = 0
+	}
+}
+
+// CmpZero4 zeroes *ptr when the int64 a is non-negative. arm64 is expected
+// to branch with TBNZ.
+func CmpZero4(a int64, ptr *int) {
+	if a >= 0 { // arm64:"TBNZ"
+		*ptr = 0
+	}
+}
+
+// CmpToZero computes ten comparisons against zero (c0..c9) of AND, ADD and
+// XOR expressions and returns a distinct code for the first one that holds.
+// The checks on each assignment expect a flag-setting test instruction
+// (TST/CMN/TEQ/TEST variants) in place of the plain arithmetic op. c5 and
+// c6 are the exceptions: b+d and a&d are also returned via deOptC0 and
+// deOptC1, so there the plain ADD/AND is expected instead.
+func CmpToZero(a, b, d int32, e, f int64, deOptC0, deOptC1 bool) int32 {
+	// arm:`TST`,-`AND`
+	// arm64:`TSTW`,-`AND`
+	// 386:`TESTL`,-`ANDL`
+	// amd64:`TESTL`,-`ANDL`
+	c0 := a&b < 0
+	// arm:`CMN`,-`ADD`
+	// arm64:`CMNW`,-`ADD`
+	c1 := a+b < 0
+	// arm:`TEQ`,-`XOR`
+	c2 := a^b < 0
+	// arm64:`TST`,-`AND`
+	// amd64:`TESTQ`,-`ANDQ`
+	c3 := e&f < 0
+	// arm64:`CMN`,-`ADD`
+	c4 := e+f < 0
+	// not optimized to single CMNW/CMN due to further use of b+d
+	// arm64:`ADD`,-`CMNW`
+	// arm:`ADD`,-`CMN`
+	c5 := b+d == 0
+	// not optimized to single TSTW/TST due to further use of a&d
+	// arm64:`AND`,-`TSTW`
+	// arm:`AND`,-`TST`
+	// 386:`ANDL`
+	c6 := a&d >= 0
+	// arm64:`TST\sR[0-9]+<<3,\sR[0-9]+`
+	c7 := e&(f<<3) < 0
+	// arm64:`CMN\sR[0-9]+<<3,\sR[0-9]+`
+	c8 := e+(f<<3) < 0
+	// arm64:`TST\sR[0-9],\sR[0-9]+`
+	c9 := e&(-19) < 0
+	if c0 {
+		return 1
+	} else if c1 {
+		return 2
+	} else if c2 {
+		return 3
+	} else if c3 {
+		return 4
+	} else if c4 {
+		return 5
+	} else if c5 {
+		return 6
+	} else if c6 {
+		return 7
+	} else if c7 {
+		return 9
+	} else if c8 {
+		return 10
+	} else if c9 {
+		return 11
+	} else if deOptC0 {
+		return b + d
+	} else if deOptC1 {
+		return a & d
+	} else {
+		return 0
+	}
+}
+
+// CmpLogicalToZero returns 1 as soon as one of several AND/OR/XOR
+// expressions equals zero, and 0 otherwise. On ppc64x each test is expected
+// to use the record form of the logical op (ANDCC/ORCC/XORCC) without a
+// separate compare; on wasm an I64Eqz is expected without 32/64-bit
+// conversions. The parameters b and c are not used.
+func CmpLogicalToZero(a, b, c uint32, d, e uint64) uint64 {
+
+	// ppc64x:"ANDCC",-"CMPW"
+	// wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+	if a&63 == 0 {
+		return 1
+	}
+
+	// ppc64x:"ANDCC",-"CMP"
+	// wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+	if d&255 == 0 {
+		return 1
+	}
+
+	// ppc64x:"ANDCC",-"CMP"
+	// wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+	if d&e == 0 {
+		return 1
+	}
+	// ppc64x:"ORCC",-"CMP"
+	// wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+	if d|e == 0 {
+		return 1
+	}
+
+	// ppc64x:"XORCC",-"CMP"
+	// wasm:"I64Eqz","I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+	if e^d == 0 {
+		return 1
+	}
+	return 0
+}
+
+// The following CmpToZero_ex* check that cmp|cmn with bmi|bpl are generated for
+// 'comparing to zero' expressions
+
+// var + const
+// 'x-const' might be canonicalized to 'x+(-const)', so we check both
+// CMN and CMP for subtraction expressions to make the pattern robust.
+//
+// CmpToZero_ex1 compares a variable plus or minus a constant against zero
+// for the int64 a and the int32 e, returning a distinct code per branch.
+// Some of the subtraction cases expect a SUB followed by TBZ/TBNZ or
+// CMP/BGT on arm64 rather than the fused compare form.
+func CmpToZero_ex1(a int64, e int32) int {
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if a+3 < 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+	if a+5 <= 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if a+13 >= 0 {
+		return 2
+	}
+
+	// arm64:`CMP|CMN`,-`(ADD|SUB)`,`(BMI|BPL)`
+	if a-7 < 0 {
+		return 3
+	}
+
+	// arm64:`SUB`,`TBZ`
+	if a-11 >= 0 {
+		return 4
+	}
+
+	// arm64:`SUB`,`CMP`,`BGT`
+	if a-19 > 0 {
+		return 4
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	// arm:`CMN`,-`ADD`,`(BMI|BPL)`
+	if e+3 < 0 {
+		return 5
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	// arm:`CMN`,-`ADD`,`(BMI|BPL)`
+	if e+13 >= 0 {
+		return 6
+	}
+
+	// arm64:`CMPW|CMNW`,`(BMI|BPL)`
+	// arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)`
+	if e-7 < 0 {
+		return 7
+	}
+
+	// arm64:`SUB`,`TBNZ`
+	// arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)`
+	if e-11 >= 0 {
+		return 8
+	}
+
+	return 0
+}
+
+// var + var
+// TODO: optimize 'var - var'
+//
+// CmpToZero_ex2 compares sums of two variables against zero, for int64 and
+// int32 operands. The checks expect CMN/CMNW with a BMI/BPL branch and no
+// separate ADD on arm64 (and on arm for the 32-bit cases).
+func CmpToZero_ex2(a, b, c int64, e, f, g int32) int {
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if a+b < 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+	if a+c <= 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if b+c >= 0 {
+		return 2
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	// arm:`CMN`,-`ADD`,`(BMI|BPL)`
+	if e+f < 0 {
+		return 5
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	// arm:`CMN`,-`ADD`,`(BMI|BPL)`
+	if f+g >= 0 {
+		return 6
+	}
+	return 0
+}
+
+// var + var*var
+//
+// CmpToZero_ex3 compares x + y*z expressions against zero. The checks
+// expect a separate multiply followed by CMN/CMNW, and forbid the fused
+// multiply-add (MADD/MADDW on arm64, MULA on arm).
+func CmpToZero_ex3(a, b, c, d int64, e, f, g, h int32) int {
+	// arm64:`CMN`,-`MADD`,`MUL`,`(BMI|BPL)`
+	if a+b*c < 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`MADD`,`MUL`,`(BMI|BPL)`
+	if b+c*d >= 0 {
+		return 2
+	}
+
+	// arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)`
+	// arm:`CMN`,-`MULA`,`MUL`,`BEQ`,`(BMI|BPL)`
+	if e+f*g > 0 {
+		return 5
+	}
+
+	// arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)`
+	// arm:`CMN`,-`MULA`,`MUL`,`BEQ`,`(BMI|BPL)`
+	if f+g*h <= 0 {
+		return 6
+	}
+	return 0
+}
+
+// var - var*var
+//
+// CmpToZero_ex4 compares x - y*z expressions against zero. On arm64 the
+// checks expect a separate multiply followed by CMP/CMPW, and forbid the
+// fused multiply-subtract (MSUB/MSUBW).
+func CmpToZero_ex4(a, b, c, d int64, e, f, g, h int32) int {
+	// arm64:`CMP`,-`MSUB`,`MUL`,`BEQ`,`(BMI|BPL)`
+	if a-b*c > 0 {
+		return 1
+	}
+
+	// arm64:`CMP`,-`MSUB`,`MUL`,`(BMI|BPL)`
+	if b-c*d >= 0 {
+		return 2
+	}
+
+	// arm64:`CMPW`,-`MSUBW`,`MULW`,`(BMI|BPL)`
+	if e-f*g < 0 {
+		return 5
+	}
+
+	// arm64:`CMPW`,-`MSUBW`,`MULW`,`(BMI|BPL)`
+	if f-g*h >= 0 {
+		return 6
+	}
+	return 0
+}
+
+// CmpToZero_ex5 compares an add and a subtract whose second operand is a
+// shifted value against zero. On arm the checks expect CMN/CMP with no
+// separate ADD/SUB.
+func CmpToZero_ex5(e, f int32, u uint32) int {
+	// arm:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+	if e+f<<1 > 0 {
+		return 1
+	}
+
+	// arm:`CMP`,-`SUB`,`(BMI|BPL)`
+	if f-int32(u>>2) >= 0 {
+		return 2
+	}
+	return 0
+}
+
+// UintLtZero tests whether any unsigned operand is below zero, which can
+// never hold. amd64 and arm64 must emit none of the listed compare, test or
+// branch instructions for the condition.
+func UintLtZero(a uint8, b uint16, c uint32, d uint64) int {
+	// amd64: -`(TESTB|TESTW|TESTL|TESTQ|JCC|JCS)`
+	// arm64: -`(CMPW|CMP|BHS|BLO)`
+	if a < 0 || b < 0 || c < 0 || d < 0 {
+		return 1
+	}
+	return 0
+}
+
+// UintGeqZero tests whether any unsigned operand is at least zero, which
+// always holds. amd64 and arm64 must emit none of the listed compare, test
+// or branch instructions for the condition.
+func UintGeqZero(a uint8, b uint16, c uint32, d uint64) int {
+	// amd64: -`(TESTB|TESTW|TESTL|TESTQ|JCS|JCC)`
+	// arm64: -`(CMPW|CMP|BLO|BHS)`
+	if a >= 0 || b >= 0 || c >= 0 || d >= 0 {
+		return 1
+	}
+	return 0
+}
+
+// UintGtZero tests whether any unsigned operand is greater than zero.
+// arm64 is expected to use compare-and-branch-on-zero instructions (both
+// the W-suffixed and the unsuffixed form) and no CMP or unsigned
+// conditional branch.
+func UintGtZero(a uint8, b uint16, c uint32, d uint64) int {
+	// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BLS|BHI)`
+	if a > 0 || b > 0 || c > 0 || d > 0 {
+		return 1
+	}
+	return 0
+}
+
+// UintLeqZero tests whether any unsigned operand is at most zero. arm64 is
+// expected to use CBZ/CBNZ forms and no CMP or unsigned conditional branch.
+func UintLeqZero(a uint8, b uint16, c uint32, d uint64) int {
+	// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BHI|BLS)`
+	if a <= 0 || b <= 0 || c <= 0 || d <= 0 {
+		return 1
+	}
+	return 0
+}
+
+// UintLtOne tests whether any unsigned operand is below one. arm64 is
+// expected to use CBZ/CBNZ forms and no CMP or unsigned conditional branch.
+func UintLtOne(a uint8, b uint16, c uint32, d uint64) int {
+	// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BHS|BLO)`
+	if a < 1 || b < 1 || c < 1 || d < 1 {
+		return 1
+	}
+	return 0
+}
+
+// UintGeqOne tests whether any unsigned operand is at least one. arm64 is
+// expected to use CBZ/CBNZ forms and no CMP or unsigned conditional branch.
+func UintGeqOne(a uint8, b uint16, c uint32, d uint64) int {
+	// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BLO|BHS)`
+	if a >= 1 || b >= 1 || c >= 1 || d >= 1 {
+		return 1
+	}
+	return 0
+}
+
+// CmpToZeroU_ex1 checks, for each unsigned operand width, that 0 < v is
+// compiled on wasm to a zero test (I64Eqz) and not to an unsigned
+// less-than (I64LtU). The positive and negative checks are separated by a
+// comma so that the harness evaluates both of them.
+func CmpToZeroU_ex1(a uint8, b uint16, c uint32, d uint64) int {
+	// wasm:"I64Eqz",-"I64LtU"
+	if 0 < a {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if 0 < b {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if 0 < c {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if 0 < d {
+		return 1
+	}
+	return 0
+}
+
+// CmpToZeroU_ex2 checks, for each unsigned operand width, that v <= 0 is
+// compiled on wasm to a zero test (I64Eqz) and not to an unsigned
+// less-or-equal (I64LeU). The positive and negative checks are separated
+// by a comma so that the harness evaluates both of them.
+func CmpToZeroU_ex2(a uint8, b uint16, c uint32, d uint64) int {
+	// wasm:"I64Eqz",-"I64LeU"
+	if a <= 0 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if b <= 0 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if c <= 0 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if d <= 0 {
+		return 1
+	}
+	return 0
+}
+
+// CmpToOneU_ex1 checks, for each unsigned operand width, that v < 1 is
+// compiled on wasm to a zero test (I64Eqz) and not to an unsigned
+// less-than (I64LtU). The positive and negative checks are separated by a
+// comma so that the harness evaluates both of them.
+func CmpToOneU_ex1(a uint8, b uint16, c uint32, d uint64) int {
+	// wasm:"I64Eqz",-"I64LtU"
+	if a < 1 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if b < 1 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if c < 1 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if d < 1 {
+		return 1
+	}
+	return 0
+}
+
+// CmpToOneU_ex2 checks, for each unsigned operand width, that 1 <= v is
+// compiled on wasm to a zero test (I64Eqz) and not to an unsigned
+// less-or-equal (I64LeU). The positive and negative checks are separated
+// by a comma so that the harness evaluates both of them.
+func CmpToOneU_ex2(a uint8, b uint16, c uint32, d uint64) int {
+	// wasm:"I64Eqz",-"I64LeU"
+	if 1 <= a {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if 1 <= b {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if 1 <= c {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if 1 <= d {
+		return 1
+	}
+	return 0
+}
+
+// Check that small memequals are replaced with eq instructions
+
+// equalConstString1 compares two 1-byte constant strings; amd64, arm64 and
+// ppc64x must not emit a memequal call.
+func equalConstString1() bool {
+	a := string("A")
+	b := string("Z")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	// ppc64x:-".*memequal"
+	return a == b
+}
+
+// equalVarString1 compares the first byte of a with a 1-byte constant
+// string; amd64, arm64 and ppc64x must not emit a memequal call.
+func equalVarString1(a string) bool {
+	b := string("Z")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	// ppc64x:-".*memequal"
+	return a[:1] == b
+}
+
+// equalConstString2 compares two 2-byte constant strings; amd64, arm64 and
+// ppc64x must not emit a memequal call.
+func equalConstString2() bool {
+	a := string("AA")
+	b := string("ZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	// ppc64x:-".*memequal"
+	return a == b
+}
+
+// equalVarString2 compares the first 2 bytes of a with a 2-byte constant
+// string; amd64, arm64 and ppc64x must not emit a memequal call.
+func equalVarString2(a string) bool {
+	b := string("ZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	// ppc64x:-".*memequal"
+	return a[:2] == b
+}
+
+// equalConstString4 compares two 4-byte constant strings; amd64, arm64 and
+// ppc64x must not emit a memequal call.
+func equalConstString4() bool {
+	a := string("AAAA")
+	b := string("ZZZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	// ppc64x:-".*memequal"
+	return a == b
+}
+
+// equalVarString4 compares the first 4 bytes of a with a 4-byte constant
+// string; amd64, arm64 and ppc64x must not emit a memequal call.
+func equalVarString4(a string) bool {
+	b := string("ZZZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	// ppc64x:-".*memequal"
+	return a[:4] == b
+}
+
+// equalConstString8 compares two 8-byte constant strings; amd64, arm64 and
+// ppc64x must not emit a memequal call.
+func equalConstString8() bool {
+	a := string("AAAAAAAA")
+	b := string("ZZZZZZZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	// ppc64x:-".*memequal"
+	return a == b
+}
+
+// equalVarString8 compares the first 8 bytes of a with an 8-byte constant
+// string; amd64, arm64 and ppc64x must not emit a memequal call.
+func equalVarString8(a string) bool {
+	b := string("ZZZZZZZZ")
+	// amd64:-".*memequal"
+	// arm64:-".*memequal"
+	// ppc64x:-".*memequal"
+	return a[:8] == b
+}
+
+// cmpToCmn evaluates eleven equality and ordering tests and returns how
+// many of them held. On arm64, tests against a negative constant, a sum or
+// a negated operand are expected to use CMN and no CMP; the two tests on
+// a difference (c10, c11) are expected to use CMP and no CMN.
+func cmpToCmn(a, b, c, d int) int {
+	var c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 int
+	// arm64:`CMN`,-`CMP`
+	if a < -8 {
+		c1 = 1
+	}
+	// arm64:`CMN`,-`CMP`
+	if a+1 == 0 {
+		c2 = 1
+	}
+	// arm64:`CMN`,-`CMP`
+	if a+3 != 0 {
+		c3 = 1
+	}
+	// arm64:`CMN`,-`CMP`
+	if a+b == 0 {
+		c4 = 1
+	}
+	// arm64:`CMN`,-`CMP`
+	if b+c != 0 {
+		c5 = 1
+	}
+	// arm64:`CMN`,-`CMP`
+	if a == -c {
+		c6 = 1
+	}
+	// arm64:`CMN`,-`CMP`
+	if b != -d {
+		c7 = 1
+	}
+	// arm64:`CMN`,-`CMP`
+	if a*b+c == 0 {
+		c8 = 1
+	}
+	// arm64:`CMN`,-`CMP`
+	if a*c+b != 0 {
+		c9 = 1
+	}
+	// arm64:`CMP`,-`CMN`
+	if b*c-a == 0 {
+		c10 = 1
+	}
+	// arm64:`CMP`,-`CMN`
+	if a*d-b != 0 {
+		c11 = 1
+	}
+	return c1 + c2 + c3 + c4 + c5 + c6 + c7 + c8 + c9 + c10 + c11
+}
+
+// cmpToCmnLessThan counts how many of four "expression < 0" tests hold. On
+// arm64 the additive forms are expected to use CMN with CSET MI, and the
+// subtractive form CMP with CSET MI. The parameter d is not used.
+func cmpToCmnLessThan(a, b, c, d int) int {
+	var c1, c2, c3, c4 int
+	// arm64:`CMN`,`CSET\tMI`,-`CMP`
+	if a+1 < 0 {
+		c1 = 1
+	}
+	// arm64:`CMN`,`CSET\tMI`,-`CMP`
+	if a+b < 0 {
+		c2 = 1
+	}
+	// arm64:`CMN`,`CSET\tMI`,-`CMP`
+	if a*b+c < 0 {
+		c3 = 1
+	}
+	// arm64:`CMP`,`CSET\tMI`,-`CMN`
+	if a-b*c < 0 {
+		c4 = 1
+	}
+	return c1 + c2 + c3 + c4
+}
+
+// cmpToCmnGreaterThanEqual counts how many of four "expression >= 0" tests
+// hold. On arm64 the additive forms are expected to use CMN with CSET PL,
+// and the subtractive form CMP with CSET PL. The parameter d is not used.
+func cmpToCmnGreaterThanEqual(a, b, c, d int) int {
+	var c1, c2, c3, c4 int
+	// arm64:`CMN`,`CSET\tPL`,-`CMP`
+	if a+1 >= 0 {
+		c1 = 1
+	}
+	// arm64:`CMN`,`CSET\tPL`,-`CMP`
+	if a+b >= 0 {
+		c2 = 1
+	}
+	// arm64:`CMN`,`CSET\tPL`,-`CMP`
+	if a*b+c >= 0 {
+		c3 = 1
+	}
+	// arm64:`CMP`,`CSET\tPL`,-`CMN`
+	if a-b*c >= 0 {
+		c4 = 1
+	}
+	return c1 + c2 + c3 + c4
+}
+
+// cmp1 compares a zero-valued string (left operand) with val; amd64 must
+// not emit a memequal call.
+func cmp1(val string) bool {
+	var z string
+	// amd64:-".*memequal"
+	return z == val
+}
+
+// cmp2 compares val with a zero-valued string (right operand); amd64 must
+// not emit a memequal call.
+func cmp2(val string) bool {
+	var z string
+	// amd64:-".*memequal"
+	return val == z
+}
+
+// cmp3 compares a 4-byte constant string (left operand) with val; amd64
+// must not emit a memequal call.
+func cmp3(val string) bool {
+	z := "food"
+	// amd64:-".*memequal"
+	return z == val
+}
+
+// cmp4 compares val with a 4-byte constant string (right operand); amd64
+// must not emit a memequal call.
+func cmp4(val string) bool {
+	z := "food"
+	// amd64:-".*memequal"
+	return val == z
+}
+
+// cmp5 is the generic form of cmp1: it compares the zero value of T (left
+// operand) with val; amd64 must not emit a memequal call. It is
+// instantiated with string by cmp7.
+func cmp5[T comparable](val T) bool {
+	var z T
+	// amd64:-".*memequal"
+	return z == val
+}
+
+// cmp6 is the generic form of cmp2: it compares val with the zero value of
+// T (right operand); amd64 must not emit a memequal call. It is
+// instantiated with string by cmp7.
+func cmp6[T comparable](val T) bool {
+	var z T
+	// amd64:-".*memequal"
+	return val == z
+}
+
+// cmp7 exists only to instantiate cmp5 and cmp6 with string so that code
+// is generated for them.
+func cmp7() {
+	cmp5[string]("") // force instantiation
+	cmp6[string]("") // force instantiation
+}
+
+// Point is a pair of int coordinates used by invertLessThanNoov.
+type Point struct {
+	X, Y int
+}
+
+// invertLessThanNoov checks (LessThanNoov (InvertFlags x)) is lowered as
+// CMP, CSET, CSEL instruction sequence. InvertFlags are only generated under
+// certain conditions, see canonLessThan, so if the code below does not
+// generate an InvertFlags OP, this check may fail.
+//
+// The expression tested is the difference of two products of coordinate
+// differences relative to p3, compared against zero.
+func invertLessThanNoov(p1, p2, p3 Point) bool {
+	// arm64:`CMP`,`CSET`,`CSEL`
+	return (p1.X-p3.X)*(p2.Y-p3.Y)-(p2.X-p3.X)*(p1.Y-p3.Y) < 0
+}
+
+// cmpstring1 returns -1, +1 or 0 according to the ordering of x and y. On
+// amd64 the first comparison is expected to call cmpstring, and the second
+// must not call it again.
+func cmpstring1(x, y string) int {
+	// amd64:".*cmpstring"
+	if x < y {
+		return -1
+	}
+	// amd64:-".*cmpstring"
+	if x > y {
+		return +1
+	}
+	return 0
+}
+// cmpstring2 returns cmp.Compare(x, y). The amd64 check forbids MOVQ
+// instructions with an SP-relative operand on the call line, as a proxy for
+// detecting a second cmpstring call (see the comment in the body).
+func cmpstring2(x, y string) int {
+	// We want to fail if there are two calls to cmpstring.
+	// They will both have the same line number, so a test
+	// like in cmpstring1 will not work. Instead, we
+	// look for spill/restore instructions, which only
+	// need to exist if there are 2 calls.
+	//amd64:-`MOVQ\t.*\(SP\)`
+	return cmp.Compare(x, y)
+}
--- a/test/codegen/condmove.go
+++ b/test/codegen/condmove.go
@@ -0,0 +1,453 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// cmovint returns c+4, or 182 when that sum is negative. The checks expect
+// a signed less-than conditional move/select on each listed architecture.
+func cmovint(c int) int {
+	x := c + 4
+	if x < 0 {
+		x = 182
+	}
+	// amd64:"CMOVQLT"
+	// arm64:"CSEL\tLT"
+	// ppc64x:"ISEL\t[$]0"
+	// wasm:"Select"
+	return x
+}
+
+// cmovchan replaces x with y when the two channels differ and returns x.
+// The checks expect a not-equal conditional move/select.
+func cmovchan(x, y chan int) chan int {
+	if x != y {
+		x = y
+	}
+	// amd64:"CMOVQNE"
+	// arm64:"CSEL\tNE"
+	// ppc64x:"ISEL\t[$]2"
+	// wasm:"Select"
+	return x
+}
+
+// cmovuintptr returns -y when x < y (unsigned) and x otherwise. amd64 is
+// expected to use an unsigned CMOVQ; arm64 is expected to fold the negation
+// into a CSNEG.
+func cmovuintptr(x, y uintptr) uintptr {
+	if x < y {
+		x = -y
+	}
+	// amd64:"CMOVQ(HI|CS)"
+	// arm64:"CSNEG\tLS"
+	// ppc64x:"ISEL\t[$]1"
+	// wasm:"Select"
+	return x
+}
+
+// cmov32bit is the uint32 variant of cmovuintptr; amd64 is expected to use
+// the 32-bit CMOVL form.
+func cmov32bit(x, y uint32) uint32 {
+	if x < y {
+		x = -y
+	}
+	// amd64:"CMOVL(HI|CS)"
+	// arm64:"CSNEG\t(LS|HS)"
+	// ppc64x:"ISEL\t[$]1"
+	// wasm:"Select"
+	return x
+}
+
+// cmov16bit is the uint16 variant of cmovuintptr; amd64 is expected to use
+// the 16-bit CMOVW form.
+func cmov16bit(x, y uint16) uint16 {
+	if x < y {
+		x = -y
+	}
+	// amd64:"CMOVW(HI|CS)"
+	// arm64:"CSNEG\t(LS|HS)"
+	// ppc64x:"ISEL\t[$][01]"
+	// wasm:"Select"
+	return x
+}
+
+// Floating point comparison. For EQ/NE, we must
+// generate special code to handle NaNs.
+//
+// cmovfloateq returns 256 when x == y and 128 otherwise. amd64 is expected
+// to need two conditional moves (CMOVQNE and CMOVQPC).
+func cmovfloateq(x, y float64) int {
+	a := 128
+	if x == y {
+		a = 256
+	}
+	// amd64:"CMOVQNE","CMOVQPC"
+	// arm64:"CSEL\tEQ"
+	// ppc64x:"ISEL\t[$]2"
+	// wasm:"Select"
+	return a
+}
+
+// cmovfloatne returns 256 when x != y and 128 otherwise. amd64 is expected
+// to need two conditional moves (CMOVQNE and CMOVQPS).
+func cmovfloatne(x, y float64) int {
+	a := 128
+	if x != y {
+		a = 256
+	}
+	// amd64:"CMOVQNE","CMOVQPS"
+	// arm64:"CSEL\tNE"
+	// ppc64x:"ISEL\t[$]2"
+	// wasm:"Select"
+	return a
+}
+
+// frexp is a non-inlined stub used by cmovfloatint2; it ignores f and
+// always returns (1.0, 4).
+//
+//go:noinline
+func frexp(f float64) (frac float64, exp int) {
+	return 1.0, 4
+}
+
+// ldexp is a non-inlined stub used by cmovfloatint2; it ignores its
+// arguments and always returns 1.0.
+//
+//go:noinline
+func ldexp(frac float64, exp int) float64 {
+	return 1.0
+}
+
+// Generate a CMOV with a floating comparison and integer move.
+//
+// cmovfloatint2 loops while r >= y; inside the loop the integer rexp is
+// conditionally decremented based on the float comparison rfr < yfr, which
+// is the select the checks look for.
+func cmovfloatint2(x, y float64) float64 {
+	yfr, yexp := 4.0, 5
+
+	r := x
+	for r >= y {
+		rfr, rexp := frexp(r)
+		if rfr < yfr {
+			rexp = rexp - 1
+		}
+		// amd64:"CMOVQHI"
+		// arm64:"CSEL\tMI"
+		// ppc64x:"ISEL\t[$]0"
+		// wasm:"Select"
+		r = r - ldexp(y, rexp-yexp)
+	}
+	return r
+}
+
+// cmovloaded returns x[2] when it is non-zero and y>>2 otherwise. The
+// checks expect a not-equal conditional move/select.
+func cmovloaded(x [4]int, y int) int {
+	if x[2] != 0 {
+		y = x[2]
+	} else {
+		y = y >> 2
+	}
+	// amd64:"CMOVQNE"
+	// arm64:"CSEL\tNE"
+	// ppc64x:"ISEL\t[$]2"
+	// wasm:"Select"
+	return y
+}
+
+// cmovuintptr2 returns x*2, or 256 when that product is zero. The checks
+// expect an equal conditional move/select. The parameter y is not used.
+func cmovuintptr2(x, y uintptr) uintptr {
+	a := x * 2
+	if a == 0 {
+		a = 256
+	}
+	// amd64:"CMOVQEQ"
+	// arm64:"CSEL\tEQ"
+	// ppc64x:"ISEL\t[$]2"
+	// wasm:"Select"
+	return a
+}
+
+// Floating point CMOVs are not supported by amd64/arm64/ppc64x
+//
+// cmovfloatmove selects between the float constants 1.0 and 2.0 based on
+// an integer comparison; every listed architecture (including wasm) must
+// not emit a conditional move/select here.
+func cmovfloatmove(x, y int) float64 {
+	a := 1.0
+	if x <= y {
+		a = 2.0
+	}
+	// amd64:-"CMOV"
+	// arm64:-"CSEL"
+	// ppc64x:-"ISEL"
+	// wasm:-"Select"
+	return a
+}
+
+// On amd64, the following patterns trigger comparison inversion.
+// Test that we correctly invert the CMOV condition.
+// gsink and gusink are the package-level values compared against.
+var gsink int64
+var gusink uint64
+
+// cmovinvert1 negates y when x < gsink; amd64 is expected to emit CMOVQGT.
+func cmovinvert1(x, y int64) int64 {
+	if x < gsink {
+		y = -y
+	}
+	// amd64:"CMOVQGT"
+	return y
+}
+// cmovinvert2 negates y when x <= gsink; amd64 is expected to emit CMOVQGE.
+func cmovinvert2(x, y int64) int64 {
+	if x <= gsink {
+		y = -y
+	}
+	// amd64:"CMOVQGE"
+	return y
+}
+// cmovinvert3 negates y when x == gsink; amd64 is expected to emit CMOVQEQ.
+func cmovinvert3(x, y int64) int64 {
+	if x == gsink {
+		y = -y
+	}
+	// amd64:"CMOVQEQ"
+	return y
+}
+// cmovinvert4 negates y when x != gsink; amd64 is expected to emit CMOVQNE.
+func cmovinvert4(x, y int64) int64 {
+	if x != gsink {
+		y = -y
+	}
+	// amd64:"CMOVQNE"
+	return y
+}
+// cmovinvert5 negates y when x > gusink (unsigned); amd64 is expected to
+// emit CMOVQCS.
+func cmovinvert5(x, y uint64) uint64 {
+	if x > gusink {
+		y = -y
+	}
+	// amd64:"CMOVQCS"
+	return y
+}
+// cmovinvert6 negates y when x >= gusink (unsigned); amd64 is expected to
+// emit CMOVQLS.
+func cmovinvert6(x, y uint64) uint64 {
+	if x >= gusink {
+		y = -y
+	}
+	// amd64:"CMOVQLS"
+	return y
+}
+
+// cmovload conditionally increments the index i and then loads a[i]. On
+// amd64 the load line must not contain a CMOVQNE (see issue 26306).
+func cmovload(a []int, i int, b bool) int {
+	if b {
+		i++
+	}
+	// See issue 26306
+	// amd64:-"CMOVQNE"
+	return a[i]
+}
+
+// cmovstore conditionally increments the index i and then stores 7 to
+// a[i]. Unlike cmovload, amd64 is expected to use a CMOVQNE here.
+func cmovstore(a []int, i int, b bool) {
+	if b {
+		i++
+	}
+	// amd64:"CMOVQNE"
+	a[i] = 7
+}
+
+// r0..r5 are package-level destinations for the results of the conditional
+// select tests below.
+var r0, r1, r2, r3, r4, r5 int
+
+// cmovinc selects between a value and an incremented value based on cond.
+// arm64 is expected to fold the increment into a CSINC and emit no CSEL.
+func cmovinc(cond bool, a, b, c int) {
+	var x0, x1 int
+
+	if cond {
+		x0 = a
+	} else {
+		x0 = b + 1
+	}
+	// arm64:"CSINC\tNE", -"CSEL"
+	r0 = x0
+
+	if cond {
+		x1 = b + 1
+	} else {
+		x1 = a
+	}
+	// arm64:"CSINC\tEQ", -"CSEL"
+	r1 = x1
+
+	if cond {
+		c++
+	}
+	// arm64:"CSINC\tEQ", -"CSEL"
+	r2 = c
+}
+
+// cmovinv selects between a and the bitwise complement of b based on cond.
+// arm64 is expected to fold the complement into a CSINV and emit no CSEL.
+func cmovinv(cond bool, a, b int) {
+	var x0, x1 int
+
+	if cond {
+		x0 = a
+	} else {
+		x0 = ^b
+	}
+	// arm64:"CSINV\tNE", -"CSEL"
+	r0 = x0
+
+	if cond {
+		x1 = ^b
+	} else {
+		x1 = a
+	}
+	// arm64:"CSINV\tEQ", -"CSEL"
+	r1 = x1
+}
+
+// cmovneg selects between a and the negation of b based on cond. arm64 is
+// expected to fold the negation into a CSNEG and emit no CSEL. The
+// parameter c is not used.
+func cmovneg(cond bool, a, b, c int) {
+	var x0, x1 int
+
+	if cond {
+		x0 = a
+	} else {
+		x0 = -b
+	}
+	// arm64:"CSNEG\tNE", -"CSEL"
+	r0 = x0
+
+	if cond {
+		x1 = -b
+	} else {
+		x1 = a
+	}
+	// arm64:"CSNEG\tEQ", -"CSEL"
+	r1 = x1
+}
+
+// cmovsetm selects between the constants -1 and 0 based on cond. arm64 is
+// expected to use CSETM and emit no CSEL. The parameter x is not used.
+func cmovsetm(cond bool, x int) {
+	var x0, x1 int
+
+	if cond {
+		x0 = -1
+	} else {
+		x0 = 0
+	}
+	// arm64:"CSETM\tNE", -"CSEL"
+	r0 = x0
+
+	if cond {
+		x1 = 0
+	} else {
+		x1 = -1
+	}
+	// arm64:"CSETM\tEQ", -"CSEL"
+	r1 = x1
+}
+
+// cmovFcmp0 drives the CSINC/CSINV/CSNEG/CSETM selections from float64
+// comparisons of s and t, with the plain value a on the taken branch. The
+// arm64 checks pin the condition code used for each comparison operator
+// and forbid a plain CSEL.
+func cmovFcmp0(s, t float64, a, b int) {
+	var x0, x1, x2, x3, x4, x5 int
+
+	if s < t {
+		x0 = a
+	} else {
+		x0 = b + 1
+	}
+	// arm64:"CSINC\tMI", -"CSEL"
+	r0 = x0
+
+	if s <= t {
+		x1 = a
+	} else {
+		x1 = ^b
+	}
+	// arm64:"CSINV\tLS", -"CSEL"
+	r1 = x1
+
+	if s > t {
+		x2 = a
+	} else {
+		x2 = -b
+	}
+	// arm64:"CSNEG\tMI", -"CSEL"
+	r2 = x2
+
+	if s >= t {
+		x3 = -1
+	} else {
+		x3 = 0
+	}
+	// arm64:"CSETM\tLS", -"CSEL"
+	r3 = x3
+
+	if s == t {
+		x4 = a
+	} else {
+		x4 = b + 1
+	}
+	// arm64:"CSINC\tEQ", -"CSEL"
+	r4 = x4
+
+	if s != t {
+		x5 = a
+	} else {
+		x5 = b + 1
+	}
+	// arm64:"CSINC\tNE", -"CSEL"
+	r5 = x5
+}
+
+// cmovFcmp1 mirrors cmovFcmp0 with the two branches of every selection
+// swapped, so the arm64 checks expect the opposite condition codes.
+func cmovFcmp1(s, t float64, a, b int) {
+	var x0, x1, x2, x3, x4, x5 int
+
+	if s < t {
+		x0 = b + 1
+	} else {
+		x0 = a
+	}
+	// arm64:"CSINC\tPL", -"CSEL"
+	r0 = x0
+
+	if s <= t {
+		x1 = ^b
+	} else {
+		x1 = a
+	}
+	// arm64:"CSINV\tHI", -"CSEL"
+	r1 = x1
+
+	if s > t {
+		x2 = -b
+	} else {
+		x2 = a
+	}
+	// arm64:"CSNEG\tPL", -"CSEL"
+	r2 = x2
+
+	if s >= t {
+		x3 = 0
+	} else {
+		x3 = -1
+	}
+	// arm64:"CSETM\tHI", -"CSEL"
+	r3 = x3
+
+	if s == t {
+		x4 = b + 1
+	} else {
+		x4 = a
+	}
+	// arm64:"CSINC\tNE", -"CSEL"
+	r4 = x4
+
+	if s != t {
+		x5 = b + 1
+	} else {
+		x5 = a
+	}
+	// arm64:"CSINC\tEQ", -"CSEL"
+	r5 = x5
+}
+
+// cmovzero1 returns 182 when c is true and 0 otherwise. loong64 is
+// expected to use MASKEQZ and not MASKNEZ.
+func cmovzero1(c bool) int {
+	var x int
+	if c {
+		x = 182
+	}
+	// loong64:"MASKEQZ", -"MASKNEZ"
+	return x
+}
+
+// cmovzero2 returns 182 when c is false and 0 otherwise. loong64 is
+// expected to use MASKNEZ and not MASKEQZ.
+func cmovzero2(c bool) int {
+	var x int
+	if !c {
+		x = 182
+	}
+	// loong64:"MASKNEZ", -"MASKEQZ"
+	return x
+}
+
+// Conditionally selecting between a value or 0 can be done without
+// an extra load of 0 to a register on PPC64 by using R0 (which always
+// holds the value $0) instead. Verify both cases where either arg1
+// or arg2 is zero.
+//
+// cmovzeroreg0 returns a when a == b and 0 otherwise; the ISEL is expected
+// to take R0 as its second source operand.
+func cmovzeroreg0(a, b int) int {
+	x := 0
+	if a == b {
+		x = a
+	}
+	// ppc64x:"ISEL\t[$]2, R[0-9]+, R0, R[0-9]+"
+	return x
+}
+
+// cmovzeroreg1 returns 0 when a == b and a otherwise; on ppc64x the ISEL is
+// expected to take R0 as its first source operand.
+func cmovzeroreg1(a, b int) int {
+	x := a
+	if a == b {
+		x = 0
+	}
+	// ppc64x:"ISEL\t[$]2, R0, R[0-9]+, R[0-9]+"
+	return x
+}
--- a/test/codegen/constants.go
+++ b/test/codegen/constants.go
@@ -0,0 +1,33 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// A uint16 or sint16 constant shifted left.
+//
+// shifted16BitConstants stores four such 64-bit constants into out. On
+// ppc64x each is expected to be built from a small MOVD immediate followed
+// by an SLD by the shift amount named in the check.
+func shifted16BitConstants(out [64]uint64) {
+	// ppc64x: "MOVD\t[$]8193,", "SLD\t[$]27,"
+	out[0] = 0x0000010008000000
+	// ppc64x: "MOVD\t[$]-32767", "SLD\t[$]26,"
+	out[1] = 0xFFFFFE0004000000
+	// ppc64x: "MOVD\t[$]-1", "SLD\t[$]48,"
+	out[2] = 0xFFFF000000000000
+	// ppc64x: "MOVD\t[$]65535", "SLD\t[$]44,"
+	out[3] = 0x0FFFF00000000000
+}
+
+// A contiguous set of 1 bits, potentially wrapping.
+//
+// contiguousMaskConstants stores four such 64-bit masks into out. On ppc64x
+// each is expected to be built from MOVD $-1 followed by an RLDC; for the
+// last constant power10 is instead expected to use a single MOVD with the
+// full immediate.
+func contiguousMaskConstants(out [64]uint64) {
+	// ppc64x: "MOVD\t[$]-1", "RLDC\tR[0-9]+, [$]44, [$]63,"
+	out[0] = 0xFFFFF00000000001
+	// ppc64x: "MOVD\t[$]-1", "RLDC\tR[0-9]+, [$]43, [$]63,"
+	out[1] = 0xFFFFF80000000001
+	// ppc64x: "MOVD\t[$]-1", "RLDC\tR[0-9]+, [$]43, [$]4,"
+	out[2] = 0x0FFFF80000000000
+	// ppc64x/power8: "MOVD\t[$]-1", "RLDC\tR[0-9]+, [$]33, [$]63,"
+	// ppc64x/power9: "MOVD\t[$]-1", "RLDC\tR[0-9]+, [$]33, [$]63,"
+	// ppc64x/power10: "MOVD\t[$]-8589934591,"
+	out[3] = 0xFFFFFFFE00000001
+}
--- a/test/codegen/copy.go
+++ b/test/codegen/copy.go
@@ -0,0 +1,159 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "runtime"
+
+// Check small copies are replaced with moves.
+
+// movesmall4 performs an overlapping copy within a 4-byte local array; none
+// of the listed architectures may emit a memmove call.
+func movesmall4() {
+	x := [...]byte{1, 2, 3, 4}
+	// 386:-".*memmove"
+	// amd64:-".*memmove"
+	// arm:-".*memmove"
+	// arm64:-".*memmove"
+	// ppc64x:-".*memmove"
+	copy(x[1:], x[:])
+}
+
+// movesmall7 performs an overlapping copy within a 7-byte local array; none
+// of the listed architectures may emit a memmove call.
+func movesmall7() {
+	x := [...]byte{1, 2, 3, 4, 5, 6, 7}
+	// 386:-".*memmove"
+	// amd64:-".*memmove"
+	// arm64:-".*memmove"
+	// ppc64x:-".*memmove"
+	copy(x[1:], x[:])
+}
+
+// movesmall16 performs an overlapping copy within a 16-byte local array.
+// amd64 must not call memmove, whereas ppc64x is expected to call it.
+func movesmall16() {
+	x := [...]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+	// amd64:-".*memmove"
+	// ppc64x:".*memmove"
+	copy(x[1:], x[:])
+}
+
+// x is a package-level 256-byte buffer used as copy source and/or
+// destination by the tests below.
+var x [256]byte
+
+// Check that large disjoint copies are replaced with moves.
+
+// moveDisjointStack32 copies 32 bytes from the global x into a local
+// array. ppc64x must not call memmove; power8 and power9 are expected to
+// use vector loads with no ADD or BC on the copy line.
+func moveDisjointStack32() {
+	var s [32]byte
+	// ppc64x:-".*memmove"
+	// ppc64x/power8:"LXVD2X",-"ADD",-"BC"
+	// ppc64x/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
+	copy(s[:], x[:32])
+	runtime.KeepAlive(&s)
+}
+
+// moveDisjointStack64 copies 96 bytes from the global x into a local
+// array. ppc64x must not call memmove; power8 is expected to emit LXVD2X
+// together with ADD and BC, while power9 is expected to emit LXV without
+// them.
+func moveDisjointStack64() {
+	var s [96]byte
+	// ppc64x:-".*memmove"
+	// ppc64x/power8:"LXVD2X","ADD","BC"
+	// ppc64x/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
+	copy(s[:], x[:96])
+	runtime.KeepAlive(&s)
+}
+
+// moveDisjointStack copies all 256 bytes of the global x into a local
+// array; s390x, amd64 and ppc64x must not call memmove.
+func moveDisjointStack() {
+	var s [256]byte
+	// s390x:-".*memmove"
+	// amd64:-".*memmove"
+	// ppc64x:-".*memmove"
+	// ppc64x/power8:"LXVD2X"
+	// ppc64x/power9:"LXV",-"LXVD2X"
+	copy(s[:], x[:])
+	runtime.KeepAlive(&s)
+}
+
+// moveDisjointArg copies the 256-byte array pointed to by b into a local
+// array; s390x, amd64 and ppc64x must not call memmove.
+func moveDisjointArg(b *[256]byte) {
+	var s [256]byte
+	// s390x:-".*memmove"
+	// amd64:-".*memmove"
+	// ppc64x:-".*memmove"
+	// ppc64x/power8:"LXVD2X"
+	// ppc64x/power9:"LXV",-"LXVD2X"
+	copy(s[:], b[:])
+	runtime.KeepAlive(&s)
+}
+
+// moveDisjointNoOverlap copies the upper 128 bytes of *a onto its lower
+// 128 bytes (source and destination do not overlap); s390x, amd64 and
+// ppc64x must not call memmove.
+func moveDisjointNoOverlap(a *[256]byte) {
+	// s390x:-".*memmove"
+	// amd64:-".*memmove"
+	// ppc64x:-".*memmove"
+	// ppc64x/power8:"LXVD2X"
+	// ppc64x/power9:"LXV",-"LXVD2X"
+	copy(a[:], a[128:])
+}
+
+// Check arch-specific memmove lowering. See issue 41662 for details
+
+// moveArchLowering1 copies a 1-byte array into b after indexing b[1];
+// amd64, arm64 and ppc64x must not call memmove.
+func moveArchLowering1(b []byte, x *[1]byte) {
+	_ = b[1]
+	// amd64:-".*memmove"
+	// arm64:-".*memmove"
+	// ppc64x:-".*memmove"
+	copy(b, x[:])
+}
+
+// moveArchLowering2 copies a 2-byte array into b after indexing b[2];
+// amd64, arm64 and ppc64x must not call memmove.
+func moveArchLowering2(b []byte, x *[2]byte) {
+	_ = b[2]
+	// amd64:-".*memmove"
+	// arm64:-".*memmove"
+	// ppc64x:-".*memmove"
+	copy(b, x[:])
+}
+
+// moveArchLowering4 copies a 4-byte array into b after indexing b[4];
+// amd64, arm64 and ppc64x must not call memmove.
+func moveArchLowering4(b []byte, x *[4]byte) {
+	_ = b[4]
+	// amd64:-".*memmove"
+	// arm64:-".*memmove"
+	// ppc64x:-".*memmove"
+	copy(b, x[:])
+}
+
+// moveArchLowering8 copies an 8-byte array into b after indexing b[8];
+// amd64, arm64 and ppc64x must not call memmove.
+func moveArchLowering8(b []byte, x *[8]byte) {
+	_ = b[8]
+	// amd64:-".*memmove"
+	// arm64:-".*memmove"
+	// ppc64x:-".*memmove"
+	copy(b, x[:])
+}
+
+// moveArchLowering16 copies a 16-byte array into b after indexing b[16];
+// only amd64 is checked, and it must not call memmove.
+func moveArchLowering16(b []byte, x *[16]byte) {
+	_ = b[16]
+	// amd64:-".*memmove"
+	copy(b, x[:])
+}
+
+// Check that no branches are generated when the pointers are [not] equal.
+
+// ptrEqual copies the global x onto itself (identical source and
+// destination); no equal/not-equal branch may be emitted for the copy.
+func ptrEqual() {
+	// amd64:-"JEQ",-"JNE"
+	// ppc64x:-"BEQ",-"BNE"
+	// s390x:-"BEQ",-"BNE"
+	copy(x[:], x[:])
+}
+
+// ptrOneOffset copies x onto x[1:] (destination offset by one byte); no
+// equal/not-equal branch may be emitted for the copy.
+func ptrOneOffset() {
+	// amd64:-"JEQ",-"JNE"
+	// ppc64x:-"BEQ",-"BNE"
+	// s390x:-"BEQ",-"BNE"
+	copy(x[1:], x[:])
+}
+
+// ptrBothOffset copies x[2:] onto x[1:] (both slices offset, by different
+// amounts); no equal/not-equal branch may be emitted for the copy.
+func ptrBothOffset() {
+	// amd64:-"JEQ",-"JNE"
+	// ppc64x:-"BEQ",-"BNE"
+	// s390x:-"BEQ",-"BNE"
+	copy(x[1:], x[2:])
+}
+
+// Verify #62698 on PPC64.
+//
+// noMaskOnCopy indexes a with x and-not the result of copy. On ppc64x the
+// line must contain neither a MOVD of the immediate -1 nor an AND. The
+// dollar sign of the immediate is written as [$] because a bare $ is a
+// regexp end anchor and would make the negative check unmatchable.
+func noMaskOnCopy(a []int, s string, x int) int {
+	// ppc64x:-"MOVD\t[$]-1", -"AND"
+	return a[x&^copy([]byte{}, s)]
+}
--- a/test/codegen/floats.go
+++ b/test/codegen/floats.go
@@ -0,0 +1,229 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains codegen tests related to arithmetic
+// simplifications and optimizations on float types.
+// For codegen tests on integer types, see arithmetic.go.
+
+// --------------------- //
+//    Strength-reduce    //
+// --------------------- //
+
+// Mul2 multiplies f by 2.0. Every listed architecture is expected to emit
+// a floating-point add and no multiply.
+func Mul2(f float64) float64 {
+	// 386/sse2:"ADDSD",-"MULSD"
+	// amd64:"ADDSD",-"MULSD"
+	// arm/7:"ADDD",-"MULD"
+	// arm64:"FADDD",-"FMULD"
+	// ppc64x:"FADD",-"FMUL"
+	// riscv64:"FADDD",-"FMULD"
+	return f * 2.0
+}
+
+// DivPow2 divides its arguments by the constants 16.0, 0.125 and 0.5. The
+// first two divisions are expected to become multiplications, and the
+// division by 0.5 an addition; no divide instruction may be emitted on any
+// listed architecture. All three ppc64x checks forbid FDIV, the mnemonic
+// used by the other ppc64x negative checks in this function.
+func DivPow2(f1, f2, f3 float64) (float64, float64, float64) {
+	// 386/sse2:"MULSD",-"DIVSD"
+	// amd64:"MULSD",-"DIVSD"
+	// arm/7:"MULD",-"DIVD"
+	// arm64:"FMULD",-"FDIVD"
+	// ppc64x:"FMUL",-"FDIV"
+	// riscv64:"FMULD",-"FDIVD"
+	x := f1 / 16.0
+
+	// 386/sse2:"MULSD",-"DIVSD"
+	// amd64:"MULSD",-"DIVSD"
+	// arm/7:"MULD",-"DIVD"
+	// arm64:"FMULD",-"FDIVD"
+	// ppc64x:"FMUL",-"FDIV"
+	// riscv64:"FMULD",-"FDIVD"
+	y := f2 / 0.125
+
+	// 386/sse2:"ADDSD",-"DIVSD",-"MULSD"
+	// amd64:"ADDSD",-"DIVSD",-"MULSD"
+	// arm/7:"ADDD",-"MULD",-"DIVD"
+	// arm64:"FADDD",-"FMULD",-"FDIVD"
+	// ppc64x:"FADD",-"FMUL",-"FDIV"
+	// riscv64:"FADDD",-"FMULD",-"FDIVD"
+	z := f3 / 0.5
+
+	return x, y, z
+}
+
+// indexLoad multiplies b0[idx] by b1. arm64 is expected to load the
+// element with an FMOVS using a register index shifted left by 2.
+func indexLoad(b0 []float32, b1 float32, idx int) float32 {
+	// arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+<<2\),\sF[0-9]+`
+	return b0[idx] * b1
+}
+
+// indexStore writes b1 to b0[idx]. arm64 is expected to store with an
+// FMOVD using a register index shifted left by 3.
+func indexStore(b0 []float64, b1 float64, idx int) {
+	// arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<3\)`
+	b0[idx] = b1
+}
+
+// ----------- //
+//    Fused    //
+// ----------- //
+
+// FusedAdd32 computes x*y + z in float32; the listed architectures are
+// expected to emit a single FMADDS.
+func FusedAdd32(x, y, z float32) float32 {
+	// s390x:"FMADDS\t"
+	// ppc64x:"FMADDS\t"
+	// arm64:"FMADDS"
+	// riscv64:"FMADDS\t"
+	return x*y + z
+}
+
+// FusedSub32_a computes x*y - z in float32; s390x, ppc64x and riscv64 are
+// expected to emit FMSUBS.
+func FusedSub32_a(x, y, z float32) float32 {
+	// s390x:"FMSUBS\t"
+	// ppc64x:"FMSUBS\t"
+	// riscv64:"FMSUBS\t"
+	return x*y - z
+}
+
+// FusedSub32_b computes z - x*y in float32; arm64 is expected to emit
+// FMSUBS and riscv64 FNMSUBS.
+func FusedSub32_b(x, y, z float32) float32 {
+	// arm64:"FMSUBS"
+	// riscv64:"FNMSUBS\t"
+	return z - x*y
+}
+
+// FusedAdd64 computes x*y + z in float64; the listed architectures are
+// expected to emit a single fused multiply-add.
+func FusedAdd64(x, y, z float64) float64 {
+	// s390x:"FMADD\t"
+	// ppc64x:"FMADD\t"
+	// arm64:"FMADDD"
+	// riscv64:"FMADDD\t"
+	return x*y + z
+}
+
+// FusedSub64_a computes x*y - z in float64; s390x and ppc64x are expected
+// to emit FMSUB and riscv64 FMSUBD.
+func FusedSub64_a(x, y, z float64) float64 {
+	// s390x:"FMSUB\t"
+	// ppc64x:"FMSUB\t"
+	// riscv64:"FMSUBD\t"
+	return x*y - z
+}
+
+// FusedSub64_b computes z - x*y in float64; arm64 is expected to emit
+// FMSUBD and riscv64 FNMSUBD.
+func FusedSub64_b(x, y, z float64) float64 {
+	// arm64:"FMSUBD"
+	// riscv64:"FNMSUBD\t"
+	return z - x*y
+}
+
+// Cmp reports whether f lies outside [-4, 4]. arm64 is expected to use
+// FCMPD followed by a conditional branch, with no CSET GT and no CBZ.
+func Cmp(f float64) bool {
+	// arm64:"FCMPD","(BGT|BLE|BMI|BPL)",-"CSET\tGT",-"CBZ"
+	return f > 4 || f < -4
+}
+
+// CmpZero64 reports whether the float64 f is <= 0. s390x is expected to
+// use LTDBR and no FCMPU.
+func CmpZero64(f float64) bool {
+	// s390x:"LTDBR",-"FCMPU"
+	return f <= 0
+}
+
+// CmpZero32 reports whether the float32 f is <= 0. s390x is expected to
+// use LTEBR and no CEBR.
+func CmpZero32(f float32) bool {
+	// s390x:"LTEBR",-"CEBR"
+	return f <= 0
+}
+
+// CmpWithSub reports whether a - b is <= 0. s390x must not emit an LTDBR
+// for the comparison.
+func CmpWithSub(a float64, b float64) bool {
+	f := a - b
+	// s390x:-"LTDBR"
+	return f <= 0
+}
+
+// CmpWithAdd reports whether a + b is <= 0. s390x must not emit an LTDBR
+// for the comparison.
+func CmpWithAdd(a float64, b float64) bool {
+	f := a + b
+	// s390x:-"LTDBR"
+	return f <= 0
+}
+
+// ---------------- //
+//    Non-floats    //
+// ---------------- //
+
+// We should make sure that the compiler doesn't generate floating point
+// instructions for non-float operations on Plan 9, because floating point
+// operations are not allowed in the note handler.
+
+// ArrayZero returns a zeroed 16-byte array. amd64 is expected to use
+// MOVUPS, except on plan9/amd64 where MOVUPS must not appear.
+func ArrayZero() [16]byte {
+	// amd64:"MOVUPS"
+	// plan9/amd64/:-"MOVUPS"
+	var a [16]byte
+	return a
+}
+
+// ArrayCopy returns a copy of the 16-byte array a. amd64 is expected to
+// use MOVUPS, except on plan9/amd64 where MOVUPS must not appear.
+func ArrayCopy(a [16]byte) (b [16]byte) {
+	// amd64:"MOVUPS"
+	// plan9/amd64/:-"MOVUPS"
+	b = a
+	return
+}
+
+// ---------------- //
+//  Float Min/Max   //
+// ---------------- //
+
+// Float64Min returns min(a, b) for float64; the checks expect a native
+// minimum instruction on each listed architecture.
+func Float64Min(a, b float64) float64 {
+	// amd64:"MINSD"
+	// arm64:"FMIND"
+	// riscv64:"FMIN"
+	// ppc64/power9:"XSMINJDP"
+	// ppc64/power10:"XSMINJDP"
+	return min(a, b)
+}
+
+// Float64Max returns max(a, b) for float64.
+// NOTE(review): the amd64 check looks for MINSD even though this is a max;
+// presumably max is lowered via a min instruction on amd64 - confirm.
+func Float64Max(a, b float64) float64 {
+	// amd64:"MINSD"
+	// arm64:"FMAXD"
+	// riscv64:"FMAX"
+	// ppc64/power9:"XSMAXJDP"
+	// ppc64/power10:"XSMAXJDP"
+	return max(a, b)
+}
+
+// Float32Min returns min(a, b) for float32; the checks expect a native
+// minimum instruction on each listed architecture.
+func Float32Min(a, b float32) float32 {
+	// amd64:"MINSS"
+	// arm64:"FMINS"
+	// riscv64:"FMINS"
+	// ppc64/power9:"XSMINJDP"
+	// ppc64/power10:"XSMINJDP"
+	return min(a, b)
+}
+
+// Float32Max returns max(a, b) for float32.
+// NOTE(review): the amd64 check looks for MINSS even though this is a max;
+// presumably max is lowered via a min instruction on amd64 - confirm.
+func Float32Max(a, b float32) float32 {
+	// amd64:"MINSS"
+	// arm64:"FMAXS"
+	// riscv64:"FMAXS"
+	// ppc64/power9:"XSMAXJDP"
+	// ppc64/power10:"XSMAXJDP"
+	return max(a, b)
+}
+
+// ------------------------ //
+//  Constant Optimizations  //
+// ------------------------ //
+
+// Float32Constant returns the float32 constant 49.0. On ppc64x, power8 and
+// power9 are expected to load it with FMOVS from a constant symbol, while
+// power10 is expected to materialize it with XXSPLTIDP.
+func Float32Constant() float32 {
+	// ppc64x/power8:"FMOVS\t[$]f32\\.42440000\\(SB\\)"
+	// ppc64x/power9:"FMOVS\t[$]f32\\.42440000\\(SB\\)"
+	// ppc64x/power10:"XXSPLTIDP\t[$]1111752704,"
+	return 49.0
+}
+
+// Float64Constant returns the float64 constant 49.0. On ppc64x, power8 and
+// power9 are expected to load it with FMOVD from a constant symbol, while
+// power10 is expected to use XXSPLTIDP with the same immediate as the
+// float32 case.
+func Float64Constant() float64 {
+	// ppc64x/power8:"FMOVD\t[$]f64\\.4048800000000000\\(SB\\)"
+	// ppc64x/power9:"FMOVD\t[$]f64\\.4048800000000000\\(SB\\)"
+	// ppc64x/power10:"XXSPLTIDP\t[$]1111752704,"
+	return 49.0
+}
+
+// Float32DenormalConstant returns the float32 constant 0x1p-127. All
+// ppc64x variants are expected to load it with FMOVS from a constant
+// symbol.
+func Float32DenormalConstant() float32 {
+	// ppc64x:"FMOVS\t[$]f32\\.00400000\\(SB\\)"
+	return 0x1p-127
+}
+
+// A float64 constant which can be exactly represented as a
+// denormal float32 value. On ppc64x, denormal values cannot
+// be used with XXSPLTIDP.
+//
+// Float64DenormalFloat32Constant returns 0x1p-127 as a float64; all ppc64x
+// variants are expected to load it with FMOVD from a constant symbol.
+func Float64DenormalFloat32Constant() float64 {
+	// ppc64x:"FMOVD\t[$]f64\\.3800000000000000\\(SB\\)"
+	return 0x1p-127
+}
--- a/test/codegen/fuse.go
+++ b/test/codegen/fuse.go
@@ -0,0 +1,197 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Notes:
+// - these examples use channels to provide a source of
+//   unknown values that cannot be optimized away
+// - these examples use for loops to force branches
+//   backward (predicted taken)
+
+// ---------------------------------- //
+// signed integer range (conjunction) //
+// ---------------------------------- //
+
+func si1c(c <-chan int64) { // Receives from c while 0 <= x < 256; the checks look for a 64-bit compare against 256 (amd64) or 255 (s390x).
+	// amd64:"CMPQ\t.+, [$]256"
+	// s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255"
+	for x := <-c; x >= 0 && x < 256; x = <-c {
+	}
+}
+
+func si2c(c <-chan int32) { // Receives from c while 0 <= x < 256; the checks look for a 32-bit compare against 256 (amd64) or 255 (s390x).
+	// amd64:"CMPL\t.+, [$]256"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255"
+	for x := <-c; x >= 0 && x < 256; x = <-c {
+	}
+}
+
+func si3c(c <-chan int16) { // Receives from c while 0 <= x < 256; the checks look for a 16-bit compare against 256 (amd64) or a compare against 255 (s390x).
+	// amd64:"CMPW\t.+, [$]256"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255"
+	for x := <-c; x >= 0 && x < 256; x = <-c {
+	}
+}
+
+func si4c(c <-chan int8) { // Receives from c while 0 <= x < 10; the checks look for a compare against 10 on both amd64 and s390x.
+	// amd64:"CMPB\t.+, [$]10"
+	// s390x:"CLIJ\t[$]4, R[0-9]+, [$]10"
+	for x := <-c; x >= 0 && x < 10; x = <-c {
+	}
+}
+
+func si5c(c <-chan int64) { // Receives from c while 4 < x < 256, i.e. x-5 in [0, 251); hence the expected add of -5 and compare against 251.
+	// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+	// s390x:"CLGIJ\t[$]4, R[0-9]+, [$]251","ADD\t[$]-5,"
+	for x := <-c; x < 256 && x > 4; x = <-c {
+	}
+}
+
+func si6c(c <-chan int32) { // Receives from c while 0 < x <= 256, i.e. x-1 in [0, 255]; hence the expected decrement and compare against 255.
+	// amd64:"CMPL\t.+, [$]255","DECL\t"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255","ADDW\t[$]-1,"
+	for x := <-c; x > 0 && x <= 256; x = <-c {
+	}
+}
+
+func si7c(c <-chan int16) { // Receives from c while -10 <= x <= 50, i.e. x+10 in [0, 60]; hence the expected add of 10 and compare against 60.
+	// amd64:"CMPW\t.+, [$]60","ADDL\t[$]10,"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]60","ADDW\t[$]10,"
+	for x := <-c; x >= -10 && x <= 50; x = <-c {
+	}
+}
+
+func si8c(c <-chan int8) { // Receives from c while -126 <= x < 0, i.e. x+126 in [0, 126); hence the expected add of 126 and compare against 126.
+	// amd64:"CMPB\t.+, [$]126","ADDL\t[$]126,"
+	// s390x:"CLIJ\t[$]4, R[0-9]+, [$]126","ADDW\t[$]126,"
+	for x := <-c; x >= -126 && x < 0; x = <-c {
+	}
+}
+
+// ---------------------------------- //
+// signed integer range (disjunction) //
+// ---------------------------------- //
+
+func si1d(c <-chan int64) { // Receives from c while x is outside [0, 256); the checks look for a 64-bit compare against 256 (amd64) or 255 (s390x).
+	// amd64:"CMPQ\t.+, [$]256"
+	// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255"
+	for x := <-c; x < 0 || x >= 256; x = <-c {
+	}
+}
+
+func si2d(c <-chan int32) { // Receives from c while x is outside [0, 256); the checks look for a 32-bit compare against 256 (amd64) or 255 (s390x).
+	// amd64:"CMPL\t.+, [$]256"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255"
+	for x := <-c; x < 0 || x >= 256; x = <-c {
+	}
+}
+
+func si3d(c <-chan int16) { // Receives from c while x is outside [0, 256); the checks look for a 16-bit compare against 256 (amd64) or a compare against 255 (s390x).
+	// amd64:"CMPW\t.+, [$]256"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255"
+	for x := <-c; x < 0 || x >= 256; x = <-c {
+	}
+}
+
+func si4d(c <-chan int8) { // Receives from c while x is outside [0, 10); the checks look for a compare against 10 on both amd64 and s390x.
+	// amd64:"CMPB\t.+, [$]10"
+	// s390x:"CLIJ\t[$]10, R[0-9]+, [$]10"
+	for x := <-c; x < 0 || x >= 10; x = <-c {
+	}
+}
+
+func si5d(c <-chan int64) { // Receives from c while x is outside (4, 256), i.e. x-5 is outside [0, 251); hence the expected add of -5 and compare against 251.
+	// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+	// s390x:"CLGIJ\t[$]10, R[0-9]+, [$]251","ADD\t[$]-5,"
+	for x := <-c; x >= 256 || x <= 4; x = <-c {
+	}
+}
+
+func si6d(c <-chan int32) { // Receives from c while x is outside (0, 256], i.e. x-1 is outside [0, 255]; hence the expected decrement and compare against 255.
+	// amd64:"CMPL\t.+, [$]255","DECL\t"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255","ADDW\t[$]-1,"
+	for x := <-c; x <= 0 || x > 256; x = <-c {
+	}
+}
+
+func si7d(c <-chan int16) { // Receives from c while x is outside [-10, 50], i.e. x+10 is outside [0, 60]; hence the expected add of 10 and compare against 60.
+	// amd64:"CMPW\t.+, [$]60","ADDL\t[$]10,"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]60","ADDW\t[$]10,"
+	for x := <-c; x < -10 || x > 50; x = <-c {
+	}
+}
+
+func si8d(c <-chan int8) { // Receives from c while x is outside [-126, 0), i.e. x+126 is outside [0, 126); hence the expected add of 126 and compare against 126.
+	// amd64:"CMPB\t.+, [$]126","ADDL\t[$]126,"
+	// s390x:"CLIJ\t[$]10, R[0-9]+, [$]126","ADDW\t[$]126,"
+	for x := <-c; x < -126 || x >= 0; x = <-c {
+	}
+}
+
+// ------------------------------------ //
+// unsigned integer range (conjunction) //
+// ------------------------------------ //
+
+func ui1c(c <-chan uint64) { // Receives from c while 4 < x < 256, i.e. x-5 < 251 in unsigned arithmetic; hence the expected add of -5 and compare against 251.
+	// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+	// s390x:"CLGIJ\t[$]4, R[0-9]+, [$]251","ADD\t[$]-5,"
+	for x := <-c; x < 256 && x > 4; x = <-c {
+	}
+}
+
+func ui2c(c <-chan uint32) { // Receives from c while 0 < x <= 256, i.e. x-1 <= 255 in unsigned arithmetic; hence the expected decrement and compare against 255.
+	// amd64:"CMPL\t.+, [$]255","DECL\t"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255","ADDW\t[$]-1,"
+	for x := <-c; x > 0 && x <= 256; x = <-c {
+	}
+}
+
+func ui3c(c <-chan uint16) { // Receives from c while 10 <= x <= 50, i.e. x-10 <= 40 in unsigned arithmetic; hence the expected add of -10 and compare against 40.
+	// amd64:"CMPW\t.+, [$]40","ADDL\t[$]-10,"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]40","ADDW\t[$]-10,"
+	for x := <-c; x >= 10 && x <= 50; x = <-c {
+	}
+}
+
+func ui4c(c <-chan uint8) { // Receives from c while 126 <= x < 128, i.e. x-126 < 2 in unsigned arithmetic; hence the expected add of -126 and compare against 2.
+	// amd64:"CMPB\t.+, [$]2","ADDL\t[$]-126,"
+	// s390x:"CLIJ\t[$]4, R[0-9]+, [$]2","ADDW\t[$]-126,"
+	for x := <-c; x >= 126 && x < 128; x = <-c {
+	}
+}
+
+// ------------------------------------ //
+// unsigned integer range (disjunction) //
+// ------------------------------------ //
+
+func ui1d(c <-chan uint64) { // Receives from c while x is outside (4, 256), i.e. x-5 >= 251 in unsigned arithmetic; hence the expected add of -5 and compare against 251.
+	// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+	// s390x:"CLGIJ\t[$]10, R[0-9]+, [$]251","ADD\t[$]-5,"
+	for x := <-c; x >= 256 || x <= 4; x = <-c {
+	}
+}
+
+func ui2d(c <-chan uint32) { // Receives from c while x is outside [2, 256], i.e. x-2 > 254 in unsigned arithmetic; hence the expected add of -2 and compare against 254.
+	// amd64:"CMPL\t.+, [$]254","ADDL\t[$]-2,"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]254","ADDW\t[$]-2,"
+	for x := <-c; x <= 1 || x > 256; x = <-c {
+	}
+}
+
+func ui3d(c <-chan uint16) { // Receives from c while x is outside [10, 50], i.e. x-10 > 40 in unsigned arithmetic; hence the expected add of -10 and compare against 40.
+	// amd64:"CMPW\t.+, [$]40","ADDL\t[$]-10,"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]40","ADDW\t[$]-10,"
+	for x := <-c; x < 10 || x > 50; x = <-c {
+	}
+}
+
+func ui4d(c <-chan uint8) { // Receives from c while x is outside [126, 128), i.e. x-126 >= 2 in unsigned arithmetic; hence the expected add of -126 and compare against 2.
+	// amd64:"CMPB\t.+, [$]2","ADDL\t[$]-126,"
+	// s390x:"CLIJ\t[$]10, R[0-9]+, [$]2","ADDW\t[$]-126,"
+	for x := <-c; x < 126 || x >= 128; x = <-c {
+	}
+}
--- a/test/codegen/ifaces.go
+++ b/test/codegen/ifaces.go
@@ -0,0 +1,27 @@
+// asmcheck
+
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+type I interface{ M() } // I is the one-method interface used by the conversion tests in this file.
+
+func NopConvertIface(x I) I { // Converts an I to I; the conversion must not call runtime.convI2I on amd64.
+	// amd64:-`.*runtime.convI2I`
+	return I(x)
+}
+
+func NopConvertGeneric[T any](x T) T { // Converts x to its own type parameter T; no instantiation may call runtime.convI2I on amd64.
+	// amd64:-`.*runtime.convI2I`
+	return T(x)
+}
+
+var NopConvertGenericIface = NopConvertGeneric[I] // Instantiates NopConvertGeneric with the interface type I so that code is generated for it.
+
+func ConvToM(x any) I { // Type-asserts x to I; the checks expect a runtime.typeAssert call together with the listed loads on amd64 and arm64.
+	// amd64:`CALL\truntime.typeAssert`,`MOVL\t16\(.*\)`,`MOVQ\t8\(.*\)(.*\*1)`
+	// arm64:`CALL\truntime.typeAssert`,`LDAR`,`MOVWU`,`MOVD\t\(R.*\)\(R.*\)`
+	return x.(I)
+}
--- a/test/codegen/issue22703.go
+++ b/test/codegen/issue22703.go
@@ -0,0 +1,535 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+type I interface { // I declares 512 methods (foo000 through foo511) so that the last ones are called at a large offset.
+	foo000()
+	foo001()
+	foo002()
+	foo003()
+	foo004()
+	foo005()
+	foo006()
+	foo007()
+	foo008()
+	foo009()
+	foo010()
+	foo011()
+	foo012()
+	foo013()
+	foo014()
+	foo015()
+	foo016()
+	foo017()
+	foo018()
+	foo019()
+	foo020()
+	foo021()
+	foo022()
+	foo023()
+	foo024()
+	foo025()
+	foo026()
+	foo027()
+	foo028()
+	foo029()
+	foo030()
+	foo031()
+	foo032()
+	foo033()
+	foo034()
+	foo035()
+	foo036()
+	foo037()
+	foo038()
+	foo039()
+	foo040()
+	foo041()
+	foo042()
+	foo043()
+	foo044()
+	foo045()
+	foo046()
+	foo047()
+	foo048()
+	foo049()
+	foo050()
+	foo051()
+	foo052()
+	foo053()
+	foo054()
+	foo055()
+	foo056()
+	foo057()
+	foo058()
+	foo059()
+	foo060()
+	foo061()
+	foo062()
+	foo063()
+	foo064()
+	foo065()
+	foo066()
+	foo067()
+	foo068()
+	foo069()
+	foo070()
+	foo071()
+	foo072()
+	foo073()
+	foo074()
+	foo075()
+	foo076()
+	foo077()
+	foo078()
+	foo079()
+	foo080()
+	foo081()
+	foo082()
+	foo083()
+	foo084()
+	foo085()
+	foo086()
+	foo087()
+	foo088()
+	foo089()
+	foo090()
+	foo091()
+	foo092()
+	foo093()
+	foo094()
+	foo095()
+	foo096()
+	foo097()
+	foo098()
+	foo099()
+	foo100()
+	foo101()
+	foo102()
+	foo103()
+	foo104()
+	foo105()
+	foo106()
+	foo107()
+	foo108()
+	foo109()
+	foo110()
+	foo111()
+	foo112()
+	foo113()
+	foo114()
+	foo115()
+	foo116()
+	foo117()
+	foo118()
+	foo119()
+	foo120()
+	foo121()
+	foo122()
+	foo123()
+	foo124()
+	foo125()
+	foo126()
+	foo127()
+	foo128()
+	foo129()
+	foo130()
+	foo131()
+	foo132()
+	foo133()
+	foo134()
+	foo135()
+	foo136()
+	foo137()
+	foo138()
+	foo139()
+	foo140()
+	foo141()
+	foo142()
+	foo143()
+	foo144()
+	foo145()
+	foo146()
+	foo147()
+	foo148()
+	foo149()
+	foo150()
+	foo151()
+	foo152()
+	foo153()
+	foo154()
+	foo155()
+	foo156()
+	foo157()
+	foo158()
+	foo159()
+	foo160()
+	foo161()
+	foo162()
+	foo163()
+	foo164()
+	foo165()
+	foo166()
+	foo167()
+	foo168()
+	foo169()
+	foo170()
+	foo171()
+	foo172()
+	foo173()
+	foo174()
+	foo175()
+	foo176()
+	foo177()
+	foo178()
+	foo179()
+	foo180()
+	foo181()
+	foo182()
+	foo183()
+	foo184()
+	foo185()
+	foo186()
+	foo187()
+	foo188()
+	foo189()
+	foo190()
+	foo191()
+	foo192()
+	foo193()
+	foo194()
+	foo195()
+	foo196()
+	foo197()
+	foo198()
+	foo199()
+	foo200()
+	foo201()
+	foo202()
+	foo203()
+	foo204()
+	foo205()
+	foo206()
+	foo207()
+	foo208()
+	foo209()
+	foo210()
+	foo211()
+	foo212()
+	foo213()
+	foo214()
+	foo215()
+	foo216()
+	foo217()
+	foo218()
+	foo219()
+	foo220()
+	foo221()
+	foo222()
+	foo223()
+	foo224()
+	foo225()
+	foo226()
+	foo227()
+	foo228()
+	foo229()
+	foo230()
+	foo231()
+	foo232()
+	foo233()
+	foo234()
+	foo235()
+	foo236()
+	foo237()
+	foo238()
+	foo239()
+	foo240()
+	foo241()
+	foo242()
+	foo243()
+	foo244()
+	foo245()
+	foo246()
+	foo247()
+	foo248()
+	foo249()
+	foo250()
+	foo251()
+	foo252()
+	foo253()
+	foo254()
+	foo255()
+	foo256()
+	foo257()
+	foo258()
+	foo259()
+	foo260()
+	foo261()
+	foo262()
+	foo263()
+	foo264()
+	foo265()
+	foo266()
+	foo267()
+	foo268()
+	foo269()
+	foo270()
+	foo271()
+	foo272()
+	foo273()
+	foo274()
+	foo275()
+	foo276()
+	foo277()
+	foo278()
+	foo279()
+	foo280()
+	foo281()
+	foo282()
+	foo283()
+	foo284()
+	foo285()
+	foo286()
+	foo287()
+	foo288()
+	foo289()
+	foo290()
+	foo291()
+	foo292()
+	foo293()
+	foo294()
+	foo295()
+	foo296()
+	foo297()
+	foo298()
+	foo299()
+	foo300()
+	foo301()
+	foo302()
+	foo303()
+	foo304()
+	foo305()
+	foo306()
+	foo307()
+	foo308()
+	foo309()
+	foo310()
+	foo311()
+	foo312()
+	foo313()
+	foo314()
+	foo315()
+	foo316()
+	foo317()
+	foo318()
+	foo319()
+	foo320()
+	foo321()
+	foo322()
+	foo323()
+	foo324()
+	foo325()
+	foo326()
+	foo327()
+	foo328()
+	foo329()
+	foo330()
+	foo331()
+	foo332()
+	foo333()
+	foo334()
+	foo335()
+	foo336()
+	foo337()
+	foo338()
+	foo339()
+	foo340()
+	foo341()
+	foo342()
+	foo343()
+	foo344()
+	foo345()
+	foo346()
+	foo347()
+	foo348()
+	foo349()
+	foo350()
+	foo351()
+	foo352()
+	foo353()
+	foo354()
+	foo355()
+	foo356()
+	foo357()
+	foo358()
+	foo359()
+	foo360()
+	foo361()
+	foo362()
+	foo363()
+	foo364()
+	foo365()
+	foo366()
+	foo367()
+	foo368()
+	foo369()
+	foo370()
+	foo371()
+	foo372()
+	foo373()
+	foo374()
+	foo375()
+	foo376()
+	foo377()
+	foo378()
+	foo379()
+	foo380()
+	foo381()
+	foo382()
+	foo383()
+	foo384()
+	foo385()
+	foo386()
+	foo387()
+	foo388()
+	foo389()
+	foo390()
+	foo391()
+	foo392()
+	foo393()
+	foo394()
+	foo395()
+	foo396()
+	foo397()
+	foo398()
+	foo399()
+	foo400()
+	foo401()
+	foo402()
+	foo403()
+	foo404()
+	foo405()
+	foo406()
+	foo407()
+	foo408()
+	foo409()
+	foo410()
+	foo411()
+	foo412()
+	foo413()
+	foo414()
+	foo415()
+	foo416()
+	foo417()
+	foo418()
+	foo419()
+	foo420()
+	foo421()
+	foo422()
+	foo423()
+	foo424()
+	foo425()
+	foo426()
+	foo427()
+	foo428()
+	foo429()
+	foo430()
+	foo431()
+	foo432()
+	foo433()
+	foo434()
+	foo435()
+	foo436()
+	foo437()
+	foo438()
+	foo439()
+	foo440()
+	foo441()
+	foo442()
+	foo443()
+	foo444()
+	foo445()
+	foo446()
+	foo447()
+	foo448()
+	foo449()
+	foo450()
+	foo451()
+	foo452()
+	foo453()
+	foo454()
+	foo455()
+	foo456()
+	foo457()
+	foo458()
+	foo459()
+	foo460()
+	foo461()
+	foo462()
+	foo463()
+	foo464()
+	foo465()
+	foo466()
+	foo467()
+	foo468()
+	foo469()
+	foo470()
+	foo471()
+	foo472()
+	foo473()
+	foo474()
+	foo475()
+	foo476()
+	foo477()
+	foo478()
+	foo479()
+	foo480()
+	foo481()
+	foo482()
+	foo483()
+	foo484()
+	foo485()
+	foo486()
+	foo487()
+	foo488()
+	foo489()
+	foo490()
+	foo491()
+	foo492()
+	foo493()
+	foo494()
+	foo495()
+	foo496()
+	foo497()
+	foo498()
+	foo499()
+	foo500()
+	foo501()
+	foo502()
+	foo503()
+	foo504()
+	foo505()
+	foo506()
+	foo507()
+	foo508()
+	foo509()
+	foo510()
+	foo511()
+}
+
+// Nil checks before calling interface methods.
+// They are needed only when the method offset is large.
+
+func callMethodSmallOffset(i I) { // Calls the second method of I; no TESTB (explicit nil check) may be emitted on amd64.
+	// amd64:-"TESTB"
+	i.foo001()
+}
+
+func callMethodLargeOffset(i I) { // Calls the last method of I; a TESTB (explicit nil check) is expected on amd64.
+	// amd64:"TESTB"
+	i.foo511()
+}
--- a/test/codegen/issue25378.go
+++ b/test/codegen/issue25378.go
@@ -0,0 +1,22 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+var wsp = [256]bool{ // wsp is a 256-entry lookup table that is true for space, tab, newline and carriage return.
+	' ':  true,
+	'\t': true,
+	'\n': true,
+	'\r': true,
+}
+
+func zeroExtArgByte(ch [2]byte) bool { // Indexes wsp with the first byte of ch; the lookup must not emit a register-to-register MOVBLZX on amd64.
+	return wsp[ch[0]] // amd64:-"MOVBLZX\t..,.."
+}
+
+func zeroExtArgUint16(ch [2]uint16) bool { // Indexes wsp with the first uint16 of ch (bounds-checked, since wsp has 256 entries); no register-to-register MOVWLZX may be emitted on amd64.
+	return wsp[ch[0]] // amd64:-"MOVWLZX\t..,.."
+}
--- a/test/codegen/issue31618.go
+++ b/test/codegen/issue31618.go
@@ -0,0 +1,22 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Make sure we remove both inline marks in the following code.
+// Both +5 and +6 should map to real instructions, which can
+// be used as inline marks instead of explicit nops.
+func f(x int) int { // Applies g twice, adding 5 and then 6; neither statement may leave an XCHGL (explicit nop) on amd64.
+	// amd64:-"XCHGL"
+	x = g(x) + 5
+	// amd64:-"XCHGL"
+	x = g(x) + 6
+	return x
+}
+
+func g(x int) int { // Returns x shifted right by 3; this is the small callee used by f above.
+	return x >> 3
+}
--- a/test/codegen/issue33580.go
+++ b/test/codegen/issue33580.go
@@ -0,0 +1,25 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Make sure we reuse large constant loads, if we can.
+// See issue 33580.
+
+package codegen
+
+const ( // A and B are masks too large for a 32-bit immediate, so each needs a 64-bit constant load.
+	A = 7777777777777777
+	B = 8888888888888888
+)
+
+func f(x, y uint64) uint64 { // Masks x and y with A and B and multiplies the four results; the second use of B must not reload the constant on amd64.
+	p := x & A
+	q := y & A
+	r := x & B
+	// amd64:-"MOVQ.*8888888888888888"
+	s := y & B
+
+	return p * q * r * s
+}
--- a/test/codegen/issue38554.go
+++ b/test/codegen/issue38554.go
@@ -0,0 +1,15 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that we are zeroing directly instead of
+// copying a large zero value. Issue 38554.
+
+package codegen
+
+func retlarge() [256]byte { // Returns a zero-valued [256]byte; the return must not be implemented with DUFFCOPY on amd64.
+	// amd64:-"DUFFCOPY"
+	return [256]byte{}
+}
--- a/test/codegen/issue42610.go
+++ b/test/codegen/issue42610.go
@@ -0,0 +1,28 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Don't allow 0 masks in shift lowering rules on ppc64x.
+// See issue 42610.
+
+package codegen
+
+func f32(a []int32, i uint32) { // Stores g(8)>>1, which is always 0, into a[0]; the closure g also rewrites the captured i. Requires len(a) > 0.
+	g := func(p int32) int32 {
+		i = uint32(p) * (uint32(p) & (i & 1))
+		return 1
+	}
+	// ppc64x: -"RLWNIM"
+	a[0] = g(8) >> 1
+}
+
+func f(a []int, i uint) { // Stores g(8)>>1, which is always 0, into a[0]; the closure g also rewrites the captured i. No RLDIC may be emitted on ppc64x.
+	g := func(p int) int {
+		i = uint(p) * (uint(p) & (i & 1))
+		return 1
+	}
+	// ppc64x: -"RLDIC"
+	a[0] = g(8) >> 1
+}
--- a/test/codegen/issue48054.go
+++ b/test/codegen/issue48054.go
@@ -0,0 +1,31 @@
+// asmcheck
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func a(n string) bool { // Reports whether the string n is non-empty; the length test is expected to be a CBZ on arm64.
+	// arm64:"CBZ"
+	if len(n) > 0 {
+		return true
+	}
+	return false
+}
+
+func a2(n []int) bool { // Reports whether the slice n is non-empty; the length test is expected to be a CBZ on arm64.
+	// arm64:"CBZ"
+	if len(n) > 0 {
+		return true
+	}
+	return false
+}
+
+func a3(n []int) bool { // Reports whether the slice n is empty, written as len < 1; the test is expected to be a TESTQ on amd64.
+	// amd64:"TESTQ"
+	if len(n) < 1 {
+		return true
+	}
+	return false
+}
--- a/test/codegen/issue52635.go
+++ b/test/codegen/issue52635.go
@@ -0,0 +1,41 @@
+// asmcheck
+
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that optimized range memclr works with pointers to arrays.
+// The clears get inlined, see https://github.com/golang/go/issues/56997
+
+package codegen
+
+type T struct { // T holds a pointer to an array (a), which the clear loops in f range over, and an array value (b) that f does not touch.
+	a *[10]int
+	b [10]int
+}
+
+func (t *T) f() { // Zeroes *t.a four times, once per spelling of the range and index expressions; each loop must become DUFFZERO, not a memclr call, on amd64.
+	// amd64:-".*runtime.memclrNoHeapPointers"
+	// amd64:"DUFFZERO"
+	for i := range t.a {
+		t.a[i] = 0
+	}
+
+	// amd64:-".*runtime.memclrNoHeapPointers"
+	// amd64:"DUFFZERO"
+	for i := range *t.a {
+		t.a[i] = 0
+	}
+
+	// amd64:-".*runtime.memclrNoHeapPointers"
+	// amd64:"DUFFZERO"
+	for i := range t.a {
+		(*t.a)[i] = 0
+	}
+
+	// amd64:-".*runtime.memclrNoHeapPointers"
+	// amd64:"DUFFZERO"
+	for i := range *t.a {
+		(*t.a)[i] = 0
+	}
+}
--- a/test/codegen/issue54467.go
+++ b/test/codegen/issue54467.go
@@ -0,0 +1,59 @@
+// asmcheck
+
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func f1(x *[4]int, y *[4]int) { // Copies *y into *x through two pointers; this is the one case in the file where memmove is expected on amd64.
+	// amd64:".*memmove"
+	*x = *y
+}
+func f2(x *[4]int, y [4]int) { // Copies the by-value array y into *x; no memmove may be emitted on amd64.
+	// amd64:-".*memmove"
+	*x = y
+}
+func f3(x *[4]int, y *[4]int) { // Copies *y into *x via the local t; neither step may emit a memmove on amd64.
+	// amd64:-".*memmove"
+	t := *y
+	// amd64:-".*memmove"
+	*x = t
+}
+func f4(x *[4]int, y [4]int) { // Copies the by-value array y into *x via the local t; neither step may emit a memmove on amd64.
+	// amd64:-".*memmove"
+	t := y
+	// amd64:-".*memmove"
+	*x = t
+}
+
+type T struct { // T wraps a [4]int so the copy tests below can exercise struct-field sources and destinations.
+	a [4]int
+}
+
+func f5(x, y *T) { // Copies the array field from *y to *x; no memmove may be emitted on amd64.
+	// amd64:-".*memmove"
+	x.a = y.a
+}
+func f6(x *T, y T) { // Copies the array field of the by-value struct y into *x; no memmove may be emitted on amd64.
+	// amd64:-".*memmove"
+	x.a = y.a
+}
+func f7(x *T, y *[4]int) { // Copies *y into the array field of *x; no memmove may be emitted on amd64.
+	// amd64:-".*memmove"
+	x.a = *y
+}
+func f8(x *[4]int, y *T) { // Copies the array field of *y into *x; no memmove may be emitted on amd64.
+	// amd64:-".*memmove"
+	*x = y.a
+}
+
+func f9(x [][4]int, y [][4]int, i, j int) { // Copies element j of y into element i of x; no memmove may be emitted on amd64.
+	// amd64:-".*memmove"
+	x[i] = y[j]
+}
+
+func f10() []byte { // Returns a fresh byte slice built from a 24-byte string constant; no memmove may be emitted on amd64.
+	// amd64:-".*memmove"
+	return []byte("aReasonablyBigTestString")
+}
--- a/test/codegen/issue56440.go
+++ b/test/codegen/issue56440.go
@@ -0,0 +1,34 @@
+// asmcheck
+
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Check to make sure that we recognize when the length of an append
+// is constant. We check this by making sure that the constant length
+// is folded into a load offset.
+
+package p
+
+func f(x []int) int { // Builds a slice of length 3+2 = 5 and returns x[5]; offset 40 in the check is 5 elements of 8 bytes on amd64.
+	s := make([]int, 3)
+	s = append(s, 4, 5)
+	// amd64:`MOVQ\t40\(.*\),`
+	return x[len(s)]
+}
+
+func g(x []int, p *bool) int { // Loop variant of f: s always has length 5 after the append, so x[len(s)] must still load from offset 40. Spins until *p is true.
+	s := make([]int, 3)
+	for {
+		s = s[:3]
+		if cap(s) < 5 {
+			s = make([]int, 3, 5)
+		}
+		s = append(s, 4, 5)
+		if *p {
+			// amd64:`MOVQ\t40\(.*\),`
+			return x[len(s)]
+		}
+	}
+	return 0
+}
--- a/test/codegen/issue58166.go
+++ b/test/codegen/issue58166.go
@@ -0,0 +1,23 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package p
+
+func dgemmSerialNotNot(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { // Accumulates alpha*a[i,l]*b[l,:] into row i of c, skipping zero factors; the innermost range loop is expected to use INCQ on amd64.
+	for i := 0; i < m; i++ {
+		ctmp := c[i*ldc : i*ldc+n]
+		for l, v := range a[i*lda : i*lda+k] {
+			tmp := alpha * v
+			if tmp != 0 {
+				x := b[l*ldb : l*ldb+n]
+				// amd64:"INCQ"
+				for i, v := range x {
+					ctmp[i] += tmp * v
+				}
+			}
+		}
+	}
+}
--- a/test/codegen/issue60324.go
+++ b/test/codegen/issue60324.go
@@ -0,0 +1,36 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func main() { // Calls the closures returned by f, g and h; each check pins the symbol name loaded at that call site, so the order of the four calls matters.
+	// amd64:"LEAQ\tcommand-line-arguments\\.main\\.f\\.g\\.h\\.func3"
+	f(1)()
+
+	// amd64:"LEAQ\tcommand-line-arguments\\.main\\.g\\.h\\.func2"
+	g(2)()
+
+	// amd64:"LEAQ\tcommand-line-arguments\\.main\\.h\\.func1"
+	h(3)()
+
+	// amd64:"LEAQ\tcommand-line-arguments\\.main\\.f\\.g\\.h\\.func4"
+	f(4)()
+}
+
+func f(x int) func() { // Forwards to g; the closure symbol loaded here is expected to be named f.g.h.func1.
+	// amd64:"LEAQ\tcommand-line-arguments\\.f\\.g\\.h\\.func1"
+	return g(x)
+}
+
+func g(x int) func() { // Forwards to h; the closure symbol loaded here is expected to be named g.h.func1.
+	// amd64:"LEAQ\tcommand-line-arguments\\.g\\.h\\.func1"
+	return h(x)
+}
+
+func h(x int) func() { // Returns a closure that only calls recover; x is unused. The closure symbol is expected to be named h.func1.
+	// amd64:"LEAQ\tcommand-line-arguments\\.h\\.func1"
+	return func() { recover() }
+}
--- a/test/codegen/issue60673.go
+++ b/test/codegen/issue60673.go
@@ -0,0 +1,18 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+//go:noinline
+func f(x int32) { // Empty callee kept out of line so that g has to marshal an argument for a real call.
+}
+
+func g(p *int32) { // Loads *p and passes it to f. The comment below refers to absolute line numbers in this file, so do not insert lines above the call.
+	// argument marshaling code should live at line 17, not line 15.
+	x := *p
+	// 386: `MOVL\s[A-Z]+,\s\(SP\)`
+	f(x)
+}
--- a/test/codegen/issue61356.go
+++ b/test/codegen/issue61356.go
@@ -0,0 +1,55 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Make sure this code doesn't generate spill/restore.
+
+package codegen
+
+func pack20(in *[20]uint64) uint64 { // ORs the constant 4 with each in[i] shifted left by 4+3*i; no statement may reference SP on amd64 (no spill or restore).
+	var out uint64
+	out |= 4
+	// amd64:-`.*SP.*`
+	out |= in[0] << 4
+	// amd64:-`.*SP.*`
+	out |= in[1] << 7
+	// amd64:-`.*SP.*`
+	out |= in[2] << 10
+	// amd64:-`.*SP.*`
+	out |= in[3] << 13
+	// amd64:-`.*SP.*`
+	out |= in[4] << 16
+	// amd64:-`.*SP.*`
+	out |= in[5] << 19
+	// amd64:-`.*SP.*`
+	out |= in[6] << 22
+	// amd64:-`.*SP.*`
+	out |= in[7] << 25
+	// amd64:-`.*SP.*`
+	out |= in[8] << 28
+	// amd64:-`.*SP.*`
+	out |= in[9] << 31
+	// amd64:-`.*SP.*`
+	out |= in[10] << 34
+	// amd64:-`.*SP.*`
+	out |= in[11] << 37
+	// amd64:-`.*SP.*`
+	out |= in[12] << 40
+	// amd64:-`.*SP.*`
+	out |= in[13] << 43
+	// amd64:-`.*SP.*`
+	out |= in[14] << 46
+	// amd64:-`.*SP.*`
+	out |= in[15] << 49
+	// amd64:-`.*SP.*`
+	out |= in[16] << 52
+	// amd64:-`.*SP.*`
+	out |= in[17] << 55
+	// amd64:-`.*SP.*`
+	out |= in[18] << 58
+	// amd64:-`.*SP.*`
+	out |= in[19] << 61
+	return out
+}
--- a/test/codegen/issue63332.go
+++ b/test/codegen/issue63332.go
@@ -0,0 +1,14 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func issue63332(c chan int) { // Sends 0+2 on c; the increment statement itself must not emit a MOVQ on amd64.
+	x := 0
+	// amd64:-`MOVQ`
+	x += 2
+	c <- x
+}
--- a/test/codegen/issue66585.go
+++ b/test/codegen/issue66585.go
@@ -0,0 +1,24 @@
+// asmcheck
+
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package p
+
+var x = func() int { // x is initialized by calling a function literal that passes the address of a local to f and returns that local.
+	n := 0
+	f(&n)
+	return n
+}()
+
+func f(p *int) { // Stores 1 through p; called from the initializer of x.
+	*p = 1
+}
+
+var y = 1 // y has a constant initializer and is the value that z copies.
+
+// z can be statically initialized.
+//
+// amd64:-"MOVQ"
+var z = y
--- a/test/codegen/logic.go
+++ b/test/codegen/logic.go
@@ -0,0 +1,41 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Test to make sure that (CMPQ (ANDQ x y) [0]) does not get rewritten to
+// (TESTQ x y) if the ANDQ has other uses. If that rewrite happens, then one
+// of the args of the ANDQ needs to be saved so it can be used as the arg to TESTQ.
+func andWithUse(x, y int) int { // Returns 77 when x&y is zero and x&y otherwise; the zero test must be a TESTQ of one register against itself on amd64.
+	z := x & y
+	// amd64:`TESTQ\s(AX, AX|BX, BX|CX, CX|DX, DX|SI, SI|DI, DI|R8, R8|R9, R9|R10, R10|R11, R11|R12, R12|R13, R13|R15, R15)`
+	if z == 0 {
+		return 77
+	}
+	// use z by returning it
+	return z
+}
+
+// Verify (OR x (NOT y)) rewrites to (ORN x y) where supported
+func ornot(x, y int) int { // Returns x OR (NOT y); an ORN instruction is expected on ppc64x.
+	// ppc64x:"ORN"
+	z := x | ^y
+	return z
+}
+
+// Verify that (OR (NOT x) (NOT y)) rewrites to (NOT (AND x y))
+func orDemorgans(x, y int) int { // Returns (NOT x) OR (NOT y); on amd64 the statement must contain an AND and no OR.
+	// amd64:"AND",-"OR"
+	z := ^x | ^y
+	return z
+}
+
+// Verify that (AND (NOT x) (NOT y)) rewrites to (NOT (OR x y))
+func andDemorgans(x, y int) int { // Returns (NOT x) AND (NOT y); on amd64 the statement must contain an OR and no AND.
+	// amd64:"OR",-"AND"
+	z := ^x & ^y
+	return z
+}
--- a/test/codegen/mapaccess.go
+++ b/test/codegen/mapaccess.go
@@ -0,0 +1,484 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// These tests check that mapaccess calls are not used.
+// Issues #23661 and #24364.
+
+func mapCompoundAssignmentInt8() { // Applies every compound assignment plus increment and decrement to m[k] with int8 keys; none may call mapaccess on 386, amd64, arm or arm64.
+	m := make(map[int8]int8, 0)
+	var k int8 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] += 67
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] -= 123
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] *= 45
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] |= 78
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] ^= 89
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] <<= 9
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] >>= 10
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]++
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]--
+}
+
+func mapCompoundAssignmentInt32() { // Applies every compound assignment plus increment and decrement to m[k] with int32 keys; none may call mapaccess on 386, amd64, arm or arm64.
+	m := make(map[int32]int32, 0)
+	var k int32 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] += 67890
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] -= 123
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] *= 456
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] |= 78
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] ^= 89
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] <<= 9
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] >>= 10
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]++
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]--
+}
+
+func mapCompoundAssignmentInt64() { // Applies every compound assignment plus increment and decrement to m[k] with int64 keys; none may call mapaccess on 386, amd64, arm or arm64.
+	m := make(map[int64]int64, 0)
+	var k int64 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] += 67890
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] -= 123
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] *= 456
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] |= 78
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] ^= 89
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] <<= 9
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] >>= 10
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]++
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]--
+}
+
+func mapCompoundAssignmentComplex128() { // Applies the arithmetic compound assignments plus increment and decrement to m[k] with complex128 keys; none may call mapaccess on 386, amd64, arm or arm64.
+	m := make(map[complex128]complex128, 0)
+	var k complex128 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] += 67890
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] -= 123
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] *= 456
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]++
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]--
+}
+
+func mapCompoundAssignmentString() { // Appends to a string map value in place with +=; the statement may not call mapaccess on 386, amd64, arm or arm64.
+	m := make(map[string]string, 0)
+	var k string = "key"
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] += "value"
+}
+
+var sinkAppend bool // sinkAppend is the extra assignment target used by the tuple-assignment exception cases in the append tests.
+
+func mapAppendAssignmentInt8() { // Appends to m[k] in place with int8 keys; the self-append forms must avoid mapaccess, while the cases under Exceptions are expected to call it.
+	m := make(map[int8][]int8, 0)
+	var k int8 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1, 2, 3)
+
+	a := []int8{7, 8, 9, 0}
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], a...)
+
+	// Exceptions
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(a, m[k]...)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k+1], 100)
+}
+
+func mapAppendAssignmentInt32() { // Appends to map elements in place with int32 keys, including the computed keys k+1 and -k; the self-append forms must avoid mapaccess, while the cases under Exceptions are expected to call it.
+	m := make(map[int32][]int32, 0)
+	var k int32 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1, 2, 3)
+
+	a := []int32{7, 8, 9, 0}
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], a...)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k+1] = append(m[k+1], a...)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[-k] = append(m[-k], a...)
+
+	// Exceptions
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(a, m[k]...)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k+1], 100)
+}
+
+func mapAppendAssignmentInt64() { // Appends to map elements in place with int64 keys, including the computed keys k+1 and -k; the self-append forms must avoid mapaccess, while the cases under Exceptions are expected to call it.
+	m := make(map[int64][]int64, 0)
+	var k int64 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1, 2, 3)
+
+	a := []int64{7, 8, 9, 0}
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], a...)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k+1] = append(m[k+1], a...)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[-k] = append(m[-k], a...)
+
+	// Exceptions
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(a, m[k]...)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k+1], 100)
+}
+
+func mapAppendAssignmentComplex128() { // Appends to m[k] in place with complex128 keys; the self-append forms must avoid mapaccess, while the cases under Exceptions are expected to call it.
+	m := make(map[complex128][]complex128, 0)
+	var k complex128 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1, 2, 3)
+
+	a := []complex128{7, 8, 9, 0}
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], a...)
+
+	// Exceptions
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(a, m[k]...)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k+1], 100)
+}
+
+func mapAppendAssignmentString() { // Appends to m[k] in place with string keys; the self-append forms must avoid mapaccess, while the cases under Exceptions are expected to call it.
+	m := make(map[string][]string, 0)
+	var k string = "key"
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], "1")
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], "1", "2", "3")
+
+	a := []string{"7", "8", "9", "0"}
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], a...)
+
+	// Exceptions
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(a, m[k]...)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	sinkAppend, m[k] = !sinkAppend, append(m[k], "99")
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k+"1"], "100")
+}
--- a/test/codegen/maps.go
+++ b/test/codegen/maps.go
@@ -0,0 +1,201 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains code generation tests related to the handling of
+// map types.
+
+// ------------------- //
+//     Access Const    //
+// ------------------- //
+
+// Direct use of constants in fast map access calls (Issue #19015).
+
+// AccessInt1 indexes m with the constant key 5. The amd64 check expects the
+// constant to appear as an immediate operand of a MOVL or MOVQ.
+func AccessInt1(m map[int]int) int {
+	// amd64:"MOV[LQ]\t[$]5"
+	return m[5]
+}
+
+// AccessInt2 performs a comma-ok lookup of the constant key 5 and returns
+// the ok result. The amd64 check expects the constant to appear as an
+// immediate operand of a MOVL or MOVQ.
+func AccessInt2(m map[int]int) bool {
+	// amd64:"MOV[LQ]\t[$]5"
+	_, ok := m[5]
+	return ok
+}
+
+// AccessString1 indexes m with the constant string abc. The amd64 check
+// expects that quoted string to appear in the generated code for the lookup.
+func AccessString1(m map[string]int) int {
+	// amd64:`.*"abc"`
+	return m["abc"]
+}
+
+// AccessString2 performs a comma-ok lookup of the constant string abc and
+// returns the ok result. The amd64 check expects that quoted string to
+// appear in the generated code for the lookup.
+func AccessString2(m map[string]int) bool {
+	// amd64:`.*"abc"`
+	_, ok := m["abc"]
+	return ok
+}
+
+// ------------------- //
+//  String Conversion  //
+// ------------------- //
+
+// LookupStringConversionSimple indexes m with string(bytes). The amd64 check
+// requires that no call to runtime.slicebytetostring is emitted for the
+// lookup.
+func LookupStringConversionSimple(m map[string]int, bytes []byte) int {
+	// amd64:-`.*runtime\.slicebytetostring\(`
+	return m[string(bytes)]
+}
+
+// LookupStringConversionStructLit indexes m with a struct literal whose only
+// field is string(bytes). The amd64 check requires that no call to
+// runtime.slicebytetostring is emitted for the lookup.
+func LookupStringConversionStructLit(m map[struct{ string }]int, bytes []byte) int {
+	// amd64:-`.*runtime\.slicebytetostring\(`
+	return m[struct{ string }{string(bytes)}]
+}
+
+// LookupStringConversionArrayLit indexes m with a two-element array literal
+// in which both elements are string(bytes). The amd64 check requires that no
+// call to runtime.slicebytetostring is emitted for the lookup.
+func LookupStringConversionArrayLit(m map[[2]string]int, bytes []byte) int {
+	// amd64:-`.*runtime\.slicebytetostring\(`
+	return m[[2]string{string(bytes), string(bytes)}]
+}
+
+// LookupStringConversionNestedLit indexes m with an array-of-struct-of-array
+// literal whose innermost element is string(bytes). The amd64 check requires
+// that no call to runtime.slicebytetostring is emitted for the lookup.
+func LookupStringConversionNestedLit(m map[[1]struct{ s [1]string }]int, bytes []byte) int {
+	// amd64:-`.*runtime\.slicebytetostring\(`
+	return m[[1]struct{ s [1]string }{struct{ s [1]string }{s: [1]string{string(bytes)}}}]
+}
+
+// LookupStringConversionKeyedArrayLit indexes m with a keyed array literal
+// that sets only element 0 to string(bytes). The amd64 check requires that
+// no call to runtime.slicebytetostring is emitted for the lookup.
+func LookupStringConversionKeyedArrayLit(m map[[2]string]int, bytes []byte) int {
+	// amd64:-`.*runtime\.slicebytetostring\(`
+	return m[[2]string{0: string(bytes)}]
+}
+
+// ------------------- //
+//     Map Clear       //
+// ------------------- //
+
+// Optimization of map clear idiom (Issue #20138).
+
+// MapClearReflexive deletes every key of an int-keyed map while ranging over
+// it. The amd64 checks expect a runtime.mapclear call and no
+// runtime.mapiterinit call.
+func MapClearReflexive(m map[int]int) {
+	// amd64:`.*runtime\.mapclear`
+	// amd64:-`.*runtime\.mapiterinit`
+	for k := range m {
+		delete(m, k)
+	}
+}
+
+// MapClearIndirect runs the range-and-delete loop on a map reached through a
+// struct field. The amd64 checks expect a runtime.mapclear call and no
+// runtime.mapiterinit call.
+func MapClearIndirect(m map[int]int) {
+	s := struct{ m map[int]int }{m: m}
+	// amd64:`.*runtime\.mapclear`
+	// amd64:-`.*runtime\.mapiterinit`
+	for k := range s.m {
+		delete(s.m, k)
+	}
+}
+
+// MapClearPointer runs the range-and-delete loop on a map with pointer keys.
+// The amd64 checks expect a runtime.mapclear call and no runtime.mapiterinit
+// call.
+func MapClearPointer(m map[*byte]int) {
+	// amd64:`.*runtime\.mapclear`
+	// amd64:-`.*runtime\.mapiterinit`
+	for k := range m {
+		delete(m, k)
+	}
+}
+
+// MapClearNotReflexive runs the range-and-delete loop on a map with float64
+// keys (the non-reflexive key case named by the function). Here the amd64
+// checks expect the loop to stay an iteration: runtime.mapiterinit must be
+// called and runtime.mapclear must not.
+func MapClearNotReflexive(m map[float64]int) {
+	// amd64:`.*runtime\.mapiterinit`
+	// amd64:-`.*runtime\.mapclear`
+	for k := range m {
+		delete(m, k)
+	}
+}
+
+// MapClearInterface runs the range-and-delete loop on a map with interface
+// keys. The amd64 checks expect the loop to stay an iteration:
+// runtime.mapiterinit must be called and runtime.mapclear must not.
+func MapClearInterface(m map[interface{}]int) {
+	// amd64:`.*runtime\.mapiterinit`
+	// amd64:-`.*runtime\.mapclear`
+	for k := range m {
+		delete(m, k)
+	}
+}
+
+// MapClearSideEffect runs the range-and-delete loop with the loop key
+// assigned to a variable declared outside the loop and returned afterwards.
+// The amd64 checks expect the loop to stay an iteration:
+// runtime.mapiterinit must be called and runtime.mapclear must not.
+func MapClearSideEffect(m map[int]int) int {
+	k := 0
+	// amd64:`.*runtime\.mapiterinit`
+	// amd64:-`.*runtime\.mapclear`
+	for k = range m {
+		delete(m, k)
+	}
+	return k
+}
+
+// MapLiteralSizing builds two map literals with 33 entries each (keys 0
+// through 32) and returns both. The amd64 check placed before each literal
+// expects a MOVL with the immediate 33. The parameter x is unused.
+func MapLiteralSizing(x int) (map[int]int, map[int]int) {
+	// This is tested for internal/abi/maps.go:MapBucketCountBits={3,4,5}
+	// amd64:"MOVL\t[$]33,"
+	m := map[int]int{
+		0:  0,
+		1:  1,
+		2:  2,
+		3:  3,
+		4:  4,
+		5:  5,
+		6:  6,
+		7:  7,
+		8:  8,
+		9:  9,
+		10: 10,
+		11: 11,
+		12: 12,
+		13: 13,
+		14: 14,
+		15: 15,
+		16: 16,
+		17: 17,
+		18: 18,
+		19: 19,
+		20: 20,
+		21: 21,
+		22: 22,
+		23: 23,
+		24: 24,
+		25: 25,
+		26: 26,
+		27: 27,
+		28: 28,
+		29: 29,
+		30: 30,
+		31: 32,
+		32: 32,
+	}
+	// amd64:"MOVL\t[$]33,"
+	n := map[int]int{
+		0:  0,
+		1:  1,
+		2:  2,
+		3:  3,
+		4:  4,
+		5:  5,
+		6:  6,
+		7:  7,
+		8:  8,
+		9:  9,
+		10: 10,
+		11: 11,
+		12: 12,
+		13: 13,
+		14: 14,
+		15: 15,
+		16: 16,
+		17: 17,
+		18: 18,
+		19: 19,
+		20: 20,
+		21: 21,
+		22: 22,
+		23: 23,
+		24: 24,
+		25: 25,
+		26: 26,
+		27: 27,
+		28: 28,
+		29: 29,
+		30: 30,
+		31: 32,
+		32: 32,
+	}
+	return m, n
+}
--- a/test/codegen/math.go
+++ b/test/codegen/math.go
@@ -0,0 +1,255 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math"
+
+var sink64 [8]float64
+
+// approx stores the results of math.Ceil, Floor, Round, Trunc and
+// RoundToEven of x into sink64[0] through sink64[4]. Each call is annotated
+// with the rounding instruction expected on the listed architectures; where
+// present, the amd64/v2 and amd64/v3 checks also forbid any reference to
+// x86HasSSE41.
+func approx(x float64) {
+	// amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
+	// amd64:"ROUNDSD\t[$]2"
+	// s390x:"FIDBR\t[$]6"
+	// arm64:"FRINTPD"
+	// ppc64x:"FRIP"
+	// wasm:"F64Ceil"
+	sink64[0] = math.Ceil(x)
+
+	// amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
+	// amd64:"ROUNDSD\t[$]1"
+	// s390x:"FIDBR\t[$]7"
+	// arm64:"FRINTMD"
+	// ppc64x:"FRIM"
+	// wasm:"F64Floor"
+	sink64[1] = math.Floor(x)
+
+	// s390x:"FIDBR\t[$]1"
+	// arm64:"FRINTAD"
+	// ppc64x:"FRIN"
+	sink64[2] = math.Round(x)
+
+	// amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
+	// amd64:"ROUNDSD\t[$]3"
+	// s390x:"FIDBR\t[$]5"
+	// arm64:"FRINTZD"
+	// ppc64x:"FRIZ"
+	// wasm:"F64Trunc"
+	sink64[3] = math.Trunc(x)
+
+	// amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
+	// amd64:"ROUNDSD\t[$]0"
+	// s390x:"FIDBR\t[$]4"
+	// arm64:"FRINTND"
+	// wasm:"F64Nearest"
+	sink64[4] = math.RoundToEven(x)
+}
+
+// sqrt returns math.Sqrt(x). The checks expect the square-root instruction
+// named for each listed architecture, and forbid SQRTD on the softfloat
+// variants of 386, mips and mips64.
+func sqrt(x float64) float64 {
+	// amd64:"SQRTSD"
+	// 386/sse2:"SQRTSD" 386/softfloat:-"SQRTD"
+	// arm64:"FSQRTD"
+	// arm/7:"SQRTD"
+	// mips/hardfloat:"SQRTD" mips/softfloat:-"SQRTD"
+	// mips64/hardfloat:"SQRTD" mips64/softfloat:-"SQRTD"
+	// wasm:"F64Sqrt"
+	// ppc64x:"FSQRT"
+	// riscv64: "FSQRTD"
+	return math.Sqrt(x)
+}
+
+// sqrt32 takes the square root of a float32 by converting to float64,
+// calling math.Sqrt and converting back. The checks expect the
+// single-precision square-root instruction named for each listed
+// architecture, and forbid it on the softfloat variants of 386, mips and
+// mips64.
+func sqrt32(x float32) float32 {
+	// amd64:"SQRTSS"
+	// 386/sse2:"SQRTSS" 386/softfloat:-"SQRTS"
+	// arm64:"FSQRTS"
+	// arm/7:"SQRTF"
+	// mips/hardfloat:"SQRTF" mips/softfloat:-"SQRTF"
+	// mips64/hardfloat:"SQRTF" mips64/softfloat:-"SQRTF"
+	// wasm:"F32Sqrt"
+	// ppc64x:"FSQRTS"
+	// riscv64: "FSQRTS"
+	return float32(math.Sqrt(float64(x)))
+}
+
+// abs stores math.Abs(x) in sink64[0] and -math.Abs(y) in sink64[1].
+// Check that it's using integer registers: on amd64 the checks expect the
+// sign bit to be handled with BTRQ, the other listed architectures expect
+// their abs (or negated abs) instruction, and the s390x checks forbid MOVD.
+func abs(x, y float64) {
+	// amd64:"BTRQ\t[$]63"
+	// arm64:"FABSD\t"
+	// s390x:"LPDFR\t",-"MOVD\t"     (no integer load/store)
+	// ppc64x:"FABS\t"
+	// riscv64:"FABSD\t"
+	// wasm:"F64Abs"
+	// arm/6:"ABSD\t"
+	// mips64/hardfloat:"ABSD\t"
+	// mips/hardfloat:"ABSD\t"
+	sink64[0] = math.Abs(x)
+
+	// amd64:"BTRQ\t[$]63","PXOR"    (TODO: this should be BTSQ)
+	// s390x:"LNDFR\t",-"MOVD\t"     (no integer load/store)
+	// ppc64x:"FNABS\t"
+	sink64[1] = -math.Abs(y)
+}
+
+// abs32 takes the absolute value of a float32 by converting to float64,
+// calling math.Abs and converting back. The s390x check expects LPDFR and
+// forbids LDEBR and LEDBR, i.e. no float64 conversion.
+func abs32(x float32) float32 {
+	// s390x:"LPDFR",-"LDEBR",-"LEDBR"     (no float64 conversion)
+	return float32(math.Abs(float64(x)))
+}
+
+// copysign exercises math.Copysign with a variable sign, with the constant
+// sign -1 and with the constant magnitude -1, plus a hand-written variant
+// that sets the sign bit through Float64bits and Float64frombits. Results go
+// to sink64[0] through sink64[3].
+// Check that it's using integer registers: the amd64 checks expect integer
+// bit operations, other architectures expect the instruction named in their
+// check, and the s390x checks forbid MOVD.
+func copysign(a, b, c float64) {
+	// amd64:"BTRQ\t[$]63","ANDQ","ORQ"
+	// s390x:"CPSDR",-"MOVD"         (no integer load/store)
+	// ppc64x:"FCPSGN"
+	// riscv64:"FSGNJD"
+	// wasm:"F64Copysign"
+	sink64[0] = math.Copysign(a, b)
+
+	// amd64:"BTSQ\t[$]63"
+	// s390x:"LNDFR\t",-"MOVD\t"     (no integer load/store)
+	// ppc64x:"FCPSGN"
+	// riscv64:"FSGNJD"
+	// arm64:"ORR", -"AND"
+	sink64[1] = math.Copysign(c, -1)
+
+	// Like math.Copysign(c, -1), but with integer operations. Useful
+	// for platforms that have a copysign opcode to see if it's detected.
+	// s390x:"LNDFR\t",-"MOVD\t"     (no integer load/store)
+	sink64[2] = math.Float64frombits(math.Float64bits(a) | 1<<63)
+
+	// amd64:"ANDQ","ORQ"
+	// s390x:"CPSDR\t",-"MOVD\t"     (no integer load/store)
+	// ppc64x:"FCPSGN"
+	// riscv64:"FSGNJD"
+	sink64[3] = math.Copysign(-1, c)
+}
+
+// fma returns math.FMA(x, y, z). The checks expect the fused multiply-add
+// instruction named for each listed architecture; on amd64/v3 no reference
+// to x86HasFMA may appear.
+func fma(x, y, z float64) float64 {
+	// amd64/v3:-".*x86HasFMA"
+	// amd64:"VFMADD231SD"
+	// arm/6:"FMULAD"
+	// arm64:"FMADDD"
+	// s390x:"FMADD"
+	// ppc64x:"FMADD"
+	// riscv64:"FMADDD"
+	return math.FMA(x, y, z)
+}
+
+// fms calls math.FMA with the addend negated. The riscv64 check expects
+// FMSUBD.
+func fms(x, y, z float64) float64 {
+	// riscv64:"FMSUBD"
+	return math.FMA(x, y, -z)
+}
+
+// fnms calls math.FMA with the first factor negated. The riscv64 check
+// expects FNMSUBD and forbids FNMADDD.
+func fnms(x, y, z float64) float64 {
+	// riscv64:"FNMSUBD",-"FNMADDD"
+	return math.FMA(-x, y, z)
+}
+
+// fnma calls math.FMA with the second factor and the addend negated. The
+// riscv64 check expects FNMADDD and forbids FNMSUBD.
+func fnma(x, y, z float64) float64 {
+	// riscv64:"FNMADDD",-"FNMSUBD"
+	return math.FMA(x, -y, -z)
+}
+
+// fromFloat64 reinterprets f64+1 as a uint64 with math.Float64bits and adds
+// 1 to the result. The amd64, arm64 and mips64/hardfloat checks expect a
+// register move whose source is a floating-point register (X or F) and
+// whose destination is not; ppc64x expects MFVSRD.
+func fromFloat64(f64 float64) uint64 {
+	// amd64:"MOVQ\tX.*, [^X].*"
+	// arm64:"FMOVD\tF.*, R.*"
+	// ppc64x:"MFVSRD"
+	// mips64/hardfloat:"MOVV\tF.*, R.*"
+	return math.Float64bits(f64+1) + 1
+}
+
+// fromFloat32 reinterprets f32+1 as a uint32 with math.Float32bits and adds
+// 1 to the result. The checks expect a register move whose source is a
+// floating-point register (X or F) and whose destination is not.
+func fromFloat32(f32 float32) uint32 {
+	// amd64:"MOVL\tX.*, [^X].*"
+	// arm64:"FMOVS\tF.*, R.*"
+	// mips64/hardfloat:"MOVW\tF.*, R.*"
+	return math.Float32bits(f32+1) + 1
+}
+
+// toFloat64 reinterprets u64+1 as a float64 with math.Float64frombits and
+// adds 1 to the result. The amd64, arm64 and mips64/hardfloat checks expect
+// a register move whose destination is a floating-point register (X or F)
+// and whose source is not; ppc64x expects MTVSRD.
+func toFloat64(u64 uint64) float64 {
+	// amd64:"MOVQ\t[^X].*, X.*"
+	// arm64:"FMOVD\tR.*, F.*"
+	// ppc64x:"MTVSRD"
+	// mips64/hardfloat:"MOVV\tR.*, F.*"
+	return math.Float64frombits(u64+1) + 1
+}
+
+// toFloat32 reinterprets u32+1 as a float32 with math.Float32frombits and
+// adds 1 to the result. The checks expect a register move whose destination
+// is a floating-point register (X or F) and whose source is not.
+func toFloat32(u32 uint32) float32 {
+	// amd64:"MOVL\t[^X].*, X.*"
+	// arm64:"FMOVS\tR.*, F.*"
+	// mips64/hardfloat:"MOVW\tR.*, F.*"
+	return math.Float32frombits(u32+1) + 1
+}
+
+// Test that comparisons with constants converted to float
+// are evaluated at compile-time
+
+// constantCheck64 compares float64 constants against float64 conversions of
+// integer constants. Both comparisons are false, so the amd64 and s390x
+// checks expect a constant zero result, no floating-point compare and no
+// load of the constant 1.
+func constantCheck64() bool {
+	// amd64:"(MOVB\t[$]0)|(XORL\t[A-Z][A-Z0-9]+, [A-Z][A-Z0-9]+)",-"FCMP",-"MOVB\t[$]1"
+	// s390x:"MOV(B|BZ|D)\t[$]0,",-"FCMPU",-"MOV(B|BZ|D)\t[$]1,"
+	return 0.5 == float64(uint32(1)) || 1.5 > float64(uint64(1<<63))
+}
+
+// constantCheck32 compares float32 constants against float32 conversions of
+// integer constants. Both comparisons are true, so the amd64 and s390x
+// checks expect a constant 1 result, no floating-point compare and no load
+// of the constant 0.
+func constantCheck32() bool {
+	// amd64:"MOV(B|L)\t[$]1",-"FCMP",-"MOV(B|L)\t[$]0"
+	// s390x:"MOV(B|BZ|D)\t[$]1,",-"FCMPU",-"MOV(B|BZ|D)\t[$]0,"
+	return float32(0.5) <= float32(int64(1)) && float32(1.5) >= float32(int32(-1<<31))
+}
+
+// Test that integer constants are converted to floating point constants
+// at compile-time
+
+// constantConvert32 returns -x when x is greater than
+// math.Float32frombits(0x3f800000), and x otherwise. The checks expect the
+// comparison operand to be materialized as a floating-point constant on the
+// listed architectures.
+func constantConvert32(x float32) float32 {
+	// amd64:"MOVSS\t[$]f32.3f800000\\(SB\\)"
+	// s390x:"FMOVS\t[$]f32.3f800000\\(SB\\)"
+	// ppc64x/power8:"FMOVS\t[$]f32.3f800000\\(SB\\)"
+	// ppc64x/power9:"FMOVS\t[$]f32.3f800000\\(SB\\)"
+	// ppc64x/power10:"XXSPLTIDP\t[$]1065353216, VS0"
+	// arm64:"FMOVS\t[$]\\(1.0\\)"
+	if x > math.Float32frombits(0x3f800000) {
+		return -x
+	}
+	return x
+}
+
+// constantConvertInt32 returns -x when x is greater than
+// math.Float32bits(1), and x otherwise. The checks forbid a floating-point
+// load (MOVSS or FMOVS) on the listed architectures.
+func constantConvertInt32(x uint32) uint32 {
+	// amd64:-"MOVSS"
+	// s390x:-"FMOVS"
+	// ppc64x:-"FMOVS"
+	// arm64:-"FMOVS"
+	if x > math.Float32bits(1) {
+		return -x
+	}
+	return x
+}
+
+// nanGenerate64 evaluates three expressions on float64 constants: zero/zero,
+// zero*inf and math.Sqrt(-1), and returns their sum. The amd64 checks expect
+// DIVSD, MULSD and SQRTSD to remain in the output for those expressions,
+// while 1/zero must not produce a DIVSD.
+func nanGenerate64() float64 {
+	// Test to make sure we don't generate a NaN while constant propagating.
+	// See issue 36400.
+	zero := 0.0
+	// amd64:-"DIVSD"
+	inf := 1 / zero // +inf. We can constant propagate this one.
+	negone := -1.0
+
+	// amd64:"DIVSD"
+	z0 := zero / zero
+	// amd64:"MULSD"
+	z1 := zero * inf
+	// amd64:"SQRTSD"
+	z2 := math.Sqrt(negone)
+	return z0 + z1 + z2
+}
+
+// nanGenerate32 evaluates zero/zero and zero*inf on float32 constants and
+// returns their sum. The amd64 checks expect DIVSS and MULSS to remain in
+// the output for those expressions, while 1/zero must not produce a DIVSS.
+func nanGenerate32() float32 {
+	zero := float32(0.0)
+	// amd64:-"DIVSS"
+	inf := 1 / zero // +inf. We can constant propagate this one.
+
+	// amd64:"DIVSS"
+	z0 := zero / zero
+	// amd64:"MULSS"
+	z1 := zero * inf
+	return z0 + z1
+}
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -0,0 +1,880 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math/bits"
+
+// ----------------------- //
+//    bits.LeadingZeros    //
+// ----------------------- //
+
+// LeadingZeros returns bits.LeadingZeros(n). The checks expect the
+// instruction named for each listed architecture; on amd64/v3 that is
+// LZCNTQ with no BSRQ.
+func LeadingZeros(n uint) int {
+	// amd64/v1,amd64/v2:"BSRQ"
+	// amd64/v3:"LZCNTQ", -"BSRQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	// ppc64x:"CNTLZD"
+	return bits.LeadingZeros(n)
+}
+
+// LeadingZeros64 returns bits.LeadingZeros64(n). The checks expect the
+// instruction named for each listed architecture; on amd64/v3 that is
+// LZCNTQ with no BSRQ.
+func LeadingZeros64(n uint64) int {
+	// amd64/v1,amd64/v2:"BSRQ"
+	// amd64/v3:"LZCNTQ", -"BSRQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	// ppc64x:"CNTLZD"
+	return bits.LeadingZeros64(n)
+}
+
+// LeadingZeros32 returns bits.LeadingZeros32(n). The checks expect the
+// instruction named for each listed architecture; on amd64/v3 that is
+// LZCNTL with no BSRL.
+func LeadingZeros32(n uint32) int {
+	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
+	// amd64/v3: "LZCNTL",-"BSRL"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZW"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	// ppc64x:"CNTLZW"
+	return bits.LeadingZeros32(n)
+}
+
+// LeadingZeros16 returns bits.LeadingZeros16(n). The checks expect the
+// instruction named for each listed architecture; on amd64/v3 that is
+// LZCNTL with no BSRL.
+func LeadingZeros16(n uint16) int {
+	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
+	// amd64/v3: "LZCNTL",-"BSRL"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	// ppc64x:"CNTLZD"
+	return bits.LeadingZeros16(n)
+}
+
+// LeadingZeros8 returns bits.LeadingZeros8(n). The checks expect the
+// instruction named for each listed architecture; on amd64/v3 that is
+// LZCNTL with no BSRL.
+func LeadingZeros8(n uint8) int {
+	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
+	// amd64/v3: "LZCNTL",-"BSRL"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	// ppc64x:"CNTLZD"
+	return bits.LeadingZeros8(n)
+}
+
+// --------------- //
+//    bits.Len*    //
+// --------------- //
+
+// Len returns bits.Len(n). The checks expect the instruction named for each
+// listed architecture; ppc64x expects both SUBC and CNTLZD.
+func Len(n uint) int {
+	// amd64/v1,amd64/v2:"BSRQ"
+	// amd64/v3: "LZCNTQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	// ppc64x:"SUBC","CNTLZD"
+	return bits.Len(n)
+}
+
+// Len64 returns bits.Len64(n). The checks expect the instruction named for
+// each listed architecture; ppc64x expects both SUBC and CNTLZD.
+func Len64(n uint64) int {
+	// amd64/v1,amd64/v2:"BSRQ"
+	// amd64/v3: "LZCNTQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	// ppc64x:"SUBC","CNTLZD"
+	return bits.Len64(n)
+}
+
+// SubFromLen64 returns 64 - bits.Len64(n). The ppc64x check expects CNTLZD
+// and forbids SUBC.
+func SubFromLen64(n uint64) int {
+	// ppc64x:"CNTLZD",-"SUBC"
+	return 64 - bits.Len64(n)
+}
+
+// Len32 returns bits.Len32(n). The checks expect the instruction named for
+// each listed architecture; the amd64/v1,v2 line also forbids CMOVQEQ.
+func Len32(n uint32) int {
+	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
+	// amd64/v3: "LZCNTL"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	// ppc64x: "CNTLZW"
+	return bits.Len32(n)
+}
+
+// Len16 returns bits.Len16(n). The checks expect the instruction named for
+// each listed architecture; the amd64/v1,v2 line also forbids CMOVQEQ.
+func Len16(n uint16) int {
+	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
+	// amd64/v3: "LZCNTL"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	// ppc64x:"SUBC","CNTLZD"
+	return bits.Len16(n)
+}
+
+// Len8 returns bits.Len8(n). The checks expect the instruction named for
+// each listed architecture; the amd64/v1,v2 line also forbids CMOVQEQ.
+func Len8(n uint8) int {
+	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
+	// amd64/v3: "LZCNTL"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	// ppc64x:"SUBC","CNTLZD"
+	return bits.Len8(n)
+}
+
+// -------------------- //
+//    bits.OnesCount    //
+// -------------------- //
+
+// TODO(register args) Restore a m d 6 4 / v 1 :.*x86HasPOPCNT when only one ABI is tested.
+// OnesCount returns bits.OnesCount(n). The checks expect the population
+// count instruction named for each listed architecture; amd64/v2 and
+// amd64/v3 must not reference x86HasPOPCNT.
+func OnesCount(n uint) int {
+	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
+	// amd64:"POPCNTQ"
+	// arm64:"VCNT","VUADDLV"
+	// s390x:"POPCNT"
+	// ppc64x:"POPCNTD"
+	// wasm:"I64Popcnt"
+	return bits.OnesCount(n)
+}
+
+// OnesCount64 returns bits.OnesCount64(n). The checks expect the population
+// count instruction named for each listed architecture; amd64/v2 and
+// amd64/v3 must not reference x86HasPOPCNT.
+func OnesCount64(n uint64) int {
+	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
+	// amd64:"POPCNTQ"
+	// arm64:"VCNT","VUADDLV"
+	// s390x:"POPCNT"
+	// ppc64x:"POPCNTD"
+	// wasm:"I64Popcnt"
+	return bits.OnesCount64(n)
+}
+
+// OnesCount32 returns bits.OnesCount32(n). The checks expect the population
+// count instruction named for each listed architecture; amd64/v2 and
+// amd64/v3 must not reference x86HasPOPCNT.
+func OnesCount32(n uint32) int {
+	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
+	// amd64:"POPCNTL"
+	// arm64:"VCNT","VUADDLV"
+	// s390x:"POPCNT"
+	// ppc64x:"POPCNTW"
+	// wasm:"I64Popcnt"
+	return bits.OnesCount32(n)
+}
+
+// OnesCount16 returns bits.OnesCount16(n). The checks expect the population
+// count instruction named for each listed architecture; amd64/v2 and
+// amd64/v3 must not reference x86HasPOPCNT.
+func OnesCount16(n uint16) int {
+	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
+	// amd64:"POPCNTL"
+	// arm64:"VCNT","VUADDLV"
+	// s390x:"POPCNT"
+	// ppc64x:"POPCNTW"
+	// wasm:"I64Popcnt"
+	return bits.OnesCount16(n)
+}
+
+// OnesCount8 returns bits.OnesCount8(n). Only s390x, ppc64x and wasm carry
+// a check here, each expecting its population count instruction.
+func OnesCount8(n uint8) int {
+	// s390x:"POPCNT"
+	// ppc64x:"POPCNTB"
+	// wasm:"I64Popcnt"
+	return bits.OnesCount8(n)
+}
+
+// ----------------------- //
+//    bits.ReverseBytes    //
+// ----------------------- //
+
+// ReverseBytes returns bits.ReverseBytes(n). The checks expect the
+// byte-swap instruction named for each listed architecture.
+func ReverseBytes(n uint) uint {
+	// amd64:"BSWAPQ"
+	// 386:"BSWAPL"
+	// s390x:"MOVDBR"
+	// arm64:"REV"
+	return bits.ReverseBytes(n)
+}
+
+// ReverseBytes64 returns bits.ReverseBytes64(n). The checks expect the
+// byte-swap instruction named for each listed architecture.
+func ReverseBytes64(n uint64) uint64 {
+	// amd64:"BSWAPQ"
+	// 386:"BSWAPL"
+	// s390x:"MOVDBR"
+	// arm64:"REV"
+	// ppc64x/power10: "BRD"
+	return bits.ReverseBytes64(n)
+}
+
+// ReverseBytes32 returns bits.ReverseBytes32(n). The checks expect the
+// byte-swap instruction named for each listed architecture.
+func ReverseBytes32(n uint32) uint32 {
+	// amd64:"BSWAPL"
+	// 386:"BSWAPL"
+	// s390x:"MOVWBR"
+	// arm64:"REVW"
+	// ppc64x/power10: "BRW"
+	return bits.ReverseBytes32(n)
+}
+
+// ReverseBytes16 returns bits.ReverseBytes16(n). The checks expect ROLW on
+// amd64, REV16W without UBFX or ORR on arm64, a shift-and-or sequence on
+// arm/5, REV16 on arm/6 and arm/7, and BRH on ppc64x/power10.
+func ReverseBytes16(n uint16) uint16 {
+	// amd64:"ROLW"
+	// arm64:"REV16W",-"UBFX",-"ORR"
+	// arm/5:"SLL","SRL","ORR"
+	// arm/6:"REV16"
+	// arm/7:"REV16"
+	// ppc64x/power10: "BRH"
+	return bits.ReverseBytes16(n)
+}
+
+// --------------------- //
+//    bits.RotateLeft    //
+// --------------------- //
+
+// RotateLeft64 rotates n left by the constant 37. The checks expect the
+// rotate instruction named for each listed architecture.
+func RotateLeft64(n uint64) uint64 {
+	// amd64:"ROLQ"
+	// arm64:"ROR"
+	// ppc64x:"ROTL"
+	// s390x:"RISBGZ\t[$]0, [$]63, [$]37, "
+	// wasm:"I64Rotl"
+	return bits.RotateLeft64(n, 37)
+}
+
+// RotateLeft32 rotates n left by the constant 9. The checks expect the
+// rotate instruction named for each listed architecture; on arm that is a
+// MOVW with a rotate-by-23 operand.
+func RotateLeft32(n uint32) uint32 {
+	// amd64:"ROLL" 386:"ROLL"
+	// arm:`MOVW\tR[0-9]+@>23`
+	// arm64:"RORW"
+	// ppc64x:"ROTLW"
+	// s390x:"RLL"
+	// wasm:"I32Rotl"
+	return bits.RotateLeft32(n, 9)
+}
+
+// RotateLeft16 rotates n left by the variable amount s. The checks expect
+// ROLW on amd64 and 386, and RORW without CSEL on arm64.
+func RotateLeft16(n uint16, s int) uint16 {
+	// amd64:"ROLW" 386:"ROLW"
+	// arm64:"RORW",-"CSEL"
+	return bits.RotateLeft16(n, s)
+}
+
+// RotateLeft8 rotates n left by the variable amount s. The checks expect
+// ROLB on amd64 and 386, and an LSL/LSR pair without CSEL on arm64.
+func RotateLeft8(n uint8, s int) uint8 {
+	// amd64:"ROLB" 386:"ROLB"
+	// arm64:"LSL","LSR",-"CSEL"
+	return bits.RotateLeft8(n, s)
+}
+
+// RotateLeftVariable rotates a uint left by the variable amount m. The
+// checks expect the rotate instruction named for each listed architecture.
+func RotateLeftVariable(n uint, m int) uint {
+	// amd64:"ROLQ"
+	// arm64:"ROR"
+	// ppc64x:"ROTL"
+	// s390x:"RLLG"
+	// wasm:"I64Rotl"
+	return bits.RotateLeft(n, m)
+}
+
+// RotateLeftVariable64 rotates a uint64 left by the variable amount m. The
+// checks expect the rotate instruction named for each listed architecture.
+func RotateLeftVariable64(n uint64, m int) uint64 {
+	// amd64:"ROLQ"
+	// arm64:"ROR"
+	// ppc64x:"ROTL"
+	// s390x:"RLLG"
+	// wasm:"I64Rotl"
+	return bits.RotateLeft64(n, m)
+}
+
+// RotateLeftVariable32 rotates a uint32 left by the variable amount m. The
+// checks expect the rotate instruction named for each listed architecture;
+// on arm that is a MOVW with a register-specified rotate operand.
+func RotateLeftVariable32(n uint32, m int) uint32 {
+	// arm:`MOVW\tR[0-9]+@>R[0-9]+`
+	// amd64:"ROLL"
+	// arm64:"RORW"
+	// ppc64x:"ROTLW"
+	// s390x:"RLL"
+	// wasm:"I32Rotl"
+	return bits.RotateLeft32(n, m)
+}
+
+// ------------------------ //
+//    bits.TrailingZeros    //
+// ------------------------ //
+
+// TrailingZeros returns bits.TrailingZeros(n). The checks expect the
+// instruction sequence named for each listed architecture; the amd64/v1,v2
+// line expects BSFQ together with a load of 64 and CMOVQEQ, and amd64/v3
+// expects TZCNTQ.
+func TrailingZeros(n uint) int {
+	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
+	// amd64/v3:"TZCNTQ"
+	// 386:"BSFL"
+	// arm:"CLZ"
+	// arm64:"RBIT","CLZ"
+	// s390x:"FLOGR"
+	// ppc64x/power8:"ANDN","POPCNTD"
+	// ppc64x/power9: "CNTTZD"
+	// wasm:"I64Ctz"
+	return bits.TrailingZeros(n)
+}
+
+// TrailingZeros64 returns bits.TrailingZeros64(n). The checks expect the
+// instruction sequence named for each listed architecture; the amd64/v1,v2
+// line expects BSFQ together with a load of 64 and CMOVQEQ, and amd64/v3
+// expects TZCNTQ.
+func TrailingZeros64(n uint64) int {
+	// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
+	// amd64/v3:"TZCNTQ"
+	// 386:"BSFL"
+	// arm64:"RBIT","CLZ"
+	// s390x:"FLOGR"
+	// ppc64x/power8:"ANDN","POPCNTD"
+	// ppc64x/power9: "CNTTZD"
+	// wasm:"I64Ctz"
+	return bits.TrailingZeros64(n)
+}
+
+// TrailingZeros64Subtract returns bits.TrailingZeros64(1 - n). The ppc64x
+// checks expect NEG, SUBC, ANDN and POPCNTD on power8, and SUBC with CNTTZD
+// on power9.
+func TrailingZeros64Subtract(n uint64) int {
+	// ppc64x/power8:"NEG","SUBC","ANDN","POPCNTD"
+	// ppc64x/power9:"SUBC","CNTTZD"
+	return bits.TrailingZeros64(1 - n)
+}
+
+// TrailingZeros32 returns bits.TrailingZeros32(n). The checks expect the
+// instruction sequence named for each listed architecture; the amd64/v1,v2
+// line expects bit 32 to be set with BTSQ before a BSFQ, and amd64/v3
+// expects TZCNTL.
+func TrailingZeros32(n uint32) int {
+	// amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ"
+	// amd64/v3:"TZCNTL"
+	// 386:"BSFL"
+	// arm:"CLZ"
+	// arm64:"RBITW","CLZW"
+	// s390x:"FLOGR","MOVWZ"
+	// ppc64x/power8:"ANDN","POPCNTW"
+	// ppc64x/power9: "CNTTZW"
+	// wasm:"I64Ctz"
+	return bits.TrailingZeros32(n)
+}
+
+// TrailingZeros16 returns bits.TrailingZeros16(n). Most checks expect the
+// value to be ORed with 65536 (bit 16) before the bit-scan instruction; the
+// arm and arm64 checks also forbid a register-to-register MOVHU, and arm64
+// forbids the 64-bit RBIT and CLZ forms.
+func TrailingZeros16(n uint16) int {
+	// amd64:"BSFL","ORL\\t\\$65536"
+	// 386:"BSFL\t"
+	// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
+	// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
+	// s390x:"FLOGR","OR\t\\$65536"
+	// ppc64x/power8:"POPCNTD","ORIS\\t\\$1"
+	// ppc64x/power9:"CNTTZD","ORIS\\t\\$1"
+	// wasm:"I64Ctz"
+	return bits.TrailingZeros16(n)
+}
+
+// TrailingZeros8 returns bits.TrailingZeros8(n). Most checks expect the
+// value to be ORed with 256 (bit 8) before the bit-scan instruction; the
+// arm and arm64 checks also forbid a register-to-register MOVBU, and arm64
+// forbids the 64-bit RBIT and CLZ forms.
+func TrailingZeros8(n uint8) int {
+	// amd64:"BSFL","ORL\\t\\$256"
+	// 386:"BSFL"
+	// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
+	// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
+	// s390x:"FLOGR","OR\t\\$256"
+	// wasm:"I64Ctz"
+	return bits.TrailingZeros8(n)
+}
+
+// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero.
+
+// IterateBits sums bits.TrailingZeros(n) over a loop that runs while n is
+// non-zero and clears the lowest set bit each iteration. Inside the loop the
+// amd64/v1,v2 line expects BSFQ without CMOVEQ, and amd64/v3 expects TZCNTQ.
+func IterateBits(n uint) int {
+	i := 0
+	for n != 0 {
+		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
+		// amd64/v3:"TZCNTQ"
+		i += bits.TrailingZeros(n)
+		n &= n - 1
+	}
+	return i
+}
+
+// IterateBits64 sums bits.TrailingZeros64(n) over a loop that runs while n
+// is non-zero and clears the lowest set bit each iteration. Inside the loop
+// the amd64/v1,v2 line expects BSFQ without CMOVEQ, and amd64/v3 expects
+// TZCNTQ.
+func IterateBits64(n uint64) int {
+	i := 0
+	for n != 0 {
+		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
+		// amd64/v3:"TZCNTQ"
+		i += bits.TrailingZeros64(n)
+		n &= n - 1
+	}
+	return i
+}
+
+// IterateBits32 sums bits.TrailingZeros32(n) over a loop that runs while n
+// is non-zero and clears the lowest set bit each iteration. Inside the loop
+// the amd64/v1,v2 line expects BSFL without BTSQ, and amd64/v3 expects
+// TZCNTL.
+func IterateBits32(n uint32) int {
+	i := 0
+	for n != 0 {
+		// amd64/v1,amd64/v2:"BSFL",-"BTSQ"
+		// amd64/v3:"TZCNTL"
+		i += bits.TrailingZeros32(n)
+		n &= n - 1
+	}
+	return i
+}
+
+// IterateBits16 sums bits.TrailingZeros16(n) over a loop that runs while n
+// is non-zero and clears the lowest set bit each iteration. Inside the loop
+// the amd64/v1,v2 line expects BSFL without BTSL, amd64/v3 expects TZCNTL,
+// and arm64 expects RBITW and CLZW without ORR.
+func IterateBits16(n uint16) int {
+	i := 0
+	for n != 0 {
+		// amd64/v1,amd64/v2:"BSFL",-"BTSL"
+		// amd64/v3:"TZCNTL"
+		// arm64:"RBITW","CLZW",-"ORR"
+		i += bits.TrailingZeros16(n)
+		n &= n - 1
+	}
+	return i
+}
+
+// IterateBits8 sums bits.TrailingZeros8(n) over a loop that runs while n is
+// non-zero and clears the lowest set bit each iteration. Inside the loop the
+// amd64/v1,v2 line expects BSFL without BTSL, amd64/v3 expects TZCNTL, and
+// arm64 expects RBITW and CLZW without ORR.
+func IterateBits8(n uint8) int {
+	i := 0
+	for n != 0 {
+		// amd64/v1,amd64/v2:"BSFL",-"BTSL"
+		// amd64/v3:"TZCNTL"
+		// arm64:"RBITW","CLZW",-"ORR"
+		i += bits.TrailingZeros8(n)
+		n &= n - 1
+	}
+	return i
+}
+
+// --------------- //
+//    bits.Add*    //
+// --------------- //
+
+// Add returns bits.Add(x, y, ci) with all operands variable and both
+// results used. The checks expect the add-with-carry sequence named for
+// each listed architecture.
+func Add(x, y, ci uint) (r, co uint) {
+	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+	// ppc64x: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	// riscv64: "ADD","SLTU"
+	return bits.Add(x, y, ci)
+}
+
+// AddC returns bits.Add(x, 7, ci), i.e. with a constant second operand. The
+// checks expect the add-with-carry sequence named for each listed
+// architecture.
+func AddC(x, ci uint) (r, co uint) {
+	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+	// loong64: "ADDV", "SGTU"
+	// ppc64x: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	// mips64:"ADDV","SGTU"
+	// riscv64: "ADD","SLTU"
+	return bits.Add(x, 7, ci)
+}
+
+// AddZ returns bits.Add(x, y, 0), i.e. with a constant zero carry-in. The
+// checks expect a plain carry-producing add and forbid the instructions
+// that would consume a carry-in (for example ADCS on arm64 and ADCQ on
+// amd64).
+func AddZ(x, y uint) (r, co uint) {
+	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
+	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
+	// loong64: "ADDV", "SGTU"
+	// ppc64x: "ADDC", -"ADDE", "ADDZE"
+	// s390x:"ADDC",-"ADDC\t[$]-1,"
+	// mips64:"ADDV","SGTU"
+	// riscv64: "ADD","SLTU"
+	return bits.Add(x, y, 0)
+}
+
+// AddR returns only the sum of bits.Add(x, y, ci) and discards the
+// carry-out. The checks expect the add-with-carry instructions and forbid
+// those that would materialize the carry-out (for example SBBQ and NEGQ on
+// amd64).
+func AddR(x, y, ci uint) uint {
+	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
+	// loong64: "ADDV", -"SGTU"
+	// ppc64x: "ADDC", "ADDE", -"ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	// mips64:"ADDV",-"SGTU"
+	// riscv64: "ADD",-"SLTU"
+	r, _ := bits.Add(x, y, ci)
+	return r
+}
+
+// AddM adds the three-limb values p and q into r, threading the carry c
+// through successive bits.Add calls. The checks on the middle limb expect a
+// single add-with-carry instruction with no carry save or restore around it.
+func AddM(p, q, r *[3]uint) {
+	var c uint
+	r[0], c = bits.Add(p[0], q[0], c)
+	// arm64:"ADCS",-"ADD\t",-"CMP"
+	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
+	// s390x:"ADDE",-"ADDC\t[$]-1,"
+	r[1], c = bits.Add(p[1], q[1], c)
+	r[2], c = bits.Add(p[2], q[2], c)
+}
+
+// Add64 returns bits.Add64(x, y, ci) with all operands variable and both
+// results used. The checks expect the add-with-carry sequence named for
+// each listed architecture.
+func Add64(x, y, ci uint64) (r, co uint64) {
+	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+	// loong64: "ADDV", "SGTU"
+	// ppc64x: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	// mips64:"ADDV","SGTU"
+	// riscv64: "ADD","SLTU"
+	return bits.Add64(x, y, ci)
+}
+
+// Add64C returns bits.Add64(x, 7, ci), i.e. with a constant second operand.
+// The checks expect the add-with-carry sequence named for each listed
+// architecture.
+func Add64C(x, ci uint64) (r, co uint64) {
+	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+	// loong64: "ADDV", "SGTU"
+	// ppc64x: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	// mips64:"ADDV","SGTU"
+	// riscv64: "ADD","SLTU"
+	return bits.Add64(x, 7, ci)
+}
+
+// Add64Z returns bits.Add64(x, y, 0), i.e. with a constant zero carry-in.
+// The checks expect a plain carry-producing add and forbid the instructions
+// that would consume a carry-in (for example ADCS on arm64 and ADCQ on
+// amd64).
+func Add64Z(x, y uint64) (r, co uint64) {
+	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
+	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
+	// loong64: "ADDV", "SGTU"
+	// ppc64x: "ADDC", -"ADDE", "ADDZE"
+	// s390x:"ADDC",-"ADDC\t[$]-1,"
+	// mips64:"ADDV","SGTU"
+	// riscv64: "ADD","SLTU"
+	return bits.Add64(x, y, 0)
+}
+
+// Add64R returns only the sum of bits.Add64(x, y, ci) and discards the
+// carry-out. The checks expect the add-with-carry instructions and forbid
+// those that would materialize the carry-out (for example SBBQ and NEGQ on
+// amd64).
+func Add64R(x, y, ci uint64) uint64 {
+	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
+	// loong64: "ADDV", -"SGTU"
+	// ppc64x: "ADDC", "ADDE", -"ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	// mips64:"ADDV",-"SGTU"
+	// riscv64: "ADD",-"SLTU"
+	r, _ := bits.Add64(x, y, ci)
+	return r
+}
+
+// Add64M adds the three-limb values p and q into r, threading the carry c
+// through successive bits.Add64 calls. The checks on the middle limb expect
+// a single add-with-carry instruction with no carry save or restore around
+// it.
+func Add64M(p, q, r *[3]uint64) {
+	var c uint64
+	r[0], c = bits.Add64(p[0], q[0], c)
+	// arm64:"ADCS",-"ADD\t",-"CMP"
+	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
+	// ppc64x: -"ADDC", "ADDE", -"ADDZE"
+	// s390x:"ADDE",-"ADDC\t[$]-1,"
+	r[1], c = bits.Add64(p[1], q[1], c)
+	r[2], c = bits.Add64(p[2], q[2], c)
+}
+
+// Add64M0 chains three bits.Add64 calls in which the first has a zero
+// carry-in and the second adds a zero operand plus the carry. The ppc64x
+// checks expect only ADDZE for the second call and only ADDE for the third.
+func Add64M0(p, q, r *[3]uint64) {
+	var c uint64
+	r[0], c = bits.Add64(p[0], q[0], 0)
+	// ppc64x: -"ADDC", -"ADDE", "ADDZE\tR[1-9]"
+	r[1], c = bits.Add64(p[1], 0, c)
+	// ppc64x: -"ADDC", "ADDE", -"ADDZE"
+	r[2], c = bits.Add64(p[2], p[2], c)
+}
+
+// Add64MSaveC performs a two-limb add in which each carry-out is stored to
+// memory (c[0], c[1]) and the second add reads its carry-in back from c[0].
+// The ppc64x checks name the carry instructions expected for each call.
+func Add64MSaveC(p, q, r, c *[2]uint64) {
+	// ppc64x: "ADDC\tR", "ADDZE"
+	r[0], c[0] = bits.Add64(p[0], q[0], 0)
+	// ppc64x: "ADDC\t[$]-1", "ADDE", "ADDZE"
+	r[1], c[1] = bits.Add64(p[1], q[1], c[0])
+}
+
+// Add64PanicOnOverflowEQ adds a and b and panics when the carry-out equals
+// 1. The s390x check expects a BRC with mask 3 for the test and forbids
+// ADDE.
+func Add64PanicOnOverflowEQ(a, b uint64) uint64 {
+	r, c := bits.Add64(a, b, 0)
+	// s390x:"BRC\t[$]3,",-"ADDE"
+	if c == 1 {
+		panic("overflow")
+	}
+	return r
+}
+
+// Add64PanicOnOverflowNE adds a and b and panics when the carry-out is not
+// 0. The s390x check expects a BRC with mask 3 for the test and forbids
+// ADDE.
+func Add64PanicOnOverflowNE(a, b uint64) uint64 {
+	r, c := bits.Add64(a, b, 0)
+	// s390x:"BRC\t[$]3,",-"ADDE"
+	if c != 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+// Add64PanicOnOverflowGT adds a and b and panics when the carry-out is
+// greater than 0. The s390x check expects a BRC with mask 3 for the test
+// and forbids ADDE.
+func Add64PanicOnOverflowGT(a, b uint64) uint64 {
+	r, c := bits.Add64(a, b, 0)
+	// s390x:"BRC\t[$]3,",-"ADDE"
+	if c > 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+// Add64MPanicOnOverflowEQ adds two two-limb values and panics when the
+// final carry-out equals 1. The s390x check expects a BRC with mask 3 for
+// the test.
+func Add64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Add64(a[0], b[0], c)
+	r[1], c = bits.Add64(a[1], b[1], c)
+	// s390x:"BRC\t[$]3,"
+	if c == 1 {
+		panic("overflow")
+	}
+	return r
+}
+
+// Add64MPanicOnOverflowNE adds two two-limb values and panics when the
+// final carry-out is not 0. The s390x check expects a BRC with mask 3 for
+// the test.
+func Add64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Add64(a[0], b[0], c)
+	r[1], c = bits.Add64(a[1], b[1], c)
+	// s390x:"BRC\t[$]3,"
+	if c != 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+// Add64MPanicOnOverflowGT adds two two-limb values and panics when the
+// final carry-out is greater than 0. The s390x check expects a BRC with
+// mask 3 for the test.
+func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Add64(a[0], b[0], c)
+	r[1], c = bits.Add64(a[1], b[1], c)
+	// s390x:"BRC\t[$]3,"
+	if c > 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+// Add64MultipleChains verifies that independent carry chain operations are
+// scheduled efficiently and do not cause unnecessary save/restore of the CA
+// bit. It computes a+b and then c+(a+b) as two separate two-limb carry
+// chains and stores the result in d.
+//
+// This is an example of why CarryChainTail priority must be lower
+// (earlier in the block) than Memory. f[0]=f1 could be scheduled
+// after the first two lower 64 bit limb adds, but before either
+// high 64 bit limb is added.
+//
+// This is what happened on PPC64 when compiling
+// crypto/internal/edwards25519/field.feMulGeneric.
+func Add64MultipleChains(a, b, c, d [2]uint64) {
+	var cx, d1, d2 uint64
+	a1, a2 := a[0], a[1]
+	b1, b2 := b[0], b[1]
+	c1, c2 := c[0], c[1]
+
+	// ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
+	d1, cx = bits.Add64(a1, b1, 0)
+	// ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER"
+	d2, _ = bits.Add64(a2, b2, cx)
+
+	// ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
+	d1, cx = bits.Add64(c1, d1, 0)
+	// ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER"
+	d2, _ = bits.Add64(c2, d2, cx)
+	d[0] = d1
+	d[1] = d2
+}
+
+// --------------- //
+//    bits.Sub*    //
+// --------------- //
+
+// Sub returns bits.Sub(x, y, ci) with all operands variable and both
+// results used. The checks expect the subtract-with-borrow sequence named
+// for each listed architecture.
+func Sub(x, y, ci uint) (r, co uint) {
+	// amd64:"NEGL","SBBQ","NEGQ"
+	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// loong64:"SUBV","SGTU"
+	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
+	// s390x:"SUBE"
+	// mips64:"SUBV","SGTU"
+	// riscv64: "SUB","SLTU"
+	return bits.Sub(x, y, ci)
+}
+
+// SubC returns bits.Sub(x, 7, ci), i.e. with a constant subtrahend. The
+// checks expect the subtract-with-borrow sequence named for each listed
+// architecture.
+func SubC(x, ci uint) (r, co uint) {
+	// amd64:"NEGL","SBBQ","NEGQ"
+	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// loong64:"SUBV","SGTU"
+	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
+	// s390x:"SUBE"
+	// mips64:"SUBV","SGTU"
+	// riscv64: "SUB","SLTU"
+	return bits.Sub(x, 7, ci)
+}
+
+// SubZ returns bits.Sub(x, y, 0), i.e. with a constant zero borrow-in. The
+// checks expect a plain borrow-producing subtract and forbid the
+// instructions that would consume a borrow-in (for example SBCS on arm64
+// and NEGL on amd64).
+func SubZ(x, y uint) (r, co uint) {
+	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
+	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+	// loong64:"SUBV","SGTU"
+	// ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
+	// s390x:"SUBC"
+	// mips64:"SUBV","SGTU"
+	// riscv64: "SUB","SLTU"
+	return bits.Sub(x, y, 0)
+}
+
+// SubR returns only the difference of bits.Sub(x, y, ci) and discards the
+// borrow-out. The checks expect the subtract-with-borrow instructions and
+// forbid those that would materialize the borrow-out (for example NEGQ on
+// amd64 and NGC on arm64).
+func SubR(x, y, ci uint) uint {
+	// amd64:"NEGL","SBBQ",-"NEGQ"
+	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+	// loong64:"SUBV",-"SGTU"
+	// ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
+	// s390x:"SUBE"
+	// riscv64: "SUB",-"SLTU"
+	r, _ := bits.Sub(x, y, ci)
+	return r
+}
+// SubM subtracts the three-limb value q from p into r, threading the borrow
+// c through successive bits.Sub calls. The checks on the middle limb expect
+// a single subtract-with-borrow instruction with no borrow save or restore
+// around it.
+func SubM(p, q, r *[3]uint) {
+	var c uint
+	r[0], c = bits.Sub(p[0], q[0], c)
+	// amd64:"SBBQ",-"NEGL",-"NEGQ"
+	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
+	// ppc64x:-"SUBC", "SUBE", -"SUBZE", -"NEG"
+	// s390x:"SUBE"
+	r[1], c = bits.Sub(p[1], q[1], c)
+	r[2], c = bits.Sub(p[2], q[2], c)
+}
+
+// Sub64 returns bits.Sub64(x, y, ci) with all operands variable and both
+// results used. The checks expect the subtract-with-borrow sequence named
+// for each listed architecture.
+func Sub64(x, y, ci uint64) (r, co uint64) {
+	// amd64:"NEGL","SBBQ","NEGQ"
+	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// loong64:"SUBV","SGTU"
+	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
+	// s390x:"SUBE"
+	// mips64:"SUBV","SGTU"
+	// riscv64: "SUB","SLTU"
+	return bits.Sub64(x, y, ci)
+}
+
+// Sub64C returns bits.Sub64(x, 7, ci), i.e. with a constant subtrahend. The
+// checks expect the subtract-with-borrow sequence named for each listed
+// architecture.
+func Sub64C(x, ci uint64) (r, co uint64) {
+	// amd64:"NEGL","SBBQ","NEGQ"
+	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// loong64:"SUBV","SGTU"
+	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
+	// s390x:"SUBE"
+	// mips64:"SUBV","SGTU"
+	// riscv64: "SUB","SLTU"
+	return bits.Sub64(x, 7, ci)
+}
+
+// Sub64Z returns bits.Sub64(x, y, 0), i.e. with a constant zero borrow-in.
+// The checks expect a plain borrow-producing subtract and forbid the
+// instructions that would consume a borrow-in (for example SBCS on arm64
+// and NEGL on amd64).
+func Sub64Z(x, y uint64) (r, co uint64) {
+	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
+	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+	// loong64:"SUBV","SGTU"
+	// ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
+	// s390x:"SUBC"
+	// mips64:"SUBV","SGTU"
+	// riscv64: "SUB","SLTU"
+	return bits.Sub64(x, y, 0)
+}
+
+// Sub64R returns only the difference of bits.Sub64(x, y, ci) and discards
+// the borrow-out. The checks expect the subtract-with-borrow instructions
+// and forbid those that would materialize the borrow-out (for example NEGQ
+// on amd64 and NGC on arm64).
+func Sub64R(x, y, ci uint64) uint64 {
+	// amd64:"NEGL","SBBQ",-"NEGQ"
+	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+	// loong64:"SUBV",-"SGTU"
+	// ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
+	// s390x:"SUBE"
+	// riscv64: "SUB",-"SLTU"
+	r, _ := bits.Sub64(x, y, ci)
+	return r
+}
+// Sub64M subtracts the three-limb value q from p into r, threading the
+// borrow c through successive bits.Sub64 calls. The checks on the middle
+// limb expect a single subtract-with-borrow instruction with no borrow save
+// or restore around it.
+func Sub64M(p, q, r *[3]uint64) {
+	var c uint64
+	r[0], c = bits.Sub64(p[0], q[0], c)
+	// amd64:"SBBQ",-"NEGL",-"NEGQ"
+	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
+	r[1], c = bits.Sub64(p[1], q[1], c)
+	r[2], c = bits.Sub64(p[2], q[2], c)
+}
+
+// Sub64MSaveC performs a two-limb subtract in which each borrow-out is
+// stored to memory (c[0], c[1]) and the second subtract reads its borrow-in
+// back from c[0]. The ppc64x checks name the instructions expected for each
+// call.
+func Sub64MSaveC(p, q, r, c *[2]uint64) {
+	// ppc64x:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG"
+	r[0], c[0] = bits.Sub64(p[0], q[0], 0)
+	// ppc64x:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG"
+	r[1], c[1] = bits.Sub64(p[1], q[1], c[0])
+}
+
+// Sub64PanicOnOverflowEQ subtracts b from a and panics when the borrow-out
+// equals 1. The borrow is assigned back into the parameter b. The s390x
+// check expects a BRC with mask 12 for the test and forbids ADDE and SUBE.
+func Sub64PanicOnOverflowEQ(a, b uint64) uint64 {
+	r, b := bits.Sub64(a, b, 0)
+	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
+	if b == 1 {
+		panic("overflow")
+	}
+	return r
+}
+
+// Sub64PanicOnOverflowNE subtracts b from a and panics when the borrow-out
+// is not 0. The borrow is assigned back into the parameter b. The s390x
+// check expects a BRC with mask 12 for the test and forbids ADDE and SUBE.
+func Sub64PanicOnOverflowNE(a, b uint64) uint64 {
+	r, b := bits.Sub64(a, b, 0)
+	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
+	if b != 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+// Sub64PanicOnOverflowGT subtracts b from a and panics when the borrow-out
+// is greater than 0. The borrow is assigned back into the parameter b. The
+// s390x check expects a BRC with mask 12 for the test and forbids ADDE and
+// SUBE.
+func Sub64PanicOnOverflowGT(a, b uint64) uint64 {
+	r, b := bits.Sub64(a, b, 0)
+	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
+	if b > 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+// Sub64MPanicOnOverflowEQ subtracts two two-limb values and panics when the
+// final borrow-out equals 1. The s390x check expects a BRC with mask 12 for
+// the test.
+func Sub64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Sub64(a[0], b[0], c)
+	r[1], c = bits.Sub64(a[1], b[1], c)
+	// s390x:"BRC\t[$]12,"
+	if c == 1 {
+		panic("overflow")
+	}
+	return r
+}
+
+// Sub64MPanicOnOverflowNE subtracts two two-limb values and panics when the
+// final borrow-out is not 0. The s390x check expects a BRC with mask 12 for
+// the test.
+func Sub64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Sub64(a[0], b[0], c)
+	r[1], c = bits.Sub64(a[1], b[1], c)
+	// s390x:"BRC\t[$]12,"
+	if c != 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+// Sub64MPanicOnOverflowGT subtracts two two-limb values and panics when the
+// final borrow-out is greater than 0. The s390x check expects a BRC with
+// mask 12 for the test.
+func Sub64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Sub64(a[0], b[0], c)
+	r[1], c = bits.Sub64(a[1], b[1], c)
+	// s390x:"BRC\t[$]12,"
+	if c > 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+// --------------- //
+//    bits.Mul*    //
+// --------------- //
+
+// Mul returns bits.Mul(x, y) with both halves of the product used. The
+// checks expect the multiply instruction or instruction pair named for each
+// listed architecture.
+func Mul(x, y uint) (hi, lo uint) {
+	// amd64:"MULQ"
+	// arm64:"UMULH","MUL"
+	// ppc64x:"MULHDU","MULLD"
+	// s390x:"MLGR"
+	// mips64: "MULVU"
+	// riscv64:"MULHU","MUL"
+	return bits.Mul(x, y)
+}
+
+// Mul64 returns bits.Mul64(x, y) with both halves of the product used. The
+// checks expect the multiply instruction or instruction pair named for each
+// listed architecture.
+func Mul64(x, y uint64) (hi, lo uint64) {
+	// amd64:"MULQ"
+	// arm64:"UMULH","MUL"
+	// ppc64x:"MULHDU","MULLD"
+	// s390x:"MLGR"
+	// mips64: "MULVU"
+	// riscv64:"MULHU","MUL"
+	return bits.Mul64(x, y)
+}
+
+// Mul64HiOnly returns only the high half of bits.Mul64(x, y). The arm64 and
+// riscv64 checks expect the high-multiply instruction and forbid the
+// low-half multiply.
+func Mul64HiOnly(x, y uint64) uint64 {
+	// arm64:"UMULH",-"MUL"
+	// riscv64:"MULHU",-"MUL\t"
+	hi, _ := bits.Mul64(x, y)
+	return hi
+}
+
+// Mul64LoOnly returns only the low half of bits.Mul64(x, y). The arm64 and
+// riscv64 checks expect the plain multiply instruction and forbid the
+// high-multiply instruction.
+func Mul64LoOnly(x, y uint64) uint64 {
+	// arm64:"MUL",-"UMULH"
+	// riscv64:"MUL\t",-"MULHU"
+	_, lo := bits.Mul64(x, y)
+	return lo
+}
+
+// --------------- //
+//    bits.Div*    //
+// --------------- //
+
+// Div returns bits.Div(hi, lo, x). The amd64 check expects a DIVQ
+// instruction.
+func Div(hi, lo, x uint) (q, r uint) {
+	// amd64:"DIVQ"
+	return bits.Div(hi, lo, x)
+}
+
+// Div32 returns bits.Div32(hi, lo, x). The arm64 check expects ORR, UDIV
+// and MSUB, and forbids UREM.
+func Div32(hi, lo, x uint32) (q, r uint32) {
+	// arm64:"ORR","UDIV","MSUB",-"UREM"
+	return bits.Div32(hi, lo, x)
+}
+
+// Div64 returns bits.Div64(hi, lo, x). The amd64 check expects a DIVQ
+// instruction.
+func Div64(hi, lo, x uint64) (q, r uint64) {
+	// amd64:"DIVQ"
+	return bits.Div64(hi, lo, x)
+}
+
+// Div64degenerate calls bits.Div64 with a constant zero high word and the
+// constant divisor 5. The amd64 check forbids a DIVQ instruction.
+func Div64degenerate(x uint64) (q, r uint64) {
+	// amd64:-"DIVQ"
+	return bits.Div64(0, x, 5)
+}
--- a/test/codegen/memcombine.go
+++ b/test/codegen/memcombine.go
@@ -0,0 +1,940 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import (
+	"encoding/binary"
+	"runtime"
+)
+
+// ------------- //
+//    Loading    //
+// ------------- //
+
+func load_le64(b []byte) uint64 {
+	// amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]\t[^$]`,-`OR`
+	// s390x:`MOVDBR\s\(.*\),`
+	// arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
+	// ppc64le:`MOVD\s`,-`MOV[BHW]Z`
+	// ppc64:`MOVDBR\s`,-`MOV[BHW]Z`
+	return binary.LittleEndian.Uint64(b)
+}
+
+func load_le64_idx(b []byte, idx int) uint64 {
+	// amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]\t[^$]`,-`OR`
+	// s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
+	// ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
+	// ppc64:`MOVDBR\s`,-`MOV[BHW]Z\s`
+	return binary.LittleEndian.Uint64(b[idx:])
+}
+
+func load_le32(b []byte) uint32 {
+	// amd64:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
+	// 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
+	// s390x:`MOVWBR\s\(.*\),`
+	// arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
+	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
+	// ppc64:`MOVWBR\s`,-`MOV[BH]Z\s`
+	return binary.LittleEndian.Uint32(b)
+}
+
+func load_le32_idx(b []byte, idx int) uint32 {
+	// amd64:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
+	// 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
+	// s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
+	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
+	// ppc64:`MOVWBR\s`,-`MOV[BH]Z\s`
+	return binary.LittleEndian.Uint32(b[idx:])
+}
+
+func load_le16(b []byte) uint16 {
+	// amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHZ\s`,-`MOVBZ`
+	// arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
+	// s390x:`MOVHBR\s\(.*\),`
+	// ppc64:`MOVHBR\s`,-`MOVBZ`
+	return binary.LittleEndian.Uint16(b)
+}
+
+func load_le16_idx(b []byte, idx int) uint16 {
+	// amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHZ\s`,-`MOVBZ`
+	// ppc64:`MOVHBR\s`,-`MOVBZ`
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
+	// s390x:`MOVHBR\s\(.*\)\(.*\*1\),`
+	return binary.LittleEndian.Uint16(b[idx:])
+}
+
+func load_be64(b []byte) uint64 {
+	// amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
+	// amd64/v3:`MOVBEQ`
+	// s390x:`MOVD\s\(.*\),`
+	// arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W`
+	// ppc64le:`MOVDBR`,-`MOV[BHW]Z`
+	// ppc64:`MOVD`,-`MOV[BHW]Z`
+	return binary.BigEndian.Uint64(b)
+}
+
+func load_be64_idx(b []byte, idx int) uint64 {
+	// amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
+	// amd64/v3: `MOVBEQ\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	// s390x:`MOVD\s\(.*\)\(.*\*1\),`
+	// arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W`
+	// ppc64le:`MOVDBR`,-`MOV[BHW]Z`
+	// ppc64:`MOVD`,-`MOV[BHW]Z`
+	return binary.BigEndian.Uint64(b[idx:])
+}
+
+func load_be32(b []byte) uint32 {
+	// amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR`
+	// amd64/v3: `MOVBEL`
+	// s390x:`MOVWZ\s\(.*\),`
+	// arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W`
+	// ppc64le:`MOVWBR`,-`MOV[BH]Z`
+	// ppc64:`MOVWZ`,-`MOV[BH]Z`
+	return binary.BigEndian.Uint32(b)
+}
+
+func load_be32_idx(b []byte, idx int) uint32 {
+	// amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR`
+	// amd64/v3: `MOVBEL\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	// s390x:`MOVWZ\s\(.*\)\(.*\*1\),`
+	// arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W`
+	// ppc64le:`MOVWBR`,-`MOV[BH]Z`
+	// ppc64:`MOVWZ`,-`MOV[BH]Z`
+	return binary.BigEndian.Uint32(b[idx:])
+}
+
+func load_be16(b []byte) uint16 {
+	// amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
+	// arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB`
+	// ppc64le:`MOVHBR`,-`MOVBZ`
+	// ppc64:`MOVHZ`,-`MOVBZ`
+	// s390x:`MOVHZ\s\(.*\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
+	return binary.BigEndian.Uint16(b)
+}
+
+func load_be16_idx(b []byte, idx int) uint16 {
+	// amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
+	// arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
+	// ppc64le:`MOVHBR`,-`MOVBZ`
+	// ppc64:`MOVHZ`,-`MOVBZ`
+	// s390x:`MOVHZ\s\(.*\)\(.*\*1\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
+	return binary.BigEndian.Uint16(b[idx:])
+}
+
+func load_le_byte2_uint16(s []byte) uint16 {
+	// arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
+	// 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+	// amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
+	// ppc64:`MOVHBR`,-`MOVBZ`
+	return uint16(s[0]) | uint16(s[1])<<8
+}
+
+func load_le_byte2_uint16_inv(s []byte) uint16 {
+	// arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
+	// 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+	// amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
+	// ppc64:`MOVHBR`,-`MOVBZ`
+	return uint16(s[1])<<8 | uint16(s[0])
+}
+
+func load_le_byte4_uint32(s []byte) uint32 {
+	// arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+	// 386:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+	// ppc64le:`MOVWZ\t\(R[0-9]+\)`,-`MOV[BH]Z`
+	// ppc64:`MOVWBR`,-`MOV[BH]Z`
+	return uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24
+}
+
+func load_le_byte4_uint32_inv(s []byte) uint32 {
+	// arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+	// ppc64le:`MOVWZ`,-`MOV[BH]Z`
+	// ppc64:`MOVWBR`,-`MOV[BH]Z`
+	return uint32(s[3])<<24 | uint32(s[2])<<16 | uint32(s[1])<<8 | uint32(s[0])
+}
+
+func load_le_byte8_uint64(s []byte) uint64 {
+	// arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+	// amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]\t[^$]`,-`OR`
+	// ppc64le:`MOVD\t\(R[0-9]+\)`,-`MOV[BHW]Z`
+	// ppc64:`MOVDBR`,-`MOV[WHB]Z`
+	return uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24 | uint64(s[4])<<32 | uint64(s[5])<<40 | uint64(s[6])<<48 | uint64(s[7])<<56
+}
+
+func load_le_byte8_uint64_inv(s []byte) uint64 {
+	// arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+	// ppc64le:`MOVD`,-`MOV[WHB]Z`
+	// ppc64:`MOVDBR`,-`MOV[WHB]Z`
+	return uint64(s[7])<<56 | uint64(s[6])<<48 | uint64(s[5])<<40 | uint64(s[4])<<32 | uint64(s[3])<<24 | uint64(s[2])<<16 | uint64(s[1])<<8 | uint64(s[0])
+}
+
+func load_be_byte2_uint16(s []byte) uint16 {
+	// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
+	// ppc64:`MOVHZ`,-`MOVBZ`
+	return uint16(s[0])<<8 | uint16(s[1])
+}
+
+func load_be_byte2_uint16_inv(s []byte) uint16 {
+	// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
+	// ppc64:`MOVHZ`,-`MOVBZ`
+	return uint16(s[1]) | uint16(s[0])<<8
+}
+
+func load_be_byte4_uint32(s []byte) uint32 {
+	// arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
+	// ppc64le:`MOVWBR`,-`MOV[HB]Z`
+	// ppc64:`MOVWZ`,-`MOV[HB]Z`
+	return uint32(s[0])<<24 | uint32(s[1])<<16 | uint32(s[2])<<8 | uint32(s[3])
+}
+
+func load_be_byte4_uint32_inv(s []byte) uint32 {
+	// arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
+	// amd64/v1,amd64/v2:`MOVL\s\([A-Z]+\)`,`BSWAPL`,-`MOV[BW]`,-`OR`
+	// amd64/v3: `MOVBEL`
+	// ppc64le:`MOVWBR`,-`MOV[HB]Z`
+	// ppc64:`MOVWZ`,-`MOV[HB]Z`
+	return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24
+}
+
+func load_be_byte8_uint64(s []byte) uint64 {
+	// arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
+	// ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
+	// ppc64:`MOVD`,-`MOV[WHB]Z`
+	return uint64(s[0])<<56 | uint64(s[1])<<48 | uint64(s[2])<<40 | uint64(s[3])<<32 | uint64(s[4])<<24 | uint64(s[5])<<16 | uint64(s[6])<<8 | uint64(s[7])
+}
+
+func load_be_byte8_uint64_inv(s []byte) uint64 {
+	// arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
+	// amd64/v1,amd64/v2:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
+	// amd64/v3: `MOVBEQ`
+	// ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
+	// ppc64:`MOVD`,-`MOV[BHW]Z`
+	return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56
+}
+
+func load_le_byte2_uint16_idx(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
+	// 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHZ`,-`MOVBZ`
+	// ppc64:`MOVHBR`,-`MOVBZ`
+	return uint16(s[idx]) | uint16(s[idx+1])<<8
+}
+
+func load_le_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
+	// 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHZ`,-`MOVBZ`
+	// ppc64:`MOVHBR`,-`MOVBZ`
+	return uint16(s[idx+1])<<8 | uint16(s[idx])
+}
+
+func load_le_byte4_uint32_idx(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+	// amd64:`MOVL\s\([A-Z]+\)\([A-Z]+`,-`MOV[BW]`,-`OR`
+	return uint32(s[idx]) | uint32(s[idx+1])<<8 | uint32(s[idx+2])<<16 | uint32(s[idx+3])<<24
+}
+
+func load_le_byte4_uint32_idx_inv(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+	return uint32(s[idx+3])<<24 | uint32(s[idx+2])<<16 | uint32(s[idx+1])<<8 | uint32(s[idx])
+}
+
+func load_le_byte8_uint64_idx(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+	// amd64:`MOVQ\s\([A-Z]+\)\([A-Z]+`,-`MOV[BWL]`,-`OR`
+	return uint64(s[idx]) | uint64(s[idx+1])<<8 | uint64(s[idx+2])<<16 | uint64(s[idx+3])<<24 | uint64(s[idx+4])<<32 | uint64(s[idx+5])<<40 | uint64(s[idx+6])<<48 | uint64(s[idx+7])<<56
+}
+
+func load_le_byte8_uint64_idx_inv(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+	return uint64(s[idx+7])<<56 | uint64(s[idx+6])<<48 | uint64(s[idx+5])<<40 | uint64(s[idx+4])<<32 | uint64(s[idx+3])<<24 | uint64(s[idx+2])<<16 | uint64(s[idx+1])<<8 | uint64(s[idx])
+}
+
+func load_be_byte2_uint16_idx(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+	return uint16(s[idx])<<8 | uint16(s[idx+1])
+}
+
+func load_be_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+	return uint16(s[idx+1]) | uint16(s[idx])<<8
+}
+
+func load_be_byte4_uint32_idx(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
+	return uint32(s[idx])<<24 | uint32(s[idx+1])<<16 | uint32(s[idx+2])<<8 | uint32(s[idx+3])
+}
+
+func load_be_byte8_uint64_idx(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
+	return uint64(s[idx])<<56 | uint64(s[idx+1])<<48 | uint64(s[idx+2])<<40 | uint64(s[idx+3])<<32 | uint64(s[idx+4])<<24 | uint64(s[idx+5])<<16 | uint64(s[idx+6])<<8 | uint64(s[idx+7])
+}
+
+func load_le_byte2_uint16_idx2(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
+	return uint16(s[idx<<1]) | uint16(s[(idx<<1)+1])<<8
+}
+
+func load_le_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
+	return uint16(s[(idx<<1)+1])<<8 | uint16(s[idx<<1])
+}
+
+func load_le_byte4_uint32_idx4(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
+	return uint32(s[idx<<2]) | uint32(s[(idx<<2)+1])<<8 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+3])<<24
+}
+
+func load_le_byte4_uint32_idx4_inv(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
+	return uint32(s[(idx<<2)+3])<<24 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+1])<<8 | uint32(s[idx<<2])
+}
+
+func load_le_byte8_uint64_idx8(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
+	return uint64(s[idx<<3]) | uint64(s[(idx<<3)+1])<<8 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+7])<<56
+}
+
+func load_le_byte8_uint64_idx8_inv(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
+	return uint64(s[(idx<<3)+7])<<56 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+1])<<8 | uint64(s[idx<<3])
+}
+
+func load_be_byte2_uint16_idx2(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
+	return uint16(s[idx<<1])<<8 | uint16(s[(idx<<1)+1])
+}
+
+func load_be_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
+	return uint16(s[(idx<<1)+1]) | uint16(s[idx<<1])<<8
+}
+
+func load_be_byte4_uint32_idx4(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
+	return uint32(s[idx<<2])<<24 | uint32(s[(idx<<2)+1])<<16 | uint32(s[(idx<<2)+2])<<8 | uint32(s[(idx<<2)+3])
+}
+
+func load_be_byte8_uint64_idx8(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
+	return uint64(s[idx<<3])<<56 | uint64(s[(idx<<3)+1])<<48 | uint64(s[(idx<<3)+2])<<40 | uint64(s[(idx<<3)+3])<<32 | uint64(s[(idx<<3)+4])<<24 | uint64(s[(idx<<3)+5])<<16 | uint64(s[(idx<<3)+6])<<8 | uint64(s[(idx<<3)+7])
+}
+
+// Some tougher cases for the memcombine pass.
+
+func reassoc_load_uint32(b []byte) uint32 {
+	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+	return (uint32(b[0]) | uint32(b[1])<<8) | (uint32(b[2])<<16 | uint32(b[3])<<24)
+}
+
+func extrashift_load_uint32(b []byte) uint32 {
+	// amd64:`MOVL\s\([A-Z]+\)`,`SHLL\s[$]2`,-`MOV[BW]`,-`OR`
+	return uint32(b[0])<<2 | uint32(b[1])<<10 | uint32(b[2])<<18 | uint32(b[3])<<26
+}
+
+func outoforder_load_uint32(b []byte) uint32 {
+	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+	return uint32(b[0]) | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[3])<<24
+}
+
+func extraOr_load_uint32(b []byte, x, y uint32) uint32 {
+	// amd64:`ORL\s\([A-Z]+\)`,-`MOV[BW]`
+	return x | binary.LittleEndian.Uint32(b) | y
+	// TODO: Note that
+	//   x | uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | y
+	// doesn't work because it associates in a way that memcombine can't detect.
+}
+
+// Check load combining across function calls.
+
+func fcall_byte(a [2]byte) [2]byte {
+	return fcall_byte(fcall_byte(a)) // amd64:`MOVW`
+}
+
+func fcall_uint16(a [2]uint16) [2]uint16 {
+	return fcall_uint16(fcall_uint16(a)) // amd64:`MOVL`
+}
+
+func fcall_uint32(a [2]uint32) [2]uint32 {
+	return fcall_uint32(fcall_uint32(a)) // amd64:`MOVQ`
+}
+
+// We want to merge load+op in the first function, but not in the
+// second. See Issue 19595.
+func load_op_merge(p, q *int) {
+	x := *p // amd64:`ADDQ\t\(`
+	*q += x // The combined nilcheck and load would normally have this line number, but we want that combined operation to have the line number of the nil check instead (see #33724).
+}
+func load_op_no_merge(p, q *int) {
+	x := *p
+	for i := 0; i < 10; i++ {
+		*q += x // amd64:`ADDQ\t[A-Z]`
+	}
+}
+
+// Make sure offsets are folded into loads and stores.
+func offsets_fold(_, a [20]byte) (b [20]byte) {
+	// arm64:`MOVD\tcommand-line-arguments\.a\+[0-9]+\(FP\), R[0-9]+`,`MOVD\tR[0-9]+, command-line-arguments\.b\+[0-9]+\(FP\)`
+	b = a
+	return
+}
+
+// Make sure we don't put pointers in SSE registers across safe
+// points.
+
+func safe_point(p, q *[2]*int) {
+	a, b := p[0], p[1] // amd64:-`MOVUPS`
+	runtime.GC()
+	q[0], q[1] = a, b // amd64:-`MOVUPS`
+}
+
+// ------------- //
+//    Storing    //
+// ------------- //
+
+func store_le64(b []byte, x uint64) {
+	// amd64:`MOVQ\s.*\(.*\)$`,-`SHR.`
+	// arm64:`MOVD`,-`MOV[WBH]`
+	// ppc64le:`MOVD\s`,-`MOV[BHW]\s`
+	// ppc64:`MOVDBR`,-`MOVB\s`
+	// s390x:`MOVDBR\s.*\(.*\)$`
+	binary.LittleEndian.PutUint64(b, x)
+}
+
+func store_le64_idx(b []byte, x uint64, idx int) {
+	// amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
+	// arm64:`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`
+	// ppc64le:`MOVD\s`,-`MOV[BHW]\s`
+	// ppc64:`MOVDBR`,-`MOVBZ`
+	// s390x:`MOVDBR\s.*\(.*\)\(.*\*1\)$`
+	binary.LittleEndian.PutUint64(b[idx:], x)
+}
+
+func store_le64_idx2(dst []byte, d, length, offset int) []byte {
+	a := dst[d : d+length]
+	b := dst[d-offset:]
+	// amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
+	binary.LittleEndian.PutUint64(a, binary.LittleEndian.Uint64(b))
+	return dst
+}
+
+func store_le64_idx_const(b []byte, idx int) {
+	// amd64:`MOVQ\s\$123, \(.*\)\(.*\*1\)$`
+	binary.LittleEndian.PutUint64(b[idx:], 123)
+}
+
+func store_le64_load(b []byte, x *[8]byte) {
+	_ = b[8]
+	// amd64:-`MOV[BWL]`
+	// arm64:-`MOV[BWH]`
+	// ppc64le:`MOVD\s`,-`MOV[BWH]Z`
+	// ppc64:`MOVDBR`
+	// s390x:-`MOVB`,-`MOV[WH]BR`
+	binary.LittleEndian.PutUint64(b, binary.LittleEndian.Uint64(x[:]))
+}
+
+func store_le32(b []byte, x uint32) {
+	// amd64:`MOVL\s`
+	// arm64:`MOVW`,-`MOV[BH]`
+	// ppc64le:`MOVW\s`
+	// ppc64:`MOVWBR`
+	// s390x:`MOVWBR\s.*\(.*\)$`
+	binary.LittleEndian.PutUint32(b, x)
+}
+
+func store_le32_idx(b []byte, x uint32, idx int) {
+	// amd64:`MOVL\s`
+	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`
+	// ppc64le:`MOVW\s`
+	// ppc64:`MOVWBR`
+	// s390x:`MOVWBR\s.*\(.*\)\(.*\*1\)$`
+	binary.LittleEndian.PutUint32(b[idx:], x)
+}
+
+func store_le32_idx_const(b []byte, idx int) {
+	// amd64:`MOVL\s\$123, \(.*\)\(.*\*1\)$`
+	// ppc64x:`MOVW\s`,-`MOV[HB]`
+	binary.LittleEndian.PutUint32(b[idx:], 123)
+}
+
+func store_le16(b []byte, x uint16) {
+	// amd64:`MOVW\s`
+	// arm64:`MOVH`,-`MOVB`
+	// ppc64le:`MOVH\s`
+	// ppc64:`MOVHBR`
+	// s390x:`MOVHBR\s.*\(.*\)$`
+	binary.LittleEndian.PutUint16(b, x)
+}
+
+func store_le16_idx(b []byte, x uint16, idx int) {
+	// amd64:`MOVW\s`
+	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+	// ppc64le:`MOVH\s`
+	// ppc64:`MOVHBR\s`
+	// s390x:`MOVHBR\s.*\(.*\)\(.*\*1\)$`
+	binary.LittleEndian.PutUint16(b[idx:], x)
+}
+
+func store_le16_idx_const(b []byte, idx int) {
+	// amd64:`MOVW\s\$123, \(.*\)\(.*\*1\)$`
+	// ppc64x:`MOVH\s`
+	binary.LittleEndian.PutUint16(b[idx:], 123)
+}
+
+func store_be64(b []byte, x uint64) {
+	// amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
+	// amd64/v3: `MOVBEQ`
+	// arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W`
+	// ppc64le:`MOVDBR`
+	// ppc64:`MOVD\s`
+	// s390x:`MOVD\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint64(b, x)
+}
+
+func store_be64_idx(b []byte, x uint64, idx int) {
+	// amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
+	// amd64/v3:`MOVBEQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	// arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW`
+	// ppc64le:`MOVDBR`
+	// ppc64:`MOVD\s`
+	// s390x:`MOVD\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint64(b[idx:], x)
+}
+
+func store_be32(b []byte, x uint32) {
+	// amd64/v1,amd64/v2:`BSWAPL`,-`SHR.`
+	// amd64/v3:`MOVBEL`
+	// arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W`
+	// ppc64le:`MOVWBR`
+	// ppc64:`MOVW\s`
+	// s390x:`MOVW\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint32(b, x)
+}
+
+func store_be64_load(b, x *[8]byte) {
+	// arm64:-`REV`
+	// amd64:-`BSWAPQ`
+	binary.BigEndian.PutUint64(b[:], binary.BigEndian.Uint64(x[:]))
+}
+
+func store_be32_load(b, x *[8]byte) {
+	// arm64:-`REVW`
+	// amd64:-`BSWAPL`
+	binary.BigEndian.PutUint32(b[:], binary.BigEndian.Uint32(x[:]))
+}
+
+func store_be32_idx(b []byte, x uint32, idx int) {
+	// amd64/v1,amd64/v2:`BSWAPL`,-`SHR.`
+	// amd64/v3:`MOVBEL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W`
+	// ppc64le:`MOVWBR`
+	// ppc64:`MOVW\s`
+	// s390x:`MOVW\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint32(b[idx:], x)
+}
+
+func store_be16(b []byte, x uint16) {
+	// amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
+	// amd64/v3:`MOVBEW`,-`ROLW`
+	// arm64:`MOVH`,`REV16W`,-`MOVB`
+	// ppc64le:`MOVHBR`
+	// ppc64:`MOVH\s`
+	// s390x:`MOVH\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint16(b, x)
+}
+
+func store_be16_idx(b []byte, x uint16, idx int) {
+	// amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
+	// amd64/v3:`MOVBEW\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
+	// ppc64le:`MOVHBR`
+	// ppc64:`MOVH\s`
+	// s390x:`MOVH\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint16(b[idx:], x)
+}
+
+func store_le_byte_2(b []byte, val uint16) {
+	_ = b[2]
+	// arm64:`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
+	// 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+	// ppc64le:`MOVH\s`,-`MOVB`
+	// ppc64:`MOVHBR`,-`MOVB`
+	b[1], b[2] = byte(val), byte(val>>8)
+}
+
+func store_le_byte_2_inv(b []byte, val uint16) {
+	_ = b[2]
+	// 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+	// ppc64le:`MOVH\s`,-`MOVB`
+	// ppc64:`MOVHBR`,-`MOVB`
+	b[2], b[1] = byte(val>>8), byte(val)
+}
+
+func store_le_byte_4(b []byte, val uint32) {
+	_ = b[4]
+	// arm64:`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`
+	// 386:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
+	// amd64:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
+	// ppc64le:`MOVW\s`
+	// ppc64:`MOVWBR\s`
+	b[1], b[2], b[3], b[4] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24)
+}
+
+func store_le_byte_8(b []byte, val uint64) {
+	_ = b[8]
+	// arm64:`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`
+	// amd64:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
+	// ppc64le:`MOVD\s`,-`MOVW`
+	// ppc64:`MOVDBR\s`
+	b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24), byte(val>>32), byte(val>>40), byte(val>>48), byte(val>>56)
+}
+
+func store_be_byte_2(b []byte, val uint16) {
+	_ = b[2]
+	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
+	// amd64/v1,amd64/v2:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+	// amd64/v3: `MOVBEW`
+	// ppc64le:`MOVHBR`
+	// ppc64:`MOVH\s`
+	b[1], b[2] = byte(val>>8), byte(val)
+}
+
+func store_be_byte_4(b []byte, val uint32) {
+	_ = b[4]
+	// arm64:`REVW`,`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
+	// amd64/v1,amd64/v2:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
+	// amd64/v3:`MOVBEL\s[A-Z]+,\s1\([A-Z]+\)`
+	// ppc64le:`MOVWBR`
+	// ppc64:`MOVW\s`
+	b[1], b[2], b[3], b[4] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_be_byte_8(b []byte, val uint64) {
+	_ = b[8]
+	// arm64:`REV`,`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`,-`REV16W`,-`REVW`
+	// amd64/v1,amd64/v2:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
+	// amd64/v3:`MOVBEQ\s[A-Z]+,\s1\([A-Z]+\)`, -`MOVBEL`
+	// ppc64le:`MOVDBR`
+	// ppc64:`MOVD`
+	b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val>>56), byte(val>>48), byte(val>>40), byte(val>>32), byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_le_byte_2_idx(b []byte, idx int, val uint16) {
+	_, _ = b[idx+0], b[idx+1]
+	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+	// 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
+	// ppc64le:`MOVH\s`
+	// ppc64:`MOVHBR`
+	b[idx+1], b[idx+0] = byte(val>>8), byte(val)
+}
+
+func store_le_byte_2_idx_inv(b []byte, idx int, val uint16) {
+	_, _ = b[idx+0], b[idx+1]
+	// 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
+	// ppc64le:`MOVH\s`
+	// ppc64:`MOVHBR`
+	b[idx+0], b[idx+1] = byte(val), byte(val>>8)
+}
+
+func store_le_byte_4_idx(b []byte, idx int, val uint32) {
+	_, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3]
+	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`
+	// ppc64le:`MOVW\s`
+	// ppc64:`MOVWBR`
+	b[idx+3], b[idx+2], b[idx+1], b[idx+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_be_byte_2_idx(b []byte, idx int, val uint16) {
+	_, _ = b[idx+0], b[idx+1]
+	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+	// ppc64le:`MOVHBR`
+	// ppc64:`MOVH\s`
+	b[idx+0], b[idx+1] = byte(val>>8), byte(val)
+}
+
+func store_be_byte_4_idx(b []byte, idx int, val uint32) {
+	_, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3]
+	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
+	// ppc64le:`MOVWBR`
+	// ppc64:`MOVW\s`
+	b[idx+0], b[idx+1], b[idx+2], b[idx+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_be_byte_2_idx2(b []byte, idx int, val uint16) {
+	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
+	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
+	// ppc64le:`MOVHBR`
+	// ppc64:`MOVH\s`
+	b[(idx<<1)+0], b[(idx<<1)+1] = byte(val>>8), byte(val)
+}
+
+func store_le_byte_2_idx2(b []byte, idx int, val uint16) {
+	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
+	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
+	// ppc64le:`MOVH\s`
+	// ppc64:`MOVHBR`
+	b[(idx<<1)+1], b[(idx<<1)+0] = byte(val>>8), byte(val)
+}
+
+func store_be_byte_4_idx4(b []byte, idx int, val uint32) {
+	_, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3]
+	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`,-`REV16W`
+	// ppc64le:`MOVWBR`
+	// ppc64:`MOVW\s`
+	b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_le_byte_4_idx4_inv(b []byte, idx int, val uint32) {
+	_, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3]
+	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`
+	// ppc64le:`MOVW\s`
+	// ppc64:`MOVWBR`
+	b[(idx<<2)+3], b[(idx<<2)+2], b[(idx<<2)+1], b[(idx<<2)+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+// ------------- //
+//    Zeroing    //
+// ------------- //
+
+// Check that zero stores are combined into larger stores
+
+func zero_byte_2(b1, b2 []byte) {
+	// bounds checks to guarantee safety of writes below
+	_, _ = b1[1], b2[1]
+	// arm64:"MOVH\tZR",-"MOVB"
+	// amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
+	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
+	// ppc64x:`MOVH\s`
+	b1[0], b1[1] = 0, 0
+	// arm64:"MOVH\tZR",-"MOVB"
+	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
+	// amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
+	// ppc64x:`MOVH`
+	b2[1], b2[0] = 0, 0
+}
+
+func zero_byte_4(b1, b2 []byte) {
+	_, _ = b1[3], b2[3]
+	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
+	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
+	// ppc64x:`MOVW\s`
+	b1[0], b1[1], b1[2], b1[3] = 0, 0, 0, 0
+	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+	// ppc64x:`MOVW\s`
+	b2[2], b2[3], b2[1], b2[0] = 0, 0, 0, 0
+}
+
+func zero_byte_8(b []byte) {
+	_ = b[7]
+	b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+	b[4], b[5], b[6], b[7] = 0, 0, 0, 0
+}
+
+func zero_byte_16(b []byte) {
+	_ = b[15]
+	b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
+	b[4], b[5], b[6], b[7] = 0, 0, 0, 0
+	b[8], b[9], b[10], b[11] = 0, 0, 0, 0
+	b[12], b[13], b[14], b[15] = 0, 0, 0, 0
+}
+
+func zero_byte_30(a *[30]byte) {
+	*a = [30]byte{} // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
+}
+
+func zero_byte_39(a *[39]byte) {
+	*a = [39]byte{} // arm64:"MOVD",-"MOVB",-"MOVH",-"MOVW"
+}
+
+func zero_byte_2_idx(b []byte, idx int) {
+	_, _ = b[idx+0], b[idx+1]
+	// arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+	// ppc64x:`MOVH\s`
+	b[idx+0], b[idx+1] = 0, 0
+}
+
+func zero_byte_2_idx2(b []byte, idx int) {
+	_, _ = b[(idx<<1)+0], b[(idx<<1)+1]
+	// arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
+	// ppc64x:`MOVH\s`
+	b[(idx<<1)+0], b[(idx<<1)+1] = 0, 0
+}
+
+func zero_uint16_2(h1, h2 []uint16) {
+	_, _ = h1[1], h2[1]
+	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
+	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
+	// ppc64x:`MOVW\s`
+	h1[0], h1[1] = 0, 0
+	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
+	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
+	// ppc64x:`MOVW`
+	h2[1], h2[0] = 0, 0
+}
+
+func zero_uint16_4(h1, h2 []uint16) {
+	_, _ = h1[3], h2[3]
+	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
+	// ppc64x:`MOVD\s`
+	h1[0], h1[1], h1[2], h1[3] = 0, 0, 0, 0
+	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+	// ppc64x:`MOVD\s`
+	h2[2], h2[3], h2[1], h2[0] = 0, 0, 0, 0
+}
+
+func zero_uint16_8(h []uint16) {
+	_ = h[7]
+	h[0], h[1], h[2], h[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+	h[4], h[5], h[6], h[7] = 0, 0, 0, 0
+}
+
+func zero_uint32_2(w1, w2 []uint32) {
+	_, _ = w1[1], w2[1]
+	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
+	// ppc64x:`MOVD\s`
+	w1[0], w1[1] = 0, 0
+	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
+	// ppc64x:`MOVD\s`
+	w2[1], w2[0] = 0, 0
+}
+
+func zero_uint32_4(w1, w2 []uint32) {
+	_, _ = w1[3], w2[3]
+	w1[0], w1[1], w1[2], w1[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+	w2[2], w2[3], w2[1], w2[0] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+}
+
+func zero_uint64_2(d1, d2 []uint64) {
+	_, _ = d1[1], d2[1]
+	d1[0], d1[1] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+	d2[1], d2[0] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+}
+
+func loadstore(p, q *[4]uint8) {
+	// amd64:"MOVL",-"MOVB"
+	// arm64:"MOVWU",-"MOVBU"
+	x0, x1, x2, x3 := q[0], q[1], q[2], q[3]
+	// amd64:"MOVL",-"MOVB"
+	// arm64:"MOVW",-"MOVB"
+	p[0], p[1], p[2], p[3] = x0, x1, x2, x3
+}
+
+type S1 struct {
+	a, b int16
+}
+
+func loadstore2(p, q *S1) {
+	// amd64:"MOVL",-"MOVWLZX"
+	// arm64:"MOVWU",-"MOVH"
+	a, b := p.a, p.b
+	// amd64:"MOVL",-"MOVW"
+	// arm64:"MOVW",-"MOVH"
+	q.a, q.b = a, b
+}
+
+func wideStore(p *[8]uint64) {
+	if p == nil {
+		return
+	}
+
+	// amd64:"MOVUPS",-"MOVQ"
+	// arm64:"STP",-"MOVD"
+	p[0] = 0
+	// amd64:-"MOVUPS",-"MOVQ"
+	// arm64:-"STP",-"MOVD"
+	p[1] = 0
+}
+
+func wideStore2(p *[8]uint64, x, y uint64) {
+	if p == nil {
+		return
+	}
+
+	// s390x:"STMG"
+	p[0] = x
+	// s390x:-"STMG",-"MOVD"
+	p[1] = y
+}
+
+func store32le(p *struct{ a, b uint32 }, x uint64) {
+	// amd64:"MOVQ",-"MOVL",-"SHRQ"
+	// arm64:"MOVD",-"MOVW",-"LSR"
+	// ppc64le:"MOVD",-"MOVW",-"SRD"
+	p.a = uint32(x)
+	// amd64:-"MOVL",-"SHRQ"
+	// arm64:-"MOVW",-"LSR"
+	// ppc64le:-"MOVW",-"SRD"
+	p.b = uint32(x >> 32)
+}
+func store32be(p *struct{ a, b uint32 }, x uint64) {
+	// ppc64:"MOVD",-"MOVW",-"SRD"
+	// s390x:"MOVD",-"MOVW",-"SRD"
+	p.a = uint32(x >> 32)
+	// ppc64:-"MOVW",-"SRD"
+	// s390x:-"MOVW",-"SRD"
+	p.b = uint32(x)
+}
+func store16le(p *struct{ a, b uint16 }, x uint32) {
+	// amd64:"MOVL",-"MOVW",-"SHRL"
+	// arm64:"MOVW",-"MOVH",-"UBFX"
+	// ppc64le:"MOVW",-"MOVH",-"SRW"
+	p.a = uint16(x)
+	// amd64:-"MOVW",-"SHRL"
+	// arm64:-"MOVH",-"UBFX"
+	// ppc64le:-"MOVH",-"SRW"
+	p.b = uint16(x >> 16)
+}
+func store16be(p *struct{ a, b uint16 }, x uint32) {
+	// ppc64:"MOVW",-"MOVH",-"SRW"
+	// s390x:"MOVW",-"MOVH",-"SRW"
+	p.a = uint16(x >> 16)
+	// ppc64:-"MOVH",-"SRW"
+	// s390x:-"MOVH",-"SRW"
+	p.b = uint16(x)
+}
+
+func storeBoolConst(p *struct{ a, b bool }) {
+	// amd64:"MOVW",-"MOVB"
+	// arm64:"MOVH",-"MOVB"
+	p.a = true
+	p.b = true
+}
+func issue66413(p *struct {
+	a byte
+	b bool
+	c bool
+	d int8
+}) {
+	// amd64:"MOVL",-"MOVB"
+	// arm64:"MOVW",-"MOVB"
+	p.a = 31
+	p.b = false
+	p.c = true
+	p.d = 12
+}
--- a/test/codegen/memops.go
+++ b/test/codegen/memops.go
@@ -0,0 +1,403 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+var x [2]bool
+var x8 [2]uint8
+var x16 [2]uint16
+var x32 [2]uint32
+var x64 [2]uint64
+
+func compMem1() int {
+	// amd64:`CMPB\tcommand-line-arguments.x\+1\(SB\), [$]0`
+	if x[1] {
+		return 1
+	}
+	// amd64:`CMPB\tcommand-line-arguments.x8\+1\(SB\), [$]7`
+	if x8[1] == 7 {
+		return 1
+	}
+	// amd64:`CMPW\tcommand-line-arguments.x16\+2\(SB\), [$]7`
+	if x16[1] == 7 {
+		return 1
+	}
+	// amd64:`CMPL\tcommand-line-arguments.x32\+4\(SB\), [$]7`
+	if x32[1] == 7 {
+		return 1
+	}
+	// amd64:`CMPQ\tcommand-line-arguments.x64\+8\(SB\), [$]7`
+	if x64[1] == 7 {
+		return 1
+	}
+	return 0
+}
+
+type T struct {
+	x   bool
+	x8  uint8
+	x16 uint16
+	x32 uint32
+	x64 uint64
+	a   [2]int // force T to be passed in memory
+}
+
+func compMem2(t T) int {
+	// amd64:`CMPB\t.*\(SP\), [$]0`
+	if t.x {
+		return 1
+	}
+	// amd64:`CMPB\t.*\(SP\), [$]7`
+	if t.x8 == 7 {
+		return 1
+	}
+	// amd64:`CMPW\t.*\(SP\), [$]7`
+	if t.x16 == 7 {
+		return 1
+	}
+	// amd64:`CMPL\t.*\(SP\), [$]7`
+	if t.x32 == 7 {
+		return 1
+	}
+	// amd64:`CMPQ\t.*\(SP\), [$]7`
+	if t.x64 == 7 {
+		return 1
+	}
+	return 0
+}
+
+func compMem3(x, y *int) (int, bool) {
+	// We can do comparisons of a register with memory even if
+	// the register is used subsequently.
+	r := *x
+	// amd64:`CMPQ\t\(`
+	// 386:`CMPL\t\(`
+	return r, r < *y
+}
+
+// The following functions test that indexed load/store operations get generated.
+
+func idxInt8(x, y []int8, i int) {
+	var t int8
+	// amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	//   386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	//   386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	y[i+1] = t
+	// amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	//   386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	x[i+1] = 77
+}
+
+func idxInt16(x, y []int16, i int) {
+	var t int16
+	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	//   386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	//   386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	y[i+1] = t
+	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	//   386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	t = x[16*i+1]
+	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	//   386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	y[16*i+1] = t
+	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	//   386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	x[i+1] = 77
+	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	//   386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	x[16*i+1] = 77
+}
+
+func idxInt32(x, y []int32, i int) {
+	var t int32
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	//   386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//   386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	y[i+1] = t
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	t = x[2*i+1]
+	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	y[2*i+1] = t
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	//   386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	t = x[16*i+1]
+	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	//   386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	y[16*i+1] = t
+	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//   386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+1] = 77
+	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	//   386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	x[16*i+1] = 77
+}
+
+func idxInt64(x, y []int64, i int) {
+	var t int64
+	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	y[i+1] = t
+	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
+	t = x[16*i+1]
+	// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	y[16*i+1] = t
+	// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+1] = 77
+	// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	x[16*i+1] = 77
+}
+
+func idxFloat32(x, y []float32, i int) {
+	var t float32
+	//    amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	//    arm64: `FMOVS\t\(R[0-9]*\)\(R[0-9]*<<2\), F[0-9]+`
+	t = x[i+1]
+	//    amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//    arm64: `FMOVS\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<2\)`
+	y[i+1] = t
+	//    amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+	// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+	t = x[16*i+1]
+	//    amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	y[16*i+1] = t
+}
+
+func idxFloat64(x, y []float64, i int) {
+	var t float64
+	//    amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	//    arm64: `FMOVD\t\(R[0-9]*\)\(R[0-9]*<<3\), F[0-9]+`
+	t = x[i+1]
+	//    amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	//    arm64: `FMOVD\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<3\)`
+	y[i+1] = t
+	//    amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+	// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+	t = x[16*i+1]
+	//    amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	y[16*i+1] = t
+}
+
+func idxLoadPlusOp32(x []int32, i int) int32 {
+	s := x[0]
+	// 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	// amd64: `ADDL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s += x[i+1]
+	// 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	// amd64: `SUBL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s -= x[i+2]
+	// 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s *= x[i+3]
+	// 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	// amd64: `ANDL\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s &= x[i+4]
+	// 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	// amd64: `ORL\t20\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s |= x[i+5]
+	// 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	// amd64: `XORL\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s ^= x[i+6]
+	return s
+}
+
+func idxLoadPlusOp64(x []int64, i int) int64 {
+	s := x[0]
+	// amd64: `ADDQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s += x[i+1]
+	// amd64: `SUBQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s -= x[i+2]
+	// amd64: `ANDQ\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s &= x[i+3]
+	// amd64: `ORQ\t32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s |= x[i+4]
+	// amd64: `XORQ\t40\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s ^= x[i+5]
+	return s
+}
+
+func idxStorePlusOp32(x []int32, i int, v int32) {
+	// 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ADDL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+1] += v
+	// 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `SUBL\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+2] -= v
+	// 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ANDL\t[A-Z]+[0-9]*, 12\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+3] &= v
+	// 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ORL\t[A-Z]+[0-9]*, 16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+4] |= v
+	// 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `XORL\t[A-Z]+[0-9]*, 20\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+5] ^= v
+
+	// 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ADDL\t[$]77, 24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+6] += 77
+	// 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ANDL\t[$]77, 28\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+7] &= 77
+	// 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ORL\t[$]77, 32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+8] |= 77
+	// 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `XORL\t[$]77, 36\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+9] ^= 77
+}
+
+func idxStorePlusOp64(x []int64, i int, v int64) {
+	// amd64: `ADDQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+1] += v
+	// amd64: `SUBQ\t[A-Z]+[0-9]*, 16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+2] -= v
+	// amd64: `ANDQ\t[A-Z]+[0-9]*, 24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+3] &= v
+	// amd64: `ORQ\t[A-Z]+[0-9]*, 32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+4] |= v
+	// amd64: `XORQ\t[A-Z]+[0-9]*, 40\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+5] ^= v
+
+	// amd64: `ADDQ\t[$]77, 48\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+6] += 77
+	// amd64: `ANDQ\t[$]77, 56\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+7] &= 77
+	// amd64: `ORQ\t[$]77, 64\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+8] |= 77
+	// amd64: `XORQ\t[$]77, 72\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+9] ^= 77
+}
+
+func idxCompare(i int) int {
+	// amd64: `MOVBLZX\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	if x8[i+1] < x8[0] {
+		return 0
+	}
+	// amd64: `MOVWLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	if x16[i+1] < x16[0] {
+		return 0
+	}
+	// amd64: `MOVWLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	if x16[16*i+1] < x16[0] {
+		return 0
+	}
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	if x32[i+1] < x32[0] {
+		return 0
+	}
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	if x32[16*i+1] < x32[0] {
+		return 0
+	}
+	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	if x64[i+1] < x64[0] {
+		return 0
+	}
+	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
+	if x64[16*i+1] < x64[0] {
+		return 0
+	}
+	// amd64: `MOVBLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	if x8[i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVWLZX\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	if x16[i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVWLZX\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	if x16[16*i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	if x32[i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	if x32[16*i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	if x64[i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
+	if x64[16*i+2] < 77 {
+		return 0
+	}
+	return 1
+}
+
+func idxFloatOps(a []float64, b []float32, i int) (float64, float32) {
+	c := float64(7)
+	// amd64: `ADDSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c += a[i+1]
+	// amd64: `SUBSD\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c -= a[i+2]
+	// amd64: `MULSD\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c *= a[i+3]
+	// amd64: `DIVSD\t32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c /= a[i+4]
+
+	d := float32(8)
+	// amd64: `ADDSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d += b[i+1]
+	// amd64: `SUBSS\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d -= b[i+2]
+	// amd64: `MULSS\t12\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d *= b[i+3]
+	// amd64: `DIVSS\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d /= b[i+4]
+	return c, d
+}
+
+func storeTest(a []bool, v int, i int) {
+	// amd64: `BTL\t\$0,`,`SETCS\t4\([A-Z]+[0-9]*\)`
+	a[4] = v&1 != 0
+	// amd64: `BTL\t\$1,`,`SETCS\t3\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	a[3+i] = v&2 != 0
+}
+
+func bitOps(p *[12]uint64) {
+	// amd64: `ORQ\t\$8, \(AX\)`
+	p[0] |= 8
+	// amd64: `ORQ\t\$1073741824, 8\(AX\)`
+	p[1] |= 1 << 30
+	// amd64: `BTSQ\t\$31, 16\(AX\)`
+	p[2] |= 1 << 31
+	// amd64: `BTSQ\t\$63, 24\(AX\)`
+	p[3] |= 1 << 63
+
+	// amd64: `ANDQ\t\$-9, 32\(AX\)`
+	p[4] &^= 8
+	// amd64: `ANDQ\t\$-1073741825, 40\(AX\)`
+	p[5] &^= 1 << 30
+	// amd64: `BTRQ\t\$31, 48\(AX\)`
+	p[6] &^= 1 << 31
+	// amd64: `BTRQ\t\$63, 56\(AX\)`
+	p[7] &^= 1 << 63
+
+	// amd64: `XORQ\t\$8, 64\(AX\)`
+	p[8] ^= 8
+	// amd64: `XORQ\t\$1073741824, 72\(AX\)`
+	p[9] ^= 1 << 30
+	// amd64: `BTCQ\t\$31, 80\(AX\)`
+	p[10] ^= 1 << 31
+	// amd64: `BTCQ\t\$63, 88\(AX\)`
+	p[11] ^= 1 << 63
+}
--- a/test/codegen/memops_bigoffset.go
+++ b/test/codegen/memops_bigoffset.go
@@ -0,0 +1,71 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+type big1 struct {
+	w [1<<30 - 1]uint32
+}
+type big2 struct {
+	d [1<<29 - 1]uint64
+}
+
+func loadLargeOffset(sw *big1, sd *big2) (uint32, uint64) {
+	// Each load below uses a larger constant offset; the checks pin the expected ppc64 addressing form.
+	// ppc64x:`MOVWZ\s+[0-9]+\(R[0-9]+\)`,-`ADD`
+	a3 := sw.w[1<<10]
+	// ppc64le/power10:`MOVWZ\s+[0-9]+\(R[0-9]+\),\sR[0-9]+`,-`ADD`
+	// ppc64x/power9:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
+	// ppc64x/power8:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
+	b3 := sw.w[1<<16]
+	// ppc64le/power10:`MOVWZ\s+[0-9]+\(R[0-9]+\),\sR[0-9]+`,-`ADD`
+	// ppc64x/power9:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
+	// ppc64x/power8:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
+	c3 := sw.w[1<<28]
+	// ppc64x:`MOVWZ\s+\(R[0-9]+\)\(R[0-9]+\),\sR[0-9]+`
+	d3 := sw.w[1<<29]
+	// ppc64x:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
+	a4 := sd.d[1<<10]
+	// ppc64le/power10:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
+	// ppc64x/power9:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
+	// ppc64x/power8:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
+	b4 := sd.d[1<<16]
+	// ppc64le/power10:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
+	// ppc64x/power9:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
+	// ppc64x/power8:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
+	c4 := sd.d[1<<27]
+	// ppc64x:`MOVD\s+\(R[0-9]+\)\(R[0-9]+\),\sR[0-9]+`
+	d4 := sd.d[1<<28]
+
+	return a3 + b3 + c3 + d3, a4 + b4 + c4 + d4
+}
+// storeLargeOffset stores constants at growing constant offsets; the checks pin the expected ppc64 addressing form.
+func storeLargeOffset(sw *big1, sd *big2) {
+	// ppc64x:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+	sw.w[1<<10] = uint32(10)
+	// ppc64le/power10:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+	// ppc64x/power9:`MOVW\s+R[0-9]+\,\s\(R[0-9]+\)`,`ADD`
+	// ppc64x/power8:`MOVW\s+R[0-9]+\,\s\(R[0-9]+\)`,`ADD`
+	sw.w[1<<16] = uint32(20)
+	// ppc64le/power10:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+	// ppc64x/power9:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+	// ppc64x/power8:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+	sw.w[1<<28] = uint32(30)
+	// ppc64x:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`
+	sw.w[1<<29] = uint32(40)
+	// ppc64x:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+	sd.d[1<<10] = uint64(40)
+	// ppc64le/power10:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+	// ppc64x/power9:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+	// ppc64x/power8:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+	sd.d[1<<16] = uint64(50)
+	// ppc64le/power10:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+	// ppc64x/power9:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+	// ppc64x/power8:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+	sd.d[1<<27] = uint64(60)
+	// ppc64x:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`
+	sd.d[1<<28] = uint64(70)
+}
--- a/test/codegen/noextend.go
+++ b/test/codegen/noextend.go
@@ -0,0 +1,285 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math/bits"
+
+var sval64 [8]int64
+var sval32 [8]int32
+var sval16 [8]int16
+var sval8 [8]int8
+var val64 [8]uint64
+var val32 [8]uint32
+var val16 [8]uint16
+var val8 [8]uint8
+
+// Avoid zero/sign extensions following a load
+// which has extended the value correctly.
+// Note: int8 is not tested, since there is no
+// signed byte load and an extra extension is
+// usually needed.
+
+func set16(x8 int8, u8 *uint8, y8 int8, z8 uint8) {
+	// Truncate not needed, load does sign/zero extend
+
+	// ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val16[0] = uint16(*u8)
+
+	// AND not needed due to size
+	// ppc64x:-"ANDCC"
+	sval16[1] = 255 & int16(x8+y8)
+
+	// ppc64x:-"ANDCC"
+	val16[1] = 255 & uint16(*u8+z8)
+
+}
+func shiftidx(u8 *uint8, x16 *int16, u16 *uint16) {
+
+	// ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val16[0] = uint16(sval16[*u8>>2])
+
+	// ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+	sval16[1] = int16(val16[*x16>>1])
+
+	// ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+	val16[1] = uint16(sval16[*u16>>2])
+
+}
+
+func setnox(x8 int8, u8 *uint8, y8 *int8, z8 *uint8, x16 *int16, u16 *uint16, x32 *int32, u32 *uint32) {
+
+	// ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val16[0] = uint16(*u8)
+
+	// AND not needed due to size
+	// ppc64x:-"ANDCC"
+	sval16[1] = 255 & int16(x8+*y8)
+
+	// ppc64x:-"ANDCC"
+	val16[1] = 255 & uint16(*u8+*z8)
+
+	// ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+	sval32[1] = int32(*x16)
+
+	// ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val32[0] = uint32(*u8)
+
+	// ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+	val32[1] = uint32(*u16)
+
+	// ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+	sval64[1] = int64(*x16)
+
+	// ppc64x:-"MOVW\tR\\d+,\\sR\\d+"
+	sval64[2] = int64(*x32)
+
+	// ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val64[0] = uint64(*u8)
+
+	// ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+	val64[1] = uint64(*u16)
+
+	// ppc64x:-"MOVWZ\tR\\d+,\\sR\\d+"
+	val64[2] = uint64(*u32)
+}
+
+func cmp16(u8 *uint8, x32 *int32, u32 *uint32, x64 *int64, u64 *uint64) bool {
+
+	// ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+	if uint16(*u8) == val16[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint16(*u32>>16) == val16[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint16(*u64>>48) == val16[0] {
+		return true
+	}
+
+	// Verify the truncates are using the correct sign.
+	// ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if int16(*x32) == sval16[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+	if uint16(*u32) == val16[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if int16(*x64) == sval16[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+	if uint16(*u64) == val16[0] {
+		return true
+	}
+
+	return false
+}
+
+func cmp32(u8 *uint8, x16 *int16, u16 *uint16, x64 *int64, u64 *uint64) bool {
+
+	// ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+	if uint32(*u8) == val32[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+	if int32(*x16) == sval32[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint32(*u16) == val32[0] {
+		return true
+	}
+
+	// Verify the truncates are using the correct sign.
+	// ppc64x:-"MOVWZ\tR\\d+,\\sR\\d+"
+	if int32(*x64) == sval32[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVW\tR\\d+,\\sR\\d+"
+	if uint32(*u64) == val32[0] {
+		return true
+	}
+
+	return false
+}
+
+func cmp64(u8 *uint8, x16 *int16, u16 *uint16, x32 *int32, u32 *uint32) bool {
+
+	// ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+	if uint64(*u8) == val64[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+	if int64(*x16) == sval64[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint64(*u16) == val64[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVW\tR\\d+,\\sR\\d+"
+	if int64(*x32) == sval64[0] {
+		return true
+	}
+
+	// ppc64x:-"MOVWZ\tR\\d+,\\sR\\d+"
+	if uint64(*u32) == val64[0] {
+		return true
+	}
+	return false
+}
+
+// no zero extension needed following 32-bit ops
+
+func noUnsignEXT(t1, t2, t3, t4 uint32, k int64) uint64 {
+	var ret uint64
+
+	// arm64:"RORW",-"MOVWU"
+	ret += uint64(bits.RotateLeft32(t1, 7))
+
+	// arm64:"MULW",-"MOVWU"
+	ret *= uint64(t1 * t2)
+
+	// arm64:"MNEGW",-"MOVWU"
+	ret += uint64(-t1 * t3)
+
+	// arm64:"UDIVW",-"MOVWU"
+	ret += uint64(t1 / t4)
+
+	// arm64:-"MOVWU"
+	ret += uint64(t2 % t3)
+
+	// arm64:"MSUBW",-"MOVWU"
+	ret += uint64(t1 - t2*t3)
+
+	// arm64:"MADDW",-"MOVWU"
+	ret += uint64(t3*t4 + t2)
+
+	// arm64:"REVW",-"MOVWU"
+	ret += uint64(bits.ReverseBytes32(t1))
+
+	// arm64:"RBITW",-"MOVWU"
+	ret += uint64(bits.Reverse32(t1))
+
+	// arm64:"CLZW",-"MOVWU"
+	ret += uint64(bits.LeadingZeros32(t1))
+
+	// arm64:"REV16W",-"MOVWU"
+	ret += uint64(((t1 & 0xff00ff00) >> 8) | ((t1 & 0x00ff00ff) << 8))
+
+	// arm64:"EXTRW",-"MOVWU"
+	ret += uint64((t1 << 25) | (t2 >> 7))
+
+	return ret
+}
+
+// no sign extension when the upper bits of the result are zero
+
+func noSignEXT(x int) int64 {
+	t1 := int32(x)
+
+	var ret int64
+
+	// arm64:-"MOVW"
+	ret += int64(t1 & 1)
+
+	// arm64:-"MOVW"
+	ret += int64(int32(x & 0x7fffffff))
+
+	// arm64:-"MOVH"
+	ret += int64(int16(x & 0x7fff))
+
+	// arm64:-"MOVB"
+	ret += int64(int8(x & 0x7f))
+
+	return ret
+}
+
+// corner cases where sign extension must not be omitted
+
+func shouldSignEXT(x int) int64 {
+	t1 := int32(x)
+
+	var ret int64
+
+	// arm64:"MOVW"
+	ret += int64(t1 & (-1))
+
+	// arm64:"MOVW"
+	ret += int64(int32(x & 0x80000000))
+
+	// arm64:"MOVW"
+	ret += int64(int32(x & 0x1100000011111111))
+
+	// arm64:"MOVH"
+	ret += int64(int16(x & 0x1100000000001111))
+
+	// arm64:"MOVB"
+	ret += int64(int8(x & 0x1100000000000011))
+
+	return ret
+}
+
+func noIntermediateExtension(a, b, c uint32) uint32 {
+	// arm64:-"MOVWU"
+	return a*b*9 + c
+}
--- a/test/codegen/race.go
+++ b/test/codegen/race.go
@@ -0,0 +1,22 @@
+// asmcheck -race
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Check that we elide racefuncenter/racefuncexit for
+// functions with no calls (but which might panic
+// in various ways). See issue 31219.
+// amd64:-"CALL.*racefuncenter.*"
+// arm64:-"CALL.*racefuncenter.*"
+// ppc64le:-"CALL.*racefuncenter.*"
+func RaceMightPanic(a []int, i, j, k, s int) {
+	var b [4]int
+	_ = b[i]     // panicIndex
+	_ = a[i:j]   // panicSlice
+	_ = a[i:j:k] // also panicSlice
+	_ = i << s   // panicShift
+	_ = i / j    // panicDivide
+}
--- a/test/codegen/regabi_regalloc.go
+++ b/test/codegen/regabi_regalloc.go
@@ -0,0 +1,23 @@
+// asmcheck
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+//go:registerparams
+func f1(a, b int) {
+	// amd64:"MOVQ\tBX, CX", "MOVQ\tAX, BX", "MOVL\t\\$1, AX", -"MOVQ\t.*DX"
+	g(1, a, b)
+}
+
+//go:registerparams
+func f2(a, b int) {
+	// amd64:"MOVQ\tBX, AX", "MOVQ\t[AB]X, CX", -"MOVQ\t.*, BX"
+	g(b, b, b)
+}
+
+//go:noinline
+//go:registerparams
+func g(int, int, int) {}
--- a/test/codegen/retpoline.go
+++ b/test/codegen/retpoline.go
@@ -0,0 +1,43 @@
+// asmcheck -gcflags=-spectre=ret
+
+//go:build amd64
+
+package codegen
+
+func CallFunc(f func()) {
+	// amd64:`CALL\truntime.retpoline`
+	f()
+}
+
+func CallInterface(x interface{ M() }) {
+	// amd64:`CALL\truntime.retpoline`
+	x.M()
+}
+
+// Check to make sure that jump tables are disabled
+// when retpoline is on. See issue 57097.
+func noJumpTables(x int) int {
+	switch x {
+	case 0:
+		return 0
+	case 1:
+		return 1
+	case 2:
+		return 2
+	case 3:
+		return 3
+	case 4:
+		return 4
+	case 5:
+		return 5
+	case 6:
+		return 6
+	case 7:
+		return 7
+	case 8:
+		return 8
+	case 9:
+		return 9
+	}
+	return 10
+}
--- a/test/codegen/rotate.go
+++ b/test/codegen/rotate.go
@@ -0,0 +1,281 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math/bits"
+
+// ------------------- //
+//    const rotates    //
+// ------------------- //
+
+func rot64(x uint64) uint64 {
+	var a uint64
+
+	// amd64:"ROLQ\t[$]7"
+	// ppc64x:"ROTL\t[$]7"
+	// loong64: "ROTRV\t[$]57"
+	// riscv64: "RORI\t[$]57"
+	a += x<<7 | x>>57
+
+	// amd64:"ROLQ\t[$]8"
+	// arm64:"ROR\t[$]56"
+	// s390x:"RISBGZ\t[$]0, [$]63, [$]8, "
+	// ppc64x:"ROTL\t[$]8"
+	// loong64: "ROTRV\t[$]56"
+	// riscv64: "RORI\t[$]56"
+	a += x<<8 + x>>56
+
+	// amd64:"ROLQ\t[$]9"
+	// arm64:"ROR\t[$]55"
+	// s390x:"RISBGZ\t[$]0, [$]63, [$]9, "
+	// ppc64x:"ROTL\t[$]9"
+	// loong64: "ROTRV\t[$]55"
+	// riscv64: "RORI\t[$]55"
+	a += x<<9 ^ x>>55
+
+	// amd64:"ROLQ\t[$]10"
+	// arm64:"ROR\t[$]54"
+	// s390x:"RISBGZ\t[$]0, [$]63, [$]10, "
+	// ppc64x:"ROTL\t[$]10"
+	// arm64:"ROR\t[$]54"
+	// s390x:"RISBGZ\t[$]0, [$]63, [$]10, "
+	// loong64: "ROTRV\t[$]54"
+	// riscv64: "RORI\t[$]54"
+	a += bits.RotateLeft64(x, 10)
+
+	return a
+}
+
+func rot32(x uint32) uint32 {
+	var a uint32
+
+	// amd64:"ROLL\t[$]7"
+	// arm:"MOVW\tR\\d+@>25"
+	// ppc64x:"ROTLW\t[$]7"
+	// loong64: "ROTR\t[$]25"
+	// riscv64: "RORIW\t[$]25"
+	a += x<<7 | x>>25
+
+	// amd64:`ROLL\t[$]8`
+	// arm:"MOVW\tR\\d+@>24"
+	// arm64:"RORW\t[$]24"
+	// s390x:"RLL\t[$]8"
+	// ppc64x:"ROTLW\t[$]8"
+	// loong64: "ROTR\t[$]24"
+	// riscv64: "RORIW\t[$]24"
+	a += x<<8 + x>>24
+
+	// amd64:"ROLL\t[$]9"
+	// arm:"MOVW\tR\\d+@>23"
+	// arm64:"RORW\t[$]23"
+	// s390x:"RLL\t[$]9"
+	// ppc64x:"ROTLW\t[$]9"
+	// loong64: "ROTR\t[$]23"
+	// riscv64: "RORIW\t[$]23"
+	a += x<<9 ^ x>>23
+
+	// amd64:"ROLL\t[$]10"
+	// arm:"MOVW\tR\\d+@>22"
+	// arm64:"RORW\t[$]22"
+	// s390x:"RLL\t[$]10"
+	// ppc64x:"ROTLW\t[$]10"
+	// arm64:"RORW\t[$]22"
+	// s390x:"RLL\t[$]10"
+	// loong64: "ROTR\t[$]22"
+	// riscv64: "RORIW\t[$]22"
+	a += bits.RotateLeft32(x, 10)
+
+	return a
+}
+
+func rot16(x uint16) uint16 {
+	var a uint16
+
+	// amd64:"ROLW\t[$]7"
+	// riscv64: "OR","SLLI","SRLI",-"AND"
+	a += x<<7 | x>>9
+
+	// amd64:`ROLW\t[$]8`
+	// riscv64: "OR","SLLI","SRLI",-"AND"
+	a += x<<8 + x>>8
+
+	// amd64:"ROLW\t[$]9"
+	// riscv64: "OR","SLLI","SRLI",-"AND"
+	a += x<<9 ^ x>>7
+
+	return a
+}
+
+func rot8(x uint8) uint8 {
+	var a uint8
+
+	// amd64:"ROLB\t[$]5"
+	// riscv64: "OR","SLLI","SRLI",-"AND"
+	a += x<<5 | x>>3
+
+	// amd64:`ROLB\t[$]6`
+	// riscv64: "OR","SLLI","SRLI",-"AND"
+	a += x<<6 + x>>2
+
+	// amd64:"ROLB\t[$]7"
+	// riscv64: "OR","SLLI","SRLI",-"AND"
+	a += x<<7 ^ x>>1
+
+	return a
+}
+
+// ----------------------- //
+//    non-const rotates    //
+// ----------------------- //
+
+func rot64nc(x uint64, z uint) uint64 {
+	var a uint64
+
+	z &= 63
+
+	// amd64:"ROLQ",-"AND"
+	// arm64:"ROR","NEG",-"AND"
+	// ppc64x:"ROTL",-"NEG",-"AND"
+	// loong64: "ROTRV", -"AND"
+	// riscv64: "ROL",-"AND"
+	a += x<<z | x>>(64-z)
+
+	// amd64:"RORQ",-"AND"
+	// arm64:"ROR",-"NEG",-"AND"
+	// ppc64x:"ROTL","NEG",-"AND"
+	// loong64: "ROTRV", -"AND"
+	// riscv64: "ROR",-"AND"
+	a += x>>z | x<<(64-z)
+
+	return a
+}
+
+func rot32nc(x uint32, z uint) uint32 {
+	var a uint32
+
+	z &= 31
+
+	// amd64:"ROLL",-"AND"
+	// arm64:"ROR","NEG",-"AND"
+	// ppc64x:"ROTLW",-"NEG",-"AND"
+	// loong64: "ROTR", -"AND"
+	// riscv64: "ROLW",-"AND"
+	a += x<<z | x>>(32-z)
+
+	// amd64:"RORL",-"AND"
+	// arm64:"ROR",-"NEG",-"AND"
+	// ppc64x:"ROTLW","NEG",-"AND"
+	// loong64: "ROTR", -"AND"
+	// riscv64: "RORW",-"AND"
+	a += x>>z | x<<(32-z)
+
+	return a
+}
+
+func rot16nc(x uint16, z uint) uint16 {
+	var a uint16
+
+	z &= 15
+
+	// amd64:"ROLW",-"ANDQ"
+	// riscv64: "OR","SLL","SRL",-"AND\t"
+	a += x<<z | x>>(16-z)
+
+	// amd64:"RORW",-"ANDQ"
+	// riscv64: "OR","SLL","SRL",-"AND\t"
+	a += x>>z | x<<(16-z)
+
+	return a
+}
+
+func rot8nc(x uint8, z uint) uint8 {
+	var a uint8
+
+	z &= 7
+
+	// amd64:"ROLB",-"ANDQ"
+	// riscv64: "OR","SLL","SRL",-"AND\t"
+	a += x<<z | x>>(8-z)
+
+	// amd64:"RORB",-"ANDQ"
+	// riscv64: "OR","SLL","SRL",-"AND\t"
+	a += x>>z | x<<(8-z)
+
+	return a
+}
+
+// Issue 18254: rotate after inlining
+func f32(x uint32) uint32 {
+	// amd64:"ROLL\t[$]7"
+	return rot32nc(x, 7)
+}
+
+func doubleRotate(x uint64) uint64 {
+	x = (x << 5) | (x >> 59)
+	// amd64:"ROLQ\t[$]15"
+	// arm64:"ROR\t[$]49"
+	x = (x << 10) | (x >> 54)
+	return x
+}
+
+// --------------------------------------- //
+//    Combined Rotate + Masking operations //
+// --------------------------------------- //
+
+func checkMaskedRotate32(a []uint32, r int) {
+	i := 0
+
+	// ppc64x: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i], 16) & 0xFF0000
+	i++
+	// ppc64x: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i]&0xFF, 16)
+	i++
+	// ppc64x: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]27, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i], 4) & 0xFF0
+	i++
+	// ppc64x: "RLWNM\t[$]16, R[0-9]+, [$]24, [$]31, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i]&0xFF0000, 16)
+	i++
+
+	// ppc64x: "RLWNM\tR[0-9]+, R[0-9]+, [$]8, [$]15, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i], r) & 0xFF0000
+	i++
+	// ppc64x: "RLWNM\tR[0-9]+, R[0-9]+, [$]16, [$]23, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i], r) & 0xFF00
+	i++
+
+	// ppc64x: "RLWNM\tR[0-9]+, R[0-9]+, [$]20, [$]11, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i], r) & 0xFFF00FFF
+	i++
+	// ppc64x: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]11, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i], 4) & 0xFFF00FFF
+	i++
+}
+
+// combined arithmetic and rotate on arm64
+func checkArithmeticWithRotate(a *[1000]uint64) {
+	// arm64: "AND\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+	a[2] = a[1] & bits.RotateLeft64(a[0], 13)
+	// arm64: "ORR\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+	a[5] = a[4] | bits.RotateLeft64(a[3], 13)
+	// arm64: "EOR\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+	a[8] = a[7] ^ bits.RotateLeft64(a[6], 13)
+	// arm64: "MVN\tR[0-9]+@>51, R[0-9]+"
+	a[10] = ^bits.RotateLeft64(a[9], 13)
+	// arm64: "BIC\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+	a[13] = a[12] &^ bits.RotateLeft64(a[11], 13)
+	// arm64: "EON\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+	a[16] = a[15] ^ ^bits.RotateLeft64(a[14], 13)
+	// arm64: "ORN\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+	a[19] = a[18] | ^bits.RotateLeft64(a[17], 13)
+	// arm64: "TST\tR[0-9]+@>51, R[0-9]+"
+	if a[18]&bits.RotateLeft64(a[19], 13) == 0 {
+		a[20] = 1
+	}
+
+}
--- a/test/codegen/select.go
+++ b/test/codegen/select.go
@@ -0,0 +1,20 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func f() {
+	ch1 := make(chan int)
+	ch2 := make(chan int)
+	for {
+		// amd64:-`MOVQ\t[$]0, command-line-arguments..autotmp_3`
+		select {
+		case <-ch1:
+		case <-ch2:
+		default:
+		}
+	}
+}
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -0,0 +1,522 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// ------------------ //
+//   constant shifts  //
+// ------------------ //
+
+func lshConst64x64(v int64) int64 { // int64 << constant 33 (uint64 count): ppc64x expects SLD, riscv64 expects SLLI without AND or SLTIU
+	// ppc64x:"SLD"
+	// riscv64:"SLLI",-"AND",-"SLTIU"
+	return v << uint64(33)
+}
+
+func rshConst64Ux64(v uint64) uint64 { // uint64 >> constant 33 (uint64 count): ppc64x expects SRD, riscv64 expects SRLI without AND or SLTIU
+	// ppc64x:"SRD"
+	// riscv64:"SRLI\t",-"AND",-"SLTIU"
+	return v >> uint64(33)
+}
+
+func rshConst64Ux64Overflow32(v uint32) uint64 { // uint32 widened to uint64 then >> 32 is always 0: riscv64 expects a MOV of constant 0 and no SRL
+	// riscv64:"MOV\t\\$0,",-"SRL"
+	return uint64(v) >> 32
+}
+
+func rshConst64Ux64Overflow16(v uint16) uint64 { // uint16 widened to uint64 then >> 16 is always 0: riscv64 expects a MOV of constant 0 and no SRL
+	// riscv64:"MOV\t\\$0,",-"SRL"
+	return uint64(v) >> 16
+}
+
+func rshConst64Ux64Overflow8(v uint8) uint64 { // uint8 widened to uint64 then >> 8 is always 0: riscv64 expects a MOV of constant 0 and no SRL
+	// riscv64:"MOV\t\\$0,",-"SRL"
+	return uint64(v) >> 8
+}
+
+func rshConst64x64(v int64) int64 { // int64 >> constant 33 (uint64 count): ppc64x expects SRAD, riscv64 expects SRAI without OR or SLTIU
+	// ppc64x:"SRAD"
+	// riscv64:"SRAI\t",-"OR",-"SLTIU"
+	return v >> uint64(33)
+}
+
+func rshConst64x64Overflow32(v int32) int64 { // int32 widened to int64 then >> 32: riscv64 expects SRAIW, with no SLLI and no plain SRAI
+	// riscv64:"SRAIW",-"SLLI",-"SRAI\t"
+	return int64(v) >> 32
+}
+
+func rshConst64x64Overflow16(v int16) int64 { // int16 widened to int64 then >> 16: riscv64 expects an SLLI and SRAI pair and no SRAIW
+	// riscv64:"SLLI","SRAI",-"SRAIW"
+	return int64(v) >> 16
+}
+
+func rshConst64x64Overflow8(v int8) int64 { // int8 widened to int64 then >> 8: riscv64 expects an SLLI and SRAI pair and no SRAIW
+	// riscv64:"SLLI","SRAI",-"SRAIW"
+	return int64(v) >> 8
+}
+
+func lshConst32x64(v int32) int32 { // int32 << constant 29 (uint64 count): ppc64x expects SLW, riscv64 expects SLLI without AND, SLTIU or MOVW
+	// ppc64x:"SLW"
+	// riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW"
+	return v << uint64(29)
+}
+
+func rshConst32Ux64(v uint32) uint32 { // uint32 >> constant 29 (uint64 count): ppc64x expects SRW, riscv64 expects SRLIW without AND, SLTIU or MOVW
+	// ppc64x:"SRW"
+	// riscv64:"SRLIW",-"AND",-"SLTIU", -"MOVW"
+	return v >> uint64(29)
+}
+
+func rshConst32x64(v int32) int32 { // int32 >> constant 29 (uint64 count): ppc64x expects SRAW, riscv64 expects SRAIW without OR, SLTIU or MOVW
+	// ppc64x:"SRAW"
+	// riscv64:"SRAIW",-"OR",-"SLTIU", -"MOVW"
+	return v >> uint64(29)
+}
+
+func lshConst64x32(v int64) int64 { // int64 << constant 33 (uint32 count): ppc64x expects SLD, riscv64 expects SLLI without AND or SLTIU
+	// ppc64x:"SLD"
+	// riscv64:"SLLI",-"AND",-"SLTIU"
+	return v << uint32(33)
+}
+
+func rshConst64Ux32(v uint64) uint64 { // uint64 >> constant 33 (uint32 count): ppc64x expects SRD, riscv64 expects SRLI without AND or SLTIU
+	// ppc64x:"SRD"
+	// riscv64:"SRLI\t",-"AND",-"SLTIU"
+	return v >> uint32(33)
+}
+
+func rshConst64x32(v int64) int64 { // int64 >> constant 33 (uint32 count): ppc64x expects SRAD, riscv64 expects SRAI without OR or SLTIU
+	// ppc64x:"SRAD"
+	// riscv64:"SRAI\t",-"OR",-"SLTIU"
+	return v >> uint32(33)
+}
+
+// ------------------ //
+//   masked shifts    //
+// ------------------ //
+
+func lshMask64x64(v int64, s uint64) int64 { // int64 << (s & 63) with a uint64 count; the checks require the shift without the listed extra opcodes
+	// arm64:"LSL",-"AND"
+	// ppc64x:"RLDICL",-"ORN",-"ISEL"
+	// riscv64:"SLL",-"AND\t",-"SLTIU"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v << (s & 63)
+}
+
+func rshMask64Ux64(v uint64, s uint64) uint64 { // uint64 >> (s & 63) with a uint64 count; the checks require the shift without the listed extra opcodes
+	// arm64:"LSR",-"AND",-"CSEL"
+	// ppc64x:"RLDICL",-"ORN",-"ISEL"
+	// riscv64:"SRL\t",-"AND\t",-"SLTIU"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v >> (s & 63)
+}
+
+func rshMask64x64(v int64, s uint64) int64 { // int64 >> (s & 63) with a uint64 count; the checks require the shift without the listed extra opcodes
+	// arm64:"ASR",-"AND",-"CSEL"
+	// ppc64x:"RLDICL",-"ORN",-"ISEL"
+	// riscv64:"SRA\t",-"OR",-"SLTIU"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v >> (s & 63)
+}
+
+func lshMask32x64(v int32, s uint64) int32 { // int32 << (s & 63): the count may still be 32..63, and ppc64x is expected to keep an ISEL
+	// arm64:"LSL",-"AND"
+	// ppc64x:"ISEL",-"ORN"
+	// riscv64:"SLL",-"AND\t",-"SLTIU"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v << (s & 63)
+}
+
+func rshMask32Ux64(v uint32, s uint64) uint32 { // uint32 >> (s & 63): the count may still be 32..63; ppc64x keeps ISEL and riscv64 keeps SLTIU, NEG and AND next to SRLW
+	// arm64:"LSR",-"AND"
+	// ppc64x:"ISEL",-"ORN"
+	// riscv64:"SRLW","SLTIU","NEG","AND\t",-"SRL\t"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v >> (s & 63)
+}
+
+func rsh5Mask32Ux64(v uint32, s uint64) uint32 { // uint32 >> (s & 31): riscv64 expects SRLW without AND, SLTIU or a plain SRL
+	// riscv64:"SRLW",-"AND\t",-"SLTIU",-"SRL\t"
+	return v >> (s & 31)
+}
+
+func rshMask32x64(v int32, s uint64) int32 { // int32 >> (s & 63): the count may still be 32..63; ppc64x keeps ISEL and riscv64 keeps OR and SLTIU next to SRAW
+	// arm64:"ASR",-"AND"
+	// ppc64x:"ISEL",-"ORN"
+	// riscv64:"SRAW","OR","SLTIU"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v >> (s & 63)
+}
+
+func rsh5Mask32x64(v int32, s uint64) int32 { // int32 >> (s & 31): riscv64 expects SRAW without OR or SLTIU
+	// riscv64:"SRAW",-"OR",-"SLTIU"
+	return v >> (s & 31)
+}
+
+func lshMask64x32(v int64, s uint32) int64 { // int64 << (s & 63) with a uint32 count; the checks require the shift without the listed extra opcodes
+	// arm64:"LSL",-"AND"
+	// ppc64x:"RLDICL",-"ORN"
+	// riscv64:"SLL",-"AND\t",-"SLTIU"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v << (s & 63)
+}
+
+func rshMask64Ux32(v uint64, s uint32) uint64 { // uint64 >> (s & 63) with a uint32 count; the checks require the shift without the listed extra opcodes
+	// arm64:"LSR",-"AND",-"CSEL"
+	// ppc64x:"RLDICL",-"ORN"
+	// riscv64:"SRL\t",-"AND\t",-"SLTIU"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v >> (s & 63)
+}
+
+func rshMask64x32(v int64, s uint32) int64 { // int64 >> (s & 63) with a uint32 count; the checks require the shift without the listed extra opcodes
+	// arm64:"ASR",-"AND",-"CSEL"
+	// ppc64x:"RLDICL",-"ORN",-"ISEL"
+	// riscv64:"SRA\t",-"OR",-"SLTIU"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v >> (s & 63)
+}
+
+func lshMask64x32Ext(v int64, s int32) int64 { // int64 << uint(s&63) with a signed int32 count; the checks require the shift without the listed extra opcodes
+	// ppc64x:"RLDICL",-"ORN",-"ISEL"
+	// riscv64:"SLL",-"AND\t",-"SLTIU"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v << uint(s&63)
+}
+
+func rshMask64Ux32Ext(v uint64, s int32) uint64 { // uint64 >> uint(s&63) with a signed int32 count; the checks require the shift without the listed extra opcodes
+	// ppc64x:"RLDICL",-"ORN",-"ISEL"
+	// riscv64:"SRL\t",-"AND\t",-"SLTIU"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v >> uint(s&63)
+}
+
+func rshMask64x32Ext(v int64, s int32) int64 { // int64 >> uint(s&63) with a signed int32 count; the checks require the shift without the listed extra opcodes
+	// ppc64x:"RLDICL",-"ORN",-"ISEL"
+	// riscv64:"SRA\t",-"OR",-"SLTIU"
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	return v >> uint(s&63)
+}
+
+// --------------- //
+//  signed shifts  //
+// --------------- //
+
+// We do want to generate a test + panicshift for these cases.
+func lshSigned(v8 int8, v16 int16, v32 int32, v64 int64, x int) { // shifts x by unmasked signed counts of each width; amd64 expects a TEST of the matching width
+	// amd64:"TESTB"
+	_ = x << v8
+	// amd64:"TESTW"
+	_ = x << v16
+	// amd64:"TESTL"
+	_ = x << v32
+	// amd64:"TESTQ"
+	_ = x << v64
+}
+
+// We want to avoid generating a test + panicshift for these cases.
+func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) { // each signed count is masked to a non-negative range first; amd64 must not emit the matching TEST
+	// amd64:-"TESTB"
+	_ = x << (v8 & 7)
+	// amd64:-"TESTW"
+	_ = x << (v16 & 15)
+	// amd64:-"TESTL"
+	_ = x << (v32 & 31)
+	// amd64:-"TESTQ"
+	_ = x << (v64 & 63)
+}
+
+// ------------------ //
+//   bounded shifts   //
+// ------------------ //
+
+func lshGuarded64(v int64, s uint) int64 { // int64 << s inside an s < 64 guard; the checks require the shift without the listed extra opcodes, and panics otherwise
+	if s < 64 {
+		// riscv64:"SLL",-"AND",-"SLTIU"
+		// s390x:-"RISBGZ",-"AND",-"LOCGR"
+		// wasm:-"Select",-".*LtU"
+		// arm64:"LSL",-"CSEL"
+		return v << s
+	}
+	panic("shift too large")
+}
+
+func rshGuarded64U(v uint64, s uint) uint64 { // uint64 >> s inside an s < 64 guard; the checks require the shift without the listed extra opcodes, and panics otherwise
+	if s < 64 {
+		// riscv64:"SRL\t",-"AND",-"SLTIU"
+		// s390x:-"RISBGZ",-"AND",-"LOCGR"
+		// wasm:-"Select",-".*LtU"
+		// arm64:"LSR",-"CSEL"
+		return v >> s
+	}
+	panic("shift too large")
+}
+
+func rshGuarded64(v int64, s uint) int64 { // int64 >> s inside an s < 64 guard; the checks require the shift without the listed extra opcodes, and panics otherwise
+	if s < 64 {
+		// riscv64:"SRA\t",-"OR",-"SLTIU"
+		// s390x:-"RISBGZ",-"AND",-"LOCGR"
+		// wasm:-"Select",-".*LtU"
+		// arm64:"ASR",-"CSEL"
+		return v >> s
+	}
+	panic("shift too large")
+}
+
+func provedUnsignedShiftLeft(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) { // each unsigned left shift is guarded by 0 <= shift < width; arm64 expects LSL with no CSEL
+	if shift >= 0 && shift < 64 {
+		// arm64:"LSL",-"CSEL"
+		r1 = val64 << shift
+	}
+	if shift >= 0 && shift < 32 {
+		// arm64:"LSL",-"CSEL"
+		r2 = val32 << shift
+	}
+	if shift >= 0 && shift < 16 {
+		// arm64:"LSL",-"CSEL"
+		r3 = val16 << shift
+	}
+	if shift >= 0 && shift < 8 {
+		// arm64:"LSL",-"CSEL"
+		r4 = val8 << shift
+	}
+	return r1, r2, r3, r4
+}
+
+func provedSignedShiftLeft(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) { // each signed left shift is guarded by 0 <= shift < width; arm64 expects LSL with no CSEL
+	if shift >= 0 && shift < 64 {
+		// arm64:"LSL",-"CSEL"
+		r1 = val64 << shift
+	}
+	if shift >= 0 && shift < 32 {
+		// arm64:"LSL",-"CSEL"
+		r2 = val32 << shift
+	}
+	if shift >= 0 && shift < 16 {
+		// arm64:"LSL",-"CSEL"
+		r3 = val16 << shift
+	}
+	if shift >= 0 && shift < 8 {
+		// arm64:"LSL",-"CSEL"
+		r4 = val8 << shift
+	}
+	return r1, r2, r3, r4
+}
+
+func provedUnsignedShiftRight(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) { // each unsigned right shift is guarded by 0 <= shift < width; arm64 expects LSR with no CSEL
+	if shift >= 0 && shift < 64 {
+		// arm64:"LSR",-"CSEL"
+		r1 = val64 >> shift
+	}
+	if shift >= 0 && shift < 32 {
+		// arm64:"LSR",-"CSEL"
+		r2 = val32 >> shift
+	}
+	if shift >= 0 && shift < 16 {
+		// arm64:"LSR",-"CSEL"
+		r3 = val16 >> shift
+	}
+	if shift >= 0 && shift < 8 {
+		// arm64:"LSR",-"CSEL"
+		r4 = val8 >> shift
+	}
+	return r1, r2, r3, r4
+}
+
+func provedSignedShiftRight(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) { // each signed right shift is guarded by 0 <= shift < width; arm64 expects ASR with no CSEL
+	if shift >= 0 && shift < 64 {
+		// arm64:"ASR",-"CSEL"
+		r1 = val64 >> shift
+	}
+	if shift >= 0 && shift < 32 {
+		// arm64:"ASR",-"CSEL"
+		r2 = val32 >> shift
+	}
+	if shift >= 0 && shift < 16 {
+		// arm64:"ASR",-"CSEL"
+		r3 = val16 >> shift
+	}
+	if shift >= 0 && shift < 8 {
+		// arm64:"ASR",-"CSEL"
+		r4 = val8 >> shift
+	}
+	return r1, r2, r3, r4
+}
+
+func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) { // indexes tab with narrowed values and doubles narrowed d; ppc64x expects CLRLSLDI or CLRLSLWI without the listed RLWINM, RLDICR or AND
+
+	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f := tab[byte(v)^b]
+	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f += tab[byte(v)&b]
+	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f += tab[byte(v)|b]
+	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f += tab[uint16(v)&h]
+	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f += tab[uint16(v)^h]
+	// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f += tab[uint16(v)|h]
+	// ppc64x:-".*AND",-"RLDICR",".*CLRLSLDI"
+	f += tab[v&0xff]
+	// ppc64x:-".*AND",".*CLRLSLWI"
+	f += 2 * uint32(uint16(d))
+	// ppc64x:-".*AND",-"RLDICR",".*CLRLSLDI"
+	g := 2 * uint64(uint32(d))
+	return f, g
+}
+
+func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) { // mask-then-shift and narrow-then-shift combinations; the ppc64x checks say which must or must not become CLRLSLWI, CLRLSLDI or EXTSWSLI
+
+	// ppc64x:-"AND","CLRLSLWI"
+	f := (v8 & 0xF) << 2
+	// ppc64x:"CLRLSLWI"
+	f += byte(v16) << 3
+	// ppc64x:-"AND","CLRLSLWI"
+	g := (v16 & 0xFF) << 3
+	// ppc64x:-"AND","CLRLSLWI"
+	h := (v32 & 0xFFFFF) << 2
+	// ppc64x:"CLRLSLDI"
+	i := (v64 & 0xFFFFFFFF) << 5
+	// ppc64x:-"CLRLSLDI"
+	i += (v64 & 0xFFFFFFF) << 38
+	// ppc64x/power9:-"CLRLSLDI"
+	i += (v64 & 0xFFFF00) << 10
+	// ppc64x/power9:-"SLD","EXTSWSLI"
+	j := int64(x32+32) * 8
+	return f, g, h, i, j
+}
+
+func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { // narrowing conversions of right-shifted values; the ppc64x checks say where a MOVW, MOVH or MOVB must or must not appear (u is unused)
+
+	// ppc64x:-".*MOVW"
+	f := int32(v >> 32)
+	// ppc64x:".*MOVW"
+	f += int32(v >> 31)
+	// ppc64x:-".*MOVH"
+	g := int16(v >> 48)
+	// ppc64x:".*MOVH"
+	g += int16(v >> 30)
+	// ppc64x:-".*MOVH"
+	g += int16(f >> 16)
+	// ppc64x:-".*MOVB"
+	h := int8(v >> 56)
+	// ppc64x:".*MOVB"
+	h += int8(v >> 28)
+	// ppc64x:-".*MOVB"
+	h += int8(f >> 24)
+	// ppc64x:".*MOVB"
+	h += int8(f >> 16)
+	return int64(h), uint64(g)
+}
+
+func checkShiftAndMask32(v []uint32) { // 32-bit mask and right-shift pairs; ppc64x expects one RLWNM each, or a MOVW from R0 for the four cases whose result is always zero
+	i := 0
+
+	// ppc64x: "RLWNM\t[$]24, R[0-9]+, [$]12, [$]19, R[0-9]+"
+	v[i] = (v[i] & 0xFF00000) >> 8
+	i++
+	// ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]22, [$]29, R[0-9]+"
+	v[i] = (v[i] & 0xFF00) >> 6
+	i++
+	// ppc64x: "MOVW\tR0"
+	v[i] = (v[i] & 0xFF) >> 8
+	i++
+	// ppc64x: "MOVW\tR0"
+	v[i] = (v[i] & 0xF000000) >> 28
+	i++
+	// ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]24, [$]31, R[0-9]+"
+	v[i] = (v[i] >> 6) & 0xFF
+	i++
+	// ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]12, [$]19, R[0-9]+"
+	v[i] = (v[i] >> 6) & 0xFF000
+	i++
+	// ppc64x: "MOVW\tR0"
+	v[i] = (v[i] >> 20) & 0xFF000
+	i++
+	// ppc64x: "MOVW\tR0"
+	v[i] = (v[i] >> 24) & 0xFF00
+	i++
+}
+
+func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) { // array indexing by shifted or masked bits of a uint32; ppc64x expects one RLWNM and no CLRLSLDI or SLD (u is unused)
+	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
+	a[0] = a[uint8(v>>24)]
+	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]21, [$]28, R[0-9]+"
+	b[0] = b[uint8(v>>24)]
+	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
+	b[1] = b[(v>>20)&0xFF]
+	// ppc64x: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]28, R[0-9]+"
+	b[2] = b[v>>25]
+}
+
+func checkMergedShifts64(a [256]uint32, b [256]uint64, c [256]byte, v uint64) { // array indexing by shifted or masked bits of a uint64; the ppc64x checks say where RLWNM is expected and where SRD plus CLRLSLDI is expected instead
+	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
+	a[0] = a[uint8(v>>24)]
+	// ppc64x: "SRD", "CLRLSLDI", -"RLWNM"
+	a[1] = a[uint8(v>>25)]
+	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]9, R[0-9]+, [$]23, [$]29, R[0-9]+"
+	a[2] = a[v>>25&0x7F]
+	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]3, R[0-9]+, [$]29, [$]29, R[0-9]+"
+	a[3] = a[(v>>31)&0x01]
+	// ppc64x: "SRD", "CLRLSLDI", -"RLWNM"
+	a[4] = a[(v>>30)&0x07]
+	// ppc64x: "SRD", "CLRLSLDI", -"RLWNM"
+	a[5] = a[(v>>32)&0x01]
+	// ppc64x: "SRD", "CLRLSLDI", -"RLWNM"
+	a[6] = a[(v>>34)&0x03]
+	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]12, R[0-9]+, [$]21, [$]28, R[0-9]+"
+	b[0] = b[uint8(v>>23)]
+	// ppc64x: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
+	b[1] = b[(v>>20)&0xFF]
+	// ppc64x: "RLWNM", -"SLD"
+	b[2] = b[((uint64((uint32(v) >> 21)) & 0x3f) << 4)]
+	// ppc64x: "RLWNM\t[$]11, R[0-9]+, [$]10, [$]15"
+	c[0] = c[((v>>5)&0x3F)<<16]
+	// ppc64x: "RLWNM\t[$]0, R[0-9]+, [$]19, [$]24"
+	c[1] = c[((v>>7)&0x3F)<<7]
+}
+
+func checkShiftMask(a uint32, b uint64, z []uint32, y []uint64) { // shift and mask results converted between widths; ppc64x expects RLWNM for the z stores and forbids the listed MOVBZ, MOVHZ, MOVWZ, SRW and ANDCC
+	_ = y[128]
+	_ = z[128]
+	// ppc64x: -"MOVBZ", -"SRW", "RLWNM"
+	z[0] = uint32(uint8(a >> 5))
+	// ppc64x: -"MOVBZ", -"SRW", "RLWNM"
+	z[1] = uint32(uint8((a >> 4) & 0x7e))
+	// ppc64x: "RLWNM\t[$]25, R[0-9]+, [$]27, [$]29, R[0-9]+"
+	z[2] = uint32(uint8(a>>7)) & 0x1c
+	// ppc64x: -"MOVWZ"
+	y[0] = uint64((a >> 6) & 0x1c)
+	// ppc64x: -"MOVWZ"
+	y[1] = uint64(uint32(b)<<6) + 1
+	// ppc64x: -"MOVHZ", -"MOVWZ"
+	y[2] = uint64((uint16(a) >> 9) & 0x1F)
+	// ppc64x: -"MOVHZ", -"MOVWZ", -"ANDCC"
+	y[3] = uint64(((uint16(a) & 0xFF0) >> 9) & 0x1F)
+}
+
+// 128 bit shifts
+
+func check128bitShifts(x, y uint64, bits uint) (uint64, uint64) { // ORs x and y shifted by the complementary counts bits&63 and (64-bits)&63; amd64 expects three-operand SHRQ and SHLQ
+	s := bits & 63
+	ŝ := (64 - bits) & 63
+	// check that the shift operation has two commas (three operands)
+	// amd64:"SHRQ.*,.*,"
+	shr := x>>s | y<<ŝ
+	// amd64:"SHLQ.*,.*,"
+	shl := x<<s | y>>ŝ
+	return shr, shl
+}
+
+func checkShiftToMask(u []uint64, s []int64) { // back-to-back opposite shifts by the same constant 5; amd64 expects an ANDQ and none of the listed shift opcodes
+	// amd64:-"SHR",-"SHL","ANDQ"
+	u[0] = u[0] >> 5 << 5
+	// amd64:-"SAR",-"SHL","ANDQ"
+	s[0] = s[0] >> 5 << 5
+	// amd64:-"SHR",-"SHL","ANDQ"
+	u[1] = u[1] << 5 >> 5
+}
--- a/test/codegen/shortcircuit.go
+++ b/test/codegen/shortcircuit.go
@@ -0,0 +1,17 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func efaceExtract(e interface{}) int { // returns the int held in e, or 0 when e does not hold an int
+	// This should be compiled with only
+	// a single conditional jump.
+	// amd64:-"JMP"
+	if x, ok := e.(int); ok {
+		return x
+	}
+	return 0
+}
--- a/test/codegen/slices.go
+++ b/test/codegen/slices.go
@@ -0,0 +1,426 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "unsafe"
+
+// This file contains code generation tests related to the handling of
+// slice types.
+
+// ------------------ //
+//      Clear         //
+// ------------------ //
+
+// Issue #5373 optimize memset idiom
+// Some of the clears get inlined, see #56997
+
+func SliceClear(s []int) []int { // zeroing loop over []int; amd64 and ppc64x expect a memclrNoHeapPointers call
+	// amd64:`.*memclrNoHeapPointers`
+	// ppc64x:`.*memclrNoHeapPointers`
+	for i := range s {
+		s[i] = 0
+	}
+	return s
+}
+
+func SliceClearPointers(s []*int) []*int { // nil-ing loop over []*int; amd64 and ppc64x expect a memclrHasPointers call
+	// amd64:`.*memclrHasPointers`
+	// ppc64x:`.*memclrHasPointers`
+	for i := range s {
+		s[i] = nil
+	}
+	return s
+}
+
+// ------------------ //
+//      Extension     //
+// ------------------ //
+
+// Issue #21266 - avoid makeslice in append(x, make([]T, y)...)
+
+func SliceExtensionConst(s []int) []int { // append of make([]int, 1<<2): no memclrNoHeapPointers, makeslice or panicmakeslicelen calls; amd64 expects a MOVUPS from X15
+	// amd64:-`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:-`.*runtime\.panicmakeslicelen`
+	// amd64:"MOVUPS\tX15"
+	// ppc64x:-`.*runtime\.memclrNoHeapPointers`
+	// ppc64x:-`.*runtime\.makeslice`
+	// ppc64x:-`.*runtime\.panicmakeslicelen`
+	return append(s, make([]int, 1<<2)...)
+}
+
+func SliceExtensionConstInt64(s []int) []int { // same as SliceExtensionConst with the constant length typed int64
+	// amd64:-`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:-`.*runtime\.panicmakeslicelen`
+	// amd64:"MOVUPS\tX15"
+	// ppc64x:-`.*runtime\.memclrNoHeapPointers`
+	// ppc64x:-`.*runtime\.makeslice`
+	// ppc64x:-`.*runtime\.panicmakeslicelen`
+	return append(s, make([]int, int64(1<<2))...)
+}
+
+func SliceExtensionConstUint64(s []int) []int { // same as SliceExtensionConst with the constant length typed uint64
+	// amd64:-`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:-`.*runtime\.panicmakeslicelen`
+	// amd64:"MOVUPS\tX15"
+	// ppc64x:-`.*runtime\.memclrNoHeapPointers`
+	// ppc64x:-`.*runtime\.makeslice`
+	// ppc64x:-`.*runtime\.panicmakeslicelen`
+	return append(s, make([]int, uint64(1<<2))...)
+}
+
+func SliceExtensionConstUint(s []int) []int { // same as SliceExtensionConst with the constant length typed uint
+	// amd64:-`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:-`.*runtime\.panicmakeslicelen`
+	// amd64:"MOVUPS\tX15"
+	// ppc64x:-`.*runtime\.memclrNoHeapPointers`
+	// ppc64x:-`.*runtime\.makeslice`
+	// ppc64x:-`.*runtime\.panicmakeslicelen`
+	return append(s, make([]int, uint(1<<2))...)
+}
+
+// On ppc64x continue to use memclrNoHeapPointers
+// for sizes >= 512.
+func SliceExtensionConst512(s []int) []int { // append of make([]int, 1<<9): amd64 must not call memclrNoHeapPointers, ppc64x must call it
+	// amd64:-`.*runtime\.memclrNoHeapPointers`
+	// ppc64x:`.*runtime\.memclrNoHeapPointers`
+	return append(s, make([]int, 1<<9)...)
+}
+
+func SliceExtensionPointer(s []*int, l int) []*int { // append of a variable-length pointer slice: expects memclrHasPointers and no makeslice
+	// amd64:`.*runtime\.memclrHasPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// ppc64x:`.*runtime\.memclrHasPointers`
+	// ppc64x:-`.*runtime\.makeslice`
+	return append(s, make([]*int, l)...)
+}
+
+func SliceExtensionVar(s []byte, l int) []byte { // append of a variable-length byte slice (int length): expects memclrNoHeapPointers and no makeslice
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// ppc64x:`.*runtime\.memclrNoHeapPointers`
+	// ppc64x:-`.*runtime\.makeslice`
+	return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionVarInt64(s []byte, l int64) []byte { // int64 length: amd64 expects memclrNoHeapPointers and panicmakeslicelen, and no makeslice
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:`.*runtime\.panicmakeslicelen`
+	return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionVarUint64(s []byte, l uint64) []byte { // uint64 length: amd64 expects memclrNoHeapPointers and panicmakeslicelen, and no makeslice
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:`.*runtime\.panicmakeslicelen`
+	return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionVarUint(s []byte, l uint) []byte { // uint length: amd64 expects memclrNoHeapPointers and panicmakeslicelen, and no makeslice
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:`.*runtime\.panicmakeslicelen`
+	return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionInt64(s []int, l64 int64) []int { // checked on 386 only: an int64 length is expected to call makeslice and no memclr routine
+	// 386:`.*runtime\.makeslice`
+	// 386:-`.*runtime\.memclr`
+	return append(s, make([]int, l64)...)
+}
+
+// ------------------ //
+//      Make+Copy     //
+// ------------------ //
+
+// Issue #26252 - avoid memclr for make+copy
+
+func SliceMakeCopyLen(s []int) []int { // make([]int, len(s)) followed by copy: expects mallocgc plus memmove and no makeslice
+	// amd64:`.*runtime\.mallocgc`
+	// amd64:`.*runtime\.memmove`
+	// amd64:-`.*runtime\.makeslice`
+	// ppc64x:`.*runtime\.mallocgc`
+	// ppc64x:`.*runtime\.memmove`
+	// ppc64x:-`.*runtime\.makeslice`
+	a := make([]int, len(s))
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyLenPtr(s []*int) []*int { // make([]*int, len(s)) followed by copy must become one makeslicecopy call, with no makeslice or typedslicecopy
+	// amd64:`.*runtime\.makeslicecopy`
+	// amd64:-`.*runtime\.makeslice\(`
+	// amd64:-`.*runtime\.typedslicecopy`
+	// ppc64x:`.*runtime\.makeslicecopy`
+	// ppc64x:-`.*runtime\.makeslice\(`
+	// ppc64x:-`.*runtime\.typedslicecopy`
+	a := make([]*int, len(s))
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyConst(s []int) []int { // constant-length make followed by copy: amd64 expects makeslicecopy and no separate makeslice or memmove
+	// amd64:`.*runtime\.makeslicecopy`
+	// amd64:-`.*runtime\.makeslice\(`
+	// amd64:-`.*runtime\.memmove`
+	a := make([]int, 4)
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyConstPtr(s []*int) []*int { // constant-length make of pointers followed by copy must become one makeslicecopy call, with no makeslice or typedslicecopy
+	// amd64:`.*runtime\.makeslicecopy`
+	// amd64:-`.*runtime\.makeslice\(`
+	// amd64:-`.*runtime\.typedslicecopy`
+	a := make([]*int, 4)
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoOptNoDeref(s []*int) []*int { // destination is the dereference *a, so separate makeslice and typedslicecopy calls are expected
+	a := new([]*int)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	*a = make([]*int, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(*a, s)
+	return *a
+}
+
+func SliceMakeCopyNoOptNoVar(s []*int) []*int { // destination is the element a[0] rather than a plain variable, so separate makeslice and typedslicecopy calls are expected
+	a := make([][]*int, 1)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a[0] = make([]*int, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(a[0], s)
+	return a[0]
+}
+
+func SliceMakeCopyNoOptBlank(s []*int) []*int { // the make result goes to the blank identifier and copy targets the nil slice a; no makeslicecopy expected
+	var a []*int
+	// amd64:-`.*runtime\.makeslicecopy`
+	_ = make([]*int, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoOptNoMake(s []*int) []*int { // a comes from *new([]*int) instead of make; expects typedslicecopy and neither makeslicecopy nor objectnew
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:-`.*runtime\.objectnew`
+	a := *new([]*int)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoOptNoHeapAlloc(s []*int) int { // only cap(a) is returned, not a itself; expects typedslicecopy and no makeslicecopy
+	// amd64:-`.*runtime\.makeslicecopy`
+	a := make([]*int, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(a, s)
+	return cap(a)
+}
+
+func SliceMakeCopyNoOptNoCap(s []*int) []*int { // make passes an explicit capacity (len 0, cap 4), so separate makeslice and typedslicecopy calls are expected
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 0, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoOptNoCopy(s []*int) []*int { // copy is shadowed by a local no-op function, so makeslice is expected and makeslicecopy is not
+	copy := func(x, y []*int) {}
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoOptWrongOrder(s []*int) []*int { // copy(s, a) copies out of the new slice rather than into it; expects makeslice and typedslicecopy
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 4)
+	// amd64:`.*runtime\.typedslicecopy`
+	// amd64:-`.*runtime\.makeslicecopy`
+	copy(s, a)
+	return a
+}
+
+func SliceMakeCopyNoOptWrongAssign(s []*int) []*int { // make is assigned to s while copy targets the nil slice a; expects makeslice and typedslicecopy
+	var a []*int
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	s = make([]*int, 4)
+	// amd64:`.*runtime\.typedslicecopy`
+	// amd64:-`.*runtime\.makeslicecopy`
+	copy(a, s)
+	return s
+}
+
+func SliceMakeCopyNoOptCopyLength(s []*int) (int, []*int) { // the count returned by copy is used; expects makeslice and typedslicecopy
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 4)
+	// amd64:`.*runtime\.typedslicecopy`
+	// amd64:-`.*runtime\.makeslicecopy`
+	n := copy(a, s)
+	return n, a
+}
+
+func SliceMakeCopyNoOptSelfCopy(s []*int) []*int { // copy(a, a) uses the new slice as its own source; expects makeslice and typedslicecopy
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 4)
+	// amd64:`.*runtime\.typedslicecopy`
+	// amd64:-`.*runtime\.makeslicecopy`
+	copy(a, a)
+	return a
+}
+
+func SliceMakeCopyNoOptTargetReference(s []*int) []*int { // the copy source s[:len(a)] refers to the destination a; expects makeslice and typedslicecopy
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 4)
+	// amd64:`.*runtime\.typedslicecopy`
+	// amd64:-`.*runtime\.makeslicecopy`
+	copy(a, s[:len(a)])
+	return a
+}
+
+func SliceMakeCopyNoOptCap(s []int) []int { // make passes len(s) with an explicit capacity 9; expects makeslice and memmove
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]int, len(s), 9)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.memmove`
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoMemmoveDifferentLen(s []int) []int { // make length is len(s)-1, not len(s); amd64 expects makeslicecopy and no memmove
+	// amd64:`.*runtime\.makeslicecopy`
+	// amd64:-`.*runtime\.memmove`
+	a := make([]int, len(s)-1)
+	// amd64:-`.*runtime\.memmove`
+	copy(a, s)
+	return a
+}
+
+func SliceMakeEmptyPointerToZerobase() []int { // zero-length make: amd64 expects an LEAQ of runtime.zerobase and no makeslice call
+	// amd64:`LEAQ.+runtime\.zerobase`
+	// amd64:-`.*runtime\.makeslice`
+	return make([]int, 0)
+}
+
+// ---------------------- //
+//   Nil check of &s[0]   //
+// ---------------------- //
+// See issue 30366
+func SliceNilCheck(s []int) { // dereferences p = &s[0]; amd64 must not emit a TESTB for *p
+	p := &s[0]
+	// amd64:-`TESTB`
+	_ = *p
+}
+
+// ---------------------- //
+//   Init slice literal   //
+// ---------------------- //
+// See issue 21561
+func InitSmallSliceLiteral() []int { // one-element literal: amd64 expects a direct MOVQ of the constant 42
+	// amd64:`MOVQ\t[$]42`
+	return []int{42}
+}
+
+func InitNotSmallSliceLiteral() []int {
+	// amd64:`LEAQ\t.*stmp_`
+	return []int{
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+	}
+}
+
+// --------------------------------------- //
+//   Test PPC64 SUBFCconst folding rules   //
+//   triggered by slice operations.        //
+// --------------------------------------- //
+
+func SliceWithConstCompare(a []int, b int) []int { // slices the 5-element literal c at b when b+len(a) < len(c); ppc64x must not emit NEG
+	var c []int = []int{1, 2, 3, 4, 5}
+	if b+len(a) < len(c) {
+		// ppc64x:-"NEG"
+		return c[b:]
+	}
+	return a
+}
+
+func SliceWithSubtractBound(a []int, b int) []int { // slice bound computed as 3 - b: ppc64x expects SUBC and no NEG
+	// ppc64x:"SUBC",-"NEG"
+	return a[(3 - b):]
+}
+
+// --------------------------------------- //
+//   Code generation for unsafe.Slice      //
+// --------------------------------------- //
+
+func Slice1(p *byte, i int) []byte { // unsafe.Slice over a byte pointer: amd64 must not emit MULQ
+	// amd64:-"MULQ"
+	return unsafe.Slice(p, i)
+}
+func Slice0(p *struct{}, i int) []struct{} { // unsafe.Slice over an empty-struct pointer: amd64 must not emit MULQ
+	// amd64:-"MULQ"
+	return unsafe.Slice(p, i)
+}
--- a/test/codegen/smallintiface.go
+++ b/test/codegen/smallintiface.go
@@ -0,0 +1,22 @@
+// asmcheck
+
+package codegen
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+func booliface() interface{} { // boxes true into an interface: amd64 expects the address runtime.staticuint64s+8
+	// amd64:`LEAQ\truntime.staticuint64s\+8\(SB\)`
+	return true
+}
+
+func smallint8iface() interface{} { // boxes int8(-3) into an interface: amd64 expects the address runtime.staticuint64s+2024
+	// amd64:`LEAQ\truntime.staticuint64s\+2024\(SB\)`
+	return int8(-3)
+}
+
+func smalluint8iface() interface{} { // boxes uint8(3) into an interface: amd64 expects the address runtime.staticuint64s+24
+	// amd64:`LEAQ\truntime.staticuint64s\+24\(SB\)`
+	return uint8(3)
+}
--- a/test/codegen/spectre.go
+++ b/test/codegen/spectre.go
@@ -0,0 +1,39 @@
+// asmcheck -gcflags=-spectre=index
+
+//go:build amd64
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func IndexArray(x *[10]int, i int) int { // array index compiled with -spectre=index: amd64 expects CMOVQCC
+	// amd64:`CMOVQCC`
+	return x[i]
+}
+
+func IndexString(x string, i int) byte { // string index compiled with -spectre=index: amd64 expects CMOVQLS or CMOVQCC
+	// amd64:`CMOVQ(LS|CC)`
+	return x[i]
+}
+
+func IndexSlice(x []float64, i int) float64 { // slice index compiled with -spectre=index: amd64 expects CMOVQLS or CMOVQCC
+	// amd64:`CMOVQ(LS|CC)`
+	return x[i]
+}
+
+func SliceArray(x *[10]int, i, j int) []int { // two-index slice of an array compiled with -spectre=index: amd64 expects CMOVQHI
+	// amd64:`CMOVQHI`
+	return x[i:j]
+}
+
+func SliceString(x string, i, j int) string { // two-index slice of a string compiled with -spectre=index: amd64 expects CMOVQHI
+	// amd64:`CMOVQHI`
+	return x[i:j]
+}
+
+func SliceSlice(x []float64, i, j int) []float64 { // two-index slice of a slice compiled with -spectre=index: amd64 expects CMOVQHI
+	// amd64:`CMOVQHI`
+	return x[i:j]
+}
--- a/test/codegen/stack.go
+++ b/test/codegen/stack.go
@@ -0,0 +1,144 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "runtime"
+
+// This file contains code generation tests related to the use of the
+// stack.
+
+// Check that stack stores are optimized away.
+
+// 386:"TEXT\t.*, [$]0-"
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]-4-"
+// arm64:"TEXT\t.*, [$]0-"
+// mips:"TEXT\t.*, [$]-4-"
+// ppc64x:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func StackStore() int { // reads x back through its own address; the preceding checks match the TEXT line of this function per architecture
+	var x int
+	return *(&x)
+}
+
+type T struct { // four exported and three unexported int fields; used by ZeroLargeStruct and KeepWanted
+	A, B, C, D int // keep exported fields
+	x, y, z    int // reset unexported fields
+}
+
+// Check that large structs are cleared directly (issue #24416).
+
+// 386:"TEXT\t.*, [$]0-"
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]0-" (spills return address)
+// arm64:"TEXT\t.*, [$]0-"
+// mips:"TEXT\t.*, [$]-4-"
+// ppc64x:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func ZeroLargeStruct(x *T) { // assigns a zero T to *x through a local copy; the preceding checks match the TEXT line per architecture
+	t := T{}
+	*x = t
+}
+
+// Check that structs are partially initialised directly (issue #24386).
+
+// Notes:
+// - 386 fails due to spilling a register
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]0-" (spills return address)
+// arm64:"TEXT\t.*, [$]0-"
+// ppc64x:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+// Note that 386 currently has to spill a register.
+func KeepWanted(t *T) { // rewrites *t keeping the exported fields A-D; the unexported fields are left out of the literal and so become zero
+	*t = T{A: t.A, B: t.B, C: t.C, D: t.D}
+}
+
+// Check that small array operations avoid using the stack (issue #15925).
+
+// Notes:
+// - 386 fails due to spilling a register
+// - arm & mips fail due to softfloat calls
+// amd64:"TEXT\t.*, [$]0-"
+// arm64:"TEXT\t.*, [$]0-"
+// ppc64x:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func ArrayAdd64(a, b [4]float64) [4]float64 { // element-wise sum of two [4]float64 values returned as a new array
+	return [4]float64{a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]}
+}
+
+// Check that small array initialization avoids using the stack.
+
+// 386:"TEXT\t.*, [$]0-"
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]0-" (spills return address)
+// arm64:"TEXT\t.*, [$]0-"
+// mips:"TEXT\t.*, [$]-4-"
+// ppc64x:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func ArrayInit(i, j int) [4]int { // builds a [4]int holding i and j at indexes 0 and 2 and zero elsewhere
+	return [4]int{i, 0, j, 0}
+}
+
+// Check that assembly output has matching offset and base register
+// (issue #21064).
+
+func check_asmout(b [2]int) int { // returns b[1] after a runtime.GC call; expects it addressed as b+24(SP) on amd64 and b+4(FP) on arm
+	runtime.GC() // use some frame
+	// amd64:`.*b\+24\(SP\)`
+	// arm:`.*b\+4\(FP\)`
+	return b[1]
+}
+
+// Check that simple functions get promoted to nosplit, even when
+// they might panic in various ways. See issue 31219.
+// amd64:"TEXT\t.*NOSPLIT.*"
+func MightPanic(a []int, i, j, k, s int) { // index, slice, shift and divide operations, each of which can panic as noted per line
+	_ = a[i]     // panicIndex
+	_ = a[i:j]   // panicSlice
+	_ = a[i:j:k] // also panicSlice
+	_ = i << s   // panicShift
+	_ = i / j    // panicDivide
+}
+
+// Put a defer in a loop, so second defer is not open-coded
+func Defer() { // defers inside a two-iteration loop, then once more after it; amd64 expects a deferprocStack call for the final defer
+	for i := 0; i < 2; i++ {
+		defer func() {}()
+	}
+	// amd64:`CALL\truntime\.deferprocStack`
+	defer func() {}()
+}
+
+// Check that stack slots are shared among values of the same
+// type, but not pointer-identical types. See issue 65783.
+
+func spillSlotReuse() { // two stores through getp1 and getp2 results indexed by nopInt; arm64 expects both to use the same autotmp_2-8(SP) slot
+	// The return values of getp1 and getp2 need to be
+	// spilled around the calls to nopInt. Make sure that
+	// spill slot gets reused.
+
+	//arm64:`.*autotmp_2-8\(SP\)`
+	getp1()[nopInt()] = 0
+	//arm64:`.*autotmp_2-8\(SP\)`
+	getp2()[nopInt()] = 0
+}
+
+//go:noinline
+func nopInt() int { // non-inlined helper returning 0; called by spillSlotReuse as the index expression
+	return 0
+}
+
+//go:noinline
+func getp1() *[4]int { // non-inlined helper returning a nil *[4]int; called by spillSlotReuse
+	return nil
+}
+
+//go:noinline
+func getp2() *[4]int { // non-inlined helper returning a nil *[4]int; called by spillSlotReuse
+	return nil
+}
--- a/test/codegen/strings.go
+++ b/test/codegen/strings.go
@@ -0,0 +1,80 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains code generation tests related to the handling of
+// string types.
+
+func CountRunes(s string) int { // Issue #24923: len([]rune(s)) is expected to call countrunes on amd64
+	// amd64:`.*countrunes`
+	return len([]rune(s))
+}
+
+func CountBytes(s []byte) int { // len(string(s)): amd64 must not call slicebytetostring
+	// amd64:-`.*runtime.slicebytetostring`
+	return len(string(s))
+}
+
+func ToByteSlice() []byte { // Issue #24698: converting a 3-byte constant string should allocate a [3]uint8 via newobject, not call stringtoslicebyte
+	// amd64:`LEAQ\ttype:\[3\]uint8`
+	// amd64:`CALL\truntime\.newobject`
+	// amd64:-`.*runtime.stringtoslicebyte`
+	return []byte("foo")
+}
+
+// Loading from read-only symbols should get transformed into constants.
+func ConstantLoad() { // stores []byte conversions of constant strings of several lengths into bsink; the checks expect the bytes to appear as immediate constants
+	// 12592 = 0x3130
+	//    50 = 0x32
+	// amd64:`MOVW\t\$12592, \(`,`MOVB\t\$50, 2\(`
+	//   386:`MOVW\t\$12592, \(`,`MOVB\t\$50, 2\(`
+	//   arm:`MOVW\t\$48`,`MOVW\t\$49`,`MOVW\t\$50`
+	// arm64:`MOVD\t\$12592`,`MOVD\t\$50`
+	//  wasm:`I64Const\t\$12592`,`I64Store16\t\$0`,`I64Const\t\$50`,`I64Store8\t\$2`
+	// mips64:`MOVV\t\$48`,`MOVV\t\$49`,`MOVV\t\$50`
+	bsink = []byte("012")
+
+	// 858927408 = 0x33323130
+	//     13620 = 0x3534
+	// amd64:`MOVL\t\$858927408`,`MOVW\t\$13620, 4\(`
+	//   386:`MOVL\t\$858927408`,`MOVW\t\$13620, 4\(`
+	// arm64:`MOVD\t\$858927408`,`MOVD\t\$13620`
+	//  wasm:`I64Const\t\$858927408`,`I64Store32\t\$0`,`I64Const\t\$13620`,`I64Store16\t\$4`
+	bsink = []byte("012345")
+
+	// 3978425819141910832 = 0x3736353433323130
+	// 7306073769690871863 = 0x6564636261393837
+	// amd64:`MOVQ\t\$3978425819141910832`,`MOVQ\t\$7306073769690871863`
+	//   386:`MOVL\t\$858927408, \(`,`DUFFCOPY`
+	// arm64:`MOVD\t\$3978425819141910832`,`MOVD\t\$7306073769690871863`,`MOVD\t\$15`
+	//  wasm:`I64Const\t\$3978425819141910832`,`I64Store\t\$0`,`I64Const\t\$7306073769690871863`,`I64Store\t\$7`
+	bsink = []byte("0123456789abcde")
+
+	// 56 = 0x38
+	// amd64:`MOVQ\t\$3978425819141910832`,`MOVB\t\$56`
+	bsink = []byte("012345678")
+
+	// 14648 = 0x3938
+	// amd64:`MOVQ\t\$3978425819141910832`,`MOVW\t\$14648`
+	bsink = []byte("0123456789")
+
+	// 1650538808 = 0x62613938
+	// amd64:`MOVQ\t\$3978425819141910832`,`MOVL\t\$1650538808`
+	bsink = []byte("0123456789ab")
+}
+
+// self-equality is always true. See issue 60777.
+func EqualSelf(s string) bool { // s == s: amd64 expects the constant 1 moved into AX and no memequal call
+	// amd64:`MOVL\t\$1, AX`,-`.*memequal.*`
+	return s == s
+}
+func NotEqualSelf(s string) bool { // s != s: amd64 expects AX cleared with XORL and no memequal call
+	// amd64:`XORL\tAX, AX`,-`.*memequal.*`
+	return s != s
+}
+
+var bsink []byte // package-level sink that ConstantLoad assigns its byte slices to
--- a/test/codegen/structs.go
+++ b/test/codegen/structs.go
@@ -0,0 +1,48 @@
+// asmcheck
+
+//go:build !goexperiment.cgocheck2
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains code generation tests related to the handling of
+// struct types.
+
+// ------------- //
+//    Zeroing    //
+// ------------- //
+
+type Z1 struct { // three int fields; zeroed as a whole by Zero1
+	a, b, c int
+}
+
+func Zero1(t *Z1) { // Issue #18370. On amd64 zeroing a Z1 is expected to use a MOVUPS from an X register at offset 0 plus a MOVQ $0 at offset 16
+	// amd64:`MOVUPS\tX[0-9]+, \(.*\)`,`MOVQ\t\$0, 16\(.*\)`
+	*t = Z1{}
+}
+
+type Z2 struct { // same layout as Z1 but with pointer fields; zeroed as a whole by Zero2
+	a, b, c *int
+}
+
+func Zero2(t *Z2) { // expects the same stores as Zero1; the fields are pointers and a runtime.gcWriteBarrier call is expected as well
+	// amd64:`MOVUPS\tX[0-9]+, \(.*\)`,`MOVQ\t\$0, 16\(.*\)`
+	// amd64:`.*runtime[.]gcWriteBarrier.*\(SB\)`
+	*t = Z2{}
+}
+
+// ------------------ //
+//    Initializing    //
+// ------------------ //
+
+type I1 struct { // four int fields; initialized with constants by Init1
+	a, b, c, d int
+}
+
+func Init1(p *I1) { // Issue #18872. On amd64 each of the constants 1 through 4 is expected to appear as a MOVQ immediate
+	// amd64:`MOVQ\t[$]1`,`MOVQ\t[$]2`,`MOVQ\t[$]3`,`MOVQ\t[$]4`
+	*p = I1{1, 2, 3, 4}
+}
--- a/test/codegen/switch.go
+++ b/test/codegen/switch.go
@@ -0,0 +1,185 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// These tests check code generation of switch statements.
+
+package codegen
+
+// f switches on a string with an empty-string case and three one-byte cases; the amd64 check expects no cmpstring in the generated code. See issue 33934.
+func f(x string) int {
+	// amd64:-`cmpstring`
+	switch x {
+	case "":
+		return -1
+	case "1", "2", "3":
+		return -2
+	default:
+		return -3
+	}
+}
+
+// square switches over the eight consecutive int cases 1 through 8. Use jump tables for 8+ int cases; the checks look for an indirect JMP.
+func square(x int) int {
+	// amd64:`JMP\s\(.*\)\(.*\)$`
+	// arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
+	switch x {
+	case 1:
+		return 1
+	case 2:
+		return 4
+	case 3:
+		return 9
+	case 4:
+		return 16
+	case 5:
+		return 25
+	case 6:
+		return 36
+	case 7:
+		return 49
+	case 8:
+		return 64
+	default:
+		return x * x
+	}
+}
+
+// length switches over eight string cases whose lengths are 1 through 8. Use jump tables for 8+ string lengths; the checks look for an indirect JMP.
+func length(x string) int {
+	// amd64:`JMP\s\(.*\)\(.*\)$`
+	// arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
+	switch x {
+	case "a":
+		return 1
+	case "bb":
+		return 2
+	case "ccc":
+		return 3
+	case "dddd":
+		return 4
+	case "eeeee":
+		return 5
+	case "ffffff":
+		return 6
+	case "ggggggg":
+		return 7
+	case "hhhhhhhh":
+		return 8
+	default:
+		return len(x)
+	}
+}
+
+// mimetype switches over four 4-byte extensions. Use single-byte ordered comparisons for binary searching strings (104 is ASCII 'h', compared against the byte at index 1). The per-case constants are the case strings read as little-endian 32-bit values, for example 1836345390 = 0x6d74682e for .htm.
+// See issue 53333.
+func mimetype(ext string) string {
+	// amd64: `CMPB\s1\(.*\), \$104$`,-`cmpstring`
+	// arm64: `MOVB\s1\(R.*\), R.*$`, `CMPW\s\$104, R.*$`, -`cmpstring`
+	switch ext {
+	// amd64: `CMPL\s\(.*\), \$1836345390$`
+	// arm64: `MOVD\s\$1836345390`, `CMPW\sR.*, R.*$`
+	case ".htm":
+		return "A"
+	// amd64: `CMPL\s\(.*\), \$1953457454$`
+	// arm64: `MOVD\s\$1953457454`, `CMPW\sR.*, R.*$`
+	case ".eot":
+		return "B"
+	// amd64: `CMPL\s\(.*\), \$1735815982$`
+	// arm64: `MOVD\s\$1735815982`, `CMPW\sR.*, R.*$`
+	case ".svg":
+		return "C"
+	// amd64: `CMPL\s\(.*\), \$1718907950$`
+	// arm64: `MOVD\s\$1718907950`, `CMPW\sR.*, R.*$`
+	case ".ttf":
+		return "D"
+	default:
+		return ""
+	}
+}
+
+// typeSwitch switches over five concrete integer types and returns 7 when none matches. Use jump tables for type switches to concrete types; the checks look for an indirect JMP.
+func typeSwitch(x any) int {
+	// amd64:`JMP\s\(.*\)\(.*\)$`
+	// arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
+	switch x.(type) {
+	case int:
+		return 0
+	case int8:
+		return 1
+	case int16:
+		return 2
+	case int32:
+		return 3
+	case int64:
+		return 4
+	}
+	return 7
+}
+
+type I interface { // single-method interface; target of the type switches and assertions below
+	foo()
+}
+type J interface { // single-method interface; second case of the interface type switches below
+	bar()
+}
+type IJ interface { // embeds I and J, so its method set is foo and bar; parameter type of interfaceConv
+	I
+	J
+}
+type K interface { // declares only baz, so a K is not statically an I or a J; operand type of interfaceSwitch2 and interfaceCast2
+	baz()
+}
+
+// interfaceSwitch type-switches an any value over the interface types I and J. Use a runtime call (runtime.interfaceSwitch in the checks below) for type switches to interface types.
+func interfaceSwitch(x any) int {
+	// amd64:`CALL\truntime.interfaceSwitch`,`MOVL\t16\(AX\)`,`MOVQ\t8\(.*\)(.*\*8)`
+	// arm64:`CALL\truntime.interfaceSwitch`,`LDAR`,`MOVWU\t16\(R0\)`,`MOVD\t\(R.*\)\(R.*\)`
+	switch x.(type) {
+	case I:
+		return 1
+	case J:
+		return 2
+	default:
+		return 3
+	}
+}
+
+func interfaceSwitch2(x K) int { // same switch and same expected checks as interfaceSwitch, but the operand has the non-empty interface type K
+	// amd64:`CALL\truntime.interfaceSwitch`,`MOVL\t16\(AX\)`,`MOVQ\t8\(.*\)(.*\*8)`
+	// arm64:`CALL\truntime.interfaceSwitch`,`LDAR`,`MOVWU\t16\(R0\)`,`MOVD\t\(R.*\)\(R.*\)`
+	switch x.(type) {
+	case I:
+		return 1
+	case J:
+		return 2
+	default:
+		return 3
+	}
+}
+
+func interfaceCast(x any) int { // comma-ok assertion of an any value to interface I; a runtime.typeAssert call is expected
+	// amd64:`CALL\truntime.typeAssert`,`MOVL\t16\(AX\)`,`MOVQ\t8\(.*\)(.*\*1)`
+	// arm64:`CALL\truntime.typeAssert`,`LDAR`,`MOVWU\t16\(R0\)`,`MOVD\t\(R.*\)\(R.*\)`
+	if _, ok := x.(I); ok {
+		return 3
+	}
+	return 5
+}
+
+func interfaceCast2(x K) int { // same assertion and same expected checks as interfaceCast, but the operand has the non-empty interface type K
+	// amd64:`CALL\truntime.typeAssert`,`MOVL\t16\(AX\)`,`MOVQ\t8\(.*\)(.*\*1)`
+	// arm64:`CALL\truntime.typeAssert`,`LDAR`,`MOVWU\t16\(R0\)`,`MOVD\t\(R.*\)\(R.*\)`
+	if _, ok := x.(I); ok {
+		return 3
+	}
+	return 5
+}
+
+func interfaceConv(x IJ) I { // returns an IJ value as an I; the checks expect the same runtime.typeAssert sequence as interfaceCast
+	// amd64:`CALL\truntime.typeAssert`,`MOVL\t16\(AX\)`,`MOVQ\t8\(.*\)(.*\*1)`
+	// arm64:`CALL\truntime.typeAssert`,`LDAR`,`MOVWU\t16\(R0\)`,`MOVD\t\(R.*\)\(R.*\)`
+	return x
+}
--- a/test/codegen/writebarrier.go
+++ b/test/codegen/writebarrier.go
@@ -0,0 +1,55 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func combine2string(p *[2]string, a, b string) { // two adjacent string stores; one runtime.gcWriteBarrier4 call is expected at the first store and none at the second, so the barriers are presumably combined
+	// amd64:`.*runtime[.]gcWriteBarrier4\(SB\)`
+	// arm64:`.*runtime[.]gcWriteBarrier4\(SB\)`
+	p[0] = a
+	// amd64:-`.*runtime[.]gcWriteBarrier`
+	// arm64:-`.*runtime[.]gcWriteBarrier`
+	p[1] = b
+}
+
+func combine4string(p *[4]string, a, b, c, d string) { // four adjacent string stores; one runtime.gcWriteBarrier8 call is expected at the first store and none at the other three
+	// amd64:`.*runtime[.]gcWriteBarrier8\(SB\)`
+	// arm64:`.*runtime[.]gcWriteBarrier8\(SB\)`
+	p[0] = a
+	// amd64:-`.*runtime[.]gcWriteBarrier`
+	// arm64:-`.*runtime[.]gcWriteBarrier`
+	p[1] = b
+	// amd64:-`.*runtime[.]gcWriteBarrier`
+	// arm64:-`.*runtime[.]gcWriteBarrier`
+	p[2] = c
+	// amd64:-`.*runtime[.]gcWriteBarrier`
+	// arm64:-`.*runtime[.]gcWriteBarrier`
+	p[3] = d
+}
+
+func combine2slice(p *[2][]byte, a, b []byte) { // slice variant of combine2string; one runtime.gcWriteBarrier4 call is expected at the first store and none at the second
+	// amd64:`.*runtime[.]gcWriteBarrier4\(SB\)`
+	// arm64:`.*runtime[.]gcWriteBarrier4\(SB\)`
+	p[0] = a
+	// amd64:-`.*runtime[.]gcWriteBarrier`
+	// arm64:-`.*runtime[.]gcWriteBarrier`
+	p[1] = b
+}
+
+func combine4slice(p *[4][]byte, a, b, c, d []byte) { // slice variant of combine4string; one runtime.gcWriteBarrier8 call is expected at the first store and none at the other three
+	// amd64:`.*runtime[.]gcWriteBarrier8\(SB\)`
+	// arm64:`.*runtime[.]gcWriteBarrier8\(SB\)`
+	p[0] = a
+	// amd64:-`.*runtime[.]gcWriteBarrier`
+	// arm64:-`.*runtime[.]gcWriteBarrier`
+	p[1] = b
+	// amd64:-`.*runtime[.]gcWriteBarrier`
+	// arm64:-`.*runtime[.]gcWriteBarrier`
+	p[2] = c
+	// amd64:-`.*runtime[.]gcWriteBarrier`
+	// arm64:-`.*runtime[.]gcWriteBarrier`
+	p[3] = d
+}
--- a/test/codegen/zerosize.go
+++ b/test/codegen/zerosize.go
@@ -0,0 +1,25 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Make sure a pointer variable and a zero-sized variable
+// aren't allocated to the same stack slot.
+// See issue 24993.
+
+package codegen
+
+func zeroSize() { // the amd64 checks expect s at offset 56 from SP and, on the send, an address at offset 55 (presumably the zero-sized value), so the two do not share a slot
+	c := make(chan struct{})
+	// amd64:`MOVQ\t\$0, command-line-arguments\.s\+56\(SP\)`
+	var s *int
+	// force s to be a stack object (its address is passed to g), also use some (fixed) stack space
+	g(&s, 1, 2, 3, 4, 5)
+
+	// amd64:`LEAQ\tcommand-line-arguments\..*\+55\(SP\)`
+	c <- struct{}{}
+}
+
+//go:noinline
+func g(**int, int, int, int, int, int) {} // empty callee for zeroSize; the noinline directive above keeps it from being inlined