Initial commit: Go 1.23 release state
This commit is contained in:
182
src/runtime/memclr_arm64.s
Normal file
182
src/runtime/memclr_arm64.s
Normal file
@@ -0,0 +1,182 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// See memclrNoHeapPointers Go doc for important implementation constraints.
|
||||
|
||||
// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
// Also called from assembly in sys_windows_arm64.s without g (but using Go stack convention).
//
// Zeroes the n bytes starting at ptr. The routine uses no stack frame and
// makes no calls.
//
// Register use:
//	R0	ptr on entry, then the store cursor
//	R1	n on entry, then the number of bytes still to clear
//	R3	scratch (tail size bits, DCZID_EL0 value, n after block alignment)
//	R4	distance from R0 to the next 16-byte or ZVA-block boundary
//	R5	ZVA block size in bytes (cached in block_size<>)
//	R6	R5-1, mask for block alignment and for the post-ZVA remainder
//	R7	ptr+n, end pointer used by the n < 16 paths
//	R9	scratch
//
// Strategy:
//	n < 16	two possibly overlapping scalar stores (start- and end-anchored)
//	n == 16	one STP
//	n > 16	align R0 to 16, then either a 64-byte STP loop (no_zva) or,
//		when n >= 128 and DC ZVA is usable with a block of at least
//		64 bytes, whole-block DC ZVA (zero_by_line); the remaining
//		0..63 bytes are cleared by tail63.
TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB),NOSPLIT,$0-16
	CMP	$16, R1
	// If n is equal to 16 bytes, use zero_exact_16 to zero
	BEQ	zero_exact_16

	// If n is greater than 16 bytes, use zero_by_16 to zero
	BHI	zero_by_16

	// n is less than 16 bytes. R7 = ptr+n, one past the last byte.
	// Each size class below is covered by one store anchored at the start
	// and one anchored at the end; the two stores may overlap.
	ADD	R1, R0, R7
	TBZ	$3, R1, less_than_8
	// 8 <= n <= 15
	MOVD	ZR, (R0)
	MOVD	ZR, -8(R7)
	RET

less_than_8:
	TBZ	$2, R1, less_than_4
	// 4 <= n <= 7
	MOVW	ZR, (R0)
	MOVW	ZR, -4(R7)
	RET

less_than_4:
	CBZ	R1, ending
	// 1 <= n <= 3: clear the first byte, then the last two bytes if n >= 2
	MOVB	ZR, (R0)
	TBZ	$1, R1, ending
	MOVH	ZR, -2(R7)

ending:
	RET

zero_exact_16:
	// n is exactly 16 bytes
	STP	(ZR, ZR), (R0)
	RET

zero_by_16:
	// n greater than 16 bytes, check if the start address is aligned.
	// R4 = (-ptr) & 15 = bytes up to the next 16-byte boundary.
	NEG	R0, R4
	ANDS	$15, R4, R4
	// Try zeroing using zva if the start address is aligned with 16
	BEQ	try_zva

	// Non-aligned store: clears the first 16 bytes (n > 16, so this stays
	// inside the buffer) and thereby everything below the boundary.
	STP	(ZR, ZR), (R0)
	// Make the destination aligned
	SUB	R4, R1, R1
	ADD	R4, R0, R0
	B	try_zva

tail_maybe_long:
	// Remainder after the ZVA loop. With a block size above 64 it can
	// still be 64 bytes or more, in which case the STP loop handles it.
	CMP	$64, R1
	BHS	no_zva

tail63:
	// Clear the low 6 bits' worth of R1 (0..63 bytes). Only bits 5:0 of R1
	// are examined here, so R1 may hold a wrapped/negative count.
	// R3 = 0, 16, 32 or 48 selects how many 16-byte stores run.
	// Note the label names: entering at last48 clears 32 bytes and
	// entering at last32 clears 16 bytes.
	ANDS	$48, R1, R3
	BEQ	last16
	CMPW	$32, R3
	BEQ	last48
	BLT	last32
	STP.P	(ZR, ZR), 16(R0)
last48:
	STP.P	(ZR, ZR), 16(R0)
last32:
	STP.P	(ZR, ZR), 16(R0)
	// The last store length is at most 16, so it is safe to use
	// stp to write last 16 bytes: the store is anchored at the end of the
	// buffer and overlaps bytes that have already been cleared.
last16:
	ANDS	$15, R1, R1
	CBZ	R1, last_end
	ADD	R1, R0, R0
	STP	(ZR, ZR), -16(R0)
last_end:
	RET

no_zva:
	// Plain store loop, 64 bytes per iteration; requires R1 >= 64 on entry.
	// R0 is biased by -16 so the loop can use offsets 16..64 and let the
	// final pre-indexed store (STP.W) advance R0 by 64. R1 is biased by -64
	// so that SUBS/BGE stops once fewer than 64 bytes remain.
	SUB	$16, R0, R0
	SUB	$64, R1, R1

loop_64:
	STP	(ZR, ZR), 16(R0)
	STP	(ZR, ZR), 32(R0)
	STP	(ZR, ZR), 48(R0)
	STP.W	(ZR, ZR), 64(R0)
	SUBS	$64, R1, R1
	BGE	loop_64
	// R1 is now negative; its low 6 bits are the bytes still to clear.
	// ADD does not touch the flags set by ANDS.
	ANDS	$63, R1, ZR
	ADD	$16, R0, R0
	BNE	tail63
	RET

try_zva:
	// Try using the ZVA feature to zero entire cache lines
	// It is not meaningful to use ZVA if the block size is less than 64,
	// so make sure that n is greater than or equal to 64
	CMP	$63, R1
	BLE	tail63

	CMP	$128, R1
	// Ensure n is at least 128 bytes, so that there is enough to zero after
	// alignment.
	BLT	no_zva
	// Check if ZVA is allowed from user code, and if so get the block size.
	// block_size<>: 0 = not probed yet, bit 31 set = do not use ZVA,
	// anything else = usable block size in bytes.
	MOVW	block_size<>(SB), R5
	TBNZ	$31, R5, no_zva
	CBNZ	R5, zero_by_line
	// DCZID_EL0 bit assignments
	// [63:5] Reserved
	// [4]    DZP, if bit set DC ZVA instruction is prohibited, else permitted
	// [3:0]  log2 of the block size in words, eg. if it returns 0x4 then block size is 16 words
	MRS	DCZID_EL0, R3
	TBZ	$4, R3, init

zva_unusable:
	// ZVA not available: either prohibited (DZP set) or its block size is
	// below 64 bytes. Cache that verdict so later calls go straight to the
	// store loop.
	MOVW	$~0, R5
	MOVW	R5, block_size<>(SB)
	B	no_zva

init:
	// R5 = 4 << DCZID_EL0[3:0] = block size in bytes.
	MOVW	$4, R9
	ANDW	$15, R3, R5
	LSLW	R5, R9, R5

	// R5 is a power of two, so any bit set in R5&63 means the block size is
	// less than 64. Decide this before caching: a cached small block size
	// would otherwise send every later call directly to zero_by_line and
	// bypass this check.
	ANDS	$63, R5, R9
	BNE	zva_unusable
	MOVW	R5, block_size<>(SB)

zero_by_line:
	CMP	R5, R1
	// Not enough memory to reach alignment
	BLO	no_zva
	// R6 = block mask, R4 = bytes from R0 up to the next block boundary.
	SUB	$1, R5, R6
	NEG	R0, R4
	ANDS	R6, R4, R4
	// Already aligned
	BEQ	aligned

	// R3 = bytes that would remain once R0 reaches the block boundary.
	SUB	R4, R1, R3

	// Check that the remaining length to ZVA after alignment is at least
	// 64 bytes and at least one full block. If R3 >= 64 the CCMP compares
	// R3 with R5; otherwise it forces NZCV=0b1010 (N != V), so BLT is taken.
	CMP	$64, R3
	CCMP	GE, R3, R5, $10  // condition code GE, NZCV=0b1010
	BLT	no_zva

	// We now have at least 64 bytes to zero, update n
	MOVD	R3, R1

loop_zva_prolog:
	// Clear up to the block boundary in 64-byte steps. The loop runs until
	// R4 goes negative, so it may clear past the boundary; those bytes are
	// inside the buffer (R3 >= 64) and are cleared again by DC ZVA.
	STP	(ZR, ZR), (R0)
	STP	(ZR, ZR), 16(R0)
	STP	(ZR, ZR), 32(R0)
	SUBS	$64, R4, R4
	STP	(ZR, ZR), 48(R0)
	ADD	$64, R0, R0
	BGE	loop_zva_prolog

	// R4 is negative here: step R0 back onto the block boundary.
	ADD	R4, R0, R0

aligned:
	// Pre-bias R1 by one block so SUBS/BHS stops when less than a full
	// block remains. R1 >= R5 holds on both paths into this label.
	SUB	R5, R1, R1

loop_zva:
	WORD	$0xd50b7420 // DC ZVA, R0
	ADD	R5, R0, R0
	SUBS	R5, R1, R1
	BHS	loop_zva
	// R1 has wrapped; R1 & (block size - 1) is the number of bytes left.
	ANDS	R6, R1, R1
	BNE	tail_maybe_long
	RET
|
||||
|
||||
// block_size<> caches the DC ZVA probe result used by memclrNoHeapPointers:
//	0		DCZID_EL0 has not been read yet
//	bit 31 set (~0)	DC ZVA must not be used
//	otherwise	block size in bytes, 4 << DCZID_EL0[3:0]
// The routine reads and writes it with 32-bit accesses (MOVW); 8 bytes are reserved.
GLOBL block_size<>(SB), NOPTR, $8
|
||||
Reference in New Issue
Block a user