xref: /OK3568_Linux_fs/kernel/tools/testing/selftests/arm64/fp/sve-test.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun// SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun// Copyright (C) 2015-2019 ARM Limited.
3*4882a593Smuzhiyun// Original author: Dave Martin <Dave.Martin@arm.com>
4*4882a593Smuzhiyun//
5*4882a593Smuzhiyun// Simple Scalable Vector Extension context switch test
6*4882a593Smuzhiyun// Repeatedly writes unique test patterns into each SVE register
7*4882a593Smuzhiyun// and reads them back to verify integrity.
8*4882a593Smuzhiyun//
9*4882a593Smuzhiyun// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
10*4882a593Smuzhiyun// (leave it running for as long as you want...)
11*4882a593Smuzhiyun// kill $pids
12*4882a593Smuzhiyun
13*4882a593Smuzhiyun#include <asm/unistd.h>
14*4882a593Smuzhiyun#include "assembler.h"
15*4882a593Smuzhiyun#include "asm-offsets.h"
16*4882a593Smuzhiyun
// Number of SVE Z (vector) registers exercised by the test
#define NZR	32
// Number of SVE P (predicate) registers exercised by the test
#define NPR	16
// Largest vector length catered for, in bits and in bytes; the shadow
// buffers below are sized from the byte value
#define MAXVL_BITS	2048
#define MAXVL_B	(MAXVL_BITS / 8)

// Allow SVE instructions to be assembled
.arch_extension sve
22*4882a593Smuzhiyun
// One-instruction helpers used as the "insn" argument of define_accessor.
// In each of them the first argument is an SVE register number and the
// second is the number of the X register that holds the memory address.

// Z<zreg> = [X<base>]
.macro _sve_ldr_v zreg, base
	ldr	z\zreg, [x\base]
.endm

// [X<base>] = Z<zreg>
.macro _sve_str_v zreg, base
	str	z\zreg, [x\base]
.endm

// P<preg> = [X<base>]
.macro _sve_ldr_p preg, base
	ldr	p\preg, [x\base]
.endm

// [X<base>] = P<preg>
.macro _sve_str_p preg, base
	str	p\preg, [x\base]
.endm
38*4882a593Smuzhiyun
// Generate accessor functions to read/write programmatically selected
// SVE registers.
// x0 is the register index to access
// x1 is the memory address to read from (setz,setp) or store to (getz,getp)
// All clobber x0-x2

// Memory -> register
define_accessor setz, NZR, _sve_ldr_v
define_accessor setp, NPR, _sve_ldr_p

// Register -> memory
define_accessor getz, NZR, _sve_str_v
define_accessor getp, NPR, _sve_str_p
48*4882a593Smuzhiyun
// Write the single character held in x0 to stdout
// Clobbers x0-x2,x8
function putc
	str	x0, [sp, #-16]!		// park the character on the stack

	mov	x8, #__NR_write
	mov	x2, #1			// one byte
	mov	x1, sp			// ... located at the top of the stack
	mov	x0, #1			// STDOUT_FILENO
	svc	#0

	add	sp, sp, #16
	ret
endfunction
63*4882a593Smuzhiyun
// Write the NUL-terminated string at address x0 to stdout
// Clobbers x0-x3,x8
function puts
	mov	x1, x0			// x1 = start of string (buffer for write)
	mov	x2, #0			// x2 = length found so far

.Lputs_scan:
	ldrb	w3, [x1, x2]
	cbz	w3, .Lputs_emit		// stop at the terminator
	add	x2, x2, #1
	b	.Lputs_scan

.Lputs_emit:
	mov	x8, #__NR_write
	mov	w0, #1			// STDOUT_FILENO
	svc	#0

	ret
endfunction
81*4882a593Smuzhiyun
// Utility macro to print a literal string
// The string is placed in a mergeable string section and its address is
// passed to the puts function.
// Clobbers x0-x4,x8
.macro puts string
	// A section type must be given whenever the "M" flag is used
	.pushsection .rodata.str1.1, "aMS", @progbits, 1
.L__puts_literal\@: .string "\string"
	.popsection

	ldr	x0, =.L__puts_literal\@
	bl	puts
.endm
92*4882a593Smuzhiyun
// Print an unsigned decimal number x0 to stdout
// The digits are generated backwards into a buffer on the stack and the
// resulting string is handed to puts.
// Clobbers x0-x4,x8
function putdec
	mov	x1, sp			// x1 = one past the end of the digit buffer
	str	x30, [sp, #-32]!	// Result can't be > 20 digits

	strb	wzr, [x1, #-1]!		// Write the NUL terminator

	mov	x2, #10
0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
	msub	x0, x3, x2, x0		// x0 = x0 % 10
	add	w0, w0, #'0'
	strb	w0, [x1, #-1]!
	mov	x0, x3
	cbnz	x3, 0b
	// The loop body always runs at least once, so 0 is printed as "0"
	// without needing a special case.

	mov	x0, x1			// x1 points at the most significant digit
	bl	puts

	ldr	x30, [sp], #32
	ret
endfunction
121*4882a593Smuzhiyun
// Print the unsigned decimal number x0 to stdout and finish the line
// Clobbers x0-x5,x8
function putdecn
	mov	x5, x30			// return address; x5 is not in the clobber
					// list of either putdec or putc

	bl	putdec

	mov	w0, #'\n'
	mov	x30, x5
	b	putc			// tail call: putc returns to our caller
endfunction
133*4882a593Smuzhiyun
// Print the low byte of w0 to stdout as two lower-case hex digits
// Clobbers x0-x3,x8
function puthexb
	str	x30, [sp, #-0x10]!

	mov	w3, w0			// keep the byte for the second digit
	ubfx	w0, w3, #4, #4		// high nibble goes out first
	bl	puthexnibble

	ldr	x30, [sp], #0x10
	mov	w0, w3			// low nibble is handled by the code below
	// fall through to puthexnibble
endfunction

// Print the low 4 bits of w0 to stdout as one lower-case hex digit
// Clobbers x0-x2,x8
function puthexnibble
	and	w0, w0, #0xf
	add	w1, w0, #'0'		// digit to use for values 0-9
	add	w2, w0, #'a' - 10	// digit to use for values 10-15
	cmp	w0, #10
	csel	w0, w1, w2, lo
	b	putc			// tail call
endfunction
155*4882a593Smuzhiyun
// Hex-dump x1 bytes starting at address x0 to stdout
// x0=data in, x1=size in, clobbers x0-x5,x8
function dumphex
	str	x30, [sp, #-0x10]!

	mov	x5, x1			// x5 = bytes still to print
	mov	x4, x0			// x4 = read cursor
	b	.Ldumphex_next

.Ldumphex_byte:
	ldrb	w0, [x4], #1
	bl	puthexb

.Ldumphex_next:
	subs	x5, x5, #1
	b.hs	.Ldumphex_byte		// no borrow: a byte was still pending

	ldr	x30, [sp], #0x10
	ret
endfunction
172*4882a593Smuzhiyun
// Declare some storage space to shadow the SVE register contents:
.pushsection .data
.p2align 4
// Expected contents of the Z registers
zref:
	.space	MAXVL_B * NZR
// Expected contents of the P registers
pref:
	.space	MAXVL_B / 8 * NPR
// Expected contents of the FFR
ffrref:
	.space	MAXVL_B / 8
// Staging buffer for generated patterns and for values read back
scratch:
	.space	MAXVL_B
.popsection
186*4882a593Smuzhiyun
// Byte-by-byte memory copy: x2 bytes from address x1 to address x0.
// Clobbers x0-x3
function memcpy
	cbz	x2, .Lmemcpy_done	// nothing to copy

.Lmemcpy_byte:
	ldrb	w3, [x1], #1
	subs	x2, x2, #1
	strb	w3, [x0], #1		// the store leaves the flags alone
	b.ne	.Lmemcpy_byte

.Lmemcpy_done:
	ret
endfunction
198*4882a593Smuzhiyun
// Generate a test pattern for storage in SVE registers
// x0: pid	(16 bits)
// x1: register number (6 bits)
// x2: generation (4 bits)
// Clobbers x0-x2

// These values are used to construct a 32-bit pattern that is repeated in the
// scratch buffer as many times as will fit:
// bits 31:28	generation number (increments once per test_loop)
// bits 27:22	32-bit lane index
// bits 21:16	register number
// bits 15: 0	pid

function pattern
	orr	w2, w0, w2, lsl #28	// pid | generation
	orr	w2, w2, w1, lsl #16	// ... | register number; lane index 0

	mov	w1, #(MAXVL_B / 4)	// number of 32-bit lanes to write
	ldr	x0, =scratch

.Lpattern_lane:
	str	w2, [x0], #4
	subs	w1, w1, #1
	add	w2, w2, #(1 << 22)	// next lane index; flags are untouched
	b.ne	.Lpattern_lane

	ret
endfunction
225*4882a593Smuzhiyun
// Get the address of shadow data for SVE Z-register Z<xn>
//   xd:    X register that receives the address
//   xn:    X register holding the Z register number
//   nrtmp: number of a temporary X register; it is left holding the
//          current vector length in bytes
.macro _adrz xd, xn, nrtmp
	rdvl	x\nrtmp, #1
	ldr	\xd, =zref
	madd	\xd, \xn, x\nrtmp, \xd	// zref + number * vector length
.endm
232*4882a593Smuzhiyun
// Get the address of shadow data for SVE P-register P<xn - NZR>
//   xd:    X register that receives the address
//   xn:    X register holding the register number biased by NZR; the
//          macro subtracts NZR from it in place
//   nrtmp: number of a temporary X register; it is left holding one
//          eighth of the current vector length in bytes
.macro _adrp xd, xn, nrtmp
	sub	\xn, \xn, #NZR
	rdvl	x\nrtmp, #1
	ldr	\xd, =pref
	lsr	x\nrtmp, x\nrtmp, #3
	madd	\xd, \xn, x\nrtmp, \xd	// pref + number * (vector length / 8)
.endm
241*4882a593Smuzhiyun
// Set up test pattern in a SVE Z-register and in its shadow copy
// x0: pid
// x1: register number
// x2: generation
function setup_zreg
	mov	x6, x1			// register number, needed again later
	mov	x4, x30

	bl	pattern			// scratch = test pattern

	_adrz	x5, x6, 2		// x5 = shadow slot, x2 = vector length
	ldr	x1, =scratch
	mov	x0, x5
	bl	memcpy			// shadow slot = scratch

	mov	x1, x5
	mov	x0, x6
	mov	x30, x4
	b	setz			// tail call: load the register from
					// its shadow slot
endfunction
262*4882a593Smuzhiyun
// Set up test pattern in a SVE P-register and in its shadow copy
// x0: pid
// x1: register number
// x2: generation
function setup_preg
	mov	x6, x1			// register number, needed again later
	mov	x4, x30

	bl	pattern			// scratch = test pattern

	_adrp	x5, x6, 2		// x5 = shadow slot, x6 -= NZR,
					// x2 = vector length / 8
	ldr	x1, =scratch
	mov	x0, x5
	bl	memcpy			// shadow slot = scratch

	mov	x1, x5
	mov	x0, x6
	mov	x30, x4
	b	setp			// tail call: load the register from
					// its shadow slot
endfunction
283*4882a593Smuzhiyun
// Set up test pattern in the FFR
// x0: pid
// x2: generation
//
// We need to generate a canonical FFR value, which consists of a number of
// low "1" bits, followed by a number of zeros. This gives us 17 unique values
// per 16 bits of FFR, so we create a 4 bit signature out of the PID and
// generation, and use that as the initial number of ones in the pattern.
// We fill the upper lanes of FFR with zeros.
// Beware: corrupts P0.
function setup_ffr
	mov	x4, x30

	and	w0, w0, #0x3		// signature bits 1:0 come from the pid
	bfi	w0, w2, #2, #2		// signature bits 3:2 from the generation
	mov	w1, #-1
	lsl	w1, w1, w0
	mvn	w1, w1			// w1 = <signature> low one bits

	ldr	x0, =ffrref
	strh	w1, [x0], #2		// first 16 bits of the shadow FFR
	rdvl	x1, #1
	lsr	x1, x1, #3		// size of the FFR in bytes
	sub	x1, x1, #2
	bl	memclr			// rest of the shadow FFR is zero

	ldr	x1, =ffrref
	mov	x0, #0
	bl	setp			// P0 = shadow FFR

	wrffr	p0.b			// FFR = P0

	ret	x4
endfunction
318*4882a593Smuzhiyun
// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
// Clobbers x0-x2.
function memfill_ae
	mov	w2, #0xae
	b	memfill
endfunction

// Fill x1 bytes starting at x0 with 0.
// Clobbers x0-x2.
function memclr
	mov	w2, wzr
	// fall through to memfill
endfunction

// Byte-by-byte memory fill: store byte w2 into x1 bytes starting at x0
// Clobbers x0 (advanced past the filled area) and x1
function memfill
	cbz	x1, .Lmemfill_done	// nothing to fill

.Lmemfill_byte:
	strb	w2, [x0], #1
	subs	x1, x1, #1
	b.ne	.Lmemfill_byte

.Lmemfill_done:
	ret
endfunction
345*4882a593Smuzhiyun
// Byte-by-byte memory compare: compare x2 bytes starting at address x0 with
// bytes starting at address x1.
// Returns only if all bytes match; otherwise, the program is aborted via
// barf, which is entered with x0-x2 still holding the original arguments.
// Clobbers x3-x5.
function memcmp
	cbz	x2, .Lmemcmp_same	// empty ranges always match

	mov	x5, #0			// x5 = offset of the byte under test
.Lmemcmp_byte:
	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	cmp	w3, w4
	b.ne	barf
	add	x5, x5, #1
	cmp	x5, x2
	b.ne	.Lmemcmp_byte

.Lmemcmp_same:
	ret
endfunction
371*4882a593Smuzhiyun
// Verify that a SVE Z-register matches its shadow in memory, else abort
// x0: reg number
// Clobbers x0-x7.
function check_zreg
	mov	x3, x30
	mov	x4, x0			// register number
	ldr	x7, =scratch

	_adrz	x5, x4, 6		// x5 = shadow slot, x6 = vector length

	mov	x1, x6
	mov	x0, x7
	bl	memfill_ae		// poison scratch before reading back

	mov	x1, x7
	mov	x0, x4
	bl	getz			// scratch = current register contents

	mov	x30, x3			// memcmp overwrites x3, so hand the
					// return address over and tail call
	mov	x2, x6
	mov	x1, x7
	mov	x0, x5
	b	memcmp
endfunction
396*4882a593Smuzhiyun
// Verify that a SVE P-register matches its shadow in memory, else abort
// x0: reg number
// Clobbers x0-x7.
function check_preg
	mov	x3, x30
	mov	x4, x0
	ldr	x7, =scratch

	_adrp	x5, x4, 6		// x5 = shadow slot, x4 -= NZR,
					// x6 = vector length / 8

	mov	x1, x6
	mov	x0, x7
	bl	memfill_ae		// poison scratch before reading back

	mov	x1, x7
	mov	x0, x4
	bl	getp			// scratch = current register contents

	mov	x30, x3			// memcmp overwrites x3, so hand the
					// return address over and tail call
	mov	x2, x6
	mov	x1, x7
	mov	x0, x5
	b	memcmp
endfunction
421*4882a593Smuzhiyun
// Verify that the FFR matches its shadow in memory, else abort
// Beware -- corrupts P0.
// Clobbers x0-x5.
function check_ffr
	mov	x3, x30

	rdvl	x5, #1
	ldr	x4, =scratch
	lsr	x5, x5, #3		// x5 = vector length / 8

	mov	x1, x5
	mov	x0, x4
	bl	memfill_ae		// poison scratch before reading back

	rdffr	p0.b			// P0 = FFR
	mov	x1, x4
	mov	x0, #0
	bl	getp			// scratch = P0

	mov	x30, x3			// memcmp overwrites x3, so hand the
					// return address over and tail call
	mov	x2, x5
	mov	x1, x4
	ldr	x0, =ffrref
	b	memcmp
endfunction
447*4882a593Smuzhiyun
// Signal handler installed for SIGUSR1 by _start.
// x2: pointer to the ucontext of the interrupted code
//
// Any SVE register modified here can cause corruption in the main
// thread -- but *only* the registers modified here.
function irritator_handler
	// Increment the irritation signal count (x23) in the saved
	// register state:
	ldr	x0, [x2, #ucontext_regs + 8 * 23]
	add	x0, x0, #1
	str	x0, [x2, #ucontext_regs + 8 * 23]

	// Corrupt some random Z-regs
	movi	v0.8b, #1
	movi	v9.16b, #2
	movi	v31.8b, #3
	// And P0
	rdffr	p0.b
	// And FFR
	wrffr	p15.b

	ret
endfunction
468*4882a593Smuzhiyun
// Signal handler installed for SIGINT and SIGTERM by _start: report the
// counters found in the interrupted register state and exit with status 0.
// w0: signal number
// x2: pointer to the ucontext of the interrupted code
function terminate_handler
	mov	x20, x2			// ucontext
	mov	w21, w0			// signal number

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec
	puts	", no error, iterations="
	ldr	x0, [x20, #ucontext_regs + 8 * 22]	// saved x22
	bl	putdec
	puts	", signals="
	ldr	x0, [x20, #ucontext_regs + 8 * 23]	// saved x23
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, xzr
	svc	#0
endfunction
487*4882a593Smuzhiyun
// Install a signal handler, aborting the program if that fails
// w0: signal number
// x1: sa_action
// w2: sa_flags
// Clobbers x0-x6,x8
function setsignal
	// Frame: x30 at [sp], zeroed sigaction structure from [sp, #16]
	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!

	mov	w6, w2
	mov	x5, x1
	mov	w4, w0

	mov	x1, #sa_sz
	add	x0, sp, #16
	bl	memclr

	add	x1, sp, #16		// new action
	str	x5, [x1, #sa_handler]
	str	w6, [x1, #sa_flags]
	mov	w0, w4
	mov	x2, #0			// old action is not wanted
	mov	x3, #sa_mask_sz
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbnz	w0, .Lsetsignal_failed

	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
	ret

.Lsetsignal_failed:
	puts	"sigaction failure\n"
	b	.Labort
endfunction
520*4882a593Smuzhiyun
// Main program entry point
//
// Registers that stay live across the test loop:
//   x19: vector length in bits, as sampled at startup
//   x20: our PID
//   x21: number of the register currently being set up or checked
//   x22: generation number (iteration count)
//   x23: irritation signal count (incremented by irritator_handler)
.globl _start
function _start
	// Sanity-check and report the vector length

	rdvl	x19, #8
	cmp	x19, #128
	b.lo	1f
	cmp	x19, #2048
	b.hi	1f
	tst	x19, #(8 - 1)
	b.eq	2f

1:	puts	"Bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

2:	puts	"Vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes

	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x23, #0		// Irritation signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	x22, #0		// generation number, increments per iteration
.Ltest_loop:
	rdvl	x0, #8		// the vector length must never change
	cmp	x0, x19
	b.ne	vl_barf

	mov	x21, #0		// Set up Z-regs & shadow with test pattern
0:	mov	x0, x20
	mov	x1, x21
	and	x2, x22, #0xf
	bl	setup_zreg
	add	x21, x21, #1
	cmp	x21, #NZR
	b.lo	0b

	// setup_ffr corrupts P0, so it has to run before the P-regs are set up
	mov	x0, x20		// Set up FFR & shadow with test pattern
	mov	x1, #NZR + NPR
	and	x2, x22, #0xf
	bl	setup_ffr

0:	mov	x0, x20		// Set up P-regs & shadow with test pattern
	mov	x1, x21
	and	x2, x22, #0xf
	bl	setup_preg
	add	x21, x21, #1
	cmp	x21, #NZR + NPR
	b.lo	0b

// Can't do this when SVE state is volatile across SVC:
//	mov	x8, #__NR_sched_yield	// Encourage preemption
//	svc	#0

	mov	x21, #0
0:	mov	x0, x21
	bl	check_zreg
	add	x21, x21, #1
	cmp	x21, #NZR
	b.lo	0b

0:	mov	x0, x21
	bl	check_preg
	add	x21, x21, #1
	cmp	x21, #NZR + NPR
	b.lo	0b

	// check_ffr corrupts P0, so it has to run after the P-regs are checked
	bl	check_ffr

	add	x22, x22, #1
	b	.Ltest_loop

.Labort:
	// Raise SIGABRT on this process only: a PID of 0 would signal every
	// process in our process group.
	mov	x8, #__NR_getpid
	svc	#0
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0

	// Only reached if the signal did not terminate us
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
629*4882a593Smuzhiyun
// Report a register/shadow mismatch and abort the program
// x0: address of the expected data
// x1: address of the actual data
// x2: size of the data in bytes
// Also reports x20 (PID), x22 (iteration) and x21 (register number).
// Does not return.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", reg="
	mov	x0, x21
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"

	mov	x8, #__NR_getpid
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT	// x0 = our PID from getpid above
	mov	x8, #__NR_kill
	svc	#0

	// Only reached if the signal did not terminate us
	mov	x0, #1
	mov	x8, #__NR_exit
	svc	#0
endfunction
673*4882a593Smuzhiyun
// Report an unexpected change of the active vector length and exit with
// status 1
// x0: the offending vector length, in bits
// Does not return.
function vl_barf
	mov	x10, x0

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #1		// the exit status is the first syscall argument
	svc	#0
endfunction
685