/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * FP/SIMD state saving and restoring macros
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */

#include <asm/assembler.h>

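/*
 * Save the 32 Q registers followed by the 32-bit fpsr and fpcr values
 * (16 * 32 + 8 bytes in total) to the buffer at \state. Clobbers \state
 * and x\tmpnr.
 */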
.macro fpsimd_save state, tmpnr
	stp	q0, q1, [\state, #16 * 0]
	stp	q2, q3, [\state, #16 * 2]
	stp	q4, q5, [\state, #16 * 4]
	stp	q6, q7, [\state, #16 * 6]
	stp	q8, q9, [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	stp	q30, q31, [\state, #16 * 30]!
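	/*
	 * The final stp writes back, so \state now points at &q30 and the
	 * stores below land at offsets 16 * 32 and 16 * 32 + 4 from the
	 * original base, immediately after q31.
	 */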
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]
.endm

.macro fpsimd_restore_fpcr state, tmp
	/*
	 * Writes to fpcr may be self-synchronising, so avoid restoring
	 * the register if it hasn't changed.
	 */
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	b.eq	9999f
	msr	fpcr, \state
9999:
.endm

/* Clobbers \state */
.macro fpsimd_restore state, tmpnr
	ldp	q0, q1, [\state, #16 * 0]
	ldp	q2, q3, [\state, #16 * 2]
	ldp	q4, q5, [\state, #16 * 4]
	ldp	q6, q7, [\state, #16 * 6]
	ldp	q8, q9, [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	ldp	q30, q31, [\state, #16 * 30]!
	ldr	w\tmpnr, [\state, #16 * 2]
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	fpsimd_restore_fpcr x\tmpnr, \state
.endm
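
/*
 * A minimal usage sketch for the two macros above (the register choices
 * are illustrative, not mandated by this file):
 *
 *	fpsimd_save	x0, 8		// save to [x0]; clobbers x0, x8
 *	fpsimd_restore	x0, 8		// reload from [x0]; clobbers x0, x8
 */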

/* Sanity-check macros to help avoid encoding garbage instructions */

.macro _check_general_reg nr
	.if (\nr) < 0 || (\nr) > 30
		.error "Bad register number \nr."
	.endif
.endm

.macro _sve_check_zreg znr
	.if (\znr) < 0 || (\znr) > 31
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm

.macro _sve_check_preg pnr
	.if (\pnr) < 0 || (\pnr) > 15
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm

.macro _check_num n, min, max
	.if (\n) < (\min) || (\n) > (\max)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm
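
/*
 * These checks run at assembly time, so a bad operand aborts the build
 * with a readable diagnostic rather than silently encoding a bogus
 * .inst value; e.g. "_sve_check_preg 16" fails with "Bad Scalable
 * Vector Extension predicate register number 16.".
 */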

/* SVE instruction encodings for non-SVE-capable assemblers */

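/*
 * For the LDR/STR encodings below, the signed 9-bit "MUL VL" offset is
 * split into imm9h (instruction bits 21:16, taken from offset bits 8:3)
 * and imm9l (instruction bits 12:10, taken from offset bits 2:0).
 */
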
/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* RDVL X\nx, #\imm */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)
.endm
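
/*
 * RDVL returns \imm times the current vector length in bytes, so a
 * typical invocation (illustrative, not from this file) is
 * "_sve_rdvl 0, 1", which leaves VL in x0.
 */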

/* RDFFR (unpredicated): RDFFR P\np.B */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000			\
		| (\np)
.endm

/* WRFFR P\np.B */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000			\
		| ((\np) << 5)
.endm

/* PFALSE P\np.B */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400			\
		| (\np)
.endm

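/*
 * Expand _for__body once for each integer in [\from, \to]. The range is
 * split in half at each step, so the assembler's macro recursion depth
 * grows logarithmically rather than linearly; the %expr syntax (valid
 * in altmacro mode) forces the arithmetic to be evaluated before the
 * recursive invocation.
 */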
.macro __for from:req, to:req
	.if (\from) == (\to)
		_for__body %\from
	.else
		__for %\from, %((\from) + ((\to) - (\from)) / 2)
		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.endif
.endm

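/*
 * _for var, from, to, insn: assemble \insn once for each value of \var
 * in [from, to]. The one-argument _for__body wrapper is defined afresh
 * for each use and purged afterwards, and altmacro mode is switched off
 * again while \insn itself is expanded.
 */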
.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for \from, \to
	.noaltmacro

	.purgem _for__body
.endm

/* Update ZCR_EL1.LEN with the new VQ */
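/*
 * \xvqminus1 holds the desired vector length in quadwords minus 1,
 * which is exactly the encoding of the ZCR_ELx.LEN field; the write is
 * skipped when LEN is already correct, since writes to ZCR_EL1 are
 * self-synchronising.
 */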
.macro sve_load_vq xvqminus1, xtmp, xtmp2
		mrs_s		\xtmp, SYS_ZCR_EL1
		bic		\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
		orr		\xtmp2, \xtmp2, \xvqminus1
		cmp		\xtmp2, \xtmp
		b.eq		921f
		msr_s		SYS_ZCR_EL1, \xtmp2	//self-synchronising
921:
.endm

/* Preserve the first 128 bits of Z\nz and zero the rest. */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
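	/* A Neon write to v\nz zeroes all bits of Z\nz above bit 127. */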
	mov	v\nz\().16b, v\nz\().16b
.endm

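/* Zero all SVE-only state: the Z registers' upper bits, P0-P15 and FFR. */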
.macro sve_flush
 _for n, 0, 31, _sve_flush_z	\n
 _for n, 0, 15, _sve_pfalse	\n
		_sve_wrffr	0
.endm

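/*
 * Save the SVE state: x\nxbase points at the FFR save slot. Z0-Z31 are
 * stored at VL-scaled offsets -34..-3 below it and P0-P15 at PL-scaled
 * offsets -16..-1; P0 is borrowed to read FFR, which is stored at
 * offset 0, and is then reloaded from its own slot. fpsr and fpcr are
 * stored to [\xpfpsr] and [\xpfpsr, #4].
 */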
.macro sve_save nxbase, xpfpsr, nxtmp
 _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
 _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
		_sve_rdffr	0
		_sve_str_p	0, \nxbase
		_sve_ldr_p	0, \nxbase, -16

		mrs		x\nxtmp, fpsr
		str		w\nxtmp, [\xpfpsr]
		mrs		x\nxtmp, fpcr
		str		w\nxtmp, [\xpfpsr, #4]
.endm

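/*
 * Restore the state laid out by sve_save: the FFR image at offset 0 is
 * loaded into P0 and written to FFR before the real P0-P15 are reloaded
 * from their save slots.
 */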
.macro __sve_load nxbase, xpfpsr, nxtmp
 _for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
		_sve_ldr_p	0, \nxbase
		_sve_wrffr	0
 _for n, 0, 15,	_sve_ldr_p	\n, \nxbase, \n - 16

		ldr		w\nxtmp, [\xpfpsr]
		msr		fpsr, x\nxtmp
		ldr		w\nxtmp, [\xpfpsr, #4]
		msr		fpcr, x\nxtmp
.endm

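/*
 * Set ZCR_EL1.LEN before reloading the registers, since the MUL VL
 * addressing used by __sve_load scales with the live vector length.
 */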
.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
		sve_load_vq	\xvqminus1, x\nxtmp, \xtmp2
		__sve_load	\nxbase, \xpfpsr, \nxtmp
.endm