/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * FP/SIMD state saving and restoring macros
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */

#include <asm/assembler.h>

/*
 * Save the base FP/SIMD register state (q0-q31 plus fpsr/fpcr).
 *   \state: GPR holding the base address of the save area. Clobbered:
 *           the final stp uses writeback ('!'), leaving \state advanced
 *           by 16 * 30 bytes.
 *   \tmpnr: register *number* of a GPR scratch (x\tmpnr is clobbered).
 *
 * Because of the writeback, the "#16 * 2" offsets below address the two
 * 32-bit words immediately after q31, i.e. byte offsets 16 * 32 and
 * 16 * 32 + 4 from the original \state.
 */
.macro fpsimd_save state, tmpnr
	stp	q0, q1, [\state, #16 * 0]
	stp	q2, q3, [\state, #16 * 2]
	stp	q4, q5, [\state, #16 * 4]
	stp	q6, q7, [\state, #16 * 6]
	stp	q8, q9, [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	stp	q30, q31, [\state, #16 * 30]!	// writeback: \state += 16 * 30
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]	// fpsr at original offset 16 * 32
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]	// fpcr at original offset 16 * 32 + 4
.endm

/*
 * Restore fpcr from \state (a GPR holding the new value), using \tmp as
 * scratch.
 */
.macro fpsimd_restore_fpcr state, tmp
	/*
	 * Writes to fpcr may be self-synchronising, so avoid restoring
	 * the register if it hasn't changed.
	 */
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	b.eq	9999f				// unchanged: skip the msr
	msr	fpcr, \state
9999:
.endm

/*
 * Restore the state saved by fpsimd_save. Same layout and writeback
 * behaviour as fpsimd_save; x\tmpnr is used as scratch.
 */
/* Clobbers \state */
.macro fpsimd_restore state, tmpnr
	ldp	q0, q1, [\state, #16 * 0]
	ldp	q2, q3, [\state, #16 * 2]
	ldp	q4, q5, [\state, #16 * 4]
	ldp	q6, q7, [\state, #16 * 6]
	ldp	q8, q9, [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	ldp	q30, q31, [\state, #16 * 30]!	// writeback mirrors fpsimd_save
	ldr	w\tmpnr, [\state, #16 * 2]
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	fpsimd_restore_fpcr x\tmpnr, \state	// \state reused as scratch here
.endm

/* Sanity-check macros to help avoid encoding garbage instructions */

/* GPR numbers: x0-x30 (31 would encode sp/xzr depending on context). */
.macro _check_general_reg nr
	.if (\nr) < 0 || (\nr) > 30
		.error "Bad register number \nr."
	.endif
.endm

/* SVE vector registers: Z0-Z31. */
.macro _sve_check_zreg znr
	.if (\znr) < 0 || (\znr) > 31
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm

/* SVE predicate registers: P0-P15. */
.macro _sve_check_preg pnr
	.if (\pnr) < 0 || (\pnr) > 15
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm

/* Range-check an assemble-time constant. */
.macro _check_num n, min, max
	.if (\n) < (\min) || (\n) > (\max)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm

/*
 * SVE instruction encodings for non-SVE-capable assemblers
 *
 * These emit the raw opcodes with .inst so the file builds with binutils
 * that predate SVE support. In each encoding the 9-bit signed MUL VL
 * offset is split into imm3 (bits 12:10, offset & 7) and imm6
 * (bits 21:16, (offset & 0x1f8) << 13).
 */

/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* RDVL X\nx, #\imm  (X\nx = vector length in bytes * \imm) */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)	// imm6 at bits 10:5
.endm

/* RDFFR (unpredicated): RDFFR P\np.B — read the first-fault register */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000			\
		| (\np)
.endm

/* WRFFR P\np.B — write the first-fault register (source is at bits 9:5) */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000			\
		| ((\np) << 5)
.endm

/* PFALSE P\np.B — set all elements of P\np to false */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400			\
		| (\np)
.endm

/*
 * Recursive helper for _for: expands _for__body once for each integer in
 * [\from, \to] by binary splitting (keeps macro recursion depth O(log n)).
 * The '%' prefix (altmacro mode) forces the arithmetic to be evaluated to
 * a literal number before substitution.
 */
.macro __for from:req, to:req
	.if (\from) == (\to)
		_for__body %\from
	.else
		__for %\from, %((\from) + ((\to) - (\from)) / 2)
		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.endif
.endm

/*
 * _for var, from, to, insn: emit \insn once for each \var in [from, to],
 * e.g. "_for n, 0, 31, _sve_str_v \n, ...". Defines a temporary
 * _for__body macro so \insn sees \var substituted; altmacro mode is
 * enabled only around the expansion and switched off inside the body so
 * \insn assembles with normal syntax.
 */
.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for \from, \to
	.noaltmacro

	.purgem _for__body
.endm

/*
 * Update ZCR_EL1.LEN with the new VQ.
 *   \xvqminus1: new vector length in quadwords, minus 1 (the LEN field)
 *   \xtmp, \xtmp2: GPR scratch
 * Skips the write if LEN is already correct, since the msr is
 * self-synchronising (see comment below).
 */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
	mrs_s		\xtmp, SYS_ZCR_EL1
	bic		\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
	orr		\xtmp2, \xtmp2, \xvqminus1
	cmp		\xtmp2, \xtmp
	b.eq		921f
	msr_s		SYS_ZCR_EL1, \xtmp2	//self-synchronising
921:
.endm

/*
 * Preserve the first 128-bits of Znz and zero the rest.
 * (An AdvSIMD write to Vn zeroes the Zn bits above 128.)
 */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b
.endm

/* Flush all SVE-only state: Z high bits, all predicates, and FFR. */
.macro sve_flush
 _for n, 0, 31, _sve_flush_z	\n
 _for n, 0, 15, _sve_pfalse	\n
		_sve_wrffr	0
.endm

/*
 * Save the full SVE state plus fpsr/fpcr.
 *   \nxbase: register *number* of the GPR holding the save-area pointer.
 *            Layout (from the code below): Z0-Z31 at VL-scaled offsets
 *            -34..-3, P0-P15 at PL-scaled offsets -16..-1, FFR at 0.
 *   \xpfpsr: GPR pointing at the fpsr/fpcr pair (two 32-bit words)
 *   \nxtmp:  register *number* of a GPR scratch (x\nxtmp clobbered)
 */
.macro sve_save nxbase, xpfpsr, nxtmp
 _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
 _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
		_sve_rdffr	0		// FFR can only be read via a predicate reg,
		_sve_str_p	0, \nxbase	// so stage it through P0...
		_sve_ldr_p	0, \nxbase, -16	// ...then reload P0's saved value

		mrs		x\nxtmp, fpsr
		str		w\nxtmp, [\xpfpsr]
		mrs		x\nxtmp, fpcr
		str		w\nxtmp, [\xpfpsr, #4]
.endm

/*
 * Restore the state saved by sve_save (same layout / arguments).
 * FFR is written first via P0, before P0's real value is loaded.
 */
.macro __sve_load nxbase, xpfpsr, nxtmp
 _for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
		_sve_ldr_p	0, \nxbase	// load saved FFR image into P0
		_sve_wrffr	0		// ...and write it to FFR
 _for n, 0, 15,	_sve_ldr_p	\n, \nxbase, \n - 16

		ldr		w\nxtmp, [\xpfpsr]
		msr		fpsr, x\nxtmp
		ldr		w\nxtmp, [\xpfpsr, #4]
		msr		fpcr, x\nxtmp
.endm

/*
 * Set the vector length (via sve_load_vq) and then restore the SVE state.
 * Argument roles as for sve_load_vq / __sve_load above.
 */
.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
		sve_load_vq	\xvqminus1, x\nxtmp, \xtmp2
		__sve_load	\nxbase, \xpfpsr, \nxtmp
.endm