/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/vfp/vfphw.S
 *
 *  Copyright (C) 2004 ARM Limited.
 *  Written by Deep Blue Solutions Limited.
 *
 * This code is called from the kernel's undefined instruction trap.
 * r9 holds the return address for successful handling.
 * lr holds the return address for unrecognised instructions.
 * r10 points at the start of the private FP workspace in the thread structure
 * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h)
 */
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/thread_info.h>
#include <asm/vfpmacros.h>
#include <linux/kern_levels.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>

@ DBGSTR - print a fixed debug message through printk.
@
@  str = message text; it is emitted as KERN_DEBUG "VFP: <str>\n" in .rodata
@
@ Expands to nothing unless DEBUG is defined.  r0-r3, ip and lr are saved
@ on the stack around the call and restored afterwards.  The condition
@ flags are not saved, so do not place this macro between a flag-setting
@ instruction and the instruction that consumes the flags.
@ The string is placed with .pushsection/.popsection so that the section
@ stack is left balanced after every expansion.
	.macro	DBGSTR, str
#ifdef DEBUG
	stmfd	sp!, {r0-r3, ip, lr}
	ldr	r0, =1f
	bl	printk
	ldmfd	sp!, {r0-r3, ip, lr}

	.pushsection .rodata, "a"
1:	.ascii	KERN_DEBUG "VFP: \str\n"
	.byte	0
	.popsection
#endif
	.endm

@ DBGSTR1 - as DBGSTR, with one printk argument.
@
@  str = format string
@  arg = register whose value is passed as the first format argument (in r1)
@
@ Same save/restore and flag caveats as DBGSTR.
	.macro  DBGSTR1, str, arg
#ifdef DEBUG
	stmfd	sp!, {r0-r3, ip, lr}
	mov	r1, \arg
	ldr	r0, =1f
	bl	printk
	ldmfd	sp!, {r0-r3, ip, lr}

	.pushsection .rodata, "a"
1:	.ascii	KERN_DEBUG "VFP: \str\n"
	.byte	0
	.popsection
#endif
	.endm

@ DBGSTR3 - as DBGSTR, with three printk arguments.
@
@  str              = format string
@  arg1, arg2, arg3 = registers passed in r1, r2 and r3 respectively
@
@ The arguments are moved in the order arg3, arg2, arg1.  A source register
@ that has already been overwritten by an earlier move is read with its new
@ value, so arg2 should not be r3 and arg1 should not be r2 or r3.
@ Same save/restore and flag caveats as DBGSTR.
	.macro  DBGSTR3, str, arg1, arg2, arg3
#ifdef DEBUG
	stmfd	sp!, {r0-r3, ip, lr}
	mov	r3, \arg3
	mov	r2, \arg2
	mov	r1, \arg1
	ldr	r0, =1f
	bl	printk
	ldmfd	sp!, {r0-r3, ip, lr}

	.pushsection .rodata, "a"
1:	.ascii	KERN_DEBUG "VFP: \str\n"
	.byte	0
	.popsection
#endif
	.endm


@ VFP hardware support entry point.
@
@  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
@  r2  = PC value to resume execution after successful emulation
@  r9  = normal "successful" return address
@  r10 = vfp_state union
@  r11 = CPU number
@  lr  = unrecognised instruction return address
@  IRQs enabled.
@
@ Exit paths visible in this routine:
@  - ret r9 after the hardware state has been (re)loaded and the saved PC
@    in pt_regs has been wound back by 4 so the instruction is retried
@  - ret lr when the VFP is enabled and no exception condition is found
@  - branch to VFP_bounce (with lr = r9) when an exception must be handled
@ NOTE(review): dec_preempt_count_ti is defined outside this file; it is
@ invoked on both ret paths but not before VFP_bounce - presumably the
@ bounce code drops the count itself, confirm against its definition.
ENTRY(vfp_support_entry)
	DBGSTR3	"instr %08x pc %08x state %p", r0, r2, r10

	.fpu	vfpv2
	VFPFMRX	r1, FPEXC		@ Is the VFP enabled?
	DBGSTR1	"fpexc %08x", r1
	tst	r1, #FPEXC_EN
	bne	look_for_VFP_exceptions	@ VFP is already enabled

	DBGSTR1 "enable %x", r10
	ldr	r3, vfp_current_hw_state_address
	orr	r1, r1, #FPEXC_EN	@ user FPEXC has the enable bit set
	ldr	r4, [r3, r11, lsl #2]	@ vfp_current_hw_state pointer
	bic	r5, r1, #FPEXC_EX	@ make sure exceptions are disabled
	cmp	r4, r10			@ this thread owns the hw context?
#ifndef CONFIG_SMP
	@ For UP, checking that this thread owns the hw context is
	@ sufficient to determine that the hardware state is valid.
	beq	vfp_hw_state_valid

	@ On UP, we lazily save the VFP context.  As a different
	@ thread wants ownership of the VFP hardware, save the old
	@ state if there was a previous (valid) owner.

	VFPFMXR	FPEXC, r5		@ enable VFP, disable any pending
					@ exceptions, so we can get at the
					@ rest of it

	DBGSTR1	"save old state %p", r4
	cmp	r4, #0			@ if the vfp_current_hw_state is NULL
	beq	vfp_reload_hw		@ then the hw state needs reloading
	VFPFSTMIA r4, r5		@ save the working registers
	VFPFMRX	r5, FPSCR		@ current status
#ifndef CONFIG_CPU_FEROCEON
	tst	r1, #FPEXC_EX		@ is there additional state to save?
	beq	1f
	VFPFMRX	r6, FPINST		@ FPINST (only if FPEXC.EX is set)
	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
	beq	1f
	VFPFMRX	r8, FPINST2		@ FPINST2 if needed (and present)
1:
#endif
	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
vfp_reload_hw:

#else
	@ For SMP, if this thread does not own the hw context, then we
	@ need to reload it.  No need to save the old state as on SMP,
	@ we always save the state when we switch away from a thread.
	bne	vfp_reload_hw

	@ This thread has ownership of the current hardware context.
	@ However, it may have been migrated to another CPU, in which
	@ case the saved state is newer than the hardware context.
	@ Check this by looking at the CPU number which the state was
	@ last loaded onto.
	ldr	ip, [r10, #VFP_CPU]
	teq	ip, r11
	beq	vfp_hw_state_valid

vfp_reload_hw:
	@ We're loading this threads state into the VFP hardware. Update
	@ the CPU number which contains the most up to date VFP context.
	str	r11, [r10, #VFP_CPU]

	VFPFMXR	FPEXC, r5		@ enable VFP, disable any pending
					@ exceptions, so we can get at the
					@ rest of it
#endif

	DBGSTR1	"load state %p", r10
	str	r10, [r3, r11, lsl #2]	@ update the vfp_current_hw_state pointer
					@ Load the saved state back into the VFP
	VFPFLDMIA r10, r5		@ reload the working registers while
					@ FPEXC is in a safe state
	ldmia	r10, {r1, r5, r6, r8}	@ load FPEXC, FPSCR, FPINST, FPINST2
#ifndef CONFIG_CPU_FEROCEON
	tst	r1, #FPEXC_EX		@ is there additional state to restore?
	beq	1f
	VFPFMXR	FPINST, r6		@ restore FPINST (only if FPEXC.EX is set)
	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to write?
	beq	1f
	VFPFMXR	FPINST2, r8		@ FPINST2 if needed (and present)
1:
#endif
	VFPFMXR	FPSCR, r5		@ restore status

@ The context stored in the VFP hardware is up to date with this thread
vfp_hw_state_valid:
	tst	r1, #FPEXC_EX
	bne	process_exception	@ might as well handle the pending
					@ exception before retrying branch
					@ out before setting an FPEXC that
					@ stops us reading stuff
	VFPFMXR	FPEXC, r1		@ Restore FPEXC last
	sub	r2, r2, #4		@ Retry current instruction - if Thumb
	str	r2, [sp, #S_PC]		@ mode it's two 16-bit instructions,
					@ else it's one 32-bit instruction, so
					@ always subtract 4 from the following
					@ instruction address.
	dec_preempt_count_ti r10, r4
	ret	r9			@ we think we have handled things


look_for_VFP_exceptions:
	@ Check for synchronous or asynchronous exception
	tst	r1, #FPEXC_EX | FPEXC_DEX
	bne	process_exception
	@ On some implementations of the VFP subarch 1, setting FPSCR.IXE
	@ causes all the CDP instructions to be bounced synchronously without
	@ setting the FPEXC.EX bit
	VFPFMRX	r5, FPSCR
	tst	r5, #FPSCR_IXE
	bne	process_exception

	@ A non-zero FPSCR vector length is also treated as a bounce; FPEXC_DEX
	@ is set in the FPEXC value handed to the support code in that case.
	tst	r5, #FPSCR_LENGTH_MASK
	beq	skip
	orr	r1, r1, #FPEXC_DEX
	b	process_exception
skip:

	@ Fall into hand on to next handler - appropriate coproc instr
	@ not recognised by VFP

	DBGSTR	"not VFP"
	dec_preempt_count_ti r10, r4
	ret	lr

process_exception:
	DBGSTR	"bounce"
	mov	r2, sp			@ nothing stacked - regdump is at TOS
	mov	lr, r9			@ setup for a return to the user code.

	@ Now call the C code to package up the bounce to the support code
	@   r0 holds the trigger instruction
	@   r1 holds the FPEXC value
	@   r2 pointer to register dump
	b	VFP_bounce		@ we have handled this - the support
					@ code will raise an exception if
					@ required. If not, the user code will
					@ retry the faulted instruction
ENDPROC(vfp_support_entry)

@ vfp_save_state - store the VFP register bank and control registers.
@
@  r0 = save location
@  r1 = FPEXC value to record (also selects which extra registers are read)
@
@ Clobbers r2, r3 and r12; r0 is used as the store base for both the
@ register bank and the four control words.
@ NOTE(review): the stmia below relies on VFPFSTMIA leaving r0 pointing
@ just past the saved working registers - confirm against asm/vfpmacros.h.
@ When FPEXC.EX (or FPEXC.FP2V) is clear the FPINST (FPINST2) slot receives
@ whatever r3 (r12) happened to hold; the reload path in vfp_support_entry
@ only consumes those slots under the same FPEXC tests.
ENTRY(vfp_save_state)
	@ Save the current VFP state
	@ r0 - save location
	@ r1 - FPEXC
	DBGSTR1	"save VFP state %p", r0
	VFPFSTMIA r0, r2		@ save the working registers
	VFPFMRX	r2, FPSCR		@ current status
	tst	r1, #FPEXC_EX		@ is there additional state to save?
	beq	1f
	VFPFMRX	r3, FPINST		@ FPINST (only if FPEXC.EX is set)
	tst	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
	beq	1f
	VFPFMRX	r12, FPINST2		@ FPINST2 if needed (and present)
1:
	stmia	r0, {r1, r2, r3, r12}	@ save FPEXC, FPSCR, FPINST, FPINST2
	ret	lr
ENDPROC(vfp_save_state)

	.align
@ Literal holding the address of vfp_current_hw_state; vfp_support_entry
@ loads it and indexes the result by CPU number (r11, scaled by 4).
vfp_current_hw_state_address:
	.word	vfp_current_hw_state

@ tbl_branch - computed branch into a table of fixed-size code entries.
@
@  base  = register holding the entry index
@  tmp   = scratch register (only written in the Thumb-2 variant)
@  shift = log2 of the entry size, as an immediate (for example #3 for
@          the 8-byte entries used below)
@
@ The table must start immediately after the macro, at local label 1.
@ Thumb-2: the target is formed explicitly as (address of 1) + (base << shift).
@ ARM:     reading pc yields the address of the add plus 8, which is the
@          address of label 1 once the padding mov r0, r0 is accounted for,
@          so the same target results without a scratch register.
	.macro	tbl_branch, base, tmp, shift
#ifdef CONFIG_THUMB2_KERNEL
	adr	\tmp, 1f
	add	\tmp, \tmp, \base, lsl \shift
	ret	\tmp
#else
	add	pc, pc, \base, lsl \shift
	mov	r0, r0
#endif
1:
	.endm

@ vfp_get_float - read a single-precision register selected at run time.
@
@  r0 = register index, 0-31 (no range check is performed)
@  returns the contents of s<index> in r0
@
@ r3 is the tbl_branch scratch register.  Each table entry is padded to
@ 8 bytes with .org so that it matches the #3 shift.
ENTRY(vfp_get_float)
	tbl_branch r0, r3, #3
	.fpu	vfpv2
	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1:	vmov	r0, s\dr
	ret	lr
	.org	1b + 8
	.endr
	.irp	dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
1:	vmov	r0, s\dr
	ret	lr
	.org	1b + 8
	.endr
ENDPROC(vfp_get_float)

@ vfp_put_float - write a single-precision register selected at run time.
@
@  r0 = value to store
@  r1 = register index, 0-31 (no range check is performed)
@
@ r3 is the tbl_branch scratch register; entries are 8 bytes each.
ENTRY(vfp_put_float)
	tbl_branch r1, r3, #3
	.fpu	vfpv2
	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1:	vmov	s\dr, r0
	ret	lr
	.org	1b + 8
	.endr
	.irp	dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
1:	vmov	s\dr, r0
	ret	lr
	.org	1b + 8
	.endr
ENDPROC(vfp_put_float)

@ vfp_get_double - read a double-precision register selected at run time.
@
@  r0 = register index: 0-15, or 0-31 when CONFIG_VFPv3 is set
@  returns the contents of d<index> in r0 and r1
@
@ The index one past the last real register selects the code that follows
@ the table, which returns zero in r0 and r1.
@ r3 is the tbl_branch scratch register; entries are 8 bytes each.
ENTRY(vfp_get_double)
	tbl_branch r0, r3, #3
	.fpu	vfpv2
	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1:	vmov	r0, r1, d\dr
	ret	lr
	.org	1b + 8
	.endr
#ifdef CONFIG_VFPv3
	@ d16 - d31 registers
	.fpu	vfpv3
	.irp	dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
1:	vmov	r0, r1, d\dr
	ret	lr
	.org	1b + 8
	.endr
#endif

	@ virtual register 16 (or 32 if VFPv3) for compare with zero
	mov	r0, #0
	mov	r1, #0
	ret	lr
ENDPROC(vfp_get_double)

@ vfp_put_double - write a double-precision register selected at run time.
@
@  r0, r1 = value to store
@  r2     = register index: 0-15, or 0-31 when CONFIG_VFPv3 is set
@
@ No range check is performed and, unlike vfp_get_double, there is no
@ entry after the table, so the index must name a real register.
@ r3 is the tbl_branch scratch register; entries are 8 bytes each.
ENTRY(vfp_put_double)
	tbl_branch r2, r3, #3
	.fpu	vfpv2
	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1:	vmov	d\dr, r0, r1
	ret	lr
	.org	1b + 8
	.endr
#ifdef CONFIG_VFPv3
	.fpu	vfpv3
	@ d16 - d31 registers
	.irp	dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
1:	vmov	d\dr, r0, r1
	ret	lr
	.org	1b + 8
	.endr
#endif
ENDPROC(vfp_put_double)