/* SPDX-License-Identifier: GPL-2.0 */
/*
 * copy_page, __copy_user_page, __copy_user implementation of SuperH
 *
 * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima
 * Copyright (C) 2002 Toshinobu Sugioka
 * Copyright (C) 2006 Paul Mundt
 */
#include <linux/linkage.h>
#include <asm/page.h>

/*
 * copy_page
 * @to: P1 address
 * @from: P1 address
 *
 * void copy_page(void *to, void *from)
 */

/*
 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
 * r8 --- from + PAGE_SIZE
 * r9 --- not used
 * r10 --- to
 * r11 --- from
 */
ENTRY(copy_page)
	mov.l	r8,@-r15
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov	r4,r10
	mov	r5,r11
	mov	r5,r8
	mov	#(PAGE_SIZE >> 10), r0
	shll8	r0
	shll2	r0
	add	r0,r8
	!
1:	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
#if defined(CONFIG_CPU_SH4)
	movca.l	r0,@r10
#else
	mov.l	r0,@r10
#endif
	add	#32,r10
	mov.l	r7,@-r10
	mov.l	r6,@-r10
	mov.l	r5,@-r10
	mov.l	r4,@-r10
	mov.l	r3,@-r10
	mov.l	r2,@-r10
	mov.l	r1,@-r10
	cmp/eq	r11,r8
	bf/s	1b
	 add	#28,r10
	!
	mov.l	@r15+,r11
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	 nop

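/*
 * The loop above moves one 32-byte cache line per pass: eight loads
 * through @r11+, then eight stores through r10. PAGE_SIZE is rebuilt
 * as (PAGE_SIZE >> 10) << 10 via shll8 + shll2, since the constant
 * does not fit in an 8-bit mov immediate. On SH-4, movca.l allocates
 * the destination cache line without fetching it from memory first,
 * avoiding a read of data that is about to be completely overwritten.
 * A rough C-level sketch of the loop (illustrative only, not the
 * generic kernel implementation):
 *
 *	unsigned long *d = to, *s = from;
 *	unsigned long *end = s + PAGE_SIZE / sizeof(*s);
 *
 *	while (s != end) {
 *		int i;
 *		for (i = 0; i < 8; i++)	// one 32-byte line per pass
 *			d[i] = s[i];
 *		d += 8;
 *		s += 8;
 *	}
 */
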
/*
 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
 * Return the number of bytes NOT copied
 */
#define EX(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6000f	;	\
	.previous
#define EX_NO_POP(...)		\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6005f	;	\
	.previous
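/*
 * How the EX()/EX_NO_POP() wrappers work: each guarded access emits a
 * (faulting-instruction, fixup) address pair into __ex_table. If a
 * load or store faults mid-copy, the page fault handler looks the
 * pair up and resumes at the fixup, which computes the return value
 * from r3 (to + n, set up on entry) and r4 (the next unwritten
 * destination byte). Conceptually (sketch only):
 *
 *	return_value = r3 - r4;		// bytes NOT copied
 *
 * Two fixups are needed because the small-copy fast path runs before
 * r8-r11 are pushed: its fixup (6005f) must return without popping
 * them, while the main path's fixup (6000f) lands just before the
 * epilogue that restores r8-r11.
 */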
ENTRY(__copy_user)
	! Check if small number of bytes
	mov	#11,r0
	mov	r4,r3
	cmp/gt	r0,r6		! r6 (len) > r0 (11)
	bf/s	.L_cleanup_loop_no_pop
	 add	r6,r3		! last destination address

	! Calculate bytes needed to align to src
	mov.l	r11,@-r15
	neg	r5,r0
	mov.l	r10,@-r15
	add	#4,r0
	mov.l	r9,@-r15
	and	#3,r0
	mov.l	r8,@-r15
	tst	r0,r0
	bt	2f

1:
	! Copy bytes to long word align src
EX(	mov.b	@r5+,r1	)
	dt	r0
	add	#-1,r6
EX(	mov.b	r1,@r4	)
	bf/s	1b
	 add	#1,r4

	! Jump to appropriate routine depending on dest
2:	mov	#3,r1
	mov	r6, r2
	and	r4,r1
	shlr2	r2
	shll2	r1
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	 nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11

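/*
 * Dispatch note: once the source has been byte-copied up to a
 * longword boundary, only the destination's low two bits decide how
 * each aligned 32-bit load must be split on the store side:
 *
 *	(r4 & 3) == 0	-> .L_dest00: aligned longword stores
 *	(r4 & 3) == 2	-> .L_dest10: stores re-split into 16-bit halves
 *	(r4 & 3) == 1,3	-> .L_dest01/.L_dest11: byte, word, byte stores
 *
 * r2 carries the remaining length in longwords (r6 >> 2), and the
 * final 0-3 tail bytes are mopped up by .L_cleanup.
 */
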
/*
 * Come here if there are less than 12 bytes to copy
 *
 * Keep the branch target close, so the bf/s callee doesn't overflow
 * and result in a more expensive branch being inserted. This is the
 * fast-path for small copies, the jump via the jump table will hit the
 * default slow-path cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6	! Check explicitly for zero
	bt	1f

2:
EX_NO_POP(	mov.b	@r5+,r0	)
	dt	r6
EX_NO_POP(	mov.b	r0,@r4	)
	bf/s	2b
	 add	#1,r4

1:	mov	#0,r0	! normal return
5000:

# Exception handler:
.section .fixup, "ax"
6005:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
	rts
	 nop

! Destination = 00

.L_dest00:
	! Skip the large copy for small transfers
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! r0 (60) > r6 (len)
	bt	1f

	! Align dest to a 32 byte boundary
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0
	tst	r0, r0
	bt	2f

	sub	r0, r6
	shlr2	r0
3:
EX(	mov.l	@r5+,r1	)
	dt	r0
EX(	mov.l	r1,@r4	)
	bf/s	3b
	 add	#4,r4

2:
EX(	mov.l	@r5+,r0	)
EX(	mov.l	@r5+,r1	)
EX(	mov.l	@r5+,r2	)
EX(	mov.l	@r5+,r7	)
EX(	mov.l	@r5+,r8	)
EX(	mov.l	@r5+,r9	)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
EX(	movca.l	r0,@r4	)
#else
EX(	mov.l	r0,@r4	)
#endif
	add	#-32, r6
EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! r0 (32) > r6 (len)
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)
	bf/s	2b
	 add	#32,r4

1:	mov	r6, r0
	shlr2	r0
	tst	r0, r0
	bt	.L_cleanup
1:
EX(	mov.l	@r5+,r1	)
	dt	r0
EX(	mov.l	r1,@r4	)
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 10

.L_dest10:
	mov	r2,r7
	shlr2	r7
	shlr	r7
	tst	r7,r7
	mov	#7,r0
	bt/s	1f
	 and	r0,r2
2:
	dt	r7
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.l	@r5+,r0	)
EX(	mov.l	@r5+,r1	)
EX(	mov.l	@r5+,r8	)
EX(	mov.l	@r5+,r9	)
EX(	mov.l	@r5+,r10	)
EX(	mov.w	r0,@r4	)
	add	#2,r4
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4	)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1	)
EX(	mov.l	@r5+,r8	)
EX(	mov.l	@r5+,r0	)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	2b
	 add	#30,r4
#else
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	@r5,r10	)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	2b
	 add	#34,r4
#endif
	tst	r2,r2
	bt	.L_cleanup

1:	! Read longword, write two words per iteration
EX(	mov.l	@r5+,r0	)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4	)
	shlr16	r0
EX(	mov.w	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4	)
#endif
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

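/*
 * Realignment note for .L_dest10 above: the source is read as aligned
 * longwords, and xtrct re-stitches adjacent registers for a 2-byte
 * misaligned destination. xtrct Rm,Rn extracts the middle 32 bits of
 * the 64-bit pair Rm:Rn, i.e. approximately:
 *
 *	Rn = (Rm << 16) | (Rn >> 16);
 *
 * After one leading 16-bit store, each 32-byte pass can then issue
 * full longword stores instead of sixteen separate word stores.
 */
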
! Destination = 01 or 11

.L_dest01:
.L_dest11:
	! Read longword, write byte, word, byte per iteration
EX(	mov.l	@r5+,r0	)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.b	r0,@r4	)
	shlr8	r0
	add	#1,r4
EX(	mov.w	r0,@r4	)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest01
	 add	#3,r4
#else
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
EX(	mov.b	r7,@r4	)
	add	#1,r4
EX(	mov.w	r0,@r4	)
	bf/s	.L_dest01
	 add	#3,r4
#endif

! Cleanup last few bytes
.L_cleanup:
	mov	r6,r0
	and	#3,r0
	tst	r0,r0
	bt	.L_exit
	mov	r0,r6

.L_cleanup_loop:
EX(	mov.b	@r5+,r0	)
	dt	r6
EX(	mov.b	r0,@r4	)
	bf/s	.L_cleanup_loop
	 add	#1,r4

.L_exit:
	mov	#0,r0	! normal return

5000:

# Exception handler:
.section .fixup, "ax"
6000:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0
	.align	2
8000:	.long	5000b

.previous
	mov.l	@r15+,r8
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	 mov.l	@r15+,r11

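/*
 * Overall shape of __copy_user in rough C (illustrative sketch only;
 * `faulted` stands in for the __ex_table fixup, and the assembly
 * additionally restores r8-r11 on the paths that saved them):
 *
 *	__kernel_size_t __copy_user(void *to, const void *from,
 *				    __kernel_size_t n)
 *	{
 *		char *d = to, *end = d + n;
 *		const char *s = from;
 *
 *		while (d != end) {
 *			*d++ = *s++;		// any access may fault
 *			if (faulted)
 *				return end - d;	// bytes NOT copied
 *		}
 *		return 0;			// everything copied
 *	}
 */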