/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.

#include <linux/linkage.h>

/*
 * Helpers for the "dest aligned, src misaligned" paths below.  Each output
 * word is assembled from two consecutive aligned source words:
 *
 *   GET_FRONT_BITS shifts the previously loaded word so that its
 *   not-yet-stored bytes land at the low-address end of the output word.
 *   GET_AFTER_BITS shifts the newly loaded word so that its leading bytes
 *   land at the high-address end of the output word.
 *
 * \rx is shifted in place by \y bits; the shift direction depends on the
 * byte order selected by __cskyLE__.
 */
.macro	GET_FRONT_BITS rx y
#ifdef	__cskyLE__
	lsri	\rx, \y
#else
	lsli	\rx, \y
#endif
.endm

.macro	GET_AFTER_BITS rx y
#ifdef	__cskyLE__
	lsli	\rx, \y
#else
	lsri	\rx, \y
#endif
.endm

/*
 * void *memcpy(void *dest, const void *src, size_t n);
 *
 * Register usage, as established by the code below:
 *   r2      dest on entry; never written here (presumably the return
 *           value register -- confirm against the abiv1 calling convention)
 *   r3      src, advanced as data is consumed
 *   r4      n, decremented as data is copied
 *   r7      working copy of dest, advanced as data is stored
 *   r1, r5, r6   scratch
 *   r8-r11  used by the 16-byte unrolled loops; saved to and restored from
 *           the stack around those loops
 *
 * Strategy:
 *   - n < 4: plain byte loop.
 *   - dest misaligned: copy bytes until dest is word aligned.
 *   - dest and src both aligned: 16-byte unrolled word loop, then a word
 *     loop, then a byte tail.
 *   - dest aligned, src misaligned by 1/2/3: load aligned source words and
 *     merge neighbouring words with shifts (GET_FRONT_BITS/GET_AFTER_BITS).
 *   - if the two pointers are closer together than n, use the byte loop.
 */
ENTRY(memcpy)
	mov	r7, r2
	cmplti	r4, 4
	bt	.L_copy_by_byte
	mov	r6, r2
	andi	r6, 3
	cmpnei	r6, 0
	jbt	.L_dest_not_aligned
	mov	r6, r3
	andi	r6, 3
	cmpnei	r6, 0
	jbt	.L_dest_aligned_but_src_not_aligned
.L0:
	/* Both pointers are word aligned here. */
	cmplti	r4, 16
	jbt	.L_aligned_and_len_less_16bytes
	subi	sp, 8
	stw	r8, (sp, 0)
.L_aligned_and_len_larger_16bytes:
	ldw	r1, (r3, 0)
	ldw	r5, (r3, 4)
	ldw	r8, (r3, 8)
	stw	r1, (r7, 0)
	ldw	r1, (r3, 12)
	stw	r5, (r7, 4)
	stw	r8, (r7, 8)
	stw	r1, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L_aligned_and_len_larger_16bytes
	ldw	r8, (sp, 0)
	addi	sp, 8
	cmpnei	r4, 0
	jbf	.L_return

.L_aligned_and_len_less_16bytes:
	cmplti	r4, 4
	bt	.L_copy_by_byte
.L1:
	ldw	r1, (r3, 0)
	stw	r1, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	jbf	.L1
	br	.L_copy_by_byte

.L_return:
	rts

/*
 * Byte-at-a-time copy of the remaining r4 bytes from r3 to r7.  Used for
 * short lengths, for the tail of the word loops, and as the fallback when
 * the pointers are too close together.
 */
.L_copy_by_byte:
	cmpnei	r4, 0
	jbf	.L_return
.L4:
	ldb	r1, (r3, 0)
	stb	r1, (r7, 0)
	addi	r3, 1
	addi	r7, 1
	decne	r4
	jbt	.L4
	rts

/*
 * If dest is not aligned, just copying some bytes makes the dest align.
 * After that, we judge whether the src is aligned.
 */
.L_dest_not_aligned:
	/* |dest - src| < n: the buffers are too close, copy by byte. */
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5
	cmplt	r5, r4
	bt	.L_copy_by_byte
	mov	r5, r7
	sub	r5, r3
	cmphs	r5, r4
	bf	.L_copy_by_byte
	mov	r5, r6			/* r5 = dest & 3, counts up to 4 */
.L5:
	ldb	r1, (r3, 0)		/* makes the dest align. */
	stb	r1, (r7, 0)
	addi	r5, 1
	subi	r4, 1
	addi	r3, 1
	addi	r7, 1
	cmpnei	r5, 4
	jbt	.L5
	cmplti	r4, 4
	jbt	.L_copy_by_byte
	mov	r6, r3			/* judge whether the src is aligned. */
	andi	r6, 3
	cmpnei	r6, 0
	jbf	.L0

/*
 * Judge the number of misaligned, 1, 2, 3?
 *
 * On entry r6 = src & 3 (non-zero).  r3 is rounded down to a word boundary,
 * the first aligned word is loaded into r1 and r3 is advanced past it, so
 * from here on r3 = (original src - r6) + 4 and r1 always holds the
 * previously loaded source word.
 */
.L_dest_aligned_but_src_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5
	cmplt	r5, r4
	bt	.L_copy_by_byte
	bclri	r3, 0
	bclri	r3, 1
	ldw	r1, (r3, 0)
	addi	r3, 4
	cmpnei	r6, 2
	bf	.L_dest_aligned_but_src_not_aligned_2bytes
	cmpnei	r6, 3
	bf	.L_dest_aligned_but_src_not_aligned_3bytes

.L_dest_aligned_but_src_not_aligned_1byte:
	mov	r5, r7
	sub	r5, r3
	cmphs	r5, r4
	/*
	 * r3 has already been moved to src + 3 above, so the byte-copy
	 * fallback must first rewind it to the real source position;
	 * branching straight to .L_copy_by_byte would copy from the wrong
	 * address.
	 */
	bf	.L_1byte_rewind_src
	cmplti	r4, 16
	bf	.L11
.L10:					/* If the len is less than 16 bytes */
	GET_FRONT_BITS r1 8
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6			/* keep the new word for the next round */
	GET_AFTER_BITS r6 24
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L10
.L_1byte_rewind_src:
	subi	r3, 3			/* back to the first uncopied src byte */
	br	.L_copy_by_byte
.L11:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L12:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 8		/* little or big endian? */
	mov	r10, r5
	GET_AFTER_BITS r5 24
	or	r5, r1

	GET_FRONT_BITS r10 8
	mov	r1, r11
	GET_AFTER_BITS r11 24
	or	r11, r10

	GET_FRONT_BITS r1 8
	mov	r10, r8
	GET_AFTER_BITS r8 24
	or	r8, r1

	GET_FRONT_BITS r10 8
	mov	r1, r9			/* carry the last word into the next round */
	GET_AFTER_BITS r9 24
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L12
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L10
	subi	r3, 3
	br	.L_copy_by_byte

.L_dest_aligned_but_src_not_aligned_2bytes:
	cmplti	r4, 16
	bf	.L21
.L20:
	GET_FRONT_BITS r1 16
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6
	GET_AFTER_BITS r6 16
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2			/* back to the first uncopied src byte */
	br	.L_copy_by_byte

.L21:					/* n > 16 */
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)

.L22:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 16
	mov	r10, r5
	GET_AFTER_BITS r5 16
	or	r5, r1

	GET_FRONT_BITS r10 16
	mov	r1, r11
	GET_AFTER_BITS r11 16
	or	r11, r10

	GET_FRONT_BITS r1 16
	mov	r10, r8
	GET_AFTER_BITS r8 16
	or	r8, r1

	GET_FRONT_BITS r10 16
	mov	r1, r9
	GET_AFTER_BITS r9 16
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L22
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2
	br	.L_copy_by_byte


.L_dest_aligned_but_src_not_aligned_3bytes:
	cmplti	r4, 16
	bf	.L31
.L30:
	GET_FRONT_BITS r1 24
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6
	GET_AFTER_BITS r6 8
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1			/* back to the first uncopied src byte */
	br	.L_copy_by_byte
.L31:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L32:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 24
	mov	r10, r5
	GET_AFTER_BITS r5 8
	or	r5, r1

	GET_FRONT_BITS r10 24
	mov	r1, r11
	GET_AFTER_BITS r11 8
	or	r11, r10

	GET_FRONT_BITS r1 24
	mov	r10, r8
	GET_AFTER_BITS r8 8
	or	r8, r1

	GET_FRONT_BITS r10 24
	mov	r1, r9
	GET_AFTER_BITS r9 8
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L32
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1
	br	.L_copy_by_byte