/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

	.text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 *
 * Per the PPC32 ABI: r3 = buff, r4 = len, r5 = sum (running 32-bit
 * accumulator).  The 1's complement sum is built with the carry chain
 * (adde), so the XER[CA] bit must stay live between the adds below;
 * do not reorder the arithmetic.  Result is returned in r3.
 */
_GLOBAL(__csum_partial)
	subi	r3,r3,4			/* pre-bias pointer: loads use 4(r3) */
	srawi.	r6,r4,2			/* Divide len by 4 and also clear carry */
	beq	3f			/* if we're doing < 4 bytes */
	andi.	r0,r3,2			/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)		/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2			/* # words to do */
	adde	r5,r5,r0
	beq	3f
	/* Handle 1-3 leading words so the remainder is a multiple of 4 words */
1:	andi.	r6,r6,3			/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6
2:	lwzu	r0,4(r3)
	adde	r5,r5,r0
	bdnz	2b
21:	srwi.	r6,r4,4			/* # blocks of 4 words to do */
	beq	3f
	/* Software-pipelined 4-words-per-iteration loop: each load of the
	   next group overlaps the adde of the previous one. */
	lwz	r0,4(r3)
	mtctr	r6
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdz	23f
22:	lwz	r0,4(r3)
	adde	r5,r5,r8
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdnz	22b
23:	adde	r5,r5,r8		/* fold in the last pipelined word */
3:	andi.	r0,r4,2			/* trailing halfword? */
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1			/* trailing byte? */
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8			/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5			/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff, while copying the block to dst.
 * If an access exception occurs it returns zero.
 *
 * csum_partial_copy_generic(src, dst, len)
 */
/*
 * Copy-and-checksum one 16-byte group.  Every load/store gets a numbered
 * local label (8n0..8n7) so the EXCODE macro below can register it in the
 * exception table; a fault at any of them lands on "fault".
 * r4 = src-4, r6 = dst-4, r12 = running checksum (carry chain live).
 */
#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
	adde	r12,r12,r7;	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
	adde	r12,r12,r8;	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
	adde	r12,r12,r9;	\
8 ## n ## 7:			\
	stwu	r10,16(r6);	\
	adde	r12,r12,r10

/* Exception-table entries for the eight labels emitted by WITHEX(n). */
#define CSUM_COPY_16_BYTES_EXCODE(n)		\
	EX_TABLE(8 ## n ## 0b, fault);	\
	EX_TABLE(8 ## n ## 1b, fault);	\
	EX_TABLE(8 ## n ## 2b, fault);	\
	EX_TABLE(8 ## n ## 3b, fault);	\
	EX_TABLE(8 ## n ## 4b, fault);	\
	EX_TABLE(8 ## n ## 5b, fault);	\
	EX_TABLE(8 ## n ## 6b, fault);	\
	EX_TABLE(8 ## n ## 7b, fault);

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"checksum_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

/*
 * r3 = src, r4 = dst, r5 = len.  Internally: r4 = src-4, r6 = dst-4
 * (all loads/stores use a 4(rX) pre-bias), r12 = checksum accumulator
 * seeded with ~0.  cr7.eq tracks whether dst was even; if not, the
 * final sum is byte-rotated before return.  Returns sum in r3, or 0
 * if a fault occurred (see "fault" below).
 */
_GLOBAL(csum_partial_copy_generic)
	li	r12,-1			/* checksum starts at 0xffffffff */
	addic	r0,r0,0			/* clear carry */
	addi	r6,r4,-4		/* r6 = dst-4 */
	neg	r0,r4
	addi	r4,r3,-4		/* r4 = src-4 */
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq		/* default: no final byte-rotate */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	rlwinm	r7,r6,3,0x8		/* r7 = 8 iff dst address is odd */
	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0	/* is destination address even ? */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8			/* accumulate bytes big-endian in r3 */
	rlwimi	r3,r9,0,24,31
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5		/* len -= alignment prologue */
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES /* r5 = leftover bytes */
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4			/* warm up: touch r7 lines ahead */
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0		/* r8 = lines left after prefetch tail */
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4			/* prefetch next src line */
54:	dcbz	r11,r6			/* zero dst line: avoids read-for-ownership */
/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0			/* any prefetch-tail lines left? */
	li	r3,4
	li	r7,0
	bne	114b			/* copy the lines we only prefetched */

63:	srwi.	r0,r5,2			/* remaining whole words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,2			/* trailing halfword? */
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1			/* trailing byte? */
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8			/* byte goes in the upper half */
	adde	r12,r12,r0
66:	addze	r3,r12			/* fold in final carry */
	beqlr+	cr7			/* even dst: done */
	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
	blr

	/* Any faulting load/store above lands here via the exception table;
	   report failure by returning 0. */
fault:
	li	r3,0
	blr

	EX_TABLE(70b, fault);
	EX_TABLE(71b, fault);
	EX_TABLE(72b, fault);
	EX_TABLE(73b, fault);
	EX_TABLE(54b, fault);

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * fault (if in read part) or fault (if in write part)
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	/* Exception-table entries for the non-cacheline copy loops above. */
	EX_TABLE(30b, fault);
	EX_TABLE(31b, fault);
	EX_TABLE(40b, fault);
	EX_TABLE(41b, fault);
	EX_TABLE(50b, fault);
	EX_TABLE(51b, fault);

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 *
 * r3 = saddr, r4 = daddr, r5 = len, r6 = proto, r7 = sum.
 * Sums the two 16-byte IPv6 addresses plus len+proto+sum with the
 * carry chain, then folds 32 bits -> 16, complements, and returns
 * the 16-bit result in the low half of r3.
 */

_GLOBAL(csum_ipv6_magic)
	lwz	r8, 0(r3)		/* four words of saddr */
	lwz	r9, 4(r3)
	addc	r0, r7, r8		/* start carry chain with sum + word0 */
	lwz	r10, 8(r3)
	adde	r0, r0, r9
	lwz	r11, 12(r3)
	adde	r0, r0, r10
	lwz	r8, 0(r4)		/* four words of daddr */
	adde	r0, r0, r11
	lwz	r9, 4(r4)
	adde	r0, r0, r8
	lwz	r10, 8(r4)
	adde	r0, r0, r9
	lwz	r11, 12(r4)
	adde	r0, r0, r10
	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0			/* fold in final carry */
	rotlwi	r3, r0, 16		/* fold 32 bits to 16: add halves */
	add	r3, r0, r3
	not	r3, r3			/* 1's complement */
	rlwinm	r3, r3, 16, 16, 31	/* result in low 16 bits */
	blr
EXPORT_SYMBOL(csum_ipv6_magic)