/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * __csum_partial(r3=buff, r4=len, r5=sum)
 */
_GLOBAL(__csum_partial)
	addic	r0,r5,0			/* clear carry */

	srdi.	r6,r4,3			/* less than 8 bytes? */
	beq	.Lcsum_tail_word

	/*
	 * If only halfword aligned, align to a double word. Since odd
	 * aligned addresses should be rare and they would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 */
	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
	beq	.Lcsum_aligned

	li	r7,4
	sub	r6,r7,r6
	mtctr	r6

1:
	lhz	r6,0(r3)		/* align to doubleword */
	subi	r4,r4,2
	addi	r3,r3,2
	adde	r0,r0,r6
	bdnz	1b

.Lcsum_aligned:
	/*
	 * We unroll the loop such that each iteration is 64 bytes with an
	 * entry and exit limb of 64 bytes, meaning a minimum size of
	 * 128 bytes.
	 */
	srdi.	r6,r4,7
	beq	.Lcsum_tail_doublewords /* len < 128 */

	srdi	r6,r4,6
	subi	r6,r6,1
	mtctr	r6

	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	ld	r6,0(r3)
	ld	r9,8(r3)

	ld	r10,16(r3)
	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
	 * because of the XER dependency. This means the fastest this loop can
	 * go is 16 cycles per iteration. The scheduling of the loop below has
	 * been shown to hit this on both POWER6 and POWER7.
	 */
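	/*
	 * The loop is also software pipelined: the first four doublewords
	 * of each 64-byte block are loaded at the bottom of the previous
	 * iteration, and the final block is summed in a peeled copy of the
	 * loop body below the bdnz so that no load runs past the buffer.
	 */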
	.align 5
2:
	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10

	adde	r0,r0,r11

	adde	r0,r0,r12

	adde	r0,r0,r14

	adde	r0,r0,r15
	ld	r6,0(r3)
	ld	r9,8(r3)

	adde	r0,r0,r16
	ld	r10,16(r3)
	ld	r11,24(r3)
	bdnz	2b


	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
	adde	r0,r0,r11
	adde	r0,r0,r12
	adde	r0,r0,r14
	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r4,r4,63

.Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r4,3
	beq	.Lcsum_tail_word

	mtctr	r6
3:
	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
	bdnz	3b

	andi.	r4,r4,7

.Lcsum_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r4,2
	beq	.Lcsum_tail_halfword

	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
	subi	r4,r4,4

.Lcsum_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r4,1
	beq	.Lcsum_tail_byte

	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
	subi	r4,r4,2

.Lcsum_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r4,1
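	/*
	 * Fold the 64-bit running sum to 32 bits: after rotating by 32 and
	 * adding, the top half of the result holds low32 + high32 plus the
	 * carry out of the bottom-half addition, i.e. the ones' complement
	 * fold, which srdi then extracts.
	 */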
	beq	.Lcsum_finish

	lbz	r6,0(r3)
#ifdef __BIG_ENDIAN__
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9
#else
	adde	r0,r0,r6
#endif

.Lcsum_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0
	srdi	r3,r3,32
	blr
EXPORT_SYMBOL(__csum_partial)


	.macro srcnr
100:
	EX_TABLE(100b,.Lerror_nr)
	.endm

	.macro source
150:
	EX_TABLE(150b,.Lerror)
	.endm

	.macro dstnr
200:
	EX_TABLE(200b,.Lerror_nr)
	.endm

	.macro dest
250:
	EX_TABLE(250b,.Lerror)
	.endm
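/*
 * Each macro above tags the load or store that follows it with an
 * exception table entry. The source/dest variants are used inside the
 * unrolled loop, where a fault must branch to .Lerror to restore the
 * saved non-volatile registers and pop the stack frame; the srcnr/dstnr
 * variants are used where no frame is active and branch straight to
 * .Lerror_nr.
 */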
/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff (32-bit), while copying the block to dst.
 * If an access exception occurs, it returns 0.
 *
 * csum_partial_copy_generic(r3=src, r4=dst, r5=len)
 */
_GLOBAL(csum_partial_copy_generic)
	li	r6,-1
	addic	r0,r6,0			/* clear carry */

	srdi.	r6,r5,3			/* less than 8 bytes? */
	beq	.Lcopy_tail_word

	/*
	 * If only halfword aligned, align to a double word. Since odd
	 * aligned addresses should be rare and they would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 *
	 * If the source and destination are relatively unaligned we only
	 * align the source. This keeps things simple.
	 */
	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
	beq	.Lcopy_aligned

	li	r9,4
	sub	r6,r9,r6
	mtctr	r6

1:
srcnr;	lhz	r6,0(r3)		/* align to doubleword */
	subi	r5,r5,2
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	bdnz	1b

.Lcopy_aligned:
	/*
	 * We unroll the loop such that each iteration is 64 bytes with an
	 * entry and exit limb of 64 bytes, meaning a minimum size of
	 * 128 bytes.
	 */
	srdi.	r6,r5,7
	beq	.Lcopy_tail_doublewords /* len < 128 */

	srdi	r6,r5,6
	subi	r6,r6,1
	mtctr	r6

	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

source;	ld	r6,0(r3)
source;	ld	r9,8(r3)

source;	ld	r10,16(r3)
source;	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
	 * because of the XER dependency. This means the fastest this loop can
	 * go is 16 cycles per iteration. The scheduling of the loop below has
	 * been shown to hit this on both POWER6 and POWER7.
	 */
	.align 5
2:
	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
source;	ld	r6,0(r3)
source;	ld	r9,8(r3)

	adde	r0,r0,r16
source;	ld	r10,16(r3)
source;	ld	r11,24(r3)
	bdnz	2b


	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r5,r5,63

.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r5,3
	beq	.Lcopy_tail_word

	mtctr	r6
3:
srcnr;	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
dstnr;	std	r6,0(r4)
	addi	r4,r4,8
	bdnz	3b

	andi.	r5,r5,7

.Lcopy_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r5,2
	beq	.Lcopy_tail_halfword

srcnr;	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
dstnr;	stw	r6,0(r4)
	addi	r4,r4,4
	subi	r5,r5,4

.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r5,1
	beq	.Lcopy_tail_byte

srcnr;	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	subi	r5,r5,2

.Lcopy_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r5,1
	beq	.Lcopy_finish

srcnr;	lbz	r6,0(r3)
#ifdef __BIG_ENDIAN__
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9
#else
	adde	r0,r0,r6
#endif
dstnr;	stb	r6,0(r4)

.Lcopy_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0
	srdi	r3,r3,32
	blr
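/*
 * Fault path: .Lerror unwinds the stack frame set up for the unrolled
 * loop before falling through to .Lerror_nr, which returns 0. Since the
 * checksum is seeded with 0xffffffff, a successful copy never yields a
 * zero checksum, so a 0 return unambiguously signals a fault.
 */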
.Lerror:
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Lerror_nr:
	li	r3,0
	blr

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 */

_GLOBAL(csum_ipv6_magic)
	ld	r8, 0(r3)
	ld	r9, 8(r3)
	add	r5, r5, r6
	addc	r0, r8, r9
	ld	r10, 0(r4)
	ld	r11, 8(r4)
#ifdef CONFIG_CPU_LITTLE_ENDIAN
	rotldi	r5, r5, 8
#endif
	adde	r0, r0, r10
	add	r5, r5, r7
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0
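	/*
	 * Reduce the 64-bit pseudo-header sum to the final __sum16:
	 * fold 64 bits to 32, then 32 to 16, complement, and mask.
	 */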
	rotldi	r3, r0, 32		/* fold two 32 bit halves together */
	add	r3, r0, r3
	srdi	r0, r3, 32
	rotlwi	r3, r0, 16		/* fold two 16 bit halves together */
	add	r3, r0, r3
	not	r3, r3
	rlwinm	r3, r3, 16, 16, 31
	blr
EXPORT_SYMBOL(csum_ipv6_magic)