/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 */
#include <linux/errno.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we share the code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0      $8
#define t1      $9
#define t2      $10
#define t3      $11
#define t4      $12
#define t5      $13
#define t6      $14
#define t7      $15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD   ld
#define LOAD32 lwu
#define ADD    daddu
#define NBYTES 8

#else

#define LOAD   lw
#define LOAD32 lw
#define ADD    addu
#define NBYTES 4

#endif /* USE_DOUBLE */

#define UNIT(unit)  ((unit)*NBYTES)

#define ADDC(sum,reg)                                           \
        .set    push;                                           \
        .set    noat;                                           \
        ADD     sum, reg;                                       \
        sltu    v1, sum, reg;                                   \
        ADD     sum, v1;                                        \
        .set    pop

#define ADDC32(sum,reg)                                         \
        .set    push;                                           \
        .set    noat;                                           \
        addu    sum, reg;                                       \
        sltu    v1, sum, reg;                                   \
        addu    sum, v1;                                        \
        .set    pop
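
/*
 * ADDC/ADDC32 perform the end-around carry addition used for ones'
 * complement checksums: a carry out of the most significant bit is added
 * back into the low bit.  A rough C sketch of ADDC32, for illustration
 * only:
 *
 *	sum += reg;
 *	if (sum < reg)		// unsigned wrap-around, i.e. a carry
 *		sum++;
 *
 * ADDC does the same at full register width (64 bits when USE_DOUBLE is
 * defined), which is why the accumulator still has to be folded down to
 * 32 bits before the result is returned.
 */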

#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)    \
        LOAD    _t0, (offset + UNIT(0))(src);                   \
        LOAD    _t1, (offset + UNIT(1))(src);                   \
        LOAD    _t2, (offset + UNIT(2))(src);                   \
        LOAD    _t3, (offset + UNIT(3))(src);                   \
        ADDC(_t0, _t1);                                         \
        ADDC(_t2, _t3);                                         \
        ADDC(sum, _t0);                                         \
        ADDC(sum, _t2)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)     \
        CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)     \
        CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);   \
        CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

#define src a0
#define sum v0
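
/*
 * C-level view (a sketch; see the C declaration of csum_partial for the
 * exact prototype): the routine accumulates a 32-bit, ones' complement
 * partial sum over a1 bytes starting at a0, adds in the partial checksum
 * passed in a2 and returns the result in v0.  The value is still a
 * 32-bit partial sum; callers fold it to the final 16-bit checksum
 * (e.g. via csum_fold()).
 */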

        .text
        .set    noreorder
        .align  5
LEAF(csum_partial)
EXPORT_SYMBOL(csum_partial)
        move    sum, zero
        move    t7, zero

        sltiu   t8, a1, 0x8
        bnez    t8, .Lsmall_csumcpy             /* < 8 bytes to copy */
        move    t2, a1

        andi    t7, src, 0x1                    /* odd buffer? */

.Lhword_align:
        beqz    t7, .Lword_align
        andi    t8, src, 0x2

        lbu     t0, (src)
        LONG_SUBU       a1, a1, 0x1
#ifdef __MIPSEL__
        sll     t0, t0, 8
#endif
        ADDC(sum, t0)
        PTR_ADDU        src, src, 0x1
        andi    t8, src, 0x2

.Lword_align:
        beqz    t8, .Ldword_align
        sltiu   t8, a1, 56

        lhu     t0, (src)
        LONG_SUBU       a1, a1, 0x2
        ADDC(sum, t0)
        sltiu   t8, a1, 56
        PTR_ADDU        src, src, 0x2

.Ldword_align:
        bnez    t8, .Ldo_end_words
        move    t8, a1

        andi    t8, src, 0x4
        beqz    t8, .Lqword_align
        andi    t8, src, 0x8

        LOAD32  t0, 0x00(src)
        LONG_SUBU       a1, a1, 0x4
        ADDC(sum, t0)
        PTR_ADDU        src, src, 0x4
        andi    t8, src, 0x8

.Lqword_align:
        beqz    t8, .Loword_align
        andi    t8, src, 0x10

#ifdef USE_DOUBLE
        ld      t0, 0x00(src)
        LONG_SUBU       a1, a1, 0x8
        ADDC(sum, t0)
#else
        lw      t0, 0x00(src)
        lw      t1, 0x04(src)
        LONG_SUBU       a1, a1, 0x8
        ADDC(sum, t0)
        ADDC(sum, t1)
#endif
        PTR_ADDU        src, src, 0x8
        andi    t8, src, 0x10

.Loword_align:
        beqz    t8, .Lbegin_movement
        LONG_SRL        t8, a1, 0x7

#ifdef USE_DOUBLE
        ld      t0, 0x00(src)
        ld      t1, 0x08(src)
        ADDC(sum, t0)
        ADDC(sum, t1)
#else
        CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
        LONG_SUBU       a1, a1, 0x10
        PTR_ADDU        src, src, 0x10
        LONG_SRL        t8, a1, 0x7

.Lbegin_movement:
        beqz    t8, 1f
        andi    t2, a1, 0x40

.Lmove_128bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
        LONG_SUBU       t8, t8, 0x01
        .set    reorder                         /* DADDI_WAR */
        PTR_ADDU        src, src, 0x80
        bnez    t8, .Lmove_128bytes
        .set    noreorder

1:
        beqz    t2, 1f
        andi    t2, a1, 0x20

.Lmove_64bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
        PTR_ADDU        src, src, 0x40

1:
        beqz    t2, .Ldo_end_words
        andi    t8, a1, 0x1c

.Lmove_32bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        andi    t8, a1, 0x1c
        PTR_ADDU        src, src, 0x20

.Ldo_end_words:
        beqz    t8, .Lsmall_csumcpy
        andi    t2, a1, 0x3
        LONG_SRL        t8, t8, 0x2

.Lend_words:
        LOAD32  t0, (src)
        LONG_SUBU       t8, t8, 0x1
        ADDC(sum, t0)
        .set    reorder                         /* DADDI_WAR */
        PTR_ADDU        src, src, 0x4
        bnez    t8, .Lend_words
        .set    noreorder

/* unknown src alignment and < 8 bytes to go */
.Lsmall_csumcpy:
        move    a1, t2

        andi    t0, a1, 4
        beqz    t0, 1f
        andi    t0, a1, 2

        /* Still a full word to go */
        ulw     t1, (src)
        PTR_ADDIU       src, 4
#ifdef USE_DOUBLE
        dsll    t1, t1, 32                      /* clear lower 32bit */
#endif
        ADDC(sum, t1)

1:      move    t1, zero
        beqz    t0, 1f
        andi    t0, a1, 1

        /* Still a halfword to go */
        ulhu    t1, (src)
        PTR_ADDIU       src, 2

1:      beqz    t0, 1f
        sll     t1, t1, 16

        lbu     t2, (src)
        nop

#ifdef __MIPSEB__
        sll     t2, t2, 8
#endif
        or      t1, t2

1:      ADDC(sum, t1)

        /* fold checksum */
#ifdef USE_DOUBLE
        dsll32  v1, sum, 0
        daddu   sum, v1
        sltu    v1, sum, v1
        dsra32  sum, sum, 0
        addu    sum, v1
#endif
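
        /*
         * When USE_DOUBLE is defined the block above folds the 64-bit
         * accumulator into 32 bits.  Roughly, in C:
         *
         *	tmp = sum << 32;
         *	sum += tmp;		// carry out caught by the sltu
         *	sum = (s64)sum >> 32;	// == low 32 bits + high 32 bits
         *	sum += carry;		// end-around carry
         *
         * i.e. the upper and lower halves are added with end-around
         * carry, leaving a 32-bit partial checksum.
         */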

        /* odd buffer alignment? */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
        .set    push
        .set    arch=mips32r2
        wsbh    v1, sum
        movn    sum, v1, t7
        .set    pop
#else
        beqz    t7, 1f                          /* odd buffer alignment? */
        lui     v1, 0x00ff
        addu    v1, 0x00ff
        and     t0, sum, v1
        sll     t0, t0, 8
        srl     sum, sum, 8
        and     sum, sum, v1
        or      sum, sum, t0
1:
#endif
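
        /*
         * If the buffer started at an odd address (t7 != 0), every 16-bit
         * word was accumulated with its two bytes swapped, so the bytes
         * of each halfword of the result are swapped back: wsbh does this
         * directly on R2 and later cores; the mask-and-shift fallback is
         * roughly equivalent to
         *
         *	sum = ((sum & 0x00ff00ff) << 8) | ((sum >> 8) & 0x00ff00ff);
         */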
        .set    reorder
        /* Add the passed partial csum. */
        ADDC32(sum, a2)
        jr      ra
        .set    noreorder
        END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 *	csum_partial_copy_nocheck(src, dst, len)
 *	__csum_partial_copy_kernel(src, dst, len)
 *
 * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

#define src a0
#define dst a1
#define len a2
#define sum v0
#define odd t8

/*
 * All exception handlers simply return 0.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn    : Load/store instruction
 * type    : Instruction type
 * reg     : Register
 * addr    : Address
 * handler : Exception handler
 */
#define EXC(insn, type, reg, addr)              \
        .if \mode == LEGACY_MODE;               \
9:              insn reg, addr;                 \
                .section __ex_table,"a";        \
                PTR     9b, .L_exc;             \
                .previous;                      \
        /* This is enabled in EVA mode */       \
        .else;                                  \
                /* If loading from user or storing to user */  \
                .if ((\from == USEROP) && (type == LD_INSN)) || \
                    ((\to == USEROP) && (type == ST_INSN));     \
9:                      __BUILD_EVA_INSN(insn##e, reg, addr);   \
                        .section __ex_table,"a";                \
                        PTR     9b, .L_exc;                     \
                        .previous;                              \
                .else;                                          \
                        /* EVA without exception */             \
                        insn reg, addr;                         \
                .endif;                                         \
        .endif
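
/*
 * In other words, EXC() emits the access at a local label "9:" and, when
 * the access may fault (always in LEGACY_MODE, and for the user side of
 * an EVA copy), records a __ex_table entry that maps the instruction to
 * .L_exc below; on a fault the exception code lands there and the
 * routine returns 0, as noted above.  In EVA mode the user-side access
 * uses the EVA form of the instruction (insn##e) so that it is performed
 * as a user reference.
 */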

#undef LOAD

#ifdef USE_DOUBLE

#define LOADK   ld /* No exception */
#define LOAD(reg, addr)         EXC(ld, LD_INSN, reg, addr)
#define LOADBU(reg, addr)       EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)        EXC(ldl, LD_INSN, reg, addr)
#define LOADR(reg, addr)        EXC(ldr, LD_INSN, reg, addr)
#define STOREB(reg, addr)       EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)       EXC(sdl, ST_INSN, reg, addr)
#define STORER(reg, addr)       EXC(sdr, ST_INSN, reg, addr)
#define STORE(reg, addr)        EXC(sd, ST_INSN, reg, addr)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOADK   lw /* No exception */
#define LOAD(reg, addr)         EXC(lw, LD_INSN, reg, addr)
#define LOADBU(reg, addr)       EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)        EXC(lwl, LD_INSN, reg, addr)
#define LOADR(reg, addr)        EXC(lwr, LD_INSN, reg, addr)
#define STOREB(reg, addr)       EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)       EXC(swl, ST_INSN, reg, addr)
#define STORER(reg, addr)       EXC(swr, ST_INSN, reg, addr)
#define STORE(reg, addr)        EXC(sw, ST_INSN, reg, addr)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST  LOADL
#define STFIRST STORER
#define STREST  STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST  LOADR
#define STFIRST STOREL
#define STREST  STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
        .set    noat
#else
        .set    at=v1
#endif

        .macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to

        li      sum, -1
        move    odd, zero
        /*
         * Note: dst & src may be unaligned, len may be 0
         * Temps
         */
        /*
         * The "issue break"s below are very approximate.
         * Issue delays for dcache fills will perturb the schedule, as will
         * load queue full replay traps, etc.
         *
         * If len < NBYTES use byte operations.
         */
        sltu    t2, len, NBYTES
        and     t1, dst, ADDRMASK
        bnez    t2, .Lcopy_bytes_checklen\@
        and     t0, src, ADDRMASK
        andi    odd, dst, 0x1                   /* odd buffer? */
        bnez    t1, .Ldst_unaligned\@
        nop
        bnez    t0, .Lsrc_unaligned_dst_aligned\@
        /*
         * use delay slot for fall-through
         * src and dst are aligned; need to compute rem
         */
.Lboth_aligned\@:
        SRL     t0, len, LOG_NBYTES+3           # +3 for 8 units/iter
        beqz    t0, .Lcleanup_both_aligned\@    # len < 8*NBYTES
        nop
        SUB     len, 8*NBYTES                   # subtract here for bgez loop
        .align  4
1:
        LOAD(t0, UNIT(0)(src))
        LOAD(t1, UNIT(1)(src))
        LOAD(t2, UNIT(2)(src))
        LOAD(t3, UNIT(3)(src))
        LOAD(t4, UNIT(4)(src))
        LOAD(t5, UNIT(5)(src))
        LOAD(t6, UNIT(6)(src))
        LOAD(t7, UNIT(7)(src))
        SUB     len, len, 8*NBYTES
        ADD     src, src, 8*NBYTES
        STORE(t0, UNIT(0)(dst))
        ADDC(t0, t1)
        STORE(t1, UNIT(1)(dst))
        ADDC(sum, t0)
        STORE(t2, UNIT(2)(dst))
        ADDC(t2, t3)
        STORE(t3, UNIT(3)(dst))
        ADDC(sum, t2)
        STORE(t4, UNIT(4)(dst))
        ADDC(t4, t5)
        STORE(t5, UNIT(5)(dst))
        ADDC(sum, t4)
        STORE(t6, UNIT(6)(dst))
        ADDC(t6, t7)
        STORE(t7, UNIT(7)(dst))
        ADDC(sum, t6)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 8*NBYTES
        bgez    len, 1b
        .set    noreorder
        ADD     len, 8*NBYTES                   # revert len (see above)

        /*
         * len == the number of bytes left to copy < 8*NBYTES
         */
.Lcleanup_both_aligned\@:
#define rem t7
        beqz    len, .Ldone\@
        sltu    t0, len, 4*NBYTES
        bnez    t0, .Lless_than_4units\@
        and     rem, len, (NBYTES-1)    # rem = len % NBYTES
        /*
         * len >= 4*NBYTES
         */
        LOAD(t0, UNIT(0)(src))
        LOAD(t1, UNIT(1)(src))
        LOAD(t2, UNIT(2)(src))
        LOAD(t3, UNIT(3)(src))
        SUB     len, len, 4*NBYTES
        ADD     src, src, 4*NBYTES
        STORE(t0, UNIT(0)(dst))
        ADDC(t0, t1)
        STORE(t1, UNIT(1)(dst))
        ADDC(sum, t0)
        STORE(t2, UNIT(2)(dst))
        ADDC(t2, t3)
        STORE(t3, UNIT(3)(dst))
        ADDC(sum, t2)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
        beqz    len, .Ldone\@
        .set    noreorder
.Lless_than_4units\@:
        /*
         * rem = len % NBYTES
         */
        beq     rem, len, .Lcopy_bytes\@
        nop
1:
        LOAD(t0, 0(src))
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
        STORE(t0, 0(dst))
        ADDC(sum, t0)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, NBYTES
        bne     rem, len, 1b
        .set    noreorder

        /*
         * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
         * A loop would do only a byte at a time with possible branch
         * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
         * because can't assume read-access to dst.  Instead, use
         * STREST dst, which doesn't require read access to dst.
         *
         * This code should perform better than a simple loop on modern,
         * wide-issue mips processors because the code has fewer branches and
         * more instruction-level parallelism.
         */
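        /*
         * Worked example (a sketch, assuming NBYTES == 4 and len == 3):
         * rem = 24 bits to keep, bits = 8 bits to discard.  A full word
         * is loaded from src, the unwanted byte is shifted out, STREST
         * writes just the three remaining bytes ending at dst + len - 1,
         * and the same three bytes, shifted back into place, are folded
         * into sum.
         */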
#define bits t2
        beqz    len, .Ldone\@
        ADD     t1, dst, len    # t1 is just past last byte of dst
        li      bits, 8*NBYTES
        SLL     rem, len, 3     # rem = number of bits to keep
        LOAD(t0, 0(src))
        SUB     bits, bits, rem # bits = number of bits to discard
        SHIFT_DISCARD t0, t0, bits
        STREST(t0, -1(t1))
        SHIFT_DISCARD_REVERT t0, t0, bits
        .set    reorder
        ADDC(sum, t0)
        b       .Ldone\@
        .set    noreorder
.Ldst_unaligned\@:
        /*
         * dst is unaligned
         * t0 = src & ADDRMASK
         * t1 = dst & ADDRMASK; T1 > 0
         * len >= NBYTES
         *
         * Copy enough bytes to align dst
         * Set match = (src and dst have same alignment)
         */
#define match rem
        LDFIRST(t3, FIRST(0)(src))
        ADD     t2, zero, NBYTES
        LDREST(t3, REST(0)(src))
        SUB     t2, t2, t1      # t2 = number of bytes copied
        xor     match, t0, t1
        STFIRST(t3, FIRST(0)(dst))
        SLL     t4, t1, 3       # t4 = number of bits to discard
        SHIFT_DISCARD t3, t3, t4
        /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
        ADDC(sum, t3)
        beq     len, t2, .Ldone\@
        SUB     len, len, t2
        ADD     dst, dst, t2
        beqz    match, .Lboth_aligned\@
        ADD     src, src, t2

.Lsrc_unaligned_dst_aligned\@:
        SRL     t0, len, LOG_NBYTES+2   # +2 for 4 units/iter
        beqz    t0, .Lcleanup_src_unaligned\@
        and     rem, len, (4*NBYTES-1)  # rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
        LDFIRST(t0, FIRST(0)(src))
        LDFIRST(t1, FIRST(1)(src))
        SUB     len, len, 4*NBYTES
        LDREST(t0, REST(0)(src))
        LDREST(t1, REST(1)(src))
        LDFIRST(t2, FIRST(2)(src))
        LDFIRST(t3, FIRST(3)(src))
        LDREST(t2, REST(2)(src))
        LDREST(t3, REST(3)(src))
        ADD     src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
        nop                             # improves slotting
#endif
        STORE(t0, UNIT(0)(dst))
        ADDC(t0, t1)
        STORE(t1, UNIT(1)(dst))
        ADDC(sum, t0)
        STORE(t2, UNIT(2)(dst))
        ADDC(t2, t3)
        STORE(t3, UNIT(3)(dst))
        ADDC(sum, t2)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
        bne     len, rem, 1b
        .set    noreorder

.Lcleanup_src_unaligned\@:
        beqz    len, .Ldone\@
        and     rem, len, NBYTES-1      # rem = len % NBYTES
        beq     rem, len, .Lcopy_bytes\@
        nop
1:
        LDFIRST(t0, FIRST(0)(src))
        LDREST(t0, REST(0)(src))
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
        STORE(t0, 0(dst))
        ADDC(sum, t0)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, NBYTES
        bne     len, rem, 1b
        .set    noreorder

.Lcopy_bytes_checklen\@:
        beqz    len, .Ldone\@
        nop
.Lcopy_bytes\@:
        /* 0 < len < NBYTES */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
        move    t2, zero        # partial word
        li      t3, SHIFT_START # shift
#define COPY_BYTE(N)                    \
        LOADBU(t0, N(src));             \
        SUB     len, len, 1;            \
        STOREB(t0, N(dst));             \
        SLLV    t0, t0, t3;             \
        addu    t3, SHIFT_INC;          \
        beqz    len, .Lcopy_bytes_done\@; \
        or      t2, t0
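
/*
 * Each COPY_BYTE(N) copies one byte to dst and also shifts it into its
 * byte lane within the partial word t2 (SHIFT_START/SHIFT_INC select the
 * lane order to match the endianness), so the tail bytes can be summed
 * with a single ADDC(sum, t2) at .Lcopy_bytes_done.
 */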

        COPY_BYTE(0)
        COPY_BYTE(1)
#ifdef USE_DOUBLE
        COPY_BYTE(2)
        COPY_BYTE(3)
        COPY_BYTE(4)
        COPY_BYTE(5)
#endif
        LOADBU(t0, NBYTES-2(src))
        SUB     len, len, 1
        STOREB(t0, NBYTES-2(dst))
        SLLV    t0, t0, t3
        or      t2, t0
.Lcopy_bytes_done\@:
        ADDC(sum, t2)
.Ldone\@:
        /* fold checksum */
        .set    push
        .set    noat
#ifdef USE_DOUBLE
        dsll32  v1, sum, 0
        daddu   sum, v1
        sltu    v1, sum, v1
        dsra32  sum, sum, 0
        addu    sum, v1
#endif

#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
        .set    push
        .set    arch=mips32r2
        wsbh    v1, sum
        movn    sum, v1, odd
        .set    pop
#else
        beqz    odd, 1f                 /* odd buffer alignment? */
        lui     v1, 0x00ff
        addu    v1, 0x00ff
        and     t0, sum, v1
        sll     t0, t0, 8
        srl     sum, sum, 8
        and     sum, sum, v1
        or      sum, sum, t0
1:
#endif
        .set    pop
        .set    reorder
        jr      ra
        .set    noreorder
        .endm

        .set    noreorder
.L_exc:
        jr      ra
        li      v0, 0
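
/*
 * Build the actual routines.  Without CONFIG_EVA a single LEGACY_MODE
 * body serves all callers, and the user-copy entry points below are
 * plain aliases of __csum_partial_copy_nocheck.  With CONFIG_EVA,
 * separate to-user and from-user variants are built so that the user
 * side of the copy uses the EVA instruction forms selected by EXC().
 */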

FEXPORT(__csum_partial_copy_nocheck)
EXPORT_SYMBOL(__csum_partial_copy_nocheck)
#ifndef CONFIG_EVA
FEXPORT(__csum_partial_copy_to_user)
EXPORT_SYMBOL(__csum_partial_copy_to_user)
FEXPORT(__csum_partial_copy_from_user)
EXPORT_SYMBOL(__csum_partial_copy_from_user)
#endif
__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP

#ifdef CONFIG_EVA
LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP
END(__csum_partial_copy_from_user)
#endif