/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <asm/byteorder.h>

#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addcc %r4,%5,%1\n\t" \
		 "addx %r2,%3,%0\n" \
		 : "=r" (sh), \
		   "=&r" (sl) \
		 : "%rJ" ((USItype)(ah)), \
		   "rI" ((USItype)(bh)), \
		   "%rJ" ((USItype)(al)), \
		   "rI" ((USItype)(bl)) \
		 : "cc")
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subcc %r4,%5,%1\n\t" \
		 "subx %r2,%3,%0\n" \
		 : "=r" (sh), \
		   "=&r" (sl) \
		 : "rJ" ((USItype)(ah)), \
		   "rI" ((USItype)(bh)), \
		   "rJ" ((USItype)(al)), \
		   "rI" ((USItype)(bl)) \
		 : "cc")

#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("! Inlined umul_ppmm\n\t" \
		 "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n\t" \
		 "sra %3,31,%%g2 ! Don't move this insn\n\t" \
		 "and %2,%%g2,%%g2 ! Don't move this insn\n\t" \
		 "andcc %%g0,0,%%g1 ! Don't move this insn\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,%3,%%g1\n\t" \
		 "mulscc %%g1,0,%%g1\n\t" \
		 "add %%g1,%%g2,%0\n\t" \
		 "rd %%y,%1\n" \
		 : "=r" (w1), \
		   "=r" (w0) \
		 : "%rI" ((USItype)(u)), \
		   "r" ((USItype)(v)) \
		 : "%g1", "%g2", "cc")
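/*
 * Reference semantics for the primitives above, as a plain-C sketch.
 * This assumes USItype is a 32-bit unsigned type (as in the kernel's
 * math-emu headers); the _ref names are illustrative only, and the
 * block is compiled out with #if 0 -- the asm macros are the real
 * implementation. Note why umul_ppmm needs the sra/and correction:
 * SPARC v7 has no integer multiply instruction, so the macro runs 32
 * mulscc (multiply-step) instructions through the %y register, which
 * treat v as signed; when v's sign bit is set, adding u back into the
 * high word converts the signed result into the unsigned product.
 */
#if 0
static inline void add_ssaaaa_ref(u32 *sh, u32 *sl,
				  u32 ah, u32 al, u32 bh, u32 bl)
{
	u32 lo = al + bl;		/* low-word add */

	*sl = lo;
	*sh = ah + bh + (lo < al);	/* propagate carry out of low word */
}

static inline void umul_ppmm_ref(u32 *w1, u32 *w0, u32 u, u32 v)
{
	u64 p = (u64)u * v;		/* full 32x32 -> 64 product */

	*w1 = p >> 32;			/* high word */
	*w0 = (u32)p;			/* low word */
}
#endif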
/* It's quite necessary to add this much assembler for the sparc.
   The default udiv_qrnnd (in C) is more than 10 times slower! */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("! Inlined udiv_qrnnd\n\t" \
		 "mov 32,%%g1\n\t" \
		 "subcc %1,%2,%%g0\n\t" \
		 "1: bcs 5f\n\t" \
		 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
		 "sub %1,%2,%1 ! this kills msb of n\n\t" \
		 "addx %1,%1,%1 ! so this can't give carry\n\t" \
		 "subcc %%g1,1,%%g1\n\t" \
		 "2: bne 1b\n\t" \
		 "subcc %1,%2,%%g0\n\t" \
		 "bcs 3f\n\t" \
		 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
		 "b 3f\n\t" \
		 "sub %1,%2,%1 ! this kills msb of n\n\t" \
		 "4: sub %1,%2,%1\n\t" \
		 "5: addxcc %1,%1,%1\n\t" \
		 "bcc 2b\n\t" \
		 "subcc %%g1,1,%%g1\n\t" \
		 "! Got carry from n. Subtract next step to cancel this carry.\n\t" \
		 "bne 4b\n\t" \
		 "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
		 "sub %1,%2,%1\n\t" \
		 "3: xnor %0,0,%0\n\t" \
		 "! End of inline udiv_qrnnd\n" \
		 : "=&r" (q), \
		   "=&r" (r) \
		 : "r" ((USItype)(d)), \
		   "1" ((USItype)(n1)), \
		   "0" ((USItype)(n0)) \
		 : "%g1", "cc")
#define UDIV_NEEDS_NORMALIZATION 0

#define abort() \
	return 0

#ifdef __BIG_ENDIAN
#define __BYTE_ORDER __BIG_ENDIAN
#else
#define __BYTE_ORDER __LITTLE_ENDIAN
#endif
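/*
 * Reference semantics for udiv_qrnnd above, as a plain-C sketch (the
 * _ref name is illustrative only, compiled out with #if 0): it divides
 * the two-word numerator n1:n0 by d, yielding a one-word quotient and
 * remainder. This assumes the usual udiv_qrnnd contract that callers
 * guarantee n1 < d, so the quotient fits in one word. The
 * UDIV_NEEDS_NORMALIZATION 0 definition above records that the asm
 * shift-and-subtract loop does not require the divisor to be
 * pre-shifted so its most significant bit is set.
 */
#if 0
static inline void udiv_qrnnd_ref(u32 *q, u32 *r,
				  u32 n1, u32 n0, u32 d)
{
	u64 n = ((u64)n1 << 32) | n0;	/* two-word numerator n1:n0 */

	*q = n / d;			/* one-word quotient (n1 < d) */
	*r = n % d;			/* remainder */
}
#endif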