/* SPDX-License-Identifier: GPL-2.0 */
	.file	"wm_shrx.S"
/*---------------------------------------------------------------------------+
 |  wm_shrx.S                                                                |
 |                                                                           |
 | 64 bit right shift functions                                              |
 |                                                                           |
 | Copyright (C) 1992,1995                                                   |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail   billm@jacobi.maths.monash.edu.au |
 |                                                                           |
 | Call from C as:                                                           |
 |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
 | and                                                                       |
 |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/

#include "fpu_emu.h"

.text
/*---------------------------------------------------------------------------+
 |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
 |                                                                           |
 |   Extended shift right function.                                          |
 |   Fastest for small shifts.                                               |
 |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
 |   right by the number of bits specified by the second arg (arg2).         |
 |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
 |                [  64 bit arg ][ eax ]                                     |
 |            shift right  --------->                                        |
 |   The eax register is initialized to 0 before the shifting.               |
 |   Results returned in the 64 bit arg and eax.                             |
 +---------------------------------------------------------------------------*/
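/*
 * Illustrative C model of FPU_shrx (a sketch only, not part of the kernel
 * build; it assumes GCC/Clang's unsigned __int128 and the helper name is
 * made up for illustration).  It mirrors the 96 bit operation described
 * above: the 64 bit operand forms the high 64 bits, a zeroed eax the low
 * 32 bits, and the whole quantity is shifted right.  The early return for
 * shifts of 96 bits or more also avoids undefined behaviour for huge counts.
 *
 *	static unsigned fpu_shrx_model(unsigned long long *x, unsigned shift)
 *	{
 *		unsigned __int128 q = (unsigned __int128)*x << 32;
 *
 *		if (shift >= 96) {		// everything is shifted out
 *			*x = 0;
 *			return 0;
 *		}
 *		q >>= shift;
 *		*x = (unsigned long long)(q >> 32);	// new 64 bit value
 *		return (unsigned)q;			// the eax extension
 *	}
 */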
SYM_FUNC_START(FPU_shrx)
	push	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM2,%ecx
	movl	PARAM1,%esi
	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
	jnc	L_more_than_31

/* less than 32 bits */
	pushl	%ebx
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%eax,%eax	/* extension */
	shrd	%cl,%ebx,%eax
	shrd	%cl,%edx,%ebx
	shr	%cl,%edx
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

L_more_than_31:
	cmpl	$64,%ecx
	jnc	L_more_than_63

	subb	$32,%cl
	movl	(%esi),%eax	/* lsl */
	movl	4(%esi),%edx	/* msl */
	shrd	%cl,%edx,%eax
	shr	%cl,%edx
	movl	%edx,(%esi)
	movl	$0,4(%esi)
	popl	%esi
	leave
	RET

L_more_than_63:
	cmpl	$96,%ecx
	jnc	L_more_than_95

	subb	$64,%cl
	movl	4(%esi),%eax	/* msl */
	shr	%cl,%eax
	xorl	%edx,%edx
	movl	%edx,(%esi)
	movl	%edx,4(%esi)
	popl	%esi
	leave
	RET

L_more_than_95:
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	popl	%esi
	leave
	RET
SYM_FUNC_END(FPU_shrx)


/*---------------------------------------------------------------------------+
 |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
 |                                                                           |
 |   Extended shift right function (optimized for small floating point      |
 |   integers).                                                              |
 |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
 |   right by the number of bits specified by the second arg (arg2).         |
 |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
 |                [  64 bit arg ][ eax ]                                     |
 |            shift right  --------->                                        |
 |   The eax register is initialized to 0 before the shifting.               |
 |   The lower 8 bits of eax are lost and replaced by a flag which is        |
 |   set (to 0x01) if any bit, apart from the first one, is set in the       |
 |   part which has been shifted out of the arg.                             |
 |   Results returned in the 64 bit arg and eax.                             |
 +---------------------------------------------------------------------------*/
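/*
 * Illustrative C model of FPU_shrxs (again a sketch only, not part of the
 * kernel build; it assumes GCC/Clang's unsigned __int128 and the helper
 * name is made up).  Same 96 bit shift as FPU_shrx, except that the low
 * 8 bits of the returned extension are replaced by a "sticky" flag: 0x01
 * if any bit apart from the most significant shifted-out bit was set in
 * the part shifted out of the arg.
 *
 *	static unsigned fpu_shrxs_model(unsigned long long *x, unsigned shift)
 *	{
 *		unsigned __int128 q = (unsigned __int128)*x << 32;
 *		unsigned __int128 lost;
 *		unsigned ext, sticky;
 *
 *		if (shift >= 96) {	// everything shifted out: flag any non-zero operand
 *			sticky = (*x != 0);
 *			*x = 0;
 *			return sticky;
 *		}
 *		lost = q & (((unsigned __int128)1 << shift) - 1);	// bits lost below eax
 *		q >>= shift;
 *		*x  = (unsigned long long)(q >> 32);
 *		ext = (unsigned)q;				// as in FPU_shrx
 *		sticky = (lost != 0) || ((ext & 0x7fffffff) != 0);
 *		return (ext & ~0xffu) | sticky;
 *	}
 */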
SYM_FUNC_START(FPU_shrxs)
	push	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%ebx
	movl	PARAM2,%ecx
	movl	PARAM1,%esi
	cmpl	$64,%ecx	/* shift of 64 bits or more? */
	jnc	Ls_more_than_63

	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
	jc	Ls_less_than_32

/* We reach this point without taking a branch: shifts in the [32..63] bit
   range (small floating point integers) are assumed to be the most common
   case. */
/* Shift by [32..63] bits */
	subb	$32,%cl
	movl	(%esi),%eax	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%ebx,%ebx
	shrd	%cl,%eax,%ebx
	shrd	%cl,%edx,%eax
	shr	%cl,%edx
	orl	%ebx,%ebx	/* test these 32 bits */
	setne	%bl
	test	$0x7fffffff,%eax	/* and 31 bits here */
	setne	%bh
	orw	%bx,%bx		/* Any of the 63 bits set? */
	setne	%al
	movl	%edx,(%esi)
	movl	$0,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

/* Shift by [0..31] bits */
Ls_less_than_32:
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%eax,%eax	/* extension */
	shrd	%cl,%ebx,%eax
	shrd	%cl,%edx,%ebx
	shr	%cl,%edx
	test	$0x7fffffff,%eax	/* only need to look at eax here */
	setne	%al
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

/* Shift by [64..95] bits */
Ls_more_than_63:
	cmpl	$96,%ecx
	jnc	Ls_more_than_95

	subb	$64,%cl
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%eax	/* msl */
	xorl	%edx,%edx	/* extension */
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%ebx,%edx
	setne	%bl
	test	$0x7fffffff,%eax	/* only need to look at eax here */
	setne	%bh
	orw	%bx,%bx
	setne	%al
	xorl	%edx,%edx
	movl	%edx,(%esi)	/* set to zero */
	movl	%edx,4(%esi)	/* set to zero */
	popl	%ebx
	popl	%esi
	leave
	RET

Ls_more_than_95:
/* Shift by [96..inf) bits */
	xorl	%eax,%eax
	movl	(%esi),%ebx
	orl	4(%esi),%ebx
	setne	%al
	xorl	%ebx,%ebx
	movl	%ebx,(%esi)
	movl	%ebx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET
SYM_FUNC_END(FPU_shrxs)
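/*
 * A hypothetical user-space smoke test for the two entry points above (a
 * sketch only: building it would require pulling these routines out of the
 * kernel tree and providing the PARAM1/PARAM2 and SYM_FUNC_* macros, and it
 * only makes sense on a 32 bit x86 build).  It exercises the boundary shift
 * counts and prints the shifted operand next to the returned extension:
 *
 *	#include <stdio.h>
 *
 *	extern unsigned FPU_shrx(void *arg1, unsigned arg2);
 *	extern unsigned FPU_shrxs(void *arg1, unsigned arg2);
 *
 *	int main(void)
 *	{
 *		unsigned shifts[] = { 0, 1, 31, 32, 63, 64, 95, 96, 100 };
 *		unsigned i, ext;
 *		unsigned long long x;
 *
 *		for (i = 0; i < sizeof(shifts) / sizeof(shifts[0]); i++) {
 *			x = 0x123456789abcdef0ULL;
 *			ext = FPU_shrx(&x, shifts[i]);
 *			printf("shrx  %3u: %016llx ext=%08x\n", shifts[i], x, ext);
 *
 *			x = 0x123456789abcdef0ULL;
 *			ext = FPU_shrxs(&x, shifts[i]);
 *			printf("shrxs %3u: %016llx ext=%08x\n", shifts[i], x, ext);
 *		}
 *		return 0;
 *	}
 */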