/* SPDX-License-Identifier: GPL-2.0 */
	.file	"div_Xsig.S"
/*---------------------------------------------------------------------------+
 |  div_Xsig.S                                                               |
 |                                                                           |
 |  Division subroutine for 96 bit quantities                                |
 |                                                                           |
 |  Copyright (C) 1994,1995                                                  |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
 |                                                                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  Divide the 96 bit quantity pointed to by a, by that pointed to by b, and |
 |  put the 96 bit result at the location dest.                              |
 |                                                                           |
 |  The result may not be accurate to 96 bits. It is intended for use where  |
 |  a result better than 64 bits is required. The result should usually be   |
 |  good to at least 94 bits.                                                |
 |  The dividend is halved before dividing, so the returned result is        |
 |  actually (a / b) / 2. This is done to prevent overflow.                  |
 |                                                                           |
 |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  ->  .dddddddddddd                      |
 |                                                                           |
 |  void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)                              |
 |                                                                           |
 +---------------------------------------------------------------------------*/

#include "exception.h"
#include "fpu_emu.h"


/*
	Accessors for the three 32 bit words of a 96 bit quantity whose
	address is held in register x.  The word at offset 0 is the least
	significant one, the word at offset 8 the most significant one.
 */
#define	XsigLL(x)	(x)
#define	XsigL(x)	4(x)
#define	XsigH(x)	8(x)


#ifndef NON_REENTRANT_FPU
/*
	Local storage on the stack:
	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
	Quotient:	FPU_result_3:FPU_result_2:FPU_result_1

	The slots are addressed relative to %ebp, so they are only valid
	once the frame pointer has been set up and 28 bytes have been
	reserved below it.
 */
#define FPU_accum_3	-4(%ebp)
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)
#define FPU_result_3	-20(%ebp)
#define FPU_result_2	-24(%ebp)
#define FPU_result_1	-28(%ebp)

#else
.data
/*
	Local storage in a static area:
	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
	Quotient:	FPU_result_3:FPU_result_2:FPU_result_1

	These are single static words, so this variant of the routine
	is not reentrant.
 */
	.align 4,0
FPU_accum_3:
	.long	0
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0
FPU_result_3:
	.long	0
FPU_result_2:
	.long	0
FPU_result_1:
	.long	0
#endif /* NON_REENTRANT_FPU */
75*4882a593Smuzhiyun 76*4882a593Smuzhiyun 77*4882a593Smuzhiyun.text 78*4882a593SmuzhiyunSYM_FUNC_START(div_Xsig) 79*4882a593Smuzhiyun pushl %ebp 80*4882a593Smuzhiyun movl %esp,%ebp 81*4882a593Smuzhiyun#ifndef NON_REENTRANT_FPU 82*4882a593Smuzhiyun subl $28,%esp 83*4882a593Smuzhiyun#endif /* NON_REENTRANT_FPU */ 84*4882a593Smuzhiyun 85*4882a593Smuzhiyun pushl %esi 86*4882a593Smuzhiyun pushl %edi 87*4882a593Smuzhiyun pushl %ebx 88*4882a593Smuzhiyun 89*4882a593Smuzhiyun movl PARAM1,%esi /* pointer to num */ 90*4882a593Smuzhiyun movl PARAM2,%ebx /* pointer to denom */ 91*4882a593Smuzhiyun 92*4882a593Smuzhiyun#ifdef PARANOID 93*4882a593Smuzhiyun testl $0x80000000, XsigH(%ebx) /* Divisor */ 94*4882a593Smuzhiyun je L_bugged 95*4882a593Smuzhiyun#endif /* PARANOID */ 96*4882a593Smuzhiyun 97*4882a593Smuzhiyun 98*4882a593Smuzhiyun/*---------------------------------------------------------------------------+ 99*4882a593Smuzhiyun | Divide: Return arg1/arg2 to arg3. | 100*4882a593Smuzhiyun | | 101*4882a593Smuzhiyun | The maximum returned value is (ignoring exponents) | 102*4882a593Smuzhiyun | .ffffffff ffffffff | 103*4882a593Smuzhiyun | ------------------ = 1.ffffffff fffffffe | 104*4882a593Smuzhiyun | .80000000 00000000 | 105*4882a593Smuzhiyun | and the minimum is | 106*4882a593Smuzhiyun | .80000000 00000000 | 107*4882a593Smuzhiyun | ------------------ = .80000000 00000001 (rounded) | 108*4882a593Smuzhiyun | .ffffffff ffffffff | 109*4882a593Smuzhiyun | | 110*4882a593Smuzhiyun +---------------------------------------------------------------------------*/ 111*4882a593Smuzhiyun 112*4882a593Smuzhiyun /* Save extended dividend in local register */ 113*4882a593Smuzhiyun 114*4882a593Smuzhiyun /* Divide by 2 to prevent overflow */ 115*4882a593Smuzhiyun clc 116*4882a593Smuzhiyun movl XsigH(%esi),%eax 117*4882a593Smuzhiyun rcrl %eax 118*4882a593Smuzhiyun movl %eax,FPU_accum_3 119*4882a593Smuzhiyun movl XsigL(%esi),%eax 120*4882a593Smuzhiyun rcrl %eax 121*4882a593Smuzhiyun movl 
%eax,FPU_accum_2 122*4882a593Smuzhiyun movl XsigLL(%esi),%eax 123*4882a593Smuzhiyun rcrl %eax 124*4882a593Smuzhiyun movl %eax,FPU_accum_1 125*4882a593Smuzhiyun movl $0,%eax 126*4882a593Smuzhiyun rcrl %eax 127*4882a593Smuzhiyun movl %eax,FPU_accum_0 128*4882a593Smuzhiyun 129*4882a593Smuzhiyun movl FPU_accum_2,%eax /* Get the current num */ 130*4882a593Smuzhiyun movl FPU_accum_3,%edx 131*4882a593Smuzhiyun 132*4882a593Smuzhiyun/*----------------------------------------------------------------------*/ 133*4882a593Smuzhiyun/* Initialization done. 134*4882a593Smuzhiyun Do the first 32 bits. */ 135*4882a593Smuzhiyun 136*4882a593Smuzhiyun /* We will divide by a number which is too large */ 137*4882a593Smuzhiyun movl XsigH(%ebx),%ecx 138*4882a593Smuzhiyun addl $1,%ecx 139*4882a593Smuzhiyun jnc LFirst_div_not_1 140*4882a593Smuzhiyun 141*4882a593Smuzhiyun /* here we need to divide by 100000000h, 142*4882a593Smuzhiyun i.e., no division at all.. */ 143*4882a593Smuzhiyun mov %edx,%eax 144*4882a593Smuzhiyun jmp LFirst_div_done 145*4882a593Smuzhiyun 146*4882a593SmuzhiyunLFirst_div_not_1: 147*4882a593Smuzhiyun divl %ecx /* Divide the numerator by the augmented 148*4882a593Smuzhiyun denom ms dw */ 149*4882a593Smuzhiyun 150*4882a593SmuzhiyunLFirst_div_done: 151*4882a593Smuzhiyun movl %eax,FPU_result_3 /* Put the result in the answer */ 152*4882a593Smuzhiyun 153*4882a593Smuzhiyun mull XsigH(%ebx) /* mul by the ms dw of the denom */ 154*4882a593Smuzhiyun 155*4882a593Smuzhiyun subl %eax,FPU_accum_2 /* Subtract from the num local reg */ 156*4882a593Smuzhiyun sbbl %edx,FPU_accum_3 157*4882a593Smuzhiyun 158*4882a593Smuzhiyun movl FPU_result_3,%eax /* Get the result back */ 159*4882a593Smuzhiyun mull XsigL(%ebx) /* now mul the ls dw of the denom */ 160*4882a593Smuzhiyun 161*4882a593Smuzhiyun subl %eax,FPU_accum_1 /* Subtract from the num local reg */ 162*4882a593Smuzhiyun sbbl %edx,FPU_accum_2 163*4882a593Smuzhiyun sbbl $0,FPU_accum_3 164*4882a593Smuzhiyun je LDo_2nd_32_bits /* Must check 
for non-zero result here */ 165*4882a593Smuzhiyun 166*4882a593Smuzhiyun#ifdef PARANOID 167*4882a593Smuzhiyun jb L_bugged_1 168*4882a593Smuzhiyun#endif /* PARANOID */ 169*4882a593Smuzhiyun 170*4882a593Smuzhiyun /* need to subtract another once of the denom */ 171*4882a593Smuzhiyun incl FPU_result_3 /* Correct the answer */ 172*4882a593Smuzhiyun 173*4882a593Smuzhiyun movl XsigL(%ebx),%eax 174*4882a593Smuzhiyun movl XsigH(%ebx),%edx 175*4882a593Smuzhiyun subl %eax,FPU_accum_1 /* Subtract from the num local reg */ 176*4882a593Smuzhiyun sbbl %edx,FPU_accum_2 177*4882a593Smuzhiyun 178*4882a593Smuzhiyun#ifdef PARANOID 179*4882a593Smuzhiyun sbbl $0,FPU_accum_3 180*4882a593Smuzhiyun jne L_bugged_1 /* Must check for non-zero result here */ 181*4882a593Smuzhiyun#endif /* PARANOID */ 182*4882a593Smuzhiyun 183*4882a593Smuzhiyun/*----------------------------------------------------------------------*/ 184*4882a593Smuzhiyun/* Half of the main problem is done, there is just a reduced numerator 185*4882a593Smuzhiyun to handle now. 
186*4882a593Smuzhiyun Work with the second 32 bits, FPU_accum_0 not used from now on */ 187*4882a593SmuzhiyunLDo_2nd_32_bits: 188*4882a593Smuzhiyun movl FPU_accum_2,%edx /* get the reduced num */ 189*4882a593Smuzhiyun movl FPU_accum_1,%eax 190*4882a593Smuzhiyun 191*4882a593Smuzhiyun /* need to check for possible subsequent overflow */ 192*4882a593Smuzhiyun cmpl XsigH(%ebx),%edx 193*4882a593Smuzhiyun jb LDo_2nd_div 194*4882a593Smuzhiyun ja LPrevent_2nd_overflow 195*4882a593Smuzhiyun 196*4882a593Smuzhiyun cmpl XsigL(%ebx),%eax 197*4882a593Smuzhiyun jb LDo_2nd_div 198*4882a593Smuzhiyun 199*4882a593SmuzhiyunLPrevent_2nd_overflow: 200*4882a593Smuzhiyun/* The numerator is greater or equal, would cause overflow */ 201*4882a593Smuzhiyun /* prevent overflow */ 202*4882a593Smuzhiyun subl XsigL(%ebx),%eax 203*4882a593Smuzhiyun sbbl XsigH(%ebx),%edx 204*4882a593Smuzhiyun movl %edx,FPU_accum_2 205*4882a593Smuzhiyun movl %eax,FPU_accum_1 206*4882a593Smuzhiyun 207*4882a593Smuzhiyun incl FPU_result_3 /* Reflect the subtraction in the answer */ 208*4882a593Smuzhiyun 209*4882a593Smuzhiyun#ifdef PARANOID 210*4882a593Smuzhiyun je L_bugged_2 /* Can't bump the result to 1.0 */ 211*4882a593Smuzhiyun#endif /* PARANOID */ 212*4882a593Smuzhiyun 213*4882a593SmuzhiyunLDo_2nd_div: 214*4882a593Smuzhiyun cmpl $0,%ecx /* augmented denom msw */ 215*4882a593Smuzhiyun jnz LSecond_div_not_1 216*4882a593Smuzhiyun 217*4882a593Smuzhiyun /* %ecx == 0, we are dividing by 1.0 */ 218*4882a593Smuzhiyun mov %edx,%eax 219*4882a593Smuzhiyun jmp LSecond_div_done 220*4882a593Smuzhiyun 221*4882a593SmuzhiyunLSecond_div_not_1: 222*4882a593Smuzhiyun divl %ecx /* Divide the numerator by the denom ms dw */ 223*4882a593Smuzhiyun 224*4882a593SmuzhiyunLSecond_div_done: 225*4882a593Smuzhiyun movl %eax,FPU_result_2 /* Put the result in the answer */ 226*4882a593Smuzhiyun 227*4882a593Smuzhiyun mull XsigH(%ebx) /* mul by the ms dw of the denom */ 228*4882a593Smuzhiyun 229*4882a593Smuzhiyun subl %eax,FPU_accum_1 /* Subtract 
from the num local reg */ 230*4882a593Smuzhiyun sbbl %edx,FPU_accum_2 231*4882a593Smuzhiyun 232*4882a593Smuzhiyun#ifdef PARANOID 233*4882a593Smuzhiyun jc L_bugged_2 234*4882a593Smuzhiyun#endif /* PARANOID */ 235*4882a593Smuzhiyun 236*4882a593Smuzhiyun movl FPU_result_2,%eax /* Get the result back */ 237*4882a593Smuzhiyun mull XsigL(%ebx) /* now mul the ls dw of the denom */ 238*4882a593Smuzhiyun 239*4882a593Smuzhiyun subl %eax,FPU_accum_0 /* Subtract from the num local reg */ 240*4882a593Smuzhiyun sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ 241*4882a593Smuzhiyun sbbl $0,FPU_accum_2 242*4882a593Smuzhiyun 243*4882a593Smuzhiyun#ifdef PARANOID 244*4882a593Smuzhiyun jc L_bugged_2 245*4882a593Smuzhiyun#endif /* PARANOID */ 246*4882a593Smuzhiyun 247*4882a593Smuzhiyun jz LDo_3rd_32_bits 248*4882a593Smuzhiyun 249*4882a593Smuzhiyun#ifdef PARANOID 250*4882a593Smuzhiyun cmpl $1,FPU_accum_2 251*4882a593Smuzhiyun jne L_bugged_2 252*4882a593Smuzhiyun#endif /* PARANOID */ 253*4882a593Smuzhiyun 254*4882a593Smuzhiyun /* need to subtract another once of the denom */ 255*4882a593Smuzhiyun movl XsigL(%ebx),%eax 256*4882a593Smuzhiyun movl XsigH(%ebx),%edx 257*4882a593Smuzhiyun subl %eax,FPU_accum_0 /* Subtract from the num local reg */ 258*4882a593Smuzhiyun sbbl %edx,FPU_accum_1 259*4882a593Smuzhiyun sbbl $0,FPU_accum_2 260*4882a593Smuzhiyun 261*4882a593Smuzhiyun#ifdef PARANOID 262*4882a593Smuzhiyun jc L_bugged_2 263*4882a593Smuzhiyun jne L_bugged_2 264*4882a593Smuzhiyun#endif /* PARANOID */ 265*4882a593Smuzhiyun 266*4882a593Smuzhiyun addl $1,FPU_result_2 /* Correct the answer */ 267*4882a593Smuzhiyun adcl $0,FPU_result_3 268*4882a593Smuzhiyun 269*4882a593Smuzhiyun#ifdef PARANOID 270*4882a593Smuzhiyun jc L_bugged_2 /* Must check for non-zero result here */ 271*4882a593Smuzhiyun#endif /* PARANOID */ 272*4882a593Smuzhiyun 273*4882a593Smuzhiyun/*----------------------------------------------------------------------*/ 274*4882a593Smuzhiyun/* The division is essentially 
finished here, we just need to perform 275*4882a593Smuzhiyun tidying operations. 276*4882a593Smuzhiyun Deal with the 3rd 32 bits */ 277*4882a593SmuzhiyunLDo_3rd_32_bits: 278*4882a593Smuzhiyun /* We use an approximation for the third 32 bits. 279*4882a593Smuzhiyun To take account of the 3rd 32 bits of the divisor 280*4882a593Smuzhiyun (call them del), we subtract del * (a/b) */ 281*4882a593Smuzhiyun 282*4882a593Smuzhiyun movl FPU_result_3,%eax /* a/b */ 283*4882a593Smuzhiyun mull XsigLL(%ebx) /* del */ 284*4882a593Smuzhiyun 285*4882a593Smuzhiyun subl %edx,FPU_accum_1 286*4882a593Smuzhiyun 287*4882a593Smuzhiyun /* A borrow indicates that the result is negative */ 288*4882a593Smuzhiyun jnb LTest_over 289*4882a593Smuzhiyun 290*4882a593Smuzhiyun movl XsigH(%ebx),%edx 291*4882a593Smuzhiyun addl %edx,FPU_accum_1 292*4882a593Smuzhiyun 293*4882a593Smuzhiyun subl $1,FPU_result_2 /* Adjust the answer */ 294*4882a593Smuzhiyun sbbl $0,FPU_result_3 295*4882a593Smuzhiyun 296*4882a593Smuzhiyun /* The above addition might not have been enough, check again. 
*/ 297*4882a593Smuzhiyun movl FPU_accum_1,%edx /* get the reduced num */ 298*4882a593Smuzhiyun cmpl XsigH(%ebx),%edx /* denom */ 299*4882a593Smuzhiyun jb LDo_3rd_div 300*4882a593Smuzhiyun 301*4882a593Smuzhiyun movl XsigH(%ebx),%edx 302*4882a593Smuzhiyun addl %edx,FPU_accum_1 303*4882a593Smuzhiyun 304*4882a593Smuzhiyun subl $1,FPU_result_2 /* Adjust the answer */ 305*4882a593Smuzhiyun sbbl $0,FPU_result_3 306*4882a593Smuzhiyun jmp LDo_3rd_div 307*4882a593Smuzhiyun 308*4882a593SmuzhiyunLTest_over: 309*4882a593Smuzhiyun movl FPU_accum_1,%edx /* get the reduced num */ 310*4882a593Smuzhiyun 311*4882a593Smuzhiyun /* need to check for possible subsequent overflow */ 312*4882a593Smuzhiyun cmpl XsigH(%ebx),%edx /* denom */ 313*4882a593Smuzhiyun jb LDo_3rd_div 314*4882a593Smuzhiyun 315*4882a593Smuzhiyun /* prevent overflow */ 316*4882a593Smuzhiyun subl XsigH(%ebx),%edx 317*4882a593Smuzhiyun movl %edx,FPU_accum_1 318*4882a593Smuzhiyun 319*4882a593Smuzhiyun addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ 320*4882a593Smuzhiyun adcl $0,FPU_result_3 321*4882a593Smuzhiyun 322*4882a593SmuzhiyunLDo_3rd_div: 323*4882a593Smuzhiyun movl FPU_accum_0,%eax 324*4882a593Smuzhiyun movl FPU_accum_1,%edx 325*4882a593Smuzhiyun divl XsigH(%ebx) 326*4882a593Smuzhiyun 327*4882a593Smuzhiyun movl %eax,FPU_result_1 /* Rough estimate of third word */ 328*4882a593Smuzhiyun 329*4882a593Smuzhiyun movl PARAM3,%esi /* pointer to answer */ 330*4882a593Smuzhiyun 331*4882a593Smuzhiyun movl FPU_result_1,%eax 332*4882a593Smuzhiyun movl %eax,XsigLL(%esi) 333*4882a593Smuzhiyun movl FPU_result_2,%eax 334*4882a593Smuzhiyun movl %eax,XsigL(%esi) 335*4882a593Smuzhiyun movl FPU_result_3,%eax 336*4882a593Smuzhiyun movl %eax,XsigH(%esi) 337*4882a593Smuzhiyun 338*4882a593SmuzhiyunL_exit: 339*4882a593Smuzhiyun popl %ebx 340*4882a593Smuzhiyun popl %edi 341*4882a593Smuzhiyun popl %esi 342*4882a593Smuzhiyun 343*4882a593Smuzhiyun leave 344*4882a593Smuzhiyun RET 345*4882a593Smuzhiyun 346*4882a593Smuzhiyun 
347*4882a593Smuzhiyun#ifdef PARANOID 348*4882a593Smuzhiyun/* The logic is wrong if we got here */ 349*4882a593SmuzhiyunL_bugged: 350*4882a593Smuzhiyun pushl EX_INTERNAL|0x240 351*4882a593Smuzhiyun call EXCEPTION 352*4882a593Smuzhiyun pop %ebx 353*4882a593Smuzhiyun jmp L_exit 354*4882a593Smuzhiyun 355*4882a593SmuzhiyunL_bugged_1: 356*4882a593Smuzhiyun pushl EX_INTERNAL|0x241 357*4882a593Smuzhiyun call EXCEPTION 358*4882a593Smuzhiyun pop %ebx 359*4882a593Smuzhiyun jmp L_exit 360*4882a593Smuzhiyun 361*4882a593SmuzhiyunL_bugged_2: 362*4882a593Smuzhiyun pushl EX_INTERNAL|0x242 363*4882a593Smuzhiyun call EXCEPTION 364*4882a593Smuzhiyun pop %ebx 365*4882a593Smuzhiyun jmp L_exit 366*4882a593Smuzhiyun#endif /* PARANOID */ 367*4882a593SmuzhiyunSYM_FUNC_END(div_Xsig) 368