/* SPDX-License-Identifier: GPL-2.0 */
	.file	"reg_u_mul.S"
/*---------------------------------------------------------------------------+
 |  reg_u_mul.S                                                              |
 |                                                                           |
 | Core multiplication routine                                               |
 |                                                                           |
 | Copyright (C) 1992,1993,1995,1997                                         |
 |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
 |                  E-mail   billm@suburbia.net                              |
 |                                                                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |   Basic multiplication routine.                                           |
 |   Does not check the resulting exponent for overflow/underflow            |
 |                                                                           |
 |   FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw);         |
 |                                                                           |
 |   Internal working is at approx 128 bits.                                 |
 |   Result is rounded to nearest 53 or 64 bits, using "nearest or even".    |
 +---------------------------------------------------------------------------*/

/*
 * Working layout (as established by the code below):
 *
 *   %esi = PARAM1 (first source), %edi = PARAM2 (second source).
 *   %edi is reloaded with PARAM3 (the destination) once the sources are
 *   no longer needed.
 *
 *   The 128-bit product of the two 64-bit significands is accumulated in
 *       %ecx : %ebx : FPU_accum_1 : FPU_accum_0
 *   listed from most significant to least significant 32-bit word.
 *
 * On the normal path this routine does not return by itself: it jumps to
 * fpu_reg_round with %eax:%ebx holding the upper 64 bits of the product and
 * %edx holding the next 32 bits, with bit 0 of %edx forced on when any of
 * the lowest 32 bits were non-zero.  The stack frame and the three pushed
 * registers are still in place at that jump.
 * NOTE(review): presumably fpu_reg_round unwinds them and performs the
 * return - confirm in that routine.
 *
 * NOTE(review): the exponent is read from PARAM6, which the four-argument
 * prototype quoted in the banner above does not mention - confirm against
 * the C declaration of FPU_u_mul.
 */

#include "exception.h"
#include "fpu_emu.h"
#include "control_w.h"



#ifndef NON_REENTRANT_FPU
/*  Local storage on the stack: */
#define FPU_accum_0	-4(%ebp)	/* ls word of the 128-bit product */
#define FPU_accum_1	-8(%ebp)

#else
/*  Local storage in a static area: */
.data
	.align 4,0
FPU_accum_0:
	.long	0
FPU_accum_1:
	.long	0
#endif /* NON_REENTRANT_FPU */


.text
SYM_FUNC_START(FPU_u_mul)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$8,%esp		/* room for FPU_accum_0 and FPU_accum_1 */
#endif /* NON_REENTRANT_FPU */

	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi
	movl	PARAM2,%edi

#ifdef PARANOID
	/* Both sources must have bit 31 of their high significand word set */
	testl	$0x80000000,SIGH(%esi)
	jz	L_bugged
	testl	$0x80000000,SIGH(%edi)
	jz	L_bugged
#endif /* PARANOID */

	/* Clear the two upper words of the product accumulator */
	xorl	%ecx,%ecx
	xorl	%ebx,%ebx

	/* low * low: initialises the two lowest words */
	movl	SIGL(%esi),%eax
	mull	SIGL(%edi)
	movl	%eax,FPU_accum_0
	movl	%edx,FPU_accum_1

	/*
	 * First cross term (low * high), added in one word higher.
	 * %ebx is still zero and the high half of a 32x32 product is at
	 * most 0xfffffffe, so adding it plus a carry cannot carry out of
	 * %ebx; that is why the carry into %ecx is skipped here.
	 */
	movl	SIGL(%esi),%eax
	mull	SIGH(%edi)
	addl	%eax,FPU_accum_1
	adcl	%edx,%ebx
/*	adcl	$0,%ecx		// overflow here is not possible */

	/* Second cross term (high * low); this one can carry into %ecx */
	movl	SIGH(%esi),%eax
	mull	SIGL(%edi)
	addl	%eax,FPU_accum_1
	adcl	%edx,%ebx
	adcl	$0,%ecx

	/* high * high: lands in the two upper words */
	movl	SIGH(%esi),%eax
	mull	SIGH(%edi)
	addl	%eax,%ebx
	adcl	%edx,%ecx

	/* Get the sum of the exponents. */
	/* NOTE(review): EXP_BIAS and EXP_WAY_UNDER come from the included
	   headers; their values are not visible in this file. */
	movl	PARAM6,%eax
	subl	EXP_BIAS-1,%eax

	/* Two denormals can cause an exponent underflow */
	cmpl	EXP_WAY_UNDER,%eax
	jg	Exp_not_underflow	/* signed compare: keep %eax if above the floor */

	/* Set to a really low value to allow correct handling */
	movl	EXP_WAY_UNDER,%eax

Exp_not_underflow:

/*  Have now finished with the sources */
	movl	PARAM3,%edi	/* Point to the destination */
	movw	%ax,EXP(%edi)	/* only the low 16 bits of the exponent are stored */

/*  Now make sure that the result is normalized */
	testl	$0x80000000,%ecx
	jnz	LResult_Normalised

	/*
	 * Normalize by shifting the whole 128-bit product left one bit and
	 * compensating in the stored exponent.  When both sources have
	 * bit 31 of SIGH set (what the PARANOID check enforces), the top
	 * set bit of the product is bit 127 or bit 126, so one shift is
	 * sufficient.
	 */
	shll	$1,FPU_accum_0
	rcll	$1,FPU_accum_1
	rcll	$1,%ebx
	rcll	$1,%ecx
	decw	EXP(%edi)

LResult_Normalised:
	movl	FPU_accum_0,%eax
	movl	FPU_accum_1,%edx
	orl	%eax,%eax	/* sets ZF iff the lowest word is zero */
	jz	L_extent_zero

	/* Non-zero bits below %edx: record them as a sticky bit in bit 0 */
	orl	$1,%edx

L_extent_zero:
	movl	%ecx,%eax	/* most significant word goes to %eax */
	jmp	fpu_reg_round


#ifdef PARANOID
L_bugged:
	pushl	EX_INTERNAL|0x205
	call	EXCEPTION
	pop	%ebx		/* discard the pushed argument; %ebx is restored below */
	jmp	L_exit

L_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	RET
#endif /* PARANOID */

SYM_FUNC_END(FPU_u_mul)