/* SPDX-License-Identifier: GPL-2.0 */
/*---------------------------------------------------------------------------+
 |  polynomial_Xsig.S                                                        |
 |                                                                           |
 | Fixed point arithmetic polynomial evaluation.                             |
 |                                                                           |
 | Copyright (C) 1992,1993,1994,1995                                         |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
 |                                                                           |
 | Call from C as:                                                           |
 |   void polynomial_Xsig(Xsig *accum, unsigned long long *x,                |
 |                        unsigned long long terms[], int n)                 |
 |                                                                           |
 | Computes:                                                                 |
 | terms[0] + (terms[1] + (terms[2] + ... + terms[n]*x ... )*x)*x            |
 | and adds the result to the 12 byte Xsig.  Note that n is the index of the |
 | highest term, so n+1 terms are read; x is passed by pointer.              |
 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
 | precision.                                                                |
 |                                                                           |
 | This function must be used carefully: most overflow of intermediate       |
 | results is controlled, but overflow of the result is not.                 |
 |                                                                           |
 +---------------------------------------------------------------------------*/
	.file	"polynomial_Xsig.S"

#include "fpu_emu.h"


/*
 * Local variables, addressed relative to %ebp.  The prologue reserves
 * 32 bytes below %ebp; the running sum and the product accumulator are
 * each kept as three 32-bit longs (ms, middle, ls).
 */
#define	TERM_SIZE	$8		/* bytes per terms[] entry */
#define	SUM_MS		-20(%ebp)	/* sum ms long */
#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
#define	SUM_LS		-28(%ebp)	/* sum ls long */
#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
#define OVERFLOWED      -16(%ebp)	/* addition overflow flag */

/*
 * polynomial_Xsig(accum, x, terms, n)
 *
 * Arguments are fetched through the PARAM1..PARAM4 macros supplied by
 * fpu_emu.h.
 *
 * Register use:
 *   %esi       points at x (ls long at offset 0, ms long at offset 4)
 *   %edi       points at the current terms[] entry, and finally at accum
 *   %eax/%edx  multiply operands and results, scratch
 * %ebx is saved and restored but is not otherwise referenced here.
 *
 * Each pass of L_accum_loop forms
 *   ACCUM = upper 96 bits of the 128 bit product (SUM_MS:SUM_MIDDLE) * x
 * (SUM_LS takes no part in the multiply) and then
 *   SUM = ACCUM + next lower term,
 * remembering in OVERFLOWED whether that addition carried out of SUM_MS.
 */
.text
SYM_FUNC_START(polynomial_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$32,%esp		/* room for the locals defined above */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM2,%esi		/* x */
	movl	PARAM3,%edi		/* terms */

	/* %edi = &terms[n]; note that mull also overwrites %edx */
	movl	TERM_SIZE,%eax
	mull	PARAM4			/* n */
	addl	%eax,%edi

	/* Start with sum = terms[n] in MS:MIDDLE, LS = 0, no overflow */
	movl	4(%edi),%edx		/* terms[n], ms long */
	movl	%edx,SUM_MS
	movl	(%edi),%edx		/* terms[n], ls long */
	movl	%edx,SUM_MIDDLE
	xor	%eax,%eax
	movl	%eax,SUM_LS
	movb	%al,OVERFLOWED

	/* Step down to terms[n-1]; if n was 0 there is nothing to multiply */
	subl	TERM_SIZE,%edi
	decl	PARAM4
	js	L_accum_done

L_accum_loop:
	xor	%eax,%eax
	movl	%eax,ACCUM_MS
	movl	%eax,ACCUM_MIDDLE

	/* sum middle * x ls: only the upper half is kept, the lower is dropped */
	movl	SUM_MIDDLE,%eax
	mull	(%esi)			/* x ls long */
	movl	%edx,ACCUM_LS

	/* sum middle * x ms */
	movl	SUM_MIDDLE,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	/* sum ms * x ls */
	movl	SUM_MS,%eax
	mull	(%esi)			/* x ls long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	/* sum ms * x ms */
	movl	SUM_MS,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_MIDDLE
	adcl	%edx,ACCUM_MS

	testb	$0xff,OVERFLOWED
	jz	L_no_overflow

	/*
	 * The previous addition carried out of SUM_MS, so the value just
	 * multiplied was short by one unit above SUM_MS.  That missing unit
	 * times x is x itself at the MS:MIDDLE position, so add x in there.
	 */
	movl	(%esi),%eax
	addl	%eax,ACCUM_MIDDLE
	movl	4(%esi),%eax
	adcl	%eax,ACCUM_MS		/* This could overflow too */

L_no_overflow:

/*
 * Now put the sum of next term and the accumulator
 * into the sum register
 *
 * NOTE(review): (%edi), the ls long of the term, is added at both the
 * LS and the MIDDLE position below, whereas the initial load above placed
 * it in SUM_MIDDLE only and cleared SUM_LS.  Left exactly as found, since
 * changing it would alter the emulator's results - confirm whether the LS
 * addition is intended.
 */
	movl	ACCUM_LS,%eax
	addl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_LS
	movl	ACCUM_MIDDLE,%eax
	adcl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_MIDDLE
	movl	ACCUM_MS,%eax
	adcl	4(%edi),%eax		/* term ms long */
	movl	%eax,SUM_MS
	sbbb	%al,%al			/* %al = 0xff if the adcl carried, else 0 */
	movb	%al,OVERFLOWED		/* Used in the next iteration */

	/* Move to the next lower term; loop until the count goes negative */
	subl	TERM_SIZE,%edi
	decl	PARAM4
	jns	L_accum_loop

L_accum_done:
	/* Add the 96 bit sum into *accum; a carry out of the ms long is lost */
	movl	PARAM1,%edi		/* accum */
	movl	SUM_LS,%eax
	addl	%eax,(%edi)
	movl	SUM_MIDDLE,%eax
	adcl	%eax,4(%edi)
	movl	SUM_MS,%eax
	adcl	%eax,8(%edi)

	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	RET
SYM_FUNC_END(polynomial_Xsig)