~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
M68000 Hi-Performance Microprocessor Division
M68060 Software Package
Production Release P1.00 -- October 10, 1994

M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.

THE SOFTWARE is provided on an "AS IS" basis and without warranty.
To the maximum extent permitted by applicable law,
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
and any warranty against infringement with regard to the SOFTWARE
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.

To the maximum extent permitted by applicable law,
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.

You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
so long as this entire notice is retained without alteration in any modified and/or
redistributed versions, and that such modified versions are clearly identified as such.
No licenses are granted by implication, estoppel or otherwise under any patents
or trademarks of Motorola, Inc.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# litop.s:
#	This file is appended to the top of the 060FPLSP package
# and contains the entry points into the package. The user, in
# effect, branches to one of the branch table entries located here.
#

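# NOTE: a minimal calling sketch (not part of the original package notes).
# each table slot below should be 8 bytes long -- a 6-byte "bra.l" followed
# by the 2-byte pad word -- so, assuming the package is based at a label
# such as _060LSP_TABLE (a hypothetical name used here only for
# illustration), a caller could reach the unsigned 64-bit divide entry with:
#
#	bsr.l	_060LSP_TABLE+0x08	# 2nd slot = _060LSP__idivu64_
#
# the other slots follow in steps of 8: 0x00 = idivs64, 0x08 = idivu64,
# 0x10 = imuls64, 0x18 = imulu64, 0x20-0x48 = the cmp2 entries in the
# order listed below.
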
	bra.l	_060LSP__idivs64_
	short	0x0000
	bra.l	_060LSP__idivu64_
	short	0x0000

	bra.l	_060LSP__imuls64_
	short	0x0000
	bra.l	_060LSP__imulu64_
	short	0x0000

	bra.l	_060LSP__cmp2_Ab_
	short	0x0000
	bra.l	_060LSP__cmp2_Aw_
	short	0x0000
	bra.l	_060LSP__cmp2_Al_
	short	0x0000
	bra.l	_060LSP__cmp2_Db_
	short	0x0000
	bra.l	_060LSP__cmp2_Dw_
	short	0x0000
	bra.l	_060LSP__cmp2_Dl_
	short	0x0000

# leave room for future possible additions.
	align	0x200

#########################################################################
# XDEF ****************************************************************	#
#	_060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.	#
#	_060LSP__idivs64_(): Emulate 64-bit signed div instruction.	#
#									#
#	This is the library version which is accessed as a subroutine	#
#	and therefore does not work exactly like the 680X0 div{s,u}.l	#
#	64-bit divide instruction.					#
#									#
# XREF ****************************************************************	#
#	None.								#
#									#
# INPUT ***************************************************************	#
#	0x4(sp)  = divisor						#
#	0x8(sp)  = hi(dividend)						#
#	0xc(sp)  = lo(dividend)						#
#	0x10(sp) = pointer to location to place quotient/remainder	#
#									#
# OUTPUT **************************************************************	#
#	0x10(sp) = points to location of remainder/quotient.		#
#		   remainder is in first longword, quotient is in 2nd.	#
#									#
# ALGORITHM ***********************************************************	#
#	If the operands are signed, make them unsigned and save the	#
# sign info for later. Separate out special cases like divide-by-zero	#
# or 32-bit divides if possible. Else, use a special math algorithm	#
# to calculate the result.						#
#	Restore sign info if signed instruction. Set the condition	#
# codes before performing the final "rts". If the divisor was equal to	#
# zero, then perform a divide-by-zero using a 16-bit implemented	#
# divide instruction. This way, the operating system can record that	#
# the event occurred even though it may not point to the correct place.	#
#									#
#########################################################################
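
# NOTE: a minimal usage sketch (an assumption added here, not taken from
# the original notes), derived from the INPUT/OUTPUT description above.
# the caller pushes the result pointer first and the divisor last, so the
# divisor ends up at 0x4(sp) after the subroutine call:
#
#	pea	result			# ptr to 2 longwords: rem, then quot
#	mov.l	lo_dividend,-(%sp)
#	mov.l	hi_dividend,-(%sp)
#	mov.l	divisor,-(%sp)
#	bsr.l	_060LSP__idivu64_	# or _060LSP__idivs64_
#	lea	0x10(%sp),%sp		# pop args; lea keeps returned ccodes
#
# "result", "lo_dividend", "hi_dividend" and "divisor" are placeholder
# operands for illustration only.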

set	POSNEG,		-1
set	NDIVISOR,	-2
set	NDIVIDEND,	-3
set	DDSECOND,	-4
set	DDNORMAL,	-8
set	DDQUOTIENT,	-12
set	DIV64_CC,	-16
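# NOTE (added): these negative offsets live in the 16-byte frame created
# by "link.w %a6,&-16" below: DIV64_CC at -16, DDQUOTIENT at -12,
# DDNORMAL at -8, then the four byte flags DDSECOND/NDIVIDEND/NDIVISOR/
# POSNEG at -4 through -1.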

##########
# divs.l #
##########
	global		_060LSP__idivs64_
_060LSP__idivs64_:
# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-16
	movm.l		&0x3f00,-(%sp)		# save d2-d7
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,DIV64_CC(%a6)
	st		POSNEG(%a6)		# signed operation
	bra.b		ldiv64_cont

##########
# divu.l #
##########
	global		_060LSP__idivu64_
_060LSP__idivu64_:
# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-16
	movm.l		&0x3f00,-(%sp)		# save d2-d7
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,DIV64_CC(%a6)
	sf		POSNEG(%a6)		# unsigned operation

ldiv64_cont:
	mov.l		0x8(%a6),%d7		# fetch divisor

	beq.w		ldiv64eq0		# divisor is = 0!!!

	mov.l		0xc(%a6), %d5		# get dividend hi
	mov.l		0x10(%a6), %d6		# get dividend lo

# separate signed and unsigned divide
	tst.b		POSNEG(%a6)		# signed or unsigned?
	beq.b		ldspecialcases		# use positive divide

# save the sign of the divisor
# make divisor unsigned if it's negative
	tst.l		%d7			# chk sign of divisor
	slt		NDIVISOR(%a6)		# save sign of divisor
	bpl.b		ldsgndividend
	neg.l		%d7			# complement negative divisor

# save the sign of the dividend
# make dividend unsigned if it's negative
ldsgndividend:
	tst.l		%d5			# chk sign of hi(dividend)
	slt		NDIVIDEND(%a6)		# save sign of dividend
	bpl.b		ldspecialcases

	mov.w		&0x0, %cc		# clear 'X' cc bit
	negx.l		%d6			# complement signed dividend
	negx.l		%d5

# extract some special cases:
#	- is (dividend == 0) ?
#	- is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
ldspecialcases:
	tst.l		%d5			# is (hi(dividend) == 0)
	bne.b		ldnormaldivide		# no, so try it the long way

	tst.l		%d6			# is (lo(dividend) == 0), too
	beq.w		lddone			# yes, so (dividend == 0)

	cmp.l		%d7,%d6			# is (divisor <= lo(dividend))
	bls.b		ld32bitdivide		# yes, so use 32 bit divide

	exg		%d5,%d6			# q = 0, r = dividend
	bra.w		ldivfinish		# can't divide, we're done.

ld32bitdivide:
	tdivu.l		%d7, %d5:%d6		# it's only a 32/32 bit div!

	bra.b		ldivfinish

ldnormaldivide:
# last special case:
#	- is hi(dividend) >= divisor ? if yes, then overflow
	cmp.l		%d7,%d5
	bls.b		lddovf			# answer won't fit in 32 bits

# perform the divide algorithm:
	bsr.l		ldclassical		# do int divide

# separate into signed and unsigned finishes.
ldivfinish:
	tst.b		POSNEG(%a6)		# do divs, divu separately
	beq.b		lddone			# divu has no processing!!!

# it was a divs.l, so ccode setting is a little more complicated...
	tst.b		NDIVIDEND(%a6)		# remainder has same sign
	beq.b		ldcc			# as dividend.
	neg.l		%d5			# sgn(rem) = sgn(dividend)
ldcc:
	mov.b		NDIVISOR(%a6), %d0
	eor.b		%d0, NDIVIDEND(%a6)	# chk if quotient is negative
	beq.b		ldqpos			# branch to quot positive

# 0x80000000 is the largest number representable as a 32-bit negative
# number. the negative of 0x80000000 is 0x80000000.
	cmpi.l		%d6, &0x80000000	# will (-quot) fit in 32 bits?
	bhi.b		lddovf

	neg.l		%d6			# make (-quot) 2's comp

	bra.b		lddone

ldqpos:
	btst		&0x1f, %d6		# will (+quot) fit in 32 bits?
	bne.b		lddovf

lddone:
# if the register numbers are the same, only the quotient gets saved.
# so, if we always save the quotient second, we save ourselves a cmp&beq
	andi.w		&0x10,DIV64_CC(%a6)
	mov.w		DIV64_CC(%a6),%cc
	tst.l		%d6			# may set 'N' ccode bit

# here, the remainder is in %d5 and the quotient in %d6. save them at
# the location addressed by the result pointer passed at 0x14(%a6).
# use movm here to not disturb the condition codes.
ldexit:
	movm.l		&0x0060,([0x14,%a6])	# save result

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x00fc		# restore d2-d7
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# the result should be the unchanged dividend
lddovf:
	mov.l		0xc(%a6), %d5		# get dividend hi
	mov.l		0x10(%a6), %d6		# get dividend lo

	andi.w		&0x1c,DIV64_CC(%a6)
	ori.w		&0x02,DIV64_CC(%a6)	# set 'V' ccode bit
	mov.w		DIV64_CC(%a6),%cc

	bra.b		ldexit

ldiv64eq0:
	mov.l		0xc(%a6),([0x14,%a6])
	mov.l		0x10(%a6),([0x14,%a6],0x4)

	mov.w		DIV64_CC(%a6),%cc

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x00fc		# restore d2-d7
	unlk		%a6
# EPILOGUE END ##########################################################

	divu.w		&0x0,%d0		# force a divbyzero exception
	rts

###########################################################################
#########################################################################
# This routine uses the 'classical' Algorithm D from Donald Knuth's	#
# Art of Computer Programming, vol II, Seminumerical Algorithms.	#
# For this implementation b=2**16, and the target is U1U2U3U4/V1V2,	#
# where U,V are words of the quadword dividend and longword divisor,	#
# and U1, V1 are the most significant words.				#
#									#
# The most sig. longword of the 64 bit dividend must be in %d5, least	#
# in %d6. The divisor must be in %d7, and the signed/unsigned flag	#
# POSNEG(%a6) must be set (0x00=unsigned, 0xff=signed).			#
# The quotient is returned in %d6, remainder in %d5, unless the		#
# v (overflow) bit is set in the saved %ccr. If overflow, the dividend	#
# is unchanged.								#
#########################################################################
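
# NOTE (added): in Knuth's notation (AoCP vol. II, sec. 4.3.1) the trial
# quotient digit computed below is q^ = min(b-1, (U1*b + U2) div V1) with
# b = 2**16. normalizing the divisor so that V1 >= b/2 is what makes this
# estimate usable: after the additional V2 check in lddadj1, q^ is at most
# 1 too large, so the post-multiply correction needs at most one add-back.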
ldclassical:
# if the divisor msw is 0, use a simpler algorithm than the full-blown
# one at lddknuth:

	cmpi.l		%d7, &0xffff
	bhi.b		lddknuth		# go use D. Knuth algorithm

# Since the divisor is only a word (and larger than the mslw of the dividend),
# a simpler algorithm may be used:
# In the general case, four quotient words would be created by
# dividing the divisor word into each dividend word. In this case,
# the first two quotient words must be zero, or overflow would occur.
# Since we already checked this case above, we can treat the most significant
# longword of the dividend as (0) remainder (see Knuth) and merely complete
# the last two divisions to get a quotient longword and word remainder:
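# NOTE (added): a worked example of this two-step scheme: dividing the
# 64-bit value 0x00000001_00000005 by 3 first divides the upper digits
# 0x00010000 by 3 (quotient 0x5555, remainder 1), then divides the
# remainder:next-word value 0x00010005 by 3 (quotient 0x5557, remainder 0),
# giving the final quotient 0x55555557 and remainder 0.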

	clr.l		%d1
	swap		%d5			# same as r*b if previous step rqd
	swap		%d6			# get u3 to lsw position
	mov.w		%d6, %d5		# rb + u3

	divu.w		%d7, %d5

	mov.w		%d5, %d1		# first quotient word
	swap		%d6			# get u4
	mov.w		%d6, %d5		# rb + u4

	divu.w		%d7, %d5

	swap		%d1
	mov.w		%d5, %d1		# 2nd quotient 'digit'
	clr.w		%d5
	swap		%d5			# now remainder
	mov.l		%d1, %d6		# and quotient

	rts

lddknuth:
# In this algorithm, the divisor is treated as a 2 digit (word) number
# which is divided into a 3 digit (word) dividend to get one quotient
# digit (word). After subtraction, the dividend is shifted and the
# process repeated. Before beginning, the divisor and dividend are
# 'normalized' so that the process of estimating the quotient digit
# will yield verifiably correct results.
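# NOTE (added): for example, a divisor of 0x00012345 has bit 31 clear, so
# the loop below shifts divisor and dividend left 15 times (until the
# divisor becomes 0x91a28000) and records 15 in DDNORMAL; the remainder is
# shifted back right by the same count at lddnlp before being returned.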

	clr.l		DDNORMAL(%a6)		# count of shifts for normalization
	clr.b		DDSECOND(%a6)		# clear flag for quotient digits
	clr.l		%d1			# %d1 will hold trial quotient
lddnchk:
	btst		&31, %d7		# must we normalize? first word of
	bne.b		lddnormalized		# divisor (V1) must be >= 65536/2
	addq.l		&0x1, DDNORMAL(%a6)	# count normalization shifts
	lsl.l		&0x1, %d7		# shift the divisor
	lsl.l		&0x1, %d6		# shift u4,u3 with overflow to u2
	roxl.l		&0x1, %d5		# shift u1,u2
	bra.w		lddnchk
lddnormalized:

# Now calculate an estimate of the quotient words (msw first, then lsw).
# The comments use subscripts for the first quotient digit determination.
	mov.l		%d7, %d3		# divisor
	mov.l		%d5, %d2		# dividend mslw
	swap		%d2
	swap		%d3
	cmp.w		%d2, %d3		# V1 = U1 ?
	bne.b		lddqcalc1
	mov.w		&0xffff, %d1		# use max trial quotient word
	bra.b		lddadj0
lddqcalc1:
	mov.l		%d5, %d1

	divu.w		%d3, %d1		# use quotient of mslw/msw

	andi.l		&0x0000ffff, %d1	# zero any remainder
lddadj0:

# now test the trial quotient and adjust. This step plus the
# normalization assures (according to Knuth) that the trial
# quotient will be at worst 1 too large.
	mov.l		%d6, -(%sp)
	clr.w		%d6			# word u3 left
	swap		%d6			# in lsw position
lddadj1: mov.l		%d7, %d3
	mov.l		%d1, %d2
	mulu.w		%d7, %d2		# V2q
	swap		%d3
	mulu.w		%d1, %d3		# V1q
	mov.l		%d5, %d4		# U1U2
	sub.l		%d3, %d4		# U1U2 - V1q

	swap		%d4

	mov.w		%d4,%d0
	mov.w		%d6,%d4			# insert lower word (U3)

	tst.w		%d0			# is upper word set?
	bne.w		lddadjd1

#	add.l		%d6, %d4		# (U1U2 - V1q) + U3

	cmp.l		%d2, %d4
	bls.b		lddadjd1		# is V2q > (U1U2-V1q) + U3 ?
	subq.l		&0x1, %d1		# yes, decrement and recheck
	bra.b		lddadj1
lddadjd1:
# now test the word by multiplying it by the divisor (V1V2) and comparing
# the 3 digit (word) result with the current dividend words
	mov.l		%d5, -(%sp)		# save %d5 (%d6 already saved)
	mov.l		%d1, %d6
	swap		%d6			# shift answer to ms 3 words
	mov.l		%d7, %d5
	bsr.l		ldmm2
	mov.l		%d5, %d2		# now %d2,%d3 are trial*divisor
	mov.l		%d6, %d3
	mov.l		(%sp)+, %d5		# restore dividend
	mov.l		(%sp)+, %d6
	sub.l		%d3, %d6
	subx.l		%d2, %d5		# subtract double precision
	bcc		ldd2nd			# no carry, do next quotient digit
	subq.l		&0x1, %d1		# q is one too large
# need to add back divisor longword to current ms 3 digits of dividend
# - according to Knuth, this is done only 2 out of 65536 times for random
# divisor, dividend selection.
	clr.l		%d2
	mov.l		%d7, %d3
	swap		%d3
	clr.w		%d3			# %d3 now ls word of divisor
	add.l		%d3, %d6		# aligned with 3rd word of dividend
	addx.l		%d2, %d5
	mov.l		%d7, %d3
	clr.w		%d3			# %d3 now ms word of divisor
	swap		%d3			# aligned with 2nd word of dividend
	add.l		%d3, %d5
ldd2nd:
	tst.b		DDSECOND(%a6)	# both q words done?
	bne.b		lddremain
# first quotient digit now correct. store digit and shift the
# (subtracted) dividend
	mov.w		%d1, DDQUOTIENT(%a6)
	clr.l		%d1
	swap		%d5
	swap		%d6
	mov.w		%d6, %d5
	clr.w		%d6
	st		DDSECOND(%a6)		# second digit
	bra.w		lddnormalized
lddremain:
# add 2nd word to quotient, get the remainder.
	mov.w		%d1, DDQUOTIENT+2(%a6)
# shift down one word/digit to renormalize remainder.
	mov.w		%d5, %d6
	swap		%d6
	swap		%d5
	mov.l		DDNORMAL(%a6), %d7	# get norm shift count
	beq.b		lddrn
	subq.l		&0x1, %d7		# set for loop count
lddnlp:
	lsr.l		&0x1, %d5		# shift into %d6
	roxr.l		&0x1, %d6
	dbf		%d7, lddnlp
lddrn:
	mov.l		%d6, %d5		# remainder
	mov.l		DDQUOTIENT(%a6), %d6	# quotient

	rts
ldmm2:
# factors for the 32X32->64 multiplication are in %d5 and %d6.
# returns 64 bit result in %d5 (hi) %d6(lo).
# destroys %d2,%d3,%d4.
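# NOTE (added): the identity being used below is
#   (a*2^16 + b) * (c*2^16 + d) = a*c*2^32 + (a*d + b*c)*2^16 + b*d
# with a,c the msws and b,d the lsws of the two factors; the four mulu.w
# instructions form b*d, b*c, a*d and a*c, and the swap/addx sequence folds
# the two middle products into the 64-bit result with carry propagation.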

# multiply hi,lo words of each factor to get 4 intermediate products
	mov.l		%d6, %d2
	mov.l		%d6, %d3
	mov.l		%d5, %d4
	swap		%d3
	swap		%d4
	mulu.w		%d5, %d6		# %d6 <- lsw*lsw
	mulu.w		%d3, %d5		# %d5 <- msw-dest*lsw-source
	mulu.w		%d4, %d2		# %d2 <- msw-source*lsw-dest
	mulu.w		%d4, %d3		# %d3 <- msw*msw
# now use swap and addx to consolidate to two longwords
	clr.l		%d4
	swap		%d6
	add.w		%d5, %d6		# add msw of l*l to lsw of m*l product
	addx.w		%d4, %d3		# add any carry to m*m product
	add.w		%d2, %d6		# add in lsw of other m*l product
	addx.w		%d4, %d3		# add any carry to m*m product
	swap		%d6			# %d6 is low 32 bits of final product
	clr.w		%d5
	clr.w		%d2			# lsw of two mixed products used,
	swap		%d5			# now use msws of longwords
	swap		%d2
	add.l		%d2, %d5
	add.l		%d3, %d5	# %d5 now ms 32 bits of final product
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction	#
#	_060LSP__imuls64_(): Emulate 64-bit signed mul instruction.	#
#									#
#	This is the library version which is accessed as a subroutine	#
#	and therefore does not work exactly like the 680X0 mul{s,u}.l	#
#	64-bit multiply instruction.					#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	0x4(sp) = multiplier						#
#	0x8(sp) = multiplicand						#
#	0xc(sp) = pointer to location to place 64-bit result		#
#									#
# OUTPUT **************************************************************	#
#	0xc(sp) = points to location of 64-bit result			#
#									#
# ALGORITHM ***********************************************************	#
#	Perform the multiply in pieces using 16x16->32 unsigned		#
# multiplies and "add" instructions.					#
#	Set the condition codes as appropriate before performing an	#
# "rts".								#
#									#
#########################################################################
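
# NOTE: a minimal usage sketch (an assumption added here, not taken from
# the original notes), derived from the INPUT/OUTPUT description above:
#
#	pea	result			# ptr to 2 longwords: hi, then lo
#	mov.l	multiplicand,-(%sp)
#	mov.l	multiplier,-(%sp)
#	bsr.l	_060LSP__imulu64_	# or _060LSP__imuls64_
#	lea	0xc(%sp),%sp		# pop args; lea keeps returned ccodes
#
# "result", "multiplicand" and "multiplier" are placeholder operands.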

set MUL64_CC, -4

	global		_060LSP__imulu64_
_060LSP__imulu64_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes

	mov.l		0x8(%a6),%d0		# store multiplier in d0
	beq.w		mulu64_zero		# handle zero separately

	mov.l		0xc(%a6),%d1		# get multiplicand in d1
	beq.w		mulu64_zero		# handle zero separately

#########################################################################
#	63			   32				0	#
#	----------------------------					#
#	| hi(mplier) * hi(mplicand)|					#
#	----------------------------					#
#		     -----------------------------			#
#		     | hi(mplier) * lo(mplicand) |			#
#		     -----------------------------			#
#		     -----------------------------			#
#		     | lo(mplier) * hi(mplicand) |			#
#		     -----------------------------			#
#	  |			   -----------------------------	#
#	--|--			   | lo(mplier) * lo(mplicand) |	#
#	  |			   -----------------------------	#
#	========================================================	#
#	--------------------------------------------------------	#
#	|	hi(result)	   |	    lo(result)         |	#
#	--------------------------------------------------------	#
#########################################################################
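# NOTE (added): a small worked example of the diagram above:
# 0x00010002 * 0x00030004 -- the four 16x16 products are 2*4 = 0x8,
# 1*4 = 0x4, 2*3 = 0x6 and 1*3 = 0x3; summing them with the proper 16-bit
# shifts gives hi(result) = 0x00000003 and lo(result) = 0x000a0008,
# i.e. 65538 * 196612 = 12885557256.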
mulu64_alg:
# load temp registers with operands
	mov.l		%d0,%d2			# mr in d2
	mov.l		%d0,%d3			# mr in d3
	mov.l		%d1,%d4			# md in d4
	swap		%d3			# hi(mr) in lo d3
	swap		%d4			# hi(md) in lo d4

# complete necessary multiplies:
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)

# add lo portions of [2],[3] to hi portion of [1].
# add carries produced from these adds to [4].
# lo([1]) is the final lo 16 bits of the result.
	clr.l		%d4			# load d4 w/ zero value
	swap		%d0			# hi([1]) <==> lo([1])
	add.w		%d1,%d0			# hi([1]) + lo([2])
	addx.l		%d4,%d3			#    [4]  + carry
	add.w		%d2,%d0			# hi([1]) + lo([3])
	addx.l		%d4,%d3			#    [4]  + carry
	swap		%d0			# lo([1]) <==> hi([1])

# lo portions of [2],[3] have been added in to final result.
# now, clear lo, put hi in lo reg, and add to [4]
	clr.w		%d1			# clear lo([2])
	clr.w		%d2			# clear lo([3])
	swap		%d1			# hi([2]) in lo d1
	swap		%d2			# hi([3]) in lo d2
	add.l		%d2,%d1			#    [4]  + hi([2])
	add.l		%d3,%d1			#    [4]  + hi([3])

# now, grab the condition codes. only one that can be set is 'N'.
# 'N' CAN be set even though the operation is unsigned, if bit 63 of
# the result is set.
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1			# may set 'N' bit
	bpl.b		mulu64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
mulu64_ddone:
	mov.w		%d4,%cc

# here, the result is in d1 and d0. the current strategy is to save
# the values at the location addressed by the result pointer passed
# at 0x10(%a6). use movm here to not disturb the condition codes.
mulu64_end:
	exg		%d1,%d0
	movm.l		&0x0003,([0x10,%a6])		# save result

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x001c		# restore d2-d4
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# one or both of the operands is zero so the result is also zero.
# save the zero result to the register file and set the 'Z' ccode bit.
mulu64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4
	ori.b		&0x4,%d4
	mov.w		%d4,%cc			# set 'Z' ccode bit

	bra.b		mulu64_end

##########
# muls.l #
##########
	global		_060LSP__imuls64_
_060LSP__imuls64_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3c00,-(%sp)		# save d2-d5
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes

	mov.l		0x8(%a6),%d0		# store multiplier in d0
	beq.b		mulu64_zero		# handle zero separately

	mov.l		0xc(%a6),%d1		# get multiplicand in d1
	beq.b		mulu64_zero		# handle zero separately

	clr.b		%d5			# clear sign tag
	tst.l		%d0			# is multiplier negative?
	bge.b		muls64_chk_md_sgn	# no
	neg.l		%d0			# make multiplier positive

	ori.b		&0x1,%d5		# save multiplier sgn

# the result sign is the exclusive or of the operand sign bits.
muls64_chk_md_sgn:
	tst.l		%d1			# is multiplicand negative?
	bge.b		muls64_alg		# no
	neg.l		%d1			# make multiplicand positive

	eori.b		&0x1,%d5		# calculate correct sign

#########################################################################
#	63			   32				0	#
#	----------------------------					#
#	| hi(mplier) * hi(mplicand)|					#
#	----------------------------					#
#		     -----------------------------			#
#		     | hi(mplier) * lo(mplicand) |			#
#		     -----------------------------			#
#		     -----------------------------			#
#		     | lo(mplier) * hi(mplicand) |			#
#		     -----------------------------			#
#	  |			   -----------------------------	#
#	--|--			   | lo(mplier) * lo(mplicand) |	#
#	  |			   -----------------------------	#
#	========================================================	#
#	--------------------------------------------------------	#
#	|	hi(result)	   |	    lo(result)         |	#
#	--------------------------------------------------------	#
#########################################################################
muls64_alg:
# load temp registers with operands
	mov.l		%d0,%d2			# mr in d2
	mov.l		%d0,%d3			# mr in d3
	mov.l		%d1,%d4			# md in d4
	swap		%d3			# hi(mr) in lo d3
	swap		%d4			# hi(md) in lo d4

# complete necessary multiplies:
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)

# add lo portions of [2],[3] to hi portion of [1].
# add carries produced from these adds to [4].
# lo([1]) is the final lo 16 bits of the result.
	clr.l		%d4			# load d4 w/ zero value
	swap		%d0			# hi([1]) <==> lo([1])
	add.w		%d1,%d0			# hi([1]) + lo([2])
	addx.l		%d4,%d3			#    [4]  + carry
	add.w		%d2,%d0			# hi([1]) + lo([3])
	addx.l		%d4,%d3			#    [4]  + carry
	swap		%d0			# lo([1]) <==> hi([1])

# lo portions of [2],[3] have been added in to final result.
# now, clear lo, put hi in lo reg, and add to [4]
	clr.w		%d1			# clear lo([2])
	clr.w		%d2			# clear lo([3])
	swap		%d1			# hi([2]) in lo d1
	swap		%d2			# hi([3]) in lo d2
	add.l		%d2,%d1			#    [4]  + hi([2])
	add.l		%d3,%d1			#    [4]  + hi([3])

	tst.b		%d5			# should result be signed?
	beq.b		muls64_done		# no

# result should be a signed negative number.
# compute 2's complement of the unsigned number:
#   -negate all bits and add 1
muls64_neg:
	not.l		%d0			# negate lo(result) bits
	not.l		%d1			# negate hi(result) bits
	addq.l		&1,%d0			# add 1 to lo(result)
	addx.l		%d4,%d1			# add carry to hi(result)

muls64_done:
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1			# may set 'N' bit
	bpl.b		muls64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
muls64_ddone:
	mov.w		%d4,%cc

# here, the result is in d1 and d0. the current strategy is to save
# the values at the location addressed by the result pointer passed
# at 0x10(%a6). use movm here to not disturb the condition codes.
muls64_end:
	exg		%d1,%d0
	movm.l		&0x0003,([0x10,%a6])	# save result

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x003c		# restore d2-d5
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# one or both of the operands is zero so the result is also zero.
# save the zero result to the register file and set the 'Z' ccode bit.
muls64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4
	ori.b		&0x4,%d4
	mov.w		%d4,%cc			# set 'Z' ccode bit

	bra.b		muls64_end

#########################################################################
# XDEF ****************************************************************	#
#	_060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".			#
#	_060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".			#
#	_060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".			#
#	_060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".			#
#	_060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".			#
#	_060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".			#
#									#
#	This is the library version which is accessed as a subroutine	#
#	and therefore does not work exactly like the 680X0 "cmp2"	#
#	instruction.							#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	0x4(sp) = Rn							#
#	0x8(sp) = pointer to boundary pair				#
#									#
# OUTPUT **************************************************************	#
#	cc = condition codes are set correctly				#
#									#
# ALGORITHM ***********************************************************	#
#	In the interest of simplicity, all operands are converted to	#
# longword size whether the operation is byte, word, or long. The	#
# bounds are sign extended accordingly. If Rn is a data register, Rn is #
# also sign extended. If Rn is an address register, it need not be sign #
# extended since the full register is always used.			#
#	The condition codes are set correctly before the final "rts".	#
#									#
#########################################################################
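
# NOTE: a minimal usage sketch (an assumption added here, not taken from
# the original notes), derived from the INPUT description above. as with
# the 680x0 "cmp2" definition, the emulation leaves 'Z' set if Rn equals
# either bound and 'C' set if Rn lies outside the bounds:
#
#	pea	bounds			# ptr to lo bound, then hi bound
#	mov.l	%d3,-(%sp)		# the value to range-check
#	bsr.l	_060LSP__cmp2_Dl_
#	lea	0x8(%sp),%sp		# pop args without touching the ccodes
#	bcs.b	out_of_range		# 'C' set -> value outside [lo,hi]
#
# "bounds" and "out_of_range" are placeholder names for illustration only.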

set	CMP2_CC,	-4

	global		_060LSP__cmp2_Ab_
_060LSP__cmp2_Ab_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.b		([0xc,%a6],0x0),%d0
	mov.b		([0xc,%a6],0x1),%d1

	extb.l		%d0			# sign extend lo bnd
	extb.l		%d1			# sign extend hi bnd
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Aw_
_060LSP__cmp2_Aw_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.w		([0xc,%a6],0x0),%d0
	mov.w		([0xc,%a6],0x2),%d1

	ext.l		%d0			# sign extend lo bnd
	ext.l		%d1			# sign extend hi bnd
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Al_
_060LSP__cmp2_Al_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.l		([0xc,%a6],0x0),%d0
	mov.l		([0xc,%a6],0x4),%d1
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Db_
_060LSP__cmp2_Db_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.b		([0xc,%a6],0x0),%d0
	mov.b		([0xc,%a6],0x1),%d1

	extb.l		%d0			# sign extend lo bnd
	extb.l		%d1			# sign extend hi bnd

# operation is a data register compare.
# sign extend byte to long so we can do simple longword compares.
	extb.l		%d2			# sign extend data byte
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Dw_
_060LSP__cmp2_Dw_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.w		([0xc,%a6],0x0),%d0
	mov.w		([0xc,%a6],0x2),%d1

	ext.l		%d0			# sign extend lo bnd
	ext.l		%d1			# sign extend hi bnd

# operation is a data register compare.
# sign extend word to long so we can do simple longword compares.
	ext.l		%d2			# sign extend data word
	bra.w		l_cmp2_cmp		# go emulate compare

	global		_060LSP__cmp2_Dl_
_060LSP__cmp2_Dl_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.l		([0xc,%a6],0x0),%d0
	mov.l		([0xc,%a6],0x4),%d1

#
# To set the ccodes correctly:
#	(1) save 'Z' bit from (Rn - lo)
#	(2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
#	(3) keep 'X', 'N', and 'V' from before instruction
#	(4) combine ccodes
#
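# NOTE (added): the single unsigned comparison works for both signed and
# unsigned bounds because subtracting 'lo' maps the range [lo,hi] onto
# [0,hi-lo]; any Rn outside the original range wraps around to an unsigned
# value greater than (hi - lo), so the borrow ('C') from the compare below
# is exactly the cmp2 "out of bounds" indication.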
l_cmp2_cmp:
	sub.l		%d0, %d2		# (Rn - lo)
	mov.w		%cc, %d3		# fetch resulting ccodes
	andi.b		&0x4, %d3		# keep 'Z' bit
	sub.l		%d0, %d1		# (hi - lo)
	cmp.l		%d1,%d2			# ((hi - lo) - (Rn - lo))

	mov.w		%cc, %d4		# fetch resulting ccodes
	or.b		%d4, %d3		# combine w/ earlier ccodes
	andi.b		&0x5, %d3		# keep 'Z' and 'C'

	mov.w		CMP2_CC(%a6), %d4	# fetch old ccodes
	andi.b		&0x1a, %d4		# keep 'X','N','V' bits
	or.b		%d3, %d4		# insert new ccodes
	mov.w		%d4,%cc			# save new ccodes

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x001c		# restore d2-d4
	unlk		%a6
# EPILOGUE END ##########################################################

	rts