1 ; $Id: fix.asm,v 1.4 2003-12-08 21:21:16 btb Exp $
2 ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
3 ;SOFTWARE CORPORATION ("PARALLAX"). PARALLAX, IN DISTRIBUTING THE CODE TO
4 ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
5 ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
6 ;IN USING, DISPLAYING, AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
7 ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
8 ;FREE PURPOSES. IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
9 ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES. THE END-USER UNDERSTANDS
10 ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
11 ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION. ALL RIGHTS RESERVED.
16 %define _fixdivquadlong fixdivquadlong
17 %define _fixmul fixmul
18 %define _fixdiv fixdiv
19 %define _fixmulaccum fixmulaccum
20 %define _fixmuldiv fixmuldiv
21 %define _fixquadadjust fixquadadjust
22 %define _fixquadnegate fixquadnegate
23 %define _quad_sqrt quad_sqrt
24 %define _long_sqrt long_sqrt
25 %define _fix_sqrt fix_sqrt
26 %define _fix_asin fix_asin
27 %define _fix_acos fix_acos
28 %define _fix_atan2 fix_atan2
29 %define _fix_fastsincos fix_fastsincos
30 %define _fix_sincos fix_sincos
33 global _fixdivquadlong
46 global _fix_fastsincos
48 global quad_sqrt_asm ; for assembler vecmat
49 global fix_sincos_asm ; for assembler vecmat
50 global fix_acos_asm ; for assembler vecmat
51 global long_sqrt_asm ; for assembler vecmat
53 global fix_fastsincos_asm
635 dw 16384 ;extra for when exacty 1
895 dw 0 ;extra for when exacty 1
979 ;standard Newtonian-iteration square root routine. takes eax, returns ax
980 ;trashes eax,ebx,ecx,edx,esi,edi
984 or eax,eax ;check sign
985 jle near error ;zero or negative
993 shr edx,16 ;split eax -> dx:ax
995 ;get a good first quess by checking which byte most significant bit is in
996 xor ebx,ebx ;clear high bytes for index
998 or dh,dh ;highest byte
1000 mov bl,dh ;get value for lookup
1005 mov bl,dl ;get value for lookup
1010 mov bl,ah ;get value for lookup
1013 not_ah: mov bl,al ;get value for lookup
1016 movzx ebx,byte [guess_table+ebx] ;get byte guess
1017 sal ebx,cl ;get in right place
1020 mov esi,edx ;save dx:ax
1022 ;the loop nearly always executes 3 times, so we'll unroll it 2 times and
1023 ;not do any checking until after the third time. By my calcutations, the
1024 ;loop is executed 2 times in 99.97% of cases, 3 times in 93.65% of cases,
1025 ;four times in 16.18% of cases, and five times in 0.44% of cases. It never
1026 ;executes more than five times. By timing, I determined that is is faster
1027 ;to always execute three times and not check for termination the first two
1028 ;times through. This means that in 93.65% of cases, we save 6 cmp/jcc pairs,
1029 ;and in 6.35% of cases we do an extra divide. In real life, these numbers
1030 ;might not be the same.
1035 mov edx,esi ;restore dx:ax
1037 ; mov edi,ebx ;save for compare
1039 rcr ebx,1 ;next guess = (d + q)/2
1042 newt_loop: mov eax,ecx
1043 mov edx,esi ;restore dx:ax
1045 cmp eax,ebx ;correct?
1047 mov edi,ebx ;save for compare
1049 rcr ebx,1 ;next guess = (d + q)/2
1055 almost_got_it: mov eax,ebx
1056 or dx,dx ;check remainder
1059 got_it: and eax,0ffffh
1065 ;sqrt called with zero or negative input. return zero
1069 ;standard Newtonian-iteration square root routine. takes edx:eax, returns eax
1074 or edx,edx ;check sign
1075 js error ;can't do negative number!
1076 jnz must_do_quad ;we really must do 64/32 div
1077 or eax,eax ;check high bit of low longword
1078 jns near long_sqrt_asm ;we can use longword version
1085 ;get a good first quess by checking which byte most significant bit is in
1086 xor ebx,ebx ;clear high bytes for index
1088 ror edx,16 ;get high 2 bytes
1092 mov bl,dh ;get value for lookup
1094 ror edx,16 ;restore edx
1098 mov bl,dl ;get value for lookup
1100 ror edx,16 ;restore edx
1102 q_not_dl: ror edx,16 ;restore edx
1105 mov bl,dh ;get value for lookup
1108 q_not_ah: mov bl,dl ;get value for lookup
1111 movzx ebx,byte [guess_table+ebx] ;get byte guess
1112 sal ebx,cl ;get in right place
1116 mov esi,edx ;save edx:eax
1118 ;quad loop usually executes 4 times
1123 mov edx,esi ;restore dx:ax
1125 mov edi,ebx ;save for compare
1127 rcr ebx,1 ;next guess = (d + q)/2
1130 q_newt_loop: mov eax,ecx
1131 mov edx,esi ;restore dx:ax
1133 cmp eax,ebx ;correct?
1135 mov edi,ebx ;save for compare
1137 rcr ebx,1 ;next guess = (d + q)/2
1143 q_almost_got_it: mov eax,ebx
1144 or edx,edx ;check remainder
1154 ;fixed-point square root
1158 ; movzx eax,ax ; now in long_sqrt
1162 ;the sincos functions have two varients: the C version is passed pointer
1163 ;to variables for sin & cos, and the assembly version returns the values
1166 ;takes ax=angle, returns eax=sin, ebx=cos.
1168 movzx eax,ah ;get high byte
1169 movsx ebx,word [cos_table+eax*2]
1170 sal ebx,2 ;make a fix
1171 movsx eax,word [sin_table+eax*2]
1172 sal eax,2 ;make a fix
1175 ;takes ax=angle, returns eax=sin, ebx=cos.
1181 mov dl, ah ;get high byte
1182 mov cl, al ;save low byte
1185 movsx eax,word [sin_table+edx]
1186 movsx ebx,word [sin_table+edx+2]
1188 imul ebx,ecx ;mul by fraction
1190 add eax,ebx ;add in frac part
1191 sal eax,2 ;make a fix
1193 movsx ebx,word [cos_table+edx]
1194 movsx edx,word [cos_table+edx+2]
1196 imul edx,ecx ;mul by fraction
1198 add ebx,edx ;add in frac part
1199 sal ebx,2 ;make a fix
1208 ;takes eax=cos angle, returns ax=angle
1214 abs_eax ;get abs eax
1221 movzx ecx,al ;save low byte (fraction)
1225 sar edx,8 ;get high byte (+1 bit)
1226 movsx eax,word [acos_table+edx*2]
1227 movsx ebx,word [acos_table+edx*2+2]
1229 imul ebx,ecx ;mul by fraction
1231 add eax,ebx ;add in frac part
1233 pop edx ;get sign back
1235 sub eax,edx ;make correct sign
1236 and edx,8000h ;zero or 1/2
1245 ;takes eax=sin angle, returns ax=angle
1253 abs_eax ;get abs value
1260 movzx ecx,al ;save low byte (fraction)
1264 sar edx,8 ;get high byte (+1 bit)
1265 movsx eax,word [asin_table+edx*2]
1266 movsx ebx,word [asin_table+edx*2+2]
1268 imul ebx,ecx ;mul by fraction
1270 add eax,ebx ;add in frac part
1272 pop edx ;get sign back
1273 xor eax,edx ;make sign correct
1282 ;given cos & sin of an angle, return that angle. takes eax=cos,ebx=sin.
1283 ;returns ax. parms need not be normalized, that is, the ratio eax/ebx must
1284 ;equal the ratio cos/sin, but the parms need not be the actual cos & sin.
1285 ;NOTE: this is different from the standard C atan2, since it is left-handed.
1286 ;uses either asin or acos, to get better precision
1299 break_if z,'Both parms to atan2 are zero!'
1306 ;find smaller of two
1309 abs_eax ;get abs value
1311 abs_eax ;get abs value
1314 cmp ebx,eax ;compare x to y
1319 ;sin is smaller, use arcsin
1328 mov ecx,eax ;ecx = mag
1330 pop ebx ;get cos, save in ebx
1332 jecxz sign_ok ;abort!
1333 m_fixdiv ecx ;normalize it
1334 call fix_asin_asm ;get angle
1335 or ebx,ebx ;check sign of cos
1337 sub eax,8000h ;adjust
1346 ;cos is smaller, use arccos
1358 m_fixdiv ecx ;normalize it
1359 call fix_acos_asm ; get angle
1360 mov ebx,eax ;save in ebx
1362 cdq ;get sign of sin
1363 mov eax,ebx ;get cos back
1365 sub eax,edx ;make sign correct
1373 ; C version - takes angle,*sin,*cos. fills in sin&cos.
1374 ;either (or both) pointers can be null
1375 ;trashes eax,ecx,edx
1379 call fix_fastsincos_asm
1391 ;C version - takes angle,*sin,*cos. fills in sin&cos.
1392 ;trashes eax,ecx,edx
1393 ;either (or both) pointers can be null