1 ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
2 ;SOFTWARE CORPORATION ("PARALLAX"). PARALLAX, IN DISTRIBUTING THE CODE TO
3 ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
4 ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
5 ;IN USING, DISPLAYING, AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
6 ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
7 ;FREE PURPOSES. IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
8 ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES. THE END-USER UNDERSTANDS
9 ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
10 ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION. ALL RIGHTS RESERVED.
15 %define _fixdivquadlong fixdivquadlong
16 %define _fixmul fixmul
17 %define _fixdiv fixdiv
18 %define _fixmulaccum fixmulaccum
19 %define _fixmuldiv fixmuldiv
20 %define _fixquadadjust fixquadadjust
21 %define _fixquadnegate fixquadnegate
22 %define _quad_sqrt quad_sqrt
23 %define _long_sqrt long_sqrt
24 %define _fix_sqrt fix_sqrt
25 %define _fix_asin fix_asin
26 %define _fix_acos fix_acos
27 %define _fix_atan2 fix_atan2
28 %define _fix_fastsincos fix_fastsincos
29 %define _fix_sincos fix_sincos
32 global _fixdivquadlong
45 global _fix_fastsincos
47 global quad_sqrt_asm ; for assembler vecmat
48 global fix_sincos_asm ; for assembler vecmat
49 global fix_acos_asm ; for assembler vecmat
50 global long_sqrt_asm ; for assembler vecmat
52 global fix_fastsincos_asm
634 dw 16384 ;extra for when exacty 1
894 dw 0 ;extra for when exacty 1
978 ;standard Newtonian-iteration square root routine. takes eax, returns ax
979 ;trashes eax,ebx,ecx,edx,esi,edi
983 or eax,eax ;check sign
984 jle near error ;zero or negative
992 shr edx,16 ;split eax -> dx:ax
994 ;get a good first quess by checking which byte most significant bit is in
995 xor ebx,ebx ;clear high bytes for index
997 or dh,dh ;highest byte
999 mov bl,dh ;get value for lookup
1004 mov bl,dl ;get value for lookup
1009 mov bl,ah ;get value for lookup
1012 not_ah: mov bl,al ;get value for lookup
1015 movzx ebx,byte [guess_table+ebx] ;get byte guess
1016 sal ebx,cl ;get in right place
1019 mov esi,edx ;save dx:ax
1021 ;the loop nearly always executes 3 times, so we'll unroll it 2 times and
1022 ;not do any checking until after the third time. By my calcutations, the
1023 ;loop is executed 2 times in 99.97% of cases, 3 times in 93.65% of cases,
1024 ;four times in 16.18% of cases, and five times in 0.44% of cases. It never
1025 ;executes more than five times. By timing, I determined that is is faster
1026 ;to always execute three times and not check for termination the first two
1027 ;times through. This means that in 93.65% of cases, we save 6 cmp/jcc pairs,
1028 ;and in 6.35% of cases we do an extra divide. In real life, these numbers
1029 ;might not be the same.
1034 mov edx,esi ;restore dx:ax
1036 ; mov edi,ebx ;save for compare
1038 rcr ebx,1 ;next guess = (d + q)/2
1041 newt_loop: mov eax,ecx
1042 mov edx,esi ;restore dx:ax
1044 cmp eax,ebx ;correct?
1046 mov edi,ebx ;save for compare
1048 rcr ebx,1 ;next guess = (d + q)/2
1054 almost_got_it: mov eax,ebx
1055 or dx,dx ;check remainder
1058 got_it: and eax,0ffffh
1064 ;sqrt called with zero or negative input. return zero
1068 ;standard Newtonian-iteration square root routine. takes edx:eax, returns eax
1073 or edx,edx ;check sign
1074 js error ;can't do negative number!
1075 jnz must_do_quad ;we really must do 64/32 div
1076 or eax,eax ;check high bit of low longword
1077 jns near long_sqrt_asm ;we can use longword version
1084 ;get a good first quess by checking which byte most significant bit is in
1085 xor ebx,ebx ;clear high bytes for index
1087 ror edx,16 ;get high 2 bytes
1091 mov bl,dh ;get value for lookup
1093 ror edx,16 ;restore edx
1097 mov bl,dl ;get value for lookup
1099 ror edx,16 ;restore edx
1101 q_not_dl: ror edx,16 ;restore edx
1104 mov bl,dh ;get value for lookup
1107 q_not_ah: mov bl,dl ;get value for lookup
1110 movzx ebx,byte [guess_table+ebx] ;get byte guess
1111 sal ebx,cl ;get in right place
1115 mov esi,edx ;save edx:eax
1117 ;quad loop usually executes 4 times
1122 mov edx,esi ;restore dx:ax
1124 mov edi,ebx ;save for compare
1126 rcr ebx,1 ;next guess = (d + q)/2
1129 q_newt_loop: mov eax,ecx
1130 mov edx,esi ;restore dx:ax
1132 cmp eax,ebx ;correct?
1134 mov edi,ebx ;save for compare
1136 rcr ebx,1 ;next guess = (d + q)/2
1142 q_almost_got_it: mov eax,ebx
1143 or edx,edx ;check remainder
1153 ;fixed-point square root
1157 ; movzx eax,ax ; now in long_sqrt
1161 ;the sincos functions have two varients: the C version is passed pointer
1162 ;to variables for sin & cos, and the assembly version returns the values
1165 ;takes ax=angle, returns eax=sin, ebx=cos.
1167 movzx eax,ah ;get high byte
1168 movsx ebx,word [cos_table+eax*2]
1169 sal ebx,2 ;make a fix
1170 movsx eax,word [sin_table+eax*2]
1171 sal eax,2 ;make a fix
1174 ;takes ax=angle, returns eax=sin, ebx=cos.
1180 mov dl, ah ;get high byte
1181 mov cl, al ;save low byte
1184 movsx eax,word [sin_table+edx]
1185 movsx ebx,word [sin_table+edx+2]
1187 imul ebx,ecx ;mul by fraction
1189 add eax,ebx ;add in frac part
1190 sal eax,2 ;make a fix
1192 movsx ebx,word [cos_table+edx]
1193 movsx edx,word [cos_table+edx+2]
1195 imul edx,ecx ;mul by fraction
1197 add ebx,edx ;add in frac part
1198 sal ebx,2 ;make a fix
1207 ;takes eax=cos angle, returns ax=angle
1213 abs_eax ;get abs eax
1220 movzx ecx,al ;save low byte (fraction)
1224 sar edx,8 ;get high byte (+1 bit)
1225 movsx eax,word [acos_table+edx*2]
1226 movsx ebx,word [acos_table+edx*2+2]
1228 imul ebx,ecx ;mul by fraction
1230 add eax,ebx ;add in frac part
1232 pop edx ;get sign back
1234 sub eax,edx ;make correct sign
1235 and edx,8000h ;zero or 1/2
1244 ;takes eax=sin angle, returns ax=angle
1252 abs_eax ;get abs value
1259 movzx ecx,al ;save low byte (fraction)
1263 sar edx,8 ;get high byte (+1 bit)
1264 movsx eax,word [asin_table+edx*2]
1265 movsx ebx,word [asin_table+edx*2+2]
1267 imul ebx,ecx ;mul by fraction
1269 add eax,ebx ;add in frac part
1271 pop edx ;get sign back
1272 xor eax,edx ;make sign correct
1281 ;given cos & sin of an angle, return that angle. takes eax=cos,ebx=sin.
1282 ;returns ax. parms need not be normalized, that is, the ratio eax/ebx must
1283 ;equal the ratio cos/sin, but the parms need not be the actual cos & sin.
1284 ;NOTE: this is different from the standard C atan2, since it is left-handed.
1285 ;uses either asin or acos, to get better precision
1298 break_if z,'Both parms to atan2 are zero!'
1305 ;find smaller of two
1308 abs_eax ;get abs value
1310 abs_eax ;get abs value
1313 cmp ebx,eax ;compare x to y
1318 ;sin is smaller, use arcsin
1327 mov ecx,eax ;ecx = mag
1329 pop ebx ;get cos, save in ebx
1331 jecxz sign_ok ;abort!
1332 m_fixdiv ecx ;normalize it
1333 call fix_asin_asm ;get angle
1334 or ebx,ebx ;check sign of cos
1336 sub eax,8000h ;adjust
1345 ;cos is smaller, use arccos
1357 m_fixdiv ecx ;normalize it
1358 call fix_acos_asm ; get angle
1359 mov ebx,eax ;save in ebx
1361 cdq ;get sign of sin
1362 mov eax,ebx ;get cos back
1364 sub eax,edx ;make sign correct
1372 ; C version - takes angle,*sin,*cos. fills in sin&cos.
1373 ;either (or both) pointers can be null
1374 ;trashes eax,ecx,edx
1378 call fix_fastsincos_asm
1390 ;C version - takes angle,*sin,*cos. fills in sin&cos.
1391 ;trashes eax,ecx,edx
1392 ;either (or both) pointers can be null