maths/fix.asm

   1 ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
   2 ;SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
   3 ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
   4 ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
   5 ;IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
   6 ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
   7 ;FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
   8 ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
   9 ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
  10 ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
  11
  12 [BITS 32]
     ;everything below is assembled as 32-bit code
  13
  14 %ifdef __linux__
     ;Linux build: the labels in this file are written with a leading
     ;underscore, so alias each one to the plain (no-underscore) name.
  15 %define _fixdivquadlong fixdivquadlong
  16 %define _fixmul fixmul
  17 %define _fixdiv fixdiv
  18 %define _fixmulaccum fixmulaccum
  19 %define _fixmuldiv fixmuldiv
  20 %define _fixquadadjust fixquadadjust
  21 %define _fixquadnegate fixquadnegate
  22 %define _quad_sqrt quad_sqrt
  23 %define _long_sqrt long_sqrt
  24 %define _fix_sqrt fix_sqrt
  25 %define _fix_asin fix_asin
  26 %define _fix_acos fix_acos
  27 %define _fix_atan2 fix_atan2
  28 %define _fix_fastsincos fix_fastsincos
  29 %define _fix_sincos fix_sincos
  30 %endif
  31
     ;exported entry points (the ones defined in this part of the file read
     ;their arguments from the stack at [esp+4], [esp+8], ...)
  32 global _fixdivquadlong
  33 global _fixmul
  34 global _fixdiv
  35 global _fixmulaccum
  36 global _fixmuldiv
  37 global _fixquadadjust
  38 global _fixquadnegate
  39 global _long_sqrt
  40 global _quad_sqrt
  41 global _fix_sqrt
  42 global _fix_asin
  43 global _fix_acos
  44 global _fix_atan2
  45 global _fix_fastsincos
  46 global _fix_sincos
     ;register-argument entry points; these names are not aliased above, so
     ;they are exported unchanged on every platform
  47 global quad_sqrt_asm    ; for assembler vecmat
  48 global fix_sincos_asm   ; for assembler vecmat
  49 global fix_acos_asm     ; for assembler vecmat
  50 global long_sqrt_asm         ; for assembler vecmat
  51 ;global fix_asin_asm
  52 global fix_fastsincos_asm
  53
  54 [SECTION .data]
  55 sin_table  dw      0
     ;sine lookup table: one signed word per 1/256 of a full turn, scaled so
     ;that 16384 = 1.0 (the sincos routines shift the word left by 2 to make a
     ;16.16 fix).  only the first 64 entries (the first quarter turn) sit under
     ;this label; cos_table follows immediately, so sin_table[64+i] is the same
     ;word as cos_table[i].
  56         dw      402
  57         dw      804
  58         dw      1205
  59         dw      1606
  60         dw      2006
  61         dw      2404
  62         dw      2801
  63         dw      3196
  64         dw      3590
  65         dw      3981
  66         dw      4370
  67         dw      4756
  68         dw      5139
  69         dw      5520
  70         dw      5897
  71         dw      6270
  72         dw      6639
  73         dw      7005
  74         dw      7366
  75         dw      7723
  76         dw      8076
  77         dw      8423
  78         dw      8765
  79         dw      9102
  80         dw      9434
  81         dw      9760
  82         dw      10080
  83         dw      10394
  84         dw      10702
  85         dw      11003
  86         dw      11297
  87         dw      11585
  88         dw      11866
  89         dw      12140
  90         dw      12406
  91         dw      12665
  92         dw      12916
  93         dw      13160
  94         dw      13395
  95         dw      13623
  96         dw      13842
  97         dw      14053
  98         dw      14256
  99         dw      14449
 100         dw      14635
 101         dw      14811
 102         dw      14978
 103         dw      15137
 104         dw      15286
 105         dw      15426
 106         dw      15557
 107         dw      15679
 108         dw      15791
 109         dw      15893
 110         dw      15986
 111         dw      16069
 112         dw      16143
 113         dw      16207
 114         dw      16261
 115         dw      16305
 116         dw      16340
 117         dw      16364
 118         dw      16379
 119 cos_table       dw      16384
     ;cosine lookup table, same scaling as sin_table (16384 = 1.0).
     ;257 entries: indices 0..255 cover one full turn, and entry 256 is what
     ;fix_sincos_asm reads as "index+1" when it interpolates from index 255.
 120         dw      16379
 121         dw      16364
 122         dw      16340
 123         dw      16305
 124         dw      16261
 125         dw      16207
 126         dw      16143
 127         dw      16069
 128         dw      15986
 129         dw      15893
 130         dw      15791
 131         dw      15679
 132         dw      15557
 133         dw      15426
 134         dw      15286
 135         dw      15137
 136         dw      14978
 137         dw      14811
 138         dw      14635
 139         dw      14449
 140         dw      14256
 141         dw      14053
 142         dw      13842
 143         dw      13623
 144         dw      13395
 145         dw      13160
 146         dw      12916
 147         dw      12665
 148         dw      12406
 149         dw      12140
 150         dw      11866
 151         dw      11585
 152         dw      11297
 153         dw      11003
 154         dw      10702
 155         dw      10394
 156         dw      10080
 157         dw      9760
 158         dw      9434
 159         dw      9102
 160         dw      8765
 161         dw      8423
 162         dw      8076
 163         dw      7723
 164         dw      7366
 165         dw      7005
 166         dw      6639
 167         dw      6270
 168         dw      5897
 169         dw      5520
 170         dw      5139
 171         dw      4756
 172         dw      4370
 173         dw      3981
 174         dw      3590
 175         dw      3196
 176         dw      2801
 177         dw      2404
 178         dw      2006
 179         dw      1606
 180         dw      1205
 181         dw      804
 182         dw      402
 183         dw      0
 184         dw      -402
 185         dw      -804
 186         dw      -1205
 187         dw      -1606
 188         dw      -2006
 189         dw      -2404
 190         dw      -2801
 191         dw      -3196
 192         dw      -3590
 193         dw      -3981
 194         dw      -4370
 195         dw      -4756
 196         dw      -5139
 197         dw      -5520
 198         dw      -5897
 199         dw      -6270
 200         dw      -6639
 201         dw      -7005
 202         dw      -7366
 203         dw      -7723
 204         dw      -8076
 205         dw      -8423
 206         dw      -8765
 207         dw      -9102
 208         dw      -9434
 209         dw      -9760
 210         dw      -10080
 211         dw      -10394
 212         dw      -10702
 213         dw      -11003
 214         dw      -11297
 215         dw      -11585
 216         dw      -11866
 217         dw      -12140
 218         dw      -12406
 219         dw      -12665
 220         dw      -12916
 221         dw      -13160
 222         dw      -13395
 223         dw      -13623
 224         dw      -13842
 225         dw      -14053
 226         dw      -14256
 227         dw      -14449
 228         dw      -14635
 229         dw      -14811
 230         dw      -14978
 231         dw      -15137
 232         dw      -15286
 233         dw      -15426
 234         dw      -15557
 235         dw      -15679
 236         dw      -15791
 237         dw      -15893
 238         dw      -15986
 239         dw      -16069
 240         dw      -16143
 241         dw      -16207
 242         dw      -16261
 243         dw      -16305
 244         dw      -16340
 245         dw      -16364
 246         dw      -16379
 247         dw      -16384
 248         dw      -16379
 249         dw      -16364
 250         dw      -16340
 251         dw      -16305
 252         dw      -16261
 253         dw      -16207
 254         dw      -16143
 255         dw      -16069
 256         dw      -15986
 257         dw      -15893
 258         dw      -15791
 259         dw      -15679
 260         dw      -15557
 261         dw      -15426
 262         dw      -15286
 263         dw      -15137
 264         dw      -14978
 265         dw      -14811
 266         dw      -14635
 267         dw      -14449
 268         dw      -14256
 269         dw      -14053
 270         dw      -13842
 271         dw      -13623
 272         dw      -13395
 273         dw      -13160
 274         dw      -12916
 275         dw      -12665
 276         dw      -12406
 277         dw      -12140
 278         dw      -11866
 279         dw      -11585
 280         dw      -11297
 281         dw      -11003
 282         dw      -10702
 283         dw      -10394
 284         dw      -10080
 285         dw      -9760
 286         dw      -9434
 287         dw      -9102
 288         dw      -8765
 289         dw      -8423
 290         dw      -8076
 291         dw      -7723
 292         dw      -7366
 293         dw      -7005
 294         dw      -6639
 295         dw      -6270
 296         dw      -5897
 297         dw      -5520
 298         dw      -5139
 299         dw      -4756
 300         dw      -4370
 301         dw      -3981
 302         dw      -3590
 303         dw      -3196
 304         dw      -2801
 305         dw      -2404
 306         dw      -2006
 307         dw      -1606
 308         dw      -1205
 309         dw      -804
 310         dw      -402
 311         dw      0
 312         dw      402
 313         dw      804
 314         dw      1205
 315         dw      1606
 316         dw      2006
 317         dw      2404
 318         dw      2801
 319         dw      3196
 320         dw      3590
 321         dw      3981
 322         dw      4370
 323         dw      4756
 324         dw      5139
 325         dw      5520
 326         dw      5897
 327         dw      6270
 328         dw      6639
 329         dw      7005
 330         dw      7366
 331         dw      7723
 332         dw      8076
 333         dw      8423
 334         dw      8765
 335         dw      9102
 336         dw      9434
 337         dw      9760
 338         dw      10080
 339         dw      10394
 340         dw      10702
 341         dw      11003
 342         dw      11297
 343         dw      11585
 344         dw      11866
 345         dw      12140
 346         dw      12406
 347         dw      12665
 348         dw      12916
 349         dw      13160
 350         dw      13395
 351         dw      13623
 352         dw      13842
 353         dw      14053
 354         dw      14256
 355         dw      14449
 356         dw      14635
 357         dw      14811
 358         dw      14978
 359         dw      15137
 360         dw      15286
 361         dw      15426
 362         dw      15557
 363         dw      15679
 364         dw      15791
 365         dw      15893
 366         dw      15986
 367         dw      16069
 368         dw      16143
 369         dw      16207
 370         dw      16261
 371         dw      16305
 372         dw      16340
 373         dw      16364
 374         dw      16379
 375         dw      16384
 376
 377 asin_table      dw      0
     ;arcsine lookup table, 258 words rising from 0 to 16384.  16384 matches a
     ;quarter turn in the 16-bit angle format used by the sincos routines.
     ;NOTE(review): presumably indexed by the input magnitude in steps of
     ;1/256 - the lookup code is not in this part of the file, confirm there.
 378         dw      41
 379         dw      81
 380         dw      122
 381         dw      163
 382         dw      204
 383         dw      244
 384         dw      285
 385         dw      326
 386         dw      367
 387         dw      408
 388         dw      448
 389         dw      489
 390         dw      530
 391         dw      571
 392         dw      612
 393         dw      652
 394         dw      693
 395         dw      734
 396         dw      775
 397         dw      816
 398         dw      857
 399         dw      897
 400         dw      938
 401         dw      979
 402         dw      1020
 403         dw      1061
 404         dw      1102
 405         dw      1143
 406         dw      1184
 407         dw      1225
 408         dw      1266
 409         dw      1307
 410         dw      1348
 411         dw      1389
 412         dw      1431
 413         dw      1472
 414         dw      1513
 415         dw      1554
 416         dw      1595
 417         dw      1636
 418         dw      1678
 419         dw      1719
 420         dw      1760
 421         dw      1802
 422         dw      1843
 423         dw      1884
 424         dw      1926
 425         dw      1967
 426         dw      2009
 427         dw      2050
 428         dw      2092
 429         dw      2134
 430         dw      2175
 431         dw      2217
 432         dw      2259
 433         dw      2300
 434         dw      2342
 435         dw      2384
 436         dw      2426
 437         dw      2468
 438         dw      2510
 439         dw      2551
 440         dw      2593
 441         dw      2636
 442         dw      2678
 443         dw      2720
 444         dw      2762
 445         dw      2804
 446         dw      2847
 447         dw      2889
 448         dw      2931
 449         dw      2974
 450         dw      3016
 451         dw      3059
 452         dw      3101
 453         dw      3144
 454         dw      3187
 455         dw      3229
 456         dw      3272
 457         dw      3315
 458         dw      3358
 459         dw      3401
 460         dw      3444
 461         dw      3487
 462         dw      3530
 463         dw      3573
 464         dw      3617
 465         dw      3660
 466         dw      3704
 467         dw      3747
 468         dw      3791
 469         dw      3834
 470         dw      3878
 471         dw      3922
 472         dw      3965
 473         dw      4009
 474         dw      4053
 475         dw      4097
 476         dw      4142
 477         dw      4186
 478         dw      4230
 479         dw      4275
 480         dw      4319
 481         dw      4364
 482         dw      4408
 483         dw      4453
 484         dw      4498
 485         dw      4543
 486         dw      4588
 487         dw      4633
 488         dw      4678
 489         dw      4723
 490         dw      4768
 491         dw      4814
 492         dw      4859
 493         dw      4905
 494         dw      4951
 495         dw      4997
 496         dw      5043
 497         dw      5089
 498         dw      5135
 499         dw      5181
 500         dw      5228
 501         dw      5274
 502         dw      5321
 503         dw      5367
 504         dw      5414
 505         dw      5461
 506         dw      5508
 507         dw      5556
 508         dw      5603
 509         dw      5651
 510         dw      5698
 511         dw      5746
 512         dw      5794
 513         dw      5842
 514         dw      5890
 515         dw      5938
 516         dw      5987
 517         dw      6035
 518         dw      6084
 519         dw      6133
 520         dw      6182
 521         dw      6231
 522         dw      6281
 523         dw      6330
 524         dw      6380
 525         dw      6430
 526         dw      6480
 527         dw      6530
 528         dw      6580
 529         dw      6631
 530         dw      6681
 531         dw      6732
 532         dw      6783
 533         dw      6835
 534         dw      6886
 535         dw      6938
 536         dw      6990
 537         dw      7042
 538         dw      7094
 539         dw      7147
 540         dw      7199
 541         dw      7252
 542         dw      7306
 543         dw      7359
 544         dw      7413
 545         dw      7466
 546         dw      7521
 547         dw      7575
 548         dw      7630
 549         dw      7684
 550         dw      7740
 551         dw      7795
 552         dw      7851
 553         dw      7907
 554         dw      7963
 555         dw      8019
 556         dw      8076
 557         dw      8133
 558         dw      8191
 559         dw      8249
 560         dw      8307
 561         dw      8365
 562         dw      8424
 563         dw      8483
 564         dw      8543
 565         dw      8602
 566         dw      8663
 567         dw      8723
 568         dw      8784
 569         dw      8846
 570         dw      8907
 571         dw      8970
 572         dw      9032
 573         dw      9095
 574         dw      9159
 575         dw      9223
 576         dw      9288
 577         dw      9353
 578         dw      9418
 579         dw      9484
 580         dw      9551
 581         dw      9618
 582         dw      9686
 583         dw      9754
 584         dw      9823
 585         dw      9892
 586         dw      9963
 587         dw      10034
 588         dw      10105
 589         dw      10177
 590         dw      10251
 591         dw      10324
 592         dw      10399
 593         dw      10475
 594         dw      10551
 595         dw      10628
 596         dw      10706
 597         dw      10785
 598         dw      10866
 599         dw      10947
 600         dw      11029
 601         dw      11113
 602         dw      11198
 603         dw      11284
 604         dw      11371
 605         dw      11460
 606         dw      11550
 607         dw      11642
 608         dw      11736
 609         dw      11831
 610         dw      11929
 611         dw      12028
 612         dw      12130
 613         dw      12234
 614         dw      12340
 615         dw      12449
 616         dw      12561
 617         dw      12677
 618         dw      12796
 619         dw      12919
 620         dw      13046
 621         dw      13178
 622         dw      13315
 623         dw      13459
 624         dw      13610
 625         dw      13770
 626         dw      13939
 627         dw      14121
 628         dw      14319
 629         dw      14538
 630         dw      14786
 631         dw      15079
 632         dw      15462
 633         dw      16384
 634         dw      16384   ;extra for when exactly 1
 635
 636
 637 acos_table      dw      16384
     ;arccosine lookup table, 258 words falling from 16384 to 0 (16384 matches
     ;a quarter turn in the 16-bit angle format used by the sincos routines).
     ;NOTE(review): presumably indexed like asin_table, by the input magnitude
     ;in steps of 1/256 - confirm against fix_acos_asm.
 638         dw      16343
 639         dw      16303
 640         dw      16262
 641         dw      16221
 642         dw      16180
 643         dw      16140
 644         dw      16099
 645         dw      16058
 646         dw      16017
 647         dw      15976
 648         dw      15936
 649         dw      15895
 650         dw      15854
 651         dw      15813
 652         dw      15772
 653         dw      15732
 654         dw      15691
 655         dw      15650
 656         dw      15609
 657         dw      15568
 658         dw      15527
 659         dw      15487
 660         dw      15446
 661         dw      15405
 662         dw      15364
 663         dw      15323
 664         dw      15282
 665         dw      15241
 666         dw      15200
 667         dw      15159
 668         dw      15118
 669         dw      15077
 670         dw      15036
 671         dw      14995
 672         dw      14953
 673         dw      14912
 674         dw      14871
 675         dw      14830
 676         dw      14789
 677         dw      14748
 678         dw      14706
 679         dw      14665
 680         dw      14624
 681         dw      14582
 682         dw      14541
 683         dw      14500
 684         dw      14458
 685         dw      14417
 686         dw      14375
 687         dw      14334
 688         dw      14292
 689         dw      14250
 690         dw      14209
 691         dw      14167
 692         dw      14125
 693         dw      14084
 694         dw      14042
 695         dw      14000
 696         dw      13958
 697         dw      13916
 698         dw      13874
 699         dw      13833
 700         dw      13791
 701         dw      13748
 702         dw      13706
 703         dw      13664
 704         dw      13622
 705         dw      13580
 706         dw      13537
 707         dw      13495
 708         dw      13453
 709         dw      13410
 710         dw      13368
 711         dw      13325
 712         dw      13283
 713         dw      13240
 714         dw      13197
 715         dw      13155
 716         dw      13112
 717         dw      13069
 718         dw      13026
 719         dw      12983
 720         dw      12940
 721         dw      12897
 722         dw      12854
 723         dw      12811
 724         dw      12767
 725         dw      12724
 726         dw      12680
 727         dw      12637
 728         dw      12593
 729         dw      12550
 730         dw      12506
 731         dw      12462
 732         dw      12419
 733         dw      12375
 734         dw      12331
 735         dw      12287
 736         dw      12242
 737         dw      12198
 738         dw      12154
 739         dw      12109
 740         dw      12065
 741         dw      12020
 742         dw      11976
 743         dw      11931
 744         dw      11886
 745         dw      11841
 746         dw      11796
 747         dw      11751
 748         dw      11706
 749         dw      11661
 750         dw      11616
 751         dw      11570
 752         dw      11525
 753         dw      11479
 754         dw      11433
 755         dw      11387
 756         dw      11341
 757         dw      11295
 758         dw      11249
 759         dw      11203
 760         dw      11156
 761         dw      11110
 762         dw      11063
 763         dw      11017
 764         dw      10970
 765         dw      10923
 766         dw      10876
 767         dw      10828
 768         dw      10781
 769         dw      10733
 770         dw      10686
 771         dw      10638
 772         dw      10590
 773         dw      10542
 774         dw      10494
 775         dw      10446
 776         dw      10397
 777         dw      10349
 778         dw      10300
 779         dw      10251
 780         dw      10202
 781         dw      10153
 782         dw      10103
 783         dw      10054
 784         dw      10004
 785         dw      9954
 786         dw      9904
 787         dw      9854
 788         dw      9804
 789         dw      9753
 790         dw      9703
 791         dw      9652
 792         dw      9601
 793         dw      9549
 794         dw      9498
 795         dw      9446
 796         dw      9394
 797         dw      9342
 798         dw      9290
 799         dw      9237
 800         dw      9185
 801         dw      9132
 802         dw      9078
 803         dw      9025
 804         dw      8971
 805         dw      8918
 806         dw      8863
 807         dw      8809
 808         dw      8754
 809         dw      8700
 810         dw      8644
 811         dw      8589
 812         dw      8533
 813         dw      8477
 814         dw      8421
 815         dw      8365
 816         dw      8308
 817         dw      8251
 818         dw      8193
 819         dw      8135
 820         dw      8077
 821         dw      8019
 822         dw      7960
 823         dw      7901
 824         dw      7841
 825         dw      7782
 826         dw      7721
 827         dw      7661
 828         dw      7600
 829         dw      7538
 830         dw      7477
 831         dw      7414
 832         dw      7352
 833         dw      7289
 834         dw      7225
 835         dw      7161
 836         dw      7096
 837         dw      7031
 838         dw      6966
 839         dw      6900
 840         dw      6833
 841         dw      6766
 842         dw      6698
 843         dw      6630
 844         dw      6561
 845         dw      6492
 846         dw      6421
 847         dw      6350
 848         dw      6279
 849         dw      6207
 850         dw      6133
 851         dw      6060
 852         dw      5985
 853         dw      5909
 854         dw      5833
 855         dw      5756
 856         dw      5678
 857         dw      5599
 858         dw      5518
 859         dw      5437
 860         dw      5355
 861         dw      5271
 862         dw      5186
 863         dw      5100
 864         dw      5013
 865         dw      4924
 866         dw      4834
 867         dw      4742
 868         dw      4648
 869         dw      4553
 870         dw      4455
 871         dw      4356
 872         dw      4254
 873         dw      4150
 874         dw      4044
 875         dw      3935
 876         dw      3823
 877         dw      3707
 878         dw      3588
 879         dw      3465
 880         dw      3338
 881         dw      3206
 882         dw      3069
 883         dw      2925
 884         dw      2774
 885         dw      2614
 886         dw      2445
 887         dw      2263
 888         dw      2065
 889         dw      1846
 890         dw      1598
 891         dw      1305
 892         dw      922
 893         dw      0
 894         dw      0       ;extra for when exactly 1
 895
 896
 897 guess_table:
     ;first-guess table for the square root routines: 256 bytes, entry i holds
     ;floor(sqrt(i)), except that entry 0 holds 1 so a looked-up guess is never
     ;zero.  generated rather than listed: root r fills the 2r+1 entries
     ;r*r .. r*r+2r.
 898         db      1                               ;entry 0
 899 %assign GUESS_ROOT 1
 900 %rep 15
 901         times 2*GUESS_ROOT+1 db GUESS_ROOT
 902 %assign GUESS_ROOT GUESS_ROOT+1
 903 %endrep
 904 %undef GUESS_ROOT
 914
 915 [SECTION .text]
 916
 917 %macro abs_eax 0
     ;eax = |eax| (80000000h is left unchanged).  clobbers edx: on exit edx is
     ;the sign mask of the original value, 0 if it was non-negative and -1 if
     ;it was negative - fix_acos_asm pushes that edx as the saved sign.
 918         cdq
 919         xor     eax,edx
 920         sub     eax,edx
 921 %endmacro
 922
 923 %macro m_fixdiv 1
     ;fixed-point divide: eax = (eax << 16) / %1, signed.  the dividend is
     ;widened to 64 bits in edx:eax first.  clobbers edx (idiv leaves the
     ;remainder there).
 924         mov     edx,eax
 925         shl     eax,16          ;low dword  = dividend << 16
 926         sar     edx,16          ;high dword = sign-extended top 16 bits
 927         idiv    %1
 928 %endmacro
 929
 930 _fixdivquadlong:
     ;signed 64-bit by 32-bit divide.
     ;stack args: [esp+4]  = low dword of the dividend
     ;            [esp+8]  = high dword of the dividend
     ;            [esp+12] = divisor
     ;returns eax = quotient (idiv leaves the remainder in edx).
     ;idiv faults if the divisor is zero or the quotient needs more than 32 bits.
 931         mov     edx,[esp+8]
 932         mov     eax,[esp+4]
 933         idiv    dword [esp+12]
 934         ret
 935
 936 _fixmul:
     ;fixed-point multiply of the two stack arguments.
     ;returns eax = bits 16..47 of the signed 64-bit product.  clobbers edx.
 937         mov     eax,[esp+8]
 938         imul    dword [esp+4]   ;edx:eax = full product (order is irrelevant)
 939         shrd    eax,edx,16      ;drop the low 16 fraction bits
 940         ret
 941
 942 _fixdiv:
     ;fixed-point divide: returns eax = ([esp+4] << 16) / [esp+8], signed.
     ;clobbers edx.  idiv faults if the divisor is zero or the quotient does
     ;not fit in 32 bits.
 943         mov     edx,[esp+4]
 944         mov     eax,edx
 945         shl     eax,16          ;low dword  = dividend << 16
 946         sar     edx,16          ;high dword = sign-extended top 16 bits
 947         idiv    dword [esp+8]
 948         ret
 949
 950 _fixmulaccum:
     ;adds the full signed 64-bit product [esp+8] * [esp+12] to the 64-bit
     ;value pointed to by [esp+4] (low dword first, high dword at +4).
     ;clobbers eax, ecx, edx.
 951         mov     eax,[esp+8]
 952         imul    dword [esp+12]  ;edx:eax = product
 953         mov     ecx,[esp+4]     ;ecx -> accumulator (mov leaves flags alone)
 954         add     [ecx],eax
 955         adc     [ecx+4],edx     ;carry from the low dword goes into the high
 956         ret
 957
 958 _fixmuldiv:
     ;returns eax = ([esp+4] * [esp+8]) / [esp+12], signed, keeping the full
     ;64-bit intermediate product.  clobbers edx.  idiv faults if the divisor
     ;is zero or the quotient does not fit in 32 bits.
 959         mov     eax,[esp+8]
 960         imul    dword [esp+4]   ;edx:eax = product (order is irrelevant)
 961         idiv    dword [esp+12]
 962         ret
 963
 964 _fixquadadjust:
     ;[esp+4] points to a 64-bit value (low dword first).
     ;returns eax = bits 16..47 of that value.  clobbers ecx, edx.
 965         mov     ecx,[esp+4]
 966         mov     edx,[ecx+4]     ;high dword
 967         mov     eax,[ecx]       ;low dword
 968         shrd    eax,edx,16
 969         ret
 970
 971 _fixquadnegate:
     ;negates, in place, the 64-bit value pointed to by [esp+4] (low dword
     ;first).  eax is left holding the pointer.
 972         mov     eax,[esp+4]
 973         not     dword [eax+4]   ;high = ~high (not does not touch flags)
 974         neg     dword [eax]     ;low = -low, CF set when low was non-zero
 975         sbb     dword [eax+4],-1 ;high = ~high + 1 - CF
 976         ret
 977
 978 ;standard Newtonian-iteration square root routine.  takes eax, returns eax
 979 ;(a 16-bit result, zero-extended).  returns 0 when the input is zero or
     ;negative.  trashes ecx,edx; ebx,esi,edi are saved and restored.
     ;_long_sqrt takes its argument at [esp+4]; long_sqrt_asm takes it in eax.
 980 _long_sqrt:
 981         mov eax,[esp+4]
 982 long_sqrt_asm:
 983         or      eax,eax ;check sign
 984         jle     near error   ;zero or negative (taken before anything is pushed)
 985
 986         push ebx
 987         push esi
 988         push edi
 989
 990         mov     edx,eax
 991         and     eax,0ffffh
 992         shr     edx,16  ;split eax -> dx:ax
 993
 994 ;get a good first guess by checking which byte most significant bit is in
     ;the guess is guess_table[that byte] shifted left by half the byte's bit
     ;position (12, 8, 4 or 0)
 995         xor     ebx,ebx ;clear high bytes for index
 996
 997         or      dh,dh   ;highest byte
 998         jz      not_dh
 999         mov     bl,dh   ;get value for lookup
1000         mov     cl,12
1001         jmp     got_guess
1002 not_dh: or      dl,dl
1003         jz      not_dl
1004         mov     bl,dl   ;get value for lookup
1005         mov     cl,8
1006         jmp     got_guess
1007 not_dl: or      ah,ah
1008         jz      not_ah
1009         mov     bl,ah   ;get value for lookup
1010         mov     cl,4
1011         jmp     got_guess
1012 not_ah: mov     bl,al   ;get value for lookup
1013         mov     cl,0
1014 got_guess:
1015         movzx   ebx,byte [guess_table+ebx] ;get byte guess
1016         sal     ebx,cl  ;get in right place
1017
1018         mov     ecx,eax
1019         mov     esi,edx ;save dx:ax (ecx = low word, esi = high word)
1020
1021 ;the loop nearly always executes 3 times, so we'll unroll it 2 times and
1022 ;not do any checking until after the third time.  By my calculations, the
1023 ;loop is executed 2 times in 99.97% of cases, 3 times in 93.65% of cases,
1024 ;four times in 16.18% of cases, and five times in 0.44% of cases.  It never
1025 ;executes more than five times.  By timing, I determined that it is faster
1026 ;to always execute three times and not check for termination the first two
1027 ;times through.  This means that in 93.65% of cases, we save 6 cmp/jcc pairs,
1028 ;and in 6.35% of cases we do an extra divide.  In real life, these numbers
1029 ;might not be the same.
1030
1031 ;newt_loop:
1032 %rep 2
1033         mov     eax,ecx
1034         mov     edx,esi ;restore dx:ax
1035         div     bx      ;dx:ax / bx
1036 ;        mov     edi,ebx ;save for compare
1037         add     ebx,eax
1038         rcr     ebx,1    ;next guess = (d + q)/2, carry from the add shifted back in
1039 %endrep
1040
     ;checked iterations.  ebx = current guess, edi = previous guess
1041 newt_loop:      mov     eax,ecx
1042         mov     edx,esi   ;restore dx:ax
1043         div     bx      ;dx:ax / bx
1044         cmp     eax,ebx ;correct?
1045         je      got_it  ;..yep
1046         mov     edi,ebx   ;save for compare
1047         add     ebx,eax
1048         rcr     ebx,1    ;next guess = (d + q)/2
1049         cmp     ebx,eax
1050         je      almost_got_it   ;new guess equals the quotient
1051         cmp     ebx,edi
1052         jne     newt_loop       ;keep going until the guess stops changing
1053
     ;the guess has settled: return it, plus one if the last divide left a
     ;remainder (dx is still the remainder of that div)
1054 almost_got_it:  mov     eax,ebx
1055         or      dx,dx   ;check remainder
1056         jz      got_it
1057         inc     eax
1058 got_it: and eax,0ffffh
1059         pop edi
1060         pop esi
1061         pop ebx
1062         ret
1063
1064 ;sqrt called with zero or negative input. return zero
     ;also the target of quad_sqrt_asm's negative-input check.  reached before
     ;any register has been pushed, so there is nothing to pop.
1065 error:  xor     eax,eax
1066         ret
1067
1068 ;standard Newtonian-iteration square root routine.  takes edx:eax, returns eax
     ;returns 0 for a negative input.  when the value fits in 31 bits the work
     ;is handed to long_sqrt_asm.  trashes ecx,edx; ebx,esi,edi are saved and
     ;restored.  _quad_sqrt takes the low dword at [esp+4] and the high dword
     ;at [esp+8]; quad_sqrt_asm takes the value in edx:eax.
1069 _quad_sqrt:
1070         mov     eax,[esp+4]
1071         mov     edx,[esp+8]
1072 quad_sqrt_asm:
1073         or      edx,edx ;check sign
1074         js      error   ;can't do negative number!
1075         jnz     must_do_quad    ;we really must do 64/32 div
1076         or      eax,eax ;check high bit of low longword
1077         jns     near long_sqrt_asm   ;we can use longword version
1078 must_do_quad:
1079
1080         push ebx
1081         push esi
1082         push edi
1083
1084 ;get a good first guess by checking which byte most significant bit is in
     ;only the four bytes of edx are examined; the guess is guess_table[byte]
     ;shifted left by half the byte's bit position within edx:eax
1085         xor     ebx,ebx ;clear high bytes for index
1086
1087         ror     edx,16  ;get high 2 bytes
1088
1089         or      dh,dh
1090         jz      q_not_dh
1091         mov     bl,dh   ;get value for lookup
1092         mov     cl,12+16
1093         ror     edx,16  ;restore edx
1094         jmp     q_got_guess
1095 q_not_dh:       or      dl,dl
1096         jz      q_not_dl
1097         mov     bl,dl   ;get value for lookup
1098         mov     cl,8+16
1099         ror     edx,16  ;restore edx
1100         jmp     q_got_guess
1101 q_not_dl:       ror     edx,16  ;restore edx
1102         or      dh,dh
1103         jz      q_not_ah
1104         mov     bl,dh   ;get value for lookup
1105         mov     cl,4+16
1106         jmp     q_got_guess
1107 q_not_ah:       mov     bl,dl   ;get value for lookup (may be 0; table entry 0 is 1)
1108         mov     cl,0+16
1109 q_got_guess:
1110         movzx   ebx,byte [guess_table+ebx] ;get byte guess
1111         sal     ebx,cl  ;get in right place
1112
1113 q_really_got_guess:
1114         mov     ecx,eax
1115         mov     esi,edx ;save edx:eax
1116
1117 ;quad loop usually executes 4 times
     ;so the first three iterations are unrolled with no termination check.
     ;they do not need the previous guess in edi (it is reloaded at q_newt_loop
     ;before it is ever compared), so the save is disabled here exactly as in
     ;long_sqrt.
1118
1119 ;q_newt_loop:
1120 %rep 3
1121         mov     eax,ecx
1122         mov     edx,esi ;restore edx:eax
1123         div     ebx     ;edx:eax / ebx
1124 ;        mov     edi,ebx ;save for compare
1125         add     ebx,eax
1126         rcr     ebx,1   ;next guess = (d + q)/2, carry from the add shifted back in
1127 %endrep
1128
     ;checked iterations.  ebx = current guess, edi = previous guess
1129 q_newt_loop:    mov     eax,ecx
1130         mov     edx,esi ;restore edx:eax
1131         div     ebx     ;edx:eax / ebx
1132         cmp     eax,ebx ;correct?
1133         je      q_got_it        ;..yep
1134         mov     edi,ebx ;save for compare
1135         add     ebx,eax
1136         rcr     ebx,1   ;next guess = (d + q)/2
1137         cmp     ebx,eax
1138         je      q_almost_got_it ;new guess equals the quotient
1139         cmp     ebx,edi
1140         jne     q_newt_loop     ;keep going until the guess stops changing
1141
     ;the guess has settled: return it, plus one if the last divide left a
     ;remainder (edx is still the remainder of that div)
1142 q_almost_got_it:        mov     eax,ebx
1143         or      edx,edx ;check remainder
1144         jz      q_got_it
1145         inc     eax
1146 q_got_it:
1147         pop edi
1148         pop esi
1149         pop ebx
1150         ret
1151
1152
1153 ;square root of the fix at [esp+4], returned as a fix in eax.
     ;long_sqrt_asm hands back a zero-extended 16-bit root of the raw 32-bit
     ;value; shifting it left by 8 rescales that to 16.16.
1154 _fix_sqrt:
1155         mov     eax,[esp+4]
1156         call    long_sqrt_asm   ;eax = integer root, upper half already clear
1157         shl     eax,8
1158         ret
1160
;the sincos functions have two variants: the C version is passed pointers
;to variables for sin & cos, and the assembly version returns the values
;in two registers
1164
;table-only sine/cosine (no interpolation)
;in:  ax  = angle; only the high byte (ah) is used, as the table index
;out: eax = sin, ebx = cos
;table entries are signed words; a left shift by 2 turns them into a fix
fix_fastsincos_asm:
        movzx   eax, ah                         ;eax = table index, 0..255
        movsx   ebx, word [cos_table+eax*2]     ;cos first: the sin load destroys the index
        movsx   eax, word [sin_table+eax*2]
        shl     ebx, 2                          ;make a fix
        shl     eax, 2                          ;make a fix
        ret
1173
;interpolating sine/cosine
;in:  ax  = angle; ah picks the table entry, al is the fraction of the way
;           from that entry to the next one
;out: eax = sin, ebx = cos
;ecx and edx are saved and restored
fix_sincos_asm:
        push    ecx
        push    edx

        movzx   ecx, al                         ;ecx = fraction, 0..255
        movzx   edx, ah                         ;edx = table index, 0..255

;sin = entry + (((next - entry) * fraction) >> 8), then scaled to a fix
        movsx   eax, word [sin_table+edx*2]
        movsx   ebx, word [sin_table+edx*2+2]
        sub     ebx, eax
        imul    ebx, ecx
        sar     ebx, 8
        add     eax, ebx
        shl     eax, 2

;cos the same way; the index register doubles as the "next entry" register,
;so it is loaded last
        movsx   ebx, word [cos_table+edx*2]
        movsx   edx, word [cos_table+edx*2+2]
        sub     edx, ebx
        imul    edx, ecx
        sar     edx, 8
        add     ebx, edx
        shl     ebx, 2

        pop     edx
        pop     ecx
        ret
1202
1203         align   16
1204
;arccosine, linearly interpolated between acos_table entries
;C entry (_fix_acos):      [esp+4] = cos of angle
;asm entry (fix_acos_asm): eax     = cos of angle
;returns ax = angle; ebx, ecx and edx are preserved
;input magnitudes above 10000h are clamped to 10000h
_fix_acos:
        mov     eax,[esp+4]
;takes eax=cos angle, returns ax=angle
fix_acos_asm:
        push    ebx
        push    ecx
        push    edx

        abs_eax         ;get abs eax (macro; expected to leave the sign mask, 0 or -1, in edx)
        push    edx     ;save sign

;the clamp must be an unsigned compare: the absolute value of 80000000h is
;still 80000000h, which a signed compare accepts as "small" and which would
;then become a large negative table index below
        cmp     eax,10000h
        jbe     no_acos_oflow
        mov     eax,10000h
no_acos_oflow:
        movzx   ecx,al  ;save low byte (fraction)

        mov     edx,eax

        sar     edx,8   ;get high byte (+1 bit): table index 0..100h
        movsx   eax,word [acos_table+edx*2]     ;table entry
        movsx   ebx,word [acos_table+edx*2+2]   ;following entry
        sub     ebx,eax
        imul    ebx,ecx ;mul by fraction
        sar     ebx,8
        add     eax,ebx ;add in frac part

        pop     edx     ;get sign back
        xor     eax,edx
        sub     eax,edx ;make correct sign
        and     edx,8000h       ;zero or 1/2
        add     eax,edx ;negative input: result = 1/2 - acos(|input|)

        pop     edx
        pop     ecx
        pop     ebx

        ret
1243
;arcsine, linearly interpolated between asin_table entries
;C entry (_fix_asin):      [esp+4] = sin of angle
;asm entry (fix_asin_asm): eax     = sin of angle
;takes eax=sin angle, returns ax=angle; ebx, ecx and edx are preserved
;input magnitudes above 10000h are clamped to 10000h
_fix_asin:
        mov     eax,[esp+4]
fix_asin_asm:
        push    ebx
        push    ecx
        push    edx

        abs_eax         ;get abs value (macro; expected to leave the sign mask, 0 or -1, in edx)
        push    edx     ;save sign

;the clamp must be an unsigned compare: the absolute value of 80000000h is
;still 80000000h, which a signed compare accepts as "small" and which would
;then become a large negative table index below
        cmp     eax,10000h
        jbe     no_asin_oflow
        mov     eax,10000h
no_asin_oflow:
        movzx   ecx,al  ;save low byte (fraction)

        mov     edx,eax

        sar     edx,8   ;get high byte (+1 bit): table index 0..100h
        movsx   eax,word [asin_table+edx*2]     ;table entry
        movsx   ebx,word [asin_table+edx*2+2]   ;following entry
        sub     ebx,eax
        imul    ebx,ecx ;mul by fraction
        sar     ebx,8
        add     eax,ebx ;add in frac part

        pop     edx     ;get sign back
        xor     eax,edx ;make sign correct
        sub     eax,edx ;asin(-x) = -asin(x)

        pop     edx
        pop     ecx
        pop     ebx

        ret
1280
;given cos & sin of an angle, return that angle.
;C entry point: first stack argument = cos, second stack argument = sin.
;returns ax. parms need not be normalized, that is, the ratio of the two
;parms must equal the ratio cos/sin, but the parms need not be the actual
;cos & sin.
;NOTE: this is different from the standard C atan2, since it is left-handed.
;uses either asin or acos, to get better precision: whichever of the two
;parms has the smaller magnitude is normalized and looked up.
;ebx, ecx and edx are preserved.

_fix_atan2:
        push    ebx
        push    ecx
        push    edx
        mov     eax,[esp+16]    ;first argument (cos); +12 for the three pushes
        mov     ebx,[esp+20]    ;second argument (sin)

        %ifdef NOT_DEF
        %ifndef  NDEBUG
        mov     edx,eax
        or      edx,ebx
        break_if        z,'Both parms to atan2 are zero!'
        %endif
        %endif

        push    ebx             ;keep sin ...
        push    eax             ;... and cos (on top) for after the magnitude calc

;find smaller of two
;abs_eax is expected to leave the absolute value in eax (fix_acos_asm and
;fix_asin_asm depend on the same behaviour). Nothing may re-apply the sign
;after it: doing so would compare |cos| against the signed sin, and a
;negative sin could then never select the arccos path.
        push    eax ;save
        push    ebx
        abs_eax         ;eax = |cos|
        xchg    eax,ebx ;ebx = |cos|, eax = sin
        abs_eax         ;eax = |sin|
        cmp     ebx,eax ;compare |cos| to |sin|
        pop     ebx     ;pops leave the flags alone
        pop     eax
        jl      use_cos

;sin is smaller, use arcsin

        imul    eax             ;edx:eax = cos^2
        xchg    eax,ebx         ;eax = sin, ebx = low half of cos^2
        mov     ecx,edx         ;ecx = high half of cos^2
        imul    eax             ;edx:eax = sin^2
        add     eax,ebx
        adc     edx,ecx         ;edx:eax = cos^2 + sin^2
        call    quad_sqrt_asm
        mov     ecx,eax ;ecx = mag

        pop     ebx     ;get cos, save in ebx
        pop     eax     ;get sin
        jecxz   sign_ok ;abort! magnitude is zero: skip the divide, return eax as it stands
        m_fixdiv        ecx     ;normalize it
        call    fix_asin_asm    ;get angle
        or      ebx,ebx ;check sign of cos
        jns     sign_ok
        sub     eax,8000h       ;adjust
        neg     eax             ;cos negative: angle = 8000h - asin
sign_ok:
        pop     edx
        pop     ecx
        pop     ebx
        ret


;cos is smaller, use arccos

use_cos:        imul    eax     ;edx:eax = cos^2
        xchg    eax,ebx         ;eax = sin, ebx = low half of cos^2
        mov     ecx,edx         ;ecx = high half of cos^2
        imul    eax             ;edx:eax = sin^2
        add     eax,ebx
        adc     edx,ecx         ;edx:eax = cos^2 + sin^2
        call    quad_sqrt_asm
        mov     ecx,eax         ;ecx = mag

        pop     eax     ;get cos
        m_fixdiv        ecx     ;normalize it
        call    fix_acos_asm ; get angle
        mov     ebx,eax ;save in ebx
        pop     eax     ;get sin
        cdq             ;get sign of sin
        mov     eax,ebx ;get angle back
        xor     eax,edx
        sub     eax,edx ;make sign correct: negate the angle when sin is negative

        pop     edx
        pop     ecx
        pop     ebx
        ret
1370
1371
;C wrapper around fix_fastsincos_asm
;stack args: angle, pointer for sin, pointer for cos
;either (or both) pointers can be null, in which case that value is not stored
;trashes eax,ecx,edx; ebx is saved and restored
;no_sin and no_cos are ordinary file-scope labels: keep their names and
;positions, since code outside this routine can branch to them
_fix_fastsincos:
        push    ebx                     ;the asm routine returns cos in ebx
        mov     eax,[esp+8]             ;angle
        call    fix_fastsincos_asm      ;eax = sin, ebx = cos
        mov     edx,[esp+16]            ;edx = where to put cos
        mov     ecx,[esp+12]            ;ecx = where to put sin
        test    ecx,ecx
        jz      no_sin
        mov     [ecx],eax
no_sin: test    edx,edx
        jz      no_cos
        mov     [edx],ebx
no_cos: pop     ebx
        ret
1389
;C version - takes angle,*sin,*cos. fills in sin&cos.
;trashes eax,ecx,edx; ebx is saved and restored
;either (or both) pointers can be null, in which case that value is not stored
;the null-pointer branches use labels local to this routine, so it does not
;depend on the layout of any other routine
_fix_sincos:
        push    ebx                     ;the asm routine returns cos in ebx
        mov     eax,[esp+8]             ;angle
        call    fix_sincos_asm          ;eax = sin, ebx = cos
        mov     ecx,[esp+12]            ;ecx = where to put sin
        mov     edx,[esp+16]            ;edx = where to put cos
        or      ecx,ecx
        jz      .no_sin
        mov     [ecx],eax
.no_sin:
        or      edx,edx
        jz      .no_cos
        mov     [edx],ebx
.no_cos:
        pop     ebx
        ret