maths/fix.asm

   1 ; $Id: fix.asm,v 1.4 2003-12-08 21:21:16 btb Exp $
   2 ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
   3 ;SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
   4 ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
   5 ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
   6 ;IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
   7 ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
   8 ;FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
   9 ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
  10 ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
  11 ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
  12
   13 [BITS 32]        ; generate code for 32-bit mode
   14
      ; On ELF targets C symbols carry no leading underscore, so the underscored
      ; names used for the C entry points in this file are mapped to bare names.
   15 %ifdef __ELF__
   16 %define _fixdivquadlong fixdivquadlong
   17 %define _fixmul fixmul
   18 %define _fixdiv fixdiv
   19 %define _fixmulaccum fixmulaccum
   20 %define _fixmuldiv fixmuldiv
   21 %define _fixquadadjust fixquadadjust
   22 %define _fixquadnegate fixquadnegate
   23 %define _quad_sqrt quad_sqrt
   24 %define _long_sqrt long_sqrt
   25 %define _fix_sqrt fix_sqrt
   26 %define _fix_asin fix_asin
   27 %define _fix_acos fix_acos
   28 %define _fix_atan2 fix_atan2
   29 %define _fix_fastsincos fix_fastsincos
   30 %define _fix_sincos fix_sincos
   31 %endif
  32
   33 global _fixdivquadlong  ; exported under C symbol names (the __ELF__ defines strip the underscore)
   34 global _fixmul
   35 global _fixdiv
   36 global _fixmulaccum
   37 global _fixmuldiv
   38 global _fixquadadjust
   39 global _fixquadnegate
   40 global _long_sqrt
   41 global _quad_sqrt
   42 global _fix_sqrt
   43 global _fix_asin
   44 global _fix_acos
   45 global _fix_atan2
   46 global _fix_fastsincos
   47 global _fix_sincos
   48 global quad_sqrt_asm    ; for assembler vecmat
   49 global fix_sincos_asm   ; for assembler vecmat
   50 global fix_acos_asm     ; for assembler vecmat
   51 global long_sqrt_asm         ; for assembler vecmat
   52 ;global fix_asin_asm
   53 global fix_fastsincos_asm       ; register-argument entry: ax=angle in, eax=sin and ebx=cos out
  54
   55 [SECTION .data]
      ; Sine lookup: 256 steps per full turn, 16384 represents 1.0 (the sincos
      ; routines shift entries left by 2 to form a 16.16 fix).
      ; sin_table itself holds only the first quarter turn (64 words).  cos_table
      ; follows it directly in memory, so sin_table indices 64 and up read on into
      ; cos_table, which is the same wave a quarter turn later.
   56 sin_table  dw      0
  57         dw      402
  58         dw      804
  59         dw      1205
  60         dw      1606
  61         dw      2006
  62         dw      2404
  63         dw      2801
  64         dw      3196
  65         dw      3590
  66         dw      3981
  67         dw      4370
  68         dw      4756
  69         dw      5139
  70         dw      5520
  71         dw      5897
  72         dw      6270
  73         dw      6639
  74         dw      7005
  75         dw      7366
  76         dw      7723
  77         dw      8076
  78         dw      8423
  79         dw      8765
  80         dw      9102
  81         dw      9434
  82         dw      9760
  83         dw      10080
  84         dw      10394
  85         dw      10702
  86         dw      11003
  87         dw      11297
  88         dw      11585
  89         dw      11866
  90         dw      12140
  91         dw      12406
  92         dw      12665
  93         dw      12916
  94         dw      13160
  95         dw      13395
  96         dw      13623
  97         dw      13842
  98         dw      14053
  99         dw      14256
 100         dw      14449
 101         dw      14635
 102         dw      14811
 103         dw      14978
 104         dw      15137
 105         dw      15286
 106         dw      15426
 107         dw      15557
 108         dw      15679
 109         dw      15791
 110         dw      15893
 111         dw      15986
 112         dw      16069
 113         dw      16143
 114         dw      16207
 115         dw      16261
 116         dw      16305
 117         dw      16340
 118         dw      16364
 119         dw      16379
  120 cos_table       dw      16384   ; 257 words: one full turn starting at cos(0) = 16384, plus one
      ; wrap-around entry so that entry+1 can still be read when interpolating from index 255
 121         dw      16379
 122         dw      16364
 123         dw      16340
 124         dw      16305
 125         dw      16261
 126         dw      16207
 127         dw      16143
 128         dw      16069
 129         dw      15986
 130         dw      15893
 131         dw      15791
 132         dw      15679
 133         dw      15557
 134         dw      15426
 135         dw      15286
 136         dw      15137
 137         dw      14978
 138         dw      14811
 139         dw      14635
 140         dw      14449
 141         dw      14256
 142         dw      14053
 143         dw      13842
 144         dw      13623
 145         dw      13395
 146         dw      13160
 147         dw      12916
 148         dw      12665
 149         dw      12406
 150         dw      12140
 151         dw      11866
 152         dw      11585
 153         dw      11297
 154         dw      11003
 155         dw      10702
 156         dw      10394
 157         dw      10080
 158         dw      9760
 159         dw      9434
 160         dw      9102
 161         dw      8765
 162         dw      8423
 163         dw      8076
 164         dw      7723
 165         dw      7366
 166         dw      7005
 167         dw      6639
 168         dw      6270
 169         dw      5897
 170         dw      5520
 171         dw      5139
 172         dw      4756
 173         dw      4370
 174         dw      3981
 175         dw      3590
 176         dw      3196
 177         dw      2801
 178         dw      2404
 179         dw      2006
 180         dw      1606
 181         dw      1205
 182         dw      804
 183         dw      402
 184         dw      0
 185         dw      -402
 186         dw      -804
 187         dw      -1205
 188         dw      -1606
 189         dw      -2006
 190         dw      -2404
 191         dw      -2801
 192         dw      -3196
 193         dw      -3590
 194         dw      -3981
 195         dw      -4370
 196         dw      -4756
 197         dw      -5139
 198         dw      -5520
 199         dw      -5897
 200         dw      -6270
 201         dw      -6639
 202         dw      -7005
 203         dw      -7366
 204         dw      -7723
 205         dw      -8076
 206         dw      -8423
 207         dw      -8765
 208         dw      -9102
 209         dw      -9434
 210         dw      -9760
 211         dw      -10080
 212         dw      -10394
 213         dw      -10702
 214         dw      -11003
 215         dw      -11297
 216         dw      -11585
 217         dw      -11866
 218         dw      -12140
 219         dw      -12406
 220         dw      -12665
 221         dw      -12916
 222         dw      -13160
 223         dw      -13395
 224         dw      -13623
 225         dw      -13842
 226         dw      -14053
 227         dw      -14256
 228         dw      -14449
 229         dw      -14635
 230         dw      -14811
 231         dw      -14978
 232         dw      -15137
 233         dw      -15286
 234         dw      -15426
 235         dw      -15557
 236         dw      -15679
 237         dw      -15791
 238         dw      -15893
 239         dw      -15986
 240         dw      -16069
 241         dw      -16143
 242         dw      -16207
 243         dw      -16261
 244         dw      -16305
 245         dw      -16340
 246         dw      -16364
 247         dw      -16379
 248         dw      -16384
 249         dw      -16379
 250         dw      -16364
 251         dw      -16340
 252         dw      -16305
 253         dw      -16261
 254         dw      -16207
 255         dw      -16143
 256         dw      -16069
 257         dw      -15986
 258         dw      -15893
 259         dw      -15791
 260         dw      -15679
 261         dw      -15557
 262         dw      -15426
 263         dw      -15286
 264         dw      -15137
 265         dw      -14978
 266         dw      -14811
 267         dw      -14635
 268         dw      -14449
 269         dw      -14256
 270         dw      -14053
 271         dw      -13842
 272         dw      -13623
 273         dw      -13395
 274         dw      -13160
 275         dw      -12916
 276         dw      -12665
 277         dw      -12406
 278         dw      -12140
 279         dw      -11866
 280         dw      -11585
 281         dw      -11297
 282         dw      -11003
 283         dw      -10702
 284         dw      -10394
 285         dw      -10080
 286         dw      -9760
 287         dw      -9434
 288         dw      -9102
 289         dw      -8765
 290         dw      -8423
 291         dw      -8076
 292         dw      -7723
 293         dw      -7366
 294         dw      -7005
 295         dw      -6639
 296         dw      -6270
 297         dw      -5897
 298         dw      -5520
 299         dw      -5139
 300         dw      -4756
 301         dw      -4370
 302         dw      -3981
 303         dw      -3590
 304         dw      -3196
 305         dw      -2801
 306         dw      -2404
 307         dw      -2006
 308         dw      -1606
 309         dw      -1205
 310         dw      -804
 311         dw      -402
 312         dw      0
 313         dw      402
 314         dw      804
 315         dw      1205
 316         dw      1606
 317         dw      2006
 318         dw      2404
 319         dw      2801
 320         dw      3196
 321         dw      3590
 322         dw      3981
 323         dw      4370
 324         dw      4756
 325         dw      5139
 326         dw      5520
 327         dw      5897
 328         dw      6270
 329         dw      6639
 330         dw      7005
 331         dw      7366
 332         dw      7723
 333         dw      8076
 334         dw      8423
 335         dw      8765
 336         dw      9102
 337         dw      9434
 338         dw      9760
 339         dw      10080
 340         dw      10394
 341         dw      10702
 342         dw      11003
 343         dw      11297
 344         dw      11585
 345         dw      11866
 346         dw      12140
 347         dw      12406
 348         dw      12665
 349         dw      12916
 350         dw      13160
 351         dw      13395
 352         dw      13623
 353         dw      13842
 354         dw      14053
 355         dw      14256
 356         dw      14449
 357         dw      14635
 358         dw      14811
 359         dw      14978
 360         dw      15137
 361         dw      15286
 362         dw      15426
 363         dw      15557
 364         dw      15679
 365         dw      15791
 366         dw      15893
 367         dw      15986
 368         dw      16069
 369         dw      16143
 370         dw      16207
 371         dw      16261
 372         dw      16305
 373         dw      16340
 374         dw      16364
 375         dw      16379
 376         dw      16384
 377
  378 asin_table      dw      0       ; entry i ~ asin(i/256) as an angle in 1/65536ths of a turn (16384 = quarter turn)
 379         dw      41
 380         dw      81
 381         dw      122
 382         dw      163
 383         dw      204
 384         dw      244
 385         dw      285
 386         dw      326
 387         dw      367
 388         dw      408
 389         dw      448
 390         dw      489
 391         dw      530
 392         dw      571
 393         dw      612
 394         dw      652
 395         dw      693
 396         dw      734
 397         dw      775
 398         dw      816
 399         dw      857
 400         dw      897
 401         dw      938
 402         dw      979
 403         dw      1020
 404         dw      1061
 405         dw      1102
 406         dw      1143
 407         dw      1184
 408         dw      1225
 409         dw      1266
 410         dw      1307
 411         dw      1348
 412         dw      1389
 413         dw      1431
 414         dw      1472
 415         dw      1513
 416         dw      1554
 417         dw      1595
 418         dw      1636
 419         dw      1678
 420         dw      1719
 421         dw      1760
 422         dw      1802
 423         dw      1843
 424         dw      1884
 425         dw      1926
 426         dw      1967
 427         dw      2009
 428         dw      2050
 429         dw      2092
 430         dw      2134
 431         dw      2175
 432         dw      2217
 433         dw      2259
 434         dw      2300
 435         dw      2342
 436         dw      2384
 437         dw      2426
 438         dw      2468
 439         dw      2510
 440         dw      2551
 441         dw      2593
 442         dw      2636
 443         dw      2678
 444         dw      2720
 445         dw      2762
 446         dw      2804
 447         dw      2847
 448         dw      2889
 449         dw      2931
 450         dw      2974
 451         dw      3016
 452         dw      3059
 453         dw      3101
 454         dw      3144
 455         dw      3187
 456         dw      3229
 457         dw      3272
 458         dw      3315
 459         dw      3358
 460         dw      3401
 461         dw      3444
 462         dw      3487
 463         dw      3530
 464         dw      3573
 465         dw      3617
 466         dw      3660
 467         dw      3704
 468         dw      3747
 469         dw      3791
 470         dw      3834
 471         dw      3878
 472         dw      3922
 473         dw      3965
 474         dw      4009
 475         dw      4053
 476         dw      4097
 477         dw      4142
 478         dw      4186
 479         dw      4230
 480         dw      4275
 481         dw      4319
 482         dw      4364
 483         dw      4408
 484         dw      4453
 485         dw      4498
 486         dw      4543
 487         dw      4588
 488         dw      4633
 489         dw      4678
 490         dw      4723
 491         dw      4768
 492         dw      4814
 493         dw      4859
 494         dw      4905
 495         dw      4951
 496         dw      4997
 497         dw      5043
 498         dw      5089
 499         dw      5135
 500         dw      5181
 501         dw      5228
 502         dw      5274
 503         dw      5321
 504         dw      5367
 505         dw      5414
 506         dw      5461
 507         dw      5508
 508         dw      5556
 509         dw      5603
 510         dw      5651
 511         dw      5698
 512         dw      5746
 513         dw      5794
 514         dw      5842
 515         dw      5890
 516         dw      5938
 517         dw      5987
 518         dw      6035
 519         dw      6084
 520         dw      6133
 521         dw      6182
 522         dw      6231
 523         dw      6281
 524         dw      6330
 525         dw      6380
 526         dw      6430
 527         dw      6480
 528         dw      6530
 529         dw      6580
 530         dw      6631
 531         dw      6681
 532         dw      6732
 533         dw      6783
 534         dw      6835
 535         dw      6886
 536         dw      6938
 537         dw      6990
 538         dw      7042
 539         dw      7094
 540         dw      7147
 541         dw      7199
 542         dw      7252
 543         dw      7306
 544         dw      7359
 545         dw      7413
 546         dw      7466
 547         dw      7521
 548         dw      7575
 549         dw      7630
 550         dw      7684
 551         dw      7740
 552         dw      7795
 553         dw      7851
 554         dw      7907
 555         dw      7963
 556         dw      8019
 557         dw      8076
 558         dw      8133
 559         dw      8191
 560         dw      8249
 561         dw      8307
 562         dw      8365
 563         dw      8424
 564         dw      8483
 565         dw      8543
 566         dw      8602
 567         dw      8663
 568         dw      8723
 569         dw      8784
 570         dw      8846
 571         dw      8907
 572         dw      8970
 573         dw      9032
 574         dw      9095
 575         dw      9159
 576         dw      9223
 577         dw      9288
 578         dw      9353
 579         dw      9418
 580         dw      9484
 581         dw      9551
 582         dw      9618
 583         dw      9686
 584         dw      9754
 585         dw      9823
 586         dw      9892
 587         dw      9963
 588         dw      10034
 589         dw      10105
 590         dw      10177
 591         dw      10251
 592         dw      10324
 593         dw      10399
 594         dw      10475
 595         dw      10551
 596         dw      10628
 597         dw      10706
 598         dw      10785
 599         dw      10866
 600         dw      10947
 601         dw      11029
 602         dw      11113
 603         dw      11198
 604         dw      11284
 605         dw      11371
 606         dw      11460
 607         dw      11550
 608         dw      11642
 609         dw      11736
 610         dw      11831
 611         dw      11929
 612         dw      12028
 613         dw      12130
 614         dw      12234
 615         dw      12340
 616         dw      12449
 617         dw      12561
 618         dw      12677
 619         dw      12796
 620         dw      12919
 621         dw      13046
 622         dw      13178
 623         dw      13315
 624         dw      13459
 625         dw      13610
 626         dw      13770
 627         dw      13939
 628         dw      14121
 629         dw      14319
 630         dw      14538
 631         dw      14786
 632         dw      15079
 633         dw      15462
 634         dw      16384
  635         dw      16384   ;extra entry for when the input is exactly 1
 636
 637
  638 acos_table      dw      16384   ; entry i ~ acos(i/256) as an angle in 1/65536ths of a turn (16384 = quarter turn)
 639         dw      16343
 640         dw      16303
 641         dw      16262
 642         dw      16221
 643         dw      16180
 644         dw      16140
 645         dw      16099
 646         dw      16058
 647         dw      16017
 648         dw      15976
 649         dw      15936
 650         dw      15895
 651         dw      15854
 652         dw      15813
 653         dw      15772
 654         dw      15732
 655         dw      15691
 656         dw      15650
 657         dw      15609
 658         dw      15568
 659         dw      15527
 660         dw      15487
 661         dw      15446
 662         dw      15405
 663         dw      15364
 664         dw      15323
 665         dw      15282
 666         dw      15241
 667         dw      15200
 668         dw      15159
 669         dw      15118
 670         dw      15077
 671         dw      15036
 672         dw      14995
 673         dw      14953
 674         dw      14912
 675         dw      14871
 676         dw      14830
 677         dw      14789
 678         dw      14748
 679         dw      14706
 680         dw      14665
 681         dw      14624
 682         dw      14582
 683         dw      14541
 684         dw      14500
 685         dw      14458
 686         dw      14417
 687         dw      14375
 688         dw      14334
 689         dw      14292
 690         dw      14250
 691         dw      14209
 692         dw      14167
 693         dw      14125
 694         dw      14084
 695         dw      14042
 696         dw      14000
 697         dw      13958
 698         dw      13916
 699         dw      13874
 700         dw      13833
 701         dw      13791
 702         dw      13748
 703         dw      13706
 704         dw      13664
 705         dw      13622
 706         dw      13580
 707         dw      13537
 708         dw      13495
 709         dw      13453
 710         dw      13410
 711         dw      13368
 712         dw      13325
 713         dw      13283
 714         dw      13240
 715         dw      13197
 716         dw      13155
 717         dw      13112
 718         dw      13069
 719         dw      13026
 720         dw      12983
 721         dw      12940
 722         dw      12897
 723         dw      12854
 724         dw      12811
 725         dw      12767
 726         dw      12724
 727         dw      12680
 728         dw      12637
 729         dw      12593
 730         dw      12550
 731         dw      12506
 732         dw      12462
 733         dw      12419
 734         dw      12375
 735         dw      12331
 736         dw      12287
 737         dw      12242
 738         dw      12198
 739         dw      12154
 740         dw      12109
 741         dw      12065
 742         dw      12020
 743         dw      11976
 744         dw      11931
 745         dw      11886
 746         dw      11841
 747         dw      11796
 748         dw      11751
 749         dw      11706
 750         dw      11661
 751         dw      11616
 752         dw      11570
 753         dw      11525
 754         dw      11479
 755         dw      11433
 756         dw      11387
 757         dw      11341
 758         dw      11295
 759         dw      11249
 760         dw      11203
 761         dw      11156
 762         dw      11110
 763         dw      11063
 764         dw      11017
 765         dw      10970
 766         dw      10923
 767         dw      10876
 768         dw      10828
 769         dw      10781
 770         dw      10733
 771         dw      10686
 772         dw      10638
 773         dw      10590
 774         dw      10542
 775         dw      10494
 776         dw      10446
 777         dw      10397
 778         dw      10349
 779         dw      10300
 780         dw      10251
 781         dw      10202
 782         dw      10153
 783         dw      10103
 784         dw      10054
 785         dw      10004
 786         dw      9954
 787         dw      9904
 788         dw      9854
 789         dw      9804
 790         dw      9753
 791         dw      9703
 792         dw      9652
 793         dw      9601
 794         dw      9549
 795         dw      9498
 796         dw      9446
 797         dw      9394
 798         dw      9342
 799         dw      9290
 800         dw      9237
 801         dw      9185
 802         dw      9132
 803         dw      9078
 804         dw      9025
 805         dw      8971
 806         dw      8918
 807         dw      8863
 808         dw      8809
 809         dw      8754
 810         dw      8700
 811         dw      8644
 812         dw      8589
 813         dw      8533
 814         dw      8477
 815         dw      8421
 816         dw      8365
 817         dw      8308
 818         dw      8251
 819         dw      8193
 820         dw      8135
 821         dw      8077
 822         dw      8019
 823         dw      7960
 824         dw      7901
 825         dw      7841
 826         dw      7782
 827         dw      7721
 828         dw      7661
 829         dw      7600
 830         dw      7538
 831         dw      7477
 832         dw      7414
 833         dw      7352
 834         dw      7289
 835         dw      7225
 836         dw      7161
 837         dw      7096
 838         dw      7031
 839         dw      6966
 840         dw      6900
 841         dw      6833
 842         dw      6766
 843         dw      6698
 844         dw      6630
 845         dw      6561
 846         dw      6492
 847         dw      6421
 848         dw      6350
 849         dw      6279
 850         dw      6207
 851         dw      6133
 852         dw      6060
 853         dw      5985
 854         dw      5909
 855         dw      5833
 856         dw      5756
 857         dw      5678
 858         dw      5599
 859         dw      5518
 860         dw      5437
 861         dw      5355
 862         dw      5271
 863         dw      5186
 864         dw      5100
 865         dw      5013
 866         dw      4924
 867         dw      4834
 868         dw      4742
 869         dw      4648
 870         dw      4553
 871         dw      4455
 872         dw      4356
 873         dw      4254
 874         dw      4150
 875         dw      4044
 876         dw      3935
 877         dw      3823
 878         dw      3707
 879         dw      3588
 880         dw      3465
 881         dw      3338
 882         dw      3206
 883         dw      3069
 884         dw      2925
 885         dw      2774
 886         dw      2614
 887         dw      2445
 888         dw      2263
 889         dw      2065
 890         dw      1846
 891         dw      1598
 892         dw      1305
 893         dw      922
 894         dw      0
  895         dw      0       ;extra entry for when the input is exactly 1
 896
 897
  898 guess_table:                ; 256 bytes of first guesses for the sqrt routines: entry i = floor(sqrt(i))
  899         times 1  db 1   ; entry 0 is 1, not 0, so the guess (later used as a divisor) is never zero
  900         times 3  db 1   ; entries 1..3
  901         times 5  db 2   ; entries 4..8
  902         times 7  db 3   ; entries 9..15
  903         times 9  db 4   ; each run covers k*k .. (k+1)*(k+1)-1, i.e. 2k+1 entries
  904         times 11 db 5
  905         times 13 db 6
  906         times 15 db 7
  907         times 17 db 8
  908         times 19 db 9
  909         times 21 db 10
  910         times 23 db 11
  911         times 25 db 12
  912         times 27 db 13
  913         times 29 db 14
  914         times 31 db 15  ; entries 225..255
 915
 916 [SECTION .text]
 917
  918 %macro abs_eax 0
  919         cdq                     ; edx = sign mask of eax (0 if eax >= 0, -1 if negative)
  920         xor     eax,edx         ; negative: eax = ~eax; non-negative: unchanged
  921         sub     eax,edx         ; negative: +1 completes the negate, so eax = |eax|; edx keeps the sign mask
  922 %endmacro
 923
  924 %macro m_fixdiv 1
  925         mov     edx,eax         ; build edx:eax = eax << 16 as a signed 64-bit dividend
  926         sar     edx,16          ; high dword = sign-extended upper 16 bits of eax
  927         shl     eax,16          ; low dword = lower 16 bits of eax moved to the top half
  928         idiv    %1              ; eax = (original eax << 16) / %1, edx = remainder; faults if %1 is 0 or the quotient overflows 32 bits
  929 %endmacro
 930
  931 _fixdivquadlong:            ; stack-argument entry: signed 64-bit dividend / 32-bit divisor, quotient in eax
  932 mov eax,[esp+4]             ; arg 1 = low dword of the dividend
  933 mov edx,[esp+8]             ; arg 2 = high dword of the dividend
  934 idiv dword [esp+12]         ; arg 3 = divisor; eax = quotient, edx = remainder (faults on 0 or overflow)
  935 ret
 936
  937 _fixmul:                    ; stack-argument entry: 16.16 multiply, eax = (arg1 * arg2) >> 16
  938 mov eax,[esp+4]             ; arg 1
  939 imul dword [esp+8]          ; edx:eax = signed 64-bit product arg1 * arg2
  940 shrd eax,edx,16             ; eax = bits 16..47 of the product
  941 ret
 942
  943 _fixdiv:                    ; stack-argument entry: 16.16 divide, eax = (arg1 << 16) / arg2
  944 mov eax,[esp+4]             ; arg 1 = dividend
  945 mov edx,eax                 ; the next three instructions are the same sequence as the m_fixdiv macro
  946 sar edx,16                  ; edx = sign-extended upper 16 bits of the dividend
  947 shl eax,16                  ; edx:eax = dividend << 16 as a signed 64-bit value
  948 idiv dword [esp+8]          ; arg 2 = divisor; eax = quotient (faults on 0 or overflow)
  949 ret
 950
  951 _fixmulaccum:               ; stack-argument entry: adds the 64-bit product arg2 * arg3 to the 64-bit value at arg1
  952 mov ecx,[esp+4]             ; arg 1 = pointer to the accumulator (low dword at [ecx], high dword at [ecx+4])
  953 mov eax,[esp+8]             ; arg 2
  954 imul dword [esp+12]         ; edx:eax = signed 64-bit product arg2 * arg3 (not shifted)
  955 add [ecx],eax               ; add low dwords
  956 adc [ecx+4],edx             ; add high dwords plus the carry from the low add
  957 ret
 958
  959 _fixmuldiv:                 ; stack-argument entry: eax = arg1 * arg2 / arg3 with a 64-bit intermediate
  960 mov eax,[esp+4]             ; arg 1
  961 imul dword [esp+8]          ; edx:eax = signed 64-bit product arg1 * arg2
  962 idiv dword [esp+12]         ; eax = product / arg3, edx = remainder (faults on 0 or overflow)
  963 ret
 964
  965 _fixquadadjust:             ; stack-argument entry: eax = bits 16..47 of the 64-bit value pointed to by arg1
  966 mov ecx,[esp+4]             ; arg 1 = pointer to the 64-bit value
  967 mov eax,[ecx]               ; low dword
  968 mov edx,[ecx+4]             ; high dword
  969 shrd eax,edx,16             ; eax = (edx:eax >> 16), truncated to 32 bits
  970 ret
 971
  972 _fixquadnegate:             ; stack-argument entry: negates in place the 64-bit value pointed to by arg1
  973 mov eax,[esp+4]             ; arg 1 = pointer to the 64-bit value
  974 neg dword [eax]             ; low = -low; carry is set when low was non-zero
  975 not dword [eax+4]           ; high = ~high (NOT leaves the carry from NEG untouched)
  976 sbb dword [eax+4],-1        ; high = ~high + 1 - carry, i.e. +1 only when the low dword was zero
  977 ret
 978
  979 ;standard Newtonian-iteration square root routine.  _long_sqrt takes its
  980 ;argument on the stack; long_sqrt_asm takes it in eax.
      ;returns the root in eax with the upper 16 bits cleared, or 0 when the input
      ;is zero or negative.  ebx, esi and edi are saved and restored; ecx and edx
      ;are trashed.
  981 _long_sqrt:
  982         mov eax,[esp+4]         ;stack argument -> eax, then fall into the register entry
  983 long_sqrt_asm:
  984         or      eax,eax ;check sign
  985         jle     near error   ;zero or negative (taken before anything is pushed)
  986
  987         push ebx
  988         push esi
  989         push edi
  990
  991         mov     edx,eax
  992         and     eax,0ffffh
  993         shr     edx,16  ;split eax -> dx:ax
  994
  995 ;get a good first guess by checking which byte the most significant bit is in
  996         xor     ebx,ebx ;clear high bytes for index
  997
  998         or      dh,dh   ;highest byte
  999         jz      not_dh
 1000         mov     bl,dh   ;get value for lookup
 1001         mov     cl,12   ;byte sits at bits 24..31, so its root is scaled by 2^12
 1002         jmp     got_guess
 1003 not_dh: or      dl,dl
 1004         jz      not_dl
 1005         mov     bl,dl   ;get value for lookup
 1006         mov     cl,8    ;bits 16..23 -> root scaled by 2^8
 1007         jmp     got_guess
 1008 not_dl: or      ah,ah
 1009         jz      not_ah
 1010         mov     bl,ah   ;get value for lookup
 1011         mov     cl,4    ;bits 8..15 -> root scaled by 2^4
 1012         jmp     got_guess
 1013 not_ah: mov     bl,al   ;get value for lookup
 1014         mov     cl,0    ;bits 0..7 -> no scaling
 1015 got_guess:
 1016         movzx   ebx,byte [guess_table+ebx] ;get byte guess
 1017         sal     ebx,cl  ;get in right place
 1018
 1019         mov     ecx,eax
 1020         mov     esi,edx ;save dx:ax
 1021
 1022 ;the loop nearly always executes 3 times, so we'll unroll it 2 times and
 1023 ;not do any checking until after the third time.  By my calculations, the
 1024 ;loop is executed 2 times in 99.97% of cases, 3 times in 93.65% of cases,
 1025 ;four times in 16.18% of cases, and five times in 0.44% of cases.  It never
 1026 ;executes more than five times.  By timing, I determined that it is faster
 1027 ;to always execute three times and not check for termination the first two
 1028 ;times through.  This means that in 93.65% of cases, we save 6 cmp/jcc pairs,
 1029 ;and in 6.35% of cases we do an extra divide.  In real life, these numbers
 1030 ;might not be the same.
 1031
 1032 ;newt_loop:
 1033 %rep 2
 1034         mov     eax,ecx
 1035         mov     edx,esi ;restore dx:ax
 1036         div     bx      ;dx:ax / bx -> ax = quotient, dx = remainder
 1037 ;        mov     edi,ebx ;save for compare
 1038         add     ebx,eax
 1039         rcr     ebx,1    ;next guess = (d + q)/2
 1040 %endrep
 1041
 1042 newt_loop:      mov     eax,ecx
 1043         mov     edx,esi   ;restore dx:ax
 1044         div     bx      ;dx:ax / bx -> ax = quotient, dx = remainder
 1045         cmp     eax,ebx ;correct?
 1046         je      got_it  ;..yep
 1047         mov     edi,ebx   ;save for compare
 1048         add     ebx,eax
 1049         rcr     ebx,1    ;next guess = (d + q)/2
 1050         cmp     ebx,eax ;new guess equals the quotient?
 1051         je      almost_got_it
 1052         cmp     ebx,edi ;new guess equals the previous guess?
 1053         jne     newt_loop       ;neither: iterate again
 1054
 1055 almost_got_it:  mov     eax,ebx
 1056         or      dx,dx   ;check remainder
 1057         jz      got_it
 1058         inc     eax     ;non-zero remainder from the last divide: bump the result by one
 1059 got_it: and eax,0ffffh  ;clear the upper 16 bits of the result
 1060         pop edi
 1061         pop esi
 1062         pop ebx
 1063         ret
 1064
 1065 ;sqrt called with zero or negative input. return zero
      ;reached only before any registers are pushed (also used by quad_sqrt_asm)
 1066 error:  xor     eax,eax
 1067         ret
1068
1069 ;standard Newtonian-iteration square root routine.  takes edx:eax, returns eax
1070 _quad_sqrt:
1071         mov     eax,[esp+4]
1072         mov     edx,[esp+8]
1073 quad_sqrt_asm:
1074         or      edx,edx ;check sign
1075         js      error   ;can't do negative number!
1076         jnz     must_do_quad    ;we really must do 64/32 div
1077         or      eax,eax ;check high bit of low longword
1078         jns     near long_sqrt_asm   ;we can use longword version
1079 must_do_quad:
1080
1081         push ebx
1082         push esi
1083         push edi
1084
1085 ;get a good first quess by checking which byte most significant bit is in
1086         xor     ebx,ebx ;clear high bytes for index
1087
1088         ror     edx,16  ;get high 2 bytes
1089
1090         or      dh,dh
1091         jz      q_not_dh
1092         mov     bl,dh   ;get value for lookup
1093         mov     cl,12+16
1094         ror     edx,16  ;restore edx
1095         jmp     q_got_guess
1096 q_not_dh:       or      dl,dl
1097         jz      q_not_dl
1098         mov     bl,dl   ;get value for lookup
1099         mov     cl,8+16
1100         ror     edx,16  ;restore edx
1101         jmp     q_got_guess
1102 q_not_dl:       ror     edx,16  ;restore edx
1103         or      dh,dh
1104         jz      q_not_ah
1105         mov     bl,dh   ;get value for lookup
1106         mov     cl,4+16
1107         jmp     q_got_guess
1108 q_not_ah:       mov     bl,dl   ;get value for lookup
1109         mov     cl,0+16
1110 q_got_guess:
1111         movzx   ebx,byte [guess_table+ebx] ;get byte guess
1112         sal     ebx,cl  ;get in right place
1113
1114 q_really_got_guess:
1115         mov     ecx,eax
1116         mov     esi,edx ;save edx:eax
1117
1118 ;quad loop usually executes 4 times
1119
1120 ;q_newt_loop:
1121 %rep 3
1122         mov     eax,ecx
1123         mov     edx,esi ;restore dx:ax
1124         div     ebx     ;dx:ax / bx
1125         mov     edi,ebx ;save for compare
1126         add     ebx,eax
1127         rcr     ebx,1   ;next guess = (d + q)/2
1128 %endrep
1129
1130 q_newt_loop:    mov     eax,ecx
1131         mov     edx,esi ;restore dx:ax
1132         div     ebx     ;dx:ax / bx
1133         cmp     eax,ebx ;correct?
1134         je      q_got_it        ;..yep
1135         mov     edi,ebx ;save for compare
1136         add     ebx,eax
1137         rcr     ebx,1   ;next guess = (d + q)/2
1138         cmp     ebx,eax
1139         je      q_almost_got_it
1140         cmp     ebx,edi
1141         jne     q_newt_loop
1142
1143 q_almost_got_it:        mov     eax,ebx
1144         or      edx,edx ;check remainder
1145         jz      q_got_it
1146         inc     eax
1147 q_got_it:
1148         pop edi
1149         pop esi
1150         pop ebx
1151         ret
1152
1153
 1154 ;fixed-point square root
      ;stack-argument entry; the result is returned in eax
 1155 _fix_sqrt:
 1156         mov     eax,[esp+4]     ;argument -> eax for the register entry point
 1157         call    long_sqrt_asm   ;eax = integer root of the raw 32-bit value (0 if zero or negative)
 1158 ;        movzx   eax,ax ; now in long_sqrt
 1159         sal     eax,8           ;sqrt(v * 2^16) = sqrt(v) * 2^8, so shift by 8 more to get back to 16.16
 1160         ret
1161
;the sincos functions have two variants: the C version is passed pointers
;to variables for sin & cos, and the assembly version returns the values
;in two registers
1165
;table-lookup sin/cos without interpolation (register calling convention)
; In:  ax  = angle; only the high byte (ah) is used, as the table index
; Out: eax = sin, ebx = cos  (table words sign-extended, then shifted left 2)
;No other registers are modified.
fix_fastsincos_asm:
        movzx   eax,ah                          ;eax = table index (0..255)
        movsx   ebx,word [cos_table+eax*2]      ;fetch cos entry first, eax is still the index
        movsx   eax,word [sin_table+eax*2]      ;now the index can be replaced by the sin entry
        shl     ebx,2                           ;make a fix
        shl     eax,2                           ;make a fix
        ret
1174
;sin/cos with linear interpolation between table entries (register convention)
; In:  ax  = angle; ah selects the table entry, al is the fraction (0..255)
;            between that entry and the next one
; Out: eax = sin, ebx = cos
;ecx and edx are saved and restored.  Each lookup reads entry [index] and
;entry [index+1], so both tables must be readable one word past index 255.
fix_sincos_asm:
        push    ecx
        push    edx

        movzx   ecx,al                          ;ecx = fraction (low byte)
        movzx   edx,ah                          ;edx = table index (high byte)

;sin: entry + ((next - entry) * fraction >> 8)
        movsx   eax,word [sin_table+edx*2]
        movsx   ebx,word [sin_table+edx*2+2]
        sub     ebx,eax                         ;delta to next entry
        imul    ebx,ecx                         ;scale by fraction
        sar     ebx,8
        add     eax,ebx                         ;add in frac part
        shl     eax,2                           ;make a fix

;cos: same scheme; edx is free to be reused as the "next entry" temp once
;both words have been fetched
        movsx   ebx,word [cos_table+edx*2]
        movsx   edx,word [cos_table+edx*2+2]
        sub     edx,ebx                         ;delta to next entry
        imul    edx,ecx                         ;scale by fraction
        sar     edx,8
        add     ebx,edx                         ;add in frac part
        shl     ebx,2                           ;make a fix

        pop     edx
        pop     ecx
        ret
1203
1204         align   16
1205
;arc cosine by table lookup with linear interpolation.
;
;_fix_acos is the C entry point: it loads the argument from [esp+4] and
;falls straight through into fix_acos_asm.
;
;fix_acos_asm (register convention):
; In:  eax = cos of angle (10000h is treated as the maximum magnitude)
; Out: eax = angle (callers use ax)
;            for a non-negative input:  interpolated table value
;            for a negative input:      8000h - interpolated table value
;ebx, ecx and edx are saved and restored.
;
;The lookup index is |eax|>>8, which can be as large as 100h, and entries
;[index] and [index+1] are both read, so acos_table must hold at least 258
;words (the table itself is defined elsewhere in the file).
_fix_acos:
        mov     eax,[esp+4]
;takes eax=cos angle, returns ax=angle
fix_acos_asm:
        push    ebx
        push    ecx
        push    edx

        abs_eax         ;get abs eax, leaves the sign mask in edx
        push    edx     ;save sign

;clamp the magnitude to 10000h.  The compare must be unsigned: the abs of
;80000000h is still 80000000h, which a signed compare would see as
;negative, skip the clamp, and then index the table with a negative offset.
        cmp     eax,10000h
        jbe     no_acos_oflow
        mov     eax,10000h
no_acos_oflow:
        movzx   ecx,al  ;save low byte (fraction)

        mov     edx,eax

        sar     edx,8   ;get high byte (+1 bit)
        movsx   eax,word [acos_table+edx*2]
        movsx   ebx,word [acos_table+edx*2+2]
        sub     ebx,eax ;delta to the next table entry
        imul    ebx,ecx ;mul by fraction
        sar     ebx,8
        add     eax,ebx ;add in frac part

        pop     edx     ;get sign back (0 or -1)
        xor     eax,edx
        sub     eax,edx ;negate the angle if the input was negative
        and     edx,8000h       ;zero or 1/2
        add     eax,edx ;negative input: result = 8000h - angle

        pop     edx
        pop     ecx
        pop     ebx

        ret
1244
;arc sine by table lookup with linear interpolation.
;
;_fix_asin is the C entry point: it loads the argument from [esp+4] and
;falls straight through into fix_asin_asm.
;
;fix_asin_asm (register convention):
; In:  eax = sin of angle (10000h is treated as the maximum magnitude)
; Out: eax = angle (callers use ax); the result carries the sign of the input
;ebx, ecx and edx are saved and restored.
;
;The lookup index is |eax|>>8, which can be as large as 100h, and entries
;[index] and [index+1] are both read, so asin_table must hold at least 258
;words (the table itself is defined elsewhere in the file).
_fix_asin:
        mov     eax,[esp+4]
;takes eax=sin angle, returns ax=angle
fix_asin_asm:
        push    ebx
        push    ecx
        push    edx

        abs_eax         ;get abs value, leaves the sign mask in edx
        push    edx     ;save sign

;clamp the magnitude to 10000h.  The compare must be unsigned: the abs of
;80000000h is still 80000000h, which a signed compare would see as
;negative, skip the clamp, and then index the table with a negative offset.
        cmp     eax,10000h
        jbe     no_asin_oflow
        mov     eax,10000h
no_asin_oflow:
        movzx   ecx,al  ;save low byte (fraction)

        mov     edx,eax

        sar     edx,8   ;get high byte (+1 bit)
        movsx   eax,word [asin_table+edx*2]
        movsx   ebx,word [asin_table+edx*2+2]
        sub     ebx,eax ;delta to the next table entry
        imul    ebx,ecx ;mul by fraction
        sar     ebx,8
        add     eax,ebx ;add in frac part

        pop     edx     ;get sign back (0 or -1)
        xor     eax,edx ;make sign correct
        sub     eax,edx

        pop     edx
        pop     ecx
        pop     ebx

        ret
1281
;given cos & sin of an angle, return that angle.
;C entry point: first stack argument = cos, second stack argument = sin.
;returns the angle in eax (callers use ax).  ebx, ecx and edx are preserved.
;parms need not be normalized, that is, the ratio of the two parms must
;equal the ratio cos/sin, but the parms need not be the actual cos & sin.
;NOTE: this is different from the standard C atan2, since it is left-handed.
;uses either asin or acos, to get better precision: whichever of the two
;parms has the smaller magnitude is the one that gets normalized and
;looked up.
;if the computed magnitude is zero the routine bails out early and returns
;the sin parm unchanged.

_fix_atan2:
        push    ebx
        push    ecx
        push    edx
        mov     eax,[esp+16]    ;eax = cos (3 pushes + return address = 16)
        mov     ebx,[esp+20]    ;ebx = sin

        %ifdef NOT_DEF
        %ifndef  NDEBUG
        mov     edx,eax
        or      edx,ebx
        break_if        z,'Both parms to atan2 are zero!'
        %endif
        %endif

        push    ebx             ;keep sin until after the sqrt
        push    eax             ;keep cos (comes off the stack first)

;find smaller of two
        push    eax ;save
        push    ebx
        abs_eax         ;eax = |cos|
        xchg    eax,ebx ;ebx = |cos|, eax = sin
        abs_eax         ;eax = |sin|
;abs_eax already leaves the magnitude in eax (fix_acos_asm and
;fix_asin_asm rely on that); flipping the sign back here would compare
;|cos| against the signed sin and pick the wrong branch for negative sin
        cmp     ebx,eax ;compare |cos| to |sin|
        pop     ebx
        pop     eax
        jl      use_cos ;|cos| < |sin|

;sin is smaller, use arcsin

        imul    eax             ;edx:eax = cos^2
        xchg    eax,ebx         ;eax = sin, ebx = low dword of cos^2
        mov     ecx,edx         ;ecx = high dword of cos^2
        imul    eax             ;edx:eax = sin^2
        add     eax,ebx
        adc     edx,ecx         ;edx:eax = cos^2 + sin^2
        call    quad_sqrt_asm
        mov     ecx,eax ;ecx = mag

        pop     ebx     ;get cos, save in ebx
        pop     eax     ;get sin
        jecxz   sign_ok ;abort!  zero magnitude, cannot divide
        m_fixdiv        ecx     ;normalize it
        call    fix_asin_asm    ;get angle
        or      ebx,ebx ;check sign of cos
        jns     sign_ok
        sub     eax,8000h       ;adjust: cos negative, so
        neg     eax             ;angle = 8000h - angle
sign_ok:
        pop     edx
        pop     ecx
        pop     ebx
        ret


;cos is smaller, use arccos
;(no zero-magnitude check here: this path is only taken when
;|cos| < |sin|, so sin is nonzero)

use_cos:        imul    eax     ;edx:eax = cos^2
        xchg    eax,ebx         ;eax = sin, ebx = low dword of cos^2
        mov     ecx,edx         ;ecx = high dword of cos^2
        imul    eax             ;edx:eax = sin^2
        add     eax,ebx
        adc     edx,ecx         ;edx:eax = cos^2 + sin^2
        call    quad_sqrt_asm
        mov     ecx,eax         ;ecx = mag

        pop     eax     ;get cos
        m_fixdiv        ecx     ;normalize it
        call    fix_acos_asm ; get angle
        mov     ebx,eax ;save in ebx
        pop     eax     ;get sin
        cdq             ;get sign of sin
        mov     eax,ebx ;get angle back
        xor     eax,edx
        sub     eax,edx ;make sign correct (negate if sin was negative)

        pop     edx
        pop     ecx
        pop     ebx
        ret
1371
1372
; C version of the fast (non-interpolating) sincos.
; Stack arguments, in order: angle, pointer for sin, pointer for cos.
; Each result is stored only if its pointer is non-null, so either (or
; both) pointers can be null.
; trashes eax,ecx,edx; ebx is saved and restored around the helper call.
; no_sin and no_cos are ordinary (non-local) labels: keep their names and
; the code that follows them intact, since code elsewhere in the file may
; branch to them.
_fix_fastsincos:
        push    ebx                     ;helper returns cos in ebx
        mov     eax,[esp+8]             ;angle
        call    fix_fastsincos_asm      ;eax = sin, ebx = cos
        mov     ecx,[esp+12]            ;ecx = where to store sin (may be null)
        mov     edx,[esp+16]            ;edx = where to store cos (may be null)
        test    ecx,ecx
        jz      no_sin
        mov     [ecx],eax
no_sin: test    edx,edx
        jz      no_cos
        mov     [edx],ebx
no_cos: pop     ebx
        ret
1390
;C version of the interpolating sincos.
;Stack arguments, in order: angle, pointer for sin, pointer for cos.
;Each result is stored only if its pointer is non-null, so either (or
;both) pointers can be null.
;trashes eax,ecx,edx; ebx is saved and restored around the helper call.
;All branch targets are local to this routine, so it does not depend on
;the layout of any other function.
_fix_sincos:
        push    ebx                     ;helper returns cos in ebx
        mov     eax,[esp+8]             ;angle
        call    fix_sincos_asm          ;eax = sin, ebx = cos
        mov     ecx,[esp+12]            ;ecx = where to store sin (may be null)
        mov     edx,[esp+16]            ;edx = where to store cos (may be null)
        or      ecx,ecx
        jz      .no_sin
        mov     [ecx],eax
.no_sin:
        or      edx,edx
        jz      .no_cos
        mov     [edx],ebx
.no_cos:
        pop     ebx
        ret