texmap/tmap_per.asm

   1 ; $Id: tmap_per.asm,v 1.3 2003-02-18 20:15:48 btb Exp $
   2 ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
   3 ;SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
   4 ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
   5 ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
   6 ;IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
   7 ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
   8 ;FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
   9 ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
  10 ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
  11 ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
  12 ;
  13 ;
  14 ; Perspective texture mapper inner loop.
  15 ;
  16 ; Old Log:
  17 ; Revision 1.26  1995/02/20  18:22:55  john
  18 ; Put all the externs in the assembly modules into tmap_inc.asm.
  19 ; Also, moved all the C versions of the inner loops into a new module,
  20 ; scanline.c.
  21 ;
  22 ; Revision 1.25  1995/02/20  17:09:08  john
  23 ; Added code so that you can build the tmapper with no assembly!
  24 ;
  25 ; Revision 1.24  1995/01/10  09:32:07  mike
  26 ; mostly fix garbage at end of scanline, but slow down by 1-4%.
  27 ;
  28 ; Revision 1.23  1994/12/02  23:29:57  mike
  29 ; optimizations.
  30 ;
  31 ; Revision 1.22  1994/11/30  00:57:00  mike
  32 ; optimization.
  33 ;
  34 ; Revision 1.21  1994/11/21  13:57:42  mike
  35 ; fix right side shear bug
  36 ;
  37 ; Revision 1.20  1994/11/12  16:41:09  mike
  38 ; jae -> ja.
  39 ;
  40 ; Revision 1.19  1994/10/27  19:40:00  john
  41 ; Made lighting table lookup be _gr_fade_table[eax] instead
  42 ; of fs:[eax], which gets rid of a segment override that
  43 ; supposedly costs 1 clock on a 486.  Mainly, I wanted to verify
  44 ; that the only reason we need selectors is for the source texture
  45 ; data .
  46 ;
  47 ; Revision 1.18  1994/05/03  11:08:32  mike
  48 ; Trap divide overflows.
  49 ;
  50 ; Revision 1.17  1994/04/21  15:03:41  mike
  51 ; make faster.
  52 ;
  53 ; Revision 1.16  1994/04/08  16:46:57  john
  54 ; Made 32 fade levels. Hacked.
  55 ;
  56 ; Revision 1.15  1994/03/31  08:35:18  mike
  57 ; Fix quantized-by-4 bug in inner loop.
  58 ;
  59 ; Revision 1.14  1994/03/14  17:41:14  mike
  60 ; Fix bug in unlighted version.
  61 ;
  62 ; Revision 1.13  1994/03/14  15:45:14  mike
  63 ; streamline code.
  64 ;
  65 ; Revision 1.12  1994/01/14  14:01:58  mike
  66 ; *** empty log message ***
  67 ;
  68 ; Revision 1.11  1993/12/18  14:43:44  john
  69 ; Messed around with doing 1/z, the u*(1/z) and v*(1/z)
  70 ; (Went from 23 fps to 21 fps... not good! )
  71 ;
  72 ; Revision 1.10  1993/12/17  16:14:17  john
  73 ; Split lighted/nonlighted, so there is no cmp lighting
  74 ; in the inner loop.
  75 ;
  76 ; Revision 1.9  1993/12/17  12:34:29  john
  77 ; Made leftover bytes use linear approx instead of correct...
  78 ; should save about 8 divides per scanline on average.
  79 ; Also, took out anti-aliasing code and rearranged to
  80 ; order of some instructions to help on 486 pipelining.
  81 ; (The anti-aliasing code did *not* look good, so I
  82 ; figure there was no reason to keep it in. )
  83 ;
  84 ; Revision 1.8  1993/12/16  18:37:52  mike
  85 ; Align some stuff on 4 byte boundaries.
  86 ;
  87 ; Revision 1.7  1993/11/30  08:44:18  john
  88 ; Made selector set check for < 64*64 bitmaps.
  89 ;
  90 ; Revision 1.6  1993/11/23  17:25:26  john
  91 ; Added safety "and eax, 0fffh" in lighting lookup.
  92 ;
  93 ; Revision 1.5  1993/11/23  15:08:52  mike
  94 ; Fixed lighting bug.
  95 ;
  96 ; Revision 1.4  1993/11/23  14:38:50  john
  97 ; optimized NORMAL code by switching EBX and ESI, so BH can be used in
  98 ; the lighting process.
  99 ;
 100 ; Revision 1.3  1993/11/23  14:30:53  john
 101 ; Made the perspective tmapper do 1/8 divides; added lighting.
 102 ;
 103 ; Revision 1.2  1993/11/22  10:24:59  mike
 104 ; *** empty log message ***
 105 ;
 106 ; Revision 1.1  1993/09/08  17:29:53  mike
 107 ; Initial revision
 108 ;
 109 ;
 110 ;
 111
 112 [BITS 32]
 113
 114 global  _asm_tmap_scanline_per
 115 global  asm_tmap_scanline_per
 116
 117 %include        "tmap_inc.asm"
 118
 119 [SECTION .data]
 120 align 4
 121     ;extern _per2_flag;:dword
 122 %ifdef __linux__
 123 ; Cater for linux ELF compilers: the %defines rename the underscore-prefixed symbols below to unprefixed ones; _x keeps its name and gets the alias label x instead.
 124 global x
 125 %define _loop_count loop_count
 126 %define _new_end new_end
 127 %define _scan_doubling_flag scan_doubling_flag
 128 %define _linear_if_far_flag linear_if_far_flag
 129 %endif
 130
 131         global _x
 132         global _loop_count
 133         global _new_end
 134         global _scan_doubling_flag
 135         global _linear_if_far_flag
 136
 137 ;        global _max_ecx
 138 ;        global _min_ecx
 139
 140     mem_edx dd 0                ; only referenced from commented-out code in the visible part of this file
 141     x:                          ; alias label: same address as _x (exported only under __linux__)
 142     _x          dd      0       ; exported; not read or written by the code visible here
 143     _loop_count dd      0       ; per-pixel loops: pixels remaining - 1; chunk loop: number of 2^NBITS-pixel chunks left
 144
 145 ;    _max_ecx    dd      0
 146 ;    _min_ecx    dd      55555555h
 147     _new_end     dd      1       ; if set, use new, but slower, way of finishing off extra pixels on scanline, 01/10/95 --MK
 148
 149     _scan_doubling_flag dd 0    ; exported; not referenced by the code visible here
 150     _linear_if_far_flag dd 0    ; exported; not referenced by the code visible here
 151
 152 ;---------- local variables (statics: the routine keeps its state here, not on the stack)
 153 align 4
 154     req_base    dd      0       ; not referenced by the code visible here
 155     req_size    dd      0       ; not referenced by the code visible here
 156     U0          dd      0       ; u/z at the start of the current chunk (result of the PDIV-style divide)
 157     U1          dd      0       ; not referenced by the code visible here (U1 lives in ebx)
 158     V0          dd      0       ; v/z at the start of the current chunk
 159     V1          dd      0       ; not referenced by the code visible here (V1 lives in ebp)
 160     num_left_over   dd  0       ; pixel count AND (2^NBITS - 1): pixels left after the full chunks
 161     DU1         dd      0       ; _fx_du_dx << NBITS (u step per chunk)
 162     DV1         dd      0       ; _fx_dv_dx << NBITS (v step per chunk)
 163     DZ1         dd      0       ; _fx_dz_dx << NBITS (z step per chunk)
 164
 165 [SECTION .text]
 166
 167 ; --------------------------------------------------------------------------------------------------
 168 ; Enter:
 169 ;       _xleft  fixed point left x coordinate
 170 ;       _xright fixed point right x coordinate
 171 ;       _y      fixed point y coordinate
 172 ;       _pixptr address of source pixel map
 173 ;       _u      fixed point initial u coordinate
 174 ;       _v      fixed point initial v coordinate
 175 ;       _z      fixed point initial z coordinate
 176 ;       _du_dx  fixed point du/dx
 177 ;       _dv_dx  fixed point dv/dx
 178 ;       _dz_dx  fixed point dz/dx
 179
 180 ;   for (x = (int) xleft; x <= (int) xright; x++) {
 181 ;      _setcolor(read_pixel_from_tmap(srcb,((int) (u/z)) & 63,((int) (v/z)) & 63));
 182 ;      _setpixel(x,y);
 183 ;
 184 ;      u += du_dx;
 185 ;      v += dv_dx;
 186 ;      z += dz_dx;
 187 ;   }
 188
 189
 190 align   16
 191 _asm_tmap_scanline_per:
 192 asm_tmap_scanline_per:
 193 ;        push    es
 194         pusha                   ; save all general registers; the matching popa is at _none_to_do
 195
 196 ;---------------------------- setup for loop ---------------------------------
 197 ; Setup for loop:       _loop_count = (int) xright - (int) xleft, i.e. pixels to draw - 1
 198 ;       (the loops below address the texture through [_pixptr]; esi is not loaded with it here)
 199 ;       edi     destination pointer = _y_pointers[y] + _write_buffer + x
 200 ; NOTE: fx_xright and fx_xleft changed from fix to int by mk on 12/01/94.
 201
 202 ; All inputs are read from globals; the routine takes no stack or register arguments.
 203
 204 ; set edi = address of first pixel to modify
 205         mov     edi,[_fx_y]
 206 ;        mov     es,[_pixel_data_selector]       ; selector[0*2]
 207
 208         mov     edi,[_y_pointers+edi*4] ; edi = _y_pointers[y] (dword table indexed by y)
 209
 210         mov     ebx,[_fx_xleft]
 211         test    ebx, ebx
 212         jns     ebx_ok
 213         xor     ebx, ebx        ; clamp a negative left x to 0
 214 ebx_ok: add     edi,[_write_buffer]
 215         add     edi,ebx
 216
 217 ; set _loop_count = # of iterations - 1 (the loops run while the decremented count is >= 0)
 218         mov     eax,[_fx_xright]
 219         sub     eax,ebx
 220         js      near _none_to_do        ; xright < (clamped) xleft: nothing to draw
 221         mov     [_loop_count],eax
 222
 223 ; lighting values are passed in fixed point, but need to be in 8 bit integer, 8 bit fraction so we can easily
 224 ; get the integer by reading %bh.  NOTE: both globals are rewritten in place by the shifts below.
 225         sar     dword [_fx_l], 8
 226         sar     dword [_fx_dl_dx],8
 227         jns     dl_dx_ok        ; tests the sign of the shifted delta
 228         inc     dword [_fx_dl_dx]       ; round towards 0 for negative deltas
 229 dl_dx_ok:
 230
 231 ; set initial values
 232         mov     ebx,[_fx_u]
 233         mov     ebp,[_fx_v]
 234         mov     ecx,[_fx_z]
 235
 236         test    dword [_per2_flag],-1
 237         je      tmap_loop       ; _per2_flag == 0: one divide pair per pixel
 238
 239         test    dword [_Lighting_on], -1
 240         je     near _tmap_loop_fast_nolight     ; target is not defined in the visible part of this file
 241         jmp     _tmap_loop_fast
 242 ;tmap_loop_fast_nolight_jumper:
 243 ;    jmp tmap_loop_fast_nolight
 244
 245 ;================ PERSPECTIVE TEXTURE MAP INNER LOOPS ========================
 246 ;
 247 ; Usage in loop:        eax     division, pixel value
 248 ;       ebx     u
 249 ;       ecx     z
 250 ;       edx     division
 251 ;       ebp     v
 252 ;       esi     source pixel pointer
 253 ;       edi     destination pixel pointer
 254
 255 ;-------------------- NORMAL PERSPECTIVE TEXTURE MAP LOOP -----------------
 256 tmap_loop:
 257         mov     esi, ebx        ; esi becomes u coordinate (ebx is reused as scratch below)
 258
 259         align   4
 260 tmap_loop0:
 261
 262 ; compute v coordinate
 263         mov     eax, ebp        ; get v
 264         mov     edx, eax
 265         sar     edx, 31         ; edx:eax = sign-extended v, as idiv requires
 266         idiv    ecx     ; eax = (v/z)
 267
 268         and     eax,3fh ; mask with height-1
 269         mov     ebx,eax
 270
 271 ; compute u coordinate
 272         mov     eax, esi        ; get u
 273         mov     edx, eax
 274         sar     edx, 31         ; edx:eax = sign-extended u
 275         idiv    ecx     ; eax = (u/z)
 276
 277         shl     eax,26          ; keep only the low 6 bits of u, moved to the top of eax
 278         shld    ebx,eax,6       ; ebx = v*64+u (6 bits of v, then 6 bits of u)
 279
 280 ; read 1 pixel
 281         add     ebx, [_pixptr]
 282         xor     eax, eax
 283         test    dword [_Lighting_on], -1
 284         mov     al, [ebx]    ; get pixel from source bitmap (mov leaves the flags from test intact)
 285         je      NoLight1        ; branches on the _Lighting_on test above
 286
 287 ; LIGHTING CODE
 288         mov     ebx, [_fx_l]    ; get temp copy of lighting value
 289         mov     ah, bh  ; get lighting level: eax = level*256 + pixel
 290         add     ebx, [_fx_dl_dx]        ; update lighting value
 291         mov     al, [_gr_fade_table+eax]        ; xlat pixel thru lighting tables
 292         mov     [_fx_l], ebx    ; save temp copy of lighting value
 293
 294 ; transparency check -- NOTE(review): on the lit path this tests the colour after the fade lookup, unlike the chunked loops which test the source texel; confirm the table maps 255 to 255
 295 NoLight1:       cmp     al,255
 296         je      skip1
 297
 298         mov     [edi],al
 299 skip1:  inc     edi
 300
 301 ; update deltas
 302         add     ebp,[_fx_dv_dx]
 303         add     esi,[_fx_du_dx]
 304         add     ecx,[_fx_dz_dx]
 305         je      _div_0_abort    ; would be dividing by 0, so abort
 306
 307         dec     dword [_loop_count]
 308         jns     tmap_loop0      ; count was pixels-1, so loop until it goes negative
 309
 310 _none_to_do:                    ; common exit for every path in this routine
 311         popa
 312 ;        pop     es
 313         ret
 314
 315 ; We detected a z=0 condition, which seems pretty bogus, don't you think?
 316 ; So, we abort, but maybe we want to know about it.
 317 _div_0_abort:
 318         jmp     _none_to_do     ; currently just the normal exit; the rest of the scanline is left undrawn
 319
 320 ;-------------------------- PER/4 TMAPPER ----------------
 321 ; (Pseudo-code for a 4-pixel step; the code below steps 2^NBITS pixels per divide.)
 322 ;       x = x1
 323 ;       U0 = u/w; V0 = v/w;
 324 ;       while ( 1 )
 325 ;               u += du_dx*4; v+= dv_dx*4
 326 ;               U1 = u/w; V1 = v/w;
 327 ;               DUDX = (U1-U0)/4; DVDX = (V1-V0)/4;
 328 ;
 329 ;       ; Pixel 0
 330 ;               pixels = texmap[V0*64+U0];
 331 ;               U0 += DUDX; V0 += DVDX
 332 ;       ; Pixel 1
 333 ;               pixels = (pixels<<8)+texmap[V0*64+U0];
 334 ;               U0 += DUDX; V0 += DVDX
 335 ;       ; Pixel 2
 336 ;               pixels = (pixels<<8)+texmap[V0*64+U0];
 337 ;               U0 += DUDX; V0 += DVDX
 338 ;       ; Pixel 3
 339 ;               pixels = (pixels<<8)+texmap[V0*64+U0];
 340 ;
 341 ;               screen[x] = pixel
 342 ;               x += 4;
 343 ;               U0 = U1; V0 = V1
 344
 345 NBITS equ 4     ; 2^NBITS pixels plotted per divide (chunk size of the fast loop)
 346 ZSHIFT equ 4    ; precision used in PDIV macro: dividend is pre-shifted left by ZSHIFT, quotient then shifted left by 16-ZSHIFT
 347
 348
 349 ;PDIV MACRO
 350 ; Returns EAX/ECX in 16.16 format in EAX. Trashes EDX
 351 ;          sig bits   6.3
 352 ;       mov     edx,eax
 353 ;       shl     eax,ZSHIFT
 354 ;       sar     edx,32-ZSHIFT
 355 ;       idiv    ecx     ; eax = (v/z)
 356 ;   shl eax, 16-ZSHIFT
 357 ;ENDM
 358
 359 global _tmap_loop_fast
 360
 361 ; -------------------------------------- Start of Getting Dword Aligned ----------------------------------------------
 362 ;       ebx     fx_u
 363
 364 _tmap_loop_fast:
 365         mov     esi,ebx         ; esi = u while ebx is used as scratch in the alignment loop
 366
 367         align   4
 368 NotDwordAligned1:               ; draw single lit pixels (one divide pair each) until edi is dword aligned
 369         test    edi, 11b
 370         jz      DwordAligned1
 371
 372 ; compute v coordinate
 373         mov     eax, ebp        ; get v
 374         mov     edx, eax
 375         sar     edx, 31         ; edx:eax = sign-extended v, as idiv requires
 376         idiv    ecx     ; eax = (v/z)
 377
 378         and     eax,3fh ; mask with height-1
 379         mov     ebx,eax
 380
 381 ; compute u coordinate
 382         mov     eax, esi        ; get u
 383         mov     edx, eax
 384         sar     edx, 31         ; edx:eax = sign-extended u
 385         idiv    ecx     ; eax = (u/z)
 386
 387         shl     eax,26          ; keep only the low 6 bits of u, moved to the top of eax
 388         shld    ebx,eax,6       ; ebx = v*64+u (6 bits of v, then 6 bits of u)
 389
 390 ; read 1  pixel
 391         add     ebx,[_pixptr]
 392         xor     eax, eax
 393         mov     al, [ebx]    ; get pixel from source bitmap
 394
 395 ; lighting code (always advances the light value, even when the pixel turns out to be transparent)
 396         mov     ebx, [_fx_l]    ; get temp copy of lighting value
 397         mov     ah, bh  ; get lighting level: eax = level*256 + pixel
 398         add     ebx, [_fx_dl_dx]        ; update lighting value
 399         mov     [_fx_l], ebx    ; save temp copy of lighting value
 400
 401 ; transparency check (on the source texel, before the fade-table lookup)
 402         cmp     al,255
 403         je      skip2   ; this pixel is transparent, so don't write it (or light it)
 404
 405         mov     al, [_gr_fade_table+eax]        ; xlat pixel thru lighting tables
 406
 407 ; write 1 pixel
 408         mov     [edi],al
 409 skip2:  inc     edi
 410
 411 ; update deltas
 412         add     ebp,[_fx_dv_dx]
 413         add     esi,[_fx_du_dx]
 414         add     ecx,[_fx_dz_dx]
 415         je      _div_0_abort    ; would be dividing by 0, so abort
 416
 417         dec     dword [_loop_count]
 418         jns     NotDwordAligned1
 419
 420         jmp     _none_to_do     ; scanline finished before alignment was reached
 421
 422 ; -------------------------------------- End of Getting Dword Aligned ----------------------------------------------
 423
 424 DwordAligned1:                  ; edi is dword aligned: split the remaining pixels into 2^NBITS-pixel chunks
 425
 426         mov     eax, [_loop_count]
 427         mov     ebx, esi        ; get fx_u [pentium pipelining]
 428         inc     eax             ; eax = pixels remaining (the stored count is pixels - 1)
 429         mov     esi, eax
 430         and     esi, (1 << NBITS) - 1   ; esi = pixels left over after the full chunks
 431         sar     eax, NBITS      ; eax = number of full chunks; sets ZF for the je below
 432         mov     [num_left_over], esi    ; mov does not disturb the flags from sar
 433         je      near tmap_loop  ; there are no 2^NBITS chunks, do divide/pixel for whole scanline
 434         mov     [_loop_count], eax      ; _loop_count = pixels / NPIXS
 435
 436 ; compute initial v coordinate (PDIV sequence: ((v << ZSHIFT) / z) << (16-ZSHIFT))
 437         mov     eax,ebp ; get v
 438         mov     edx,ebp
 439         shl     eax,ZSHIFT
 440         sar     edx,32-ZSHIFT   ; edx:eax = v << ZSHIFT as a signed 64-bit dividend
 441         idiv    ecx     ; eax = (v/z)
 442         shl     eax, 16-ZSHIFT
 443         mov     [V0], eax
 444
 445 ; compute initial u coordinate
 446         mov     eax,ebx ; get u
 447         mov     edx,ebx
 448         shl     eax,ZSHIFT
 449         sar     edx,32-ZSHIFT   ; edx:eax = u << ZSHIFT as a signed 64-bit dividend
 450         idiv    ecx     ; eax = (u/z)
 451         shl     eax, 16-ZSHIFT
 452         mov     [U0], eax
 453
 454 ; Set deltas to NPIXS pixel increments
 455         mov     eax, [_fx_du_dx]
 456         shl     eax, NBITS
 457         mov     [DU1], eax
 458         mov     eax, [_fx_dv_dx]
 459         shl     eax, NBITS
 460         mov     [DV1], eax
 461         mov     eax, [_fx_dz_dx]
 462         shl     eax, NBITS
 463         mov     [DZ1], eax
 464
 465         align   4
 466 TopOfLoop4:                     ; one iteration = one 2^NBITS-pixel chunk, interpolated linearly between two divides
 467         add     ebx, [DU1]      ; advance u, v, z to the end of this chunk
 468         add     ebp, [DV1]
 469         add     ecx, [DZ1]
 470         je      near _div_0_abort       ; would be dividing by 0, so abort
 471
 472 ; Done with ebx, ebp, ecx until next iteration
 473         push    ebx             ; popped again at cont1
 474         push    ecx             ; popped again at cont1
 475         push    ebp             ; popped again at cont1
 476         push    edi             ; popped below, before the pixel writes
 477
 478 ; Find fixed U1
 479         mov     eax, ebx
 480         mov     edx,ebx
 481         shl     eax,ZSHIFT
 482         sar     edx,32-ZSHIFT   ; edx:eax = u << ZSHIFT as a signed 64-bit dividend
 483         idiv    ecx     ; eax = (u/z)
 484         shl     eax, 16-ZSHIFT
 485         mov     ebx, eax        ; ebx = U1 until pop's
 486
 487 ; Find fixed V1
 488         mov     eax, ebp
 489         mov     edx, ebp
 490         shl     eax,ZSHIFT
 491         sar     edx,32-ZSHIFT   ; edx:eax = v << ZSHIFT as a signed 64-bit dividend
 492         idiv    ecx     ; eax = (v/z)
 493
 494         mov     ecx, [U0]       ; ecx = U0 until pop's
 495         mov     edi, [V0]       ; edi = V0 until pop's
 496
 497         shl     eax, 16-ZSHIFT
 498         mov     ebp, eax        ; ebp = V1 until pop's
 499
 500 ; Make ESI =  V0:U0 in 6:10,6:10 format (v in the high word, u in the low word)
 501         mov     eax, ecx
 502         shr     eax, 6
 503         mov     esi, edi
 504         shl     esi, 10
 505         mov     si, ax
 506
 507 ; Make EDX = DV:DU in 6:10,6:10 format (per-pixel step to add to ESI)
 508         mov     eax, ebx
 509         sub     eax, ecx
 510         sar     eax, NBITS+6    ; (U1-U0) / 2^NBITS, moved down to the 6:10 position
 511         mov     edx, ebp
 512         sub     edx, edi
 513         shl     edx, 10-NBITS   ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac (high word)
 514         mov     dx, ax  ; put delta u in low word
 515
 516 ; Save the U1 and V1 so we don't have to divide on the next iteration
 517         mov     [U0], ebx
 518         mov     [V0], ebp
 519
 520         pop     edi     ; Restore EDI before using it
 521
 522 ; LIGHTING CODE
 523         mov     ebx, [_fx_l]    ; ebx = light value (bh = level); written back at cont1
 524         mov     ebp, [_fx_dl_dx]        ; ebp = per-pixel light delta
 525
 526         test    dword [_Transparency_on],-1
 527         je      near no_trans1  ; no transparency: use the variant that stores 4 pixels per dword write
 528
 529 %macro repproc1 0       ; draw 2 lit pixels; texels equal to 255 are transparent and not stored
 530         mov     eax, esi        ; get u,v (esi = packed V:U, 6:10 each)
 531         shr     eax, 26 ; shift out all but int(v)
 532         shld    ax,si,6 ; shift in u, shifting up v
 533         add     esi, edx        ; inc u,v
 534         add     eax, [_pixptr]
 535         movzx   eax, byte [eax]    ; get pixel from source bitmap
 536         mov     ah, bh  ; form lighting table lookup value
 537         add     ebx, ebp        ; update lighting value for every pixel, drawn or not (must precede the cmp: add changes flags)
 538         cmp     al,255
 539         je      %%skipa1        ; transparent texel: skip the store but keep the light gradient moving
 540         mov     al, [_gr_fade_table+eax]        ; xlat thru lighting table into dest buffer
 541         mov     [edi],al
 542 %%skipa1:
 543         inc     edi
 544
 545 ; Do odd pixel (same sequence as the even pixel above)
 546         mov     eax, esi        ; get u,v
 547         shr     eax, 26 ; shift out all but int(v)
 548         shld    ax,si,6 ; shift in u, shifting up v
 549         add     esi, edx        ; inc u,v
 550         add     eax,[_pixptr]
 551         movzx   eax, byte [eax]    ; get pixel from source bitmap
 552         mov     ah, bh  ; form lighting table lookup value
 553         add     ebx, ebp        ; update lighting value for every pixel, drawn or not
 554         cmp     al,255
 555         je      %%skipa2
 556         mov     al, [_gr_fade_table+eax]        ; xlat thru lighting table into dest buffer
 557         mov     [edi],al
 558 %%skipa2:
 559         inc     edi
 560 %endmacro
 561
 562
 563 %rep (2 << (NBITS-2))
 564 ; Transparency-enabled chunk: each repproc1 expansion draws 2 pixels,
 565 ; so this unrolls to 2^NBITS byte-wide stores (skipped for texel 255).
 566     repproc1
 567 %endrep
 568
 569 jmp     cont1
 570
 571 ; -------------------------------------------------------
 572 no_trans1:
 573
 574 %macro repproc2 0       ; look up 2 lit pixels into cl/ch, then rotate them into the other half of ecx (no transparency test)
 575         mov     eax, esi        ; get u,v
 576         shr     eax, 26 ; shift out all but int(v)
 577         shld    ax,si,6 ; shift in u, shifting up v
 578         add     esi, edx        ; inc u,v
 579         add     eax,[_pixptr]
 580         movzx   eax, byte [eax]    ; get pixel from source bitmap
 581         mov     ah, bh  ; form lighting table lookup value
 582         add     ebx, ebp        ; update lighting value
 583         mov     cl, [_gr_fade_table+eax]        ; xlat thru lighting table into low byte of ecx
 584
 585 ; Do odd pixel
 586         mov     eax, esi        ; get u,v
 587         shr     eax, 26 ; shift out all but int(v)
 588         shld    ax,si,6 ; shift in u, shifting up v
 589         add     esi, edx        ; inc u,v
 590         add     eax,[_pixptr]
 591         movzx   eax, byte [eax]    ; get pixel from source bitmap
 592         mov     ah, bh  ; form lighting table lookup value
 593         add     ebx, ebp        ; update lighting value
 594         mov     ch, [_gr_fade_table+eax]        ; xlat thru lighting table into second byte of ecx
 595
 596 ; ----- This is about 1% faster than the above, and could probably be optimized more.
 597 ; ----- Problem is, it gets the u,v coordinates backwards.  What you would need to do
 598 ; ----- is switch the packing of the u,v coordinates above (about 95 lines up).
 599 ;----------;    mov     eax, esi
 600 ;----------;    shr     ax, 10
 601 ;----------;    rol     eax, 6
 602 ;----------;    mov     dx, ax
 603 ;----------;    add     esi, mem_edx
 604 ;----------;    mov     dl, es:[edx]
 605 ;----------;    mov     dh, bh
 606 ;----------;    add     ebx, ebp
 607 ;----------;    mov     cl, _gr_fade_table[edx]
 608 ;----------;
 609 ;----------;    mov     eax, esi
 610 ;----------;    shr     ax, 10
 611 ;----------;    rol     eax, 6
 612 ;----------;    mov     dx, ax
 613 ;----------;    add     esi, mem_edx
 614 ;----------;    mov     dl, es:[edx]
 615 ;----------;    mov     dh, bh
 616 ;----------;    add     ebx, ebp
 617 ;----------;    mov     ch, _gr_fade_table[edx]
 618
 619         ror     ecx, 16 ; move to next double dest pixel position
 620 %endmacro
 621
 622 %rep (1 << (NBITS-2))
 623
 624     repproc2
 625     repproc2
 626
 627         mov     [edi],ecx       ; Draw 4 pixels to display (ecx is scratch here; z was pushed at TopOfLoop4)
 628         add     edi,4
 629 %endrep
 630 ;; pop edx
 631 cont1:                          ; both chunk variants rejoin here
 632
 633 ; -------------------------------------------------------
 634
 635 ; LIGHTING CODE
 636         mov     [_fx_l], ebx    ; write the advanced light value back for the next chunk
 637         pop     ebp             ; restore v, z, u pushed at the top of TopOfLoop4
 638         pop     ecx
 639         pop     ebx
 640         dec     dword [_loop_count]
 641         jnz     near TopOfLoop4 ; _loop_count holds the number of chunks here, so stop at zero
 642
 643 EndOfLoop4:
 644         test    dword [num_left_over], -1
 645         je      near _none_to_do        ; pixel count was a multiple of 2^NBITS: done
 646
 647 ; ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
 648 DoEndPixels:                    ; draw the num_left_over (1 .. 2^NBITS-1) pixels after the last full chunk
 649         push    ecx             ; keep z while the test below scratches ecx
 650
 651         mov     eax, ecx
 652         lea     eax, [eax*2+eax]        ; eax = 3*z
 653
 654         add     ecx, [DZ1]
 655         js      notokhere       ; z one full chunk ahead is negative
 656         shl     ecx,2           ; ecx = 4*(z+DZ1)
 657         cmp     eax, ecx
 658         pop     ecx             ; pop leaves the flags from cmp intact
 659         jl      okhere          ; 3*z < 4*(z+DZ1): take the linear tail below regardless of _new_end
 660         jmp     bah_bah
 661 notokhere:
 662         pop     ecx
 663 bah_bah:
 664         test    dword [_new_end],-1
 665         jne     near NewDoEndPixels     ; target is not defined in the visible part of this file
 666 okhere:
 667
 668         add     ebx, [DU1]      ; step u, v, z a full chunk ahead to get the interpolation end point
 669         add     ebp, [DV1]
 670         add     ecx, [DZ1]
 671         je      near _div_0_abort
 672         jns     dep_cont
 673
 674 ; z went negative.
 675 ; this can happen because we added DZ1 to the current z, but dz1 represents dz for perhaps 16 pixels
 676 ; though we might only plot one more pixel.  Back off by delta/2, delta/4, ... until z is positive again.
 677         mov     esi, ecx        ; keep z in esi: cl is the shift count below (sharing ecx corrupted both z and the count)
 678         mov     cl, 1
 679 dep_loop:       mov     eax, [DU1]
 680         sar     eax, cl
 681         sub     ebx, eax
 682
 683         mov     eax, [DV1]
 684         sar     eax, cl
 685         sub     ebp, eax
 686
 687         mov     eax, [DZ1]
 688         sar     eax, cl
 689         sub     esi, eax        ; z -= DZ1 >> cl
 690         je      near _div_0_abort
 691         jns     dep_fixed       ; z is positive again
 692
 693         inc     cl
 694         cmp     cl, NBITS
 695         jne     dep_loop
 696 dep_fixed:      mov     ecx, esi        ; z back into ecx (esi is rebuilt from V0:U0 below)
 697 dep_cont:
 698         push    edi     ; use edi as a temporary variable
 699
 700         cmp     ecx,1 << (ZSHIFT+1)     ; clamp z to a small positive minimum (presumably to keep the idivs below from overflowing)
 701         jg      ecx_ok
 702         mov     ecx, 1 << (ZSHIFT+1)
 703 ecx_ok:
 704
 705 ; Find fixed U1
 706         mov     eax, ebx
 707         ;PDIV
 708         mov     edx,eax
 709         shl     eax,ZSHIFT
 710         sar     edx,32-ZSHIFT   ; edx:eax = u << ZSHIFT as a signed 64-bit dividend
 711         idiv    ecx     ; eax = (u/z)
 712         shl     eax, 16-ZSHIFT
 713
 714         mov     ebx, eax        ; ebx = U1 until pop's
 715
 716 ; Find fixed V1
 717         mov     eax, ebp
 718         ;PDIV
 719         mov     edx,eax
 720         shl     eax,ZSHIFT
 721         sar     edx,32-ZSHIFT   ; edx:eax = v << ZSHIFT as a signed 64-bit dividend
 722         idiv    ecx     ; eax = (v/z)
 723         shl     eax, 16-ZSHIFT
 724
 725         mov     ebp, eax        ; ebp = V1 until pop's
 726
 727         mov     ecx, [U0]       ; ecx = U0 until pop's
 728         mov     edi, [V0]       ; edi = V0 until pop's
 729
 730 ; Make ESI =  V0:U0 in 6:10,6:10 format (v in the high word, u in the low word)
 731         mov     eax, ecx
 732         shr     eax, 6
 733         mov     esi, edi
 734         shl     esi, 10
 735         mov     si, ax
 736
 737 ; Make EDX = DV:DU in 6:10,6:10 format (per-pixel step to add to ESI)
 738         mov     eax, ebx
 739         sub     eax, ecx
 740         sar     eax, NBITS+6    ; (U1-U0) / 2^NBITS, moved down to the 6:10 position
 741         mov     edx, ebp
 742         sub     edx, edi
 743         shl     edx, 10-NBITS   ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac (high word)
 744         mov     dx, ax  ; put delta u in low word
 745
 746         pop     edi     ; Restore EDI before using it
 747
 748         mov     ecx, [num_left_over]    ; ecx = pixels still to draw; counted down inside repproc3
 749
 750 ; LIGHTING CODE
 751         mov     ebx, [_fx_l]    ; ebx = light value (bh = level); not written back, the scanline ends here
 752         mov     ebp, [_fx_dl_dx]        ; ebp = per-pixel light delta
 753
 754     ITERATION equ 0
 755
 756 %macro repproc3 0       ; draw up to 2 lit pixels at [edi+ITERATION]; exits to _none_to_do when ecx reaches 0
 757 ; Do even pixel
 758         mov     eax, esi        ; get u,v
 759         shr     eax, 26 ; shift out all but int(v)
 760         shld    ax,si,6 ; shift in u, shifting up v
 761         add     eax,[_pixptr]
 762         movzx   eax, byte [eax]    ; get pixel from source bitmap
 763         add     esi, edx        ; inc u,v
 764         mov     ah, bh  ; form lighting table lookup value
 765         add     ebx, ebp        ; update lighting value
 766         cmp     al,255
 767         je      %%skip4         ; transparent texel: no store
 768         mov     al, [_gr_fade_table+eax]        ; xlat thru lighting table into dest buffer
 769         mov     [edi+ITERATION], al     ; write pixel
 770 %%skip4:        dec     ecx
 771         jz      near _none_to_do
 772
 773 ; Do odd pixel
 774         mov     eax, esi        ; get u,v
 775         shr     eax, 26 ; shift out all but int(v)
 776         shld    ax,si,6 ; shift in u, shifting up v
 777         add     eax,[_pixptr]
 778         movzx   eax, byte [eax]    ; get pixel from source bitmap
 779         add     esi, edx        ; inc u,v
 780         mov     ah, bh  ; form lighting table lookup value
 781         add     ebx, ebp        ; update lighting value (ebp = [_fx_dl_dx], same as the even pixel)
 782         cmp     al,255
 783         je      %%skip5         ; transparent texel: no store
 784         mov     al, [_gr_fade_table+eax]        ; xlat thru lighting table into dest buffer
 785         mov     [edi+ITERATION+1], al   ; write pixel
 786 %%skip5:        dec     ecx
 787         jz      near _none_to_do
 788 %endmacro
 789
 790 %rep (1 << (NBITS-1))
 791         ; 2^(NBITS-1) expansions x 2 pixels; ITERATION is the byte offset from edi for each pair
 792     repproc3
 793 %assign ITERATION  ITERATION + 2
 794
 795 %endrep
 796
 797 ; Should never get here!!!!  (num_left_over is below 2^NBITS, so ecx hits 0 inside the unrolled code)
 798         int     3
 799         jmp     _none_to_do
 800
 801 ; ----------------------------------------- End of LeftOver Pixels ------------------------------------------
 802
 803 ; --BUGGY NEW--NewDoEndPixels:
 804 ; --BUGGY NEW-- mov     eax, num_left_over
 805 ; --BUGGY NEW-- and     num_left_over, 3
 806 ; --BUGGY NEW-- shr     eax, 2
 807 ; --BUGGY NEW-- je      NDEP_1
 808 ; --BUGGY NEW-- mov     _loop_count, eax
 809 ; --BUGGY NEW--
 810 ; --BUGGY NEW--; do 4 pixels per hunk, not 16, so div deltas by 4 (16/4=4)
 811 ; --BUGGY NEW-- shr DU1,2
 812 ; --BUGGY NEW-- shr DV1,2
 813 ; --BUGGY NEW-- shr DZ1,2
 814 ; --BUGGY NEW--
 815 ; --BUGGY NEW--NDEP_TopOfLoop4:
 816 ; --BUGGY NEW-- add     ebx, DU1
 817 ; --BUGGY NEW-- add     ebp, DV1
 818 ; --BUGGY NEW-- add     ecx, DZ1
 819 ; --BUGGY NEW-- je      _div_0_abort    ; would be dividing by 0, so abort
 820 ; --BUGGY NEW--
 821 ; --BUGGY NEW--; Done with ebx, ebp, ecx until next iteration
 822 ; --BUGGY NEW-- push    ebx
 823 ; --BUGGY NEW-- push    ecx
 824 ; --BUGGY NEW-- push    ebp
 825 ; --BUGGY NEW-- push    edi
 826 ; --BUGGY NEW--
 827 ; --BUGGY NEW--; Find fixed U1
 828 ; --BUGGY NEW-- mov     eax, ebx
 829 ; --BUGGY NEW-- mov     edx,ebx
 830 ; --BUGGY NEW-- shl     eax,(ZSHIFT-2)
 831 ; --BUGGY NEW-- sar     edx,32-(ZSHIFT-2)
 832 ; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
 833 ; --BUGGY NEW-- shl     eax, 16-(ZSHIFT-2)
 834 ; --BUGGY NEW-- mov     ebx, eax        ; ebx = U1 until pop's
 835 ; --BUGGY NEW--
 836 ; --BUGGY NEW--; Find fixed V1
 837 ; --BUGGY NEW-- mov     eax, ebp
 838 ; --BUGGY NEW-- mov     edx, ebp
 839 ; --BUGGY NEW-- shl     eax,(ZSHIFT-2)
 840 ; --BUGGY NEW-- sar     edx,32-(ZSHIFT-2)
 841 ; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
 842 ; --BUGGY NEW--
 843 ; --BUGGY NEW-- mov     ecx, U0 ; ecx = U0 until pop's
 844 ; --BUGGY NEW-- mov     edi, V0 ; edi = V0 until pop's
 845 ; --BUGGY NEW--
 846 ; --BUGGY NEW-- shl     eax, 16-(ZSHIFT-2)
 847 ; --BUGGY NEW-- mov     ebp, eax        ; ebp = V1 until pop's
 848 ; --BUGGY NEW--
 849 ; --BUGGY NEW--; Make ESI =  V0:U0 in 6:10,6:10 format
 850 ; --BUGGY NEW-- mov     eax, ecx
 851 ; --BUGGY NEW-- shr     eax, 6
 852 ; --BUGGY NEW-- mov     esi, edi
 853 ; --BUGGY NEW-- shl     esi, 10
 854 ; --BUGGY NEW-- mov     si, ax
 855 ; --BUGGY NEW--
 856 ; --BUGGY NEW--; Make EDX = DV:DU in 6:10,6:10 format
 857 ; --BUGGY NEW-- mov     eax, ebx
 858 ; --BUGGY NEW-- sub     eax, ecx
 859 ; --BUGGY NEW-- sar     eax, (NBITS-2)+6
 860 ; --BUGGY NEW-- mov     edx, ebp
 861 ; --BUGGY NEW-- sub     edx, edi
 862 ; --BUGGY NEW-- shl     edx, 10-(NBITS-2)       ; EDX = V1-V0/ 4 in 6:10 int:frac
 863 ; --BUGGY NEW-- mov     dx, ax  ; put delta u in low word
 864 ; --BUGGY NEW--
 865 ; --BUGGY NEW--; Save the U1 and V1 so we don't have to divide on the next iteration
 866 ; --BUGGY NEW-- mov     U0, ebx
 867 ; --BUGGY NEW-- mov     V0, ebp
 868 ; --BUGGY NEW--
 869 ; --BUGGY NEW-- pop     edi     ; Restore EDI before using it
 870 ; --BUGGY NEW--
 871 ; --BUGGY NEW--; LIGHTING CODE
 872 ; --BUGGY NEW-- mov     ebx, _fx_l
 873 ; --BUGGY NEW-- mov     ebp, _fx_dl_dx
 874 ; --BUGGY NEW--
 875 ; --BUGGY NEW--;**      test    _Transparency_on,-1
 876 ; --BUGGY NEW--;**      je      NDEP_no_trans1
 877 ; --BUGGY NEW--
 878 ; --BUGGY NEW--        REPT 2
 879 ; --BUGGY NEW-- local   NDEP_skipa1, NDEP_skipa2
 880 ; --BUGGY NEW--
 881 ; --BUGGY NEW-- mov     eax, esi        ; get u,v
 882 ; --BUGGY NEW-- shr     eax, 26 ; shift out all but int(v)
 883 ; --BUGGY NEW-- shld    ax,si,6 ; shift in u, shifting up v
 884 ; --BUGGY NEW-- add     esi, edx        ; inc u,v
 885 ; --BUGGY NEW-- mov     al, es:[eax]    ; get pixel from source bitmap
 886 ; --BUGGY NEW-- cmp     al,255
 887 ; --BUGGY NEW-- je      NDEP_skipa1
 888 ; --BUGGY NEW-- mov     ah, bh  ; form lighting table lookup value
 889 ; --BUGGY NEW-- add     ebx, ebp        ; update lighting value
 890 ; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
 891 ; --BUGGY NEW-- mov     [edi],al
 892 ; --BUGGY NEW--NDEP_skipa1:
 893 ; --BUGGY NEW-- inc     edi
 894 ; --BUGGY NEW--
 895 ; --BUGGY NEW--; Do odd pixel
 896 ; --BUGGY NEW-- mov     eax, esi        ; get u,v
 897 ; --BUGGY NEW-- shr     eax, 26 ; shift out all but int(v)
 898 ; --BUGGY NEW-- shld    ax,si,6 ; shift in u, shifting up v
 899 ; --BUGGY NEW-- add     esi, edx        ; inc u,v
 900 ; --BUGGY NEW-- mov     al, es:[eax]    ; get pixel from source bitmap
 901 ; --BUGGY NEW-- cmp     al,255
 902 ; --BUGGY NEW-- je      NDEP_skipa2
 903 ; --BUGGY NEW-- mov     ah, bh  ; form lighting table lookup value
 904 ; --BUGGY NEW-- add     ebx, ebp        ; update lighting value
 905 ; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
 906 ; --BUGGY NEW-- mov     [edi],al
 907 ; --BUGGY NEW--NDEP_skipa2:
 908 ; --BUGGY NEW-- inc     edi
 909 ; --BUGGY NEW--
 910 ; --BUGGY NEW--        ENDM
 911 ; --BUGGY NEW--
 912 ; --BUGGY NEW-- mov     _fx_l, ebx
 913 ; --BUGGY NEW-- pop     ebp
 914 ; --BUGGY NEW-- pop     ecx
 915 ; --BUGGY NEW-- pop     ebx
 916 ; --BUGGY NEW-- dec     _loop_count
 917 ; --BUGGY NEW-- jnz     NDEP_TopOfLoop4
 918 ; --BUGGY NEW--
 919 ; --BUGGY NEW-- test    num_left_over, -1
 920 ; --BUGGY NEW-- je      _none_to_do
 921 ; --BUGGY NEW--
 922 ; --BUGGY NEW--NDEP_1:
 923 ; --BUGGY NEW-- mov     esi,ebx
 924 ; --BUGGY NEW--
 925 ; --BUGGY NEW-- align   4
 926 ; --BUGGY NEW--NDEP_loop:
 927 ; --BUGGY NEW--
 928 ; --BUGGY NEW--; compute v coordinate
 929 ; --BUGGY NEW-- mov     eax, ebp        ; get v
 930 ; --BUGGY NEW-- mov     edx, eax
 931 ; --BUGGY NEW-- sar     edx, 31
 932 ; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
 933 ; --BUGGY NEW--
 934 ; --BUGGY NEW-- and     eax,3fh ; mask with height-1
 935 ; --BUGGY NEW-- mov     ebx,eax
 936 ; --BUGGY NEW--
 937 ; --BUGGY NEW--; compute u coordinate
 938 ; --BUGGY NEW-- mov     eax,    esi     ; get u
 939 ; --BUGGY NEW-- mov     edx, eax
 940 ; --BUGGY NEW-- sar     edx, 31
 941 ; --BUGGY NEW-- idiv    ecx     ; eax = (u/z)
 942 ; --BUGGY NEW--
 943 ; --BUGGY NEW-- shl     eax,26
 944 ; --BUGGY NEW-- shld    ebx,eax,6       ; esi = v*64+u
 945 ; --BUGGY NEW--
 946 ; --BUGGY NEW--; read 1  pixel
 947 ; --BUGGY NEW-- xor     eax, eax
 948 ; --BUGGY NEW-- mov     al, es:[ebx]    ; get pixel from source bitmap
 949 ; --BUGGY NEW--
 950 ; --BUGGY NEW--; lighting code
 951 ; --BUGGY NEW-- mov     ebx, _fx_l      ; get temp copy of lighting value
 952 ; --BUGGY NEW-- mov     ah, bh  ; get lighting level
 953 ; --BUGGY NEW-- add     ebx, _fx_dl_dx  ; update lighting value
 954 ; --BUGGY NEW-- mov     _fx_l, ebx      ; save temp copy of lighting value
 955 ; --BUGGY NEW--
 956 ; --BUGGY NEW--; transparency check
 957 ; --BUGGY NEW-- cmp     al,255
 958 ; --BUGGY NEW-- je      NDEP_skip2      ; this pixel is transparent, so don't write it (or light it)
 959 ; --BUGGY NEW--
 960 ; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat pixel thru lighting tables
 961 ; --BUGGY NEW--
 962 ; --BUGGY NEW--; write 1 pixel
 963 ; --BUGGY NEW-- mov     [edi],al
 964 ; --BUGGY NEW--NDEP_skip2:      inc     edi
 965 ; --BUGGY NEW--
 966 ; --BUGGY NEW--; update deltas
 967 ; --BUGGY NEW-- add     ebp,_fx_dv_dx
 968 ; --BUGGY NEW-- add     esi,_fx_du_dx
 969 ; --BUGGY NEW-- add     ecx,_fx_dz_dx
 970 ; --BUGGY NEW-- je      _div_0_abort    ; would be dividing by 0, so abort
 971 ; --BUGGY NEW--
 972 ; --BUGGY NEW-- dec     num_left_over
 973 ; --BUGGY NEW-- jne     NDEP_loop
 974 ; --BUGGY NEW--
 975 ; --BUGGY NEW-- jmp     _none_to_do
 976
 977 NewDoEndPixels:         ; lit tail loop: one full perspective divide per pixel, for [num_left_over] pixels
 978         mov     esi,ebx ; esi carries u through the loop; ebx is reused as scratch below
 979 ; In: ebx = u, ebp = v, ecx = z (the divisor), edi = destination pointer.  Exits via _none_to_do or _div_0_abort.
 980         align   4
 981 NDEP_loop:
 982 ; NOTE(review): the count is tested at the bottom, so num_left_over == 0 on entry would wrap -- presumably callers guard this; confirm.
 983 ; compute v coordinate
 984         mov     eax, ebp        ; get v
 985         mov     edx, eax
 986         sar     edx, 31 ; edx:eax = v sign-extended to 64 bits, as idiv requires
 987         idiv    ecx     ; eax = (v/z)
 988
 989         and     eax,3fh ; mask with height-1
 990         mov     ebx,eax ; ebx = texture row (0..63)
 991
 992 ; compute u coordinate
 993         mov     eax,    esi     ; get u
 994         mov     edx, eax
 995         sar     edx, 31 ; sign-extend u into edx:eax
 996         idiv    ecx     ; eax = (u/z)
 997
 998         shl     eax,26  ; keep only the low 6 bits of u, moved to the top of eax
 999         shld    ebx,eax,6       ; ebx = v*64 + (u & 3fh): byte offset into the source bitmap
1000
1001 ; read 1  pixel
1002         add     ebx,[_pixptr]   ; ebx = address of the texel (_pixptr + offset)
1003         xor     eax, eax        ; clear eax so it can serve as a table index below
1004         mov     al, [ebx]    ; get pixel from source bitmap
1005
1006 ; lighting code (note: _fx_l advances even when the pixel turns out to be transparent)
1007         mov     ebx, [_fx_l]    ; get temp copy of lighting value
1008         mov     ah, bh  ; get lighting level: eax = (bits 8..15 of _fx_l) * 256 + pixel
1009         add     ebx, [_fx_dl_dx]        ; update lighting value
1010         mov     [_fx_l], ebx    ; save temp copy of lighting value
1011
1012 ; transparency check
1013         cmp     al,255  ; color index 255 is treated as transparent
1014         je      NDEP_skip2      ; this pixel is transparent, so don't write it (or light it)
1015
1016         mov     al, [_gr_fade_table+eax]        ; xlat pixel thru lighting tables
1017
1018 ; write 1 pixel
1019         mov     [edi],al
1020 NDEP_skip2:     inc     edi
1021
1022 ; update deltas
1023         add     ebp,[_fx_dv_dx]
1024         add     esi,[_fx_du_dx]
1025         add     ecx,[_fx_dz_dx] ; must stay directly above the je: the branch tests ZF from this add
1026         je      near _div_0_abort    ; would be dividing by 0, so abort
1027
1028         dec     dword [num_left_over]
1029         jne     NDEP_loop       ; repeat until the remaining-pixel count reaches zero
1030
1031         jmp     _none_to_do
1032
1033 ; ==================================================== No Lighting Code ======================================================
1034 global _tmap_loop_fast_nolight
1035 _tmap_loop_fast_nolight:        ; unlit scanline entry.  In: ebx = u, ebp = v, ecx = z (divisor), edi = destination
1036         mov     esi,ebx ; esi carries u while ebx is used as scratch
1037 ; Phase 1: draw single pixels, each with a full divide, until edi is 4-byte aligned.
1038         align   4
1039 NotDwordAligned1_nolight:
1040         test    edi, 11b        ; low two address bits clear => destination is dword aligned
1041         jz      DwordAligned1_nolight
1042
1043 ; compute v coordinate
1044         mov     eax,ebp ; get v
1045         mov     edx, eax
1046         sar     edx, 31 ; edx:eax = sign-extended dividend for idiv
1047         idiv    ecx     ; eax = (v/z)
1048
1049         and     eax,3fh ; mask with height-1
1050         mov     ebx,eax
1051
1052 ; compute u coordinate
1053         mov     eax, esi        ; get u
1054         mov     edx, eax
1055         sar     edx, 31
1056         idiv    ecx     ; eax = (u/z)
1057
1058         shl     eax,26  ; keep only the low 6 bits of u, moved to the top of eax
1059         shld    ebx,eax,6       ; ebx = v*64 + (u & 3fh)
1060
1061 ; read 1  pixel
1062         add     ebx,[_pixptr]
1063         mov     al,[ebx]     ; get pixel from source bitmap
1064
1065 ; write 1 pixel (no lighting lookup on this path; a pixel equal to 255 is skipped, leaving the destination untouched)
1066         cmp     al,255
1067         je      skip6
1068         mov     [edi],al
1069 skip6:  inc     edi
1070
1071 ; update deltas
1072         add     ebp,[_fx_dv_dx]
1073         add     esi,[_fx_du_dx]
1074         add     ecx,[_fx_dz_dx] ; the je below tests ZF from this add
1075         je      near _div_0_abort    ; would be dividing by 0, so abort
1076
1077         dec     dword [_loop_count]
1078         jns     NotDwordAligned1_nolight        ; continue while the count is still >= 0 (i.e. _loop_count + 1 pixels were pending)
1079         jmp     _none_to_do
1080
1081 DwordAligned1_nolight:  ; Phase 2 setup: split the remaining pixels into 2^NBITS-pixel chunks plus a remainder
1082         mov     ebx,esi ; u moves back into ebx for the chunked loop
1083
1084         mov     eax, [_loop_count]
1085         inc     eax     ; eax = _loop_count + 1 = pixels still to draw
1086         mov     [num_left_over], eax
1087         shr     eax, NBITS      ; eax = number of whole chunks
1088
1089         test    eax, -1
1090         je      near tmap_loop       ; no 2^NBITS chunks, do divide/pixel for whole scanline
1091 ; NOTE(review): tmap_loop lies outside this part of the file -- confirm it is the right fallback for the no-lighting path.
1092         mov     [_loop_count], eax      ; _loop_count = pixels / NPIXS
1093         shl     eax, NBITS
1094         sub     [num_left_over], eax    ; num_left_over = pixels mod 2^NBITS
1095
1096 ; compute initial v coordinate
1097         mov     eax,ebp ; get v
1098         ;PDIV
1099         mov     edx,eax
1100         shl     eax,ZSHIFT
1101         sar     edx,32-ZSHIFT   ; edx:eax = v << ZSHIFT as a signed 64-bit dividend
1102         idiv    ecx     ; eax = (v/z) with ZSHIFT fraction bits
1103         shl     eax, 16-ZSHIFT  ; total scale is 2^16; the low 16-ZSHIFT fraction bits are zero
1104
1105         mov     [V0], eax
1106
1107 ; compute initial u coordinate
1108         mov     eax,ebx ; get u
1109         ;PDIV
1110         mov     edx,eax
1111         shl     eax,ZSHIFT
1112         sar     edx,32-ZSHIFT
1113         idiv    ecx     ; eax = (u/z) with ZSHIFT fraction bits
1114         shl     eax, 16-ZSHIFT
1115
1116         mov     [U0], eax
1117
1118 ; Set deltas to NPIXS pixel increments (per-pixel delta << NBITS = step across one whole chunk)
1119         mov     eax, [_fx_du_dx]
1120         shl     eax, NBITS
1121         mov     [DU1], eax
1122         mov     eax, [_fx_dv_dx]
1123         shl     eax, NBITS
1124         mov     [DV1], eax
1125         mov     eax, [_fx_dz_dx]
1126         shl     eax, NBITS
1127         mov     [DZ1], eax
1128
1129         align   4
1130 TopOfLoop4_nolight:     ; one pass = one chunk of 2^NBITS pixels, interpolated linearly between divides done at the chunk ends
1131         add     ebx, [DU1]
1132         add     ebp, [DV1]
1133         add     ecx, [DZ1]      ; u, v, z now describe the END of this chunk
1134         je      near _div_0_abort       ; z reached zero (ZF from the add above)
1135
1136 ; Done with ebx, ebp, ecx until next iteration
1137         push    ebx
1138         push    ecx
1139         push    ebp
1140         push    edi
1141
1142 ; Find fixed U1
1143         mov     eax, ebx
1144         ;PDIV
1145         mov     edx,eax
1146         shl     eax,ZSHIFT
1147         sar     edx,32-ZSHIFT   ; edx:eax = u << ZSHIFT as a signed 64-bit dividend
1148         idiv    ecx     ; eax = (u/z) with ZSHIFT fraction bits
1149         shl     eax, 16-ZSHIFT  ; total scale is 2^16
1150
1151         mov     ebx, eax        ; ebx = U1 until pop's
1152
1153 ; Find fixed V1
1154         mov     eax, ebp
1155         ;PDIV
1156         mov     edx,eax
1157         shl     eax,ZSHIFT
1158         sar     edx,32-ZSHIFT
1159         idiv    ecx     ; eax = (v/z) with ZSHIFT fraction bits
1160         shl     eax, 16-ZSHIFT
1161
1162         mov     ebp, eax        ; ebp = V1 until pop's
1163
1164         mov     ecx, [U0]       ; ecx = U0 until pop's
1165         mov     edi, [V0]       ; edi = V0 until pop's
1166
1167 ; Make ESI =  V0:U0 in 6:10,6:10 format
1168         mov     eax, ecx
1169         shr     eax, 6  ; low 16 bits of eax = U0 as 6 integer bits : 10 fraction bits
1170         mov     esi, edi
1171         shl     esi, 10 ; high word of esi = V0 as 6 integer bits : 10 fraction bits
1172         mov     si, ax
1173
1174 ; Make EDX = DV:DU in 6:10,6:10 format
1175         mov     eax, ebx
1176         sub     eax, ecx
1177         sar     eax, NBITS+6    ; (U1-U0) / 2^NBITS per pixel, converted to 6:10
1178         mov     edx, ebp
1179         sub     edx, edi
1180         shl     edx, 10-NBITS   ; EDX high word = (V1-V0) / 2^NBITS in 6:10 int:frac
1181         mov     dx, ax  ; put delta u in low word
1182
1183 ; Save the U1 and V1 so we don't have to divide on the next iteration
1184         mov     [U0], ebx
1185         mov     [V0], ebp
1186
1187         pop     edi     ; Restore EDI before using it
1188
1189 %macro repproc4 0
1190 ; Draw 4 pixels: gather 4 texels into ecx (first pixel in the low byte), then store at [edi].  Clobbers eax and ecx; esi += 4*edx; edi += 4.
1191         mov     eax, esi        ; get u,v
1192         shr     eax, 26 ; shift out all but int(v)
1193         shld    ax,si,6 ; shift in u, shifting up v
1194         add     esi, edx        ; inc u,v (one 32-bit add steps both packed fields; a carry out of the u word lands in the lowest v fraction bit)
1195         add     eax,[_pixptr]
1196         mov     cl, [eax]    ; load into buffer register
1197
1198         mov     eax, esi        ; get u,v
1199         shr     eax, 26 ; shift out all but int(v)
1200         shld    ax,si,6 ; shift in u, shifting up v
1201         add     eax,[_pixptr]
1202         mov     ch, [eax]    ; load into buffer register
1203         add     esi, edx        ; inc u,v
1204         ror     ecx, 16 ; move to next dest pixel (pixels 0,1 go to the high half so cl/ch are free for pixels 2,3)
1205
1206         mov     eax, esi        ; get u,v
1207         shr     eax, 26 ; shift out all but int(v)
1208         shld    ax,si,6 ; shift in u, shifting up v
1209         add     eax,[_pixptr]
1210         mov     cl, [eax]    ; load into buffer register
1211         add     esi, edx        ; inc u,v
1212
1213         mov     eax, esi        ; get u,v
1214         shr     eax, 26 ; shift out all but int(v)
1215         shld    ax,si,6 ; shift in u, shifting up v
1216         add     eax,[_pixptr]
1217         mov     ch, [eax]    ; load into buffer register
1218         add     esi, edx        ; inc u,v
1219         ror     ecx, 16 ;-- can get rid of this, just write in different order below --         ; move to next dest pixel
1220 ; ecx now holds pixels 0..3 in bytes 0..3, i.e. in memory order for a little-endian dword store.
1221         test    dword [_Transparency_on],-1
1222         je      %%no_trans2     ; transparency disabled: store all four bytes unconditionally
1223         cmp     ecx,-1
1224         je      %%skip7         ; all four texels are 255: nothing to write
1225
1226         cmp     cl,255
1227         je      %%skip1q
1228         mov     [edi],cl
1229 %%skip1q:
1230
1231         cmp     ch,255
1232         je      %%skip2q
1233         mov     [edi+1],ch
1234 %%skip2q:
1235         ror     ecx,16  ; bring pixels 2,3 down into cl/ch
1236
1237         cmp     cl,255
1238         je      %%skip3q
1239         mov     [edi+2],cl
1240 %%skip3q:
1241
1242
1243         cmp     ch,255
1244         je      %%skip4q
1245         mov     [edi+3],ch
1246 %%skip4q:
1247
1248         jmp     %%skip7
1249 %%no_trans2:
1250         mov     [edi],ecx       ; Draw 4 pixels to display
1251 %%skip7:        add     edi,4
1252 %endmacro
1253 ; Each expansion draws 4 pixels, so (1 << (NBITS-2)) expansions cover one 2^NBITS-pixel chunk.
1254 %rep (1 << (NBITS-2))
1255         ;local  skip7, no_trans2, skip1q, skip2q, skip3q, skip4q
1256     repproc4
1257
1258 %endrep
1259
1260         pop     ebp     ; restore v, z, u saved at the top of the chunk (edi was already popped before drawing)
1261         pop     ecx
1262         pop     ebx
1263         dec     dword [_loop_count]
1264         jnz     near TopOfLoop4_nolight ; more whole chunks remain
1265
1266 EndOfLoop4_nolight:
1267 ; All whole chunks are drawn; finish with the remainder, if any.
1268         test    dword [num_left_over], -1
1269         je      near _none_to_do
1270 ; Remainder: interpolate toward the point one FULL chunk ahead, but stop after num_left_over pixels.
1271 DoEndPixels_nolight:
1272         add     ebx, [DU1]
1273         add     ebp, [DV1]
1274         add     ecx, [DZ1]
1275         je      near _div_0_abort       ; NOTE(review): z was stepped a whole chunk, past the last pixel drawn here, so this abort may fire for a span whose own pixels are fine -- confirm intended
1276         push    edi     ; use edi as a temporary variable
1277
1278 ; Find fixed U1
1279         mov     eax, ebx
1280         mov     edx,eax
1281         shl     eax,ZSHIFT
1282         sar     edx,32-ZSHIFT   ; edx:eax = u << ZSHIFT as a signed 64-bit dividend
1283         idiv    ecx     ; eax = (u/z) with ZSHIFT fraction bits
1284         shl     eax, 16-ZSHIFT  ; total scale is 2^16
1285         mov     ebx, eax        ; ebx = U1 (ebx is not restored on this path)
1286
1287 ; Find fixed V1
1288         mov     eax, ebp
1289         mov     edx,eax
1290         shl     eax,ZSHIFT
1291         sar     edx,32-ZSHIFT
1292         idiv    ecx     ; eax = (v/z) with ZSHIFT fraction bits
1293         shl     eax, 16-ZSHIFT
1294         mov     ebp, eax        ; ebp = V1 (ebp is not restored on this path)
1295
1296         mov     ecx, [U0]       ; ecx = U0 for now; reloaded with the pixel count below
1297         mov     edi, [V0]       ; edi = V0 until the pop below
1298
1299 ; Make ESI =  V0:U0 in 6:10,6:10 format
1300         mov     eax, ecx
1301         shr     eax, 6  ; low 16 bits of eax = U0 as 6 integer bits : 10 fraction bits
1302         mov     esi, edi
1303         shl     esi, 10 ; high word of esi = V0 as 6 integer bits : 10 fraction bits
1304         mov     si, ax
1305
1306 ; Make EDX = DV:DU in 6:10,6:10 format
1307         mov     eax, ebx
1308         sub     eax, ecx
1309         sar     eax, NBITS+6    ; (U1-U0) / 2^NBITS per pixel, converted to 6:10
1310         mov     edx, ebp
1311         sub     edx, edi
1312         shl     edx, 10-NBITS   ; EDX high word = (V1-V0) / 2^NBITS in 6:10 int:frac
1313         mov     dx, ax  ; put delta u in low word
1314
1315         pop     edi     ; Restore EDI before using it
1316
1317         mov     ecx, [num_left_over]    ; ecx = pixels still to draw (non-zero: checked at EndOfLoop4_nolight)
1318 ; ITERATION is an assembly-time byte offset, bumped after every expansion below; edi itself is never advanced here.
1319 %assign ITERATION 0
1320 %macro repproc5 0
1321 ; Do 1 pixel: fetch the texel addressed by esi, store it unless it is 255, then leave as soon as the count in ecx reaches 0
1322         mov     eax, esi        ; get u,v
1323         shr     eax, 26 ; shift out all but int(v)
1324         shld    ax,si,6 ; shift in u, shifting up v
1325         add     eax,[_pixptr]
1326         movzx   eax, byte [eax]    ; load into buffer register
1327         add     esi, edx        ; inc u,v
1328         cmp     al,255
1329         je      %%skip8
1330         mov     [edi+ITERATION], al     ; write pixel
1331 %%skip8:        dec     ecx
1332         jz      near _none_to_do
1333 %endmacro
1334 ; Unrolled (1 << NBITS) times; the count in ecx decides how many of the copies actually run.
1335 %rep (1 << NBITS)
1336         ;local  skip8
1337         repproc5
1338 %assign ITERATION  ITERATION + 1
1339 %endrep
1340 ; Falling out of the unrolled copies would require the count in ecx to exceed (1 << NBITS).
1341 ; Should never get here!!!!!
1342         int     3       ; breakpoint trap to flag the impossible case
1343         jmp     _none_to_do
1344