texmap/tmap_per.asm

   1 ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
   2 ;SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
   3 ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
   4 ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
   5 ;IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
   6 ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
   7 ;FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
   8 ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
   9 ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
  10 ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
  11 ;
  12 ; $Source: /cvs/cvsroot/d2x/texmap/tmap_per.asm,v $
  13 ; $Revision: 1.1.1.1 $
  14 ; $Author: bradleyb $
  15 ; $Date: 2001-01-19 03:30:16 $
  16 ;
  17 ; Perspective texture mapper inner loop.
  18 ;
  19 ; $Log: not supported by cvs2svn $
  20 ; Revision 1.1.1.1  1999/06/14 22:14:01  donut
  21 ; Import of d1x 1.37 source.
  22 ;
  23 ; Revision 1.26  1995/02/20  18:22:55  john
  24 ; Put all the externs in the assembly modules into tmap_inc.asm.
  25 ; Also, moved all the C versions of the inner loops into a new module,
  26 ; scanline.c.
  27 ;
  28 ; Revision 1.25  1995/02/20  17:09:08  john
  29 ; Added code so that you can build the tmapper with no assembly!
  30 ;
  31 ; Revision 1.24  1995/01/10  09:32:07  mike
  32 ; mostly fix garbage at end of scanline, but slow down by 1-4%.
  33 ;
  34 ; Revision 1.23  1994/12/02  23:29:57  mike
  35 ; optimizations.
  36 ;
  37 ; Revision 1.22  1994/11/30  00:57:00  mike
  38 ; optimization.
  39 ;
  40 ; Revision 1.21  1994/11/21  13:57:42  mike
  41 ; fix right side shear bug
  42 ;
  43 ; Revision 1.20  1994/11/12  16:41:09  mike
  44 ; jae -> ja.
  45 ;
  46 ; Revision 1.19  1994/10/27  19:40:00  john
  47 ; Made lighting table lookup be _gr_fade_table[eax] instead
   48 ; of fs:[eax], which gets rid of a segment override that
  49 ; supposedly costs 1 clock on a 486.  Mainly, I wanted to verify
  50 ; that the only reason we need selectors is for the source texture
  51 ; data .
  52 ;
  53 ; Revision 1.18  1994/05/03  11:08:32  mike
  54 ; Trap divide overflows.
  55 ;
  56 ; Revision 1.17  1994/04/21  15:03:41  mike
  57 ; make faster.
  58 ;
  59 ; Revision 1.16  1994/04/08  16:46:57  john
  60 ; Made 32 fade levels. Hacked.
  61 ;
  62 ; Revision 1.15  1994/03/31  08:35:18  mike
  63 ; Fix quantized-by-4 bug in inner loop.
  64 ;
  65 ; Revision 1.14  1994/03/14  17:41:14  mike
  66 ; Fix bug in unlighted version.
  67 ;
  68 ; Revision 1.13  1994/03/14  15:45:14  mike
  69 ; streamline code.
  70 ;
  71 ; Revision 1.12  1994/01/14  14:01:58  mike
  72 ; *** empty log message ***
  73 ;
  74 ; Revision 1.11  1993/12/18  14:43:44  john
  75 ; Messed around with doing 1/z, the u*(1/z) and v*(1/z)
  76 ; (Went from 23 fps to 21 fps... not good! )
  77 ;
  78 ; Revision 1.10  1993/12/17  16:14:17  john
  79 ; Split lighted/nonlighted, so there is no cmp lighting
  80 ; in the inner loop.
  81 ;
  82 ; Revision 1.9  1993/12/17  12:34:29  john
  83 ; Made leftover bytes use linear approx instead of correct...
  84 ; should save about 8 divides per scanline on average.
  85 ; Also, took out anti-aliasing code and rearranged to
  86 ; order of some instructions to help on 486 pipelining.
  87 ; (The anti-aliasing code did *not* look good, so I
  88 ; figure there was no reason to keep it in. )
  89 ;
  90 ; Revision 1.8  1993/12/16  18:37:52  mike
  91 ; Align some stuff on 4 byte boundaries.
  92 ;
  93 ; Revision 1.7  1993/11/30  08:44:18  john
  94 ; Made selector set check for < 64*64 bitmaps.
  95 ;
  96 ; Revision 1.6  1993/11/23  17:25:26  john
  97 ; Added safety "and eax, 0fffh" in lighting lookup.
  98 ;
  99 ; Revision 1.5  1993/11/23  15:08:52  mike
 100 ; Fixed lighting bug.
 101 ;
 102 ; Revision 1.4  1993/11/23  14:38:50  john
 103 ; optimized NORMAL code by switching EBX and ESI, so BH can be used in
 104 ; the lighting process.
 105 ;
 106 ; Revision 1.3  1993/11/23  14:30:53  john
 107 ; Made the perspective tmapper do 1/8 divides; added lighting.
 108 ;
 109 ; Revision 1.2  1993/11/22  10:24:59  mike
 110 ; *** empty log message ***
 111 ;
 112 ; Revision 1.1  1993/09/08  17:29:53  mike
 113 ; Initial revision
 114 ;
 115 ;
 116 ;
 117
 118 [BITS 32]
 119
 120 global  _asm_tmap_scanline_per
 121 global  asm_tmap_scanline_per
 122
 123 %include        "tmap_inc.asm"
 124
  125 [SECTION .data]       ; globals shared with other modules, followed by this file's private scratch variables
  126 align 4
  127     ;extern _per2_flag;:dword
  128 %ifdef __ENV_LINUX__
  129 ; Cater for linux ELF compilers: map the underscore-prefixed names used below onto unprefixed symbols.
  130 global x
  131 %define _loop_count loop_count
  132 %define _new_end new_end
  133 %define _scan_doubling_flag scan_doubling_flag
  134 %define _linear_if_far_flag linear_if_far_flag
  135 %endif
  136
  137         global _x
  138         global _loop_count
  139         global _new_end
  140         global _scan_doubling_flag
  141         global _linear_if_far_flag
  142
  143 ;        global _max_ecx
  144 ;        global _min_ecx
  145
  146     mem_edx dd 0        ; only referenced by the commented-out alternative inner loop in the visible code
  147     x:                  ; same address as _x (exported as `x` only under __ENV_LINUX__)
  148     _x          dd      0
  149     _loop_count dd      0       ; pixel count-1 for the per-pixel loops; reused as the 2^NBITS-pixel chunk count
  150
  151 ;    _max_ecx    dd      0
  152 ;    _min_ecx    dd      55555555h
  153     _new_end     dd      1       ; if set, use new, but slower, way of finishing off extra pixels on scanline, 01/10/95 --MK
  154
  155     _scan_doubling_flag dd 0    ; not referenced in the visible part of this file
  156     _linear_if_far_flag dd 0    ; not referenced in the visible part of this file
  157
  158 ;---------- local variables
  159 align 4
  160     req_base    dd      0       ; not referenced in the visible part of this file
  161     req_size    dd      0       ; not referenced in the visible part of this file
  162     U0          dd      0       ; u/z at the start of the current chunk (PDIV result)
  163     U1          dd      0       ; the visible code keeps U1 in ebx and never touches this slot
  164     V0          dd      0       ; v/z at the start of the current chunk (PDIV result)
  165     V1          dd      0       ; the visible code keeps V1 in ebp and never touches this slot
  166     num_left_over   dd  0       ; pixels remaining after the whole 2^NBITS-pixel chunks
  167     DU1         dd      0       ; _fx_du_dx << NBITS : u step per chunk
  168     DV1         dd      0       ; _fx_dv_dx << NBITS : v step per chunk
  169     DZ1         dd      0       ; _fx_dz_dx << NBITS : z step per chunk
 170
 171 [SECTION .text]
 172
 173 ; --------------------------------------------------------------------------------------------------
 174 ; Enter:
 175 ;       _xleft  fixed point left x coordinate
 176 ;       _xright fixed point right x coordinate
 177 ;       _y      fixed point y coordinate
 178 ;       _pixptr address of source pixel map
 179 ;       _u      fixed point initial u coordinate
 180 ;       _v      fixed point initial v coordinate
 181 ;       _z      fixed point initial z coordinate
 182 ;       _du_dx  fixed point du/dx
 183 ;       _dv_dx  fixed point dv/dx
 184 ;       _dz_dx  fixed point dz/dx
 185
 186 ;   for (x = (int) xleft; x <= (int) xright; x++) {
 187 ;      _setcolor(read_pixel_from_tmap(srcb,((int) (u/z)) & 63,((int) (v/z)) & 63));
 188 ;      _setpixel(x,y);
 189 ;
 190 ;      u += du_dx;
 191 ;      v += dv_dx;
 192 ;      z += dz_dx;
 193 ;   }
 194
 195
 196 align   16
  197 _asm_tmap_scanline_per:       ; entry point: draw one perspective-mapped scanline from the _fx_* globals
  198 asm_tmap_scanline_per:        ; same entry point under the unprefixed name
  199 ;        push    es
  200         pusha                   ; save all general registers; the popa at _none_to_do restores them
  201
  202 ;---------------------------- setup for loop ---------------------------------
  203 ; Setup for loop:       _loop_count  iterations = (int) xright - (int) xleft
  204 ;       esi     u coordinate (the texture base [_pixptr] is added per pixel, not kept in a register)
  205 ;       edi     initial row pointer = _y_pointers[y] + _write_buffer + x
  206 ; NOTE: fx_xright and fx_xleft changed from fix to int by mk on 12/01/94.
  207
  208 ; (no register is loaded with the texture map pointer here; see [_pixptr] in the loops)
  209
  210 ; set edi = address of first pixel to modify
  211         mov     edi,[_fx_y]
  212 ;        mov     es,[_pixel_data_selector]       ; selector[0*2]
  213
  214         mov     edi,[_y_pointers+edi*4] ; per-row value from the dword table, indexed by y
  215
  216         mov     ebx,[_fx_xleft]
  217         test    ebx, ebx
  218         jns     ebx_ok
  219         xor     ebx, ebx        ; negative left edge is clamped to column 0
  220 ebx_ok: add     edi,[_write_buffer]
  221         add     edi,ebx
  222
  223 ; set _loop_count = # of iterations
  224         mov     eax,[_fx_xright]
  225         sub     eax,ebx
  226         js      near _none_to_do        ; right edge is left of the (clamped) left edge: nothing to draw
  227         mov     [_loop_count],eax       ; stored as pixel count-1 (the loops below use dec / jns)
  228
  229 ; lighting values are passed in fixed point, but need to be in 8 bit integer, 8 bit fraction so we can easily
  230 ; get the integer by reading %bh
  231         sar     dword [_fx_l], 8        ; NOTE: rewrites the _fx_l global in place
  232         sar     dword [_fx_dl_dx],8     ; NOTE: rewrites the _fx_dl_dx global in place
  233         jns     dl_dx_ok
  234         inc     dword [_fx_dl_dx]       ; round towards 0 for negative deltas
  235 dl_dx_ok:
  236
  237 ; set initial values
  238         mov     ebx,[_fx_u]
  239         mov     ebp,[_fx_v]
  240         mov     ecx,[_fx_z]
  241
  242         test    dword [_per2_flag],-1
  243         je      tmap_loop       ; _per2_flag == 0: do a divide pair for every pixel
  244
  245         test    dword [_Lighting_on], -1
  246         je     near _tmap_loop_fast_nolight     ; defined outside the visible part of this file
  247         jmp     _tmap_loop_fast
  248 ;tmap_loop_fast_nolight_jumper:
  249 ;    jmp tmap_loop_fast_nolight
  250
  251 ;================ PERSPECTIVE TEXTURE MAP INNER LOOPS ========================
  252 ;
  253 ; Usage in loop:        eax     division, pixel value
  254 ;       ebx     u
  255 ;       ecx     z
  256 ;       edx     division
  257 ;       ebp     v
  258 ;       esi     source pixel pointer
  259 ;       edi     destination pixel pointer
  260
  261 ;-------------------- NORMAL PERSPECTIVE TEXTURE MAP LOOP -----------------
  262 tmap_loop:
  263         mov     esi, ebx        ; esi becomes u coordinate
  264
  265         align   4
  266 tmap_loop0:
  267
  268 ; compute v coordinate
  269         mov     eax, ebp        ; get v
  270         mov     edx, eax
  271         sar     edx, 31         ; edx:eax = sign-extended v for the signed divide
  272         idiv    ecx     ; eax = (v/z)
  273
  274         and     eax,3fh ; mask with height-1
  275         mov     ebx,eax
  276
  277 ; compute u coordinate
  278         mov     eax, esi        ; get u
  279         mov     edx, eax
  280         sar     edx, 31         ; edx:eax = sign-extended u for the signed divide
  281         idiv    ecx     ; eax = (u/z)
  282
  283         shl     eax,26          ; keep only the low 6 bits of u, moved to the top of eax
  284         shld    ebx,eax,6       ; ebx = v*64+u
  285
  286 ; read 1 pixel
  287         add     ebx, [_pixptr]
  288         xor     eax, eax
  289         test    dword [_Lighting_on], -1
  290         mov     al, [ebx]    ; get pixel from source bitmap (mov leaves the flags from test intact)
  291         je      NoLight1
  292
  293 ; LIGHTING CODE
  294         mov     ebx, [_fx_l]    ; get temp copy of lighting value
  295         mov     ah, bh  ; get lighting level
  296         add     ebx, [_fx_dl_dx]        ; update lighting value
  297         mov     al, [_gr_fade_table+eax]        ; xlat pixel thru lighting tables
  298         mov     [_fx_l], ebx    ; save temp copy of lighting value
  299
  300 ; transparency check
  301 NoLight1:       cmp     al,255
  302         je      skip1           ; value 255 is not written to the destination
  303
  304         mov     [edi],al
  305 skip1:  inc     edi
  306
  307 ; update deltas
  308         add     ebp,[_fx_dv_dx]
  309         add     esi,[_fx_du_dx]
  310         add     ecx,[_fx_dz_dx]
  311         je      _div_0_abort    ; would be dividing by 0, so abort
  312
  313         dec     dword [_loop_count]
  314         jns     tmap_loop0      ; runs (_loop_count + 1) times in total
  315
  316 _none_to_do:                    ; common exit for every path in this routine
  317         popa
  318 ;        pop     es
  319         ret
  320
  321 ; We detected a z=0 condition, which seems pretty bogus, don't you think?
  322 ; So, we abort, but maybe we want to know about it.
  323 _div_0_abort:
  324         jmp     _none_to_do
 325
  326 ;-------------------------- PER/2^NBITS TMAPPER (pseudo-code below is written for 4 pixels per divide) ----------------
 327 ;
 328 ;       x = x1
 329 ;       U0 = u/w; V0 = v/w;
 330 ;       while ( 1 )
 331 ;               u += du_dx*4; v+= dv_dx*4
 332 ;               U1 = u/w; V1 = v/w;
 333 ;               DUDX = (U1-U0)/4; DVDX = (V1-V0)/4;
 334 ;
 335 ;       ; Pixel 0
 336 ;               pixels = texmap[V0*64+U0];
 337 ;               U0 += DUDX; V0 += DVDX
 338 ;       ; Pixel 1
 339 ;               pixels = (pixels<<8)+texmap[V0*64+U0];
 340 ;               U0 += DUDX; V0 += DVDX
 341 ;       ; Pixel 2
 342 ;               pixels = (pixels<<8)+texmap[V0*64+U0];
 343 ;               U0 += DUDX; V0 += DVDX
 344 ;       ; Pixel 3
 345 ;               pixels = (pixels<<8)+texmap[V0*64+U0];
 346 ;
 347 ;               screen[x] = pixel
 348 ;               x += 4;
 349 ;               U0 = U1; V0 = V1
 350
 351 NBITS equ 4     ; 2^NBITS pixels plotted per divide
 352 ZSHIFT equ 4    ; precision used in PDIV macro
 353
 354
 355 ;PDIV MACRO
 356 ; Returns EAX/ECX in 16.16 format in EAX. Trashes EDX
 357 ;          sig bits   6.3
 358 ;       mov     edx,eax
 359 ;       shl     eax,ZSHIFT
 360 ;       sar     edx,32-ZSHIFT
 361 ;       idiv    ecx     ; eax = (v/z)
 362 ;   shl eax, 16-ZSHIFT
 363 ;ENDM
 364
 365 global _tmap_loop_fast
 366
 367 ; -------------------------------------- Start of Getting Dword Aligned ----------------------------------------------
 368 ;       ebx     fx_u
 369
  370 _tmap_loop_fast:              ; lit fast path: plot single pixels until edi is dword aligned
  371         mov     esi,ebx         ; esi = u; ebx is used as scratch in the loop below
  372
  373         align   4
  374 NotDwordAligned1:
  375         test    edi, 11b        ; low two address bits clear => destination is dword aligned
  376         jz      DwordAligned1
  377
  378 ; compute v coordinate
  379         mov     eax, ebp        ; get v
  380         mov     edx, eax
  381         sar     edx, 31         ; edx:eax = sign-extended v for the signed divide
  382         idiv    ecx     ; eax = (v/z)
  383
  384         and     eax,3fh ; mask with height-1
  385         mov     ebx,eax
  386
  387 ; compute u coordinate
  388         mov     eax, esi        ; get u
  389         mov     edx, eax
  390         sar     edx, 31         ; edx:eax = sign-extended u for the signed divide
  391         idiv    ecx     ; eax = (u/z)
  392
  393         shl     eax,26          ; keep only the low 6 bits of u, moved to the top of eax
  394         shld    ebx,eax,6       ; ebx = v*64+u
  395
  396 ; read 1  pixel
  397         add     ebx,[_pixptr]
  398         xor     eax, eax
  399         mov     al, [ebx]    ; get pixel from source bitmap
  400
  401 ; lighting code (the lighting value advances for every pixel, before the transparency test)
  402         mov     ebx, [_fx_l]    ; get temp copy of lighting value
  403         mov     ah, bh  ; get lighting level
  404         add     ebx, [_fx_dl_dx]        ; update lighting value
  405         mov     [_fx_l], ebx    ; save temp copy of lighting value
  406
  407 ; transparency check
  408         cmp     al,255
  409         je      skip2   ; this pixel is transparent, so don't write it (or light it)
  410
  411         mov     al, [_gr_fade_table+eax]        ; xlat pixel thru lighting tables
  412
  413 ; write 1 pixel
  414         mov     [edi],al
  415 skip2:  inc     edi
  416
  417 ; update deltas
  418         add     ebp,[_fx_dv_dx]
  419         add     esi,[_fx_du_dx]
  420         add     ecx,[_fx_dz_dx]
  421         je      _div_0_abort    ; would be dividing by 0, so abort
  422
  423         dec     dword [_loop_count]
  424         jns     NotDwordAligned1
  425
  426         jmp     _none_to_do     ; scanline finished before the destination became aligned
 427
 428 ; -------------------------------------- End of Getting Dword Aligned ----------------------------------------------
 429
  430 DwordAligned1:                ; split the remaining pixels into 2^NBITS-pixel chunks plus a remainder
  431
  432         mov     eax, [_loop_count]
  433         mov     ebx, esi        ; get fx_u [pentium pipelining]
  434         inc     eax             ; eax = pixels still to draw (_loop_count holds count-1)
  435         mov     esi, eax
  436         and     esi, (1 << NBITS) - 1   ; esi = pixels left over after the whole chunks
  437         sar     eax, NBITS      ; eax = number of whole chunks; ZF set when there are none
  438         mov     [num_left_over], esi    ; mov leaves the flags from sar intact for the je below
  439         je      near tmap_loop  ; there are no 2^NBITS chunks, do divide/pixel for whole scanline
  440         mov     [_loop_count], eax      ; _loop_count = pixels / NPIXS
  441
  442 ; compute initial v coordinate (same sequence as the commented-out PDIV macro)
  443         mov     eax,ebp ; get v
  444         mov     edx,ebp
  445         shl     eax,ZSHIFT
  446         sar     edx,32-ZSHIFT   ; edx:eax = v << ZSHIFT, sign-extended
  447         idiv    ecx     ; eax = (v/z)
  448         shl     eax, 16-ZSHIFT
  449         mov     [V0], eax
  450
  451 ; compute initial u coordinate
  452         mov     eax,ebx ; get u
  453         mov     edx,ebx
  454         shl     eax,ZSHIFT
  455         sar     edx,32-ZSHIFT   ; edx:eax = u << ZSHIFT, sign-extended
  456         idiv    ecx     ; eax = (u/z)
  457         shl     eax, 16-ZSHIFT
  458         mov     [U0], eax
  459
  460 ; Set deltas to NPIXS pixel increments
  461         mov     eax, [_fx_du_dx]
  462         shl     eax, NBITS
  463         mov     [DU1], eax
  464         mov     eax, [_fx_dv_dx]
  465         shl     eax, NBITS
  466         mov     [DV1], eax
  467         mov     eax, [_fx_dz_dx]
  468         shl     eax, NBITS
  469         mov     [DZ1], eax
 470
 471         align   4
  472 TopOfLoop4:                   ; one iteration per 2^NBITS-pixel chunk: divide at the chunk end, interpolate inside
  473         add     ebx, [DU1]      ; step u, v, z to the end of this chunk
  474         add     ebp, [DV1]
  475         add     ecx, [DZ1]
  476         je      near _div_0_abort       ; would be dividing by 0, so abort
  477
  478 ; Done with ebx, ebp, ecx until next iteration
  479         push    ebx
  480         push    ecx
  481         push    ebp
  482         push    edi
  483
  484 ; Find fixed U1
  485         mov     eax, ebx
  486         mov     edx,ebx
  487         shl     eax,ZSHIFT
  488         sar     edx,32-ZSHIFT
  489         idiv    ecx     ; eax = (u/z)
  490         shl     eax, 16-ZSHIFT
  491         mov     ebx, eax        ; ebx = U1 until pop's
  492
  493 ; Find fixed V1
  494         mov     eax, ebp
  495         mov     edx, ebp
  496         shl     eax,ZSHIFT
  497         sar     edx,32-ZSHIFT
  498         idiv    ecx     ; eax = (v/z)
  499
  500         mov     ecx, [U0]       ; ecx = U0 until pop's
  501         mov     edi, [V0]       ; edi = V0 until pop's
  502
  503         shl     eax, 16-ZSHIFT
  504         mov     ebp, eax        ; ebp = V1 until pop's
  505
  506 ; Make ESI =  V0:U0 in 6:10,6:10 format
  507         mov     eax, ecx
  508         shr     eax, 6          ; U0: 16.16 -> 6:10 in ax
  509         mov     esi, edi
  510         shl     esi, 10         ; V0: 16.16 -> 6:10 in the high word
  511         mov     si, ax
  512
  513 ; Make EDX = DV:DU in 6:10,6:10 format
  514         mov     eax, ebx
  515         sub     eax, ecx
  516         sar     eax, NBITS+6    ; ax = (U1-U0) / 2^NBITS in 6:10
  517         mov     edx, ebp
  518         sub     edx, edi
  519         shl     edx, 10-NBITS   ; EDX high word = (V1-V0) / 2^NBITS in 6:10 int:frac
  520         mov     dx, ax  ; put delta u in low word
  521
  522 ; Save the U1 and V1 so we don't have to divide on the next iteration
  523         mov     [U0], ebx
  524         mov     [V0], ebp
  525
  526         pop     edi     ; Restore EDI before using it
  527
  528 ; LIGHTING CODE
  529         mov     ebx, [_fx_l]    ; ebx = running lighting value, bh = its integer part
  530         mov     ebp, [_fx_dl_dx]        ; ebp = per-pixel lighting step
  531
  532         test    dword [_Transparency_on],-1
  533         je      near no_trans1  ; transparency off: use the variant that writes whole dwords
 534
 535 %macro repproc1 0
 536         mov     eax, esi        ; get u,v
 537         shr     eax, 26 ; shift out all but int(v)
 538         shld    ax,si,6 ; shift in u, shifting up v
 539         add     esi, edx        ; inc u,v
 540         add     eax, [_pixptr]
 541         movzx   eax, byte [eax]    ; get pixel from source bitmap
 542         cmp     al,255
 543         je      %%skipa1
 544         mov     ah, bh  ; form lighting table lookup value
 545         add     ebx, ebp        ; update lighting value
 546         mov     al, [_gr_fade_table+eax]        ; xlat thru lighting table into dest buffer
 547         mov     [edi],al
 548 %%skipa1:
 549         inc     edi
 550
 551 ; Do odd pixel
 552         mov     eax, esi        ; get u,v
 553         shr     eax, 26 ; shift out all but int(v)
 554         shld    ax,si,6 ; shift in u, shifting up v
 555         add     esi, edx        ; inc u,v
 556         add     eax,[_pixptr]
 557         movzx   eax, byte [eax]    ; get pixel from source bitmap
 558         cmp     al,255
 559         je      %%skipa2
 560         mov     ah, bh  ; form lighting table lookup value
 561         add     ebx, ebp        ; update lighting value
 562         mov     al, [_gr_fade_table+eax]        ; xlat thru lighting table into dest buffer
 563         mov     [edi],al
 564 %%skipa2:
 565         inc     edi
 566 %endmacro
 567
 568
 569 %rep (2 << (NBITS-2))
 570 ;       local   skip3,no_trans1
 571 ;       local   skipa1,skipa2
 572     repproc1
 573 %endrep
 574
 575 jmp     cont1
 576
 577 ; -------------------------------------------------------
  578 no_trans1:
  579 ; repproc2: fetch and light two pixels without a transparency test, packing them into cl/ch, then rotate ecx by 16.
  580 %macro repproc2 0
  581         mov     eax, esi        ; get u,v
  582         shr     eax, 26 ; shift out all but int(v)
  583         shld    ax,si,6 ; shift in u, shifting up v
  584         add     esi, edx        ; inc u,v
  585         add     eax,[_pixptr]
  586         movzx   eax, byte [eax]    ; get pixel from source bitmap
  587         mov     ah, bh  ; form lighting table lookup value
  588         add     ebx, ebp        ; update lighting value
  589         mov     cl, [_gr_fade_table+eax]        ; xlat thru lighting table into cl (even pixel)
  590
  591 ; Do odd pixel
  592         mov     eax, esi        ; get u,v
  593         shr     eax, 26 ; shift out all but int(v)
  594         shld    ax,si,6 ; shift in u, shifting up v
  595         add     esi, edx        ; inc u,v
  596         add     eax,[_pixptr]
  597         movzx   eax, byte [eax]    ; get pixel from source bitmap
  598         mov     ah, bh  ; form lighting table lookup value
  599         add     ebx, ebp        ; update lighting value
  600         mov     ch, [_gr_fade_table+eax]        ; xlat thru lighting table into ch (odd pixel)
  601
  602 ; ----- This is about 1% faster than the above, and could probably be optimized more.
  603 ; ----- Problem is, it gets the u,v coordinates backwards.  What you would need to do
  604 ; ----- is switch the packing of the u,v coordinates above (about 95 lines up).
  605 ;----------;    mov     eax, esi
  606 ;----------;    shr     ax, 10
  607 ;----------;    rol     eax, 6
  608 ;----------;    mov     dx, ax
  609 ;----------;    add     esi, mem_edx
  610 ;----------;    mov     dl, es:[edx]
  611 ;----------;    mov     dh, bh
  612 ;----------;    add     ebx, ebp
  613 ;----------;    mov     cl, _gr_fade_table[edx]
  614 ;----------;
  615 ;----------;    mov     eax, esi
  616 ;----------;    shr     ax, 10
  617 ;----------;    rol     eax, 6
  618 ;----------;    mov     dx, ax
  619 ;----------;    add     esi, mem_edx
  620 ;----------;    mov     dl, es:[edx]
  621 ;----------;    mov     dh, bh
  622 ;----------;    add     ebx, ebp
  623 ;----------;    mov     ch, _gr_fade_table[edx]
  624
  625         ror     ecx, 16 ; move to next double dest pixel position
  626 %endmacro
  627
  628 %rep (1 << (NBITS-2))
  629 ; each repetition builds 4 pixels in ecx: the second ror returns the first pair to the low word
  630     repproc2
  631     repproc2
  632
  633         mov     [edi],ecx       ; Draw 4 pixels to display
  634         add     edi,4
  635 %endrep
 636 ;; pop edx
  637 cont1:                        ; both chunk variants (with and without transparency) rejoin here
  638
  639 ; -------------------------------------------------------
  640
  641 ; LIGHTING CODE
  642         mov     [_fx_l], ebx    ; store the lighting value advanced by the chunk
  643         pop     ebp             ; restore v, z, u pushed at the top of the chunk loop
  644         pop     ecx
  645         pop     ebx
  646         dec     dword [_loop_count]     ; _loop_count holds the number of chunks still to draw
  647         jnz     near TopOfLoop4
  648
  649 EndOfLoop4:
  650         test    dword [num_left_over], -1
  651         je      near _none_to_do        ; no leftover pixels: scanline is complete
 652
 653 ; ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
 654 DoEndPixels:
 655         push    ecx
 656
 657         mov     eax, ecx
 658         lea     eax, [eax*2+eax]
 659
 660         add     ecx, [DZ1]
 661         js      notokhere
 662         shl     ecx,2
 663         cmp     eax, ecx
 664         pop     ecx
 665         jl      okhere
 666         jmp     bah_bah
 667 notokhere:
 668         pop     ecx
 669 bah_bah:
 670         test    dword [_new_end],-1
 671         jne     near NewDoEndPixels
 672 okhere:
 673
 674         add     ebx, [DU1]
 675         add     ebp, [DV1]
 676         add     ecx, [DZ1]
 677         je      near _div_0_abort
 678         jns     dep_cont
 679
 680 ; z went negative.
 681 ; this can happen because we added DZ1 to the current z, but dz1 represents dz for perhaps 16 pixels
 682 ; though we might only plot one more pixel.
 683         mov     cl, 1
 684
 685 dep_loop:       mov     eax, [DU1]
 686         sar     eax, cl
 687         sub     ebx, eax
 688
 689         mov     eax, [DV1]
 690         sar     eax, cl
 691         sub     ebp, eax
 692
 693         mov     eax, [DZ1]
 694         sar     eax, cl
 695         sub     ecx, eax
 696         je      near _div_0_abort
 697         jns     dep_cont
 698
 699         inc     cl
 700         cmp     cl, NBITS
 701         jne     dep_loop
 702
  703 dep_cont:                     ; one divide pair for the scanline end, then linear steps for the leftovers
  704         push    edi     ; use edi as a temporary variable
  705
  706         cmp     ecx,1 << (ZSHIFT+1)
  707         jg      ecx_ok
  708         mov     ecx, 1 << (ZSHIFT+1)    ; clamp z to at least 2^(ZSHIFT+1) before dividing
  709 ecx_ok:
  710
  711 ; Find fixed U1
  712         mov     eax, ebx
  713         ;PDIV
  714         mov     edx,eax
  715         shl     eax,ZSHIFT
  716         sar     edx,32-ZSHIFT
  717         idiv    ecx     ; eax = (u/z)
  718         shl     eax, 16-ZSHIFT
  719
  720         mov     ebx, eax        ; ebx = U1 until pop's
  721
  722 ; Find fixed V1
  723         mov     eax, ebp
  724         ;PDIV
  725         mov     edx,eax
  726         shl     eax,ZSHIFT
  727         sar     edx,32-ZSHIFT
  728         idiv    ecx     ; eax = (v/z)
  729         shl     eax, 16-ZSHIFT
  730
  731         mov     ebp, eax        ; ebp = V1 until pop's
  732
  733         mov     ecx, [U0]       ; ecx = U0 until pop's
  734         mov     edi, [V0]       ; edi = V0 until pop's
  735
  736 ; Make ESI =  V0:U0 in 6:10,6:10 format
  737         mov     eax, ecx
  738         shr     eax, 6          ; U0: 16.16 -> 6:10 in ax
  739         mov     esi, edi
  740         shl     esi, 10         ; V0: 16.16 -> 6:10 in the high word
  741         mov     si, ax
  742
  743 ; Make EDX = DV:DU in 6:10,6:10 format
  744         mov     eax, ebx
  745         sub     eax, ecx
  746         sar     eax, NBITS+6    ; ax = (U1-U0) / 2^NBITS in 6:10
  747         mov     edx, ebp
  748         sub     edx, edi
  749         shl     edx, 10-NBITS   ; EDX high word = (V1-V0) / 2^NBITS in 6:10 int:frac
  750         mov     dx, ax  ; put delta u in low word
  751
  752         pop     edi     ; Restore EDI before using it
  753
  754         mov     ecx, [num_left_over]    ; ecx = pixels still to plot, counted down by the unrolled code below
  755
  756 ; LIGHTING CODE
  757         mov     ebx, [_fx_l]
  758         mov     ebp, [_fx_dl_dx]
  759
  760     ITERATION equ 0     ; destination byte offset used by repproc3; advanced by %assign after each expansion
 761
 762 %macro repproc3 0
 763 ; Do even pixel
 764         mov     eax, esi        ; get u,v
 765         shr     eax, 26 ; shift out all but int(v)
 766         shld    ax,si,6 ; shift in u, shifting up v
 767         add     eax,[_pixptr]
 768         movzx   eax, byte [eax]    ; get pixel from source bitmap
 769         add     esi, edx        ; inc u,v
 770         mov     ah, bh  ; form lighting table lookup value
 771         add     ebx, ebp        ; update lighting value
 772         cmp     al,255
 773         je      %%skip4
 774         mov     al, [_gr_fade_table+eax]        ; xlat thru lighting table into dest buffer
 775         mov     [edi+ITERATION], al     ; write pixel
 776 %%skip4:        dec     ecx
 777         jz      near _none_to_do
 778
 779 ; Do odd pixel
 780         mov     eax, esi        ; get u,v
 781         shr     eax, 26 ; shift out all but int(v)
 782         shld    ax,si,6 ; shift in u, shifting up v
 783         add     eax,[_pixptr]
 784         movzx   eax, byte [eax]    ; get pixel from source bitmap
 785         add     esi, edx        ; inc u,v
 786         mov     ah, bh  ; form lighting table lookup value
 787         add     ebx, [_fx_dl_dx]        ; update lighting value
 788         cmp     al,255
 789         je      %%skip5
 790         mov     al, [_gr_fade_table+eax]        ; xlat thru lighting table into dest buffer
 791         mov     [edi+ITERATION+1], al   ; write pixel
 792 %%skip5:        dec     ecx
 793         jz      near _none_to_do
 794 %endmacro
 795
 796 %rep (1 << (NBITS-1))
 797         ;local  skip4, skip5
 798     repproc3
 799 %assign ITERATION  ITERATION + 2
 800
 801 %endrep
 802
 803 ; Should never get here!!!!
 804         int     3
 805         jmp     _none_to_do
 806
 807 ; ----------------------------------------- End of LeftOver Pixels ------------------------------------------
 808
 809 ; --BUGGY NEW--NewDoEndPixels:
 810 ; --BUGGY NEW-- mov     eax, num_left_over
 811 ; --BUGGY NEW-- and     num_left_over, 3
 812 ; --BUGGY NEW-- shr     eax, 2
 813 ; --BUGGY NEW-- je      NDEP_1
 814 ; --BUGGY NEW-- mov     _loop_count, eax
 815 ; --BUGGY NEW--
 816 ; --BUGGY NEW--; do 4 pixels per hunk, not 16, so div deltas by 4 (16/4=4)
 817 ; --BUGGY NEW-- shr DU1,2
 818 ; --BUGGY NEW-- shr DV1,2
 819 ; --BUGGY NEW-- shr DZ1,2
 820 ; --BUGGY NEW--
 821 ; --BUGGY NEW--NDEP_TopOfLoop4:
 822 ; --BUGGY NEW-- add     ebx, DU1
 823 ; --BUGGY NEW-- add     ebp, DV1
 824 ; --BUGGY NEW-- add     ecx, DZ1
 825 ; --BUGGY NEW-- je      _div_0_abort    ; would be dividing by 0, so abort
 826 ; --BUGGY NEW--
 827 ; --BUGGY NEW--; Done with ebx, ebp, ecx until next iteration
 828 ; --BUGGY NEW-- push    ebx
 829 ; --BUGGY NEW-- push    ecx
 830 ; --BUGGY NEW-- push    ebp
 831 ; --BUGGY NEW-- push    edi
 832 ; --BUGGY NEW--
 833 ; --BUGGY NEW--; Find fixed U1
 834 ; --BUGGY NEW-- mov     eax, ebx
 835 ; --BUGGY NEW-- mov     edx,ebx
 836 ; --BUGGY NEW-- shl     eax,(ZSHIFT-2)
 837 ; --BUGGY NEW-- sar     edx,32-(ZSHIFT-2)
 838 ; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
 839 ; --BUGGY NEW-- shl     eax, 16-(ZSHIFT-2)
 840 ; --BUGGY NEW-- mov     ebx, eax        ; ebx = U1 until pop's
 841 ; --BUGGY NEW--
 842 ; --BUGGY NEW--; Find fixed V1
 843 ; --BUGGY NEW-- mov     eax, ebp
 844 ; --BUGGY NEW-- mov     edx, ebp
 845 ; --BUGGY NEW-- shl     eax,(ZSHIFT-2)
 846 ; --BUGGY NEW-- sar     edx,32-(ZSHIFT-2)
 847 ; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
 848 ; --BUGGY NEW--
 849 ; --BUGGY NEW-- mov     ecx, U0 ; ecx = U0 until pop's
 850 ; --BUGGY NEW-- mov     edi, V0 ; edi = V0 until pop's
 851 ; --BUGGY NEW--
 852 ; --BUGGY NEW-- shl     eax, 16-(ZSHIFT-2)
 853 ; --BUGGY NEW-- mov     ebp, eax        ; ebp = V1 until pop's
 854 ; --BUGGY NEW--
 855 ; --BUGGY NEW--; Make ESI =  V0:U0 in 6:10,6:10 format
 856 ; --BUGGY NEW-- mov     eax, ecx
 857 ; --BUGGY NEW-- shr     eax, 6
 858 ; --BUGGY NEW-- mov     esi, edi
 859 ; --BUGGY NEW-- shl     esi, 10
 860 ; --BUGGY NEW-- mov     si, ax
 861 ; --BUGGY NEW--
 862 ; --BUGGY NEW--; Make EDX = DV:DU in 6:10,6:10 format
 863 ; --BUGGY NEW-- mov     eax, ebx
 864 ; --BUGGY NEW-- sub     eax, ecx
 865 ; --BUGGY NEW-- sar     eax, (NBITS-2)+6
 866 ; --BUGGY NEW-- mov     edx, ebp
 867 ; --BUGGY NEW-- sub     edx, edi
 868 ; --BUGGY NEW-- shl     edx, 10-(NBITS-2)       ; EDX = V1-V0/ 4 in 6:10 int:frac
 869 ; --BUGGY NEW-- mov     dx, ax  ; put delta u in low word
 870 ; --BUGGY NEW--
 871 ; --BUGGY NEW--; Save the U1 and V1 so we don't have to divide on the next iteration
 872 ; --BUGGY NEW-- mov     U0, ebx
 873 ; --BUGGY NEW-- mov     V0, ebp
 874 ; --BUGGY NEW--
 875 ; --BUGGY NEW-- pop     edi     ; Restore EDI before using it
 876 ; --BUGGY NEW--
 877 ; --BUGGY NEW--; LIGHTING CODE
 878 ; --BUGGY NEW-- mov     ebx, _fx_l
 879 ; --BUGGY NEW-- mov     ebp, _fx_dl_dx
 880 ; --BUGGY NEW--
 881 ; --BUGGY NEW--;**      test    _Transparency_on,-1
 882 ; --BUGGY NEW--;**      je      NDEP_no_trans1
 883 ; --BUGGY NEW--
 884 ; --BUGGY NEW--        REPT 2
 885 ; --BUGGY NEW-- local   NDEP_skipa1, NDEP_skipa2
 886 ; --BUGGY NEW--
 887 ; --BUGGY NEW-- mov     eax, esi        ; get u,v
 888 ; --BUGGY NEW-- shr     eax, 26 ; shift out all but int(v)
 889 ; --BUGGY NEW-- shld    ax,si,6 ; shift in u, shifting up v
 890 ; --BUGGY NEW-- add     esi, edx        ; inc u,v
 891 ; --BUGGY NEW-- mov     al, es:[eax]    ; get pixel from source bitmap
 892 ; --BUGGY NEW-- cmp     al,255
 893 ; --BUGGY NEW-- je      NDEP_skipa1
 894 ; --BUGGY NEW-- mov     ah, bh  ; form lighting table lookup value
 895 ; --BUGGY NEW-- add     ebx, ebp        ; update lighting value
 896 ; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
 897 ; --BUGGY NEW-- mov     [edi],al
 898 ; --BUGGY NEW--NDEP_skipa1:
 899 ; --BUGGY NEW-- inc     edi
 900 ; --BUGGY NEW--
 901 ; --BUGGY NEW--; Do odd pixel
 902 ; --BUGGY NEW-- mov     eax, esi        ; get u,v
 903 ; --BUGGY NEW-- shr     eax, 26 ; shift out all but int(v)
 904 ; --BUGGY NEW-- shld    ax,si,6 ; shift in u, shifting up v
 905 ; --BUGGY NEW-- add     esi, edx        ; inc u,v
 906 ; --BUGGY NEW-- mov     al, es:[eax]    ; get pixel from source bitmap
 907 ; --BUGGY NEW-- cmp     al,255
 908 ; --BUGGY NEW-- je      NDEP_skipa2
 909 ; --BUGGY NEW-- mov     ah, bh  ; form lighting table lookup value
 910 ; --BUGGY NEW-- add     ebx, ebp        ; update lighting value
 911 ; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
 912 ; --BUGGY NEW-- mov     [edi],al
 913 ; --BUGGY NEW--NDEP_skipa2:
 914 ; --BUGGY NEW-- inc     edi
 915 ; --BUGGY NEW--
 916 ; --BUGGY NEW--        ENDM
 917 ; --BUGGY NEW--
 918 ; --BUGGY NEW-- mov     _fx_l, ebx
 919 ; --BUGGY NEW-- pop     ebp
 920 ; --BUGGY NEW-- pop     ecx
 921 ; --BUGGY NEW-- pop     ebx
 922 ; --BUGGY NEW-- dec     _loop_count
 923 ; --BUGGY NEW-- jnz     NDEP_TopOfLoop4
 924 ; --BUGGY NEW--
 925 ; --BUGGY NEW-- test    num_left_over, -1
 926 ; --BUGGY NEW-- je      _none_to_do
 927 ; --BUGGY NEW--
 928 ; --BUGGY NEW--NDEP_1:
 929 ; --BUGGY NEW-- mov     esi,ebx
 930 ; --BUGGY NEW--
 931 ; --BUGGY NEW-- align   4
 932 ; --BUGGY NEW--NDEP_loop:
 933 ; --BUGGY NEW--
 934 ; --BUGGY NEW--; compute v coordinate
 935 ; --BUGGY NEW-- mov     eax, ebp        ; get v
 936 ; --BUGGY NEW-- mov     edx, eax
 937 ; --BUGGY NEW-- sar     edx, 31
 938 ; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
 939 ; --BUGGY NEW--
 940 ; --BUGGY NEW-- and     eax,3fh ; mask with height-1
 941 ; --BUGGY NEW-- mov     ebx,eax
 942 ; --BUGGY NEW--
 943 ; --BUGGY NEW--; compute u coordinate
 944 ; --BUGGY NEW-- mov     eax,    esi     ; get u
 945 ; --BUGGY NEW-- mov     edx, eax
 946 ; --BUGGY NEW-- sar     edx, 31
 947 ; --BUGGY NEW-- idiv    ecx     ; eax = (u/z)
 948 ; --BUGGY NEW--
 949 ; --BUGGY NEW-- shl     eax,26
 950 ; --BUGGY NEW-- shld    ebx,eax,6       ; esi = v*64+u
 951 ; --BUGGY NEW--
 952 ; --BUGGY NEW--; read 1  pixel
 953 ; --BUGGY NEW-- xor     eax, eax
 954 ; --BUGGY NEW-- mov     al, es:[ebx]    ; get pixel from source bitmap
 955 ; --BUGGY NEW--
 956 ; --BUGGY NEW--; lighting code
 957 ; --BUGGY NEW-- mov     ebx, _fx_l      ; get temp copy of lighting value
 958 ; --BUGGY NEW-- mov     ah, bh  ; get lighting level
 959 ; --BUGGY NEW-- add     ebx, _fx_dl_dx  ; update lighting value
 960 ; --BUGGY NEW-- mov     _fx_l, ebx      ; save temp copy of lighting value
 961 ; --BUGGY NEW--
 962 ; --BUGGY NEW--; transparency check
 963 ; --BUGGY NEW-- cmp     al,255
 964 ; --BUGGY NEW-- je      NDEP_skip2      ; this pixel is transparent, so don't write it (or light it)
 965 ; --BUGGY NEW--
 966 ; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat pixel thru lighting tables
 967 ; --BUGGY NEW--
 968 ; --BUGGY NEW--; write 1 pixel
 969 ; --BUGGY NEW-- mov     [edi],al
 970 ; --BUGGY NEW--NDEP_skip2:      inc     edi
 971 ; --BUGGY NEW--
 972 ; --BUGGY NEW--; update deltas
 973 ; --BUGGY NEW-- add     ebp,_fx_dv_dx
 974 ; --BUGGY NEW-- add     esi,_fx_du_dx
 975 ; --BUGGY NEW-- add     ecx,_fx_dz_dx
 976 ; --BUGGY NEW-- je      _div_0_abort    ; would be dividing by 0, so abort
 977 ; --BUGGY NEW--
 978 ; --BUGGY NEW-- dec     num_left_over
 979 ; --BUGGY NEW-- jne     NDEP_loop
 980 ; --BUGGY NEW--
 981 ; --BUGGY NEW-- jmp     _none_to_do
 982
 983 NewDoEndPixels:
     ; Per-pixel perspective loop with lighting and transparency: every
     ; iteration divides the u and v numerators by z, fetches one texel,
     ; lights it through _gr_fade_table and stores it unless it is 255.
     ; Register use visible below: ebx = u numerator on entry (moved to esi),
     ; ebp = v numerator, ecx = z divisor, edi = destination pointer.
     ; Loops until [num_left_over] counts down to zero, then jumps to
     ; _none_to_do; jumps to _div_0_abort if stepping z lands on zero.
     ; NOTE(review): ecx is not tested before the first idiv -- presumably
     ; the caller guarantees a non-zero z on entry; confirm.
 984         mov     esi,ebx ; keep u in esi; ebx becomes scratch inside the loop
 985
 986         align   4
 987 NDEP_loop:
 988
 989 ; compute v coordinate: signed divide of v by z
 990         mov     eax, ebp        ; get v
 991         mov     edx, eax
 992         sar     edx, 31 ; edx:eax = sign-extended v (dividend for idiv)
 993         idiv    ecx     ; eax = (v/z)
 994
 995         and     eax,3fh ; keep the low 6 bits of v (mask with height-1)
 996         mov     ebx,eax ; ebx = v & 3fh
 997
 998 ; compute u coordinate: signed divide of u by z
 999         mov     eax,    esi     ; get u
1000         mov     edx, eax
1001         sar     edx, 31 ; edx:eax = sign-extended u
1002         idiv    ecx     ; eax = (u/z)
1003
1004         shl     eax,26  ; move the low 6 bits of u to the top of eax
1005         shld    ebx,eax,6       ; ebx = (v & 3fh)*64 + (u & 3fh)
1006
1007 ; read 1 pixel
1008         add     ebx,[_pixptr]   ; ebx = address of the texel
1009         xor     eax, eax        ; clear eax so it can serve as a table index below
1010         mov     al, [ebx]    ; get pixel from source bitmap
1011
1012 ; lighting code (the lighting value is stepped even for transparent pixels)
1013         mov     ebx, [_fx_l]    ; get temp copy of lighting value
1014         mov     ah, bh  ; ah = bits 8..15 of the lighting value, so eax = (light << 8) | pixel
1015         add     ebx, [_fx_dl_dx]        ; update lighting value
1016         mov     [_fx_l], ebx    ; save temp copy of lighting value
1017
1018 ; transparency check
1019         cmp     al,255
1020         je      NDEP_skip2      ; this pixel is transparent, so don't write it (or light it)
1021
1022         mov     al, [_gr_fade_table+eax]        ; xlat pixel thru lighting tables
1023
1024 ; write 1 pixel
1025         mov     [edi],al
1026 NDEP_skip2:     inc     edi     ; advance the destination whether or not a pixel was written
1027
1028 ; step u, v and z to the next pixel
1029         add     ebp,[_fx_dv_dx]
1030         add     esi,[_fx_du_dx]
1031         add     ecx,[_fx_dz_dx]
1032         je      near _div_0_abort    ; would be dividing by 0, so abort
1033
1034         dec     dword [num_left_over]
1035         jne     NDEP_loop       ; repeat until the counter reaches zero
1036
1037         jmp     _none_to_do
1038
1039 ; ==================================================== No Lighting Code ======================================================
1040 global _tmap_loop_fast_nolight
1041 _tmap_loop_fast_nolight:
     ; Entry point of the unlit perspective scanline loop.
     ; Register use visible below: ebx = u numerator on entry (moved to esi),
     ; ebp = v numerator, ecx = z divisor, edi = destination pointer.
     ; This first stage draws one pixel per iteration, with a full divide
     ; for u and v, until edi is 4-byte aligned; it then continues at
     ; DwordAligned1_nolight. If [_loop_count] goes negative first, the
     ; scanline is finished and control goes to _none_to_do.
     ; NOTE(review): ecx is not tested before the first idiv -- presumably
     ; the caller guarantees a non-zero z on entry; confirm.
1042         mov     esi,ebx ; keep u in esi; ebx becomes scratch inside the loop
1043
1044         align   4
1045 NotDwordAligned1_nolight:
1046         test    edi, 11b        ; low two address bits clear => dword aligned
1047         jz      DwordAligned1_nolight
1048
1049 ; compute v coordinate: signed divide of v by z
1050         mov     eax,ebp ; get v
1051         mov     edx, eax
1052         sar     edx, 31 ; edx:eax = sign-extended v (dividend for idiv)
1053         idiv    ecx     ; eax = (v/z)
1054
1055         and     eax,3fh ; keep the low 6 bits of v (mask with height-1)
1056         mov     ebx,eax ; ebx = v & 3fh
1057
1058 ; compute u coordinate: signed divide of u by z
1059         mov     eax, esi        ; get u
1060         mov     edx, eax
1061         sar     edx, 31 ; edx:eax = sign-extended u
1062         idiv    ecx     ; eax = (u/z)
1063
1064         shl     eax,26  ; move the low 6 bits of u to the top of eax
1065         shld    ebx,eax,6       ; ebx = (v & 3fh)*64 + (u & 3fh)
1066
1067 ; read 1 pixel
1068         add     ebx,[_pixptr]   ; ebx = address of the texel
1069         mov     al,[ebx]     ; get pixel from source bitmap
1070
1071 ; write 1 pixel unless it is the transparent colour 255
1072         cmp     al,255
1073         je      skip6
1074         mov     [edi],al
1075 skip6:  inc     edi     ; advance the destination whether or not a pixel was written
1076
1077 ; step u, v and z to the next pixel
1078         add     ebp,[_fx_dv_dx]
1079         add     esi,[_fx_du_dx]
1080         add     ecx,[_fx_dz_dx]
1081         je      near _div_0_abort    ; would be dividing by 0, so abort
1082
1083         dec     dword [_loop_count]
1084         jns     NotDwordAligned1_nolight        ; keep going while the count is still >= 0
1085         jmp     _none_to_do
1086
1087 DwordAligned1_nolight:
     ; Set-up for the chunked loop, reached once edi is dword aligned.
     ; Splits the remaining pixels ([_loop_count] + 1) into whole chunks of
     ; 2^NBITS pixels plus a remainder, computes the starting texture
     ; coordinates U0/V0 with one divide each, and scales the per-pixel
     ; deltas to per-chunk deltas DU1/DV1/DZ1. Execution then falls through
     ; into TopOfLoop4_nolight. When there is not even one whole chunk,
     ; control goes to tmap_loop instead.
1088         mov     ebx,esi ; u back into ebx (the per-pixel loop kept it in esi)
1089
1090         mov     eax, [_loop_count]
1091         inc     eax     ; eax = [_loop_count] + 1
1092         mov     [num_left_over], eax
1093         shr     eax, NBITS      ; eax = number of whole 2^NBITS-pixel chunks
1094
1095         test    eax, -1 ; sets ZF when eax is zero
1096         je      near tmap_loop       ; no 2^NBITS chunks, do divide/pixel for whole scanline
1097
1098         mov     [_loop_count], eax      ; _loop_count = pixels / NPIXS
1099         shl     eax, NBITS      ; eax = pixels covered by whole chunks
1100         sub     [num_left_over], eax    ; num_left_over = pixels remaining after the whole chunks
1101
1102 ; compute initial v coordinate
1103         mov     eax,ebp ; get v
1104         ;PDIV: edx:eax = sign-extended value << ZSHIFT, then signed divide by z
1105         mov     edx,eax
1106         shl     eax,ZSHIFT
1107         sar     edx,32-ZSHIFT
1108         idiv    ecx     ; eax = ((v << ZSHIFT)/z)
1109         shl     eax, 16-ZSHIFT  ; total left shift of the quotient is 16 bits
1110
1111         mov     [V0], eax
1112
1113 ; compute initial u coordinate
1114         mov     eax,ebx ; get u
1115         ;PDIV: same scaled divide as for v above
1116         mov     edx,eax
1117         shl     eax,ZSHIFT
1118         sar     edx,32-ZSHIFT
1119         idiv    ecx     ; eax = ((u << ZSHIFT)/z)
1120         shl     eax, 16-ZSHIFT  ; total left shift of the quotient is 16 bits
1121
1122         mov     [U0], eax
1123
1124 ; Set deltas to NPIXS pixel increments (per-pixel delta << NBITS)
1125         mov     eax, [_fx_du_dx]
1126         shl     eax, NBITS
1127         mov     [DU1], eax
1128         mov     eax, [_fx_dv_dx]
1129         shl     eax, NBITS
1130         mov     [DV1], eax
1131         mov     eax, [_fx_dz_dx]
1132         shl     eax, NBITS
1133         mov     [DZ1], eax
1134
1135         align   4
1136 TopOfLoop4_nolight:
     ; Chunked unlit loop: one pass draws 2^NBITS pixels.
     ; Each pass steps u, v and z by a whole chunk, performs two divides to
     ; get the texture coordinates U1/V1 at the end of the chunk, and then
     ; steps linearly from U0/V0 (the previous chunk end) across the chunk.
     ; ebx/ebp/ecx hold the u/v/z numerators between passes and are saved on
     ; the stack while they are reused as scratch. Runs [_loop_count] passes.
1137         add     ebx, [DU1]
1138         add     ebp, [DV1]
1139         add     ecx, [DZ1]
1140         je      near _div_0_abort       ; z stepped onto zero; taken before anything is pushed
1141
1142 ; Done with ebx, ebp, ecx until next iteration
1143         push    ebx
1144         push    ecx
1145         push    ebp
1146         push    edi     ; edi is borrowed as a temporary and popped again before drawing
1147
1148 ; Find fixed U1
1149         mov     eax, ebx
1150         ;PDIV: edx:eax = sign-extended value << ZSHIFT, then signed divide by z
1151         mov     edx,eax
1152         shl     eax,ZSHIFT
1153         sar     edx,32-ZSHIFT
1154         idiv    ecx     ; eax = ((u << ZSHIFT)/z)
1155         shl     eax, 16-ZSHIFT
1156
1157         mov     ebx, eax        ; ebx = U1 until pop's
1158
1159 ; Find fixed V1
1160         mov     eax, ebp
1161         ;PDIV: same scaled divide as for U1 above
1162         mov     edx,eax
1163         shl     eax,ZSHIFT
1164         sar     edx,32-ZSHIFT
1165         idiv    ecx     ; eax = ((v << ZSHIFT)/z)
1166         shl     eax, 16-ZSHIFT
1167
1168         mov     ebp, eax        ; ebp = V1 until pop's
1169
1170         mov     ecx, [U0]       ; ecx = U0 until pop's
1171         mov     edi, [V0]       ; edi = V0 until pop's
1172
1173 ; Make ESI =  V0:U0 in 6:10,6:10 format (v in the high word, u in the low word)
     ; NOTE(review): the 6:10 layout assumes U0/V0 are 16.16 fixed point -- confirm.
1174         mov     eax, ecx
1175         shr     eax, 6  ; ax = U0 >> 6
1176         mov     esi, edi
1177         shl     esi, 10 ; high word of esi = bits 6..21 of V0
1178         mov     si, ax  ; low word of esi = u
1179
1180 ; Make EDX = DV:DU in 6:10,6:10 format (per-pixel step matching ESI)
1181         mov     eax, ebx
1182         sub     eax, ecx        ; eax = U1 - U0
1183         sar     eax, NBITS+6    ; divide by 2^NBITS pixels and align like the u word of ESI
1184         mov     edx, ebp
1185         sub     edx, edi        ; edx = V1 - V0
1186         shl     edx, 10-NBITS   ; EDX = (V1-V0) / 2^NBITS, aligned like the v word of ESI
1187         mov     dx, ax  ; put delta u in low word
1188
1189 ; Save the U1 and V1 so we don't have to divide on the next iteration
1190         mov     [U0], ebx
1191         mov     [V0], ebp
1192
1193         pop     edi     ; Restore EDI before using it
1194
1195 %macro repproc4 0
     ; Fetch four texels into ecx, then store them at [edi..edi+3] and
     ; advance edi by 4. The texel offset is (top 6 bits of esi)*64 +
     ; (top 6 bits of si); esi is stepped by edx after each fetch.
     ; When [_Transparency_on] is non-zero, bytes equal to 255 are not
     ; written; otherwise all four bytes go out in a single dword store.
1196 ; Do 1 pixel
1197         mov     eax, esi        ; get u,v
1198         shr     eax, 26 ; shift out all but int(v)
1199         shld    ax,si,6 ; shift in u, shifting up v
1200         add     esi, edx        ; inc u,v
1201         add     eax,[_pixptr]
1202         mov     cl, [eax]    ; load into buffer register
1203
1204         mov     eax, esi        ; get u,v
1205         shr     eax, 26 ; shift out all but int(v)
1206         shld    ax,si,6 ; shift in u, shifting up v
1207         add     eax,[_pixptr]
1208         mov     ch, [eax]    ; load into buffer register
1209         add     esi, edx        ; inc u,v
1210         ror     ecx, 16 ; move the first two pixels to the high word
1211
1212         mov     eax, esi        ; get u,v
1213         shr     eax, 26 ; shift out all but int(v)
1214         shld    ax,si,6 ; shift in u, shifting up v
1215         add     eax,[_pixptr]
1216         mov     cl, [eax]    ; load into buffer register
1217         add     esi, edx        ; inc u,v
1218
1219         mov     eax, esi        ; get u,v
1220         shr     eax, 26 ; shift out all but int(v)
1221         shld    ax,si,6 ; shift in u, shifting up v
1222         add     eax,[_pixptr]
1223         mov     ch, [eax]    ; load into buffer register
1224         add     esi, edx        ; inc u,v
1225         ror     ecx, 16 ; first pixel back in cl, so ecx is in memory order (original note: could be dropped by writing in a different order below)
1226
1227         test    dword [_Transparency_on],-1
1228         je      %%no_trans2     ; transparency off: plain dword store
1229         cmp     ecx,-1
1230         je      %%skip7         ; all four pixels are 255: nothing to write
1231
1232         cmp     cl,255
1233         je      %%skip1q
1234         mov     [edi],cl
1235 %%skip1q:
1236
1237         cmp     ch,255
1238         je      %%skip2q
1239         mov     [edi+1],ch
1240 %%skip2q:
1241         ror     ecx,16  ; bring the third and fourth pixels into cl/ch
1242
1243         cmp     cl,255
1244         je      %%skip3q
1245         mov     [edi+2],cl
1246 %%skip3q:
1247
1248
1249         cmp     ch,255
1250         je      %%skip4q
1251         mov     [edi+3],ch
1252 %%skip4q:
1253
1254         jmp     %%skip7
1255 %%no_trans2:
1256         mov     [edi],ecx       ; Draw 4 pixels to display
1257 %%skip7:        add     edi,4
1258 %endmacro
1259
1260 %rep (1 << (NBITS-2))
1261         ;local  skip7, no_trans2, skip1q, skip2q, skip3q, skip4q
     ; 2^(NBITS-2) expansions of 4 pixels each = 2^NBITS pixels per pass
1262     repproc4
1263
1264 %endrep
1265
1266         pop     ebp     ; restore the v, z and u numerators pushed at the top of the pass
1267         pop     ecx
1268         pop     ebx
1269         dec     dword [_loop_count]
1270         jnz     near TopOfLoop4_nolight
1271
1272 EndOfLoop4_nolight:
     ; Tail of the unlit scanline: draws the [num_left_over] pixels that did
     ; not fill a whole 2^NBITS chunk, or goes straight to _none_to_do when
     ; there are none.
1273
1274         test    dword [num_left_over], -1       ; sets ZF when the remainder is zero
1275         je      near _none_to_do
1276
1277 DoEndPixels_nolight:
     ; u, v and z are still stepped by a whole chunk (DU1/DV1/DZ1) and the
     ; per-pixel step is derived from that full span, exactly as in the
     ; main loop; only the number of pixels drawn differs.
1278         add     ebx, [DU1]
1279         add     ebp, [DV1]
1280         add     ecx, [DZ1]
1281         je      near _div_0_abort       ; z stepped onto zero; taken before edi is pushed
1282         push    edi     ; use edi as a temporary variable
1283
1284 ; Find fixed U1
1285         mov     eax, ebx
1286         mov     edx,eax
1287         shl     eax,ZSHIFT
1288         sar     edx,32-ZSHIFT   ; edx:eax = sign-extended u << ZSHIFT
1289         idiv    ecx     ; eax = ((u << ZSHIFT)/z)
1290         shl     eax, 16-ZSHIFT
1291         mov     ebx, eax        ; ebx = U1 until pop's
1292
1293 ; Find fixed V1
1294         mov     eax, ebp
1295         mov     edx,eax
1296         shl     eax,ZSHIFT
1297         sar     edx,32-ZSHIFT   ; edx:eax = sign-extended v << ZSHIFT
1298         idiv    ecx     ; eax = ((v << ZSHIFT)/z)
1299         shl     eax, 16-ZSHIFT
1300         mov     ebp, eax        ; ebp = V1 until pop's
1301
1302         mov     ecx, [U0]       ; ecx = U0 until pop's
1303         mov     edi, [V0]       ; edi = V0 until pop's
1304
1305 ; Make ESI =  V0:U0 in 6:10,6:10 format (v in the high word, u in the low word)
     ; NOTE(review): the 6:10 layout assumes U0/V0 are 16.16 fixed point -- confirm.
1306         mov     eax, ecx
1307         shr     eax, 6  ; ax = U0 >> 6
1308         mov     esi, edi
1309         shl     esi, 10 ; high word of esi = bits 6..21 of V0
1310         mov     si, ax  ; low word of esi = u
1311
1312 ; Make EDX = DV:DU in 6:10,6:10 format (per-pixel step matching ESI)
1313         mov     eax, ebx
1314         sub     eax, ecx        ; eax = U1 - U0
1315         sar     eax, NBITS+6    ; divide by 2^NBITS pixels and align like the u word of ESI
1316         mov     edx, ebp
1317         sub     edx, edi        ; edx = V1 - V0
1318         shl     edx, 10-NBITS   ; EDX = (V1-V0) / 2^NBITS, aligned like the v word of ESI
1319         mov     dx, ax  ; put delta u in low word
1320
1321         pop     edi     ; Restore EDI before using it
1322
1323         mov     ecx, [num_left_over]    ; ecx = pixels still to draw
1324
1325 %assign ITERATION 0
1326 %macro repproc5 0
     ; Draw one pixel at [edi+ITERATION] unless it is 255, then count it in
     ; ecx and leave through _none_to_do once ecx reaches zero. edi itself
     ; is not advanced; ITERATION is the assembly-time offset of this copy.
1327 ; Do 1 pixel
1328         mov     eax, esi        ; get u,v
1329         shr     eax, 26 ; shift out all but int(v)
1330         shld    ax,si,6 ; shift in u, shifting up v
1331         add     eax,[_pixptr]
1332         movzx   eax, byte [eax]    ; load the texel, zero-extended
1333         add     esi, edx        ; inc u,v
1334         cmp     al,255
1335         je      %%skip8 ; transparent: skip the store
1336         mov     [edi+ITERATION], al     ; write pixel
1337 %%skip8:        dec     ecx
1338         jz      near _none_to_do        ; all leftover pixels drawn
1339 %endmacro
1340
1341 %rep (1 << NBITS)
1342         ;local  skip8
     ; one expansion per possible leftover pixel, each with its own offset
1343         repproc5
1344 %assign ITERATION  ITERATION + 1
1345 %endrep
1346
1347 ; Should never get here: reached only if ecx did not hit zero within 2^NBITS pixels
1348         int     3       ; breakpoint trap
1349         jmp     _none_to_do
1350