1 ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
2 ;SOFTWARE CORPORATION ("PARALLAX"). PARALLAX, IN DISTRIBUTING THE CODE TO
3 ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
4 ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
5 ;IN USING, DISPLAYING, AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
6 ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
7 ;FREE PURPOSES. IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
8 ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES. THE END-USER UNDERSTANDS
9 ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
10 ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION. ALL RIGHTS RESERVED.
12 ; $Source: /cvs/cvsroot/d2x/texmap/tmap_per.asm,v $
15 ; $Date: 2001-10-18 23:59:23 $
17 ; Perspective texture mapper inner loop.
19 ; $Log: not supported by cvs2svn $
20 ; Revision 1.1.1.1 2001/01/19 03:30:16 bradleyb
23 ; Revision 1.1.1.1 1999/06/14 22:14:01 donut
24 ; Import of d1x 1.37 source.
26 ; Revision 1.26 1995/02/20 18:22:55 john
27 ; Put all the externs in the assembly modules into tmap_inc.asm.
28 ; Also, moved all the C versions of the inner loops into a new module,
31 ; Revision 1.25 1995/02/20 17:09:08 john
32 ; Added code so that you can build the tmapper with no assembly!
34 ; Revision 1.24 1995/01/10 09:32:07 mike
35 ; mostly fix garbage at end of scanline, but slow down by 1-4%.
37 ; Revision 1.23 1994/12/02 23:29:57 mike
40 ; Revision 1.22 1994/11/30 00:57:00 mike
43 ; Revision 1.21 1994/11/21 13:57:42 mike
44 ; fix right side shear bug
46 ; Revision 1.20 1994/11/12 16:41:09 mike
49 ; Revision 1.19 1994/10/27 19:40:00 john
50 ; Made lighting table lookup be _gr_fade_table[eax] instead
51 ; of fs:[eax], which gets rid of a segment override that
52 ; supposedly costs 1 clock on a 486. Mainly, I wanted to verify
53 ; that the only reason we need selectors is for the source texture
56 ; Revision 1.18 1994/05/03 11:08:32 mike
57 ; Trap divide overflows.
59 ; Revision 1.17 1994/04/21 15:03:41 mike
62 ; Revision 1.16 1994/04/08 16:46:57 john
63 ; Made 32 fade levels. Hacked.
65 ; Revision 1.15 1994/03/31 08:35:18 mike
66 ; Fix quantized-by-4 bug in inner loop.
68 ; Revision 1.14 1994/03/14 17:41:14 mike
69 ; Fix bug in unlighted version.
71 ; Revision 1.13 1994/03/14 15:45:14 mike
74 ; Revision 1.12 1994/01/14 14:01:58 mike
75 ; *** empty log message ***
77 ; Revision 1.11 1993/12/18 14:43:44 john
78 ; Messed around with doing 1/z, the u*(1/z) and v*(1/z)
79 ; (Went from 23 fps to 21 fps... not good! )
81 ; Revision 1.10 1993/12/17 16:14:17 john
82 ; Split lighted/nonlighted, so there is no cmp lighting
85 ; Revision 1.9 1993/12/17 12:34:29 john
86 ; Made leftover bytes use linear approx instead of correct...
87 ; should save about 8 divides per scanline on average.
88 ; Also, took out anti-aliasing code and rearranged the
89 ; order of some instructions to help on 486 pipelining.
90 ; (The anti-aliasing code did *not* look good, so I
91 ; figure there was no reason to keep it in. )
93 ; Revision 1.8 1993/12/16 18:37:52 mike
94 ; Align some stuff on 4 byte boundaries.
96 ; Revision 1.7 1993/11/30 08:44:18 john
97 ; Made selector set check for < 64*64 bitmaps.
99 ; Revision 1.6 1993/11/23 17:25:26 john
100 ; Added safety "and eax, 0fffh" in lighting lookup.
102 ; Revision 1.5 1993/11/23 15:08:52 mike
103 ; Fixed lighting bug.
105 ; Revision 1.4 1993/11/23 14:38:50 john
106 ; optimized NORMAL code by switching EBX and ESI, so BH can be used in
107 ; the lighting process.
109 ; Revision 1.3 1993/11/23 14:30:53 john
110 ; Made the perspective tmapper do 1/8 divides; added lighting.
112 ; Revision 1.2 1993/11/22 10:24:59 mike
113 ; *** empty log message ***
115 ; Revision 1.1 1993/09/08 17:29:53 mike
; Export the scanline entry point under both its underscore-prefixed and its plain name.
123 global _asm_tmap_scanline_per
124 global asm_tmap_scanline_per
; Shared texture-mapper declarations (revision 1.26 in the log above moved the externs into this include).
126 %include "tmap_inc.asm"
130 ;extern _per2_flag;:dword
132 ; Cater for linux ELF compilers...
; Each underscore-prefixed name below expands to its plain spelling wherever it is used afterwards.
134 %define _loop_count loop_count
135 %define _new_end new_end
136 %define _scan_doubling_flag scan_doubling_flag
137 %define _linear_if_far_flag linear_if_far_flag
; With the defines above in effect, these export the plain names scan_doubling_flag / linear_if_far_flag.
143 global _scan_doubling_flag
144 global _linear_if_far_flag
155 ; _min_ecx dd 55555555h
; _new_end is tested in the leftover-pixel code: a non-zero value branches to NewDoEndPixels.
156 _new_end dd 1 ; if set, use new, but slower, way of finishing off extra pixels on scanline, 01/10/95 --MK
; Two dword flags, both initialised to 0.
158 _scan_doubling_flag dd 0
159 _linear_if_far_flag dd 0
161 ;---------- local variables
176 ; --------------------------------------------------------------------------------------------------
178 ; _xleft fixed point left x coordinate
179 ; _xright fixed point right x coordinate
180 ; _y fixed point y coordinate
181 ; _pixptr address of source pixel map
182 ; _u fixed point initial u coordinate
183 ; _v fixed point initial v coordinate
184 ; _z fixed point initial z coordinate
185 ; _du_dx fixed point du/dx
186 ; _dv_dx fixed point dv/dx
187 ; _dz_dx fixed point dz/dx
189 ; for (x = (int) xleft; x <= (int) xright; x++) {
190 ; _setcolor(read_pixel_from_tmap(srcb,((int) (u/z)) & 63,((int) (v/z)) & 63));
; Scanline entry point of the perspective texture mapper; both labels mark the same address.
; Reference behaviour is the pseudo-code in the comment block above: for each x, fetch the
; texel at (u/z, v/z), each masked to 0..63, and plot it.
200 _asm_tmap_scanline_per:
201 asm_tmap_scanline_per:
205 ;---------------------------- setup for loop ---------------------------------
206 ; Setup for loop: _loop_count iterations = (int) xright - (int) xleft
207 ; esi source pixel pointer = pixptr
208 ; edi initial row pointer = y*320+x
209 ; NOTE: fx_xright and fx_xleft changed from fix to int by mk on 12/01/94.
211 ; set esi = pointer to start of texture map data
213 ; set edi = address of first pixel to modify
215 ; mov es,[_pixel_data_selector] ; selector[0*2]
217 mov edi,[_y_pointers+edi*4] ; edi = _y_pointers[edi] (table of dwords)
223 ebx_ok: add edi,[_write_buffer] ; add the base address held in _write_buffer
226 ; set _loop_count = # of iterations
230 mov [_loop_count],eax ; save the iteration count held in eax
232 ; lighting values are passed in fixed point, but need to be in 8 bit integer, 8 bit fraction so we can easily
233 ; get the integer by reading %bh
235 sar dword [_fx_dl_dx],8 ; signed shift: drop 8 fraction bits of the lighting delta
237 inc dword [_fx_dl_dx] ; round towards 0 for negative deltas
245 test dword [_per2_flag],-1 ; ZF is set when _per2_flag == 0
248 test dword [_Lighting_on], -1 ; ZF is set when lighting is disabled
249 je near _tmap_loop_fast_nolight ; lighting off -> take the no-lighting fast loop
251 ;tmap_loop_fast_nolight_jumper:
252 ; jmp tmap_loop_fast_nolight
254 ;================ PERSPECTIVE TEXTURE MAP INNER LOOPS ========================
256 ; Usage in loop: eax division, pixel value
261 ; esi source pixel pointer
262 ; edi destination pixel pointer
264 ;-------------------- NORMAL PERSPECTIVE TEXTURE MAP LOOP -----------------
; One pixel per iteration, with a signed divide for v and another for u.
266 mov esi, ebx ; esi becomes u coordinate
271 ; compute v coordinate
275 idiv ecx ; eax = (v/z)
277 and eax,3fh ; mask with height-1
280 ; compute u coordinate
284 idiv ecx ; eax = (u/z)
287 shld ebx,eax,6 ; ebx = v*64+u (shld writes ebx; the top 6 bits of eax are shifted in)
292 test dword [_Lighting_on], -1 ; flags survive the mov below (mov does not modify flags)
293 mov al, [ebx] ; get pixel from source bitmap
297 mov ebx, [_fx_l] ; get temp copy of lighting value
298 mov ah, bh ; get lighting level
299 add ebx, [_fx_dl_dx] ; update lighting value
300 mov al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables
301 mov [_fx_l], ebx ; save temp copy of lighting value
314 je _div_0_abort ; would be dividing by 0, so abort
316 dec dword [_loop_count] ; one more pixel accounted for
324 ; We detected a z=0 condition, which seems pretty bogus, don't you think?
325 ; So, we abort, but maybe we want to know about it.
329 ;-------------------------- PER/4 TMAPPER ----------------
332 ; U0 = u/w; V0 = v/w;
334 ; u += du_dx*4; v+= dv_dx*4
335 ; U1 = u/w; V1 = v/w;
336 ; DUDX = (U1-U0)/4; DVDX = (V1-V0)/4;
339 ; pixels = texmap[V0*64+U0];
340 ; U0 += DUDX; V0 += DVDX
342 ; pixels = (pixels<<8)+texmap[V0*64+U0];
343 ; U0 += DUDX; V0 += DVDX
345 ; pixels = (pixels<<8)+texmap[V0*64+U0];
346 ; U0 += DUDX; V0 += DVDX
348 ; pixels = (pixels<<8)+texmap[V0*64+U0];
; With NBITS = 4 each divide pair covers 2^4 = 16 pixels; the pseudo-code above illustrates the 4-pixel case.
354 NBITS equ 4 ; 2^NBITS pixels plotted per divide
; ZSHIFT also defines the value 1 << (ZSHIFT+1) that ecx is compared against in the lighted leftover-pixel code.
355 ZSHIFT equ 4 ; precision used in PDIV macro
359 ; Returns EAX/ECX in 16.16 format in EAX. Trashes EDX
364 ; idiv ecx ; eax = (v/z)
; Lighted fast path. Visible structure:
;   1. a per-pixel lead-in, bracketed by the "Getting Dword Aligned" markers;
;   2. splitting of _loop_count into whole 2^NBITS-pixel chunks plus num_left_over;
;   3. per-chunk setup: two divides give U1 and V1, and the start values and steps
;      are packed into ESI / EDX for the unrolled inner loop.
368 global _tmap_loop_fast
370 ; -------------------------------------- Start of Getting Dword Aligned ----------------------------------------------
381 ; compute v coordinate
385 idiv ecx ; eax = (v/z)
387 and eax,3fh ; mask with height-1
390 ; compute u coordinate
394 idiv ecx ; eax = (u/z)
397 shld ebx,eax,6 ; ebx = v*64+u (shld writes ebx; the top 6 bits of eax are shifted in)
402 mov al, [ebx] ; get pixel from source bitmap
405 mov ebx, [_fx_l] ; get temp copy of lighting value
406 mov ah, bh ; get lighting level
407 add ebx, [_fx_dl_dx] ; update lighting value
408 mov [_fx_l], ebx ; save temp copy of lighting value
412 je skip2 ; this pixel is transparent, so don't write it (or light it)
414 mov al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables
424 je _div_0_abort ; would be dividing by 0, so abort
426 dec dword [_loop_count]
431 ; -------------------------------------- End of Getting Dword Aligned ----------------------------------------------
435 mov eax, [_loop_count]
436 mov ebx, esi ; get fx_u [pentium pipelining]
439 and esi, (1 << NBITS) - 1 ; keep only the low NBITS bits (remainder modulo 2^NBITS)
441 mov [num_left_over], esi ; remember that remainder for the leftover-pixel code
442 je near tmap_loop ; there are no 2^NBITS chunks, do divide/pixel for whole scanline
443 mov [_loop_count], eax ; _loop_count = pixels / NPIXS
445 ; compute initial v coordinate
450 idiv ecx ; eax = (v/z)
454 ; compute initial u coordinate
459 idiv ecx ; eax = (u/z)
463 ; Set deltas to NPIXS pixel increments
479 je near _div_0_abort ; would be dividing by 0, so abort
481 ; Done with ebx, ebp, ecx until next iteration
492 idiv ecx ; eax = (u/z), kept as U1 below
494 mov ebx, eax ; ebx = U1 until pop's
501 idiv ecx ; eax = (v/z)
503 mov ecx, [U0] ; ecx = U0 until pop's
504 mov edi, [V0] ; edi = V0 until pop's
507 mov ebp, eax ; ebp = V1 until pop's
509 ; Make ESI = V0:U0 in 6:10,6:10 format
516 ; Make EDX = DV:DU in 6:10,6:10 format
522 shl edx, 10-NBITS ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac (upper word)
523 mov dx, ax ; put delta u in low word
525 ; Save the U1 and V1 so we don't have to divide on the next iteration
529 pop edi ; Restore EDI before using it
; Unrolled body of the lighted chunk loop.
; Register roles as used below:
;   ESI = v and u packed as two 6:10 fields (v in the high word, u in the low word)
;   EDX = matching per-pixel step, added to ESI after every texel
;   EBX = running light value (BH supplies the light level); EBP = its per-pixel step
; Per texel: shr eax,26 leaves int(v); shld ax,si,6 then forms int(v)*64 + int(u).
; After movzx, AL = texel and AH = light level, so EAX indexes _gr_fade_table.
535 test dword [_Transparency_on],-1 ; ZF is set when transparency handling is disabled
539 mov eax, esi ; get u,v
540 shr eax, 26 ; shift out all but int(v)
541 shld ax,si,6 ; shift in u, shifting up v
542 add esi, edx ; inc u,v
544 movzx eax, byte [eax] ; get pixel from source bitmap
547 mov ah, bh ; form lighting table lookup value
548 add ebx, ebp ; update lighting value
549 mov al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
555 mov eax, esi ; get u,v
556 shr eax, 26 ; shift out all but int(v)
557 shld ax,si,6 ; shift in u, shifting up v
558 add esi, edx ; inc u,v
560 movzx eax, byte [eax] ; get pixel from source bitmap
563 mov ah, bh ; form lighting table lookup value
564 add ebx, ebp ; update lighting value
565 mov al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
; 2 << (NBITS-2) evaluates to 8 with NBITS = 4.
572 %rep (2 << (NBITS-2))
573 ; local skip3,no_trans1
574 ; local skipa1,skipa2
580 ; -------------------------------------------------------
; Lit texel goes into CL ...
584 mov eax, esi ; get u,v
585 shr eax, 26 ; shift out all but int(v)
586 shld ax,si,6 ; shift in u, shifting up v
587 add esi, edx ; inc u,v
589 movzx eax, byte [eax] ; get pixel from source bitmap
590 mov ah, bh ; form lighting table lookup value
591 add ebx, ebp ; update lighting value
592 mov cl, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
; ... and the next one into CH.
595 mov eax, esi ; get u,v
596 shr eax, 26 ; shift out all but int(v)
597 shld ax,si,6 ; shift in u, shifting up v
598 add esi, edx ; inc u,v
600 movzx eax, byte [eax] ; get pixel from source bitmap
601 mov ah, bh ; form lighting table lookup value
602 add ebx, ebp ; update lighting value
603 mov ch, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
605 ; ----- This is about 1% faster than the above, and could probably be optimized more.
606 ; ----- Problem is, it gets the u,v coordinates backwards. What you would need to do
607 ; ----- is switch the packing of the u,v coordinates above (about 95 lines up).
608 ;----------; mov eax, esi
609 ;----------; shr ax, 10
610 ;----------; rol eax, 6
611 ;----------; mov dx, ax
612 ;----------; add esi, mem_edx
613 ;----------; mov dl, es:[edx]
614 ;----------; mov dh, bh
615 ;----------; add ebx, ebp
616 ;----------; mov cl, _gr_fade_table[edx]
618 ;----------; mov eax, esi
619 ;----------; shr ax, 10
620 ;----------; rol eax, 6
621 ;----------; mov dx, ax
622 ;----------; add esi, mem_edx
623 ;----------; mov dl, es:[edx]
624 ;----------; mov dh, bh
625 ;----------; add ebx, ebp
626 ;----------; mov ch, _gr_fade_table[edx]
628 ror ecx, 16 ; move to next double dest pixel position
; 1 << (NBITS-2) evaluates to 4 with NBITS = 4.
631 %rep (1 << (NBITS-2))
636 mov [edi],ecx ; Draw 4 pixels to display
642 ; -------------------------------------------------------
649 dec dword [_loop_count] ; one chunk done
653 test dword [num_left_over], -1 ; ZF is set when no leftover pixels remain
656 ; ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
; Finishes the num_left_over pixels that do not fill a whole 2^NBITS chunk.
; A non-zero _new_end diverts to NewDoEndPixels; otherwise the code below does one
; more divide pair and steps linearly, writing at fixed offsets from EDI.
673 test dword [_new_end],-1
674 jne near NewDoEndPixels ; _new_end != 0 -> use the other finishing routine
684 ; this can happen because we added DZ1 to the current z, but dz1 represents dz for perhaps 16 pixels
685 ; though we might only plot one more pixel.
688 dep_loop: mov eax, [DU1]
707 push edi ; use edi as a temporary variable
709 cmp ecx,1 << (ZSHIFT+1) ; compare the divisor with 1 << (ZSHIFT+1), i.e. 32 when ZSHIFT = 4
711 mov ecx, 1 << (ZSHIFT+1) ; replace the divisor with that same constant
720 idiv ecx ; eax = (u/z), kept as U1 below
723 mov ebx, eax ; ebx = U1 until pop's
731 idiv ecx ; eax = (v/z)
734 mov ebp, eax ; ebp = V1 until pop's
736 mov ecx, [U0] ; ecx = U0 until pop's
737 mov edi, [V0] ; edi = V0 until pop's
739 ; Make ESI = V0:U0 in 6:10,6:10 format
746 ; Make EDX = DV:DU in 6:10,6:10 format
752 shl edx, 10-NBITS ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac (upper word)
753 mov dx, ax ; put delta u in low word
755 pop edi ; Restore EDI before using it
757 mov ecx, [num_left_over] ; ecx = number of leftover pixels
; Even pixel of the pair: stored at [edi+ITERATION].
767 mov eax, esi ; get u,v
768 shr eax, 26 ; shift out all but int(v)
769 shld ax,si,6 ; shift in u, shifting up v
771 movzx eax, byte [eax] ; get pixel from source bitmap
772 add esi, edx ; inc u,v
773 mov ah, bh ; form lighting table lookup value
774 add ebx, ebp ; update lighting value
777 mov al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
778 mov [edi+ITERATION], al ; write pixel
; Odd pixel of the pair: stored at [edi+ITERATION+1].
; NOTE(review): the even pixel steps the light value with ebp, this one with [_fx_dl_dx];
; presumably ebp holds the same delta at this point -- confirm.
783 mov eax, esi ; get u,v
784 shr eax, 26 ; shift out all but int(v)
785 shld ax,si,6 ; shift in u, shifting up v
787 movzx eax, byte [eax] ; get pixel from source bitmap
788 add esi, edx ; inc u,v
789 mov ah, bh ; form lighting table lookup value
790 add ebx, [_fx_dl_dx] ; update lighting value
793 mov al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
794 mov [edi+ITERATION+1], al ; write pixel
; 1 << (NBITS-1) evaluates to 8 with NBITS = 4; ITERATION advances by 2 to match the two stores above.
799 %rep (1 << (NBITS-1))
802 %assign ITERATION ITERATION + 2
806 ; Should never get here!!!!
810 ; ----------------------------------------- End of LeftOver Pixels ------------------------------------------
812 ; --BUGGY NEW--NewDoEndPixels:
813 ; --BUGGY NEW-- mov eax, num_left_over
814 ; --BUGGY NEW-- and num_left_over, 3
815 ; --BUGGY NEW-- shr eax, 2
816 ; --BUGGY NEW-- je NDEP_1
817 ; --BUGGY NEW-- mov _loop_count, eax
819 ; --BUGGY NEW--; do 4 pixels per hunk, not 16, so div deltas by 4 (16/4=4)
820 ; --BUGGY NEW-- shr DU1,2
821 ; --BUGGY NEW-- shr DV1,2
822 ; --BUGGY NEW-- shr DZ1,2
824 ; --BUGGY NEW--NDEP_TopOfLoop4:
825 ; --BUGGY NEW-- add ebx, DU1
826 ; --BUGGY NEW-- add ebp, DV1
827 ; --BUGGY NEW-- add ecx, DZ1
828 ; --BUGGY NEW-- je _div_0_abort ; would be dividing by 0, so abort
830 ; --BUGGY NEW--; Done with ebx, ebp, ecx until next iteration
831 ; --BUGGY NEW-- push ebx
832 ; --BUGGY NEW-- push ecx
833 ; --BUGGY NEW-- push ebp
834 ; --BUGGY NEW-- push edi
836 ; --BUGGY NEW--; Find fixed U1
837 ; --BUGGY NEW-- mov eax, ebx
838 ; --BUGGY NEW-- mov edx,ebx
839 ; --BUGGY NEW-- shl eax,(ZSHIFT-2)
840 ; --BUGGY NEW-- sar edx,32-(ZSHIFT-2)
841 ; --BUGGY NEW-- idiv ecx ; eax = (v/z)
842 ; --BUGGY NEW-- shl eax, 16-(ZSHIFT-2)
843 ; --BUGGY NEW-- mov ebx, eax ; ebx = U1 until pop's
845 ; --BUGGY NEW--; Find fixed V1
846 ; --BUGGY NEW-- mov eax, ebp
847 ; --BUGGY NEW-- mov edx, ebp
848 ; --BUGGY NEW-- shl eax,(ZSHIFT-2)
849 ; --BUGGY NEW-- sar edx,32-(ZSHIFT-2)
850 ; --BUGGY NEW-- idiv ecx ; eax = (v/z)
852 ; --BUGGY NEW-- mov ecx, U0 ; ecx = U0 until pop's
853 ; --BUGGY NEW-- mov edi, V0 ; edi = V0 until pop's
855 ; --BUGGY NEW-- shl eax, 16-(ZSHIFT-2)
856 ; --BUGGY NEW-- mov ebp, eax ; ebp = V1 until pop's
858 ; --BUGGY NEW--; Make ESI = V0:U0 in 6:10,6:10 format
859 ; --BUGGY NEW-- mov eax, ecx
860 ; --BUGGY NEW-- shr eax, 6
861 ; --BUGGY NEW-- mov esi, edi
862 ; --BUGGY NEW-- shl esi, 10
863 ; --BUGGY NEW-- mov si, ax
865 ; --BUGGY NEW--; Make EDX = DV:DU in 6:10,6:10 format
866 ; --BUGGY NEW-- mov eax, ebx
867 ; --BUGGY NEW-- sub eax, ecx
868 ; --BUGGY NEW-- sar eax, (NBITS-2)+6
869 ; --BUGGY NEW-- mov edx, ebp
870 ; --BUGGY NEW-- sub edx, edi
871 ; --BUGGY NEW-- shl edx, 10-(NBITS-2) ; EDX = V1-V0/ 4 in 6:10 int:frac
872 ; --BUGGY NEW-- mov dx, ax ; put delta u in low word
874 ; --BUGGY NEW--; Save the U1 and V1 so we don't have to divide on the next iteration
875 ; --BUGGY NEW-- mov U0, ebx
876 ; --BUGGY NEW-- mov V0, ebp
878 ; --BUGGY NEW-- pop edi ; Restore EDI before using it
880 ; --BUGGY NEW--; LIGHTING CODE
881 ; --BUGGY NEW-- mov ebx, _fx_l
882 ; --BUGGY NEW-- mov ebp, _fx_dl_dx
884 ; --BUGGY NEW--;** test _Transparency_on,-1
885 ; --BUGGY NEW--;** je NDEP_no_trans1
887 ; --BUGGY NEW-- REPT 2
888 ; --BUGGY NEW-- local NDEP_skipa1, NDEP_skipa2
890 ; --BUGGY NEW-- mov eax, esi ; get u,v
891 ; --BUGGY NEW-- shr eax, 26 ; shift out all but int(v)
892 ; --BUGGY NEW-- shld ax,si,6 ; shift in u, shifting up v
893 ; --BUGGY NEW-- add esi, edx ; inc u,v
894 ; --BUGGY NEW-- mov al, es:[eax] ; get pixel from source bitmap
895 ; --BUGGY NEW-- cmp al,255
896 ; --BUGGY NEW-- je NDEP_skipa1
897 ; --BUGGY NEW-- mov ah, bh ; form lighting table lookup value
898 ; --BUGGY NEW-- add ebx, ebp ; update lighting value
899 ; --BUGGY NEW-- mov al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
900 ; --BUGGY NEW-- mov [edi],al
901 ; --BUGGY NEW--NDEP_skipa1:
902 ; --BUGGY NEW-- inc edi
904 ; --BUGGY NEW--; Do odd pixel
905 ; --BUGGY NEW-- mov eax, esi ; get u,v
906 ; --BUGGY NEW-- shr eax, 26 ; shift out all but int(v)
907 ; --BUGGY NEW-- shld ax,si,6 ; shift in u, shifting up v
908 ; --BUGGY NEW-- add esi, edx ; inc u,v
909 ; --BUGGY NEW-- mov al, es:[eax] ; get pixel from source bitmap
910 ; --BUGGY NEW-- cmp al,255
911 ; --BUGGY NEW-- je NDEP_skipa2
912 ; --BUGGY NEW-- mov ah, bh ; form lighting table lookup value
913 ; --BUGGY NEW-- add ebx, ebp ; update lighting value
914 ; --BUGGY NEW-- mov al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
915 ; --BUGGY NEW-- mov [edi],al
916 ; --BUGGY NEW--NDEP_skipa2:
917 ; --BUGGY NEW-- inc edi
921 ; --BUGGY NEW-- mov _fx_l, ebx
922 ; --BUGGY NEW-- pop ebp
923 ; --BUGGY NEW-- pop ecx
924 ; --BUGGY NEW-- pop ebx
925 ; --BUGGY NEW-- dec _loop_count
926 ; --BUGGY NEW-- jnz NDEP_TopOfLoop4
928 ; --BUGGY NEW-- test num_left_over, -1
929 ; --BUGGY NEW-- je _none_to_do
931 ; --BUGGY NEW--NDEP_1:
932 ; --BUGGY NEW-- mov esi,ebx
934 ; --BUGGY NEW-- align 4
935 ; --BUGGY NEW--NDEP_loop:
937 ; --BUGGY NEW--; compute v coordinate
938 ; --BUGGY NEW-- mov eax, ebp ; get v
939 ; --BUGGY NEW-- mov edx, eax
940 ; --BUGGY NEW-- sar edx, 31
941 ; --BUGGY NEW-- idiv ecx ; eax = (v/z)
943 ; --BUGGY NEW-- and eax,3fh ; mask with height-1
944 ; --BUGGY NEW-- mov ebx,eax
946 ; --BUGGY NEW--; compute u coordinate
947 ; --BUGGY NEW-- mov eax, esi ; get u
948 ; --BUGGY NEW-- mov edx, eax
949 ; --BUGGY NEW-- sar edx, 31
950 ; --BUGGY NEW-- idiv ecx ; eax = (u/z)
952 ; --BUGGY NEW-- shl eax,26
953 ; --BUGGY NEW-- shld ebx,eax,6 ; esi = v*64+u
955 ; --BUGGY NEW--; read 1 pixel
956 ; --BUGGY NEW-- xor eax, eax
957 ; --BUGGY NEW-- mov al, es:[ebx] ; get pixel from source bitmap
959 ; --BUGGY NEW--; lighting code
960 ; --BUGGY NEW-- mov ebx, _fx_l ; get temp copy of lighting value
961 ; --BUGGY NEW-- mov ah, bh ; get lighting level
962 ; --BUGGY NEW-- add ebx, _fx_dl_dx ; update lighting value
963 ; --BUGGY NEW-- mov _fx_l, ebx ; save temp copy of lighting value
965 ; --BUGGY NEW--; transparency check
966 ; --BUGGY NEW-- cmp al,255
967 ; --BUGGY NEW-- je NDEP_skip2 ; this pixel is transparent, so don't write it (or light it)
969 ; --BUGGY NEW-- mov al, _gr_fade_table[eax] ; xlat pixel thru lighting tables
971 ; --BUGGY NEW--; write 1 pixel
972 ; --BUGGY NEW-- mov [edi],al
973 ; --BUGGY NEW--NDEP_skip2: inc edi
975 ; --BUGGY NEW--; update deltas
976 ; --BUGGY NEW-- add ebp,_fx_dv_dx
977 ; --BUGGY NEW-- add esi,_fx_du_dx
978 ; --BUGGY NEW-- add ecx,_fx_dz_dx
979 ; --BUGGY NEW-- je _div_0_abort ; would be dividing by 0, so abort
981 ; --BUGGY NEW-- dec num_left_over
982 ; --BUGGY NEW-- jne NDEP_loop
984 ; --BUGGY NEW-- jmp _none_to_do
; Per-pixel finishing loop: one divide for v and one for u per pixel, a light-table
; lookup, and a countdown on num_left_over.
; NOTE(review): presumably this is the NewDoEndPixels path selected by _new_end -- confirm.
992 ; compute v coordinate
996 idiv ecx ; eax = (v/z)
998 and eax,3fh ; mask with height-1
1001 ; compute u coordinate
1002 mov eax, esi ; get u
1005 idiv ecx ; eax = (u/z)
1008 shld ebx,eax,6 ; ebx = v*64+u (shld writes ebx; the top 6 bits of eax are shifted in)
1013 mov al, [ebx] ; get pixel from source bitmap
1016 mov ebx, [_fx_l] ; get temp copy of lighting value
1017 mov ah, bh ; get lighting level
1018 add ebx, [_fx_dl_dx] ; update lighting value
1019 mov [_fx_l], ebx ; save temp copy of lighting value
1021 ; transparency check
1023 je NDEP_skip2 ; this pixel is transparent, so don't write it (or light it)
1025 mov al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables
1035 je near _div_0_abort ; would be dividing by 0, so abort
1037 dec dword [num_left_over] ; one leftover pixel done
1042 ; ==================================================== No Lighting Code ======================================================
; Unlit fast path, reached from the entry code when _Lighting_on is zero.
; It begins with a per-pixel lead-in loop (NotDwordAligned1_nolight) that exits to
; DwordAligned1_nolight; each iteration does a v divide, a u divide and a texel fetch.
1043 global _tmap_loop_fast_nolight
1044 _tmap_loop_fast_nolight:
1048 NotDwordAligned1_nolight:
1050 jz DwordAligned1_nolight ; leave the lead-in when the preceding test gives zero (presumably a destination-alignment test, per the label names -- confirm)
1052 ; compute v coordinate
1056 idiv ecx ; eax = (v/z)
1058 and eax,3fh ; mask with height-1
1061 ; compute u coordinate
1062 mov eax, esi ; get u
1065 idiv ecx ; eax = (u/z)
1068 shld ebx,eax,6 ; ebx = v*64+u (shld writes ebx; the top 6 bits of eax are shifted in)
1072 mov al,[ebx] ; get pixel from source bitmap
1084 je near _div_0_abort ; would be dividing by 0, so abort
1086 dec dword [_loop_count]
1087 jns NotDwordAligned1_nolight ; keep going while _loop_count has not gone negative
; Unlit chunked loop: splits the remaining pixels into whole 2^NBITS chunks plus
; num_left_over, then per chunk does two divides (U1, V1), packs the start values and
; steps into ESI / EDX, gathers texels into ECX and stores them a dword at a time.
1090 DwordAligned1_nolight:
1093 mov eax, [_loop_count]
1095 mov [num_left_over], eax
1099 je near tmap_loop ; no 2^NBITS chunks, do divide/pixel for whole scanline
1101 mov [_loop_count], eax ; _loop_count = pixels / NPIXS
1103 sub [num_left_over], eax ; num_left_over = what remains after subtracting eax from the value stored above
1105 ; compute initial v coordinate
1111 idiv ecx ; eax = (v/z)
1116 ; compute initial u coordinate
1122 idiv ecx ; eax = (u/z)
1127 ; Set deltas to NPIXS pixel increments
1128 mov eax, [_fx_du_dx] ; load du/dx
1131 mov eax, [_fx_dv_dx] ; load dv/dx
1134 mov eax, [_fx_dz_dx] ; load dz/dx
1143 je near _div_0_abort ; would be dividing by 0, so abort
1145 ; Done with ebx, ebp, ecx until next iteration
1157 idiv ecx ; eax = (u/z), kept as U1 below
1160 mov ebx, eax ; ebx = U1 until pop's
1168 idiv ecx ; eax = (v/z)
1171 mov ebp, eax ; ebp = V1 until pop's
1173 mov ecx, [U0] ; ecx = U0 until pop's
1174 mov edi, [V0] ; edi = V0 until pop's
1176 ; Make ESI = V0:U0 in 6:10,6:10 format
1183 ; Make EDX = DV:DU in 6:10,6:10 format
1189 shl edx, 10-NBITS ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac (upper word)
1190 mov dx, ax ; put delta u in low word
1192 ; Save the U1 and V1 so we don't have to divide on the next iteration
1196 pop edi ; Restore EDI before using it
; Gather four texels into ECX: CL, CH, rotate by 16, CL, CH, rotate by 16.
; Per texel: shr eax,26 leaves int(v); shld ax,si,6 then forms int(v)*64 + int(u).
1200 mov eax, esi ; get u,v
1201 shr eax, 26 ; shift out all but int(v)
1202 shld ax,si,6 ; shift in u, shifting up v
1203 add esi, edx ; inc u,v
1205 mov cl, [eax] ; load into buffer register
1207 mov eax, esi ; get u,v
1208 shr eax, 26 ; shift out all but int(v)
1209 shld ax,si,6 ; shift in u, shifting up v
1211 mov ch, [eax] ; load into buffer register
1212 add esi, edx ; inc u,v
1213 ror ecx, 16 ; move to next dest pixel
1215 mov eax, esi ; get u,v
1216 shr eax, 26 ; shift out all but int(v)
1217 shld ax,si,6 ; shift in u, shifting up v
1219 mov cl, [eax] ; load into buffer register
1220 add esi, edx ; inc u,v
1222 mov eax, esi ; get u,v
1223 shr eax, 26 ; shift out all but int(v)
1224 shld ax,si,6 ; shift in u, shifting up v
1226 mov ch, [eax] ; load into buffer register
1227 add esi, edx ; inc u,v
1228 ror ecx, 16 ;-- can get rid of this, just write in different order below -- ; move to next dest pixel
1230 test dword [_Transparency_on],-1 ; ZF is set when transparency handling is disabled
1259 mov [edi],ecx ; Draw 4 pixels to display
; 1 << (NBITS-2) evaluates to 4 with NBITS = 4.
1263 %rep (1 << (NBITS-2))
1264 ;local skip7, no_trans2, skip1q, skip2q, skip3q, skip4q
1272 dec dword [_loop_count] ; one chunk done
1273 jnz near TopOfLoop4_nolight ; more chunks remain
1277 test dword [num_left_over], -1 ; ZF is set when no leftover pixels remain
; Unlit leftover pixels: one more divide pair gives U1 / V1, the start values and steps
; are packed into ESI / EDX as in the chunk loop, and texels are written one byte at a
; time at [edi+ITERATION].
1280 DoEndPixels_nolight:
1284 je near _div_0_abort ; would be dividing by 0, so abort
1285 push edi ; use edi as a temporary variable
1292 idiv ecx ; eax = (u/z), kept as U1 below
1294 mov ebx, eax ; ebx = U1 until pop's
1301 idiv ecx ; eax = (v/z)
1303 mov ebp, eax ; ebp = V1 until pop's
1305 mov ecx, [U0] ; ecx = U0 until pop's
1306 mov edi, [V0] ; edi = V0 until pop's
1308 ; Make ESI = V0:U0 in 6:10,6:10 format
1315 ; Make EDX = DV:DU in 6:10,6:10 format
1321 shl edx, 10-NBITS ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac (upper word)
1322 mov dx, ax ; put delta u in low word
1324 pop edi ; Restore EDI before using it
1326 mov ecx, [num_left_over] ; ecx = number of leftover pixels
1331 mov eax, esi ; get u,v
1332 shr eax, 26 ; shift out all but int(v)
1333 shld ax,si,6 ; shift in u, shifting up v
1335 movzx eax, byte [eax] ; get pixel from source bitmap (zero-extended into eax)
1336 add esi, edx ; inc u,v
1339 mov [edi+ITERATION], al ; write pixel
; ITERATION advances by 1 here, matching the single store above.
1347 %assign ITERATION ITERATION + 1
1350 ; Should never get here!!!!!