1 ; $Id: tmap_per.asm,v 1.4 2003-12-08 21:21:16 btb Exp $
2 ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
3 ;SOFTWARE CORPORATION ("PARALLAX"). PARALLAX, IN DISTRIBUTING THE CODE TO
4 ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
5 ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
6 ;IN USING, DISPLAYING, AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
7 ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
8 ;FREE PURPOSES. IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
9 ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES. THE END-USER UNDERSTANDS
10 ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
11 ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION. ALL RIGHTS RESERVED.
14 ; Perspective texture mapper inner loop.
17 ; Revision 1.26 1995/02/20 18:22:55 john
18 ; Put all the externs in the assembly modules into tmap_inc.asm.
19 ; Also, moved all the C versions of the inner loops into a new module,
22 ; Revision 1.25 1995/02/20 17:09:08 john
23 ; Added code so that you can build the tmapper with no assembly!
25 ; Revision 1.24 1995/01/10 09:32:07 mike
26 ; mostly fix garbage at end of scanline, but slow down by 1-4%.
28 ; Revision 1.23 1994/12/02 23:29:57 mike
31 ; Revision 1.22 1994/11/30 00:57:00 mike
34 ; Revision 1.21 1994/11/21 13:57:42 mike
35 ; fix right side shear bug
37 ; Revision 1.20 1994/11/12 16:41:09 mike
40 ; Revision 1.19 1994/10/27 19:40:00 john
41 ; Made lighting table lookup be _gr_fade_table[eax] instead
42 ; of fs:[eax], which gets rid of a segment override that
43 ; supposedly costs 1 clock on a 486. Mainly, I wanted to verify
44 ; that the only reason we need selectors is for the source texture
47 ; Revision 1.18 1994/05/03 11:08:32 mike
48 ; Trap divide overflows.
50 ; Revision 1.17 1994/04/21 15:03:41 mike
53 ; Revision 1.16 1994/04/08 16:46:57 john
54 ; Made 32 fade levels. Hacked.
56 ; Revision 1.15 1994/03/31 08:35:18 mike
57 ; Fix quantized-by-4 bug in inner loop.
59 ; Revision 1.14 1994/03/14 17:41:14 mike
60 ; Fix bug in unlighted version.
62 ; Revision 1.13 1994/03/14 15:45:14 mike
65 ; Revision 1.12 1994/01/14 14:01:58 mike
66 ; *** empty log message ***
68 ; Revision 1.11 1993/12/18 14:43:44 john
69 ; Messed around with doing 1/z, the u*(1/z) and v*(1/z)
70 ; (Went from 23 fps to 21 fps... not good! )
72 ; Revision 1.10 1993/12/17 16:14:17 john
73 ; Split lighted/nonlighted, so there is no cmp lighting
76 ; Revision 1.9 1993/12/17 12:34:29 john
77 ; Made leftover bytes use linear approx instead of correct...
78 ; should save about 8 divides per scanline on average.
79 ; Also, took out anti-aliasing code and rearranged the
80 ; order of some instructions to help on 486 pipelining.
81 ; (The anti-aliasing code did *not* look good, so I
82 ; figure there was no reason to keep it in. )
84 ; Revision 1.8 1993/12/16 18:37:52 mike
85 ; Align some stuff on 4 byte boundaries.
87 ; Revision 1.7 1993/11/30 08:44:18 john
88 ; Made selector set check for < 64*64 bitmaps.
90 ; Revision 1.6 1993/11/23 17:25:26 john
91 ; Added safety "and eax, 0fffh" in lighting lookup.
93 ; Revision 1.5 1993/11/23 15:08:52 mike
96 ; Revision 1.4 1993/11/23 14:38:50 john
97 ; optimized NORMAL code by switching EBX and ESI, so BH can be used in
98 ; the lighting process.
100 ; Revision 1.3 1993/11/23 14:30:53 john
101 ; Made the perspective tmapper do 1/8 divides; added lighting.
103 ; Revision 1.2 1993/11/22 10:24:59 mike
104 ; *** empty log message ***
106 ; Revision 1.1 1993/09/08 17:29:53 mike
114 global _asm_tmap_scanline_per
115 global asm_tmap_scanline_per
117 %include "tmap_inc.asm"
121 ;extern _per2_flag;:dword
123 ; Cater for ELF compilers...
125 %define _loop_count loop_count
126 %define _new_end new_end
127 %define _scan_doubling_flag scan_doubling_flag
128 %define _linear_if_far_flag linear_if_far_flag
134 global _scan_doubling_flag
135 global _linear_if_far_flag
146 ; _min_ecx dd 55555555h
147 _new_end dd 1 ; if set, use new, but slower, way of finishing off extra pixels on scanline, 01/10/95 --MK
149 _scan_doubling_flag dd 0
150 _linear_if_far_flag dd 0
152 ;---------- local variables
167 ; --------------------------------------------------------------------------------------------------
169 ; _xleft fixed point left x coordinate
170 ; _xright fixed point right x coordinate
171 ; _y fixed point y coordinate
172 ; _pixptr address of source pixel map
173 ; _u fixed point initial u coordinate
174 ; _v fixed point initial v coordinate
175 ; _z fixed point initial z coordinate
176 ; _du_dx fixed point du/dx
177 ; _dv_dx fixed point dv/dx
178 ; _dz_dx fixed point dz/dx
180 ; for (x = (int) xleft; x <= (int) xright; x++) {
181 ; _setcolor(read_pixel_from_tmap(srcb,((int) (u/z)) & 63,((int) (v/z)) & 63));
191 _asm_tmap_scanline_per:
192 asm_tmap_scanline_per:
; Entry point of the perspective scanline texture mapper. Both spellings of the
; name label the same address so that either symbol convention can link to it.
; This setup section computes the destination address, stores the iteration
; count, rescales the lighting delta, and then selects an inner loop.
196 ;---------------------------- setup for loop ---------------------------------
197 ; Setup for loop: _loop_count iterations = (int) xright - (int) xleft
198 ; esi source pixel pointer = pixptr
199 ; edi initial row pointer = y*320+x
200 ; NOTE: fx_xright and fx_xleft changed from fix to int by mk on 12/01/94.
202 ; set esi = pointer to start of texture map data
204 ; set edi = address of first pixel to modify
206 ; mov es,[_pixel_data_selector] ; selector[0*2]
208 mov edi,[_y_pointers+edi*4] ; edi = dword entry number edi of the _y_pointers table
214 ebx_ok: add edi,[_write_buffer] ; add the value stored at _write_buffer to form the destination address
217 ; set _loop_count = # of iterations
221 mov [_loop_count],eax ; eax holds the iteration count at this point
223 ; lighting values are passed in fixed point, but need to be in 8 bit integer, 8 bit fraction so we can easily
224 ; get the integer by reading bh
226 sar dword [_fx_dl_dx],8 ; arithmetic shift: drop 8 fraction bits, sign preserved
228 inc dword [_fx_dl_dx] ; round towards 0 for negative deltas
; NOTE(review): the inc above is presumably skipped for non-negative deltas by a
; conditional jump testing the sign after the sar -- confirm.
236 test dword [_per2_flag],-1 ; ZF set iff _per2_flag == 0
239 test dword [_Lighting_on], -1 ; ZF set iff _Lighting_on == 0
240 je near _tmap_loop_fast_nolight ; _Lighting_on == 0: use the no-lighting loop
242 ;tmap_loop_fast_nolight_jumper:
243 ; jmp tmap_loop_fast_nolight
245 ;================ PERSPECTIVE TEXTURE MAP INNER LOOPS ========================
247 ; Usage in loop: eax division, pixel value
252 ; esi source pixel pointer
253 ; edi destination pixel pointer
; NOTE(review): the esi entry in the table above looks stale. The first
; instruction of the loop below copies ebx into esi and uses it as the u
; coordinate, and the texel is fetched through ebx.
255 ;-------------------- NORMAL PERSPECTIVE TEXTURE MAP LOOP -----------------
; One divide pair (v/z and u/z) per pixel. The quotients are combined into a
; texel offset, the texel is read, passed through _gr_fade_table using the
; integer part of the lighting value, and _loop_count is decremented.
257 mov esi, ebx ; esi becomes u coordinate
262 ; compute v coordinate
266 idiv ecx ; eax = (v/z)
268 and eax,3fh ; mask with height-1 (3fh = 63, i.e. 64 rows)
271 ; compute u coordinate
275 idiv ecx ; eax = (u/z)
278 shld ebx,eax,6 ; ebx = v*64+u (shld writes ebx; the texel load below reads [ebx])
283 test dword [_Lighting_on], -1 ; ZF set iff _Lighting_on == 0; the mov that follows leaves flags intact
284 mov al, [ebx] ; get pixel from source bitmap
288 mov ebx, [_fx_l] ; get temp copy of lighting value
289 mov ah, bh ; get lighting level
290 add ebx, [_fx_dl_dx] ; update lighting value
291 mov al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables
292 mov [_fx_l], ebx ; save temp copy of lighting value
305 je _div_0_abort ; would be dividing by 0, so abort
307 dec dword [_loop_count]
315 ; We detected a z=0 condition, which seems pretty bogus, don't you think?
316 ; So, we abort, but maybe we want to know about it.
320 ;-------------------------- PER/4 TMAPPER ----------------
; Pseudo-code for the chunked mapper: divide once per chunk, then step linearly
; between the two perspective-correct endpoints. The pseudo-code is written for
; a chunk of 4 pixels; the actual chunk size is 2^NBITS (see NBITS below).
323 ; U0 = u/w; V0 = v/w;
325 ; u += du_dx*4; v+= dv_dx*4
326 ; U1 = u/w; V1 = v/w;
327 ; DUDX = (U1-U0)/4; DVDX = (V1-V0)/4;
330 ; pixels = texmap[V0*64+U0];
331 ; U0 += DUDX; V0 += DVDX
333 ; pixels = (pixels<<8)+texmap[V0*64+U0];
334 ; U0 += DUDX; V0 += DVDX
336 ; pixels = (pixels<<8)+texmap[V0*64+U0];
337 ; U0 += DUDX; V0 += DVDX
339 ; pixels = (pixels<<8)+texmap[V0*64+U0];
345 NBITS equ 4 ; 2^NBITS pixels plotted per divide (16 with NBITS = 4)
346 ZSHIFT equ 4 ; precision used in PDIV macro
350 ; Returns EAX/ECX in 16.16 format in EAX. Trashes EDX
355 ; idiv ecx ; eax = (v/z)
359 global _tmap_loop_fast
361 ; -------------------------------------- Start of Getting Dword Aligned ----------------------------------------------
; Plots single pixels, one divide pair each, until the section ends; the
; section markers say the purpose is to reach dword alignment.
; NOTE(review): presumably the alignment being reached is that of the
; destination pointer edi -- confirm against the loop's exit test.
372 ; compute v coordinate
376 idiv ecx ; eax = (v/z)
378 and eax,3fh ; mask with height-1 (3fh = 63, i.e. 64 rows)
381 ; compute u coordinate
385 idiv ecx ; eax = (u/z)
388 shld ebx,eax,6 ; ebx = v*64+u (shld writes ebx; the texel load below reads [ebx])
393 mov al, [ebx] ; get pixel from source bitmap
396 mov ebx, [_fx_l] ; get temp copy of lighting value
397 mov ah, bh ; get lighting level
398 add ebx, [_fx_dl_dx] ; update lighting value
399 mov [_fx_l], ebx ; save temp copy of lighting value
; The lighting value has already been stepped above, so a skipped transparent
; pixel still advances the lighting interpolation.
403 je skip2 ; this pixel is transparent, so don't write it (or light it)
405 mov al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables
415 je _div_0_abort ; would be dividing by 0, so abort
417 dec dword [_loop_count]
422 ; -------------------------------------- End of Getting Dword Aligned ----------------------------------------------
; Chunked main loop: split the remaining pixel count into whole 2^NBITS chunks
; plus a remainder (num_left_over), then for each chunk divide once for the far
; endpoint (U1,V1) and step linearly from (U0,V0) in packed 6:10 fixed point.
426 mov eax, [_loop_count]
427 mov ebx, esi ; get fx_u [pentium pipelining]
430 and esi, (1 << NBITS) - 1 ; keep the low NBITS bits: count modulo 2^NBITS
432 mov [num_left_over], esi
433 je near tmap_loop ; there are no 2^NBITS chunks, do divide/pixel for whole scanline
434 mov [_loop_count], eax ; _loop_count = pixels / NPIXS
436 ; compute initial v coordinate
441 idiv ecx ; eax = (v/z)
445 ; compute initial u coordinate
450 idiv ecx ; eax = (u/z)
454 ; Set deltas to NPIXS pixel increments
470 je near _div_0_abort ; would be dividing by 0, so abort
472 ; Done with ebx, ebp, ecx until next iteration
483 idiv ecx ; eax = (u/z); the quotient becomes U1 below
485 mov ebx, eax ; ebx = U1 until pop's
492 idiv ecx ; eax = (v/z)
494 mov ecx, [U0] ; ecx = U0 until pop's
495 mov edi, [V0] ; edi = V0 until pop's
498 mov ebp, eax ; ebp = V1 until pop's
500 ; Make ESI = V0:U0 in 6:10,6:10 format
507 ; Make EDX = DV:DU in 6:10,6:10 format
513 shl edx, 10-NBITS ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac (shift is 10-NBITS, so the divisor is 16, not 4)
514 mov dx, ax ; put delta u in low word
516 ; Save the U1 and V1 so we don't have to divide on the next iteration
520 pop edi ; Restore EDI before using it
526 test dword [_Transparency_on],-1 ; ZF set iff _Transparency_on == 0
; Pixel pair, variant that leaves the lit result in al.
530 mov eax, esi ; get u,v
531 shr eax, 26 ; shift out all but int(v)
532 shld ax,si,6 ; shift in u, shifting up v
533 add esi, edx ; inc u,v
535 movzx eax, byte [eax] ; get pixel from source bitmap
538 mov ah, bh ; form lighting table lookup value
539 add ebx, ebp ; update lighting value
540 mov al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
546 mov eax, esi ; get u,v
547 shr eax, 26 ; shift out all but int(v)
548 shld ax,si,6 ; shift in u, shifting up v
549 add esi, edx ; inc u,v
551 movzx eax, byte [eax] ; get pixel from source bitmap
554 mov ah, bh ; form lighting table lookup value
555 add ebx, ebp ; update lighting value
556 mov al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
563 %rep (2 << (NBITS-2)) ; 8 repetitions with NBITS = 4
564 ; local skip3,no_trans1
565 ; local skipa1,skipa2
571 ; -------------------------------------------------------
; Pixel pair, variant that packs the two lit results into cl and ch; ecx is
; then rotated by 16 so the next pair lands in the other half, and the packed
; dword is stored with a single write.
575 mov eax, esi ; get u,v
576 shr eax, 26 ; shift out all but int(v)
577 shld ax,si,6 ; shift in u, shifting up v
578 add esi, edx ; inc u,v
580 movzx eax, byte [eax] ; get pixel from source bitmap
581 mov ah, bh ; form lighting table lookup value
582 add ebx, ebp ; update lighting value
583 mov cl, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
586 mov eax, esi ; get u,v
587 shr eax, 26 ; shift out all but int(v)
588 shld ax,si,6 ; shift in u, shifting up v
589 add esi, edx ; inc u,v
591 movzx eax, byte [eax] ; get pixel from source bitmap
592 mov ah, bh ; form lighting table lookup value
593 add ebx, ebp ; update lighting value
594 mov ch, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
596 ; ----- This is about 1% faster than the above, and could probably be optimized more.
597 ; ----- Problem is, it gets the u,v coordinates backwards. What you would need to do
598 ; ----- is switch the packing of the u,v coordinates above (about 95 lines up).
599 ;----------; mov eax, esi
600 ;----------; shr ax, 10
601 ;----------; rol eax, 6
602 ;----------; mov dx, ax
603 ;----------; add esi, mem_edx
604 ;----------; mov dl, es:[edx]
605 ;----------; mov dh, bh
606 ;----------; add ebx, ebp
607 ;----------; mov cl, _gr_fade_table[edx]
609 ;----------; mov eax, esi
610 ;----------; shr ax, 10
611 ;----------; rol eax, 6
612 ;----------; mov dx, ax
613 ;----------; add esi, mem_edx
614 ;----------; mov dl, es:[edx]
615 ;----------; mov dh, bh
616 ;----------; add ebx, ebp
617 ;----------; mov ch, _gr_fade_table[edx]
619 ror ecx, 16 ; move to next double dest pixel position
622 %rep (1 << (NBITS-2)) ; 4 repetitions with NBITS = 4
627 mov [edi],ecx ; Draw 4 pixels to display
633 ; -------------------------------------------------------
640 dec dword [_loop_count] ; one chunk done
644 test dword [num_left_over], -1 ; ZF set iff no remainder pixels are left
647 ; ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
; Finishes the num_left_over pixels that did not fill a whole 2^NBITS chunk.
; When _new_end is non-zero control goes to NewDoEndPixels instead; otherwise
; one more endpoint divide is done and the remainder is stepped linearly.
664 test dword [_new_end],-1 ; ZF clear iff _new_end != 0
665 jne near NewDoEndPixels ; _new_end set: take the NewDoEndPixels path
675 ; this can happen because we added DZ1 to the current z, but dz1 represents dz for perhaps 16 pixels
676 ; though we might only plot one more pixel.
679 dep_loop: mov eax, [DU1]
698 push edi ; use edi as a temporary variable
700 cmp ecx,1 << (ZSHIFT+1) ; threshold is 32 with ZSHIFT = 4
702 mov ecx, 1 << (ZSHIFT+1) ; force z to the threshold value
; NOTE(review): the mov above is presumably executed only when ecx is below the
; threshold (a conditional jump after the cmp) -- confirm.
711 idiv ecx ; eax = (u/z); the quotient becomes U1 below
714 mov ebx, eax ; ebx = U1 until pop's
722 idiv ecx ; eax = (v/z)
725 mov ebp, eax ; ebp = V1 until pop's
727 mov ecx, [U0] ; ecx = U0 until pop's
728 mov edi, [V0] ; edi = V0 until pop's
730 ; Make ESI = V0:U0 in 6:10,6:10 format
737 ; Make EDX = DV:DU in 6:10,6:10 format
743 shl edx, 10-NBITS ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac (shift is 10-NBITS, so the divisor is 16, not 4)
744 mov dx, ax ; put delta u in low word
746 pop edi ; Restore EDI before using it
748 mov ecx, [num_left_over]
; Even pixel of a pair: written at offset ITERATION from edi.
758 mov eax, esi ; get u,v
759 shr eax, 26 ; shift out all but int(v)
760 shld ax,si,6 ; shift in u, shifting up v
762 movzx eax, byte [eax] ; get pixel from source bitmap
763 add esi, edx ; inc u,v
764 mov ah, bh ; form lighting table lookup value
765 add ebx, ebp ; update lighting value
768 mov al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
769 mov [edi+ITERATION], al ; write pixel
; Odd pixel of a pair: written at offset ITERATION+1 from edi.
774 mov eax, esi ; get u,v
775 shr eax, 26 ; shift out all but int(v)
776 shld ax,si,6 ; shift in u, shifting up v
778 movzx eax, byte [eax] ; get pixel from source bitmap
779 add esi, edx ; inc u,v
780 mov ah, bh ; form lighting table lookup value
; NOTE(review): the even pixel steps lighting by ebp while this one steps it by
; [_fx_dl_dx]; the two agree only if ebp holds _fx_dl_dx here -- confirm.
781 add ebx, [_fx_dl_dx] ; update lighting value
784 mov al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
785 mov [edi+ITERATION+1], al ; write pixel
790 %rep (1 << (NBITS-1)) ; 8 repetitions with NBITS = 4
793 %assign ITERATION ITERATION + 2 ; two stores per repetition (offsets ITERATION and ITERATION+1)
797 ; Should never get here!!!!
801 ; ----------------------------------------- End of LeftOver Pixels ------------------------------------------
803 ; --BUGGY NEW--NewDoEndPixels:
804 ; --BUGGY NEW-- mov eax, num_left_over
805 ; --BUGGY NEW-- and num_left_over, 3
806 ; --BUGGY NEW-- shr eax, 2
807 ; --BUGGY NEW-- je NDEP_1
808 ; --BUGGY NEW-- mov _loop_count, eax
810 ; --BUGGY NEW--; do 4 pixels per hunk, not 16, so div deltas by 4 (16/4=4)
811 ; --BUGGY NEW-- shr DU1,2
812 ; --BUGGY NEW-- shr DV1,2
813 ; --BUGGY NEW-- shr DZ1,2
815 ; --BUGGY NEW--NDEP_TopOfLoop4:
816 ; --BUGGY NEW-- add ebx, DU1
817 ; --BUGGY NEW-- add ebp, DV1
818 ; --BUGGY NEW-- add ecx, DZ1
819 ; --BUGGY NEW-- je _div_0_abort ; would be dividing by 0, so abort
821 ; --BUGGY NEW--; Done with ebx, ebp, ecx until next iteration
822 ; --BUGGY NEW-- push ebx
823 ; --BUGGY NEW-- push ecx
824 ; --BUGGY NEW-- push ebp
825 ; --BUGGY NEW-- push edi
827 ; --BUGGY NEW--; Find fixed U1
828 ; --BUGGY NEW-- mov eax, ebx
829 ; --BUGGY NEW-- mov edx,ebx
830 ; --BUGGY NEW-- shl eax,(ZSHIFT-2)
831 ; --BUGGY NEW-- sar edx,32-(ZSHIFT-2)
832 ; --BUGGY NEW-- idiv ecx ; eax = (v/z)
833 ; --BUGGY NEW-- shl eax, 16-(ZSHIFT-2)
834 ; --BUGGY NEW-- mov ebx, eax ; ebx = U1 until pop's
836 ; --BUGGY NEW--; Find fixed V1
837 ; --BUGGY NEW-- mov eax, ebp
838 ; --BUGGY NEW-- mov edx, ebp
839 ; --BUGGY NEW-- shl eax,(ZSHIFT-2)
840 ; --BUGGY NEW-- sar edx,32-(ZSHIFT-2)
841 ; --BUGGY NEW-- idiv ecx ; eax = (v/z)
843 ; --BUGGY NEW-- mov ecx, U0 ; ecx = U0 until pop's
844 ; --BUGGY NEW-- mov edi, V0 ; edi = V0 until pop's
846 ; --BUGGY NEW-- shl eax, 16-(ZSHIFT-2)
847 ; --BUGGY NEW-- mov ebp, eax ; ebp = V1 until pop's
849 ; --BUGGY NEW--; Make ESI = V0:U0 in 6:10,6:10 format
850 ; --BUGGY NEW-- mov eax, ecx
851 ; --BUGGY NEW-- shr eax, 6
852 ; --BUGGY NEW-- mov esi, edi
853 ; --BUGGY NEW-- shl esi, 10
854 ; --BUGGY NEW-- mov si, ax
856 ; --BUGGY NEW--; Make EDX = DV:DU in 6:10,6:10 format
857 ; --BUGGY NEW-- mov eax, ebx
858 ; --BUGGY NEW-- sub eax, ecx
859 ; --BUGGY NEW-- sar eax, (NBITS-2)+6
860 ; --BUGGY NEW-- mov edx, ebp
861 ; --BUGGY NEW-- sub edx, edi
862 ; --BUGGY NEW-- shl edx, 10-(NBITS-2) ; EDX = V1-V0/ 4 in 6:10 int:frac
863 ; --BUGGY NEW-- mov dx, ax ; put delta u in low word
865 ; --BUGGY NEW--; Save the U1 and V1 so we don't have to divide on the next iteration
866 ; --BUGGY NEW-- mov U0, ebx
867 ; --BUGGY NEW-- mov V0, ebp
869 ; --BUGGY NEW-- pop edi ; Restore EDI before using it
871 ; --BUGGY NEW--; LIGHTING CODE
872 ; --BUGGY NEW-- mov ebx, _fx_l
873 ; --BUGGY NEW-- mov ebp, _fx_dl_dx
875 ; --BUGGY NEW--;** test _Transparency_on,-1
876 ; --BUGGY NEW--;** je NDEP_no_trans1
878 ; --BUGGY NEW-- REPT 2
879 ; --BUGGY NEW-- local NDEP_skipa1, NDEP_skipa2
881 ; --BUGGY NEW-- mov eax, esi ; get u,v
882 ; --BUGGY NEW-- shr eax, 26 ; shift out all but int(v)
883 ; --BUGGY NEW-- shld ax,si,6 ; shift in u, shifting up v
884 ; --BUGGY NEW-- add esi, edx ; inc u,v
885 ; --BUGGY NEW-- mov al, es:[eax] ; get pixel from source bitmap
886 ; --BUGGY NEW-- cmp al,255
887 ; --BUGGY NEW-- je NDEP_skipa1
888 ; --BUGGY NEW-- mov ah, bh ; form lighting table lookup value
889 ; --BUGGY NEW-- add ebx, ebp ; update lighting value
890 ; --BUGGY NEW-- mov al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
891 ; --BUGGY NEW-- mov [edi],al
892 ; --BUGGY NEW--NDEP_skipa1:
893 ; --BUGGY NEW-- inc edi
895 ; --BUGGY NEW--; Do odd pixel
896 ; --BUGGY NEW-- mov eax, esi ; get u,v
897 ; --BUGGY NEW-- shr eax, 26 ; shift out all but int(v)
898 ; --BUGGY NEW-- shld ax,si,6 ; shift in u, shifting up v
899 ; --BUGGY NEW-- add esi, edx ; inc u,v
900 ; --BUGGY NEW-- mov al, es:[eax] ; get pixel from source bitmap
901 ; --BUGGY NEW-- cmp al,255
902 ; --BUGGY NEW-- je NDEP_skipa2
903 ; --BUGGY NEW-- mov ah, bh ; form lighting table lookup value
904 ; --BUGGY NEW-- add ebx, ebp ; update lighting value
905 ; --BUGGY NEW-- mov al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
906 ; --BUGGY NEW-- mov [edi],al
907 ; --BUGGY NEW--NDEP_skipa2:
908 ; --BUGGY NEW-- inc edi
912 ; --BUGGY NEW-- mov _fx_l, ebx
913 ; --BUGGY NEW-- pop ebp
914 ; --BUGGY NEW-- pop ecx
915 ; --BUGGY NEW-- pop ebx
916 ; --BUGGY NEW-- dec _loop_count
917 ; --BUGGY NEW-- jnz NDEP_TopOfLoop4
919 ; --BUGGY NEW-- test num_left_over, -1
920 ; --BUGGY NEW-- je _none_to_do
922 ; --BUGGY NEW--NDEP_1:
923 ; --BUGGY NEW-- mov esi,ebx
925 ; --BUGGY NEW-- align 4
926 ; --BUGGY NEW--NDEP_loop:
928 ; --BUGGY NEW--; compute v coordinate
929 ; --BUGGY NEW-- mov eax, ebp ; get v
930 ; --BUGGY NEW-- mov edx, eax
931 ; --BUGGY NEW-- sar edx, 31
932 ; --BUGGY NEW-- idiv ecx ; eax = (v/z)
934 ; --BUGGY NEW-- and eax,3fh ; mask with height-1
935 ; --BUGGY NEW-- mov ebx,eax
937 ; --BUGGY NEW--; compute u coordinate
938 ; --BUGGY NEW-- mov eax, esi ; get u
939 ; --BUGGY NEW-- mov edx, eax
940 ; --BUGGY NEW-- sar edx, 31
941 ; --BUGGY NEW-- idiv ecx ; eax = (u/z)
943 ; --BUGGY NEW-- shl eax,26
944 ; --BUGGY NEW-- shld ebx,eax,6 ; esi = v*64+u
946 ; --BUGGY NEW--; read 1 pixel
947 ; --BUGGY NEW-- xor eax, eax
948 ; --BUGGY NEW-- mov al, es:[ebx] ; get pixel from source bitmap
950 ; --BUGGY NEW--; lighting code
951 ; --BUGGY NEW-- mov ebx, _fx_l ; get temp copy of lighting value
952 ; --BUGGY NEW-- mov ah, bh ; get lighting level
953 ; --BUGGY NEW-- add ebx, _fx_dl_dx ; update lighting value
954 ; --BUGGY NEW-- mov _fx_l, ebx ; save temp copy of lighting value
956 ; --BUGGY NEW--; transparency check
957 ; --BUGGY NEW-- cmp al,255
958 ; --BUGGY NEW-- je NDEP_skip2 ; this pixel is transparent, so don't write it (or light it)
960 ; --BUGGY NEW-- mov al, _gr_fade_table[eax] ; xlat pixel thru lighting tables
962 ; --BUGGY NEW--; write 1 pixel
963 ; --BUGGY NEW-- mov [edi],al
964 ; --BUGGY NEW--NDEP_skip2: inc edi
966 ; --BUGGY NEW--; update deltas
967 ; --BUGGY NEW-- add ebp,_fx_dv_dx
968 ; --BUGGY NEW-- add esi,_fx_du_dx
969 ; --BUGGY NEW-- add ecx,_fx_dz_dx
970 ; --BUGGY NEW-- je _div_0_abort ; would be dividing by 0, so abort
972 ; --BUGGY NEW-- dec num_left_over
973 ; --BUGGY NEW-- jne NDEP_loop
975 ; --BUGGY NEW-- jmp _none_to_do
; Per-pixel finishing loop counted by num_left_over: one divide pair per pixel,
; texel fetch, lighting lookup, transparent texels skipped.
; NOTE(review): presumably this is the live NewDoEndPixels path selected when
; _new_end is set -- confirm against the label that precedes this loop.
983 ; compute v coordinate
987 idiv ecx ; eax = (v/z)
989 and eax,3fh ; mask with height-1 (3fh = 63, i.e. 64 rows)
992 ; compute u coordinate
996 idiv ecx ; eax = (u/z)
999 shld ebx,eax,6 ; ebx = v*64+u (shld writes ebx; the texel load below reads [ebx])
1004 mov al, [ebx] ; get pixel from source bitmap
1007 mov ebx, [_fx_l] ; get temp copy of lighting value
1008 mov ah, bh ; get lighting level
1009 add ebx, [_fx_dl_dx] ; update lighting value
1010 mov [_fx_l], ebx ; save temp copy of lighting value
1012 ; transparency check
1014 je NDEP_skip2 ; this pixel is transparent, so don't write it (or light it)
1016 mov al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables
1026 je near _div_0_abort ; would be dividing by 0, so abort
1028 dec dword [num_left_over] ; one fewer remainder pixel to plot
1033 ; ==================================================== No Lighting Code ======================================================
1034 global _tmap_loop_fast_nolight
1035 _tmap_loop_fast_nolight:
1039 NotDwordAligned1_nolight:
1041 jz DwordAligned1_nolight
1043 ; compute v coordinate
1047 idiv ecx ; eax = (v/z)
1049 and eax,3fh ; mask with height-1
1052 ; compute u coordinate
1053 mov eax, esi ; get u
1056 idiv ecx ; eax = (u/z)
1059 shld ebx,eax,6 ; esi = v*64+u
1063 mov al,[ebx] ; get pixel from source bitmap
1075 je near _div_0_abort ; would be dividing by 0, so abort
1077 dec dword [_loop_count]
1078 jns NotDwordAligned1_nolight
1081 DwordAligned1_nolight:
1084 mov eax, [_loop_count]
1086 mov [num_left_over], eax
1090 je near tmap_loop ; no 2^NBITS chunks, do divide/pixel for whole scanline
1092 mov [_loop_count], eax ; _loop_count = pixels / NPIXS
1094 sub [num_left_over], eax ; num_left_over = obvious
1096 ; compute initial v coordinate
1102 idiv ecx ; eax = (v/z)
1107 ; compute initial u coordinate
1113 idiv ecx ; eax = (v/z)
1118 ; Set deltas to NPIXS pixel increments
1119 mov eax, [_fx_du_dx]
1122 mov eax, [_fx_dv_dx]
1125 mov eax, [_fx_dz_dx]
1134 je near _div_0_abort
1136 ; Done with ebx, ebp, ecx until next iteration
1148 idiv ecx ; eax = (v/z)
1151 mov ebx, eax ; ebx = U1 until pop's
1159 idiv ecx ; eax = (v/z)
1162 mov ebp, eax ; ebp = V1 until pop's
1164 mov ecx, [U0] ; ecx = U0 until pop's
1165 mov edi, [V0] ; edi = V0 until pop's
1167 ; Make ESI = V0:U0 in 6:10,6:10 format
1174 ; Make EDX = DV:DU in 6:10,6:10 format
1180 shl edx, 10-NBITS ; EDX = V1-V0/ 4 in 6:10 int:frac
1181 mov dx, ax ; put delta u in low word
1183 ; Save the U1 and V1 so we don't have to divide on the next iteration
1187 pop edi ; Restore EDI before using it
1191 mov eax, esi ; get u,v
1192 shr eax, 26 ; shift out all but int(v)
1193 shld ax,si,6 ; shift in u, shifting up v
1194 add esi, edx ; inc u,v
1196 mov cl, [eax] ; load into buffer register
1198 mov eax, esi ; get u,v
1199 shr eax, 26 ; shift out all but int(v)
1200 shld ax,si,6 ; shift in u, shifting up v
1202 mov ch, [eax] ; load into buffer register
1203 add esi, edx ; inc u,v
1204 ror ecx, 16 ; move to next dest pixel
1206 mov eax, esi ; get u,v
1207 shr eax, 26 ; shift out all but int(v)
1208 shld ax,si,6 ; shift in u, shifting up v
1210 mov cl, [eax] ; load into buffer register
1211 add esi, edx ; inc u,v
1213 mov eax, esi ; get u,v
1214 shr eax, 26 ; shift out all but int(v)
1215 shld ax,si,6 ; shift in u, shifting up v
1217 mov ch, [eax] ; load into buffer register
1218 add esi, edx ; inc u,v
1219 ror ecx, 16 ;-- can get rid of this, just write in different order below -- ; move to next dest pixel
1221 test dword [_Transparency_on],-1
1250 mov [edi],ecx ; Draw 4 pixels to display
1254 %rep (1 << (NBITS-2))
1255 ;local skip7, no_trans2, skip1q, skip2q, skip3q, skip4q
1263 dec dword [_loop_count]
1264 jnz near TopOfLoop4_nolight
1268 test dword [num_left_over], -1
1271 DoEndPixels_nolight:
1275 je near _div_0_abort
1276 push edi ; use edi as a temporary variable
1283 idiv ecx ; eax = (v/z)
1285 mov ebx, eax ; ebx = U1 until pop's
1292 idiv ecx ; eax = (v/z)
1294 mov ebp, eax ; ebp = V1 until pop's
1296 mov ecx, [U0] ; ecx = U0 until pop's
1297 mov edi, [V0] ; edi = V0 until pop's
1299 ; Make ESI = V0:U0 in 6:10,6:10 format
1306 ; Make EDX = DV:DU in 6:10,6:10 format
1312 shl edx, 10-NBITS ; EDX = V1-V0/ 4 in 6:10 int:frac
1313 mov dx, ax ; put delta u in low word
1315 pop edi ; Restore EDI before using it
1317 mov ecx, [num_left_over]
1322 mov eax, esi ; get u,v
1323 shr eax, 26 ; shift out all but int(v)
1324 shld ax,si,6 ; shift in u, shifting up v
1326 movzx eax, byte [eax] ; load into buffer register
1327 add esi, edx ; inc u,v
1330 mov [edi+ITERATION], al ; write pixel
1338 %assign ITERATION ITERATION + 1
1341 ; Should never get here!!!!!