2 * $Logfile: /Freespace2/code/Graphics/TmapScanTiled128x128.cpp $
7 * Routines for drawing tiled 128x128 textures
10 * Revision 1.1 2002/05/03 03:28:09 root
14 * 4 11/30/98 5:31p Dave
15 * Fixed up Fred support for software mode.
17 * 3 11/30/98 1:07p Dave
18 * 16 bit conversion, first run.
20 * 2 10/07/98 10:53a Dave
23 * 1 10/07/98 10:49a Dave
25 * 9 4/23/98 9:55a John
26 * Fixed some bugs in the tiled tmapper causing bright dots to appear all
29 * 8 3/10/98 4:19p John
30 * Cleaned up graphics lib. Took out most unused gr functions. Made D3D
31 * & Glide have popups and print screen. Took out all >8bpp software
32 * support. Made Fred zbuffer. Made zbuffer allocate dynamically to
33 * support Fred. Made zbuffering key off of functions rather than one
36 * 7 1/27/98 5:13p John
37 * Moved all float to int conversions out of inner loops and into outer.
38 * Made outer loop use FISTP instead of ftol, saved about 10%.
40 * 6 1/23/98 5:08p John
41 * Took L out of vertex structure used B (blue) instead. Took all small
42 * fireballs out of fireball types and used particles instead. Fixed some
43 * debris explosion things. Restructured fireball code. Restructured
44 * some lighting code. Made dynamic lighting on by default. Made groups
45 * of lasers only cast one light. Made fireballs not cast light.
47 * 5 12/04/97 10:38a John
48 * Fixed tiled texture mappers that were swapping uvs.
50 * 4 10/14/97 9:19a John
51 * removed fdiv warnings.
53 * 3 6/02/97 11:45a John
54 * fixed bugs with 64x64 and 128x128 tmappers.
56 * 2 5/12/97 12:27p John
57 * Restructured Graphics Library to add support for multiple renderers.
59 * 1 4/24/97 4:42p John
60 * Initial version of the tiled texture mappers for 64 & 128 wide
69 #include "grinternal.h"
71 #include "tmapscanline.h"
76 // Needed to keep warning 4725 away. See PsTypes.h for details on why.
77 void disable_warning_4725_stub_tst128()
// Draws one horizontal span of a tiled 128x128 texture with a per-pixel
// z-buffer compare and write (the cmp / jle / mov sequences on [edx+n] below).
// All inputs come from the Tmap structure: Tmap.loop_count is the span width,
// Tmap.dest_row_data the destination pointer, and Tmap.l / Tmap.r hold the
// left and right edge U/Z, V/Z and 1/Z terms.
// The span is handled in 32-pixel subdivisions: U and V are recovered from the
// over-Z terms at each subdivision boundary (one fdiv per subdivision) and the
// texture coordinate is stepped by a fixed delta for the pixels in between.
// The FPU control word is saved in Tmap.OldFPUCW on entry and reloaded from it
// at the end of the routine.
82 void tmapscan_pln8_zbuffered_tiled_128x128()
97 // Put the FPU in low precision mode
98 fstcw Tmap.OldFPUCW // store copy of CW
99 mov ax,Tmap.OldFPUCW // get it in ax
101 mov Tmap.FPUCW,ax // store it
102 fldcw Tmap.FPUCW // load the FPU
105 mov ecx, Tmap.loop_count // ecx = width
106 mov edi, Tmap.dest_row_data // edi = dest pointer
108 // edi = pointer to start pixel in dest dib
// Split the width into full 32-pixel subdivisions (ecx = width >> 5) and a
// remainder (eax = width & 31). When the remainder is zero, one full
// subdivision is handed to the leftover path instead (ecx - 1, eax = 32).
111 mov eax,ecx // eax and ecx = width
112 shr ecx,5 // ecx = width / subdivision length
113 and eax,31 // eax = width mod subdivision length
114 jnz some_left_over // any leftover?
115 dec ecx // no, so special case last span
116 mov eax,32 // it's 32 pixels long
118 mov Tmap.Subdivisions,ecx // store widths
119 mov Tmap.WidthModLength,eax
121 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
122 // st0 st1 st2 st3 st4 st5 st6 st7
124 fld Tmap.l.u // U/ZL V/ZL
125 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
126 fld1 // 1 1/ZL U/ZL V/ZL
127 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
128 fld st // ZL ZL 1/ZL U/ZL V/ZL
129 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
130 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
131 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
133 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
134 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
136 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
138 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
139 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
140 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
141 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
142 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
144 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
146 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
147 // @todo overlap this guy
148 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
149 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
150 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
151 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
152 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
// ecx still holds the number of full subdivisions computed above; with none
// to draw, go straight to the leftover-pixel path.
154 cmp ecx,0 // check for any full spans
155 jle HandleLeftoverPixels
159 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
160 // UR VR V/ZR 1/ZR U/ZR UL VL
162 // convert left side coords
164 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
165 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
166 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
168 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
169 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
170 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
172 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
174 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
175 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
176 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
177 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
179 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
180 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
182 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
183 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
184 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
186 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
187 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
189 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
190 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
191 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
192 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
193 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
194 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
195 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
198 // setup delta values
200 mov eax,Tmap.DeltaV // get v 16.16 step
201 mov ebx,eax // copy it
202 sar eax,16 // get v int step
203 shl ebx,16 // get v frac step
204 mov Tmap.DeltaVFrac,ebx // store it
205 imul eax,Tmap.src_offset // calculate texture step for v int step
207 mov ebx,Tmap.DeltaU // get u 16.16 step
208 mov ecx,ebx // copy it
209 sar ebx,16 // get u int step
210 shl ecx,16 // get u frac step
211 mov Tmap.DeltaUFrac,ecx // store it
212 add eax,ebx // calculate uint + vint step
213 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
214 add eax,Tmap.src_offset // calculate whole step + v carry
215 mov Tmap.uv_delta[0],eax // save in v-carry slot
217 // setup initial coordinates
218 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
220 mov ebx,esi // copy it
221 sar esi,16 // get integer part
222 shl ebx,16 // get fractional part
224 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
226 mov edx,ecx // copy it
227 sar edx,16 // get integer part
228 shl ecx,16 // get fractional part
229 imul edx,Tmap.src_offset // calc texture scanline address
230 add esi,edx // calc texture offset
231 add esi,Tmap.pixptr // calc address
233 // set up affine registers
239 mov ebp, Tmap.fx_dl_dx
250 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
251 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
252 // This divide should happen while the pixel span is drawn.
253 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
257 // edi = dest dib bits at current pixel
258 // esi = texture pointer at current u,v
260 // ebx = u fraction 0.32
261 // ecx = v fraction 0.32
263 // ebp = v carry scratch
265 mov al,[edi] // preread the destination cache line
// 32/4 = 8. NOTE(review): presumably 8 passes over the 4-pixel unrolled body
// below (z-buffer offsets 0, 4, 8, 12) — confirm against the loop branch.
267 mov Tmap.InnerLooper, 32/4 // Set up loop counter
272 sub eax, Tmap.pScreenBits
276 // Make ESI = DV:DU in 7:9,7:9 format
282 mov Tmap.DeltaUFrac, esi
284 // Make ECX = V:U in 7:9,7:9 format
295 // ecx = V:U in 7.9:7.9
296 // edx = zbuffer pointer
// Unrolled pixel 0. esi is compared (signed) with the z-buffer entry and the
// pixel is skipped when esi <= the stored value; otherwise esi is stored.
305 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
306 jle Skip0 // If pixel is covered, skip drawing
308 mov [edx+0], esi // Write z
// Texel index: 7 integer bits of V and 7 of U are packed into the low 14 bits
// of eax (mask 03fffh = 128*128 - 1), so the lookup stays inside the tile.
310 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
311 shr ax, 9 // EAX = V:U in 7.9:16.0
312 rol eax, 7 // EAX = V:U in 0.0:7:7
313 and eax, 03fffh // clear upper bits
314 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
318 and eax, 0ffffh // clear upper bits
319 mov al, gr_fade_table[eax] // al = gr_fade_table byte selected by eax
322 add ecx, Tmap.DeltaUFrac // step packed V:U by the packed DV:DU stored above
323 add esi, Tmap.fx_dwdx // step the depth value for the next pixel
// Unrolled pixel 1: same sequence against [edx+4].
327 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
328 jle Skip1 // If pixel is covered, skip drawing
330 mov [edx+4], esi // Write z
332 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
333 shr ax, 9 // EAX = V:U in 7.9:16.0
334 rol eax, 7 // EAX = V:U in 0.0:7:7
335 and eax, 03fffh // clear upper bits
336 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
340 and eax, 0ffffh // clear upper bits
341 mov al, gr_fade_table[eax]
344 add ecx, Tmap.DeltaUFrac
345 add esi, Tmap.fx_dwdx
// Unrolled pixel 2: same sequence against [edx+8].
349 cmp esi, [edx+8] // Compare the Z depth of this pixel with zbuffer
350 jle Skip2 // If pixel is covered, skip drawing
352 mov [edx+8], esi // Write z
354 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
355 shr ax, 9 // EAX = V:U in 7.9:16.0
356 rol eax, 7 // EAX = V:U in 0.0:7:7
357 and eax, 03fffh // clear upper bits
358 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
362 and eax, 0ffffh // clear upper bits
363 mov al, gr_fade_table[eax]
366 add ecx, Tmap.DeltaUFrac
367 add esi, Tmap.fx_dwdx
// Unrolled pixel 3: same sequence against [edx+12].
371 cmp esi, [edx+12] // Compare the Z depth of this pixel with zbuffer
372 jle Skip3 // If pixel is covered, skip drawing
374 mov [edx+12], esi // Write z
376 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
377 shr ax, 9 // EAX = V:U in 7.9:16.0
378 rol eax, 7 // EAX = V:U in 0.0:7:7
379 and eax, 03fffh // clear upper bits
380 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
384 and eax, 0ffffh // clear upper bits
385 mov al, gr_fade_table[eax]
388 add ecx, Tmap.DeltaUFrac
389 add esi, Tmap.fx_dwdx
400 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
401 // ZR V/ZR 1/ZR U/ZR UL VL
403 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
404 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
405 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
406 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
408 dec Tmap.Subdivisions // decrement span count
409 jnz SpanLoop // loop back
// Leftover path: draws the final Tmap.WidthModLength pixels (1..32) using the
// exact right-edge terms from Tmap.r rather than the stepped FPU values.
412 HandleLeftoverPixels:
414 mov esi,Tmap.pixptr // load texture pointer
416 // edi = dest dib bits
417 // esi = current texture dib bits
418 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
419 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
421 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
422 jz FPUReturn ; nope, pop the FPU and bail
424 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
426 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
427 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
428 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
430 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
431 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
432 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
434 dec Tmap.WidthModLength ; calc how many steps to take
435 jz OnePixelSpan ; just one, don't do deltas'
437 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
440 // @todo rearrange things so we don't need these two instructions
// The two stores below only pop the FPU stack; the value written to
// Tmap.FloatTemp is overwritten immediately and not read in the visible code.
441 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
442 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
// Load the right-edge over-Z terms from Tmap.r and subtract Tmap.deltas from
// each, then divide Tmap.One by the 1/Z term to get ZR.
444 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
445 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
446 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
447 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
448 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
449 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
451 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
453 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
454 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
456 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
458 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
459 fxch st(1) ; VR UR inv. inv. inv. dU VL
460 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
461 fxch st(6) ; dV UR inv. inv. inv. dU VR
// Per-pixel step = (right - left) / step count; Tmap.WidthModLength was
// decremented above, so it holds the number of steps here.
463 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
464 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
465 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
467 fxch st(4) ; dU inv. inv. inv. UR VR
468 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
469 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
470 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
472 // @todo gross! these are to line up with the other loop
473 fld st(1) ; inv. inv. inv. inv. UR VR
474 fld st(2) ; inv. inv. inv. inv. inv. UR VR
477 // setup delta values
478 mov eax, Tmap.DeltaV // get v 16.16 step
479 mov ebx, eax // copy it
480 sar eax, 16 // get v int step
481 shl ebx, 16 // get v frac step
482 mov Tmap.DeltaVFrac, ebx // store it
483 imul eax, Tmap.src_offset // calc texture step for v int step
485 mov ebx, Tmap.DeltaU // get u 16.16 step
486 mov ecx, ebx // copy it
487 sar ebx, 16 // get the u int step
488 shl ecx, 16 // get the u frac step
489 mov Tmap.DeltaUFrac, ecx // store it
490 add eax, ebx // calc uint + vint step
491 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
492 add eax, Tmap.src_offset // calc whole step + v carry
493 mov Tmap.uv_delta[0], eax // save in v-carry slot
498 ; setup initial coordinates
499 mov esi, Tmap.UFixed // get u 16.16
500 mov ebx, esi // copy it
501 sar esi, 16 // get integer part
502 shl ebx, 16 // get fractional part
504 mov ecx, Tmap.VFixed // get v 16.16
505 mov edx, ecx // copy it
506 sar edx, 16 // get integer part
507 shl ecx, 16 // get fractional part
508 imul edx, Tmap.src_offset // calc texture scanline address
509 add esi, edx // calc texture offset
510 add esi, Tmap.pixptr // calc address
517 // mov edx, Tmap.DeltaUFrac
521 mov ebx, Tmap.fx_l_right
527 mov eax, Tmap.fx_dl_dx
535 sub eax, Tmap.pScreenBits
// WidthModLength is restored to the pixel count (it was decremented above).
540 inc Tmap.WidthModLength
541 mov eax,Tmap.WidthModLength
545 mov Tmap.WidthModLength, eax
549 mov al,[edi] // preread the destination cache line
551 // Make ESI = DV:DU in 7:9,7:9 format
557 mov Tmap.DeltaUFrac, esi
559 // Make ECX = V:U in 7:9,7:9 format
570 // ecx = V:U in 7.9:7.9
571 // edx = zbuffer pointer
// Leftover pixels: same z test and texel lookup as the main loop, here against
// z-buffer offsets 0 and 4.
580 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
581 jle Skip0a // If pixel is covered, skip drawing
583 mov [edx+0], esi // Write z
585 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
586 shr ax, 9 // EAX = V:U in 7.9:16.0
587 rol eax, 7 // EAX = V:U in 0.0:7:7
588 and eax, 03fffh // clear upper bits
589 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
593 and eax, 0ffffh // clear upper bits
594 mov al, gr_fade_table[eax]
597 add ecx, Tmap.DeltaUFrac
598 add esi, Tmap.fx_dwdx
602 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
603 jle Skip1a // If pixel is covered, skip drawing
605 mov [edx+4], esi // Write z
607 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
608 shr ax, 9 // EAX = V:U in 7.9:16.0
609 rol eax, 7 // EAX = V:U in 0.0:7:7
610 and eax, 03fffh // clear upper bits
611 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
615 and eax, 0ffffh // clear upper bits
616 mov al, gr_fade_table[eax]
619 add ecx, Tmap.DeltaUFrac
620 add esi, Tmap.fx_dwdx
627 dec Tmap.WidthModLength
// Single-pixel variant of the same z test and texel lookup ([edx+0] only).
635 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
636 jle Skip0b // If pixel is covered, skip drawing
638 mov [edx+0], esi // Write z
640 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
641 shr ax, 9 // EAX = V:U in 7.9:16.0
642 rol eax, 7 // EAX = V:U in 0.0:7:7
643 and eax, 03fffh // clear upper bits
644 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
648 and eax, 0ffffh // clear upper bits
649 mov al, gr_fade_table[eax]
652 add ecx, Tmap.DeltaUFrac
653 add esi, Tmap.fx_dwdx
659 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
660 // xxx xxx xxx xxx xxx xxx xxx
669 fldcw Tmap.OldFPUCW // restore the FPU
// Entry point for drawing one span of a tiled 128x128 texture.
// It first dispatches on gr_zbuffering_mode: the GR_ZBUFF_FULL, GR_ZBUFF_WRITE
// and GR_ZBUFF_READ cases each call tmapscan_pln8_zbuffered_tiled_128x128().
// The assembly that follows runs the same 32-pixel-subdivision span algorithm
// (inputs read from the Tmap structure) but its pixel code contains no
// z-buffer compare or write.
// NOTE(review): presumably each case returns after the call so that only the
// remaining mode reaches the assembly below — confirm against the full switch.
682 void tmapscan_pln8_tiled_128x128()
685 switch(gr_zbuffering_mode) {
688 case GR_ZBUFF_FULL: // both
689 tmapscan_pln8_zbuffered_tiled_128x128();
691 case GR_ZBUFF_WRITE: // write only
692 tmapscan_pln8_zbuffered_tiled_128x128();
694 case GR_ZBUFF_READ: // read only
695 tmapscan_pln8_zbuffered_tiled_128x128();
713 // Put the FPU in low precision mode
714 fstcw Tmap.OldFPUCW // store copy of CW
715 mov ax,Tmap.OldFPUCW // get it in ax
717 mov Tmap.FPUCW,ax // store it
718 fldcw Tmap.FPUCW // load the FPU
721 mov ecx, Tmap.loop_count // ecx = width
722 mov edi, Tmap.dest_row_data // edi = dest pointer
724 // edi = pointer to start pixel in dest dib
// Split the width into full 32-pixel subdivisions (ecx = width >> 5) and a
// remainder (eax = width & 31). When the remainder is zero, one full
// subdivision is handed to the leftover path instead (ecx - 1, eax = 32).
727 mov eax,ecx // eax and ecx = width
728 shr ecx,5 // ecx = width / subdivision length
729 and eax,31 // eax = width mod subdivision length
730 jnz some_left_over // any leftover?
731 dec ecx // no, so special case last span
732 mov eax,32 // it's 32 pixels long
734 mov Tmap.Subdivisions,ecx // store widths
735 mov Tmap.WidthModLength,eax
737 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
738 // st0 st1 st2 st3 st4 st5 st6 st7
740 fld Tmap.l.u // U/ZL V/ZL
741 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
742 fld1 // 1 1/ZL U/ZL V/ZL
743 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
744 fld st // ZL ZL 1/ZL U/ZL V/ZL
745 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
746 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
747 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
749 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
750 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
752 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
754 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
755 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
756 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
757 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
758 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
760 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
762 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
763 // @todo overlap this guy
764 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
765 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
766 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
767 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
768 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
// ecx still holds the number of full subdivisions computed above; with none
// to draw, go straight to the leftover-pixel path.
770 cmp ecx,0 // check for any full spans
771 jle HandleLeftoverPixels
775 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
776 // UR VR V/ZR 1/ZR U/ZR UL VL
778 // convert left side coords
780 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
781 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
782 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
784 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
785 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
786 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
788 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
790 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
791 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
792 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
793 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
795 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
796 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
798 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
799 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
800 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
802 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
803 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
805 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
806 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
807 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
808 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
809 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
810 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
811 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
814 // setup delta values
816 mov eax,Tmap.DeltaV // get v 16.16 step
817 mov ebx,eax // copy it
818 sar eax,16 // get v int step
819 shl ebx,16 // get v frac step
820 mov Tmap.DeltaVFrac,ebx // store it
821 imul eax,Tmap.src_offset // calculate texture step for v int step
823 mov ebx,Tmap.DeltaU // get u 16.16 step
824 mov ecx,ebx // copy it
825 sar ebx,16 // get u int step
826 shl ecx,16 // get u frac step
827 mov Tmap.DeltaUFrac,ecx // store it
828 add eax,ebx // calculate uint + vint step
829 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
830 add eax,Tmap.src_offset // calculate whole step + v carry
831 mov Tmap.uv_delta[0],eax // save in v-carry slot
833 // setup initial coordinates
834 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
836 mov ebx,esi // copy it
837 sar esi,16 // get integer part
838 shl ebx,16 // get fractional part
840 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
842 mov edx,ecx // copy it
843 sar edx,16 // get integer part
844 shl ecx,16 // get fractional part
845 imul edx,Tmap.src_offset // calc texture scanline address
846 add esi,edx // calc texture offset
847 add esi,Tmap.pixptr // calc address
849 // set up affine registers
855 mov ebp, Tmap.fx_dl_dx
866 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
867 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
868 // This divide should happen while the pixel span is drawn.
869 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
873 // edi = dest dib bits at current pixel
874 // esi = texture pointer at current u,v
876 // ebx = u fraction 0.32
877 // ecx = v fraction 0.32
879 // ebp = v carry scratch
881 mov al,[edi] // preread the destination cache line
// 32/4 = 8. NOTE(review): presumably 8 passes over the 4-pixel unrolled body
// below — confirm against the loop branch.
883 mov Tmap.InnerLooper, 32/4 // Set up loop counter
888 sub eax, Tmap.pScreenBits
892 // Make ESI = DV:DU in 7:9,7:9 format
898 mov Tmap.DeltaUFrac, esi
900 // Make ECX = V:U in 7:9,7:9 format
910 // ecx = V:U in 7.9:7.9
911 // edx = zbuffer pointer
// NOTE(review): no z-buffer access is visible in this function's pixel code;
// the "zbuffer pointer" note above may be a leftover from the z-buffered
// version — confirm.
// Unrolled pixel 0. Texel index: 7 integer bits of V and 7 of U are packed
// into the low 14 bits of eax (mask 03fffh = 128*128 - 1).
920 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
921 shr ax, 9 // EAX = V:U in 7.9:16.0
922 rol eax, 7 // EAX = V:U in 0.0:7:7
923 and eax, 03fffh // clear upper bits
924 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
928 and eax, 0ffffh // clear upper bits
929 mov al, gr_fade_table[eax] // al = gr_fade_table byte selected by eax
931 add ecx, Tmap.DeltaUFrac // step packed V:U by the packed DV:DU stored above
// Unrolled pixel 1.
935 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
936 shr ax, 9 // EAX = V:U in 7.9:16.0
937 rol eax, 7 // EAX = V:U in 0.0:7:7
938 and eax, 03fffh // clear upper bits
939 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
943 and eax, 0ffffh // clear upper bits
944 mov al, gr_fade_table[eax]
946 add ecx, Tmap.DeltaUFrac
// Unrolled pixel 2.
950 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
951 shr ax, 9 // EAX = V:U in 7.9:16.0
952 rol eax, 7 // EAX = V:U in 0.0:7:7
953 and eax, 03fffh // clear upper bits
954 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
958 and eax, 0ffffh // clear upper bits
959 mov al, gr_fade_table[eax]
961 add ecx, Tmap.DeltaUFrac
// Unrolled pixel 3.
965 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
966 shr ax, 9 // EAX = V:U in 7.9:16.0
967 rol eax, 7 // EAX = V:U in 0.0:7:7
968 and eax, 03fffh // clear upper bits
969 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
973 and eax, 0ffffh // clear upper bits
974 mov al, gr_fade_table[eax]
976 add ecx, Tmap.DeltaUFrac
986 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
987 // ZR V/ZR 1/ZR U/ZR UL VL
989 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
990 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
991 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
992 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
994 dec Tmap.Subdivisions // decrement span count
995 jnz SpanLoop // loop back
// Leftover path: draws the final Tmap.WidthModLength pixels (1..32) using the
// exact right-edge terms from Tmap.r rather than the stepped FPU values.
998 HandleLeftoverPixels:
1000 mov esi,Tmap.pixptr // load texture pointer
1002 // edi = dest dib bits
1003 // esi = current texture dib bits
1004 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
1005 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
1007 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
1008 jz FPUReturn ; nope, pop the FPU and bail
1010 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
1012 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
1013 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
1014 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
1016 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
1017 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
1018 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
1020 dec Tmap.WidthModLength ; calc how many steps to take
1021 jz OnePixelSpan ; just one, don't do deltas'
1023 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
1026 // @todo rearrange things so we don't need these two instructions
// The two stores below only pop the FPU stack; the value written to
// Tmap.FloatTemp is overwritten immediately and not read in the visible code.
1027 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
1028 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
// Load the right-edge over-Z terms from Tmap.r and subtract Tmap.deltas from
// each, then divide Tmap.One by the 1/Z term to get ZR.
1030 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
1031 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
1032 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
1033 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
1034 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
1035 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
1037 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
1039 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
1040 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
1042 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1044 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
1045 fxch st(1) ; VR UR inv. inv. inv. dU VL
1046 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
1047 fxch st(6) ; dV UR inv. inv. inv. dU VR
// Per-pixel step = (right - left) / step count; Tmap.WidthModLength was
// decremented above, so it holds the number of steps here.
1049 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
1050 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
1051 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
1053 fxch st(4) ; dU inv. inv. inv. UR VR
1054 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
1055 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
1056 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
1058 // @todo gross! these are to line up with the other loop
1059 fld st(1) ; inv. inv. inv. inv. UR VR
1060 fld st(2) ; inv. inv. inv. inv. inv. UR VR
1063 // setup delta values
1064 mov eax, Tmap.DeltaV // get v 16.16 step
1065 mov ebx, eax // copy it
1066 sar eax, 16 // get v int step
1067 shl ebx, 16 // get v frac step
1068 mov Tmap.DeltaVFrac, ebx // store it
1069 imul eax, Tmap.src_offset // calc texture step for v int step
1071 mov ebx, Tmap.DeltaU // get u 16.16 step
1072 mov ecx, ebx // copy it
1073 sar ebx, 16 // get the u int step
1074 shl ecx, 16 // get the u frac step
1075 mov Tmap.DeltaUFrac, ecx // store it
1076 add eax, ebx // calc uint + vint step
1077 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1078 add eax, Tmap.src_offset // calc whole step + v carry
1079 mov Tmap.uv_delta[0], eax // save in v-carry slot
1084 ; setup initial coordinates
1085 mov esi, Tmap.UFixed // get u 16.16
1086 mov ebx, esi // copy it
1087 sar esi, 16 // get integer part
1088 shl ebx, 16 // get fractional part
1090 mov ecx, Tmap.VFixed // get v 16.16
1091 mov edx, ecx // copy it
1092 sar edx, 16 // get integer part
1093 shl ecx, 16 // get fractional part
1094 imul edx, Tmap.src_offset // calc texture scanline address
1095 add esi, edx // calc texture offset
1096 add esi, Tmap.pixptr // calc address
1103 // mov edx, Tmap.DeltaUFrac
1107 mov ebx, Tmap.fx_l_right
1113 mov eax, Tmap.fx_dl_dx
1121 sub eax, Tmap.pScreenBits
// WidthModLength is restored to the pixel count (it was decremented above).
1126 inc Tmap.WidthModLength
1127 mov eax,Tmap.WidthModLength
1131 mov Tmap.WidthModLength, eax
1135 mov al,[edi] // preread the destination cache line
1137 // Make ESI = DV:DU in 7:9,7:9 format
1138 mov eax, Tmap.DeltaV
1140 mov esi, Tmap.DeltaU
1143 mov Tmap.DeltaUFrac, esi
1145 // Make ECX = V:U in 7:9,7:9 format
1146 mov eax, Tmap.VFixed
1148 mov ecx, Tmap.UFixed
1156 // ecx = V:U in 7.9:7.9
1157 // edx = zbuffer pointer
1159 // edi = screen data
// Leftover pixels: same texel lookup as the main loop, with no z test.
1166 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
1167 shr ax, 9 // EAX = V:U in 7.9:16.0
1168 rol eax, 7 // EAX = V:U in 0.0:7:7
1169 and eax, 03fffh // clear upper bits
1170 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
1174 and eax, 0ffffh // clear upper bits
1175 mov al, gr_fade_table[eax]
1177 add ecx, Tmap.DeltaUFrac
1181 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
1182 shr ax, 9 // EAX = V:U in 7.9:16.0
1183 rol eax, 7 // EAX = V:U in 0.0:7:7
1184 and eax, 03fffh // clear upper bits
1185 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
1189 and eax, 0ffffh // clear upper bits
1190 mov al, gr_fade_table[eax]
1192 add ecx, Tmap.DeltaUFrac
1199 dec Tmap.WidthModLength
// Single-pixel variant of the same texel lookup.
1207 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
1208 shr ax, 9 // EAX = V:U in 7.9:16.0
1209 rol eax, 7 // EAX = V:U in 0.0:7:7
1210 and eax, 03fffh // clear upper bits
1211 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
1215 and eax, 0ffffh // clear upper bits
1216 mov al, gr_fade_table[eax]
1218 add ecx, Tmap.DeltaUFrac
1224 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
1225 // xxx xxx xxx xxx xxx xxx xxx
1234 fldcw Tmap.OldFPUCW // restore the FPU