2 * Copyright (C) Volition, Inc. 1999. All rights reserved.
4 * All source code herein is the property of Volition, Inc. You may not sell
5 * or otherwise commercially exploit the source or things you created based on
10 * $Logfile: /Freespace2/code/Graphics/TmapScanTiled128x128.cpp $
15 * Routines for drawing tiled 128x128 textures
18 * Revision 1.2 2002/06/09 04:41:18 relnev
19 * added copyright header
21 * Revision 1.1.1.1 2002/05/03 03:28:09 root
25 * 4 11/30/98 5:31p Dave
26 * Fixed up Fred support for software mode.
28 * 3 11/30/98 1:07p Dave
29 * 16 bit conversion, first run.
31 * 2 10/07/98 10:53a Dave
34 * 1 10/07/98 10:49a Dave
36 * 9 4/23/98 9:55a John
37 * Fixed some bugs in the tiled tmapper causing bright dots to appear all
40 * 8 3/10/98 4:19p John
41 * Cleaned up graphics lib. Took out most unused gr functions. Made D3D
42 * & Glide have popups and print screen. Took out all >8bpp software
43 * support. Made Fred zbuffer. Made zbuffer allocate dynamically to
44 * support Fred. Made zbuffering key off of functions rather than one
47 * 7 1/27/98 5:13p John
48 * Moved all float to int conversions out of inner loops and into outer.
49 * Made outer loop use FISTP instead of ftol, saved about 10%.
51 * 6 1/23/98 5:08p John
52 * Took L out of vertex structure used B (blue) instead. Took all small
53 * fireballs out of fireball types and used particles instead. Fixed some
54 * debris explosion things. Restructured fireball code. Restructured
55 * some lighting code. Made dynamic lighting on by default. Made groups
56 * of lasers only cast one light. Made fireballs not cast light.
58 * 5 12/04/97 10:38a John
59 * Fixed tiled texture mappers that were swapping uvs.
61 * 4 10/14/97 9:19a John
62 * removed fdiv warnings.
64 * 3 6/02/97 11:45a John
65 * fixed bugs with 64x64 and 128x128 tmappers.
67 * 2 5/12/97 12:27p John
68 * Restructured Graphics Library to add support for multiple renderers.
70 * 1 4/24/97 4:42p John
71 * Initial version of the tiled texture mappers for 64 & 128 wide
80 #include "grinternal.h"
82 #include "tmapscanline.h"
87 // Stub function needed to keep compiler warning 4725 away. See PsTypes.h for details why.
// NOTE(review): the body of this stub is not visible in this listing.
88 void disable_warning_4725_stub_tst128()
// Z-buffered scanline routine of the tiled 128x128 texture mapper.
// All inputs are read from the global Tmap structure (loop_count, dest_row_data,
// l / r / deltas, the fl_d?dx_wide steps, pixptr, ...). The scanline is split into
// 32-pixel subdivisions (shr ecx,5 / and eax,31). U and V are recovered from
// U/Z, V/Z and 1/Z on the FPU at subdivision boundaries and stepped in fixed
// point in between. Each visible pixel block compares ESI against the z-buffer
// dword at [edx+n] and jumps to its SkipN label when ESI <= the stored value;
// otherwise ESI is written back and a byte is fetched from gr_fade_table.
// NOTE(review): this listing does not show every line of the routine (labels,
// braces and some instructions are absent); comments cover only what is visible.
93 void tmapscan_pln8_zbuffered_tiled_128x128()
108 // Put the FPU in low precision mode
109 fstcw Tmap.OldFPUCW // store copy of CW
110 mov ax,Tmap.OldFPUCW // get it in ax
112 mov Tmap.FPUCW,ax // store it
113 fldcw Tmap.FPUCW // load the FPU
116 mov ecx, Tmap.loop_count // ecx = width
117 mov edi, Tmap.dest_row_data // edi = dest pointer
119 // edi = pointer to start pixel in dest dib
122 mov eax,ecx // eax and ecx = width
123 shr ecx,5 // ecx = width / subdivision length
124 and eax,31 // eax = width mod subdivision length
125 jnz some_left_over // any leftover?
126 dec ecx // no, so special case last span
127 mov eax,32 // it's 32 pixels long
129 mov Tmap.Subdivisions,ecx // store widths
130 mov Tmap.WidthModLength,eax
132 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
133 // st0 st1 st2 st3 st4 st5 st6 st7
135 fld Tmap.l.u // U/ZL V/ZL
136 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
137 fld1 // 1 1/ZL U/ZL V/ZL
138 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
139 fld st // ZL ZL 1/ZL U/ZL V/ZL
140 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
141 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
142 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
144 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
145 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
147 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
149 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
150 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
151 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
152 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
153 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
155 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
157 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
158 // @todo overlap this guy
159 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
160 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
161 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
162 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
163 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
165 cmp ecx,0 // check for any full spans
166 jle HandleLeftoverPixels
170 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
171 // UR VR V/ZR 1/ZR U/ZR UL VL
173 // convert left side coords
175 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
176 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
177 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
179 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
180 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
181 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
183 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
185 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
186 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
187 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
188 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
190 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
191 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
193 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
194 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
195 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
197 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
198 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
200 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
201 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
202 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
203 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
204 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
205 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
206 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
209 // setup delta values
211 mov eax,Tmap.DeltaV // get v 16.16 step
212 mov ebx,eax // copy it
213 sar eax,16 // get v int step
214 shl ebx,16 // get v frac step
215 mov Tmap.DeltaVFrac,ebx // store it
216 imul eax,Tmap.src_offset // calculate texture step for v int step
218 mov ebx,Tmap.DeltaU // get u 16.16 step
219 mov ecx,ebx // copy it
220 sar ebx,16 // get u int step
221 shl ecx,16 // get u frac step
222 mov Tmap.DeltaUFrac,ecx // store it
223 add eax,ebx // calculate uint + vint step
224 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
225 add eax,Tmap.src_offset // calculate whole step + v carry
226 mov Tmap.uv_delta[0],eax // save in v-carry slot
228 // setup initial coordinates
229 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
231 mov ebx,esi // copy it
232 sar esi,16 // get integer part
233 shl ebx,16 // get fractional part
235 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
237 mov edx,ecx // copy it
238 sar edx,16 // get integer part
239 shl ecx,16 // get fractional part
240 imul edx,Tmap.src_offset // calc texture scanline address
241 add esi,edx // calc texture offset
242 add esi,Tmap.pixptr // calc address
244 // set up affine registers
250 mov ebp, Tmap.fx_dl_dx
261 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
262 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
263 // This divide should happen while the pixel span is drawn.
264 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
268 // edi = dest dib bits at current pixel
269 // esi = texture pointer at current u,v
271 // ebx = u fraction 0.32
272 // ecx = v fraction 0.32
274 // ebp = v carry scratch
276 mov al,[edi] // preread the destination cache line
278 mov Tmap.InnerLooper, 32/4 // Set up loop counter
283 sub eax, Tmap.pScreenBits
287 // Make ESI = DV:DU in 7:9,7:9 format
293 mov Tmap.DeltaUFrac, esi
295 // Make ECX = V:U in 7:9,7:9 format
306 // ecx = V:U in 7.9:7.9
307 // edx = zbuffer pointer
// Unrolled pixel 0: depth test against [edx+0], then texel index / fade lookup
316 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
317 jle Skip0 // If pixel is covered, skip drawing
319 mov [edx+0], esi // Write z
321 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
322 shr ax, 9 // EAX = V:U in 7.9:16.0
323 rol eax, 7 // EAX = V:U in 0.0:7:7
324 and eax, 03fffh // clear upper bits
325 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
329 and eax, 0ffffh // clear upper bits
330 mov al, gr_fade_table[eax]
333 add ecx, Tmap.DeltaUFrac
334 add esi, Tmap.fx_dwdx
// Unrolled pixel 1: depth test against [edx+4]
338 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
339 jle Skip1 // If pixel is covered, skip drawing
341 mov [edx+4], esi // Write z
343 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
344 shr ax, 9 // EAX = V:U in 7.9:16.0
345 rol eax, 7 // EAX = V:U in 0.0:7:7
346 and eax, 03fffh // clear upper bits
347 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
351 and eax, 0ffffh // clear upper bits
352 mov al, gr_fade_table[eax]
355 add ecx, Tmap.DeltaUFrac
356 add esi, Tmap.fx_dwdx
// Unrolled pixel 2: depth test against [edx+8]
360 cmp esi, [edx+8] // Compare the Z depth of this pixel with zbuffer
361 jle Skip2 // If pixel is covered, skip drawing
363 mov [edx+8], esi // Write z
365 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
366 shr ax, 9 // EAX = V:U in 7.9:16.0
367 rol eax, 7 // EAX = V:U in 0.0:7:7
368 and eax, 03fffh // clear upper bits
369 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
373 and eax, 0ffffh // clear upper bits
374 mov al, gr_fade_table[eax]
377 add ecx, Tmap.DeltaUFrac
378 add esi, Tmap.fx_dwdx
// Unrolled pixel 3: depth test against [edx+12]
382 cmp esi, [edx+12] // Compare the Z depth of this pixel with zbuffer
383 jle Skip3 // If pixel is covered, skip drawing
385 mov [edx+12], esi // Write z
387 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
388 shr ax, 9 // EAX = V:U in 7.9:16.0
389 rol eax, 7 // EAX = V:U in 0.0:7:7
390 and eax, 03fffh // clear upper bits
391 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
395 and eax, 0ffffh // clear upper bits
396 mov al, gr_fade_table[eax]
399 add ecx, Tmap.DeltaUFrac
400 add esi, Tmap.fx_dwdx
411 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
412 // ZR V/ZR 1/ZR U/ZR UL VL
414 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
415 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
416 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
417 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
419 dec Tmap.Subdivisions // decrement span count
420 jnz SpanLoop // loop back
423 HandleLeftoverPixels:
425 mov esi,Tmap.pixptr // load texture pointer
427 // edi = dest dib bits
428 // esi = current texture dib bits
429 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
430 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
432 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
433 jz FPUReturn ; nope, pop the FPU and bail
435 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
437 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
438 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
439 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
441 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
442 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
443 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
445 dec Tmap.WidthModLength ; calc how many steps to take
446 jz OnePixelSpan ; just one, don't do deltas'
448 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
451 // @todo rearrange things so we don't need these two instructions
452 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
453 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
455 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
456 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
457 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
458 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
459 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
460 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
462 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
464 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
465 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
467 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
469 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
470 fxch st(1) ; VR UR inv. inv. inv. dU VL
471 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
472 fxch st(6) ; dV UR inv. inv. inv. dU VR
474 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
475 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
476 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
478 fxch st(4) ; dU inv. inv. inv. UR VR
479 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
480 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
481 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
483 // @todo gross! these are to line up with the other loop
484 fld st(1) ; inv. inv. inv. inv. UR VR
485 fld st(2) ; inv. inv. inv. inv. inv. UR VR
488 // setup delta values
489 mov eax, Tmap.DeltaV // get v 16.16 step
490 mov ebx, eax // copy it
491 sar eax, 16 // get v int step
492 shl ebx, 16 // get v frac step
493 mov Tmap.DeltaVFrac, ebx // store it
494 imul eax, Tmap.src_offset // calc texture step for v int step
496 mov ebx, Tmap.DeltaU // get u 16.16 step
497 mov ecx, ebx // copy it
498 sar ebx, 16 // get the u int step
499 shl ecx, 16 // get the u frac step
500 mov Tmap.DeltaUFrac, ecx // store it
501 add eax, ebx // calc uint + vint step
502 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
503 add eax, Tmap.src_offset // calc whole step + v carry
504 mov Tmap.uv_delta[0], eax // save in v-carry slot
509 ; setup initial coordinates
510 mov esi, Tmap.UFixed // get u 16.16
511 mov ebx, esi // copy it
512 sar esi, 16 // get integer part
513 shl ebx, 16 // get fractional part
515 mov ecx, Tmap.VFixed // get v 16.16
516 mov edx, ecx // copy it
517 sar edx, 16 // get integer part
518 shl ecx, 16 // get fractional part
519 imul edx, Tmap.src_offset // calc texture scanline address
520 add esi, edx // calc texture offset
521 add esi, Tmap.pixptr // calc address
528 // mov edx, Tmap.DeltaUFrac
532 mov ebx, Tmap.fx_l_right
538 mov eax, Tmap.fx_dl_dx
546 sub eax, Tmap.pScreenBits
// WidthModLength was decremented above to count steps; re-increment it here
551 inc Tmap.WidthModLength
552 mov eax,Tmap.WidthModLength
556 mov Tmap.WidthModLength, eax
560 mov al,[edi] // preread the destination cache line
562 // Make ESI = DV:DU in 7:9,7:9 format
568 mov Tmap.DeltaUFrac, esi
570 // Make ECX = V:U in 7:9,7:9 format
581 // ecx = V:U in 7.9:7.9
582 // edx = zbuffer pointer
// Leftover pixels, first of two unrolled pixels: depth test against [edx+0]
591 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
592 jle Skip0a // If pixel is covered, skip drawing
594 mov [edx+0], esi // Write z
596 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
597 shr ax, 9 // EAX = V:U in 7.9:16.0
598 rol eax, 7 // EAX = V:U in 0.0:7:7
599 and eax, 03fffh // clear upper bits
600 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
604 and eax, 0ffffh // clear upper bits
605 mov al, gr_fade_table[eax]
608 add ecx, Tmap.DeltaUFrac
609 add esi, Tmap.fx_dwdx
// Leftover pixels, second of two unrolled pixels: depth test against [edx+4]
613 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
614 jle Skip1a // If pixel is covered, skip drawing
616 mov [edx+4], esi // Write z
618 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
619 shr ax, 9 // EAX = V:U in 7.9:16.0
620 rol eax, 7 // EAX = V:U in 0.0:7:7
621 and eax, 03fffh // clear upper bits
622 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
626 and eax, 0ffffh // clear upper bits
627 mov al, gr_fade_table[eax]
630 add ecx, Tmap.DeltaUFrac
631 add esi, Tmap.fx_dwdx
638 dec Tmap.WidthModLength
// Single-pixel block reached after the decrement above
// (presumably the final odd pixel -- TODO confirm against the full source)
646 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
647 jle Skip0b // If pixel is covered, skip drawing
649 mov [edx+0], esi // Write z
651 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
652 shr ax, 9 // EAX = V:U in 7.9:16.0
653 rol eax, 7 // EAX = V:U in 0.0:7:7
654 and eax, 03fffh // clear upper bits
655 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
659 and eax, 0ffffh // clear upper bits
660 mov al, gr_fade_table[eax]
663 add ecx, Tmap.DeltaUFrac
664 add esi, Tmap.fx_dwdx
670 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
671 // xxx xxx xxx xxx xxx xxx xxx
680 fldcw Tmap.OldFPUCW // restore the FPU
// Scanline routine of the tiled 128x128 texture mapper.
// The visible switch on gr_zbuffering_mode calls
// tmapscan_pln8_zbuffered_tiled_128x128() for GR_ZBUFF_FULL, GR_ZBUFF_WRITE and
// GR_ZBUFF_READ; the lines following each call are not shown in this listing, so
// whether those cases return/break afterwards cannot be confirmed from here.
// The remainder follows the same structure as the z-buffered routine (32-pixel
// subdivisions, FPU perspective divide per subdivision, fixed-point stepping per
// pixel), but the visible per-pixel code contains no depth compare.
// NOTE(review): this listing does not show every line of the routine (labels,
// braces and some instructions are absent); comments cover only what is visible.
693 void tmapscan_pln8_tiled_128x128()
696 switch(gr_zbuffering_mode) {
699 case GR_ZBUFF_FULL: // both
700 tmapscan_pln8_zbuffered_tiled_128x128();
702 case GR_ZBUFF_WRITE: // write only
703 tmapscan_pln8_zbuffered_tiled_128x128();
705 case GR_ZBUFF_READ: // read only
706 tmapscan_pln8_zbuffered_tiled_128x128();
724 // Put the FPU in low precision mode
725 fstcw Tmap.OldFPUCW // store copy of CW
726 mov ax,Tmap.OldFPUCW // get it in ax
728 mov Tmap.FPUCW,ax // store it
729 fldcw Tmap.FPUCW // load the FPU
732 mov ecx, Tmap.loop_count // ecx = width
733 mov edi, Tmap.dest_row_data // edi = dest pointer
735 // edi = pointer to start pixel in dest dib
738 mov eax,ecx // eax and ecx = width
739 shr ecx,5 // ecx = width / subdivision length
740 and eax,31 // eax = width mod subdivision length
741 jnz some_left_over // any leftover?
742 dec ecx // no, so special case last span
743 mov eax,32 // it's 32 pixels long
745 mov Tmap.Subdivisions,ecx // store widths
746 mov Tmap.WidthModLength,eax
748 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
749 // st0 st1 st2 st3 st4 st5 st6 st7
751 fld Tmap.l.u // U/ZL V/ZL
752 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
753 fld1 // 1 1/ZL U/ZL V/ZL
754 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
755 fld st // ZL ZL 1/ZL U/ZL V/ZL
756 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
757 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
758 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
760 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
761 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
763 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
765 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
766 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
767 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
768 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
769 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
771 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
773 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
774 // @todo overlap this guy
775 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
776 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
777 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
778 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
779 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
781 cmp ecx,0 // check for any full spans
782 jle HandleLeftoverPixels
786 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
787 // UR VR V/ZR 1/ZR U/ZR UL VL
789 // convert left side coords
791 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
792 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
793 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
795 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
796 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
797 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
799 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
801 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
802 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
803 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
804 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
806 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
807 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
809 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
810 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
811 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
813 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
814 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
816 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
817 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
818 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
819 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
820 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
821 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
822 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
825 // setup delta values
827 mov eax,Tmap.DeltaV // get v 16.16 step
828 mov ebx,eax // copy it
829 sar eax,16 // get v int step
830 shl ebx,16 // get v frac step
831 mov Tmap.DeltaVFrac,ebx // store it
832 imul eax,Tmap.src_offset // calculate texture step for v int step
834 mov ebx,Tmap.DeltaU // get u 16.16 step
835 mov ecx,ebx // copy it
836 sar ebx,16 // get u int step
837 shl ecx,16 // get u frac step
838 mov Tmap.DeltaUFrac,ecx // store it
839 add eax,ebx // calculate uint + vint step
840 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
841 add eax,Tmap.src_offset // calculate whole step + v carry
842 mov Tmap.uv_delta[0],eax // save in v-carry slot
844 // setup initial coordinates
845 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
847 mov ebx,esi // copy it
848 sar esi,16 // get integer part
849 shl ebx,16 // get fractional part
851 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
853 mov edx,ecx // copy it
854 sar edx,16 // get integer part
855 shl ecx,16 // get fractional part
856 imul edx,Tmap.src_offset // calc texture scanline address
857 add esi,edx // calc texture offset
858 add esi,Tmap.pixptr // calc address
860 // set up affine registers
866 mov ebp, Tmap.fx_dl_dx
877 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
878 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
879 // This divide should happen while the pixel span is drawn.
880 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
884 // edi = dest dib bits at current pixel
885 // esi = texture pointer at current u,v
887 // ebx = u fraction 0.32
888 // ecx = v fraction 0.32
890 // ebp = v carry scratch
892 mov al,[edi] // preread the destination cache line
894 mov Tmap.InnerLooper, 32/4 // Set up loop counter
899 sub eax, Tmap.pScreenBits
903 // Make ESI = DV:DU in 7:9,7:9 format
909 mov Tmap.DeltaUFrac, esi
911 // Make ECX = V:U in 7:9,7:9 format
921 // ecx = V:U in 7.9:7.9
922 // edx = zbuffer pointer
// NOTE(review): no z-buffer compare is visible in this routine; the "zbuffer
// pointer" note above may be left over from the z-buffered variant -- confirm.
// Unrolled pixel 0: texel index / fade lookup, then step V:U
931 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
932 shr ax, 9 // EAX = V:U in 7.9:16.0
933 rol eax, 7 // EAX = V:U in 0.0:7:7
934 and eax, 03fffh // clear upper bits
935 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
939 and eax, 0ffffh // clear upper bits
940 mov al, gr_fade_table[eax]
942 add ecx, Tmap.DeltaUFrac
// Unrolled pixel 1
946 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
947 shr ax, 9 // EAX = V:U in 7.9:16.0
948 rol eax, 7 // EAX = V:U in 0.0:7:7
949 and eax, 03fffh // clear upper bits
950 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
954 and eax, 0ffffh // clear upper bits
955 mov al, gr_fade_table[eax]
957 add ecx, Tmap.DeltaUFrac
// Unrolled pixel 2
961 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
962 shr ax, 9 // EAX = V:U in 7.9:16.0
963 rol eax, 7 // EAX = V:U in 0.0:7:7
964 and eax, 03fffh // clear upper bits
965 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
969 and eax, 0ffffh // clear upper bits
970 mov al, gr_fade_table[eax]
972 add ecx, Tmap.DeltaUFrac
// Unrolled pixel 3
976 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
977 shr ax, 9 // EAX = V:U in 7.9:16.0
978 rol eax, 7 // EAX = V:U in 0.0:7:7
979 and eax, 03fffh // clear upper bits
980 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
984 and eax, 0ffffh // clear upper bits
985 mov al, gr_fade_table[eax]
987 add ecx, Tmap.DeltaUFrac
997 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
998 // ZR V/ZR 1/ZR U/ZR UL VL
1000 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1001 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1002 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1003 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
1005 dec Tmap.Subdivisions // decrement span count
1006 jnz SpanLoop // loop back
1009 HandleLeftoverPixels:
1011 mov esi,Tmap.pixptr // load texture pointer
1013 // edi = dest dib bits
1014 // esi = current texture dib bits
1015 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
1016 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
1018 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
1019 jz FPUReturn ; nope, pop the FPU and bail
1021 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
1023 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
1024 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
1025 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
1027 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
1028 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
1029 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
1031 dec Tmap.WidthModLength ; calc how many steps to take
1032 jz OnePixelSpan ; just one, don't do deltas'
1034 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
1037 // @todo rearrange things so we don't need these two instructions
1038 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
1039 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
1041 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
1042 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
1043 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
1044 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
1045 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
1046 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
1048 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
1050 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
1051 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
1053 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1055 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
1056 fxch st(1) ; VR UR inv. inv. inv. dU VL
1057 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
1058 fxch st(6) ; dV UR inv. inv. inv. dU VR
1060 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
1061 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
1062 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
1064 fxch st(4) ; dU inv. inv. inv. UR VR
1065 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
1066 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
1067 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
1069 // @todo gross! these are to line up with the other loop
1070 fld st(1) ; inv. inv. inv. inv. UR VR
1071 fld st(2) ; inv. inv. inv. inv. inv. UR VR
1074 // setup delta values
1075 mov eax, Tmap.DeltaV // get v 16.16 step
1076 mov ebx, eax // copy it
1077 sar eax, 16 // get v int step
1078 shl ebx, 16 // get v frac step
1079 mov Tmap.DeltaVFrac, ebx // store it
1080 imul eax, Tmap.src_offset // calc texture step for v int step
1082 mov ebx, Tmap.DeltaU // get u 16.16 step
1083 mov ecx, ebx // copy it
1084 sar ebx, 16 // get the u int step
1085 shl ecx, 16 // get the u frac step
1086 mov Tmap.DeltaUFrac, ecx // store it
1087 add eax, ebx // calc uint + vint step
1088 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1089 add eax, Tmap.src_offset // calc whole step + v carry
1090 mov Tmap.uv_delta[0], eax // save in v-carry slot
1095 ; setup initial coordinates
1096 mov esi, Tmap.UFixed // get u 16.16
1097 mov ebx, esi // copy it
1098 sar esi, 16 // get integer part
1099 shl ebx, 16 // get fractional part
1101 mov ecx, Tmap.VFixed // get v 16.16
1102 mov edx, ecx // copy it
1103 sar edx, 16 // get integer part
1104 shl ecx, 16 // get fractional part
1105 imul edx, Tmap.src_offset // calc texture scanline address
1106 add esi, edx // calc texture offset
1107 add esi, Tmap.pixptr // calc address
1114 // mov edx, Tmap.DeltaUFrac
1118 mov ebx, Tmap.fx_l_right
1124 mov eax, Tmap.fx_dl_dx
1132 sub eax, Tmap.pScreenBits
// WidthModLength was decremented above to count steps; re-increment it here
1137 inc Tmap.WidthModLength
1138 mov eax,Tmap.WidthModLength
1142 mov Tmap.WidthModLength, eax
1146 mov al,[edi] // preread the destination cache line
1148 // Make ESI = DV:DU in 7:9,7:9 format
1149 mov eax, Tmap.DeltaV
1151 mov esi, Tmap.DeltaU
1154 mov Tmap.DeltaUFrac, esi
1156 // Make ECX = V:U in 7:9,7:9 format
1157 mov eax, Tmap.VFixed
1159 mov ecx, Tmap.UFixed
1167 // ecx = V:U in 7.9:7.9
1168 // edx = zbuffer pointer
1170 // edi = screen data
// Leftover pixels, first of two unrolled pixels
1177 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
1178 shr ax, 9 // EAX = V:U in 7.9:16.0
1179 rol eax, 7 // EAX = V:U in 0.0:7:7
1180 and eax, 03fffh // clear upper bits
1181 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
1185 and eax, 0ffffh // clear upper bits
1186 mov al, gr_fade_table[eax]
1188 add ecx, Tmap.DeltaUFrac
// Leftover pixels, second of two unrolled pixels
1192 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
1193 shr ax, 9 // EAX = V:U in 7.9:16.0
1194 rol eax, 7 // EAX = V:U in 0.0:7:7
1195 and eax, 03fffh // clear upper bits
1196 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
1200 and eax, 0ffffh // clear upper bits
1201 mov al, gr_fade_table[eax]
1203 add ecx, Tmap.DeltaUFrac
1210 dec Tmap.WidthModLength
// Single-pixel block reached after the decrement above
// (presumably the final odd pixel -- TODO confirm against the full source)
1218 mov eax, ecx // EAX = V.VF:U.UF in 7.9:7.9
1219 shr ax, 9 // EAX = V:U in 7.9:16.0
1220 rol eax, 7 // EAX = V:U in 0.0:7:7
1221 and eax, 03fffh // clear upper bits
1222 add eax, Tmap.pixptr // EAX = (V*128)+U + Pixptr
1226 and eax, 0ffffh // clear upper bits
1227 mov al, gr_fade_table[eax]
1229 add ecx, Tmap.DeltaUFrac
1235 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
1236 // xxx xxx xxx xxx xxx xxx xxx
1245 fldcw Tmap.OldFPUCW // restore the FPU