2 * Copyright (C) Volition, Inc. 1999. All rights reserved.
4 * All source code herein is the property of Volition, Inc. You may not sell
5 * or otherwise commercially exploit the source or things you created based on
10 * $Logfile: /Freespace2/code/Graphics/TmapScanTiled32x32.cpp $
15 * Routines for drawing tiled 32x32 textures
18 * Revision 1.3 2002/06/09 04:41:18 relnev
19 * added copyright header
21 * Revision 1.2 2002/05/07 03:16:45 theoddone33
22 * The Great Newline Fix
24 * Revision 1.1.1.1 2002/05/03 03:28:09 root
28 * 4 11/30/98 5:31p Dave
29 * Fixed up Fred support for software mode.
31 * 3 11/30/98 1:07p Dave
32 * 16 bit conversion, first run.
34 * 2 10/07/98 10:53a Dave
37 * 1 10/07/98 10:49a Dave
39 * 6 4/23/98 9:55a John
40 * Fixed some bugs in the tiled tmapper causing bright dots to appear all
43 * 5 3/10/98 4:19p John
44 * Cleaned up graphics lib. Took out most unused gr functions. Made D3D
45 * & Glide have popups and print screen. Took out all >8bpp software
46 * support. Made Fred zbuffer. Made zbuffer allocate dynamically to
47 * support Fred. Made zbuffering key off of functions rather than one
50 * 4 1/23/98 5:08p John
51 * Took L out of vertex structure used B (blue) instead. Took all small
52 * fireballs out of fireball types and used particles instead. Fixed some
53 * debris explosion things. Restructured fireball code. Restructured
54 * some lighting code. Made dynamic lighting on by default. Made groups
55 * of lasers only cast one light. Made fireballs not cast light.
57 * 3 12/04/97 10:38a John
58 * Fixed tiled texture mappers that were swapping uvs.
60 * 2 10/14/97 9:19a John
61 * removed fdiv warnings.
63 * 1 6/18/97 4:02p John
64 * added new code for 16x16 and 32x32 tiled tmaps.
72 #include "grinternal.h"
74 #include "tmapscanline.h"
79 // Stub function needed to keep warning 4725 away for this file. See PsTypes.h for details why.
// NOTE(review): the revision log above mentions "removed fdiv warnings", so 4725 is presumably the fdiv-related compiler warning -- confirm in PsTypes.h.
80 void disable_warning_4725_stub_tst32()
// Scanline drawer for tiled 32x32 textures with a per-pixel z-buffer compare.
// Reads its inputs from and writes its scratch values to the global Tmap structure.
// Visible steps:
//   1. Convert the b terms (l.b, r.b, deltas.b) to fixed point, pre-scaled by 32.
//   2. Pre-multiply the u/v/sw per-pixel deltas by 32 (one full span), and build the
//      integer z start value and z step from sw * GR_Z_RANGE.
//   3. Split loop_count into 32-pixel spans plus a leftover span.
//   4. For each span, compute the span-end coordinates with an FPU divide and step
//      packed fixed-point coordinates across the pixels in between.
//   5. For each pixel, compare the z value in ESI against the z-buffer entry and
//      skip the pixel when ESI <= the stored value (jle).
// NOTE(review): this listing omits a number of original lines (labels, braces, stores),
// so the comments below describe only the instructions that are visible.
84 void tmapscan_pln8_zbuffered_tiled_32x32()
86 Tmap.fx_l = fl2f(Tmap.l.b*32.0);
87 Tmap.fx_l_right = fl2f(Tmap.r.b*32.0);
88 Tmap.fx_dl_dx = fl2f(Tmap.deltas.b*32.0);
// A negative step is negated and both end values are mirrored around 67.0 (fixed point).
90 if ( Tmap.fx_dl_dx < 0 ) {
91 Tmap.fx_dl_dx = -Tmap.fx_dl_dx;
92 Tmap.fx_l = (67*F1_0)-Tmap.fx_l;
93 Tmap.fx_l_right = (67*F1_0)-Tmap.fx_l_right;
94 // SDL_assert( Tmap.fx_l > 31*F1_0 );
95 // SDL_assert( Tmap.fx_l < 66*F1_0 );
96 // SDL_assert( Tmap.fx_dl_dx >= 0 );
97 // SDL_assert( Tmap.fx_dl_dx < 31*F1_0 );
// Deltas for a whole 32-pixel span (per-pixel delta * 32).
100 Tmap.fl_dudx_wide = Tmap.deltas.u*32.0f;
101 Tmap.fl_dvdx_wide = Tmap.deltas.v*32.0f;
102 Tmap.fl_dwdx_wide = Tmap.deltas.sw*32.0f;
// Integer z start value and per-pixel z step, both scaled by GR_Z_RANGE.
104 Tmap.fx_w = fl2i(Tmap.l.sw * GR_Z_RANGE)+gr_zoffset;
105 Tmap.fx_dwdx = fl2i(Tmap.deltas.sw * GR_Z_RANGE);
107 // SDL_assert(Tmap.fx_w < 65536 );
108 // SDL_assert(Tmap.fx_w >= 0 );
109 // SDL_assert(Tmap.fx_w+Tmap.fx_dwdx*Tmap.loop_count < 65536 );
110 // SDL_assert(Tmap.fx_w+Tmap.fx_dwdx*Tmap.loop_count >= 0 );
125 // Put the FPU in low precision mode
126 fstcw Tmap.OldFPUCW // store copy of CW
127 mov ax,Tmap.OldFPUCW // get it in ax
129 mov Tmap.FPUCW,ax // store it
130 fldcw Tmap.FPUCW // load the FPU
133 mov ecx, Tmap.loop_count // ecx = width
134 mov edi, Tmap.dest_row_data // edi = dest pointer
136 // edi = pointer to start pixel in dest dib
139 mov eax,ecx // eax and ecx = width
140 shr ecx,5 // ecx = width / subdivision length
141 and eax,31 // eax = width mod subdivision length
142 jnz some_left_over // any leftover?
143 dec ecx // no, so special case last span
144 mov eax,32 // it's 32 pixels long (one full subdivision)
146 mov Tmap.Subdivisions,ecx // store widths
147 mov Tmap.WidthModLength,eax
149 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
150 // st0 st1 st2 st3 st4 st5 st6 st7
152 fld Tmap.l.u // U/ZL V/ZL
153 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
154 fld1 // 1 1/ZL U/ZL V/ZL
155 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
156 fld st // ZL ZL 1/ZL U/ZL V/ZL
157 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
158 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
159 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
161 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
162 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
164 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
166 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
167 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
168 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
169 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
170 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
172 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
174 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
175 // @todo overlap this guy
176 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
177 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
178 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
179 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
180 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
182 cmp ecx,0 // check for any full spans
183 jle HandleLeftoverPixels
187 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
188 // UR VR V/ZR 1/ZR U/ZR UL VL
190 // convert left side coords
192 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
193 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
194 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
196 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
197 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
198 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
200 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
202 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
203 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
204 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
205 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
207 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
208 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
210 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
211 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
212 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
214 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
215 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
217 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
218 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
219 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
220 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
221 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
222 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
223 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
226 // setup delta values
228 mov eax,Tmap.DeltaV // get v 16.16 step
229 mov ebx,eax // copy it
230 sar eax,16 // get v int step
231 shl ebx,16 // get v frac step
232 mov Tmap.DeltaVFrac,ebx // store it
233 imul eax,Tmap.src_offset // calculate texture step for v int step
235 mov ebx,Tmap.DeltaU // get u 16.16 step
236 mov ecx,ebx // copy it
237 sar ebx,16 // get u int step
238 shl ecx,16 // get u frac step
239 mov Tmap.DeltaUFrac,ecx // store it
240 add eax,ebx // calculate uint + vint step
241 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
242 add eax,Tmap.src_offset // calculate whole step + v carry
243 mov Tmap.uv_delta[0],eax // save in v-carry slot
245 // setup initial coordinates
246 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
248 mov ebx,esi // copy it
249 sar esi,16 // get integer part
250 shl ebx,16 // get fractional part
252 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
254 mov edx,ecx // copy it
255 sar edx,16 // get integer part
256 shl ecx,16 // get fractional part
257 imul edx,Tmap.src_offset // calc texture scanline address
258 add esi,edx // calc texture offset
259 add esi,Tmap.pixptr // calc address
261 // set up affine registers
267 mov ebp, Tmap.fx_dl_dx
278 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
279 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
280 // This divide should happen while the pixel span is drawn.
281 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
285 // edi = dest dib bits at current pixel
286 // esi = texture pointer at current u,v
288 // ebx = u fraction 0.32
289 // ecx = v fraction 0.32
291 // ebp = v carry scratch
293 mov al,[edi] // preread the destination cache line
295 mov Tmap.InnerLooper, 32/4 // Set up loop counter: 32 pixels, 4 per unrolled pass
298 sub eax, Tmap.pScreenBits
303 // Make ESI = DV:DU in 5:11,5:11 format
309 mov Tmap.DeltaUFrac, esi // DeltaUFrac now holds the value added to ECX after every pixel
311 // Make ECX = V:U in 5:11,5:11 format
322 // ecx = V:U in 8.6:10.8
323 // edx = zbuffer pointer
// Unrolled pixel 0 of 4
332 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
333 jle Skip0 // If pixel is covered, skip drawing
335 mov [edx+0], esi // Write z
337 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
338 shr ax, 11 // low word keeps only its top 5 bits
339 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
340 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
341 add eax, Tmap.pixptr // EAX = texel index + Pixptr
345 and eax, 0ffffh // clear upper bits
346 mov al, gr_fade_table[eax]
349 add ecx, Tmap.DeltaUFrac // advance packed V:U
350 add esi, Tmap.fx_dwdx // advance z
// Unrolled pixel 1 of 4
354 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
355 jle Skip1 // If pixel is covered, skip drawing
357 mov [edx+4], esi // Write z
359 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
360 shr ax, 11 // low word keeps only its top 5 bits
361 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
362 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
363 add eax, Tmap.pixptr // EAX = texel index + Pixptr
367 and eax, 0ffffh // clear upper bits
368 mov al, gr_fade_table[eax]
371 add ecx, Tmap.DeltaUFrac // advance packed V:U
372 add esi, Tmap.fx_dwdx // advance z
// Unrolled pixel 2 of 4
376 cmp esi, [edx+8] // Compare the Z depth of this pixel with zbuffer
377 jle Skip2 // If pixel is covered, skip drawing
379 mov [edx+8], esi // Write z
381 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
382 shr ax, 11 // low word keeps only its top 5 bits
383 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
384 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
385 add eax, Tmap.pixptr // EAX = texel index + Pixptr
389 and eax, 0ffffh // clear upper bits
390 mov al, gr_fade_table[eax]
393 add ecx, Tmap.DeltaUFrac // advance packed V:U
394 add esi, Tmap.fx_dwdx // advance z
// Unrolled pixel 3 of 4
398 cmp esi, [edx+12] // Compare the Z depth of this pixel with zbuffer
399 jle Skip3 // If pixel is covered, skip drawing
401 mov [edx+12], esi // Write z
403 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
404 shr ax, 11 // low word keeps only its top 5 bits
405 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
406 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
407 add eax, Tmap.pixptr // EAX = texel index + Pixptr
411 and eax, 0ffffh // clear upper bits
412 mov al, gr_fade_table[eax]
415 add ecx, Tmap.DeltaUFrac // advance packed V:U
416 add esi, Tmap.fx_dwdx // advance z
427 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
428 // ZR V/ZR 1/ZR U/ZR UL VL
430 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
431 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
432 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
433 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
435 dec Tmap.Subdivisions // decrement span count
436 jnz SpanLoop // loop back
439 HandleLeftoverPixels:
441 mov esi,Tmap.pixptr // load texture pointer
443 // edi = dest dib bits
444 // esi = current texture dib bits
445 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
446 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
448 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
449 jz FPUReturn ; nope, pop the FPU and bail
451 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
453 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
454 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
455 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
457 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
458 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
459 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
461 dec Tmap.WidthModLength ; calc how many steps to take
462 jz OnePixelSpan ; just one, don't do deltas'
464 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
467 // @todo rearrange things so we don't need these two instructions
468 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
469 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
471 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
472 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
473 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
474 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
475 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
476 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
478 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
480 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
481 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
483 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
485 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
486 fxch st(1) ; VR UR inv. inv. inv. dU VL
487 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
488 fxch st(6) ; dV UR inv. inv. inv. dU VR
490 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
491 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
492 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
494 fxch st(4) ; dU inv. inv. inv. UR VR
495 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
496 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
497 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
499 // @todo gross! these are to line up with the other loop
500 fld st(1) ; inv. inv. inv. inv. UR VR
501 fld st(2) ; inv. inv. inv. inv. inv. UR VR
504 // setup delta values
505 mov eax, Tmap.DeltaV // get v 16.16 step
506 mov ebx, eax // copy it
507 sar eax, 16 // get v int step
508 shl ebx, 16 // get v frac step
509 mov Tmap.DeltaVFrac, ebx // store it
510 imul eax, Tmap.src_offset // calc texture step for v int step
512 mov ebx, Tmap.DeltaU // get u 16.16 step
513 mov ecx, ebx // copy it
514 sar ebx, 16 // get the u int step
515 shl ecx, 16 // get the u frac step
516 mov Tmap.DeltaUFrac, ecx // store it
517 add eax, ebx // calc uint + vint step
518 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
519 add eax, Tmap.src_offset // calc whole step + v carry
520 mov Tmap.uv_delta[0], eax // save in v-carry slot
525 ; setup initial coordinates
526 mov esi, Tmap.UFixed // get u 16.16
527 mov ebx, esi // copy it
528 sar esi, 16 // get integer part
529 shl ebx, 16 // get fractional part
531 mov ecx, Tmap.VFixed // get v 16.16
532 mov edx, ecx // copy it
533 sar edx, 16 // get integer part
534 shl ecx, 16 // get fractional part
535 imul edx, Tmap.src_offset // calc texture scanline address
536 add esi, edx // calc texture offset
537 add esi, Tmap.pixptr // calc address
544 // mov edx, Tmap.DeltaUFrac
549 mov ebx, Tmap.fx_l_right
555 mov eax, Tmap.fx_dl_dx
564 sub eax, Tmap.pScreenBits
569 inc Tmap.WidthModLength // undo the earlier dec: back to the leftover pixel count
570 mov eax,Tmap.WidthModLength
574 mov Tmap.WidthModLength, eax
578 mov al,[edi] // preread the destination cache line
580 // Make ESI = DV:DU in 6:10,6:10 format (NOTE(review): the shr 11 / 03ffh mask below keep 5 integer bits per coordinate, which suggests 5:11 -- confirm)
586 mov Tmap.DeltaUFrac, esi // DeltaUFrac now holds the value added to ECX after every pixel
588 // Make ECX = V:U in 6:10,6:10 format (NOTE(review): see the 5:11 remark above -- confirm)
599 // ecx = V:U in 8.6:10.8
600 // edx = zbuffer pointer
// Leftover span, first pixel of a pair
609 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
610 jle Skip0a // If pixel is covered, skip drawing
612 mov [edx+0], esi // Write z
614 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
615 shr ax, 11 // low word keeps only its top 5 bits
616 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
617 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
618 add eax, Tmap.pixptr // EAX = texel index + Pixptr
622 and eax, 0ffffh // clear upper bits
623 mov al, gr_fade_table[eax]
626 add ecx, Tmap.DeltaUFrac // advance packed V:U
627 add esi, Tmap.fx_dwdx // advance z
// Leftover span, second pixel of a pair
631 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
632 jle Skip1a // If pixel is covered, skip drawing
634 mov [edx+4], esi // Write z
636 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
637 shr ax, 11 // low word keeps only its top 5 bits
638 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
639 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
640 add eax, Tmap.pixptr // EAX = texel index + Pixptr
644 and eax, 0ffffh // clear upper bits
645 mov al, gr_fade_table[eax]
648 add ecx, Tmap.DeltaUFrac // advance packed V:U
649 add esi, Tmap.fx_dwdx // advance z
656 dec Tmap.WidthModLength
// Leftover span, single pixel
664 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
665 jle Skip0b // If pixel is covered, skip drawing
667 mov [edx+0], esi // Write z
669 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
670 shr ax, 11 // low word keeps only its top 5 bits
671 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
672 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
673 add eax, Tmap.pixptr // EAX = texel index + Pixptr
677 and eax, 0ffffh // clear upper bits
678 mov al, gr_fade_table[eax]
681 add ecx, Tmap.DeltaUFrac // advance packed V:U
682 add esi, Tmap.fx_dwdx // advance z
688 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
689 // xxx xxx xxx xxx xxx xxx xxx
698 fldcw Tmap.OldFPUCW // restore the FPU
// Scanline drawer for tiled 32x32 textures.
// Reads its inputs from and writes its scratch values to the global Tmap structure.
// The switch on gr_zbuffering_mode calls tmapscan_pln8_zbuffered_tiled_32x32() for the
// FULL, WRITE and READ modes. The rest of the body follows the same span logic
// (32-pixel spans plus a leftover span, with an FPU divide per span end), and no
// z-buffer compare instructions appear in its pixel blocks.
// NOTE(review): this listing omits a number of original lines (labels, braces, the
// statements that follow each case), so the comments below describe only what is visible.
711 void tmapscan_pln8_tiled_32x32()
714 switch(gr_zbuffering_mode) {
717 case GR_ZBUFF_FULL: // both
718 tmapscan_pln8_zbuffered_tiled_32x32();
720 case GR_ZBUFF_WRITE: // write only
721 tmapscan_pln8_zbuffered_tiled_32x32();
723 case GR_ZBUFF_READ: // read only
724 tmapscan_pln8_zbuffered_tiled_32x32();
729 Tmap.fx_l = fl2f(Tmap.l.b*32.0);
730 Tmap.fx_l_right = fl2f(Tmap.r.b*32.0);
731 Tmap.fx_dl_dx = fl2f(Tmap.deltas.b*32.0);
// A negative step is negated and both end values are mirrored around 67.0 (fixed point).
733 if ( Tmap.fx_dl_dx < 0 ) {
734 Tmap.fx_dl_dx = -Tmap.fx_dl_dx;
735 Tmap.fx_l = (67*F1_0)-Tmap.fx_l;
736 Tmap.fx_l_right = (67*F1_0)-Tmap.fx_l_right;
737 // SDL_assert( Tmap.fx_l > 31*F1_0 );
738 // SDL_assert( Tmap.fx_l < 66*F1_0 );
739 // SDL_assert( Tmap.fx_dl_dx >= 0 );
740 // SDL_assert( Tmap.fx_dl_dx < 31*F1_0 );
// Deltas for a whole 32-pixel span (per-pixel delta * 32).
743 Tmap.fl_dudx_wide = Tmap.deltas.u*32.0f;
744 Tmap.fl_dvdx_wide = Tmap.deltas.v*32.0f;
745 Tmap.fl_dwdx_wide = Tmap.deltas.sw*32.0f;
// Integer z start value and per-pixel z step, both scaled by GR_Z_RANGE.
747 Tmap.fx_w = fl2i(Tmap.l.sw * GR_Z_RANGE)+gr_zoffset;
748 Tmap.fx_dwdx = fl2i(Tmap.deltas.sw * GR_Z_RANGE);
750 // SDL_assert(Tmap.fx_w < 65536 );
751 // SDL_assert(Tmap.fx_w >= 0 );
752 // SDL_assert(Tmap.fx_w+Tmap.fx_dwdx*Tmap.loop_count < 65536 );
753 // SDL_assert(Tmap.fx_w+Tmap.fx_dwdx*Tmap.loop_count >= 0 );
768 // Put the FPU in low precision mode
769 fstcw Tmap.OldFPUCW // store copy of CW
770 mov ax,Tmap.OldFPUCW // get it in ax
772 mov Tmap.FPUCW,ax // store it
773 fldcw Tmap.FPUCW // load the FPU
776 mov ecx, Tmap.loop_count // ecx = width
777 mov edi, Tmap.dest_row_data // edi = dest pointer
779 // edi = pointer to start pixel in dest dib
782 mov eax,ecx // eax and ecx = width
783 shr ecx,5 // ecx = width / subdivision length
784 and eax,31 // eax = width mod subdivision length
785 jnz some_left_over // any leftover?
786 dec ecx // no, so special case last span
787 mov eax,32 // it's 32 pixels long (one full subdivision)
789 mov Tmap.Subdivisions,ecx // store widths
790 mov Tmap.WidthModLength,eax
792 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
793 // st0 st1 st2 st3 st4 st5 st6 st7
795 fld Tmap.l.u // U/ZL V/ZL
796 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
797 fld1 // 1 1/ZL U/ZL V/ZL
798 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
799 fld st // ZL ZL 1/ZL U/ZL V/ZL
800 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
801 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
802 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
804 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
805 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
807 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
809 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
810 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
811 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
812 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
813 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
815 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
817 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
818 // @todo overlap this guy
819 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
820 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
821 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
822 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
823 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
825 cmp ecx,0 // check for any full spans
826 jle HandleLeftoverPixels
830 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
831 // UR VR V/ZR 1/ZR U/ZR UL VL
833 // convert left side coords
835 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
836 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
837 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
839 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
840 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
841 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
843 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
845 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
846 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
847 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
848 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
850 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
851 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
853 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
854 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
855 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
857 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
858 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
860 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
861 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
862 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
863 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
864 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
865 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
866 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
869 // setup delta values
871 mov eax,Tmap.DeltaV // get v 16.16 step
872 mov ebx,eax // copy it
873 sar eax,16 // get v int step
874 shl ebx,16 // get v frac step
875 mov Tmap.DeltaVFrac,ebx // store it
876 imul eax,Tmap.src_offset // calculate texture step for v int step
878 mov ebx,Tmap.DeltaU // get u 16.16 step
879 mov ecx,ebx // copy it
880 sar ebx,16 // get u int step
881 shl ecx,16 // get u frac step
882 mov Tmap.DeltaUFrac,ecx // store it
883 add eax,ebx // calculate uint + vint step
884 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
885 add eax,Tmap.src_offset // calculate whole step + v carry
886 mov Tmap.uv_delta[0],eax // save in v-carry slot
888 // setup initial coordinates
889 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
891 mov ebx,esi // copy it
892 sar esi,16 // get integer part
893 shl ebx,16 // get fractional part
895 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
897 mov edx,ecx // copy it
898 sar edx,16 // get integer part
899 shl ecx,16 // get fractional part
900 imul edx,Tmap.src_offset // calc texture scanline address
901 add esi,edx // calc texture offset
902 add esi,Tmap.pixptr // calc address
904 // set up affine registers
910 mov ebp, Tmap.fx_dl_dx
921 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
922 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
923 // This divide should happen while the pixel span is drawn.
924 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
928 // edi = dest dib bits at current pixel
929 // esi = texture pointer at current u,v
931 // ebx = u fraction 0.32
932 // ecx = v fraction 0.32
934 // ebp = v carry scratch
936 mov al,[edi] // preread the destination cache line
938 mov Tmap.InnerLooper, 32/4 // Set up loop counter: 32 pixels, 4 per unrolled pass
941 sub eax, Tmap.pScreenBits
946 // Make ESI = DV:DU in 6:10,6:10 format (NOTE(review): the shr 11 / 03ffh mask below keep 5 integer bits per coordinate, which suggests 5:11 -- confirm)
952 mov Tmap.DeltaUFrac, esi // DeltaUFrac now holds the value added to ECX after every pixel
954 // Make ECX = V:U in 6:10,6:10 format (NOTE(review): see the 5:11 remark above -- confirm)
964 // ecx = V:U in 8.6:10.8
965 // edx = zbuffer pointer
// Unrolled pixel 0 of 4
974 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
975 shr ax, 11 // low word keeps only its top 5 bits
976 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
977 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
978 add eax, Tmap.pixptr // EAX = texel index + Pixptr
982 and eax, 0ffffh // clear upper bits
983 mov al, gr_fade_table[eax]
985 add ecx, Tmap.DeltaUFrac // advance packed V:U
// Unrolled pixel 1 of 4
989 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
990 shr ax, 11 // low word keeps only its top 5 bits
991 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
992 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
993 add eax, Tmap.pixptr // EAX = texel index + Pixptr
997 and eax, 0ffffh // clear upper bits
998 mov al, gr_fade_table[eax]
1000 add ecx, Tmap.DeltaUFrac // advance packed V:U
// Unrolled pixel 2 of 4
1004 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
1005 shr ax, 11 // low word keeps only its top 5 bits
1006 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
1007 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
1008 add eax, Tmap.pixptr // EAX = texel index + Pixptr
1012 and eax, 0ffffh // clear upper bits
1013 mov al, gr_fade_table[eax]
1015 add ecx, Tmap.DeltaUFrac // advance packed V:U
// Unrolled pixel 3 of 4
1019 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
1020 shr ax, 11 // low word keeps only its top 5 bits
1021 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
1022 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
1023 add eax, Tmap.pixptr // EAX = texel index + Pixptr
1027 and eax, 0ffffh // clear upper bits
1028 mov al, gr_fade_table[eax]
1030 add ecx, Tmap.DeltaUFrac // advance packed V:U
1035 dec Tmap.InnerLooper
1040 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
1041 // ZR V/ZR 1/ZR U/ZR UL VL
1043 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1044 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1045 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1046 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
1048 dec Tmap.Subdivisions // decrement span count
1049 jnz SpanLoop // loop back
1052 HandleLeftoverPixels:
1054 mov esi,Tmap.pixptr // load texture pointer
1056 // edi = dest dib bits
1057 // esi = current texture dib bits
1058 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
1059 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
1061 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
1062 jz FPUReturn ; nope, pop the FPU and bail
1064 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
1066 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
1067 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
1068 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
1070 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
1071 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
1072 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
1074 dec Tmap.WidthModLength ; calc how many steps to take
1075 jz OnePixelSpan ; just one, don't do deltas'
1077 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
1080 // @todo rearrange things so we don't need these two instructions
1081 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
1082 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
1084 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
1085 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
1086 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
1087 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
1088 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
1089 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
1091 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
1093 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
1094 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
1096 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1098 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
1099 fxch st(1) ; VR UR inv. inv. inv. dU VL
1100 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
1101 fxch st(6) ; dV UR inv. inv. inv. dU VR
1103 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
1104 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
1105 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
1107 fxch st(4) ; dU inv. inv. inv. UR VR
1108 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
1109 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
1110 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
1112 // @todo gross! these are to line up with the other loop
1113 fld st(1) ; inv. inv. inv. inv. UR VR
1114 fld st(2) ; inv. inv. inv. inv. inv. UR VR
1117 // setup delta values
1118 mov eax, Tmap.DeltaV // get v 16.16 step
1119 mov ebx, eax // copy it
1120 sar eax, 16 // get v int step
1121 shl ebx, 16 // get v frac step
1122 mov Tmap.DeltaVFrac, ebx // store it
1123 imul eax, Tmap.src_offset // calc texture step for v int step
1125 mov ebx, Tmap.DeltaU // get u 16.16 step
1126 mov ecx, ebx // copy it
1127 sar ebx, 16 // get the u int step
1128 shl ecx, 16 // get the u frac step
1129 mov Tmap.DeltaUFrac, ecx // store it
1130 add eax, ebx // calc uint + vint step
1131 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1132 add eax, Tmap.src_offset // calc whole step + v carry
1133 mov Tmap.uv_delta[0], eax // save in v-carry slot
1138 ; setup initial coordinates
1139 mov esi, Tmap.UFixed // get u 16.16
1140 mov ebx, esi // copy it
1141 sar esi, 16 // get integer part
1142 shl ebx, 16 // get fractional part
1144 mov ecx, Tmap.VFixed // get v 16.16
1145 mov edx, ecx // copy it
1146 sar edx, 16 // get integer part
1147 shl ecx, 16 // get fractional part
1148 imul edx, Tmap.src_offset // calc texture scanline address
1149 add esi, edx // calc texture offset
1150 add esi, Tmap.pixptr // calc address
1157 // mov edx, Tmap.DeltaUFrac
1161 mov ebx, Tmap.fx_l_right
1167 mov eax, Tmap.fx_dl_dx
1176 sub eax, Tmap.pScreenBits
1181 inc Tmap.WidthModLength // undo the earlier dec: back to the leftover pixel count
1182 mov eax,Tmap.WidthModLength
1186 mov Tmap.WidthModLength, eax
1190 mov al,[edi] // preread the destination cache line
1192 // Make ESI = DV:DU in 6:10,6:10 format (NOTE(review): the shr 11 / 03ffh mask below keep 5 integer bits per coordinate, which suggests 5:11 -- confirm)
1193 mov eax, Tmap.DeltaU
1195 mov esi, Tmap.DeltaV
1198 mov Tmap.DeltaUFrac, esi // DeltaUFrac now holds the value added to ECX after every pixel
1200 // Make ECX = V:U in 6:10,6:10 format (NOTE(review): see the 5:11 remark above -- confirm)
1201 mov eax, Tmap.UFixed
1203 mov ecx, Tmap.VFixed
1211 // ecx = V:U in 8.6:10.8
1212 // edx = zbuffer pointer
1214 // edi = screen data
// Leftover span, first pixel of a pair
1221 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
1222 shr ax, 11 // low word keeps only its top 5 bits
1223 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
1224 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
1225 add eax, Tmap.pixptr // EAX = texel index + Pixptr
1229 and eax, 0ffffh // clear upper bits
1230 mov al, gr_fade_table[eax]
1232 add ecx, Tmap.DeltaUFrac // advance packed V:U
// Leftover span, second pixel of a pair
1236 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
1237 shr ax, 11 // low word keeps only its top 5 bits
1238 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
1239 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
1240 add eax, Tmap.pixptr // EAX = texel index + Pixptr
1244 and eax, 0ffffh // clear upper bits
1245 mov al, gr_fade_table[eax]
1247 add ecx, Tmap.DeltaUFrac // advance packed V:U
1254 dec Tmap.WidthModLength
// Leftover span, single pixel
1262 mov eax, ecx // EAX = copy of packed V.VF:U.UF from ECX
1263 shr ax, 11 // low word keeps only its top 5 bits
1264 rol eax, 5 // rotate the top 5 bits of EAX down into bits 0-4
1265 and eax, 03ffh // keep the low 10 bits (5+5 bit texel index)
1266 add eax, Tmap.pixptr // EAX = texel index + Pixptr
1270 and eax, 0ffffh // clear upper bits
1271 mov al, gr_fade_table[eax]
1273 add ecx, Tmap.DeltaUFrac // advance packed V:U
1279 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
1280 // xxx xxx xxx xxx xxx xxx xxx
1289 fldcw Tmap.OldFPUCW // restore the FPU