2 * Copyright (C) Volition, Inc. 1999. All rights reserved.
4 * All source code herein is the property of Volition, Inc. You may not sell
5 * or otherwise commercially exploit the source or things you created based on
10 * $Logfile: /Freespace2/code/Graphics/TmapScanTiled64x64.cpp $
15 * Routines for drawing tiled 64x64 textures
18 * Revision 1.2 2002/06/09 04:41:18 relnev
19 * added copyright header
21 * Revision 1.1.1.1 2002/05/03 03:28:09 root
25 * 4 11/30/98 5:31p Dave
26 * Fixed up Fred support for software mode.
28 * 3 11/30/98 1:07p Dave
29 * 16 bit conversion, first run.
31 * 2 10/07/98 10:53a Dave
34 * 1 10/07/98 10:49a Dave
36 * 8 4/23/98 9:55a John
37 * Fixed some bugs in the tiled tmapper causing bright dots to appear all
40 * 7 3/10/98 4:19p John
41 * Cleaned up graphics lib. Took out most unused gr functions. Made D3D
42 * & Glide have popups and print screen. Took out all >8bpp software
43 * support. Made Fred zbuffer. Made zbuffer allocate dynamically to
44 * support Fred. Made zbuffering key off of functions rather than one
47 * 6 1/23/98 5:08p John
48 * Took L out of vertex structure used B (blue) instead. Took all small
49 * fireballs out of fireball types and used particles instead. Fixed some
50 * debris explosion things. Restructured fireball code. Restructured
51 * some lighting code. Made dynamic lighting on by default. Made groups
52 * of lasers only cast one light. Made fireballs not cast light.
54 * 5 12/04/97 10:38a John
55 * Fixed tiled texture mappers that were swapping uvs.
57 * 4 10/14/97 9:19a John
58 * removed fdiv warnings.
60 * 3 6/02/97 11:45a John
61 * fixed bugs with 64x64 and 128x128 tmappers.
63 * 2 5/12/97 12:27p John
64 * Restructured Graphics Library to add support for multiple renderers.
66 * 1 4/24/97 4:42p John
67 * Initial version of the tiled texture mappers for 64 & 128 wide
76 #include "grinternal.h"
78 #include "tmapscanline.h"
83 // Needed to keep warning 4725 away. See PsTypes.h for details why.
// NOTE(review): only the signature of this stub is visible here; presumably it has no
// runtime role beyond the warning workaround described above - confirm in PsTypes.h.
84 void disable_warning_4725_stub_tst64()
// Scanline routine for a tiled 64x64 texture with z-buffer testing.
// Takes no arguments: every input and all scratch state live in the global Tmap
// structure (loop_count = width in pixels, dest_row_data = destination pointer,
// l / r / deltas = left edge, right edge and per-pixel interpolants).
// The scanline is cut into 32-pixel subdivisions. U and V are computed exactly
// (with a 1/Z divide) only at subdivision boundaries and stepped linearly in
// between; any remaining pixels are handled after HandleLeftoverPixels.
// The FPU control word is saved on entry and restored before returning.
88 void tmapscan_pln8_zbuffered_tiled_64x64()
// Lighting setup: the .b field carries the light value (see revision note 6 in the
// file header); it is scaled by 32 and converted with fl2f.
90 Tmap.fx_l = fl2f(Tmap.l.b*32.0);
91 Tmap.fx_l_right = fl2f(Tmap.r.b*32.0);
92 Tmap.fx_dl_dx = fl2f(Tmap.deltas.b*32.0);
// A negative light delta is negated and both light values are mirrored around 67*F1_0.
// NOTE(review): presumably so the pixel loop only ever steps with a non-negative delta - confirm.
94 if ( Tmap.fx_dl_dx < 0 ) {
95 Tmap.fx_dl_dx = -Tmap.fx_dl_dx;
96 Tmap.fx_l = (67*F1_0)-Tmap.fx_l;
97 Tmap.fx_l_right = (67*F1_0)-Tmap.fx_l_right;
98 // SDL_assert( Tmap.fx_l > 31*F1_0 );
99 // SDL_assert( Tmap.fx_l < 66*F1_0 );
100 // SDL_assert( Tmap.fx_dl_dx >= 0 );
101 // SDL_assert( Tmap.fx_dl_dx < 31*F1_0 );
// Per-subdivision (32 pixel) steps of the U/Z, V/Z and 1/Z interpolants.
104 Tmap.fl_dudx_wide = Tmap.deltas.u*32.0f;
105 Tmap.fl_dvdx_wide = Tmap.deltas.v*32.0f;
106 Tmap.fl_dwdx_wide = Tmap.deltas.sw*32.0f;
// Integer depth value and its per-pixel step, derived from sw (labelled 1/Z in the
// FPU stack comments below) scaled by GR_Z_RANGE; the start value is biased by gr_zoffset.
108 Tmap.fx_w = fl2i(Tmap.l.sw * GR_Z_RANGE)+gr_zoffset;
109 Tmap.fx_dwdx = fl2i(Tmap.deltas.sw * GR_Z_RANGE);
111 // SDL_assert(Tmap.fx_w < 65536 );
112 // SDL_assert(Tmap.fx_w >= 0 );
113 // SDL_assert(Tmap.fx_w+Tmap.fx_dwdx*Tmap.loop_count < 65536 );
114 // SDL_assert(Tmap.fx_w+Tmap.fx_dwdx*Tmap.loop_count >= 0 );
129 // Put the FPU in low precision mode
130 fstcw Tmap.OldFPUCW // store copy of CW
131 mov ax,Tmap.OldFPUCW // get it in ax
133 mov Tmap.FPUCW,ax // store it
134 fldcw Tmap.FPUCW // load the FPU
137 mov ecx, Tmap.loop_count // ecx = width
138 mov edi, Tmap.dest_row_data // edi = dest pointer
140 // edi = pointer to start pixel in dest dib
143 mov eax,ecx // eax and ecx = width
144 shr ecx,5 // ecx = width / subdivision length
145 and eax,31 // eax = width mod subdivision length
146 jnz some_left_over // any leftover?
147 dec ecx // no, so special case last span
148 mov eax,32 // it's 32 pixels long (one full subdivision)
150 mov Tmap.Subdivisions,ecx // store widths
151 mov Tmap.WidthModLength,eax
153 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
154 // st0 st1 st2 st3 st4 st5 st6 st7
156 fld Tmap.l.u // U/ZL V/ZL
157 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
158 fld1 // 1 1/ZL U/ZL V/ZL
159 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
160 fld st // ZL ZL 1/ZL U/ZL V/ZL
161 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
162 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
163 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
165 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
166 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
168 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
170 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
171 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
172 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
173 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
174 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
176 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
178 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
179 // @todo overlap this guy
180 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
181 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
182 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
183 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
184 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
186 cmp ecx,0 // check for any full spans
187 jle HandleLeftoverPixels
191 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
192 // UR VR V/ZR 1/ZR U/ZR UL VL
194 // convert left side coords
196 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
197 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
198 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
200 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
201 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
202 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
204 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
206 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
207 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
208 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
209 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
211 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
212 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
214 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
215 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
216 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
218 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
219 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
221 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
222 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
223 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
224 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
225 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
226 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
227 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
230 // setup delta values
232 mov eax,Tmap.DeltaV // get v 16.16 step
233 mov ebx,eax // copy it
234 sar eax,16 // get v int step
235 shl ebx,16 // get v frac step
236 mov Tmap.DeltaVFrac,ebx // store it
237 imul eax,Tmap.src_offset // calculate texture step for v int step
239 mov ebx,Tmap.DeltaU // get u 16.16 step
240 mov ecx,ebx // copy it
241 sar ebx,16 // get u int step
242 shl ecx,16 // get u frac step
243 mov Tmap.DeltaUFrac,ecx // store it
244 add eax,ebx // calculate uint + vint step
245 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
246 add eax,Tmap.src_offset // calculate whole step + v carry
247 mov Tmap.uv_delta[0],eax // save in v-carry slot
249 // setup initial coordinates
250 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
252 mov ebx,esi // copy it
253 sar esi,16 // get integer part
254 shl ebx,16 // get fractional part
256 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
258 mov edx,ecx // copy it
259 sar edx,16 // get integer part
260 shl ecx,16 // get fractional part
261 imul edx,Tmap.src_offset // calc texture scanline address
262 add esi,edx // calc texture offset
263 add esi,Tmap.pixptr // calc address
265 // set up affine registers
271 mov ebp, Tmap.fx_dl_dx
282 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
283 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
284 // This divide should happen while the pixel span is drawn.
285 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
289 // edi = dest dib bits at current pixel
290 // esi = texture pointer at current u,v
292 // ebx = u fraction 0.32
293 // ecx = v fraction 0.32
295 // ebp = v carry scratch
297 mov al,[edi] // preread the destination cache line
299 mov Tmap.InnerLooper, 32/4 // Set up loop counter
302 sub eax, Tmap.pScreenBits
307 // Make ESI = DV:DU in 6:10,6:10 format
313 mov Tmap.DeltaUFrac, esi
315 // Make ECX = V:U in 6:10,6:10 format
326 // ecx = V:U in 8.6:10.8
327 // edx = zbuffer pointer
// Unrolled body: four pixels per pass, with 32-bit z-buffer entries at [edx+0], +4, +8, +12.
// A pixel is processed only when the depth in ESI is greater (signed compare) than the
// stored z-buffer entry; otherwise the jle skips it. ESI advances by Tmap.fx_dwdx and
// ECX by the packed delta in Tmap.DeltaUFrac after every pixel.
// NOTE(review): after "shr ax,10 / rol eax,6 / and eax,0fffh" the low 12 bits hold
// (low-word integer * 64) + high-word integer. With ECX described as V:U (V in the high
// word) that evaluates to U*64+V rather than the (V*64)+U stated below - confirm which
// component the packing code places in the low word.
336 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
337 jle Skip0 // If pixel is covered, skip drawing
339 mov [edx+0], esi // Write z
341 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
342 shr ax, 10 // EAX = V:U in 6.10:16.0
343 rol eax, 6 // EAX = V:U in 0.0:6:6
344 and eax, 0fffh // clear upper bits
345 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
349 and eax, 0ffffh // clear upper bits
350 mov al, gr_fade_table[eax]
353 add ecx, Tmap.DeltaUFrac
354 add esi, Tmap.fx_dwdx
358 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
359 jle Skip1 // If pixel is covered, skip drawing
361 mov [edx+4], esi // Write z
363 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
364 shr ax, 10 // EAX = V:U in 6.10:16.0
365 rol eax, 6 // EAX = V:U in 0.0:6:6
366 and eax, 0fffh // clear upper bits
367 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
371 and eax, 0ffffh // clear upper bits
372 mov al, gr_fade_table[eax]
375 add ecx, Tmap.DeltaUFrac
376 add esi, Tmap.fx_dwdx
380 cmp esi, [edx+8] // Compare the Z depth of this pixel with zbuffer
381 jle Skip2 // If pixel is covered, skip drawing
383 mov [edx+8], esi // Write z
385 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
386 shr ax, 10 // EAX = V:U in 6.10:16.0
387 rol eax, 6 // EAX = V:U in 0.0:6:6
388 and eax, 0fffh // clear upper bits
389 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
393 and eax, 0ffffh // clear upper bits
394 mov al, gr_fade_table[eax]
397 add ecx, Tmap.DeltaUFrac
398 add esi, Tmap.fx_dwdx
402 cmp esi, [edx+12] // Compare the Z depth of this pixel with zbuffer
403 jle Skip3 // If pixel is covered, skip drawing
405 mov [edx+12], esi // Write z
407 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
408 shr ax, 10 // EAX = V:U in 6.10:16.0
409 rol eax, 6 // EAX = V:U in 0.0:6:6
410 and eax, 0fffh // clear upper bits
411 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
415 and eax, 0ffffh // clear upper bits
416 mov al, gr_fade_table[eax]
419 add ecx, Tmap.DeltaUFrac
420 add esi, Tmap.fx_dwdx
431 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
432 // ZR V/ZR 1/ZR U/ZR UL VL
434 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
435 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
436 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
437 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
439 dec Tmap.Subdivisions // decrement span count
440 jnz SpanLoop // loop back
443 HandleLeftoverPixels:
445 mov esi,Tmap.pixptr // load texture pointer
447 // edi = dest dib bits
448 // esi = current texture dib bits
449 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
450 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
452 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
453 jz FPUReturn ; nope, pop the FPU and bail
455 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
457 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
458 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
459 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
461 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
462 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
463 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
465 dec Tmap.WidthModLength ; calc how many steps to take
466 jz OnePixelSpan ; just one, don't do deltas'
468 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
471 // @todo rearrange things so we don't need these two instructions
472 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
473 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
475 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
476 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
477 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
478 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
479 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
480 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
482 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
484 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
485 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
487 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
489 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
490 fxch st(1) ; VR UR inv. inv. inv. dU VL
491 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
492 fxch st(6) ; dV UR inv. inv. inv. dU VR
494 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
495 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
496 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
498 fxch st(4) ; dU inv. inv. inv. UR VR
499 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
500 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
501 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
503 // @todo gross! these are to line up with the other loop
504 fld st(1) ; inv. inv. inv. inv. UR VR
505 fld st(2) ; inv. inv. inv. inv. inv. UR VR
508 // setup delta values
509 mov eax, Tmap.DeltaV // get v 16.16 step
510 mov ebx, eax // copy it
511 sar eax, 16 // get v int step
512 shl ebx, 16 // get v frac step
513 mov Tmap.DeltaVFrac, ebx // store it
514 imul eax, Tmap.src_offset // calc texture step for v int step
516 mov ebx, Tmap.DeltaU // get u 16.16 step
517 mov ecx, ebx // copy it
518 sar ebx, 16 // get the u int step
519 shl ecx, 16 // get the u frac step
520 mov Tmap.DeltaUFrac, ecx // store it
521 add eax, ebx // calc uint + vint step
522 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
523 add eax, Tmap.src_offset // calc whole step + v carry
524 mov Tmap.uv_delta[0], eax // save in v-carry slot
529 ; setup initial coordinates
530 mov esi, Tmap.UFixed // get u 16.16
531 mov ebx, esi // copy it
532 sar esi, 16 // get integer part
533 shl ebx, 16 // get fractional part
535 mov ecx, Tmap.VFixed // get v 16.16
536 mov edx, ecx // copy it
537 sar edx, 16 // get integer part
538 shl ecx, 16 // get fractional part
539 imul edx, Tmap.src_offset // calc texture scanline address
540 add esi, edx // calc texture offset
541 add esi, Tmap.pixptr // calc address
548 // mov edx, Tmap.DeltaUFrac
552 mov ebx, Tmap.fx_l_right
558 mov eax, Tmap.fx_dl_dx
566 sub eax, Tmap.pScreenBits
571 inc Tmap.WidthModLength
572 mov eax,Tmap.WidthModLength
576 mov Tmap.WidthModLength, eax
580 mov al,[edi] // preread the destination cache line
582 // Make ESI = DV:DU in 6:10,6:10 format
588 mov Tmap.DeltaUFrac, esi
590 // Make ECX = V:U in 6:10,6:10 format
601 // ecx = V:U in 8.6:10.8
602 // edx = zbuffer pointer
// Leftover pixels: same per-pixel z test and texel lookup as the full-span loop,
// here unrolled two at a time (z offsets +0 and +4).
611 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
612 jle Skip0a // If pixel is covered, skip drawing
614 mov [edx+0], esi // Write z
616 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
617 shr ax, 10 // EAX = V:U in 6.10:16.0
618 rol eax, 6 // EAX = V:U in 0.0:6:6
619 and eax, 0fffh // clear upper bits
620 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
624 and eax, 0ffffh // clear upper bits
625 mov al, gr_fade_table[eax]
628 add ecx, Tmap.DeltaUFrac
629 add esi, Tmap.fx_dwdx
633 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
634 jle Skip1a // If pixel is covered, skip drawing
636 mov [edx+4], esi // Write z
638 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
639 shr ax, 10 // EAX = V:U in 6.10:16.0
640 rol eax, 6 // EAX = V:U in 0.0:6:6
641 and eax, 0fffh // clear upper bits
642 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
646 and eax, 0ffffh // clear upper bits
647 mov al, gr_fade_table[eax]
650 add ecx, Tmap.DeltaUFrac
651 add esi, Tmap.fx_dwdx
658 dec Tmap.WidthModLength
// Single-pixel variant of the block above (z offset +0 only).
666 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
667 jle Skip0b // If pixel is covered, skip drawing
669 mov [edx+0], esi // Write z
671 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
672 shr ax, 10 // EAX = V:U in 6.10:16.0
673 rol eax, 6 // EAX = V:U in 0.0:6:6
674 and eax, 0fffh // clear upper bits
675 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
679 and eax, 0ffffh // clear upper bits
680 mov al, gr_fade_table[eax]
683 add ecx, Tmap.DeltaUFrac
684 add esi, Tmap.fx_dwdx
690 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
691 // xxx xxx xxx xxx xxx xxx xxx
700 fldcw Tmap.OldFPUCW // restore the FPU
// Scanline routine for a tiled 64x64 texture; entry point that selects between the
// z-buffered and the non-z-buffered path.
// Takes no arguments: all inputs and scratch state live in the global Tmap structure.
// For GR_ZBUFF_FULL, GR_ZBUFF_WRITE and GR_ZBUFF_READ it calls
// tmapscan_pln8_zbuffered_tiled_64x64(). The remainder of the body repeats the same
// 32-pixel subdivision scheme, but the visible pixel code contains no z-buffer
// compare or write.
// The FPU control word is saved on entry to the assembly and restored before returning.
713 void tmapscan_pln8_tiled_64x64()
716 switch(gr_zbuffering_mode) {
719 case GR_ZBUFF_FULL: // both
720 tmapscan_pln8_zbuffered_tiled_64x64();
722 case GR_ZBUFF_WRITE: // write only
723 tmapscan_pln8_zbuffered_tiled_64x64();
725 case GR_ZBUFF_READ: // read only
726 tmapscan_pln8_zbuffered_tiled_64x64();
// Lighting setup: the .b field carries the light value (see revision note 6 in the
// file header); it is scaled by 32 and converted with fl2f.
731 Tmap.fx_l = fl2f(Tmap.l.b*32.0);
732 Tmap.fx_l_right = fl2f(Tmap.r.b*32.0);
733 Tmap.fx_dl_dx = fl2f(Tmap.deltas.b*32.0);
// A negative light delta is negated and both light values are mirrored around 67*F1_0.
// NOTE(review): presumably so the pixel loop only ever steps with a non-negative delta - confirm.
735 if ( Tmap.fx_dl_dx < 0 ) {
736 Tmap.fx_dl_dx = -Tmap.fx_dl_dx;
737 Tmap.fx_l = (67*F1_0)-Tmap.fx_l;
738 Tmap.fx_l_right = (67*F1_0)-Tmap.fx_l_right;
739 // SDL_assert( Tmap.fx_l > 31*F1_0 );
740 // SDL_assert( Tmap.fx_l < 66*F1_0 );
741 // SDL_assert( Tmap.fx_dl_dx >= 0 );
742 // SDL_assert( Tmap.fx_dl_dx < 31*F1_0 );
// Per-subdivision (32 pixel) steps of the U/Z, V/Z and 1/Z interpolants.
745 Tmap.fl_dudx_wide = Tmap.deltas.u*32.0f;
746 Tmap.fl_dvdx_wide = Tmap.deltas.v*32.0f;
747 Tmap.fl_dwdx_wide = Tmap.deltas.sw*32.0f;
// Integer depth value and per-pixel step, computed the same way as in the z-buffered routine.
// NOTE(review): no use of fx_w / fx_dwdx is visible in the pixel code of this function - confirm whether they are still needed here.
749 Tmap.fx_w = fl2i(Tmap.l.sw * GR_Z_RANGE)+gr_zoffset;
750 Tmap.fx_dwdx = fl2i(Tmap.deltas.sw * GR_Z_RANGE);
752 // SDL_assert(Tmap.fx_w < 65536 );
753 // SDL_assert(Tmap.fx_w >= 0 );
754 // SDL_assert(Tmap.fx_w+Tmap.fx_dwdx*Tmap.loop_count < 65536 );
755 // SDL_assert(Tmap.fx_w+Tmap.fx_dwdx*Tmap.loop_count >= 0 );
770 // Put the FPU in low precision mode
771 fstcw Tmap.OldFPUCW // store copy of CW
772 mov ax,Tmap.OldFPUCW // get it in ax
774 mov Tmap.FPUCW,ax // store it
775 fldcw Tmap.FPUCW // load the FPU
778 mov ecx, Tmap.loop_count // ecx = width
779 mov edi, Tmap.dest_row_data // edi = dest pointer
781 // edi = pointer to start pixel in dest dib
784 mov eax,ecx // eax and ecx = width
785 shr ecx,5 // ecx = width / subdivision length
786 and eax,31 // eax = width mod subdivision length
787 jnz some_left_over // any leftover?
788 dec ecx // no, so special case last span
789 mov eax,32 // it's 32 pixels long (one full subdivision)
791 mov Tmap.Subdivisions,ecx // store widths
792 mov Tmap.WidthModLength,eax
794 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
795 // st0 st1 st2 st3 st4 st5 st6 st7
797 fld Tmap.l.u // U/ZL V/ZL
798 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
799 fld1 // 1 1/ZL U/ZL V/ZL
800 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
801 fld st // ZL ZL 1/ZL U/ZL V/ZL
802 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
803 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
804 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
806 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
807 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
809 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
811 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
812 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
813 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
814 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
815 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
817 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
819 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
820 // @todo overlap this guy
821 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
822 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
823 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
824 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
825 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
827 cmp ecx,0 // check for any full spans
828 jle HandleLeftoverPixels
832 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
833 // UR VR V/ZR 1/ZR U/ZR UL VL
835 // convert left side coords
837 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
838 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
839 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
841 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
842 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
843 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
845 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
847 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
848 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
849 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
850 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
852 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
853 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
855 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
856 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
857 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
859 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
860 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
862 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
863 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
864 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
865 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
866 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
867 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
868 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
871 // setup delta values
873 mov eax,Tmap.DeltaV // get v 16.16 step
874 mov ebx,eax // copy it
875 sar eax,16 // get v int step
876 shl ebx,16 // get v frac step
877 mov Tmap.DeltaVFrac,ebx // store it
878 imul eax,Tmap.src_offset // calculate texture step for v int step
880 mov ebx,Tmap.DeltaU // get u 16.16 step
881 mov ecx,ebx // copy it
882 sar ebx,16 // get u int step
883 shl ecx,16 // get u frac step
884 mov Tmap.DeltaUFrac,ecx // store it
885 add eax,ebx // calculate uint + vint step
886 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
887 add eax,Tmap.src_offset // calculate whole step + v carry
888 mov Tmap.uv_delta[0],eax // save in v-carry slot
890 // setup initial coordinates
891 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
893 mov ebx,esi // copy it
894 sar esi,16 // get integer part
895 shl ebx,16 // get fractional part
897 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
899 mov edx,ecx // copy it
900 sar edx,16 // get integer part
901 shl ecx,16 // get fractional part
902 imul edx,Tmap.src_offset // calc texture scanline address
903 add esi,edx // calc texture offset
904 add esi,Tmap.pixptr // calc address
906 // set up affine registers
912 mov ebp, Tmap.fx_dl_dx
923 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
924 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
925 // This divide should happen while the pixel span is drawn.
926 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
930 // edi = dest dib bits at current pixel
931 // esi = texture pointer at current u,v
933 // ebx = u fraction 0.32
934 // ecx = v fraction 0.32
936 // ebp = v carry scratch
938 mov al,[edi] // preread the destination cache line
940 mov Tmap.InnerLooper, 32/4 // Set up loop counter
943 sub eax, Tmap.pScreenBits
948 // Make ESI = DV:DU in 6:10,6:10 format
954 mov Tmap.DeltaUFrac, esi
956 // Make ECX = V:U in 6:10,6:10 format
966 // ecx = V:U in 8.6:10.8
967 // edx = zbuffer pointer
// Unrolled body: four texel lookups per pass, with ECX advanced by the packed delta
// in Tmap.DeltaUFrac after each one; Tmap.InnerLooper counts the passes.
// NOTE(review): after "shr ax,10 / rol eax,6 / and eax,0fffh" the low 12 bits hold
// (low-word integer * 64) + high-word integer. With ECX described as V:U (V in the high
// word) that evaluates to U*64+V rather than the (V*64)+U stated below - confirm which
// component the packing code places in the low word.
976 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
977 shr ax, 10 // EAX = V:U in 6.10:16.0
978 rol eax, 6 // EAX = V:U in 0.0:6:6
979 and eax, 0fffh // clear upper bits
980 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
984 and eax, 0ffffh // clear upper bits
985 mov al, gr_fade_table[eax]
987 add ecx, Tmap.DeltaUFrac
991 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
992 shr ax, 10 // EAX = V:U in 6.10:16.0
993 rol eax, 6 // EAX = V:U in 0.0:6:6
994 and eax, 0fffh // clear upper bits
995 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
999 and eax, 0ffffh // clear upper bits
1000 mov al, gr_fade_table[eax]
1002 add ecx, Tmap.DeltaUFrac
1006 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
1007 shr ax, 10 // EAX = V:U in 6.10:16.0
1008 rol eax, 6 // EAX = V:U in 0.0:6:6
1009 and eax, 0fffh // clear upper bits
1010 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
1014 and eax, 0ffffh // clear upper bits
1015 mov al, gr_fade_table[eax]
1017 add ecx, Tmap.DeltaUFrac
1021 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
1022 shr ax, 10 // EAX = V:U in 6.10:16.0
1023 rol eax, 6 // EAX = V:U in 0.0:6:6
1024 and eax, 0fffh // clear upper bits
1025 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
1029 and eax, 0ffffh // clear upper bits
1030 mov al, gr_fade_table[eax]
1032 add ecx, Tmap.DeltaUFrac
1037 dec Tmap.InnerLooper
1042 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
1043 // ZR V/ZR 1/ZR U/ZR UL VL
1045 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1046 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1047 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1048 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
1050 dec Tmap.Subdivisions // decrement span count
1051 jnz SpanLoop // loop back
1054 HandleLeftoverPixels:
1056 mov esi,Tmap.pixptr // load texture pointer
1058 // edi = dest dib bits
1059 // esi = current texture dib bits
1060 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
1061 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
1063 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
1064 jz FPUReturn ; nope, pop the FPU and bail
1066 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
1068 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
1069 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
1070 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
1072 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
1073 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
1074 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
1076 dec Tmap.WidthModLength ; calc how many steps to take
1077 jz OnePixelSpan ; just one, don't do deltas'
1079 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
1082 // @todo rearrange things so we don't need these two instructions
1083 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
1084 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
1086 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
1087 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
1088 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
1089 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
1090 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
1091 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
1093 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
1095 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
1096 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
1098 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1100 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
1101 fxch st(1) ; VR UR inv. inv. inv. dU VL
1102 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
1103 fxch st(6) ; dV UR inv. inv. inv. dU VR
1105 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
1106 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
1107 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
1109 fxch st(4) ; dU inv. inv. inv. UR VR
1110 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
1111 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
1112 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
1114 // @todo gross! these are to line up with the other loop
1115 fld st(1) ; inv. inv. inv. inv. UR VR
1116 fld st(2) ; inv. inv. inv. inv. inv. UR VR
1119 // setup delta values
1120 mov eax, Tmap.DeltaV // get v 16.16 step
1121 mov ebx, eax // copy it
1122 sar eax, 16 // get v int step
1123 shl ebx, 16 // get v frac step
1124 mov Tmap.DeltaVFrac, ebx // store it
1125 imul eax, Tmap.src_offset // calc texture step for v int step
1127 mov ebx, Tmap.DeltaU // get u 16.16 step
1128 mov ecx, ebx // copy it
1129 sar ebx, 16 // get the u int step
1130 shl ecx, 16 // get the u frac step
1131 mov Tmap.DeltaUFrac, ecx // store it
1132 add eax, ebx // calc uint + vint step
1133 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1134 add eax, Tmap.src_offset // calc whole step + v carry
1135 mov Tmap.uv_delta[0], eax // save in v-carry slot
1140 ; setup initial coordinates
1141 mov esi, Tmap.UFixed // get u 16.16
1142 mov ebx, esi // copy it
1143 sar esi, 16 // get integer part
1144 shl ebx, 16 // get fractional part
1146 mov ecx, Tmap.VFixed // get v 16.16
1147 mov edx, ecx // copy it
1148 sar edx, 16 // get integer part
1149 shl ecx, 16 // get fractional part
1150 imul edx, Tmap.src_offset // calc texture scanline address
1151 add esi, edx // calc texture offset
1152 add esi, Tmap.pixptr // calc address
1159 // mov edx, Tmap.DeltaUFrac
1163 mov ebx, Tmap.fx_l_right
1169 mov eax, Tmap.fx_dl_dx
1178 sub eax, Tmap.pScreenBits
1183 inc Tmap.WidthModLength
1184 mov eax,Tmap.WidthModLength
1188 mov Tmap.WidthModLength, eax
1192 mov al,[edi] // preread the destination cache line
1194 // Make ESI = DV:DU in 6:10,6:10 format
1195 mov eax, Tmap.DeltaU
1197 mov esi, Tmap.DeltaV
1200 mov Tmap.DeltaUFrac, esi
1202 // Make ECX = V:U in 6:10,6:10 format
1203 mov eax, Tmap.UFixed
1205 mov ecx, Tmap.VFixed
1213 // ecx = V:U in 8.6:10.8
1214 // edx = zbuffer pointer
1216 // edi = screen data
// Leftover pixels: same texel lookup as the full-span loop, unrolled two at a time.
1223 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
1224 shr ax, 10 // EAX = V:U in 6.10:16.0
1225 rol eax, 6 // EAX = V:U in 0.0:6:6
1226 and eax, 0fffh // clear upper bits
1227 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
1231 and eax, 0ffffh // clear upper bits
1232 mov al, gr_fade_table[eax]
1234 add ecx, Tmap.DeltaUFrac
1238 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
1239 shr ax, 10 // EAX = V:U in 6.10:16.0
1240 rol eax, 6 // EAX = V:U in 0.0:6:6
1241 and eax, 0fffh // clear upper bits
1242 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
1246 and eax, 0ffffh // clear upper bits
1247 mov al, gr_fade_table[eax]
1249 add ecx, Tmap.DeltaUFrac
1256 dec Tmap.WidthModLength
// Single-pixel variant of the lookup above.
1264 mov eax, ecx // EAX = V.VF:U.UF in 6.10:6.10
1265 shr ax, 10 // EAX = V:U in 6.10:16.0
1266 rol eax, 6 // EAX = V:U in 0.0:6:6
1267 and eax, 0fffh // clear upper bits
1268 add eax, Tmap.pixptr // EAX = (V*64)+U + Pixptr
1272 and eax, 0ffffh // clear upper bits
1273 mov al, gr_fade_table[eax]
1275 add ecx, Tmap.DeltaUFrac
1281 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
1282 // xxx xxx xxx xxx xxx xxx xxx
1291 fldcw Tmap.OldFPUCW // restore the FPU