2 * $Logfile: /Freespace2/code/Graphics/TmapScanTiled256x256.cpp $
7 * Routines for drawing tiled 256x256 textures
10 * Revision 1.1 2002/05/03 03:28:09 root
14 * 4 11/30/98 5:31p Dave
15 * Fixed up Fred support for software mode.
17 * 3 11/30/98 1:07p Dave
18 * 16 bit conversion, first run.
20 * 2 10/07/98 10:53a Dave
23 * 1 10/07/98 10:49a Dave
25 * 11 5/13/98 2:53p John
26 * Made subspace effect work under software. Had to add new inner loop to
27 * tmapper. Added glows to end of subspace effect. Made subspace effect
28 * levels use gamepalette-subspace palette.
30 * 10 4/23/98 9:55a John
31 * Fixed some bugs in the tiled tmapper causing bright dots to appear all
34 * 9 3/10/98 4:19p John
35 * Cleaned up graphics lib. Took out most unused gr functions. Made D3D
36 * & Glide have popups and print screen. Took out all >8bpp software
37 * support. Made Fred zbuffer. Made zbuffer allocate dynamically to
38 * support Fred. Made zbuffering key off of functions rather than one
41 * 8 1/27/98 5:13p John
42 * Moved all float to int conversions out of inner loops and into outer.
43 * Made outer loop use FISTP instead of ftol, saved about 10%.
45 * 7 1/23/98 5:08p John
46 * Took L out of vertex structure used B (blue) instead. Took all small
47 * fireballs out of fireball types and used particles instead. Fixed some
48 * debris explosion things. Restructured fireball code. Restructured
49 * some lighting code. Made dynamic lighting on by default. Made groups
50 * of lasers only cast one light. Made fireballs not cast light.
52 * 6 12/04/97 10:38a John
53 * Fixed tiled texture mappers that were swapping uvs.
55 * 5 10/14/97 9:19a John
56 * removed fdiv warnings.
58 * 4 9/10/97 11:38a Sandeep
60 * 3 9/09/97 3:39p Sandeep
61 * warning level 4 bugs
63 * 2 5/12/97 12:27p John
64 * Restructured Graphics Library to add support for multiple renderers.
66 * 1 4/24/97 4:42p John
67 * Initial version of the tiled texture mappers for 64 & 128 wide
76 #include "grinternal.h"
78 #include "tmapscanline.h"
84 // Stub that exists only to keep compiler warning 4725 away. See PsTypes.h for the reason.
85 void disable_warning_4725_stub_tst256()
// Draws one scanline of a tiled 256x256 texture with a z-buffer test per pixel.
//
// All inputs come from the global Tmap state: loop_count is the width in
// pixels, dest_row_data is the destination pointer, and l / r / deltas hold
// the edge terms (u, v and sw are loaded from them below).
//
// The width is cut into 32-pixel spans. At each span boundary U and V are
// recovered on the FPU (1 / (1/Z), then multiplied by U/Z and V/Z) and
// converted to fixed point; inside a span they are stepped with integer adds.
// The final span (1..32 pixels) is handled under HandleLeftoverPixels, where
// the step is obtained by dividing by the remaining pixel count.
//
// For every pixel the depth value in esi is compared against the z-buffer
// entry at [edx+k]; on jle the pixel is skipped, otherwise the depth is
// written and the pixel value is fetched through gr_fade_table.
//
// The FPU control word is saved in Tmap.OldFPUCW on entry and reloaded from
// it at the end.
89 void tmapscan_pln8_zbuffered_tiled_256x256()
104 // Put the FPU in low precision mode
105 fstcw Tmap.OldFPUCW // store copy of CW
106 mov ax,Tmap.OldFPUCW // get it in ax
108 mov Tmap.FPUCW,ax // store it
109 fldcw Tmap.FPUCW // load the FPU
112 mov ecx, Tmap.loop_count // ecx = width
113 mov edi, Tmap.dest_row_data // edi = dest pointer
115 // edi = pointer to start pixel in dest dib
// Split the width into full 32-pixel spans plus one final span.
118 mov eax,ecx // eax and ecx = width
119 shr ecx,5 // ecx = width / subdivision length
120 and eax,31 // eax = width mod subdivision length
121 jnz some_left_over // any leftover?
122 dec ecx // no, so special case last span
123 mov eax,32 // it's 32 pixels long
125 mov Tmap.Subdivisions,ecx // store widths
126 mov Tmap.WidthModLength,eax
128 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
129 // st0 st1 st2 st3 st4 st5 st6 st7
131 fld Tmap.l.u // U/ZL V/ZL
132 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
133 fld1 // 1 1/ZL U/ZL V/ZL
134 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
135 fld st // ZL ZL 1/ZL U/ZL V/ZL
136 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
137 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
138 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
140 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
141 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
143 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
145 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
146 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
147 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
148 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
149 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
151 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
153 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
154 // @todo overlap this guy
155 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
156 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
157 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
158 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
159 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
161 cmp ecx,0 // check for any full spans
162 jle HandleLeftoverPixels
166 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
167 // UR VR V/ZR 1/ZR U/ZR UL VL
169 // convert left side coords
171 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
172 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
173 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
175 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
176 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
177 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
179 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
181 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
182 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
183 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
184 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
186 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
187 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
189 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
190 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
191 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
193 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
194 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
196 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
197 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
198 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
199 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
200 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
201 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
202 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
205 // setup delta values
// Each 16.16 step is split into an integer part (sar 16) and a fractional
// part moved to the top of the register (shl 16).
207 mov eax,Tmap.DeltaV // get v 16.16 step
208 mov ebx,eax // copy it
209 sar eax,16 // get v int step
210 shl ebx,16 // get v frac step
211 mov Tmap.DeltaVFrac,ebx // store it
212 imul eax,Tmap.src_offset // calculate texture step for v int step
214 mov ebx,Tmap.DeltaU // get u 16.16 step
215 mov ecx,ebx // copy it
216 sar ebx,16 // get u int step
217 shl ecx,16 // get u frac step
218 mov Tmap.DeltaUFrac,ecx // store it
219 add eax,ebx // calculate uint + vint step
220 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
221 add eax,Tmap.src_offset // calculate whole step + v carry
222 mov Tmap.uv_delta[0],eax // save in v-carry slot
224 // setup initial coordinates
225 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
227 mov ebx,esi // copy it
228 sar esi,16 // get integer part
229 shl ebx,16 // get fractional part
231 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
233 mov edx,ecx // copy it
234 sar edx,16 // get integer part
235 shl ecx,16 // get fractional part
236 imul edx,Tmap.src_offset // calc texture scanline address
237 add esi,edx // calc texture offset
238 add esi,Tmap.pixptr // calc address
240 // set up affine registers
246 mov ebp, Tmap.fx_dl_dx
257 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
258 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
259 // This divide should happen while the pixel span is drawn.
260 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
264 // edi = dest dib bits at current pixel
265 // esi = texture pointer at current u,v
267 // ebx = u fraction 0.32
268 // ecx = v fraction 0.32
270 // ebp = v carry scratch
272 mov al,[edi] // preread the destination cache line
274 mov Tmap.InnerLooper, 32/4 // Set up loop counter
277 sub eax, Tmap.pScreenBits
282 // Make ESI = DV:DU in 8:8,8:8 format
288 mov Tmap.DeltaUFrac, esi
290 // Make ECX = V:U in 8:8,8:8 format
301 // ecx = V:U in 8.8:8.8
302 // edx = zbuffer pointer
// Full-span inner loop, unrolled four pixels per pass (z-buffer offsets
// +0, +4, +8, +12). After each pixel ecx is stepped by Tmap.DeltaUFrac and
// the depth value in esi by Tmap.fx_dwdx.
311 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
312 jle Skip0 // If pixel is covered, skip drawing
314 mov [edx+0], esi // Write z
316 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
317 shr ax, 8 // EAX = V:U in 8.8:8.0
318 rol eax, 8 // EAX = V:U in 0.0:8:8
319 and eax, 0ffffh // clear upper bits
320 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
324 and eax, 0ffffh // clear upper bits
325 mov al, gr_fade_table[eax]
328 add ecx, Tmap.DeltaUFrac
329 add esi, Tmap.fx_dwdx
333 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
334 jle Skip1 // If pixel is covered, skip drawing
336 mov [edx+4], esi // Write z
338 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
339 shr ax, 8 // EAX = V:U in 8.8:8.0
340 rol eax, 8 // EAX = V:U in 0.0:8:8
341 and eax, 0ffffh // clear upper bits
342 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
346 and eax, 0ffffh // clear upper bits
347 mov al, gr_fade_table[eax]
350 add ecx, Tmap.DeltaUFrac
351 add esi, Tmap.fx_dwdx
355 cmp esi, [edx+8] // Compare the Z depth of this pixel with zbuffer
356 jle Skip2 // If pixel is covered, skip drawing
358 mov [edx+8], esi // Write z
360 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
361 shr ax, 8 // EAX = V:U in 8.8:8.0
362 rol eax, 8 // EAX = V:U in 0.0:8:8
363 and eax, 0ffffh // clear upper bits
364 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
368 and eax, 0ffffh // clear upper bits
369 mov al, gr_fade_table[eax]
372 add ecx, Tmap.DeltaUFrac
373 add esi, Tmap.fx_dwdx
377 cmp esi, [edx+12] // Compare the Z depth of this pixel with zbuffer
378 jle Skip3 // If pixel is covered, skip drawing
380 mov [edx+12], esi // Write z
382 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
383 shr ax, 8 // EAX = V:U in 8.8:8.0
384 rol eax, 8 // EAX = V:U in 0.0:8:8
385 and eax, 0ffffh // clear upper bits
386 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
390 and eax, 0ffffh // clear upper bits
391 mov al, gr_fade_table[eax]
394 add ecx, Tmap.DeltaUFrac
395 add esi, Tmap.fx_dwdx
406 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
407 // ZR V/ZR 1/ZR U/ZR UL VL
409 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
410 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
411 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
412 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
414 dec Tmap.Subdivisions // decrement span count
415 jnz SpanLoop // loop back
418 HandleLeftoverPixels:
420 mov esi,Tmap.pixptr // load texture pointer
422 // edi = dest dib bits
423 // esi = current texture dib bits
424 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
425 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
427 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
428 jz FPUReturn ; nope, pop the FPU and bail
430 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
432 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
433 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
434 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
436 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
437 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
438 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
440 dec Tmap.WidthModLength ; calc how many steps to take
441 jz OnePixelSpan ; just one, don't do deltas'
443 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
446 // @todo rearrange things so we don't need these two instructions
447 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
448 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
// The right-edge terms are taken from Tmap.r minus one Tmap.deltas step.
450 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
451 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
452 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
453 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
454 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
455 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
457 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
459 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
460 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
462 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
464 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
465 fxch st(1) ; VR UR inv. inv. inv. dU VL
466 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
467 fxch st(6) ; dV UR inv. inv. inv. dU VR
// WidthModLength was decremented above, so this divides by the number of steps.
469 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
470 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
471 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
473 fxch st(4) ; dU inv. inv. inv. UR VR
474 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
475 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
476 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
478 // @todo gross! these are to line up with the other loop
479 fld st(1) ; inv. inv. inv. inv. UR VR
480 fld st(2) ; inv. inv. inv. inv. inv. UR VR
483 // setup delta values
484 mov eax, Tmap.DeltaV // get v 16.16 step
485 mov ebx, eax // copy it
486 sar eax, 16 // get v int step
487 shl ebx, 16 // get v frac step
488 mov Tmap.DeltaVFrac, ebx // store it
489 imul eax, Tmap.src_offset // calc texture step for v int step
491 mov ebx, Tmap.DeltaU // get u 16.16 step
492 mov ecx, ebx // copy it
493 sar ebx, 16 // get the u int step
494 shl ecx, 16 // get the u frac step
495 mov Tmap.DeltaUFrac, ecx // store it
496 add eax, ebx // calc uint + vint step
497 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
498 add eax, Tmap.src_offset // calc whole step + v carry
499 mov Tmap.uv_delta[0], eax // save in v-carry slot
504 ; setup initial coordinates
505 mov esi, Tmap.UFixed // get u 16.16
506 mov ebx, esi // copy it
507 sar esi, 16 // get integer part
508 shl ebx, 16 // get fractional part
510 mov ecx, Tmap.VFixed // get v 16.16
511 mov edx, ecx // copy it
512 sar edx, 16 // get integer part
513 shl ecx, 16 // get fractional part
514 imul edx, Tmap.src_offset // calc texture scanline address
515 add esi, edx // calc texture offset
516 add esi, Tmap.pixptr // calc address
523 // mov edx, Tmap.DeltaUFrac
527 mov ebx, Tmap.fx_l_right
533 mov eax, Tmap.fx_dl_dx
542 sub eax, Tmap.pScreenBits
547 inc Tmap.WidthModLength
548 mov eax,Tmap.WidthModLength
552 mov Tmap.WidthModLength, eax
556 mov al,[edi] // preread the destination cache line
558 // Make ESI = DV:DU in 8:8,8:8 format
564 mov Tmap.DeltaUFrac, esi
566 // Make ECX = V:U in 8:8,8:8 format
577 // ecx = V:U in 8.8:8.8
578 // edx = zbuffer pointer
// Leftover pixels, two per pass (z-buffer offsets +0 and +4).
587 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
588 jle Skip0a // If pixel is covered, skip drawing
590 mov [edx+0], esi // Write z
592 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
593 shr ax, 8 // EAX = V:U in 8.8:8.0
594 rol eax, 8 // EAX = V:U in 0.0:8:8
595 and eax, 0ffffh // clear upper bits
596 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
600 and eax, 0ffffh // clear upper bits
601 mov al, gr_fade_table[eax]
604 add ecx, Tmap.DeltaUFrac
605 add esi, Tmap.fx_dwdx
609 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
610 jle Skip1a // If pixel is covered, skip drawing
612 mov [edx+4], esi // Write z
614 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
615 shr ax, 8 // EAX = V:U in 8.8:8.0
616 rol eax, 8 // EAX = V:U in 0.0:8:8
617 and eax, 0ffffh // clear upper bits
618 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
622 and eax, 0ffffh // clear upper bits
623 mov al, gr_fade_table[eax]
626 add ecx, Tmap.DeltaUFrac
627 add esi, Tmap.fx_dwdx
634 dec Tmap.WidthModLength
// Single-pixel variant of the same depth test and fetch.
642 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
643 jle Skip0b // If pixel is covered, skip drawing
645 mov [edx+0], esi // Write z
647 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
648 shr ax, 8 // EAX = V:U in 8.8:8.0
649 rol eax, 8 // EAX = V:U in 0.0:8:8
650 and eax, 0ffffh // clear upper bits
651 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
655 and eax, 0ffffh // clear upper bits
656 mov al, gr_fade_table[eax]
659 add ecx, Tmap.DeltaUFrac
660 add esi, Tmap.fx_dwdx
666 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
667 // xxx xxx xxx xxx xxx xxx xxx
676 fldcw Tmap.OldFPUCW // restore the FPU
// Scanline drawer for a tiled 256x256 texture.
//
// When gr_zbuffering_mode is GR_ZBUFF_FULL, GR_ZBUFF_WRITE or GR_ZBUFF_READ,
// the work is handed to tmapscan_pln8_zbuffered_tiled_256x256().
// NOTE(review): presumably each case returns right after the call -- confirm.
//
// The assembly that follows uses the same scheme as the z-buffered routine:
// Tmap.loop_count pixels are cut into 32-pixel spans, U and V are recovered
// on the FPU at span boundaries and stepped in fixed point in between, and
// the final span is handled under HandleLeftoverPixels. The per-pixel code
// visible here has no depth compare; each pixel value is fetched through
// gr_fade_table.
//
// The FPU control word is saved in Tmap.OldFPUCW on entry and reloaded from
// it at the end.
689 void tmapscan_pln8_tiled_256x256()
692 switch(gr_zbuffering_mode) {
695 case GR_ZBUFF_FULL: // both
696 tmapscan_pln8_zbuffered_tiled_256x256();
698 case GR_ZBUFF_WRITE: // write only
699 tmapscan_pln8_zbuffered_tiled_256x256();
701 case GR_ZBUFF_READ: // read only
702 tmapscan_pln8_zbuffered_tiled_256x256();
720 // Put the FPU in low precision mode
721 fstcw Tmap.OldFPUCW // store copy of CW
722 mov ax,Tmap.OldFPUCW // get it in ax
724 mov Tmap.FPUCW,ax // store it
725 fldcw Tmap.FPUCW // load the FPU
728 mov ecx, Tmap.loop_count // ecx = width
729 mov edi, Tmap.dest_row_data // edi = dest pointer
731 // edi = pointer to start pixel in dest dib
// Split the width into full 32-pixel spans plus one final span.
734 mov eax,ecx // eax and ecx = width
735 shr ecx,5 // ecx = width / subdivision length
736 and eax,31 // eax = width mod subdivision length
737 jnz some_left_over // any leftover?
738 dec ecx // no, so special case last span
739 mov eax,32 // it's 32 pixels long
741 mov Tmap.Subdivisions,ecx // store widths
742 mov Tmap.WidthModLength,eax
744 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
745 // st0 st1 st2 st3 st4 st5 st6 st7
747 fld Tmap.l.u // U/ZL V/ZL
748 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
749 fld1 // 1 1/ZL U/ZL V/ZL
750 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
751 fld st // ZL ZL 1/ZL U/ZL V/ZL
752 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
753 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
754 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
756 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
757 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
759 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
761 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
762 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
763 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
764 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
765 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
767 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
769 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
770 // @todo overlap this guy
771 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
772 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
773 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
774 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
775 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
777 cmp ecx,0 // check for any full spans
778 jle HandleLeftoverPixels
782 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
783 // UR VR V/ZR 1/ZR U/ZR UL VL
785 // convert left side coords
787 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
788 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
789 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
791 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
792 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
793 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
795 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
797 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
798 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
799 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
800 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
802 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
803 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
805 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
806 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
807 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
809 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
810 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
812 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
813 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
814 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
815 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
816 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
817 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
818 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
821 // setup delta values
// Each 16.16 step is split into an integer part (sar 16) and a fractional
// part moved to the top of the register (shl 16).
823 mov eax,Tmap.DeltaV // get v 16.16 step
824 mov ebx,eax // copy it
825 sar eax,16 // get v int step
826 shl ebx,16 // get v frac step
827 mov Tmap.DeltaVFrac,ebx // store it
828 imul eax,Tmap.src_offset // calculate texture step for v int step
830 mov ebx,Tmap.DeltaU // get u 16.16 step
831 mov ecx,ebx // copy it
832 sar ebx,16 // get u int step
833 shl ecx,16 // get u frac step
834 mov Tmap.DeltaUFrac,ecx // store it
835 add eax,ebx // calculate uint + vint step
836 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
837 add eax,Tmap.src_offset // calculate whole step + v carry
838 mov Tmap.uv_delta[0],eax // save in v-carry slot
840 // setup initial coordinates
841 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
843 mov ebx,esi // copy it
844 sar esi,16 // get integer part
845 shl ebx,16 // get fractional part
847 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
849 mov edx,ecx // copy it
850 sar edx,16 // get integer part
851 shl ecx,16 // get fractional part
852 imul edx,Tmap.src_offset // calc texture scanline address
853 add esi,edx // calc texture offset
854 add esi,Tmap.pixptr // calc address
856 // set up affine registers
862 mov ebp, Tmap.fx_dl_dx
873 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
874 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
875 // This divide should happen while the pixel span is drawn.
876 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
880 // edi = dest dib bits at current pixel
881 // esi = texture pointer at current u,v
883 // ebx = u fraction 0.32
884 // ecx = v fraction 0.32
886 // ebp = v carry scratch
888 mov al,[edi] // preread the destination cache line
890 mov Tmap.InnerLooper, 32/4 // Set up loop counter
893 sub eax, Tmap.pScreenBits
898 // Make ESI = DV:DU in 8:8,8:8 format
904 mov Tmap.DeltaUFrac, esi
906 // Make ECX = V:U in 8:8,8:8 format
916 // ecx = V:U in 8.8:8.8
917 // edx = zbuffer pointer
// Full-span inner loop, unrolled four pixels per pass. After each pixel ecx
// is stepped by Tmap.DeltaUFrac.
926 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
927 shr ax, 8 // EAX = V:U in 8.8:8.0
928 rol eax, 8 // EAX = V:U in 0.0:8:8
929 and eax, 0ffffh // clear upper bits
930 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
934 and eax, 0ffffh // clear upper bits
935 mov al, gr_fade_table[eax]
937 add ecx, Tmap.DeltaUFrac
941 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
942 shr ax, 8 // EAX = V:U in 8.8:8.0
943 rol eax, 8 // EAX = V:U in 0.0:8:8
944 and eax, 0ffffh // clear upper bits
945 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
949 and eax, 0ffffh // clear upper bits
950 mov al, gr_fade_table[eax]
952 add ecx, Tmap.DeltaUFrac
956 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
957 shr ax, 8 // EAX = V:U in 8.8:8.0
958 rol eax, 8 // EAX = V:U in 0.0:8:8
959 and eax, 0ffffh // clear upper bits
960 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
964 and eax, 0ffffh // clear upper bits
965 mov al, gr_fade_table[eax]
967 add ecx, Tmap.DeltaUFrac
971 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
972 shr ax, 8 // EAX = V:U in 8.8:8.0
973 rol eax, 8 // EAX = V:U in 0.0:8:8
974 and eax, 0ffffh // clear upper bits
975 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
979 and eax, 0ffffh // clear upper bits
980 mov al, gr_fade_table[eax]
982 add ecx, Tmap.DeltaUFrac
992 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
993 // ZR V/ZR 1/ZR U/ZR UL VL
995 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
996 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
997 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
998 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
1000 dec Tmap.Subdivisions // decrement span count
1001 jnz SpanLoop // loop back
1004 HandleLeftoverPixels:
1006 mov esi,Tmap.pixptr // load texture pointer
1008 // edi = dest dib bits
1009 // esi = current texture dib bits
1010 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
1011 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
1013 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
1014 jz FPUReturn ; nope, pop the FPU and bail
1016 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
1018 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
1019 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
1020 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
1022 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
1023 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
1024 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
1026 dec Tmap.WidthModLength ; calc how many steps to take
1027 jz OnePixelSpan ; just one, don't do deltas'
1029 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
1032 // @todo rearrange things so we don't need these two instructions
1033 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
1034 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
// The right-edge terms are taken from Tmap.r minus one Tmap.deltas step.
1036 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
1037 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
1038 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
1039 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
1040 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
1041 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
1043 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
1045 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
1046 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
1048 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1050 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
1051 fxch st(1) ; VR UR inv. inv. inv. dU VL
1052 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
1053 fxch st(6) ; dV UR inv. inv. inv. dU VR
// WidthModLength was decremented above, so this divides by the number of steps.
1055 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
1056 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
1057 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
1059 fxch st(4) ; dU inv. inv. inv. UR VR
1060 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
1061 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
1062 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
1064 // @todo gross! these are to line up with the other loop
1065 fld st(1) ; inv. inv. inv. inv. UR VR
1066 fld st(2) ; inv. inv. inv. inv. inv. UR VR
1069 // setup delta values
1070 mov eax, Tmap.DeltaV // get v 16.16 step
1071 mov ebx, eax // copy it
1072 sar eax, 16 // get v int step
1073 shl ebx, 16 // get v frac step
1074 mov Tmap.DeltaVFrac, ebx // store it
1075 imul eax, Tmap.src_offset // calc texture step for v int step
1077 mov ebx, Tmap.DeltaU // get u 16.16 step
1078 mov ecx, ebx // copy it
1079 sar ebx, 16 // get the u int step
1080 shl ecx, 16 // get the u frac step
1081 mov Tmap.DeltaUFrac, ecx // store it
1082 add eax, ebx // calc uint + vint step
1083 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1084 add eax, Tmap.src_offset // calc whole step + v carry
1085 mov Tmap.uv_delta[0], eax // save in v-carry slot
1090 ; setup initial coordinates
1091 mov esi, Tmap.UFixed // get u 16.16
1092 mov ebx, esi // copy it
1093 sar esi, 16 // get integer part
1094 shl ebx, 16 // get fractional part
1096 mov ecx, Tmap.VFixed // get v 16.16
1097 mov edx, ecx // copy it
1098 sar edx, 16 // get integer part
1099 shl ecx, 16 // get fractional part
1100 imul edx, Tmap.src_offset // calc texture scanline address
1101 add esi, edx // calc texture offset
1102 add esi, Tmap.pixptr // calc address
1109 // mov edx, Tmap.DeltaUFrac
1113 mov ebx, Tmap.fx_l_right
1119 mov eax, Tmap.fx_dl_dx
1127 sub eax, Tmap.pScreenBits
1132 inc Tmap.WidthModLength
1133 mov eax,Tmap.WidthModLength
1137 mov Tmap.WidthModLength, eax
1141 mov al,[edi] // preread the destination cache line
1143 // Make ESI = DV:DU in 8:8,8:8 format
1144 mov eax, Tmap.DeltaV
1146 mov esi, Tmap.DeltaU
1149 mov Tmap.DeltaUFrac, esi
1151 // Make ECX = V:U in 8:8,8:8 format
1152 mov eax, Tmap.UFixed
1154 mov ecx, Tmap.VFixed
1160 // ecx = V:U in 8.8:8.8
1161 // edx = zbuffer pointer
1163 // edi = screen data
// Leftover pixels, two per pass.
1170 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1171 shr ax, 8 // EAX = V:U in 8.8:8.0
1172 rol eax, 8 // EAX = V:U in 0.0:8:8
1173 and eax, 0ffffh // clear upper bits
1174 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
1178 and eax, 0ffffh // clear upper bits
1179 mov al, gr_fade_table[eax]
1181 add ecx, Tmap.DeltaUFrac
1185 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1186 shr ax, 8 // EAX = V:U in 8.8:8.0
1187 rol eax, 8 // EAX = V:U in 0.0:8:8
1188 and eax, 0ffffh // clear upper bits
1189 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
1193 and eax, 0ffffh // clear upper bits
1194 mov al, gr_fade_table[eax]
1196 add ecx, Tmap.DeltaUFrac
1203 dec Tmap.WidthModLength
// Single-pixel variant of the same fetch.
1211 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1212 shr ax, 8 // EAX = V:U in 8.8:8.0
1213 rol eax, 8 // EAX = V:U in 0.0:8:8
1214 and eax, 0ffffh // clear upper bits
1215 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
1219 and eax, 0ffffh // clear upper bits
1220 mov al, gr_fade_table[eax]
1222 add ecx, Tmap.DeltaUFrac
1228 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
1229 // xxx xxx xxx xxx xxx xxx xxx
1238 fldcw Tmap.OldFPUCW // restore the FPU
1252 // Totally non-general function specifically made for the subspace effect
// Draws one scanline of a tiled texture with no perspective divide: u and v
// advance by a constant step per pixel (Tmap.fx_du_dx / Tmap.fx_dv_dx).
// Only valid for 256-wide textures; Int3() is hit when Tmap.src_offset != 256.
//
// The C loop shows the per-pixel work: wrap u and v with "& 255", fetch
// src[u + v*src_offset], then step fx_u / fx_v.
// The assembly keeps V:U packed as 8.8:8.8 in ECX and the packed step in EDX,
// reads each texel straight from the texture at [eax+esi], and writes four
// destination bytes per pass ([edi+0]..[edi+3]). The pass count is kept in
// Tmap.num_big_steps and the remaining (loop_count & 3) pixels are drawn one
// at a time.
// NOTE(review): num_big_steps is presumably loop_count / 4 -- confirm.
1253 void tmapscan_lnn8_tiled_256x256()
1255 if ( Tmap.src_offset != 256 ) {
1256 Int3(); // This only works on 256 wide textures!
1260 // Tmap.fx_u = fl2f(Tmap.l.u);
1261 // Tmap.fx_v = fl2f(Tmap.l.v);
1262 // Tmap.fx_du_dx = fl2f(Tmap.deltas.u);
1263 // Tmap.fx_dv_dx = fl2f(Tmap.deltas.v);
1267 ubyte * src = (ubyte *)Tmap.pixptr;
1268 ubyte * dst = (ubyte *)Tmap.dest_row_data;
1270 for (i=0; i<Tmap.loop_count; i++ ) {
// "& 255" wraps the coordinate so the texture tiles.
1272 u = f2i(Tmap.fx_u) & 255;
1273 v = f2i(Tmap.fx_v) & 255;
1275 ubyte c = src[u+v*Tmap.src_offset];
1279 Tmap.fx_u += Tmap.fx_du_dx;
1280 Tmap.fx_v += Tmap.fx_dv_dx;
1297 // Need ECX = V.VF:U.UF in 8.8:8.8
1304 // Need EDX = delta V:U in 8.8:8.8
1305 mov eax, Tmap.fx_dv_dx
1307 mov edx, Tmap.fx_du_dx
1311 // Need EDI = pointer to dest row
1312 mov edi, Tmap.dest_row_data
1314 // Need ESI = pointer to texture
1315 mov esi, Tmap.pixptr
1317 // Set up loop counter
1318 mov ebp, Tmap.loop_count
1323 mov Tmap.num_big_steps, ebp
1324 and Tmap.loop_count, 3
1326 // EAX = anything (used as tmp in loop)
1328 // ECX = V.VF:U.UF in 8.8:8.8
1329 // EDX = delta V:U in 8.8:8.8
1330 // ESP = stack pointer (could be saved to Tmap.saved_esp and then used if needed)
1331 // EBP = loop counter
1332 // EDI = pointer to dest row
1333 // ESI = pointer to texture
// Unrolled body: four pixels per pass.
1338 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1339 add ecx, edx // Increment u&v
1340 shr ax, 8 // EAX = V:U in 8.8:8.0
1341 rol eax, 8 // EAX = V:U in 0.0:8:8
1342 and eax, 0ffffh // clear upper bits
1343 mov al, [eax+esi] // Get pixel from texture
1344 mov [edi+0], al // Write pixel to screen
1346 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1347 add ecx, edx // Increment u&v
1348 shr ax, 8 // EAX = V:U in 8.8:8.0
1349 rol eax, 8 // EAX = V:U in 0.0:8:8
1350 and eax, 0ffffh // clear upper bits
1351 mov al, [eax+esi] // Get pixel from texture
1352 mov [edi+1], al // Write pixel to screen
1354 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1355 add ecx, edx // Increment u&v
1356 shr ax, 8 // EAX = V:U in 8.8:8.0
1357 rol eax, 8 // EAX = V:U in 0.0:8:8
1358 and eax, 0ffffh // clear upper bits
1359 mov al, [eax+esi] // Get pixel from texture
1360 mov [edi+2], al // Write pixel to screen
1362 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1363 add ecx, edx // Increment u&v
1364 shr ax, 8 // EAX = V:U in 8.8:8.0
1365 rol eax, 8 // EAX = V:U in 0.0:8:8
1366 and eax, 0ffffh // clear upper bits
1367 mov al, [eax+esi] // Get pixel from texture
1368 mov [edi+3], al // Write pixel to screen
1372 dec Tmap.num_big_steps
// Remaining pixels (loop_count was masked with 3 above), one per pass.
1378 mov ebp,Tmap.loop_count
1381 mov Tmap.loop_count, ebp
1385 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1386 add ecx, edx // Increment u&v
1387 shr ax, 8 // EAX = V:U in 8.8:8.0
1388 rol eax, 8 // EAX = V:U in 0.0:8:8
1389 and eax, 0ffffh // clear upper bits
1390 mov al, [eax+esi] // Get pixel from texture
1391 mov [edi], al // Write pixel to screen
1412 // used only for subspace effect
1414 #define MASK 0x00ff00ff
1417 // not used, but cool
// Dithered variant of the subspace scanline routine for tiled 256-wide
// textures. Takes no arguments and works entirely on fields of Tmap.
// Visible flow:
//   1. Int3() guard when Tmap.src_offset is not 256.
//   2. EDI and ESI are loaded from Tmap.dest_row_data and Tmap.pixptr.
//   3. The FPU control word is saved in Tmap.OldFPUCW and a working copy
//      (Tmap.FPUCW) is loaded.
//   4. Tmap.loop_count pixels are split into 32-pixel subdivisions
//      (Tmap.Subdivisions) plus a final run (Tmap.WidthModLength).
//   5. Each subdivision gets perspective-correct endpoints from the FPU and
//      fixed-point start/delta values (UFixed, VFixed, DeltaU, DeltaV).
//   6. The saved FPU control word is restored at the end.
// NOTE(review): several instructions, labels and braces of this routine are
// not visible in this listing. The comments below describe only what is shown.
1418 void tmapscan_pnn8_tiled_256x256_subspace_dithered()
// Debug trap: the routine is only meant for Tmap.src_offset equal to 256
1420 if ( Tmap.src_offset != 256 ) {
1421 Int3(); // This only works on 256 wide textures!
1440 // Need EDI = pointer to dest row
1441 mov edi, Tmap.dest_row_data
1443 // Need ESI = pointer to texture
1444 mov esi, Tmap.pixptr
1447 // Put the FPU in low precision mode
// NOTE(review): the instruction that edits AX between the load below and the
// store into Tmap.FPUCW is not visible in this listing
1448 fstcw Tmap.OldFPUCW // store copy of CW
1449 mov ax,Tmap.OldFPUCW // get it in ax
1451 mov Tmap.FPUCW,ax // store it
1452 fldcw Tmap.FPUCW // load the FPU
1454 mov ecx, Tmap.loop_count // ecx = width
1456 // edi = pointer to start pixel in dest dib
// Split the width: ecx = count of 32-pixel subdivisions (width >> 5),
// eax = remainder (width & 31). With no remainder, one subdivision is taken
// back and handled as a 32-pixel final run instead.
1459 mov eax,ecx // eax and ecx = width
1460 shr ecx,5 // ecx = width / subdivision length
1461 and eax,31 // eax = width mod subdivision length
1462 jnz some_left_over // any leftover?
1463 dec ecx // no, so special case last span
1464 mov eax,32 // it is a full 32 pixels long
1466 mov Tmap.Subdivisions,ecx // store widths
1467 mov Tmap.WidthModLength,eax
1469 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
1470 // st0 st1 st2 st3 st4 st5 st6 st7
1471 fld Tmap.l.v // V/ZL
1472 fld Tmap.l.u // U/ZL V/ZL
1473 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
1474 fld1 // 1 1/ZL U/ZL V/ZL
1475 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
1476 fld st // ZL ZL 1/ZL U/ZL V/ZL
1477 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
1478 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
1479 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
1481 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
1482 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
1484 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
// The *_wide increments step the 1/Z, U/Z and V/Z terms to the right end of
// the span
1486 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
1487 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
1488 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
1489 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
1490 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
1492 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
1494 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
1495 // @todo overlap this guy
1496 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
1497 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1498 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1499 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1500 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
// No full subdivisions: go straight to the final run
1502 cmp ecx,0 // check for any full spans
1503 jle HandleLeftoverPixels
// NOTE(review): the SpanLoop label targeted by the jnz further down is not
// visible in this listing. It presumably sits here.
1507 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
1508 // UR VR V/ZR 1/ZR U/ZR UL VL
1510 // convert left side coords
1512 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
1513 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
1514 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
1516 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
1517 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
1518 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
1520 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
// Full spans scale the right-minus-left differences by Tmap.FixedScale8.
// The final-run path further down divides by the step count and scales by
// Tmap.FixedScale instead.
1522 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
1523 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
1524 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
1525 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
1527 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
1528 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
1530 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
1531 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
1532 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
1534 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
1535 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
1537 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
1538 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
1539 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
1540 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
1541 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
1542 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
1543 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
1546 // setup delta values
1547 // set up affine registers
1549 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
1550 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
1551 // This divide should happen while the pixel span is drawn.
1552 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
1555 // 32 pixel span code
1556 // edi = dest dib bits at current pixel
1557 // esi = texture pointer at current u,v
// NOTE(review): the four register notes below look stale. The setup comments
// further down describe EDX as DV:DU and ECX as V:U. Confirm and update.
1559 // ebx = u fraction 0.32
1560 // ecx = v fraction 0.32
1561 // edx = u frac step
1562 // ebp = v carry scratch
1564 mov al,[edi] // preread the destination cache line
// 32/4 = 8 passes of the inner loop. Four pixel groups are visible per pass.
1566 mov Tmap.InnerLooper, 32/4 // Set up loop counter
// NOTE(review): the instructions that pack the DeltaV/DeltaU and
// VFixed/UFixed pairs into EDX and ECX are not visible in this listing
1568 // Make EDX = DV:DU in 8:8,8:8 format
1569 mov eax, Tmap.DeltaV
1571 mov edx, Tmap.DeltaU
1575 // Make ECX = V:U in 8:8,8:8 format
1576 mov eax, Tmap.VFixed
1578 mov ecx, Tmap.UFixed
1584 // ecx = V:U in 8.8:8.8
// NOTE(review): the zbuffer note below disagrees with the EDX = DV:DU setup
// comment above. Confirm which one is current.
1585 // edx = zbuffer pointer
1587 // edi = screen data
// Pixel group 1 of 4. Each group perturbs EAX (xor, then and with MASK) and
// then reduces it with shr ax,8 / rol eax,8 / and 0ffffh to a 16-bit value
// built from the integer byte of each 16-bit half.
// NOTE(review): the instructions that load EAX before the xor, those between
// the mask and the shift, and the texel read/write are not visible here.
1594 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1598 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1600 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1603 shr ax, 8 // EAX = V:U in 8.8:8.0
1604 rol eax, 8 // EAX = V:U in 0.0:8:8
1605 and eax, 0ffffh // clear upper bits
// Pixel group 2 of 4
1611 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1615 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1617 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1620 shr ax, 8 // EAX = V:U in 8.8:8.0
1621 rol eax, 8 // EAX = V:U in 0.0:8:8
1622 and eax, 0ffffh // clear upper bits
// Pixel group 3 of 4
1628 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1632 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1634 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1637 shr ax, 8 // EAX = V:U in 8.8:8.0
1638 rol eax, 8 // EAX = V:U in 0.0:8:8
1639 and eax, 0ffffh // clear upper bits
// Pixel group 4 of 4
1645 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1649 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1651 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1654 shr ax, 8 // EAX = V:U in 8.8:8.0
1655 rol eax, 8 // EAX = V:U in 0.0:8:8
1656 and eax, 0ffffh // clear upper bits
// Count down the inner loop (the branch back is not visible in this listing)
1663 dec Tmap.InnerLooper
1668 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
1669 // ZR V/ZR 1/ZR U/ZR UL VL
1671 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1672 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1673 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1674 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
1676 dec Tmap.Subdivisions // decrement span count
1677 jnz SpanLoop // loop back
1680 HandleLeftoverPixels:
1682 // edi = dest dib bits
1683 // esi = current texture dib bits
1684 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
1685 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
1687 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
1688 jz FPUReturn ; nope, pop the FPU and bail
1690 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
1692 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
1693 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
1694 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
1696 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
1697 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
1698 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
// WidthModLength temporarily becomes the step count (pixels - 1). It is the
// divisor for both fidiv instructions below.
1700 dec Tmap.WidthModLength ; calc how many steps to take
1701 jz OnePixelSpan ; just one, don't do deltas'
1703 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
1706 // @todo rearrange things so we don't need these two instructions
1707 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
1708 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
// Right-edge terms come from Tmap.r minus one Tmap.deltas step, not from the
// *_wide increments used for full spans
1710 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
1711 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
1712 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
1713 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
1714 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
1715 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
1717 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
1719 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
1720 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
1722 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1724 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
1725 fxch st(1) ; VR UR inv. inv. inv. dU VL
1726 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
1727 fxch st(6) ; dV UR inv. inv. inv. dU VR
1729 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
1730 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
1731 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
1733 fxch st(4) ; dU inv. inv. inv. UR VR
1734 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
1735 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
1736 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
1738 // @todo gross! these are to line up with the other loop
1739 fld st(1) ; inv. inv. inv. inv. UR VR
1740 fld st(2) ; inv. inv. inv. inv. inv. UR VR
// NOTE(review): as in the full-span path, the instructions that pack these
// values into EDX and ECX are not visible in this listing
1745 // Make EDX = DV:DU in 8:8,8:8 format
1746 mov eax, Tmap.DeltaV
1748 mov edx, Tmap.DeltaU
1752 // Make ECX = V:U in 8:8,8:8 format
1753 mov eax, Tmap.VFixed
1755 mov ecx, Tmap.UFixed
// Undo the earlier decrement so WidthModLength is the pixel count again.
// NOTE(review): whatever adjusts EAX between the load and the store below is
// not visible in this listing.
1759 inc Tmap.WidthModLength
1760 mov eax,Tmap.WidthModLength
1764 mov Tmap.WidthModLength, eax
1768 // ecx = V:U in 8.8:8.8
// NOTE(review): the zbuffer note below disagrees with the EDX = DV:DU setup
// comment above. Confirm which one is current.
1769 // edx = zbuffer pointer
1771 // edi = screen data
// Final-run loop body: two pixel groups are visible before the counter is
// decremented
1777 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1781 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1783 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1786 shr ax, 8 // EAX = V:U in 8.8:8.0
1787 rol eax, 8 // EAX = V:U in 0.0:8:8
1788 and eax, 0ffffh // clear upper bits
1794 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1798 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1800 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1803 shr ax, 8 // EAX = V:U in 8.8:8.0
1804 rol eax, 8 // EAX = V:U in 0.0:8:8
1805 and eax, 0ffffh // clear upper bits
1812 dec Tmap.WidthModLength
// Single trailing pixel group. Unlike the groups above, it starts from a live
// copy of ECX.
// NOTE(review): this is presumably the OnePixelSpan target. The label is not
// visible in this listing.
1820 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1824 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1826 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1829 shr ax, 8 // EAX = V:U in 8.8:8.0
1830 rol eax, 8 // EAX = V:U in 0.0:8:8
1831 and eax, 0ffffh // clear upper bits
1840 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
1841 // xxx xxx xxx xxx xxx xxx xxx
// Put back the control word saved by the fstcw at the top of the routine
1850 fldcw Tmap.OldFPUCW // restore the FPU
1864 void tmapscan_pnn8_tiled_256x256_subspace()
1866 if ( Tmap.src_offset != 256 ) {
1867 Int3(); // This only works on 256 wide textures!
1884 // Need EDI = pointer to dest row
1885 mov edi, Tmap.dest_row_data
1887 // Need ESI = pointer to texture
1888 mov esi, Tmap.pixptr
1891 // Put the FPU in low precision mode
1892 fstcw Tmap.OldFPUCW // store copy of CW
1893 mov ax,Tmap.OldFPUCW // get it in ax
1895 mov Tmap.FPUCW,ax // store it
1896 fldcw Tmap.FPUCW // load the FPU
1898 mov ecx, Tmap.loop_count // ecx = width
1900 // edi = pointer to start pixel in dest dib
1903 mov eax,ecx // eax and ecx = width
1904 shr ecx,5 // ecx = width / subdivision length
1905 and eax,31 // eax = width mod subdivision length
1906 jnz some_left_over // any leftover?
1907 dec ecx // no, so special case last span
1908 mov eax,32 // it's 8 pixels long
1910 mov Tmap.Subdivisions,ecx // store widths
1911 mov Tmap.WidthModLength,eax
1913 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
1914 // st0 st1 st2 st3 st4 st5 st6 st7
1915 fld Tmap.l.v // V/ZL
1916 fld Tmap.l.u // U/ZL V/ZL
1917 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
1918 fld1 // 1 1/ZL U/ZL V/ZL
1919 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
1920 fld st // ZL ZL 1/ZL U/ZL V/ZL
1921 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
1922 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
1923 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
1925 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
1926 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
1928 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
1930 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
1931 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
1932 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
1933 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
1934 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
1936 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
1938 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
1939 // @todo overlap this guy
1940 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
1941 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1942 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1943 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1944 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
1946 cmp ecx,0 // check for any full spans
1947 jle HandleLeftoverPixels
1951 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
1952 // UR VR V/ZR 1/ZR U/ZR UL VL
1954 // convert left side coords
1956 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
1957 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
1958 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
1960 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
1961 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
1962 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
1964 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1966 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
1967 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
1968 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
1969 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
1971 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
1972 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
1974 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
1975 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
1976 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
1978 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
1979 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
1981 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
1982 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
1983 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
1984 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
1985 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
1986 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
1987 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
1990 // setup delta values
1991 // set up affine registers
1993 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
1994 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
1995 // This divide should happen while the pixel span is drawn.
1996 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
1999 // 8 pixel span code
2000 // edi = dest dib bits at current pixel
2001 // esi = texture pointer at current u,v
2003 // ebx = u fraction 0.32
2004 // ecx = v fraction 0.32
2005 // edx = u frac step
2006 // ebp = v carry scratch
2008 mov al,[edi] // preread the destination cache line
2010 mov Tmap.InnerLooper, 32/4 // Set up loop counter
2012 // Make EDX = DV:DU in 8:8,8:8 format
2013 mov eax, Tmap.DeltaV
2015 mov edx, Tmap.DeltaU
2019 // Make ECX = V:U in 8:8,8:8 format
2020 mov eax, Tmap.VFixed
2022 mov ecx, Tmap.UFixed
2028 // ecx = V:U in 8.8:8.8
2029 // edx = zbuffer pointer
2031 // edi = screen data
2038 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2040 shr ax, 8 // EAX = V:U in 8.8:8.0
2041 rol eax, 8 // EAX = V:U in 0.0:8:8
2042 and eax, 0ffffh // clear upper bits
2048 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2050 shr ax, 8 // EAX = V:U in 8.8:8.0
2051 rol eax, 8 // EAX = V:U in 0.0:8:8
2052 and eax, 0ffffh // clear upper bits
2058 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2060 shr ax, 8 // EAX = V:U in 8.8:8.0
2061 rol eax, 8 // EAX = V:U in 0.0:8:8
2062 and eax, 0ffffh // clear upper bits
2068 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2070 shr ax, 8 // EAX = V:U in 8.8:8.0
2071 rol eax, 8 // EAX = V:U in 0.0:8:8
2072 and eax, 0ffffh // clear upper bits
2079 dec Tmap.InnerLooper
2084 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
2085 // ZR V/ZR 1/ZR U/ZR UL VL
2087 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
2088 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
2089 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
2090 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
2092 dec Tmap.Subdivisions // decrement span count
2093 jnz SpanLoop // loop back
2096 HandleLeftoverPixels:
2098 // edi = dest dib bits
2099 // esi = current texture dib bits
2100 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
2101 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
2103 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
2104 jz FPUReturn ; nope, pop the FPU and bail
2106 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
2108 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
2109 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
2110 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
2112 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
2113 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
2114 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
2116 dec Tmap.WidthModLength ; calc how many steps to take
2117 jz OnePixelSpan ; just one, don't do deltas'
2119 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
2122 // @todo rearrange things so we don't need these two instructions
2123 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
2124 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
2126 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
2127 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
2128 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
2129 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
2130 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
2131 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
2133 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
2135 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
2136 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
2138 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
2140 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
2141 fxch st(1) ; VR UR inv. inv. inv. dU VL
2142 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
2143 fxch st(6) ; dV UR inv. inv. inv. dU VR
2145 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
2146 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
2147 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
2149 fxch st(4) ; dU inv. inv. inv. UR VR
2150 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
2151 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
2152 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
2154 // @todo gross! these are to line up with the other loop
2155 fld st(1) ; inv. inv. inv. inv. UR VR
2156 fld st(2) ; inv. inv. inv. inv. inv. UR VR
2161 // Make EDX = DV:DU in 8:8,8:8 format
2162 mov eax, Tmap.DeltaV
2164 mov edx, Tmap.DeltaU
2168 // Make ECX = V:U in 8:8,8:8 format
2169 mov eax, Tmap.VFixed
2171 mov ecx, Tmap.UFixed
2175 inc Tmap.WidthModLength
2176 mov eax,Tmap.WidthModLength
2180 mov Tmap.WidthModLength, eax
2184 // ecx = V:U in 8.8:8.8
2185 // edx = zbuffer pointer
2187 // edi = screen data
2193 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2195 shr ax, 8 // EAX = V:U in 8.8:8.0
2196 rol eax, 8 // EAX = V:U in 0.0:8:8
2197 and eax, 0ffffh // clear upper bits
2203 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2205 shr ax, 8 // EAX = V:U in 8.8:8.0
2206 rol eax, 8 // EAX = V:U in 0.0:8:8
2207 and eax, 0ffffh // clear upper bits
2214 dec Tmap.WidthModLength
2222 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2224 shr ax, 8 // EAX = V:U in 8.8:8.0
2225 rol eax, 8 // EAX = V:U in 0.0:8:8
2226 and eax, 0ffffh // clear upper bits
2233 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
2234 // xxx xxx xxx xxx xxx xxx xxx
2243 fldcw Tmap.OldFPUCW // restore the FPU