2 * Copyright (C) Volition, Inc. 1999. All rights reserved.
4 * All source code herein is the property of Volition, Inc. You may not sell
5 * or otherwise commercially exploit the source or things you created based on
10 * $Logfile: /Freespace2/code/Graphics/TmapScanTiled256x256.cpp $
15 * Routines for drawing tiled 256x256 textures
18 * Revision 1.2 2002/06/09 04:41:18 relnev
19 * added copyright header
21 * Revision 1.1.1.1 2002/05/03 03:28:09 root
25 * 4 11/30/98 5:31p Dave
26 * Fixed up Fred support for software mode.
28 * 3 11/30/98 1:07p Dave
29 * 16 bit conversion, first run.
31 * 2 10/07/98 10:53a Dave
34 * 1 10/07/98 10:49a Dave
36 * 11 5/13/98 2:53p John
37 * Made subspace effect work under software. Had to add new inner loop to
38 * tmapper. Added glows to end of subspace effect. Made subspace effect
39 * levels use gamepalette-subspace palette.
41 * 10 4/23/98 9:55a John
42 * Fixed some bugs in the tiled tmapper causing bright dots to appear all
45 * 9 3/10/98 4:19p John
46 * Cleaned up graphics lib. Took out most unused gr functions. Made D3D
47 * & Glide have popups and print screen. Took out all >8bpp software
48 * support. Made Fred zbuffer. Made zbuffer allocate dynamically to
49 * support Fred. Made zbuffering key off of functions rather than one
52 * 8 1/27/98 5:13p John
53 * Moved all float to int conversions out of inner loops and into outer.
54 * Made outer loop use FISTP instead of ftol, saved about 10%.
56 * 7 1/23/98 5:08p John
57 * Took L out of vertex structure used B (blue) instead. Took all small
58 * fireballs out of fireball types and used particles instead. Fixed some
59 * debris explosion things. Restructured fireball code. Restructured
60 * some lighting code. Made dynamic lighting on by default. Made groups
61 * of lasers only cast one light. Made fireballs not cast light.
63 * 6 12/04/97 10:38a John
64 * Fixed tiled texture mappers that were swapping uvs.
66 * 5 10/14/97 9:19a John
67 * removed fdiv warnings.
69 * 4 9/10/97 11:38a Sandeep
71 * 3 9/09/97 3:39p Sandeep
72 * warning level 4 bugs
74 * 2 5/12/97 12:27p John
75 * Restructured Graphics Library to add support for multiple renderers.
77 * 1 4/24/97 4:42p John
78 * Initial version of the tiled texture mappers for 64 & 128 wide
87 #include "grinternal.h"
89 #include "tmapscanline.h"
95 // Needed to keep warning 4725 away. See PsTypes.h for the details.
96 void disable_warning_4725_stub_tst256()
// Draws one z-buffered scanline of a tiled 256x256 texture.
//
// Inputs are read from the global Tmap state: loop_count is the scanline width
// in pixels and dest_row_data is the destination pointer. The width is split
// into spans of 32 pixels plus a leftover span. At each span boundary the FPU
// recovers U and V by dividing U/Z and V/Z by 1/Z; the stepping between span
// boundaries is done in fixed point. Texture coordinates are carried in ecx as
// packed V:U in 8.8:8.8 format.
//
// For every pixel the z value in esi is compared against the z-buffer entry at
// [edx+n]; when esi is less than or equal to the stored value (signed compare)
// the pixel is skipped, otherwise the z value is written and the texel is
// looked up and passed through gr_fade_table.
//
// The FPU control word is saved in Tmap.OldFPUCW on entry and reloaded from it
// at the end of the routine.
100 void tmapscan_pln8_zbuffered_tiled_256x256()
115 // Put the FPU in low precision mode
116 fstcw Tmap.OldFPUCW // store copy of CW
117 mov ax,Tmap.OldFPUCW // get it in ax
119 mov Tmap.FPUCW,ax // store it
120 fldcw Tmap.FPUCW // load the FPU
123 mov ecx, Tmap.loop_count // ecx = width
124 mov edi, Tmap.dest_row_data // edi = dest pointer
126 // edi = pointer to start pixel in dest dib
// Split the width into full 32-pixel spans (ecx) and a leftover count (eax).
// When the width is an exact multiple of 32, the last full span is handled by
// the leftover path instead, with a length of 32.
129 mov eax,ecx // eax and ecx = width
130 shr ecx,5 // ecx = width / subdivision length
131 and eax,31 // eax = width mod subdivision length
132 jnz some_left_over // any leftover?
133 dec ecx // no, so special case last span
134 mov eax,32 // it's 32 pixels long
136 mov Tmap.Subdivisions,ecx // store widths
137 mov Tmap.WidthModLength,eax
139 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
140 // st0 st1 st2 st3 st4 st5 st6 st7
142 fld Tmap.l.u // U/ZL V/ZL
143 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
144 fld1 // 1 1/ZL U/ZL V/ZL
145 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
146 fld st // ZL ZL 1/ZL U/ZL V/ZL
147 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
148 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
149 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
151 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
152 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
154 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
156 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
157 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
158 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
159 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
160 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
162 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
164 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
165 // @todo overlap this guy
166 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
167 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
168 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
169 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
170 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
172 cmp ecx,0 // check for any full spans
173 jle HandleLeftoverPixels
177 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
178 // UR VR V/ZR 1/ZR U/ZR UL VL
180 // convert left side coords
182 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
183 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
184 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
186 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
187 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
188 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
190 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
192 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
193 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
194 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
195 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
197 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
198 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
200 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
201 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
202 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
204 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
205 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
207 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
208 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
209 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
210 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
211 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
212 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
213 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
216 // setup delta values
218 mov eax,Tmap.DeltaV // get v 16.16 step
219 mov ebx,eax // copy it
220 sar eax,16 // get v int step
221 shl ebx,16 // get v frac step
222 mov Tmap.DeltaVFrac,ebx // store it
223 imul eax,Tmap.src_offset // calculate texture step for v int step
225 mov ebx,Tmap.DeltaU // get u 16.16 step
226 mov ecx,ebx // copy it
227 sar ebx,16 // get u int step
228 shl ecx,16 // get u frac step
229 mov Tmap.DeltaUFrac,ecx // store it
230 add eax,ebx // calculate uint + vint step
231 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
232 add eax,Tmap.src_offset // calculate whole step + v carry
233 mov Tmap.uv_delta[0],eax // save in v-carry slot
235 // setup initial coordinates
236 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
238 mov ebx,esi // copy it
239 sar esi,16 // get integer part
240 shl ebx,16 // get fractional part
242 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
244 mov edx,ecx // copy it
245 sar edx,16 // get integer part
246 shl ecx,16 // get fractional part
247 imul edx,Tmap.src_offset // calc texture scanline address
248 add esi,edx // calc texture offset
249 add esi,Tmap.pixptr // calc address
251 // set up affine registers
257 mov ebp, Tmap.fx_dl_dx
268 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
269 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
270 // This divide should happen while the pixel span is drawn.
271 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
275 // edi = dest dib bits at current pixel
276 // esi = texture pointer at current u,v
278 // ebx = u fraction 0.32
279 // ecx = v fraction 0.32
281 // ebp = v carry scratch
283 mov al,[edi] // preread the destination cache line
285 mov Tmap.InnerLooper, 32/4 // Set up loop counter
288 sub eax, Tmap.pScreenBits
293 // Make ESI = DV:DU in 8:8,8:8 format
299 mov Tmap.DeltaUFrac, esi // DeltaUFrac is reused: it now holds the packed DV:DU step added to ecx per pixel
301 // Make ECX = V:U in 8:8,8:8 format
312 // ecx = V:U in 8.8:8.8
313 // edx = zbuffer pointer
// Full-span pixel loop, unrolled four pixels per pass (z-buffer offsets 0, 4,
// 8 and 12). Each pixel: z test, z write, texel fetch at the packed V:U
// address, gr_fade_table lookup, then step V:U (ecx) and z (esi).
322 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
323 jle Skip0 // If pixel is covered, skip drawing
325 mov [edx+0], esi // Write z
327 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
328 shr ax, 8 // EAX = V:U in 8.8:8.0
329 rol eax, 8 // EAX = V:U in 0.0:8:8
330 and eax, 0ffffh // clear upper bits
331 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
335 and eax, 0ffffh // clear upper bits
336 mov al, gr_fade_table[eax]
339 add ecx, Tmap.DeltaUFrac
340 add esi, Tmap.fx_dwdx
344 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
345 jle Skip1 // If pixel is covered, skip drawing
347 mov [edx+4], esi // Write z
349 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
350 shr ax, 8 // EAX = V:U in 8.8:8.0
351 rol eax, 8 // EAX = V:U in 0.0:8:8
352 and eax, 0ffffh // clear upper bits
353 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
357 and eax, 0ffffh // clear upper bits
358 mov al, gr_fade_table[eax]
361 add ecx, Tmap.DeltaUFrac
362 add esi, Tmap.fx_dwdx
366 cmp esi, [edx+8] // Compare the Z depth of this pixel with zbuffer
367 jle Skip2 // If pixel is covered, skip drawing
369 mov [edx+8], esi // Write z
371 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
372 shr ax, 8 // EAX = V:U in 8.8:8.0
373 rol eax, 8 // EAX = V:U in 0.0:8:8
374 and eax, 0ffffh // clear upper bits
375 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
379 and eax, 0ffffh // clear upper bits
380 mov al, gr_fade_table[eax]
383 add ecx, Tmap.DeltaUFrac
384 add esi, Tmap.fx_dwdx
388 cmp esi, [edx+12] // Compare the Z depth of this pixel with zbuffer
389 jle Skip3 // If pixel is covered, skip drawing
391 mov [edx+12], esi // Write z
393 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
394 shr ax, 8 // EAX = V:U in 8.8:8.0
395 rol eax, 8 // EAX = V:U in 0.0:8:8
396 and eax, 0ffffh // clear upper bits
397 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
401 and eax, 0ffffh // clear upper bits
402 mov al, gr_fade_table[eax]
405 add ecx, Tmap.DeltaUFrac
406 add esi, Tmap.fx_dwdx
417 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
418 // ZR V/ZR 1/ZR U/ZR UL VL
420 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
421 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
422 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
423 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
425 dec Tmap.Subdivisions // decrement span count
426 jnz SpanLoop // loop back
429 HandleLeftoverPixels:
431 mov esi,Tmap.pixptr // load texture pointer
433 // edi = dest dib bits
434 // esi = current texture dib bits
435 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
436 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
438 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
439 jz FPUReturn ; nope, pop the FPU and bail
441 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
443 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
444 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
445 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
447 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
448 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
449 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
451 dec Tmap.WidthModLength ; calc how many steps to take
452 jz OnePixelSpan ; just one, don't do deltas'
454 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
457 // @todo rearrange things so we don't need these two instructions
458 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
459 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
461 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
462 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
463 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
464 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
465 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
466 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
468 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
470 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
471 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
473 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
475 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
476 fxch st(1) ; VR UR inv. inv. inv. dU VL
477 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
478 fxch st(6) ; dV UR inv. inv. inv. dU VR
480 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
481 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
482 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
484 fxch st(4) ; dU inv. inv. inv. UR VR
485 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
486 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
487 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
489 // @todo gross! these are to line up with the other loop
490 fld st(1) ; inv. inv. inv. inv. UR VR
491 fld st(2) ; inv. inv. inv. inv. inv. UR VR
494 // setup delta values
495 mov eax, Tmap.DeltaV // get v 16.16 step
496 mov ebx, eax // copy it
497 sar eax, 16 // get v int step
498 shl ebx, 16 // get v frac step
499 mov Tmap.DeltaVFrac, ebx // store it
500 imul eax, Tmap.src_offset // calc texture step for v int step
502 mov ebx, Tmap.DeltaU // get u 16.16 step
503 mov ecx, ebx // copy it
504 sar ebx, 16 // get the u int step
505 shl ecx, 16 // get the u frac step
506 mov Tmap.DeltaUFrac, ecx // store it
507 add eax, ebx // calc uint + vint step
508 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
509 add eax, Tmap.src_offset // calc whole step + v carry
510 mov Tmap.uv_delta[0], eax // save in v-carry slot
515 ; setup initial coordinates
516 mov esi, Tmap.UFixed // get u 16.16
517 mov ebx, esi // copy it
518 sar esi, 16 // get integer part
519 shl ebx, 16 // get fractional part
521 mov ecx, Tmap.VFixed // get v 16.16
522 mov edx, ecx // copy it
523 sar edx, 16 // get integer part
524 shl ecx, 16 // get fractional part
525 imul edx, Tmap.src_offset // calc texture scanline address
526 add esi, edx // calc texture offset
527 add esi, Tmap.pixptr // calc address
534 // mov edx, Tmap.DeltaUFrac
538 mov ebx, Tmap.fx_l_right
544 mov eax, Tmap.fx_dl_dx
553 sub eax, Tmap.pScreenBits
558 inc Tmap.WidthModLength
559 mov eax,Tmap.WidthModLength
563 mov Tmap.WidthModLength, eax
567 mov al,[edi] // preread the destination cache line
569 // Make ESI = DV:DU in 8:8,8:8 format
575 mov Tmap.DeltaUFrac, esi // DeltaUFrac is reused: it now holds the packed DV:DU step added to ecx per pixel
577 // Make ECX = V:U in 8:8,8:8 format
588 // ecx = V:U in 8.8:8.8
589 // edx = zbuffer pointer
// Leftover pixels, two per pass (z-buffer offsets 0 and 4).
598 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
599 jle Skip0a // If pixel is covered, skip drawing
601 mov [edx+0], esi // Write z
603 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
604 shr ax, 8 // EAX = V:U in 8.8:8.0
605 rol eax, 8 // EAX = V:U in 0.0:8:8
606 and eax, 0ffffh // clear upper bits
607 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
611 and eax, 0ffffh // clear upper bits
612 mov al, gr_fade_table[eax]
615 add ecx, Tmap.DeltaUFrac
616 add esi, Tmap.fx_dwdx
620 cmp esi, [edx+4] // Compare the Z depth of this pixel with zbuffer
621 jle Skip1a // If pixel is covered, skip drawing
623 mov [edx+4], esi // Write z
625 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
626 shr ax, 8 // EAX = V:U in 8.8:8.0
627 rol eax, 8 // EAX = V:U in 0.0:8:8
628 and eax, 0ffffh // clear upper bits
629 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
633 and eax, 0ffffh // clear upper bits
634 mov al, gr_fade_table[eax]
637 add ecx, Tmap.DeltaUFrac
638 add esi, Tmap.fx_dwdx
645 dec Tmap.WidthModLength
// Leftover pixels, one per pass.
653 cmp esi, [edx+0] // Compare the Z depth of this pixel with zbuffer
654 jle Skip0b // If pixel is covered, skip drawing
656 mov [edx+0], esi // Write z
658 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
659 shr ax, 8 // EAX = V:U in 8.8:8.0
660 rol eax, 8 // EAX = V:U in 0.0:8:8
661 and eax, 0ffffh // clear upper bits
662 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
666 and eax, 0ffffh // clear upper bits
667 mov al, gr_fade_table[eax]
670 add ecx, Tmap.DeltaUFrac
671 add esi, Tmap.fx_dwdx
677 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
678 // xxx xxx xxx xxx xxx xxx xxx
687 fldcw Tmap.OldFPUCW // restore the FPU
// Draws one scanline of a tiled 256x256 texture.
//
// When gr_zbuffering_mode is GR_ZBUFF_FULL, GR_ZBUFF_WRITE or GR_ZBUFF_READ the
// work is handed to tmapscan_pln8_zbuffered_tiled_256x256().
// NOTE(review): all three modes dispatch to the same z-test-and-write routine;
// confirm that write-only and read-only are meant to behave like full mode.
//
// The code after the switch follows the same structure as the z-buffered
// routine: Tmap.loop_count pixels starting at Tmap.dest_row_data are split
// into 32-pixel spans plus a leftover span, U and V are recovered with an FPU
// divide at each span boundary, and texture coordinates are stepped as packed
// V:U in 8.8:8.8 format in ecx. Texels are passed through gr_fade_table. The
// pixel loops as written here contain no z compare or z write.
//
// The FPU control word is saved in Tmap.OldFPUCW on entry and reloaded from it
// at the end of the routine.
700 void tmapscan_pln8_tiled_256x256()
703 switch(gr_zbuffering_mode) {
706 case GR_ZBUFF_FULL: // both
707 tmapscan_pln8_zbuffered_tiled_256x256();
709 case GR_ZBUFF_WRITE: // write only
710 tmapscan_pln8_zbuffered_tiled_256x256();
712 case GR_ZBUFF_READ: // read only
713 tmapscan_pln8_zbuffered_tiled_256x256();
731 // Put the FPU in low precision mode
732 fstcw Tmap.OldFPUCW // store copy of CW
733 mov ax,Tmap.OldFPUCW // get it in ax
735 mov Tmap.FPUCW,ax // store it
736 fldcw Tmap.FPUCW // load the FPU
739 mov ecx, Tmap.loop_count // ecx = width
740 mov edi, Tmap.dest_row_data // edi = dest pointer
742 // edi = pointer to start pixel in dest dib
// Split the width into full 32-pixel spans (ecx) and a leftover count (eax).
// When the width is an exact multiple of 32, the last full span is handled by
// the leftover path instead, with a length of 32.
745 mov eax,ecx // eax and ecx = width
746 shr ecx,5 // ecx = width / subdivision length
747 and eax,31 // eax = width mod subdivision length
748 jnz some_left_over // any leftover?
749 dec ecx // no, so special case last span
750 mov eax,32 // it's 32 pixels long
752 mov Tmap.Subdivisions,ecx // store widths
753 mov Tmap.WidthModLength,eax
755 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
756 // st0 st1 st2 st3 st4 st5 st6 st7
758 fld Tmap.l.u // U/ZL V/ZL
759 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
760 fld1 // 1 1/ZL U/ZL V/ZL
761 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
762 fld st // ZL ZL 1/ZL U/ZL V/ZL
763 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
764 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
765 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
767 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
768 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
770 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
772 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
773 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
774 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
775 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
776 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
778 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
780 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
781 // @todo overlap this guy
782 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
783 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
784 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
785 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
786 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
788 cmp ecx,0 // check for any full spans
789 jle HandleLeftoverPixels
793 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
794 // UR VR V/ZR 1/ZR U/ZR UL VL
796 // convert left side coords
798 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
799 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
800 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
802 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
803 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
804 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
806 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
808 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
809 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
810 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
811 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
813 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
814 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
816 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
817 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
818 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
820 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
821 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
823 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
824 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
825 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
826 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
827 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
828 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
829 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
832 // setup delta values
834 mov eax,Tmap.DeltaV // get v 16.16 step
835 mov ebx,eax // copy it
836 sar eax,16 // get v int step
837 shl ebx,16 // get v frac step
838 mov Tmap.DeltaVFrac,ebx // store it
839 imul eax,Tmap.src_offset // calculate texture step for v int step
841 mov ebx,Tmap.DeltaU // get u 16.16 step
842 mov ecx,ebx // copy it
843 sar ebx,16 // get u int step
844 shl ecx,16 // get u frac step
845 mov Tmap.DeltaUFrac,ecx // store it
846 add eax,ebx // calculate uint + vint step
847 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
848 add eax,Tmap.src_offset // calculate whole step + v carry
849 mov Tmap.uv_delta[0],eax // save in v-carry slot
851 // setup initial coordinates
852 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
854 mov ebx,esi // copy it
855 sar esi,16 // get integer part
856 shl ebx,16 // get fractional part
858 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
860 mov edx,ecx // copy it
861 sar edx,16 // get integer part
862 shl ecx,16 // get fractional part
863 imul edx,Tmap.src_offset // calc texture scanline address
864 add esi,edx // calc texture offset
865 add esi,Tmap.pixptr // calc address
867 // set up affine registers
873 mov ebp, Tmap.fx_dl_dx
884 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
885 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
886 // This divide should happen while the pixel span is drawn.
887 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
891 // edi = dest dib bits at current pixel
892 // esi = texture pointer at current u,v
894 // ebx = u fraction 0.32
895 // ecx = v fraction 0.32
897 // ebp = v carry scratch
899 mov al,[edi] // preread the destination cache line
901 mov Tmap.InnerLooper, 32/4 // Set up loop counter
904 sub eax, Tmap.pScreenBits
909 // Make ESI = DV:DU in 8:8,8:8 format
915 mov Tmap.DeltaUFrac, esi // DeltaUFrac is reused: it now holds the packed DV:DU step added to ecx per pixel
917 // Make ECX = V:U in 8:8,8:8 format
927 // ecx = V:U in 8.8:8.8
928 // edx = zbuffer pointer
// Full-span pixel loop, unrolled four pixels per pass: texel fetch at the
// packed V:U address, gr_fade_table lookup, then step V:U (ecx).
937 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
938 shr ax, 8 // EAX = V:U in 8.8:8.0
939 rol eax, 8 // EAX = V:U in 0.0:8:8
940 and eax, 0ffffh // clear upper bits
941 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
945 and eax, 0ffffh // clear upper bits
946 mov al, gr_fade_table[eax]
948 add ecx, Tmap.DeltaUFrac
952 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
953 shr ax, 8 // EAX = V:U in 8.8:8.0
954 rol eax, 8 // EAX = V:U in 0.0:8:8
955 and eax, 0ffffh // clear upper bits
956 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
960 and eax, 0ffffh // clear upper bits
961 mov al, gr_fade_table[eax]
963 add ecx, Tmap.DeltaUFrac
967 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
968 shr ax, 8 // EAX = V:U in 8.8:8.0
969 rol eax, 8 // EAX = V:U in 0.0:8:8
970 and eax, 0ffffh // clear upper bits
971 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
975 and eax, 0ffffh // clear upper bits
976 mov al, gr_fade_table[eax]
978 add ecx, Tmap.DeltaUFrac
982 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
983 shr ax, 8 // EAX = V:U in 8.8:8.0
984 rol eax, 8 // EAX = V:U in 0.0:8:8
985 and eax, 0ffffh // clear upper bits
986 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
990 and eax, 0ffffh // clear upper bits
991 mov al, gr_fade_table[eax]
993 add ecx, Tmap.DeltaUFrac
1003 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
1004 // ZR V/ZR 1/ZR U/ZR UL VL
1006 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1007 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1008 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1009 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
1011 dec Tmap.Subdivisions // decrement span count
1012 jnz SpanLoop // loop back
1015 HandleLeftoverPixels:
1017 mov esi,Tmap.pixptr // load texture pointer
1019 // edi = dest dib bits
1020 // esi = current texture dib bits
1021 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
1022 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
1024 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
1025 jz FPUReturn ; nope, pop the FPU and bail
1027 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
1029 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
1030 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
1031 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
1033 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
1034 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
1035 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
1037 dec Tmap.WidthModLength ; calc how many steps to take
1038 jz OnePixelSpan ; just one, don't do deltas'
1040 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
1043 // @todo rearrange things so we don't need these two instructions
1044 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
1045 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
1047 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
1048 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
1049 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
1050 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
1051 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
1052 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
1054 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
1056 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
1057 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
1059 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1061 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
1062 fxch st(1) ; VR UR inv. inv. inv. dU VL
1063 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
1064 fxch st(6) ; dV UR inv. inv. inv. dU VR
1066 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
1067 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
1068 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
1070 fxch st(4) ; dU inv. inv. inv. UR VR
1071 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
1072 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
1073 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
1075 // @todo gross! these are to line up with the other loop
1076 fld st(1) ; inv. inv. inv. inv. UR VR
1077 fld st(2) ; inv. inv. inv. inv. inv. UR VR
1080 // setup delta values
1081 mov eax, Tmap.DeltaV // get v 16.16 step
1082 mov ebx, eax // copy it
1083 sar eax, 16 // get v int step
1084 shl ebx, 16 // get v frac step
1085 mov Tmap.DeltaVFrac, ebx // store it
1086 imul eax, Tmap.src_offset // calc texture step for v int step
1088 mov ebx, Tmap.DeltaU // get u 16.16 step
1089 mov ecx, ebx // copy it
1090 sar ebx, 16 // get the u int step
1091 shl ecx, 16 // get the u frac step
1092 mov Tmap.DeltaUFrac, ecx // store it
1093 add eax, ebx // calc uint + vint step
1094 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1095 add eax, Tmap.src_offset // calc whole step + v carry
1096 mov Tmap.uv_delta[0], eax // save in v-carry slot
1101 ; setup initial coordinates
1102 mov esi, Tmap.UFixed // get u 16.16
1103 mov ebx, esi // copy it
1104 sar esi, 16 // get integer part
1105 shl ebx, 16 // get fractional part
1107 mov ecx, Tmap.VFixed // get v 16.16
1108 mov edx, ecx // copy it
1109 sar edx, 16 // get integer part
1110 shl ecx, 16 // get fractional part
1111 imul edx, Tmap.src_offset // calc texture scanline address
1112 add esi, edx // calc texture offset
1113 add esi, Tmap.pixptr // calc address
1120 // mov edx, Tmap.DeltaUFrac
1124 mov ebx, Tmap.fx_l_right
1130 mov eax, Tmap.fx_dl_dx
1138 sub eax, Tmap.pScreenBits
1143 inc Tmap.WidthModLength
1144 mov eax,Tmap.WidthModLength
1148 mov Tmap.WidthModLength, eax
1152 mov al,[edi] // preread the destination cache line
1154 // Make ESI = DV:DU in 8:8,8:8 format
1155 mov eax, Tmap.DeltaV
1157 mov esi, Tmap.DeltaU
1160 mov Tmap.DeltaUFrac, esi // DeltaUFrac is reused: it now holds the packed DV:DU step added to ecx per pixel
1162 // Make ECX = V:U in 8:8,8:8 format
1163 mov eax, Tmap.UFixed
1165 mov ecx, Tmap.VFixed
1171 // ecx = V:U in 8.8:8.8
1172 // edx = zbuffer pointer
1174 // edi = screen data
// Leftover pixels, two per pass.
1181 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1182 shr ax, 8 // EAX = V:U in 8.8:8.0
1183 rol eax, 8 // EAX = V:U in 0.0:8:8
1184 and eax, 0ffffh // clear upper bits
1185 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
1189 and eax, 0ffffh // clear upper bits
1190 mov al, gr_fade_table[eax]
1192 add ecx, Tmap.DeltaUFrac
1196 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1197 shr ax, 8 // EAX = V:U in 8.8:8.0
1198 rol eax, 8 // EAX = V:U in 0.0:8:8
1199 and eax, 0ffffh // clear upper bits
1200 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
1204 and eax, 0ffffh // clear upper bits
1205 mov al, gr_fade_table[eax]
1207 add ecx, Tmap.DeltaUFrac
1214 dec Tmap.WidthModLength
// Leftover pixels, one per pass.
1222 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1223 shr ax, 8 // EAX = V:U in 8.8:8.0
1224 rol eax, 8 // EAX = V:U in 0.0:8:8
1225 and eax, 0ffffh // clear upper bits
1226 add eax, Tmap.pixptr // EAX = (V*256)+U + Pixptr
1230 and eax, 0ffffh // clear upper bits
1231 mov al, gr_fade_table[eax]
1233 add ecx, Tmap.DeltaUFrac
1239 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
1240 // xxx xxx xxx xxx xxx xxx xxx
1249 fldcw Tmap.OldFPUCW // restore the FPU
1263 // Totally non-general function specifically made for the subspace effect
//
// Copies one scanline of a 256-wide texture to Tmap.dest_row_data with a
// constant per-pixel texture step (Tmap.fx_du_dx / Tmap.fx_dv_dx); no FPU
// divide is performed per span. Traps with Int3() when Tmap.src_offset is not
// 256, because the texel address is formed directly from the packed V:U value
// as (V*256)+U and masked to 16 bits, which also makes the texture wrap.
//
// Pixels are written four at a time while Tmap.num_big_steps counts down, then
// one at a time for the remaining (Tmap.loop_count & 3) pixels.
1264 void tmapscan_lnn8_tiled_256x256()
1266 if ( Tmap.src_offset != 256 ) {
1267 Int3(); // This only works on 256 wide textures!
1271 // Tmap.fx_u = fl2f(Tmap.l.u);
1272 // Tmap.fx_v = fl2f(Tmap.l.v);
1273 // Tmap.fx_du_dx = fl2f(Tmap.deltas.u);
1274 // Tmap.fx_dv_dx = fl2f(Tmap.deltas.v);
1278 ubyte * src = (ubyte *)Tmap.pixptr;
1279 ubyte * dst = (ubyte *)Tmap.dest_row_data;
1281 for (i=0; i<Tmap.loop_count; i++ ) {
1283 u = f2i(Tmap.fx_u) & 255;
1284 v = f2i(Tmap.fx_v) & 255;
1286 ubyte c = src[u+v*Tmap.src_offset];
1290 Tmap.fx_u += Tmap.fx_du_dx;
1291 Tmap.fx_v += Tmap.fx_dv_dx;
1308 // Need ECX = V.VF:U.UF in 8.8:8.8
1315 // Need EDX = delta V:U in 8.8:8.8
1316 mov eax, Tmap.fx_dv_dx
1318 mov edx, Tmap.fx_du_dx
1322 // Need EDI = pointer to dest row
1323 mov edi, Tmap.dest_row_data
1325 // Need ESI = pointer to texture
1326 mov esi, Tmap.pixptr
1328 // Set up loop counter
1329 mov ebp, Tmap.loop_count
1334 mov Tmap.num_big_steps, ebp
1335 and Tmap.loop_count, 3 // keep only the pixels left over after the 4-pixel groups
1337 // EAX = anything (used as tmp in loop)
1339 // ECX = V.VF:U.UF in 8.8:8.8
1340 // EDX = delta V:U in 8.8:8.8
1341 // ESP = stack pointer (could be saved to Tmap.saved_esp and then used if needed)
1342 // EBP = loop counter
1343 // EDI = pointer to dest row
1344 // ESI = pointer to texture
// Four-pixel unrolled loop: each pixel copies the packed V:U to eax, steps
// ecx, reduces eax to the 16-bit texel offset and copies one texel.
1349 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1350 add ecx, edx // Increment u&v
1351 shr ax, 8 // EAX = V:U in 8.8:8.0
1352 rol eax, 8 // EAX = V:U in 0.0:8:8
1353 and eax, 0ffffh // clear upper bits
1354 mov al, [eax+esi] // Get pixel from texture
1355 mov [edi+0], al // Write pixel to screen
1357 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1358 add ecx, edx // Increment u&v
1359 shr ax, 8 // EAX = V:U in 8.8:8.0
1360 rol eax, 8 // EAX = V:U in 0.0:8:8
1361 and eax, 0ffffh // clear upper bits
1362 mov al, [eax+esi] // Get pixel from texture
1363 mov [edi+1], al // Write pixel to screen
1365 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1366 add ecx, edx // Increment u&v
1367 shr ax, 8 // EAX = V:U in 8.8:8.0
1368 rol eax, 8 // EAX = V:U in 0.0:8:8
1369 and eax, 0ffffh // clear upper bits
1370 mov al, [eax+esi] // Get pixel from texture
1371 mov [edi+2], al // Write pixel to screen
1373 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1374 add ecx, edx // Increment u&v
1375 shr ax, 8 // EAX = V:U in 8.8:8.0
1376 rol eax, 8 // EAX = V:U in 0.0:8:8
1377 and eax, 0ffffh // clear upper bits
1378 mov al, [eax+esi] // Get pixel from texture
1379 mov [edi+3], al // Write pixel to screen
1383 dec Tmap.num_big_steps
// Single-pixel tail loop for the remaining pixels.
1389 mov ebp,Tmap.loop_count
1392 mov Tmap.loop_count, ebp
1396 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1397 add ecx, edx // Increment u&v
1398 shr ax, 8 // EAX = V:U in 8.8:8.0
1399 rol eax, 8 // EAX = V:U in 0.0:8:8
1400 and eax, 0ffffh // clear upper bits
1401 mov al, [eax+esi] // Get pixel from texture
1402 mov [edi], al // Write pixel to screen
1423 // used only for subspace effect
1425 #define MASK 0x00ff00ff
1428 // not used, but cool
//
// tmapscan_pnn8_tiled_256x256_subspace_dithered
//
// Scanline routine that takes no arguments and works entirely on the global
// Tmap state:
//   Tmap.src_offset            - must be 256, otherwise Int3() is hit
//   Tmap.dest_row_data         - loaded into EDI (destination row)
//   Tmap.pixptr                - loaded into ESI (texture)
//   Tmap.loop_count            - width of the span
//   Tmap.l.u / l.v / l.sw      - left edge U/Z, V/Z, 1/Z
//   Tmap.r.* and Tmap.deltas.* - used for the right edge of the final span
//   Tmap.fl_d?dx_wide          - added once per subdivision to the /Z terms
//
// The width is split into subdivisions of 32 (shr 5 / and 31). For each
// subdivision the left and right U,V are obtained with an FPU divide and
// converted to fixed point in Tmap.UFixed/VFixed/DeltaU/DeltaV. The final
// span goes through HandleLeftoverPixels.
//
// The FPU control word is saved to Tmap.OldFPUCW at the start and reloaded
// from it at the end.
1429 void tmapscan_pnn8_tiled_256x256_subspace_dithered()
1431 if ( Tmap.src_offset != 256 ) {
1432 Int3(); // This only works on 256 wide textures!
1451 // Need EDI = pointer to dest row
1452 mov edi, Tmap.dest_row_data
1454 // Need ESI = pointer to texture
1455 mov esi, Tmap.pixptr
1458 // Put the FPU in low precision mode
1459 fstcw Tmap.OldFPUCW // store copy of CW
1460 mov ax,Tmap.OldFPUCW // get it in ax
1462 mov Tmap.FPUCW,ax // store it
1463 fldcw Tmap.FPUCW // load the FPU
1465 mov ecx, Tmap.loop_count // ecx = width
1467 // edi = pointer to start pixel in dest dib
// Split the width: ECX = number of 32-wide subdivisions, EAX = remainder.
// When the remainder is zero, one subdivision is taken away and handled as a
// full-length (32) final span instead.
1470 mov eax,ecx // eax and ecx = width
1471 shr ecx,5 // ecx = width / subdivision length
1472 and eax,31 // eax = width mod subdivision length
1473 jnz some_left_over // any leftover?
1474 dec ecx // no, so special case last span
1475 mov eax,32 // it's 32 pixels long
1477 mov Tmap.Subdivisions,ecx // store widths
1478 mov Tmap.WidthModLength,eax
1480 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
1481 // st0 st1 st2 st3 st4 st5 st6 st7
1482 fld Tmap.l.v // V/ZL
1483 fld Tmap.l.u // U/ZL V/ZL
1484 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
1485 fld1 // 1 1/ZL U/ZL V/ZL
1486 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
1487 fld st // ZL ZL 1/ZL U/ZL V/ZL
1488 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
1489 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
1490 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
1492 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
1493 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
1495 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
1497 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
1498 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
1499 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
1500 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
1501 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
1503 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
1505 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
1506 // @todo overlap this guy
1507 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
1508 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1509 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1510 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1511 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
// ECX still holds the subdivision count stored above; zero or negative means
// there are no full spans and only the final span is drawn.
1513 cmp ecx,0 // check for any full spans
1514 jle HandleLeftoverPixels
1518 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
1519 // UR VR V/ZR 1/ZR U/ZR UL VL
1521 // convert left side coords
1523 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
1524 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
1525 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
1527 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
1528 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
1529 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
1531 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1533 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
1534 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
1535 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
1536 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
// NOTE(review): the whole-span difference is scaled by Tmap.FixedScale8 here,
// whereas the leftover path divides by the step count and uses FixedScale.
// Presumably FixedScale8 folds in the per-span divide - confirm its value
// matches the 32-wide subdivision.
1538 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
1539 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
1541 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
1542 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
1543 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
1545 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
1546 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
1548 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
1549 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
1550 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
1551 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
1552 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
1553 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
1554 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
1557 // setup delta values
1558 // set up affine registers
1560 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
1561 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
1562 // This divide should happen while the pixel span is drawn.
1563 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
1566 // 32 pixel span code
1567 // edi = dest dib bits at current pixel
1568 // esi = texture pointer at current u,v
// NOTE(review): the four register notes below look stale; the code that
// follows loads EDX from Tmap.DeltaU and ECX from Tmap.UFixed (DV:DU and V:U).
1570 // ebx = u fraction 0.32
1571 // ecx = v fraction 0.32
1572 // edx = u frac step
1573 // ebp = v carry scratch
1575 mov al,[edi] // preread the destination cache line
1577 mov Tmap.InnerLooper, 32/4 // Set up loop counter
1579 // Make EDX = DV:DU in 8:8,8:8 format
1580 mov eax, Tmap.DeltaV
1582 mov edx, Tmap.DeltaU
1586 // Make ECX = V:U in 8:8,8:8 format
1587 mov eax, Tmap.VFixed
1589 mov ecx, Tmap.UFixed
1595 // ecx = V:U in 8.8:8.8
// NOTE(review): EDX was loaded from Tmap.DeltaU above, so the "zbuffer
// pointer" note on the next line looks stale - confirm.
1596 // edx = zbuffer pointer
1598 // edi = screen data
1605 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1609 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1611 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1614 shr ax, 8 // EAX = V:U in 8.8:8.0
1615 rol eax, 8 // EAX = V:U in 0.0:8:8
1616 and eax, 0ffffh // clear upper bits
1622 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1626 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1628 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1631 shr ax, 8 // EAX = V:U in 8.8:8.0
1632 rol eax, 8 // EAX = V:U in 0.0:8:8
1633 and eax, 0ffffh // clear upper bits
1639 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1643 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1645 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1648 shr ax, 8 // EAX = V:U in 8.8:8.0
1649 rol eax, 8 // EAX = V:U in 0.0:8:8
1650 and eax, 0ffffh // clear upper bits
1656 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1660 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1662 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1665 shr ax, 8 // EAX = V:U in 8.8:8.0
1666 rol eax, 8 // EAX = V:U in 0.0:8:8
1667 and eax, 0ffffh // clear upper bits
1674 dec Tmap.InnerLooper
1679 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
1680 // ZR V/ZR 1/ZR U/ZR UL VL
1682 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1683 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1684 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1685 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
1687 dec Tmap.Subdivisions // decrement span count
1688 jnz SpanLoop // loop back
1691 HandleLeftoverPixels:
1693 // edi = dest dib bits
1694 // esi = current texture dib bits
1695 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
1696 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
1698 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
1699 jz FPUReturn ; nope, pop the FPU and bail
1701 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
1703 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
1704 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
1705 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
1707 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
1708 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
1709 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
// WidthModLength is turned into a step count (pixels - 1) for the fidiv
// below; it is incremented back before the leftover pixel code runs.
1711 dec Tmap.WidthModLength ; calc how many steps to take
1712 jz OnePixelSpan ; just one, don't do deltas'
1714 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
1717 // @todo rearrange things so we don't need these two instructions
1718 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
1719 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
// The right edge of the final span is taken from Tmap.r.* minus one
// Tmap.deltas.* step, rather than by adding the fl_d?dx_wide increments.
1721 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
1722 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
1723 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
1724 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
1725 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
1726 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
1728 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
1730 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
1731 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
1733 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1735 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
1736 fxch st(1) ; VR UR inv. inv. inv. dU VL
1737 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
1738 fxch st(6) ; dV UR inv. inv. inv. dU VR
// Divide the U and V differences by the step count, then scale to fixed point.
1740 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
1741 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
1742 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
1744 fxch st(4) ; dU inv. inv. inv. UR VR
1745 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
1746 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
1747 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
1749 // @todo gross! these are to line up with the other loop
1750 fld st(1) ; inv. inv. inv. inv. UR VR
1751 fld st(2) ; inv. inv. inv. inv. inv. UR VR
1756 // Make EDX = DV:DU in 8:8,8:8 format
1757 mov eax, Tmap.DeltaV
1759 mov edx, Tmap.DeltaU
1763 // Make ECX = V:U in 8:8,8:8 format
1764 mov eax, Tmap.VFixed
1766 mov ecx, Tmap.UFixed
1770 inc Tmap.WidthModLength
1771 mov eax,Tmap.WidthModLength
1775 mov Tmap.WidthModLength, eax
1779 // ecx = V:U in 8.8:8.8
// NOTE(review): EDX was loaded from Tmap.DeltaU above, so the "zbuffer
// pointer" note on the next line looks stale - confirm.
1780 // edx = zbuffer pointer
1782 // edi = screen data
1788 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1792 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1794 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1797 shr ax, 8 // EAX = V:U in 8.8:8.0
1798 rol eax, 8 // EAX = V:U in 0.0:8:8
1799 and eax, 0ffffh // clear upper bits
1805 // mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1809 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1811 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1814 shr ax, 8 // EAX = V:U in 8.8:8.0
1815 rol eax, 8 // EAX = V:U in 0.0:8:8
1816 and eax, 0ffffh // clear upper bits
1823 dec Tmap.WidthModLength
// NOTE(review): unlike the blocks above, "mov eax, ecx" is live (not
// commented out) in this block - confirm that is intended.
1831 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
1835 xor eax, 0xA3000000 ; This makes 'r' take 2^32 iterations to repeat
1837 and eax, MASK ; mask out all bits except 8.8:8.8 fraction
1840 shr ax, 8 // EAX = V:U in 8.8:8.0
1841 rol eax, 8 // EAX = V:U in 0.0:8:8
1842 and eax, 0ffffh // clear upper bits
1851 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
1852 // xxx xxx xxx xxx xxx xxx xxx
1861 fldcw Tmap.OldFPUCW // restore the FPU
//
// tmapscan_pnn8_tiled_256x256_subspace
//
// Scanline routine that takes no arguments and works entirely on the global
// Tmap state:
//   Tmap.src_offset            - must be 256, otherwise Int3() is hit
//   Tmap.dest_row_data         - loaded into EDI (destination row)
//   Tmap.pixptr                - loaded into ESI (texture)
//   Tmap.loop_count            - width of the span
//   Tmap.l.u / l.v / l.sw      - left edge U/Z, V/Z, 1/Z
//   Tmap.r.* and Tmap.deltas.* - used for the right edge of the final span
//   Tmap.fl_d?dx_wide          - added once per subdivision to the /Z terms
//
// The width is split into subdivisions of 32 (shr 5 / and 31). For each
// subdivision the left and right U,V are obtained with an FPU divide and
// converted to fixed point in Tmap.UFixed/VFixed/DeltaU/DeltaV. The final
// span goes through HandleLeftoverPixels.
//
// The FPU control word is saved to Tmap.OldFPUCW at the start and reloaded
// from it at the end.
1875 void tmapscan_pnn8_tiled_256x256_subspace()
1877 if ( Tmap.src_offset != 256 ) {
1878 Int3(); // This only works on 256 wide textures!
1895 // Need EDI = pointer to dest row
1896 mov edi, Tmap.dest_row_data
1898 // Need ESI = pointer to texture
1899 mov esi, Tmap.pixptr
1902 // Put the FPU in low precision mode
1903 fstcw Tmap.OldFPUCW // store copy of CW
1904 mov ax,Tmap.OldFPUCW // get it in ax
1906 mov Tmap.FPUCW,ax // store it
1907 fldcw Tmap.FPUCW // load the FPU
1909 mov ecx, Tmap.loop_count // ecx = width
1911 // edi = pointer to start pixel in dest dib
// Split the width: ECX = number of 32-wide subdivisions, EAX = remainder.
// When the remainder is zero, one subdivision is taken away and handled as a
// full-length (32) final span instead.
1914 mov eax,ecx // eax and ecx = width
1915 shr ecx,5 // ecx = width / subdivision length
1916 and eax,31 // eax = width mod subdivision length
1917 jnz some_left_over // any leftover?
1918 dec ecx // no, so special case last span
1919 mov eax,32 // it's 32 pixels long
1921 mov Tmap.Subdivisions,ecx // store widths
1922 mov Tmap.WidthModLength,eax
1924 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
1925 // st0 st1 st2 st3 st4 st5 st6 st7
1926 fld Tmap.l.v // V/ZL
1927 fld Tmap.l.u // U/ZL V/ZL
1928 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
1929 fld1 // 1 1/ZL U/ZL V/ZL
1930 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
1931 fld st // ZL ZL 1/ZL U/ZL V/ZL
1932 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
1933 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
1934 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
1936 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
1937 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
1939 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
1941 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
1942 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
1943 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
1944 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
1945 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
1947 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
1949 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
1950 // @todo overlap this guy
1951 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
1952 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1953 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1954 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1955 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
// ECX still holds the subdivision count stored above; zero or negative means
// there are no full spans and only the final span is drawn.
1957 cmp ecx,0 // check for any full spans
1958 jle HandleLeftoverPixels
1962 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
1963 // UR VR V/ZR 1/ZR U/ZR UL VL
1965 // convert left side coords
1967 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
1968 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
1969 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
1971 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
1972 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
1973 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
1975 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1977 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
1978 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
1979 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
1980 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
// NOTE(review): the whole-span difference is scaled by Tmap.FixedScale8 here,
// whereas the leftover path divides by the step count and uses FixedScale.
// Presumably FixedScale8 folds in the per-span divide - confirm its value
// matches the 32-wide subdivision.
1982 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
1983 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
1985 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
1986 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
1987 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
1989 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
1990 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
1992 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
1993 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
1994 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
1995 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
1996 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
1997 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
1998 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
2001 // setup delta values
2002 // set up affine registers
2004 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
2005 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
2006 // This divide should happen while the pixel span is drawn.
2007 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
2010 // 32 pixel span code
2011 // edi = dest dib bits at current pixel
2012 // esi = texture pointer at current u,v
// NOTE(review): the four register notes below look stale; the code that
// follows loads EDX from Tmap.DeltaU and ECX from Tmap.UFixed (DV:DU and V:U).
2014 // ebx = u fraction 0.32
2015 // ecx = v fraction 0.32
2016 // edx = u frac step
2017 // ebp = v carry scratch
2019 mov al,[edi] // preread the destination cache line
2021 mov Tmap.InnerLooper, 32/4 // Set up loop counter
2023 // Make EDX = DV:DU in 8:8,8:8 format
2024 mov eax, Tmap.DeltaV
2026 mov edx, Tmap.DeltaU
2030 // Make ECX = V:U in 8:8,8:8 format
2031 mov eax, Tmap.VFixed
2033 mov ecx, Tmap.UFixed
2039 // ecx = V:U in 8.8:8.8
// NOTE(review): EDX was loaded from Tmap.DeltaU above, so the "zbuffer
// pointer" note on the next line looks stale - confirm.
2040 // edx = zbuffer pointer
2042 // edi = screen data
// Each mov/shr/rol/and group below reduces the packed 8.8:8.8 value in ECX
// to a 16-bit value in EAX: the integer byte of the low word ends up in
// bits 8-15 and the integer byte of the high word in bits 0-7; both
// fractional bytes are discarded.
2049 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2051 shr ax, 8 // EAX = V:U in 8.8:8.0
2052 rol eax, 8 // EAX = V:U in 0.0:8:8
2053 and eax, 0ffffh // clear upper bits
2059 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2061 shr ax, 8 // EAX = V:U in 8.8:8.0
2062 rol eax, 8 // EAX = V:U in 0.0:8:8
2063 and eax, 0ffffh // clear upper bits
2069 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2071 shr ax, 8 // EAX = V:U in 8.8:8.0
2072 rol eax, 8 // EAX = V:U in 0.0:8:8
2073 and eax, 0ffffh // clear upper bits
2079 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2081 shr ax, 8 // EAX = V:U in 8.8:8.0
2082 rol eax, 8 // EAX = V:U in 0.0:8:8
2083 and eax, 0ffffh // clear upper bits
2090 dec Tmap.InnerLooper
2095 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
2096 // ZR V/ZR 1/ZR U/ZR UL VL
2098 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
2099 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
2100 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
2101 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
2103 dec Tmap.Subdivisions // decrement span count
2104 jnz SpanLoop // loop back
2107 HandleLeftoverPixels:
2109 // edi = dest dib bits
2110 // esi = current texture dib bits
2111 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
2112 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
2114 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
2115 jz FPUReturn ; nope, pop the FPU and bail
2117 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
2119 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
2120 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
2121 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
2123 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
2124 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
2125 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
// WidthModLength is turned into a step count (pixels - 1) for the fidiv
// below; it is incremented back before the leftover pixel code runs.
2127 dec Tmap.WidthModLength ; calc how many steps to take
2128 jz OnePixelSpan ; just one, don't do deltas'
2130 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
2133 // @todo rearrange things so we don't need these two instructions
2134 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
2135 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
// The right edge of the final span is taken from Tmap.r.* minus one
// Tmap.deltas.* step, rather than by adding the fl_d?dx_wide increments.
2137 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
2138 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
2139 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
2140 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
2141 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
2142 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
2144 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
2146 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
2147 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
2149 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
2151 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
2152 fxch st(1) ; VR UR inv. inv. inv. dU VL
2153 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
2154 fxch st(6) ; dV UR inv. inv. inv. dU VR
// Divide the U and V differences by the step count, then scale to fixed point.
2156 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
2157 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
2158 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
2160 fxch st(4) ; dU inv. inv. inv. UR VR
2161 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
2162 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
2163 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
2165 // @todo gross! these are to line up with the other loop
2166 fld st(1) ; inv. inv. inv. inv. UR VR
2167 fld st(2) ; inv. inv. inv. inv. inv. UR VR
2172 // Make EDX = DV:DU in 8:8,8:8 format
2173 mov eax, Tmap.DeltaV
2175 mov edx, Tmap.DeltaU
2179 // Make ECX = V:U in 8:8,8:8 format
2180 mov eax, Tmap.VFixed
2182 mov ecx, Tmap.UFixed
2186 inc Tmap.WidthModLength
2187 mov eax,Tmap.WidthModLength
2191 mov Tmap.WidthModLength, eax
2195 // ecx = V:U in 8.8:8.8
// NOTE(review): EDX was loaded from Tmap.DeltaU above, so the "zbuffer
// pointer" note on the next line looks stale - confirm.
2196 // edx = zbuffer pointer
2198 // edi = screen data
2204 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2206 shr ax, 8 // EAX = V:U in 8.8:8.0
2207 rol eax, 8 // EAX = V:U in 0.0:8:8
2208 and eax, 0ffffh // clear upper bits
2214 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2216 shr ax, 8 // EAX = V:U in 8.8:8.0
2217 rol eax, 8 // EAX = V:U in 0.0:8:8
2218 and eax, 0ffffh // clear upper bits
2225 dec Tmap.WidthModLength
2233 mov eax, ecx // EAX = V.VF:U.UF in 8.8:8.8
2235 shr ax, 8 // EAX = V:U in 8.8:8.0
2236 rol eax, 8 // EAX = V:U in 0.0:8:8
2237 and eax, 0ffffh // clear upper bits
2244 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
2245 // xxx xxx xxx xxx xxx xxx xxx
2254 fldcw Tmap.OldFPUCW // restore the FPU