2 * Copyright (C) Volition, Inc. 1999. All rights reserved.
4 * All source code herein is the property of Volition, Inc. You may not sell
5 * or otherwise commercially exploit the source or things you created based on
10 * $Logfile: /Freespace2/code/Graphics/TmapScanline.cpp $
15 * Routines to draw one textured mapped scanline.
18 * Revision 1.2 2002/06/09 04:41:18 relnev
19 * added copyright header
21 * Revision 1.1.1.1 2002/05/03 03:28:09 root
25 * 5 12/02/98 5:47p Dave
26 * Put in interface xstr code. Converted barracks screen to new format.
28 * 4 11/30/98 5:31p Dave
29 * Fixed up Fred support for software mode.
31 * 3 11/30/98 1:07p Dave
32 * 16 bit conversion, first run.
34 * 2 10/07/98 10:53a Dave
37 * 1 10/07/98 10:49a Dave
39 * 21 4/20/98 4:44p John
40 * Fixed problems with black being xparent on model cache renders. Made
41 * model cache key off of detail level setting and framerate.
43 * 20 4/09/98 7:58p John
44 * Cleaned up tmapper code a bit. Put NDEBUG around some ndebug stuff.
45 * Took out XPARENT flag settings in all the alpha-blended texture stuff.
47 * 19 3/22/98 2:33p John
48 * Took out fx_v/v_right. Made fx_u etc get calculated in tmapper.
50 * 18 3/10/98 4:19p John
51 * Cleaned up graphics lib. Took out most unused gr functions. Made D3D
52 * & Glide have popups and print screen. Took out all >8bpp software
53 * support. Made Fred zbuffer. Made zbuffer allocate dynamically to
54 * support Fred. Made zbuffering key off of functions rather than one
57 * 17 12/10/96 10:37a John
58 * Restructured texture mapper to remove some overhead from each scanline
59 * setup. This gave about a 30% improvement drawing trans01.pof, which is
60 * a really complex model. In the process, I cleaned up the scanline
61 * functions and separated them into different modules for each pixel
64 * 16 12/02/96 4:03p John
65 * made texture divide pipeline better. 2.5% speedup.
67 * 15 11/26/96 6:50p John
68 * Added some more hicolor primitives. Made windowed mode run as current
69 * bpp, if bpp is 8,16,or 32.
71 * 14 11/18/96 9:58a John
74 * 13 11/07/96 6:19p John
75 * Added a bunch of 16bpp primitives so the game sort of runs in 16bpp
78 * 12 11/07/96 3:49p John
79 * Fixed some old 'c' inner loop code for testing.
81 * 11 11/07/96 2:17p John
82 * Took out the OldTmapper stuff.
84 * 10 11/05/96 4:05p John
85 * Added roller. Added code to draw a distant planet. Made bm_load
86 * return -1 if invalid bitmap.
88 * 9 10/31/96 7:20p John
89 * Added per,tiled tmapper. Made models tile if they use 64x64 textures.
91 * 8 10/26/96 1:40p John
92 * Added some new primitives to the 2d library and
93 * cleaned up some old ones.
100 #include "grinternal.h"
102 #include "tmapscanline.h"
103 #include "floating.h"
108 // Needed to keep warning 4725 away. See PsTypes.h for details why.
109 void disable_warning_4725_stub_ts32()
// Size-specialized tiled scanline routines, declared here and defined elsewhere.
// tmapscan_pln8_tiled() calls the one that matches the texture's width and height.
114 extern void tmapscan_pln8_tiled_256x256();
115 extern void tmapscan_pln8_tiled_128x128();
116 extern void tmapscan_pln8_tiled_64x64();
117 extern void tmapscan_pln8_tiled_32x32();
118 extern void tmapscan_pln8_tiled_16x16();
// Dispatches to the tiled scanline routine that matches the dimensions of the
// current texture (Tmap.bp->w and Tmap.bp->h). Only square textures of
// 256, 128, 64, 32 and 16 pixels have a dedicated routine.
121 void tmapscan_pln8_tiled()
123 if ( (Tmap.bp->w == 256) && (Tmap.bp->h == 256) ) {
124 tmapscan_pln8_tiled_256x256();
125 } else if ( (Tmap.bp->w == 128) && (Tmap.bp->h == 128) ) {
126 tmapscan_pln8_tiled_128x128();
127 } else if ( (Tmap.bp->w == 64) && (Tmap.bp->h == 64) ) {
128 tmapscan_pln8_tiled_64x64();
129 } else if ( (Tmap.bp->w == 32) && (Tmap.bp->h == 32) ) {
130 tmapscan_pln8_tiled_32x32();
131 } else if ( (Tmap.bp->w == 16) && (Tmap.bp->h == 16) ) {
132 tmapscan_pln8_tiled_16x16();
// NOTE(review): no routine is called for any other texture size - confirm what the fallback should do.
134 // argh! write another texture mapper!
// Sets up the destination pointer and the matching z-buffer pointer for the
// current scanline, then loops Tmap.loop_count times.
// NOTE(review): judging by the name, the loop only writes depth values - confirm against the loop body.
140 void tmapscan_write_z()
146 dptr = (ubyte *)Tmap.dest_row_data;
// The z-buffer index is the offset of the destination pixel from Tmap.pScreenBits.
151 uint *zbuf = (uint *)&gr_zbuffer[(uint)dptr-(uint)Tmap.pScreenBits];
153 for (i=0; i<Tmap.loop_count; i++ ) {
// Scanline routine for a single flat color (gr_screen.current_color.raw8)
// shaded through gr_fade_table, with a z-buffer pointer set up for the row.
// NOTE(review): l is presumably the interpolated light level and the write is
// presumably guarded by a depth test - confirm against the full loop body.
161 void tmapscan_flat_gouraud_zbuffered()
168 dptr = (ubyte *)Tmap.dest_row_data;
169 c = gr_screen.current_color.raw8;
// The z-buffer index is the offset of the destination pixel from Tmap.pScreenBits.
177 uint *zbuf = (uint *)&gr_zbuffer[(uint)dptr-(uint)Tmap.pScreenBits];
179 for (i=0; i<Tmap.loop_count; i++ ) {
// gr_fade_table is indexed as (f2i(l) << 8) + c, i.e. rows of 256 entries with c selecting the column.
182 *dptr = gr_fade_table[(f2i(l)<<8)+c];
191 // ADAM: Change Nebula colors here:
192 #define NEBULA_COLORS 20
// Fills one scanline with nebula color values derived from Tmap.l.b.
// Two running values are used, l1 and l2 (l1 plus half a step of 256), and
// they are written to alternating pixels to dither the result.
194 void tmapscan_nebula8()
199 dptr = (ubyte *)Tmap.dest_row_data;
// Largest color value used: NEBULA_COLORS-1.
201 float max_neb_color = i2fl(NEBULA_COLORS-1);
// l1 is scaled by 256; the pixel value is later taken from bits 8..15 of it.
203 l1 = (int)(Tmap.l.b*max_neb_color*256.0f);
204 l2 = l1 + 256/2; // dithering
// NOTE(review): the extra 2.0f presumably makes dldx the step per pixel pair - confirm.
205 dldx = (int)(Tmap.deltas.b*max_neb_color*2.0f*256.0f);
207 #ifdef USE_INLINE_ASM
208 // memset( dptr, 31, Tmap.loop_count );
228 _asm mov ecx, Tmap.loop_count
// C version: pixels are handled two at a time, l1 for the first and l2 for the second.
259 if ( Tmap.loop_count > 1 ) {
260 for (i=0; i<Tmap.loop_count/2; i++ ) {
261 dptr[0] = (ubyte)((l1&0xFF00)>>8);
263 dptr[1] = (ubyte)((l2&0xFF00)>>8);
// An odd pixel count leaves one pixel, which takes its value from l1.
268 if ( Tmap.loop_count & 1 ) {
269 dptr[0] = (ubyte)((l1&0xFF00)>>8);
// Flat-colored, shaded scanline. All three z-buffer modes listed in the switch
// call tmapscan_flat_gouraud_zbuffered(). The remaining code has two loops:
// one that remaps the existing destination pixel through the
// Current_alphacolor lookup table, and one that shades the current color
// through gr_fade_table.
276 void tmapscan_flat_gouraud()
279 switch(gr_zbuffering_mode) {
282 case GR_ZBUFF_FULL: // both
283 tmapscan_flat_gouraud_zbuffered();
285 case GR_ZBUFF_WRITE: // write only
286 tmapscan_flat_gouraud_zbuffered();
288 case GR_ZBUFF_READ: // read only
289 tmapscan_flat_gouraud_zbuffered();
295 if ( Current_alphacolor ) {
296 ubyte *lookup = &Current_alphacolor->table.lookup[0][0];
302 dptr = (ubyte *)Tmap.dest_row_data;
307 for (i=0; i<Tmap.loop_count; i++ ) {
// Row is f2i(l*16), column is the pixel already in the destination.
308 *dptr = lookup[f2i(l*16)*256+*dptr];
319 dptr = (ubyte *)Tmap.dest_row_data;
320 c = gr_screen.current_color.raw8;
325 for (i=0; i<Tmap.loop_count; i++ ) {
// Row is f2i(l*32), column is the current 8-bit color.
326 *dptr = gr_fade_table[f2i(l*32)*256+c];
// Flat-colored scanline with a per-pixel depth test against gr_zbuffer.
// Tmap.fx_w is the running depth term and is advanced by Tmap.fx_dwdx each pixel.
333 void tmapscan_flat8_zbuffered()
338 dptr = (ubyte *)Tmap.dest_row_data;
339 c = gr_screen.current_color.raw8;
342 for (i=0; i<Tmap.loop_count; i++ ) {
// Z-buffer index: offset of the destination pixel from Tmap.pScreenBits.
343 int tmp = (uint)dptr-Tmap.pScreenBits;
// The test passes when fx_w is greater than the stored value, which is then replaced.
// NOTE(review): the color c is presumably written to the pixel in this branch - confirm.
344 if ( Tmap.fx_w > (int)gr_zbuffer[tmp] ) {
345 gr_zbuffer[tmp] = Tmap.fx_w;
348 Tmap.fx_w += Tmap.fx_dwdx;
// Flat-colored scanline entry point. GR_ZBUFF_FULL and GR_ZBUFF_READ call
// tmapscan_flat8_zbuffered(); the plain fill is a memset of Tmap.loop_count
// bytes with the current 8-bit color.
// NOTE(review): what GR_ZBUFF_WRITE does, and which cases reach the memset, should be confirmed.
353 void tmapscan_flat8()
356 switch(gr_zbuffering_mode) {
359 case GR_ZBUFF_FULL: // both
360 tmapscan_flat8_zbuffered();
362 case GR_ZBUFF_WRITE: // write only
365 case GR_ZBUFF_READ: // read only
366 tmapscan_flat8_zbuffered();
371 memset( (ubyte *)Tmap.dest_row_data, gr_screen.current_color.raw8, Tmap.loop_count );
// Forward declaration; tmapscan_pln8() calls this for the z-buffered modes.
374 void tmapscan_pln8_zbuffered();
// Draws one shaded, perspective-corrected 8-bit textured scanline using x87
// inline assembly. tmapscan_pln8() calls this variant or tmapscan_pln8_pentium().
//
// Outline, as far as the visible code shows:
//  - The FPU control word is saved, a modified copy is loaded for the duration
//    of the routine, and the saved word is restored at the end.
//  - Tmap.loop_count is split into full spans of 32 pixels (Tmap.Subdivisions)
//    plus a remainder (Tmap.WidthModLength). A width that is an exact multiple
//    of 32 gives up one full span so the remainder path draws the last 32 pixels.
//  - U and V are recovered from U/Z, V/Z and 1/Z with one divide per span end
//    point; texture coordinates in between are stepped in 16.16 fixed point.
//  - Texels are looked up through gr_fade_table before being stored.
// NOTE(review): a loop_count of 0 takes the "exact multiple" path and would be
// treated as a 32-pixel remainder; callers presumably never pass 0 - confirm.
377 void tmapscan_pln8_ppro()
392 // Put the FPU in low precision mode
393 fstcw Tmap.OldFPUCW // store copy of CW
394 mov ax,Tmap.OldFPUCW // get it in ax
396 mov Tmap.FPUCW,ax // store it
397 fldcw Tmap.FPUCW // load the FPU
399 mov ecx, Tmap.loop_count // ecx = width
400 mov edi, Tmap.dest_row_data // edi = dest pointer
402 // edi = pointer to start pixel in dest dib
405 mov eax,ecx // eax and ecx = width
406 shr ecx,5 // ecx = width / subdivision length
407 and eax,31 // eax = width mod subdivision length
408 jnz some_left_over // any leftover?
409 dec ecx // no, so special case last span
410 mov eax,32 // the last span is a full 32 pixels long
412 mov Tmap.Subdivisions,ecx // store widths
413 mov Tmap.WidthModLength,eax
415 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
416 // st0 st1 st2 st3 st4 st5 st6 st7
418 fld Tmap.l.u // U/ZL V/ZL
419 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
420 fld1 // 1 1/ZL U/ZL V/ZL
421 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
422 fld st // ZL ZL 1/ZL U/ZL V/ZL
423 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
424 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
425 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
427 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
428 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
430 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
432 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
433 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
434 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
435 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
436 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
438 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
440 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
441 // @todo overlap this guy
442 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
443 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
444 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
445 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
446 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
448 cmp ecx,0 // check for any full spans
449 jle HandleLeftoverPixels
453 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
454 // UR VR V/ZR 1/ZR U/ZR UL VL
456 // convert left side coords
458 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
459 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
460 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
462 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
463 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
464 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
466 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
468 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
469 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
470 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
471 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
473 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
474 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
476 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
477 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
478 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
480 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
481 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
483 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
484 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
485 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
486 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
487 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
488 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
489 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
492 // setup delta values
494 mov eax,Tmap.DeltaV // get v 16.16 step
495 mov ebx,eax // copy it
496 sar eax,16 // get v int step
497 shl ebx,16 // get v frac step
498 mov Tmap.DeltaVFrac,ebx // store it
499 imul eax,Tmap.src_offset // calculate texture step for v int step
501 mov ebx,Tmap.DeltaU // get u 16.16 step
502 mov ecx,ebx // copy it
503 sar ebx,16 // get u int step
504 shl ecx,16 // get u frac step
505 mov Tmap.DeltaUFrac,ecx // store it
506 add eax,ebx // calculate uint + vint step
// uv_delta holds two texture steps: [4] for no v-fraction carry, [0] for a carry (one extra src_offset).
507 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
508 add eax,Tmap.src_offset // calculate whole step + v carry
509 mov Tmap.uv_delta[0],eax // save in v-carry slot
511 // setup initial coordinates
512 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
514 mov ebx,esi // copy it
515 sar esi,16 // get integer part
516 shl ebx,16 // get fractional part
518 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
520 mov edx,ecx // copy it
521 sar edx,16 // get integer part
522 shl ecx,16 // get fractional part
523 imul edx,Tmap.src_offset // calc texture scanline address
524 add esi,edx // calc texture offset
525 add esi,Tmap.pixptr // calc address
527 // set up affine registers
528 mov edx,Tmap.DeltaUFrac // get register copy
534 mov ebp, Tmap.fx_dl_dx
545 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
546 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
547 // This divide should happen while the pixel span is drawn.
548 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
552 // edi = dest dib bits at current pixel
553 // esi = texture pointer at current u,v
555 // ebx = u fraction 0.32
556 // ecx = v fraction 0.32
558 // ebp = v carry scratch
561 mov al,[edi] // preread the destination cache line
563 mov al,[esi] // get texture pixel 0
// A 32-pixel span is drawn as 32/4 passes over a block that stores four pixels.
565 mov Tmap.InnerLooper, 32/4 // Set up loop counter
571 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Get shaded pixel
573 add ecx,Tmap.DeltaVFrac // increment v fraction
// sbb leaves ebp at -1 on carry and 0 otherwise, so uv_delta[4*ebp+4] picks the [0] or [4] slot.
574 sbb ebp,ebp // get -1 if carry
575 mov [edi+0],al // store pixel
577 mov al,[esi] // get texture pixel
578 add ebx,edx // increment u fraction
579 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
583 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Get shaded pixel
585 add ecx,Tmap.DeltaVFrac // increment v fraction
586 sbb ebp,ebp // get -1 if carry
587 mov [edi+1],al // store pixel
589 mov al,[esi] // get texture pixel
590 add ebx,edx // increment u fraction
591 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
595 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Get shaded pixel
597 add ecx,Tmap.DeltaVFrac // increment v fraction
598 sbb ebp,ebp // get -1 if carry
599 mov [edi+2],al // store pixel
601 mov al,[esi] // get texture pixel
602 add ebx,edx // increment u fraction
603 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
607 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Get shaded pixel
609 add ecx,Tmap.DeltaVFrac // increment v fraction
610 sbb ebp,ebp // get -1 if carry
611 mov [edi+3],al // store pixel
613 mov al,[esi] // get texture pixel
614 add ebx,edx // increment u fraction
615 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
621 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
622 // ZR V/ZR 1/ZR U/ZR UL VL
624 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
625 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
626 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
627 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
629 dec Tmap.Subdivisions // decrement span count
630 jnz SpanLoop // loop back
// Remainder path: draws the final Tmap.WidthModLength pixels of the scanline.
633 HandleLeftoverPixels:
635 mov esi,Tmap.pixptr // load texture pointer
637 // edi = dest dib bits
638 // esi = current texture dib bits
639 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
640 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
642 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
643 jz FPUReturn ; nope, pop the FPU and bail
645 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
647 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
648 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
649 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
651 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
652 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
653 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
655 dec Tmap.WidthModLength ; calc how many steps to take
656 jz OnePixelSpan ; just one, do not do deltas
658 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
661 // @todo rearrange things so we don't need these two instructions
662 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
663 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
// The right end point is taken from Tmap.r minus one Tmap.deltas step.
665 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
666 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
667 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
668 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
669 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
670 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
672 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
674 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
675 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
677 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
679 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
680 fxch st(1) ; VR UR inv. inv. inv. dU VL
681 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
682 fxch st(6) ; dV UR inv. inv. inv. dU VR
// WidthModLength was decremented above, so the divisor here is the number of steps.
684 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
685 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
686 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
688 fxch st(4) ; dU inv. inv. inv. UR VR
689 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
690 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
691 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
693 // @todo gross! these are to line up with the other loop
694 fld st(1) ; inv. inv. inv. inv. UR VR
695 fld st(2) ; inv. inv. inv. inv. inv. UR VR
698 // setup delta values
699 mov eax, Tmap.DeltaV // get v 16.16 step
700 mov ebx, eax // copy it
701 sar eax, 16 // get v int step
702 shl ebx, 16 // get v frac step
703 mov Tmap.DeltaVFrac, ebx // store it
704 imul eax, Tmap.src_offset // calc texture step for v int step
706 mov ebx, Tmap.DeltaU // get u 16.16 step
707 mov ecx, ebx // copy it
708 sar ebx, 16 // get the u int step
709 shl ecx, 16 // get the u frac step
710 mov Tmap.DeltaUFrac, ecx // store it
711 add eax, ebx // calc uint + vint step
712 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
713 add eax, Tmap.src_offset // calc whole step + v carry
714 mov Tmap.uv_delta[0], eax // save in v-carry slot
719 ; setup initial coordinates
720 mov esi, Tmap.UFixed // get u 16.16
721 mov ebx, esi // copy it
722 sar esi, 16 // get integer part
723 shl ebx, 16 // get fractional part
725 mov ecx, Tmap.VFixed // get v 16.16
726 mov edx, ecx // copy it
727 sar edx, 16 // get integer part
728 shl ecx, 16 // get fractional part
729 imul edx, Tmap.src_offset // calc texture scanline address
730 add esi, edx // calc texture offset
731 add esi, Tmap.pixptr // calc address
738 mov edx, Tmap.DeltaUFrac
740 cmp Tmap.WidthModLength, 1
745 mov ebx, Tmap.fx_l_right
751 mov eax, Tmap.fx_dl_dx
760 inc Tmap.WidthModLength
761 mov eax,Tmap.WidthModLength
765 mov Tmap.WidthModLength, eax
769 mov al,[edi] // preread the destination cache line
// Leftover pixels are drawn by a block that stores two pixels per pass.
773 mov ah, bh // move lighting value into place
774 mov al, gr_fade_table[eax] // Get shaded pixel
775 add ecx,Tmap.DeltaVFrac // increment v fraction
776 sbb ebp,ebp // get -1 if carry
777 mov [edi+0],al // store pixel
778 mov al,[esi] // get texture pixel
779 add ebx,edx // increment u fraction
780 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
782 mov ah, bh // move lighting value into place
783 mov al, gr_fade_table[eax] // Get shaded pixel
784 add ecx,Tmap.DeltaVFrac // increment v fraction
785 sbb ebp,ebp // get -1 if carry
786 mov [edi+1],al // store pixel
787 mov al,[esi] // get texture pixel
788 add ebx,edx // increment u fraction
789 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
792 dec Tmap.WidthModLength
800 mov al,[esi] // get texture pixel 2
802 mov al, gr_fade_table[eax]
803 mov [edi],al // store pixel 2
808 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
809 // xxx xxx xxx xxx xxx xxx xxx
818 fldcw Tmap.OldFPUCW // restore the FPU
// Draws one shaded, perspective-corrected 8-bit textured scanline using x87
// inline assembly. tmapscan_pln8() calls this variant or tmapscan_pln8_ppro().
//
// Outline, as far as the visible code shows:
//  - The FPU control word is saved, a modified copy is loaded for the duration
//    of the routine, and the saved word is restored at the end.
//  - Tmap.loop_count is split into full spans of 32 pixels (Tmap.Subdivisions)
//    plus a remainder (Tmap.WidthModLength). A width that is an exact multiple
//    of 32 gives up one full span so the remainder path draws the last 32 pixels.
//  - U and V are recovered from U/Z, V/Z and 1/Z with one divide per span end
//    point; texture coordinates in between are stepped in 16.16 fixed point.
//  - Each texel is combined with the lighting byte (ah = bh) and looked up in
//    gr_fade_table before being stored.
// NOTE(review): a loop_count of 0 takes the "exact multiple" path and would be
// treated as a 32-pixel remainder; callers presumably never pass 0 - confirm.
832 void tmapscan_pln8_pentium()
847 // Put the FPU in low precision mode
848 fstcw Tmap.OldFPUCW // store copy of CW
849 mov ax,Tmap.OldFPUCW // get it in ax
851 mov Tmap.FPUCW,ax // store it
852 fldcw Tmap.FPUCW // load the FPU
854 mov ecx, Tmap.loop_count // ecx = width
855 mov edi, Tmap.dest_row_data // edi = dest pointer
857 // edi = pointer to start pixel in dest dib
860 mov eax,ecx // eax and ecx = width
861 shr ecx,5 // ecx = width / subdivision length
862 and eax,31 // eax = width mod subdivision length
863 jnz some_left_over // any leftover?
864 dec ecx // no, so special case last span
865 mov eax,32 // the last span is a full 32 pixels long
867 mov Tmap.Subdivisions,ecx // store widths
868 mov Tmap.WidthModLength,eax
870 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
871 // st0 st1 st2 st3 st4 st5 st6 st7
873 fld Tmap.l.u // U/ZL V/ZL
874 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
875 fld1 // 1 1/ZL U/ZL V/ZL
876 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
877 fld st // ZL ZL 1/ZL U/ZL V/ZL
878 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
879 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
880 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
882 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
883 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
885 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
887 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
888 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
889 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
890 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
891 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
893 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
895 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
896 // @todo overlap this guy
897 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
898 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
899 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
900 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
901 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
903 cmp ecx,0 // check for any full spans
904 jle HandleLeftoverPixels
908 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
909 // UR VR V/ZR 1/ZR U/ZR UL VL
911 // convert left side coords
913 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
914 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
915 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
917 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
918 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
919 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
921 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
923 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
924 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
925 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
926 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
928 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
929 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
931 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
932 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
933 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
935 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
936 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
938 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
939 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
940 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
941 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
942 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
943 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
944 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
947 // setup delta values
949 mov eax,Tmap.DeltaV // get v 16.16 step
950 mov ebx,eax // copy it
951 sar eax,16 // get v int step
952 shl ebx,16 // get v frac step
953 mov Tmap.DeltaVFrac,ebx // store it
954 imul eax,Tmap.src_offset // calculate texture step for v int step
956 mov ebx,Tmap.DeltaU // get u 16.16 step
957 mov ecx,ebx // copy it
958 sar ebx,16 // get u int step
959 shl ecx,16 // get u frac step
960 mov Tmap.DeltaUFrac,ecx // store it
961 add eax,ebx // calculate uint + vint step
// uv_delta holds two texture steps: [4] for no v-fraction carry, [0] for a carry (one extra src_offset).
962 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
963 add eax,Tmap.src_offset // calculate whole step + v carry
964 mov Tmap.uv_delta[0],eax // save in v-carry slot
966 // setup initial coordinates
967 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
969 mov ebx,esi // copy it
970 sar esi,16 // get integer part
971 shl ebx,16 // get fractional part
973 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
975 mov edx,ecx // copy it
976 sar edx,16 // get integer part
977 shl ecx,16 // get fractional part
978 imul edx,Tmap.src_offset // calc texture scanline address
979 add esi,edx // calc texture offset
980 add esi,Tmap.pixptr // calc address
982 // set up affine registers
983 mov edx,Tmap.DeltaUFrac // get register copy
989 mov ebp, Tmap.fx_dl_dx
1000 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
1001 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
1002 // This divide should happen while the pixel span is drawn.
1003 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
1006 // 32 pixel span code (32/4 passes over a block that stores four pixels)
1007 // edi = dest dib bits at current pixel
1008 // esi = texture pointer at current u,v
1010 // ebx = u fraction 0.32
1011 // ecx = v fraction 0.32
1012 // edx = u frac step
1013 // ebp = v carry scratch
1016 mov al,[edi] // preread the destination cache line
1018 mov al,[esi] // get texture pixel 0
1020 mov Tmap.InnerLooper, 32/4 // Set up loop counter
1024 mov ah, bh // move lighting value into place
1025 mov al, gr_fade_table[eax] // Get shaded pixel
1028 add ecx,Tmap.DeltaVFrac // increment v fraction
// sbb leaves ebp at -1 on carry and 0 otherwise, so uv_delta[4*ebp+4] picks the [0] or [4] slot.
1029 sbb ebp,ebp // get -1 if carry
1030 mov [edi+0],al // store pixel
1032 mov al,[esi] // get texture pixel
1033 add ebx,edx // increment u fraction
1034 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1036 mov ah, bh // move lighting value into place
1037 mov al, gr_fade_table[eax] // Get shaded pixel
1040 add ecx,Tmap.DeltaVFrac // increment v fraction
1041 sbb ebp,ebp // get -1 if carry
1042 mov [edi+1],al // store pixel
1044 mov al,[esi] // get texture pixel
1045 add ebx,edx // increment u fraction
1046 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1048 mov ah, bh // move lighting value into place
1049 mov al, gr_fade_table[eax] // Get shaded pixel
1052 add ecx,Tmap.DeltaVFrac // increment v fraction
1053 sbb ebp,ebp // get -1 if carry
1054 mov [edi+2],al // store pixel
1056 mov al,[esi] // get texture pixel
1057 add ebx,edx // increment u fraction
1058 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1060 mov ah, bh // move lighting value into place
1061 mov al, gr_fade_table[eax] // Get shaded pixel
1064 add ecx,Tmap.DeltaVFrac // increment v fraction
1065 sbb ebp,ebp // get -1 if carry
1066 mov [edi+3],al // store pixel
1068 mov al,[esi] // get texture pixel
1069 add ebx,edx // increment u fraction
1070 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1073 dec Tmap.InnerLooper
1076 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
1077 // ZR V/ZR 1/ZR U/ZR UL VL
1079 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1080 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1081 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1082 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
1084 dec Tmap.Subdivisions // decrement span count
1085 jnz SpanLoop // loop back
// Remainder path: draws the final Tmap.WidthModLength pixels of the scanline.
1088 HandleLeftoverPixels:
1090 mov esi,Tmap.pixptr // load texture pointer
1092 // edi = dest dib bits
1093 // esi = current texture dib bits
1094 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
1095 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
1097 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
1098 jz FPUReturn ; nope, pop the FPU and bail
1100 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
1102 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
1103 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
1104 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
1106 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
1107 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
1108 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
1110 dec Tmap.WidthModLength ; calc how many steps to take
1111 jz OnePixelSpan ; just one, do not do deltas
1113 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
1116 // @todo rearrange things so we don't need these two instructions
1117 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
1118 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
// The right end point is taken from Tmap.r minus one Tmap.deltas step.
1120 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
1121 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
1122 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
1123 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
1124 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
1125 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
1127 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
1129 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
1130 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
1132 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1134 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
1135 fxch st(1) ; VR UR inv. inv. inv. dU VL
1136 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
1137 fxch st(6) ; dV UR inv. inv. inv. dU VR
// WidthModLength was decremented above, so the divisor here is the number of steps.
1139 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
1140 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
1141 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
1143 fxch st(4) ; dU inv. inv. inv. UR VR
1144 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
1145 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
1146 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
1148 // @todo gross! these are to line up with the other loop
1149 fld st(1) ; inv. inv. inv. inv. UR VR
1150 fld st(2) ; inv. inv. inv. inv. inv. UR VR
1153 // setup delta values
1154 mov eax, Tmap.DeltaV // get v 16.16 step
1155 mov ebx, eax // copy it
1156 sar eax, 16 // get v int step
1157 shl ebx, 16 // get v frac step
1158 mov Tmap.DeltaVFrac, ebx // store it
1159 imul eax, Tmap.src_offset // calc texture step for v int step
1161 mov ebx, Tmap.DeltaU // get u 16.16 step
1162 mov ecx, ebx // copy it
1163 sar ebx, 16 // get the u int step
1164 shl ecx, 16 // get the u frac step
1165 mov Tmap.DeltaUFrac, ecx // store it
1166 add eax, ebx // calc uint + vint step
1167 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1168 add eax, Tmap.src_offset // calc whole step + v carry
1169 mov Tmap.uv_delta[0], eax // save in v-carry slot
1174 ; setup initial coordinates
1175 mov esi, Tmap.UFixed // get u 16.16
1176 mov ebx, esi // copy it
1177 sar esi, 16 // get integer part
1178 shl ebx, 16 // get fractional part
1180 mov ecx, Tmap.VFixed // get v 16.16
1181 mov edx, ecx // copy it
1182 sar edx, 16 // get integer part
1183 shl ecx, 16 // get fractional part
1184 imul edx, Tmap.src_offset // calc texture scanline address
1185 add esi, edx // calc texture offset
1186 add esi, Tmap.pixptr // calc address
1193 mov edx, Tmap.DeltaUFrac
1195 cmp Tmap.WidthModLength, 1
1200 mov ebx, Tmap.fx_l_right
1206 mov eax, Tmap.fx_dl_dx
1215 inc Tmap.WidthModLength
1216 mov eax,Tmap.WidthModLength
1220 mov Tmap.WidthModLength, eax
1224 mov al,[edi] // preread the destination cache line
// Leftover pixels are drawn by a block that stores two pixels per pass.
1228 mov ah, bh // move lighting value into place
1229 mov al, gr_fade_table[eax] // Get shaded pixel
1230 add ecx,Tmap.DeltaVFrac // increment v fraction
1231 sbb ebp,ebp // get -1 if carry
1232 mov [edi+0],al // store pixel
1233 mov al,[esi] // get texture pixel
1234 add ebx,edx // increment u fraction
1235 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1237 mov ah, bh // move lighting value into place
1238 mov al, gr_fade_table[eax] // Get shaded pixel
1239 add ecx,Tmap.DeltaVFrac // increment v fraction
1240 sbb ebp,ebp // get -1 if carry
1241 mov [edi+1],al // store pixel
1242 mov al,[esi] // get texture pixel
1243 add ebx,edx // increment u fraction
1244 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1247 dec Tmap.WidthModLength
1255 mov al,[esi] // get texture pixel 2
1257 mov al, gr_fade_table[eax]
1258 mov [edi],al // store pixel 2
1263 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
1264 // xxx xxx xxx xxx xxx xxx xxx
1273 fldcw Tmap.OldFPUCW // restore the FPU
// Entry point for the pln8 scanline: picks the implementation to run.
// With z-buffering enabled, GR_ZBUFF_FULL and GR_ZBUFF_READ call
// tmapscan_pln8_zbuffered(). The other two calls go to the ppro and pentium
// inline-assembly variants.
// NOTE(review): what GR_ZBUFF_WRITE does, and the condition that chooses
// between the ppro and pentium variants, should be confirmed.
1289 void tmapscan_pln8()
1291 if (gr_zbuffering) {
1292 switch(gr_zbuffering_mode) {
1295 case GR_ZBUFF_FULL: // both
1296 tmapscan_pln8_zbuffered();
1298 case GR_ZBUFF_WRITE: // write only
1301 case GR_ZBUFF_READ: // read only
1302 tmapscan_pln8_zbuffered();
1309 tmapscan_pln8_ppro();
1311 tmapscan_pln8_pentium();
1316 void tmapscan_lln8()
1330 ; setup delta values
1331 mov eax, Tmap.fx_dv_dx // get v 16.16 step
1332 mov ebx, eax // copy it
1333 sar eax, 16 // get v int step
1334 shl ebx, 16 // get v frac step
1335 mov Tmap.DeltaVFrac, ebx // store it
1336 imul eax, Tmap.src_offset // calc texture step for v int step
1338 mov ebx, Tmap.fx_du_dx // get u 16.16 step
1339 mov ecx, ebx // copy it
1340 sar ebx, 16 // get the u int step
1341 shl ecx, 16 // get the u frac step
1342 mov Tmap.DeltaUFrac, ecx // store it
1343 add eax, ebx // calc uint + vint step
1345 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1346 add eax, Tmap.src_offset // calc whole step + v carry
1347 mov Tmap.uv_delta[0], eax // save in v-carry slot
1349 ; setup initial coordinates
1350 mov esi, Tmap.fx_u // get u 16.16
1351 mov ebx, esi // copy it
1352 sar esi, 16 // get integer part
1353 shl ebx, 16 // get fractional part
1355 mov ecx, Tmap.fx_v // get v 16.16
1356 mov edx, ecx // copy it
1357 sar edx, 16 // get integer part
1358 shl ecx, 16 // get fractional part
1359 imul edx, Tmap.src_offset // calc texture scanline address
1360 add esi, edx // calc texture offset
1361 add esi, Tmap.pixptr // calc address
1363 ; set edi = address of first pixel to modify
1364 mov edi, Tmap.dest_row_data
1366 mov edx, Tmap.DeltaUFrac
1368 mov eax, Tmap.loop_count
1372 mov Tmap.num_big_steps, eax
1373 and Tmap.loop_count, 3
1375 mov al,[edi] // preread the destination cache line
1376 mov al,[esi] // get texture pixel 0
1385 mov ebp, Tmap.fx_dl_dx
1398 // 8 pixel span code
1399 // edi = dest dib bits at current pixel
1400 // esi = texture pointer at current u,v
1402 // ebx = u fraction 0.32
1403 // ecx = v fraction 0.32
1404 // edx = u frac step
1405 // ebp = v carry scratch
1407 mov ah, bh // move lighting value into place
1408 mov al, gr_fade_table[eax] // Get shaded pixel
1409 add ecx,Tmap.DeltaVFrac // increment v fraction
1410 sbb ebp,ebp // get -1 if carry
1411 mov [edi+0],al // store pixel
1412 mov al,[esi] // get texture pixel
1413 add ebx,edx // increment u fraction
1414 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1416 mov ah, bh // move lighting value into place
1417 mov al, gr_fade_table[eax] // Get shaded pixel
1418 add ecx,Tmap.DeltaVFrac // increment v fraction
1419 sbb ebp,ebp // get -1 if carry
1420 mov [edi+1],al // store pixel
1421 mov al,[esi] // get texture pixel
1422 add ebx,edx // increment u fraction
1423 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1425 mov ah, bh // move lighting value into place
1426 mov al, gr_fade_table[eax] // Get shaded pixel
1427 add ecx,Tmap.DeltaVFrac // increment v fraction
1428 sbb ebp,ebp // get -1 if carry
1429 mov [edi+2],al // store pixel
1430 mov al,[esi] // get texture pixel
1431 add ebx,edx // increment u fraction
1432 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1434 mov ah, bh // move lighting value into place
1435 mov al, gr_fade_table[eax] // Get shaded pixel
1436 add ecx,Tmap.DeltaVFrac // increment v fraction
1437 sbb ebp,ebp // get -1 if carry
1438 mov [edi+3],al // store pixel
1439 mov al,[esi] // get texture pixel
1440 add ebx,edx // increment u fraction
1441 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1444 dec Tmap.num_big_steps
1454 mov ebp, Tmap.fx_dl_dx
1458 mov eax,Tmap.loop_count
1463 mov Tmap.loop_count, eax
1468 mov al, [edi] // preread the destination cache line
1469 mov al, [esi] // Get first texel
1473 mov ah, bh // move lighting value into place
1474 mov al, gr_fade_table[eax] // Get shaded pixel
1475 add ecx,Tmap.DeltaVFrac // increment v fraction
1476 sbb ebp,ebp // get -1 if carry
1477 mov [edi+0],al // store pixel
1478 mov al,[esi] // get texture pixel
1479 add ebx,edx // increment u fraction
1480 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1482 mov ah, bh // move lighting value into place
1483 mov al, gr_fade_table[eax] // Get shaded pixel
1484 add ecx,Tmap.DeltaVFrac // increment v fraction
1485 sbb ebp,ebp // get -1 if carry
1486 mov [edi+1],al // store pixel
1487 mov al,[esi] // get texture pixel
1488 add ebx,edx // increment u fraction
1489 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1500 mov al, [esi] // Get first texel
1502 mov al, gr_fade_table[eax]
1503 mov [edi],al // store pixel 2
// Z-tested scanline routine, Pentium Pro tuned variant.
// Steps linearly through the texture in u,v. For each pixel the depth held in
// ebp is compared with the z-buffer entry at [edx+4*k]; pixels that pass are
// looked up through Tmap.BlendLookup and stored to the destination row.
// The z-buffer store is commented out, so depth is tested but never written.
// Reads:  Tmap.fx_u, fx_v, fx_du_dx, fx_dv_dx, fx_dwdx, src_offset, pixptr,
//         dest_row_data, pScreenBits, BlendLookup, loop_count.
// Writes: Tmap.DeltaUFrac, DeltaVFrac, uv_delta[], num_big_steps, loop_count.
1518 void tmapscan_lna8_zbuffered_ppro()
1532 ; setup delta values
// Each 16.16 step is split: the integer part becomes a byte offset into the
// texture, the fraction is shifted into the top 16 bits so that adding it
// sets the carry flag when it wraps.
1533 mov eax, Tmap.fx_dv_dx // get v 16.16 step
1534 mov ebx, eax // copy it
1535 sar eax, 16 // get v int step
1536 shl ebx, 16 // get v frac step
1537 mov Tmap.DeltaVFrac, ebx // store it
1538 imul eax, Tmap.src_offset // calc texture step for v int step
1540 mov ebx, Tmap.fx_du_dx // get u 16.16 step
1541 mov ecx, ebx // copy it
1542 sar ebx, 16 // get the u int step
1543 shl ecx, 16 // get the u frac step
1544 mov Tmap.DeltaUFrac, ecx // store it
1545 add eax, ebx // calc uint + vint step
1547 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1548 add eax, Tmap.src_offset // calc whole step + v carry (one extra texture row)
1549 mov Tmap.uv_delta[0], eax // save in v-carry slot
1551 ; setup initial coordinates
// esi = Tmap.pixptr + (fx_v >> 16) * src_offset + (fx_u >> 16)
1552 mov esi, Tmap.fx_u // get u 16.16
1553 mov ebx, esi // copy it
1554 sar esi, 16 // get integer part
1555 shl ebx, 16 // get fractional part
1557 mov ecx, Tmap.fx_v // get v 16.16
1558 mov edx, ecx // copy it
1559 sar edx, 16 // get integer part
1560 shl ecx, 16 // get fractional part
1561 imul edx, Tmap.src_offset // calc texture scanline address
1562 add esi, edx // calc texture offset
1563 add esi, Tmap.pixptr // calc address
1565 ; set edi = address of first pixel to modify
1566 mov edi, Tmap.dest_row_data
1569 mov al,[edi] // get the destination pixel
1574 sub eax, Tmap.pScreenBits
1578 mov eax, Tmap.loop_count
1583 mov Tmap.num_big_steps, eax // count of unrolled passes
1584 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
1588 // 4 pixel span code (stores [edi+0]..[edi+3] per pass; this comment used to say 8)
1590 // ebx = u fraction 0.32
1591 // ecx = v fraction 0.32
1592 // edx = zbuffer pointer
1593 // edi = dest dib bits at current pixel
1594 // esi = texture pointer at current u,v
// ebp = depth value for the current pixel, stepped by Tmap.fx_dwdx
// eax = scratch
//
// Texture stepping used after every pixel below:
//   add ecx,DeltaVFrac / sbb eax,eax  -> eax = -1 if the v fraction wrapped, else 0
//   uv_delta[4*eax+4]                 -> offset 0 (v-carry slot) or offset 4 (plain slot)
//   add ebx,DeltaUFrac / adc esi,...  -> adc also folds in the u fraction carry
1598 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
1599 jle Skip0a // If pixel is covered, skip drawing (signed compare, ebp <= stored z)
1600 // mov [edx+4*0], ebp // Write new Z value
1602 // Get pixel and blend it
1606 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
1607 mov ah, [esi] // Get texel into AH, so eax = texel * 256
1608 add eax, Tmap.BlendLookup
1609 mov eax, [eax+ebx] // Load from BlendLookup + texel*256 + ebx; the low byte is the pixel
1612 mov [edi+0],al // store pixel
1614 add ebp,Tmap.fx_dwdx // increment z value
1615 add ecx,Tmap.DeltaVFrac // increment v fraction
1616 sbb eax,eax // get -1 if carry
1617 add ebx,Tmap.DeltaUFrac // increment u fraction
1618 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1621 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
1622 jle Skip1a // If pixel is covered, skip drawing
1623 // mov [edx+4*1], ebp // Write new Z value
1625 // Get pixel and blend it
1629 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
1630 mov ah, [esi] // Get texel into AH, so eax = texel * 256
1631 add eax, Tmap.BlendLookup
1632 mov eax, [eax+ebx] // Load from BlendLookup + texel*256 + ebx; the low byte is the pixel
1635 mov [edi+1],al // store pixel
1637 add ebp, Tmap.fx_dwdx // increment z value
1638 add ecx,Tmap.DeltaVFrac // increment v fraction
1639 sbb eax,eax // get -1 if carry
1640 add ebx,Tmap.DeltaUFrac // increment u fraction
1641 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1643 cmp ebp, [edx+4*2] // Compare the Z depth of this pixel with zbuffer
1644 jle Skip2a // If pixel is covered, skip drawing
1645 // mov [edx+4*2], ebp // Write new Z value
1650 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
1651 mov ah, [esi] // Get texel into AH, so eax = texel * 256
1652 add eax, Tmap.BlendLookup
1653 mov eax, [eax+ebx] // Load from BlendLookup + texel*256 + ebx; the low byte is the pixel
1656 mov [edi+2],al // store pixel
1658 add ebp, Tmap.fx_dwdx // increment z value
1659 add ecx,Tmap.DeltaVFrac // increment v fraction
1660 sbb eax,eax // get -1 if carry
1661 add ebx,Tmap.DeltaUFrac // increment u fraction
1662 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1664 cmp ebp, [edx+4*3] // Compare the Z depth of this pixel with zbuffer
1665 jle Skip3a // If pixel is covered, skip drawing
1666 // mov [edx+4*3], ebp // Write new Z value
1671 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
1672 mov ah, [esi] // Get texel into AH, so eax = texel * 256
1673 add eax, Tmap.BlendLookup
1674 mov eax, [eax+ebx] // Load from BlendLookup + texel*256 + ebx; the low byte is the pixel
1677 mov [edi+3],al // store pixel
1679 add ebp, Tmap.fx_dwdx // increment z value
1680 add ecx,Tmap.DeltaVFrac // increment v fraction
1681 sbb eax,eax // get -1 if carry
1682 add ebx,Tmap.DeltaUFrac // increment u fraction
1683 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1687 dec Tmap.num_big_steps // one unrolled pass done
// Leftover pixels. These use the al = dest pixel / ah = texel form of the
// lookup rather than the xor-eax form used in the unrolled pass above.
1693 mov eax,Tmap.loop_count
1698 mov Tmap.loop_count, eax
1702 mov al,[edi] // get the destination pixel
1706 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
1707 jle Skip0b // If pixel is covered, skip drawing
1708 // mov [edx+4*0], ebp // Write new Z value
1709 mov al,[edi+0] // get the destination pixel
1710 mov ah,[esi] // get texture pixel
1712 add eax, Tmap.BlendLookup
1713 mov al, [eax] // blend them
1714 mov [edi+0],al // store pixel
1716 add ebp, Tmap.fx_dwdx // increment z value
1717 add ecx,Tmap.DeltaVFrac // increment v fraction
1718 sbb eax,eax // get -1 if carry
1719 add ebx,Tmap.DeltaUFrac // increment u fraction
1720 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1722 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
1723 jle Skip1b // If pixel is covered, skip drawing
1724 // mov [edx+4*1], ebp // Write new Z value
1725 mov al,[edi+1] // get the destination pixel
1726 mov ah,[esi] // get texture pixel
1728 add eax, Tmap.BlendLookup
1729 mov al, [eax] // blend them
1730 mov [edi+1],al // store pixel
1732 add ebp, Tmap.fx_dwdx // increment z value
1733 add ecx,Tmap.DeltaVFrac // increment v fraction
1734 sbb eax,eax // get -1 if carry
1735 add ebx,Tmap.DeltaUFrac // increment u fraction
1736 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1747 cmp ebp, [edx] // Compare the Z depth of this pixel with zbuffer
1748 jle Skip0c // If pixel is covered, skip drawing
1749 // mov [edx], ebp // Write new Z value
1750 mov al,[edi] // get the destination pixel
1751 mov ah,[esi] // get texture pixel
1753 add eax, Tmap.BlendLookup
1754 mov al, [eax] // blend them
1755 mov [edi],al // store pixel
// Z-tested scanline routine, Pentium variant.
// Steps linearly through the texture in u,v. For each pixel the depth held in
// ebp is compared with the z-buffer entry at [edx+4*k]; for pixels that pass,
// al = destination pixel and ah = texel are combined through Tmap.BlendLookup
// and the result is stored back to the destination row.
// The z-buffer store is commented out, so depth is tested but never written.
// Reads:  Tmap.fx_u, fx_v, fx_du_dx, fx_dv_dx, fx_dwdx, src_offset, pixptr,
//         dest_row_data, pScreenBits, BlendLookup, loop_count.
// Writes: Tmap.DeltaUFrac, DeltaVFrac, uv_delta[], num_big_steps, loop_count.
1770 void tmapscan_lna8_zbuffered_pentium()
1784 ; setup delta values
// Each 16.16 step is split: the integer part becomes a byte offset into the
// texture, the fraction is shifted into the top 16 bits so that adding it
// sets the carry flag when it wraps.
1785 mov eax, Tmap.fx_dv_dx // get v 16.16 step
1786 mov ebx, eax // copy it
1787 sar eax, 16 // get v int step
1788 shl ebx, 16 // get v frac step
1789 mov Tmap.DeltaVFrac, ebx // store it
1790 imul eax, Tmap.src_offset // calc texture step for v int step
1792 mov ebx, Tmap.fx_du_dx // get u 16.16 step
1793 mov ecx, ebx // copy it
1794 sar ebx, 16 // get the u int step
1795 shl ecx, 16 // get the u frac step
1796 mov Tmap.DeltaUFrac, ecx // store it
1797 add eax, ebx // calc uint + vint step
1799 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1800 add eax, Tmap.src_offset // calc whole step + v carry (one extra texture row)
1801 mov Tmap.uv_delta[0], eax // save in v-carry slot
1803 ; setup initial coordinates
// esi = Tmap.pixptr + (fx_v >> 16) * src_offset + (fx_u >> 16)
1804 mov esi, Tmap.fx_u // get u 16.16
1805 mov ebx, esi // copy it
1806 sar esi, 16 // get integer part
1807 shl ebx, 16 // get fractional part
1809 mov ecx, Tmap.fx_v // get v 16.16
1810 mov edx, ecx // copy it
1811 sar edx, 16 // get integer part
1812 shl ecx, 16 // get fractional part
1813 imul edx, Tmap.src_offset // calc texture scanline address
1814 add esi, edx // calc texture offset
1815 add esi, Tmap.pixptr // calc address
1817 ; set edi = address of first pixel to modify
1818 mov edi, Tmap.dest_row_data
1821 mov al,[edi] // get the destination pixel
1826 sub eax, Tmap.pScreenBits
1830 mov eax, Tmap.loop_count
1835 mov Tmap.num_big_steps, eax // count of unrolled passes
1836 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
1840 // 4 pixel span code (stores [edi+0]..[edi+3] per pass; this comment used to say 8)
1842 // ebx = u fraction 0.32
1843 // ecx = v fraction 0.32
1844 // edx = zbuffer pointer
1845 // edi = dest dib bits at current pixel
1846 // esi = texture pointer at current u,v
// ebp = depth value for the current pixel, stepped by Tmap.fx_dwdx
// eax = scratch
//
// Texture stepping used after every pixel below:
//   add ecx,DeltaVFrac / sbb eax,eax  -> eax = -1 if the v fraction wrapped, else 0
//   uv_delta[4*eax+4]                 -> offset 0 (v-carry slot) or offset 4 (plain slot)
//   add ebx,DeltaUFrac / adc esi,...  -> adc also folds in the u fraction carry
1850 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
1851 jle Skip0a // If pixel is covered, skip drawing (signed compare, ebp <= stored z)
1852 // mov [edx+4*0], ebp // Write new Z value
1853 mov al,[edi+0] // get the destination pixel
1854 mov ah,[esi] // get texture pixel
1860 add eax, Tmap.BlendLookup
1861 mov al, [eax] // blend them
1864 mov [edi+0],al // store pixel
1866 add ebp,Tmap.fx_dwdx // increment z value
1867 add ecx,Tmap.DeltaVFrac // increment v fraction
1868 sbb eax,eax // get -1 if carry
1869 add ebx,Tmap.DeltaUFrac // increment u fraction
1870 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1873 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
1874 jle Skip1a // If pixel is covered, skip drawing
1875 // mov [edx+4*1], ebp // Write new Z value
1876 mov al,[edi+1] // get the destination pixel
1877 mov ah,[esi] // get texture pixel
1883 add eax, Tmap.BlendLookup
1884 mov al, [eax] // blend them
1887 mov [edi+1],al // store pixel
1889 add ebp, Tmap.fx_dwdx // increment z value
1890 add ecx,Tmap.DeltaVFrac // increment v fraction
1891 sbb eax,eax // get -1 if carry
1892 add ebx,Tmap.DeltaUFrac // increment u fraction
1893 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1895 cmp ebp, [edx+4*2] // Compare the Z depth of this pixel with zbuffer
1896 jle Skip2a // If pixel is covered, skip drawing
1897 // mov [edx+4*2], ebp // Write new Z value
1898 mov al,[edi+2] // get the destination pixel
1899 mov ah,[esi] // get texture pixel
1904 add eax, Tmap.BlendLookup
1905 mov al, [eax] // blend them
1908 mov [edi+2],al // store pixel
1910 add ebp, Tmap.fx_dwdx // increment z value
1911 add ecx,Tmap.DeltaVFrac // increment v fraction
1912 sbb eax,eax // get -1 if carry
1913 add ebx,Tmap.DeltaUFrac // increment u fraction
1914 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1916 cmp ebp, [edx+4*3] // Compare the Z depth of this pixel with zbuffer
1917 jle Skip3a // If pixel is covered, skip drawing
1918 // mov [edx+4*3], ebp // Write new Z value
1919 mov al,[edi+3] // get the destination pixel
1920 mov ah,[esi] // get texture pixel
1925 add eax, Tmap.BlendLookup
1926 mov al, [eax] // blend them
1929 mov [edi+3],al // store pixel
1931 add ebp, Tmap.fx_dwdx // increment z value
1932 add ecx,Tmap.DeltaVFrac // increment v fraction
1933 sbb eax,eax // get -1 if carry
1934 add ebx,Tmap.DeltaUFrac // increment u fraction
1935 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1939 dec Tmap.num_big_steps // one unrolled pass done
// Leftover pixels (Tmap.loop_count was masked to 0..3 above).
1945 mov eax,Tmap.loop_count
1950 mov Tmap.loop_count, eax
1954 mov al,[edi] // get the destination pixel
1958 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
1959 jle Skip0b // If pixel is covered, skip drawing
1960 // mov [edx+4*0], ebp // Write new Z value
1961 mov al,[edi+0] // get the destination pixel
1962 mov ah,[esi] // get texture pixel
1964 add eax, Tmap.BlendLookup
1965 mov al, [eax] // blend them
1966 mov [edi+0],al // store pixel
1968 add ebp, Tmap.fx_dwdx // increment z value
1969 add ecx,Tmap.DeltaVFrac // increment v fraction
1970 sbb eax,eax // get -1 if carry
1971 add ebx,Tmap.DeltaUFrac // increment u fraction
1972 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1974 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
1975 jle Skip1b // If pixel is covered, skip drawing
1976 // mov [edx+4*1], ebp // Write new Z value
1977 mov al,[edi+1] // get the destination pixel
1978 mov ah,[esi] // get texture pixel
1980 add eax, Tmap.BlendLookup
1981 mov al, [eax] // blend them
1982 mov [edi+1],al // store pixel
1984 add ebp, Tmap.fx_dwdx // increment z value
1985 add ecx,Tmap.DeltaVFrac // increment v fraction
1986 sbb eax,eax // get -1 if carry
1987 add ebx,Tmap.DeltaUFrac // increment u fraction
1988 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1999 cmp ebp, [edx] // Compare the Z depth of this pixel with zbuffer
2000 jle Skip0c // If pixel is covered, skip drawing
2001 // mov [edx], ebp // Write new Z value
2002 mov al,[edi] // get the destination pixel
2003 mov ah,[esi] // get texture pixel
2005 add eax, Tmap.BlendLookup
2006 mov al, [eax] // blend them
2007 mov [edi],al // store pixel
// Entry point for the z-tested blended scanline. Forwards to one of the two
// CPU-specific implementations; takes no arguments and returns nothing.
2022 void tmapscan_lna8_zbuffered()
2025 tmapscan_lna8_zbuffered_ppro(); // Pentium Pro tuned path
2027 tmapscan_lna8_zbuffered_pentium(); // Pentium path
2033 extern float Tmap_clipped_left;
// Blended scanline routine. When gr_zbuffering is set and the mode is
// GR_ZBUFF_FULL, GR_ZBUFF_WRITE or GR_ZBUFF_READ, the work is handed to
// tmapscan_lna8_zbuffered(). The assembly below is the non-z-tested path:
// for each pixel, al = destination pixel and ah = texel are combined through
// Tmap.BlendLookup and the result is stored back to the destination row.
// Reads:  Tmap.fx_u, fx_v, fx_du_dx, fx_dv_dx, src_offset, pixptr,
//         dest_row_data, BlendLookup, loop_count.
// Writes: Tmap.DeltaUFrac, DeltaVFrac, uv_delta[], num_big_steps, loop_count.
2035 void tmapscan_lna8()
2037 if (gr_zbuffering) {
2038 switch(gr_zbuffering_mode) {
2041 case GR_ZBUFF_FULL: // both
2042 case GR_ZBUFF_WRITE: // write only
2043 case GR_ZBUFF_READ: // read only
2044 tmapscan_lna8_zbuffered(); // all three modes share the z-tested routine
2062 ; setup delta values
// Each 16.16 step is split: the integer part becomes a byte offset into the
// texture, the fraction is shifted into the top 16 bits so that adding it
// sets the carry flag when it wraps.
2063 mov eax, Tmap.fx_dv_dx // get v 16.16 step
2064 mov ebx, eax // copy it
2065 sar eax, 16 // get v int step
2066 shl ebx, 16 // get v frac step
2067 mov Tmap.DeltaVFrac, ebx // store it
2068 imul eax, Tmap.src_offset // calc texture step for v int step
2070 mov ebx, Tmap.fx_du_dx // get u 16.16 step
2071 mov ecx, ebx // copy it
2072 sar ebx, 16 // get the u int step
2073 shl ecx, 16 // get the u frac step
2074 mov Tmap.DeltaUFrac, ecx // store it
2075 add eax, ebx // calc uint + vint step
2077 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
2078 add eax, Tmap.src_offset // calc whole step + v carry (one extra texture row)
2079 mov Tmap.uv_delta[0], eax // save in v-carry slot
2081 ; setup initial coordinates
// esi = Tmap.pixptr + (fx_v >> 16) * src_offset + (fx_u >> 16)
2082 mov esi, Tmap.fx_u // get u 16.16
2083 mov ebx, esi // copy it
2084 sar esi, 16 // get integer part
2085 shl ebx, 16 // get fractional part
2087 mov ecx, Tmap.fx_v // get v 16.16
2088 mov edx, ecx // copy it
2089 sar edx, 16 // get integer part
2090 shl ecx, 16 // get fractional part
2091 imul edx, Tmap.src_offset // calc texture scanline address
2092 add esi, edx // calc texture offset
2093 add esi, Tmap.pixptr // calc address
2095 ; set edi = address of first pixel to modify
2096 mov edi, Tmap.dest_row_data
2098 mov edx, Tmap.DeltaUFrac // keep the u fraction step in a register for the loop
2101 mov al,[edi] // get the destination pixel
2103 mov ebp, Tmap.loop_count
2108 mov Tmap.num_big_steps, ebp // count of unrolled passes
2109 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
2115 // 4 pixel span code (stores [edi+0]..[edi+3] per pass; this comment used to say 8)
2116 // edi = dest dib bits at current pixel
2117 // esi = texture pointer at current u,v
2119 // ebx = u fraction 0.32
2120 // ecx = v fraction 0.32
2121 // edx = u frac step
2122 // ebp = v carry scratch
//
// Texture stepping used for every pixel below:
//   add ecx,DeltaVFrac / sbb ebp,ebp  -> ebp = -1 if the v fraction wrapped, else 0
//   uv_delta[4*ebp+4]                 -> offset 0 (v-carry slot) or offset 4 (plain slot)
//   add ebx,edx / adc esi,...         -> adc also folds in the u fraction carry
// esi is advanced before the lookup; ah already holds the texel by then.
2126 mov al,[edi+0] // get the destination pixel
2127 mov ah,[esi] // get texture pixel
2128 add ecx,Tmap.DeltaVFrac // increment v fraction
2129 sbb ebp,ebp // get -1 if carry
2130 add ebx,edx // increment u fraction
2131 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2132 add eax, Tmap.BlendLookup
2133 mov al, [eax] // blend them
2134 mov [edi+0],al // store pixel
2137 mov al,[edi+1] // get the destination pixel
2138 mov ah,[esi] // get texture pixel
2139 add ecx,Tmap.DeltaVFrac // increment v fraction
2140 sbb ebp,ebp // get -1 if carry
2141 add ebx,edx // increment u fraction
2142 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2143 add eax, Tmap.BlendLookup
2144 mov al, [eax] // blend them
2145 mov [edi+1],al // store pixel
2148 mov al,[edi+2] // get the destination pixel
2149 mov ah,[esi] // get texture pixel
2150 add ecx,Tmap.DeltaVFrac // increment v fraction
2151 sbb ebp,ebp // get -1 if carry
2152 add ebx,edx // increment u fraction
2153 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2154 add eax, Tmap.BlendLookup
2155 mov al, [eax] // blend them
2156 mov [edi+2],al // store pixel
2159 mov al,[edi+3] // get the destination pixel
2160 mov ah,[esi] // get texture pixel
2161 add ecx,Tmap.DeltaVFrac // increment v fraction
2162 sbb ebp,ebp // get -1 if carry
2163 add ebx,edx // increment u fraction
2164 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2165 add eax, Tmap.BlendLookup
2166 mov al, [eax] // blend them
2167 mov [edi+3],al // store pixel
2170 dec Tmap.num_big_steps // one unrolled pass done
// Leftover pixels (Tmap.loop_count was masked to 0..3 above).
2176 mov ebp,Tmap.loop_count
2181 mov Tmap.loop_count, ebp
2184 mov al,[edi] // get the destination pixel
2189 mov al,[edi+0] // get the destination pixel
2190 mov ah,[esi] // get texture pixel
2191 add ecx,Tmap.DeltaVFrac // increment v fraction
2192 sbb ebp,ebp // get -1 if carry
2193 add ebx,edx // increment u fraction
2194 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2195 add eax, Tmap.BlendLookup
2196 mov al, [eax] // blend them
2197 mov [edi+0],al // store pixel
2200 mov al,[edi+1] // get the destination pixel
2201 mov ah,[esi] // get texture pixel
2202 add ecx,Tmap.DeltaVFrac // increment v fraction
2203 sbb ebp,ebp // get -1 if carry
2204 add ebx,edx // increment u fraction
2205 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2206 add eax, Tmap.BlendLookup
2207 mov al, [eax] // blend them
2208 mov [edi+1],al // store pixel
2219 mov al,[edi] // get the destination pixel
2220 mov ah,[esi] // get texture pixel
2221 add eax, Tmap.BlendLookup
2222 mov al, [eax] // blend them
2223 mov [edi],al // store pixel
2237 // HACKED IN SYSTEM FOR DOING MODEL CACHING
// Mode switch checked by tmapscan_lnn8(): 1 routes the scanline to
// tmapscan_lnn8_read(), 2 routes it to tmapscan_lnn8_write(). Starts at 0.
2238 int Tmap_scan_read = 0; // 0 = normal mapper, 1=read, 2=write
2240 // HACKED IN SYSTEM FOR DOING MODEL CACHING
// "Read" scanline: the data flow is the reverse of a normal texture mapper.
// Pixels are read from the destination row (Tmap.dest_row_data) and written
// INTO the texture (Tmap.pixptr) at the interpolated u,v position.
// Reads:  Tmap.l.u, l.v, deltas.u, deltas.v, src_offset, pixptr,
//         dest_row_data, loop_count.
// Writes: Tmap.fx_u, fx_v, fx_du_dx, fx_dv_dx, DeltaUFrac, DeltaVFrac,
//         uv_delta[], num_big_steps, loop_count, and the texture memory itself.
2241 void tmapscan_lnn8_read()
// Convert the float start coordinates and per-pixel deltas with fl2f
// (presumably float -> 16.16 fixed, matching the "16.16" notes below -- confirm).
2243 Tmap.fx_u = fl2f(Tmap.l.u);
2244 Tmap.fx_v = fl2f(Tmap.l.v);
2245 Tmap.fx_du_dx = fl2f(Tmap.deltas.u);
2246 Tmap.fx_dv_dx = fl2f(Tmap.deltas.v);
2251 ubyte * src = (ubyte *)Tmap.pixptr; // texture memory (the write target here)
2252 ubyte * dst = (ubyte *)Tmap.dest_row_data; // destination row (the read source here)
2254 for (i=0; i<Tmap.loop_count; i++ ) {
2259 src[u+v*Tmap.src_offset] = *dst++; // copy one destination pixel into the texture
2261 Tmap.fx_u += Tmap.fx_du_dx; // step u
2262 Tmap.fx_v += Tmap.fx_dv_dx; // step v
2278 ; setup delta values
// Each 16.16 step is split: the integer part becomes a byte offset into the
// texture, the fraction is shifted into the top 16 bits so that adding it
// sets the carry flag when it wraps.
2279 mov eax, Tmap.fx_dv_dx // get v 16.16 step
2280 mov ebx, eax // copy it
2281 sar eax, 16 // get v int step
2282 shl ebx, 16 // get v frac step
2283 mov Tmap.DeltaVFrac, ebx // store it
2284 imul eax, Tmap.src_offset // calc texture step for v int step
2286 mov ebx, Tmap.fx_du_dx // get u 16.16 step
2287 mov ecx, ebx // copy it
2288 sar ebx, 16 // get the u int step
2289 shl ecx, 16 // get the u frac step
2290 mov Tmap.DeltaUFrac, ecx // store it
2291 add eax, ebx // calc uint + vint step
2293 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
2294 add eax, Tmap.src_offset // calc whole step + v carry (one extra texture row)
2295 mov Tmap.uv_delta[0], eax // save in v-carry slot
2297 ; setup initial coordinates
// esi = Tmap.pixptr + (fx_v >> 16) * src_offset + (fx_u >> 16)
2298 mov esi, Tmap.fx_u // get u 16.16
2299 mov ebx, esi // copy it
2300 sar esi, 16 // get integer part
2301 shl ebx, 16 // get fractional part
2303 mov ecx, Tmap.fx_v // get v 16.16
2304 mov edx, ecx // copy it
2305 sar edx, 16 // get integer part
2306 shl ecx, 16 // get fractional part
2307 imul edx, Tmap.src_offset // calc texture scanline address
2308 add esi, edx // calc texture offset
2309 add esi, Tmap.pixptr // calc address
2311 ; set edi = address of first pixel to modify
2312 mov edi, Tmap.dest_row_data
2314 mov edx, Tmap.DeltaUFrac // keep the u fraction step in a register for the loop
2316 mov al,[edi] // preread the destination cache line
2318 mov ebp, Tmap.loop_count
2323 mov Tmap.num_big_steps, ebp // count of unrolled passes
2324 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
2329 // 4 pixel span code (reads [edi+0]..[edi+3] per pass; this comment used to say 8)
2330 // edi = dest dib bits at current pixel
2331 // esi = texture pointer at current u,v
2333 // ebx = u fraction 0.32
2334 // ecx = v fraction 0.32
2335 // edx = u frac step
2336 // ebp = v carry scratch
//
// Texture stepping used for every pixel below:
//   add ecx,DeltaVFrac / sbb ebp,ebp  -> ebp = -1 if the v fraction wrapped, else 0
//   uv_delta[4*ebp+4]                 -> offset 0 (v-carry slot) or offset 4 (plain slot)
//   add ebx,edx / adc esi,...         -> adc also folds in the u fraction carry
// The two mov instructions sit between the flag-setting adds and do not alter flags.
2338 add ecx,Tmap.DeltaVFrac // increment v fraction
2339 sbb ebp,ebp // get -1 if carry
2340 mov al,[edi+0] // read pixel from the destination row
2341 mov [esi],al // write it into the texture at the current u,v
2342 add ebx,edx // increment u fraction
2343 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2345 add ecx,Tmap.DeltaVFrac // increment v fraction
2346 sbb ebp,ebp // get -1 if carry
2347 mov al,[edi+1] // read pixel from the destination row
2348 mov [esi],al // write it into the texture at the current u,v
2349 add ebx,edx // increment u fraction
2350 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2352 add ecx,Tmap.DeltaVFrac // increment v fraction
2353 sbb ebp,ebp // get -1 if carry
2354 mov al,[edi+2] // read pixel from the destination row
2355 mov [esi],al // write it into the texture at the current u,v
2356 add ebx,edx // increment u fraction
2357 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2359 add ecx,Tmap.DeltaVFrac // increment v fraction
2360 sbb ebp,ebp // get -1 if carry
2361 mov al,[edi+3] // read pixel from the destination row
2362 mov [esi],al // write it into the texture at the current u,v
2363 add ebx,edx // increment u fraction
2364 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2367 dec Tmap.num_big_steps // one unrolled pass done
// Leftover pixels (Tmap.loop_count was masked to 0..3 above).
2373 mov ebp,Tmap.loop_count
2378 mov Tmap.loop_count, ebp
2383 mov al, [edi] // preread the destination cache line
2387 add ecx,Tmap.DeltaVFrac // increment v fraction
2388 sbb ebp,ebp // get -1 if carry
2389 mov al,[edi+0] // read pixel from the destination row
2390 mov [esi],al // write it into the texture at the current u,v
2391 add ebx,edx // increment u fraction
2392 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2394 add ecx,Tmap.DeltaVFrac // increment v fraction
2395 sbb ebp,ebp // get -1 if carry
2396 mov al,[edi+1] // read pixel from the destination row
2397 mov [esi],al // write it into the texture at the current u,v
2398 add ebx,edx // increment u fraction
2399 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2409 mov al,[edi] // read pixel from the destination row
2410 mov [esi],al // write it into the texture at the current u,v
2425 // HACKED IN SYSTEM FOR DOING MODEL CACHING
// "Write" scanline: fetches texels from the texture (Tmap.pixptr) at the
// interpolated u,v position and stores them to the destination row
// (Tmap.dest_row_data).
// Reads:  Tmap.l.u, l.v, deltas.u, deltas.v, src_offset, pixptr,
//         dest_row_data, loop_count.
// Writes: Tmap.fx_u, fx_v, fx_du_dx, fx_dv_dx, DeltaUFrac, DeltaVFrac,
//         uv_delta[], num_big_steps, loop_count, and the destination row.
2426 void tmapscan_lnn8_write()
// Convert the float start coordinates and per-pixel deltas with fl2f
// (presumably float -> 16.16 fixed, matching the "16.16" notes below -- confirm).
2428 Tmap.fx_u = fl2f(Tmap.l.u);
2429 Tmap.fx_v = fl2f(Tmap.l.v);
2430 Tmap.fx_du_dx = fl2f(Tmap.deltas.u);
2431 Tmap.fx_dv_dx = fl2f(Tmap.deltas.v);
2436 ubyte * src = (ubyte *)Tmap.pixptr; // texture memory
2437 ubyte * dst = (ubyte *)Tmap.dest_row_data; // destination row
2439 for (i=0; i<Tmap.loop_count; i++ ) {
2444 ubyte c = src[u+v*Tmap.src_offset]; // fetch the texel at the current u,v
2450 Tmap.fx_u += Tmap.fx_du_dx; // step u
2451 Tmap.fx_v += Tmap.fx_dv_dx; // step v
2467 ; setup delta values
// Each 16.16 step is split: the integer part becomes a byte offset into the
// texture, the fraction is shifted into the top 16 bits so that adding it
// sets the carry flag when it wraps.
2468 mov eax, Tmap.fx_dv_dx // get v 16.16 step
2469 mov ebx, eax // copy it
2470 sar eax, 16 // get v int step
2471 shl ebx, 16 // get v frac step
2472 mov Tmap.DeltaVFrac, ebx // store it
2473 imul eax, Tmap.src_offset // calc texture step for v int step
2475 mov ebx, Tmap.fx_du_dx // get u 16.16 step
2476 mov ecx, ebx // copy it
2477 sar ebx, 16 // get the u int step
2478 shl ecx, 16 // get the u frac step
2479 mov Tmap.DeltaUFrac, ecx // store it
2480 add eax, ebx // calc uint + vint step
2482 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
2483 add eax, Tmap.src_offset // calc whole step + v carry (one extra texture row)
2484 mov Tmap.uv_delta[0], eax // save in v-carry slot
2486 ; setup initial coordinates
// esi = Tmap.pixptr + (fx_v >> 16) * src_offset + (fx_u >> 16)
2487 mov esi, Tmap.fx_u // get u 16.16
2488 mov ebx, esi // copy it
2489 sar esi, 16 // get integer part
2490 shl ebx, 16 // get fractional part
2492 mov ecx, Tmap.fx_v // get v 16.16
2493 mov edx, ecx // copy it
2494 sar edx, 16 // get integer part
2495 shl ecx, 16 // get fractional part
2496 imul edx, Tmap.src_offset // calc texture scanline address
2497 add esi, edx // calc texture offset
2498 add esi, Tmap.pixptr // calc address
2500 ; set edi = address of first pixel to modify
2501 mov edi, Tmap.dest_row_data
2503 mov edx, Tmap.DeltaUFrac // keep the u fraction step in a register for the loop
2505 mov al,[edi] // preread the destination cache line
2507 mov ebp, Tmap.loop_count
2512 mov Tmap.num_big_steps, ebp // count of unrolled passes
2513 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
2518 // 4 pixel span code (stores [edi+0]..[edi+3] per pass; this comment used to say 8)
2519 // edi = dest dib bits at current pixel
2520 // esi = texture pointer at current u,v
2522 // ebx = u fraction 0.32
2523 // ecx = v fraction 0.32
2524 // edx = u frac step
2525 // ebp = v carry scratch
//
// Texture stepping used for every pixel below:
//   add ecx,DeltaVFrac / sbb ebp,ebp  -> ebp = -1 if the v fraction wrapped, else 0
//   uv_delta[4*ebp+4]                 -> offset 0 (v-carry slot) or offset 4 (plain slot)
//   add ebx,edx / adc esi,...         -> adc also folds in the u fraction carry
// The texel is fetched into al before esi is advanced, then stored afterwards.
2527 add ecx,Tmap.DeltaVFrac // increment v fraction
2528 sbb ebp,ebp // get -1 if carry
2529 mov al,[esi] // get texture pixel
2530 add ebx,edx // increment u fraction
2531 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2534 mov [edi+0],al // store pixel
2537 add ecx,Tmap.DeltaVFrac // increment v fraction
2538 sbb ebp,ebp // get -1 if carry
2539 mov al,[esi] // get texture pixel
2540 add ebx,edx // increment u fraction
2541 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2544 mov [edi+1],al // store pixel
2547 add ecx,Tmap.DeltaVFrac // increment v fraction
2548 sbb ebp,ebp // get -1 if carry
2549 mov al,[esi] // get texture pixel
2550 add ebx,edx // increment u fraction
2551 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2554 mov [edi+2],al // store pixel
2557 add ecx,Tmap.DeltaVFrac // increment v fraction
2558 sbb ebp,ebp // get -1 if carry
2559 mov al,[esi] // get texture pixel
2560 add ebx,edx // increment u fraction
2561 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2564 mov [edi+3],al // store pixel
2568 dec Tmap.num_big_steps // one unrolled pass done
// Leftover pixels (Tmap.loop_count was masked to 0..3 above).
2574 mov ebp,Tmap.loop_count
2579 mov Tmap.loop_count, ebp
2584 mov al, [edi] // preread the destination cache line
2588 add ecx,Tmap.DeltaVFrac // increment v fraction
2589 sbb ebp,ebp // get -1 if carry
2590 mov al,[esi] // get texture pixel
2591 add ebx,edx // increment u fraction
2592 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2595 mov [edi+0],al // store pixel
2598 add ecx,Tmap.DeltaVFrac // increment v fraction
2599 sbb ebp,ebp // get -1 if carry
2600 mov al,[esi] // get texture pixel
2601 add ebx,edx // increment u fraction
2602 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2605 mov [edi+1],al // store pixel
2616 mov al,[esi] // get texture pixel
2619 mov [edi],al // store pixel
// Plain scanline routine: copies texels from the texture to the destination
// row with no lookup table. Before doing so it checks Tmap_scan_read and hands
// off to tmapscan_lnn8_read() (value 1) or tmapscan_lnn8_write() (value 2),
// and it also tests gr_screen.current_alphablend_mode against
// GR_ALPHABLEND_FILTER.
// Reads:  Tmap.fx_u, fx_v, fx_du_dx, fx_dv_dx, src_offset, pixptr,
//         dest_row_data, loop_count.
// Writes: Tmap.DeltaUFrac, DeltaVFrac, uv_delta[], num_big_steps, loop_count.
2634 void tmapscan_lnn8()
2636 // HACKED IN SYSTEM FOR DOING MODEL CACHING
2637 if ( Tmap_scan_read==1 ) {
2638 tmapscan_lnn8_read(); // destination row -> texture
2640 } else if ( Tmap_scan_read==2 ) {
2641 tmapscan_lnn8_write(); // texture -> destination row
2646 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER ) {
2663 ; setup delta values
// Each 16.16 step is split: the integer part becomes a byte offset into the
// texture, the fraction is shifted into the top 16 bits so that adding it
// sets the carry flag when it wraps.
2664 mov eax, Tmap.fx_dv_dx // get v 16.16 step
2665 mov ebx, eax // copy it
2666 sar eax, 16 // get v int step
2667 shl ebx, 16 // get v frac step
2668 mov Tmap.DeltaVFrac, ebx // store it
2669 imul eax, Tmap.src_offset // calc texture step for v int step
2671 mov ebx, Tmap.fx_du_dx // get u 16.16 step
2672 mov ecx, ebx // copy it
2673 sar ebx, 16 // get the u int step
2674 shl ecx, 16 // get the u frac step
2675 mov Tmap.DeltaUFrac, ecx // store it
2676 add eax, ebx // calc uint + vint step
2678 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
2679 add eax, Tmap.src_offset // calc whole step + v carry (one extra texture row)
2680 mov Tmap.uv_delta[0], eax // save in v-carry slot
2682 ; setup initial coordinates
// esi = Tmap.pixptr + (fx_v >> 16) * src_offset + (fx_u >> 16)
2683 mov esi, Tmap.fx_u // get u 16.16
2684 mov ebx, esi // copy it
2685 sar esi, 16 // get integer part
2686 shl ebx, 16 // get fractional part
2688 mov ecx, Tmap.fx_v // get v 16.16
2689 mov edx, ecx // copy it
2690 sar edx, 16 // get integer part
2691 shl ecx, 16 // get fractional part
2692 imul edx, Tmap.src_offset // calc texture scanline address
2693 add esi, edx // calc texture offset
2694 add esi, Tmap.pixptr // calc address
2696 ; set edi = address of first pixel to modify
2697 mov edi, Tmap.dest_row_data
2699 mov edx, Tmap.DeltaUFrac // keep the u fraction step in a register for the loop
2701 mov al,[edi] // preread the destination cache line
2702 mov al,[esi] // get texture pixel 0
2704 mov ebp, Tmap.loop_count
2709 mov Tmap.num_big_steps, ebp // count of unrolled passes
2710 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
2715 // 4 pixel span code (stores [edi+0]..[edi+3] per pass; this comment used to say 8)
2716 // edi = dest dib bits at current pixel
2717 // esi = texture pointer at current u,v
2719 // ebx = u fraction 0.32
2720 // ecx = v fraction 0.32
2721 // edx = u frac step
2722 // ebp = v carry scratch
//
// Each step stores the texel already sitting in al, then fetches the texel at
// the current esi, then advances esi:
//   add ecx,DeltaVFrac / sbb ebp,ebp  -> ebp = -1 if the v fraction wrapped, else 0
//   uv_delta[4*ebp+4]                 -> offset 0 (v-carry slot) or offset 4 (plain slot)
//   add ebx,edx / adc esi,...         -> adc also folds in the u fraction carry
2724 add ecx,Tmap.DeltaVFrac // increment v fraction
2725 sbb ebp,ebp // get -1 if carry
2726 mov [edi+0],al // store pixel
2727 mov al,[esi] // get texture pixel
2728 add ebx,edx // increment u fraction
2729 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2731 add ecx,Tmap.DeltaVFrac // increment v fraction
2732 sbb ebp,ebp // get -1 if carry
2733 mov [edi+1],al // store pixel
2734 mov al,[esi] // get texture pixel
2735 add ebx,edx // increment u fraction
2736 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2738 add ecx,Tmap.DeltaVFrac // increment v fraction
2739 sbb ebp,ebp // get -1 if carry
2740 mov [edi+2],al // store pixel
2741 mov al,[esi] // get texture pixel
2742 add ebx,edx // increment u fraction
2743 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2745 add ecx,Tmap.DeltaVFrac // increment v fraction
2746 sbb ebp,ebp // get -1 if carry
2747 mov [edi+3],al // store pixel
2748 mov al,[esi] // get texture pixel
2749 add ebx,edx // increment u fraction
2750 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2753 dec Tmap.num_big_steps // one unrolled pass done
// Leftover pixels (Tmap.loop_count was masked to 0..3 above).
2759 mov ebp,Tmap.loop_count
2764 mov Tmap.loop_count, ebp
2769 mov al, [edi] // preread the destination cache line
2770 mov al, [esi] // Get first texel
2774 add ecx,Tmap.DeltaVFrac // increment v fraction
2775 sbb ebp,ebp // get -1 if carry
2776 mov [edi+0],al // store pixel
2777 mov al,[esi] // get texture pixel
2778 add ebx,edx // increment u fraction
2779 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2781 add ecx,Tmap.DeltaVFrac // increment v fraction
2782 sbb ebp,ebp // get -1 if carry
2783 mov [edi+1],al // store pixel
2784 mov al,[esi] // get texture pixel
2785 add ebx,edx // increment u fraction
2786 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2796 mov [edi],al // store pixel
// tmapscan_lnt8: draws one scanline by stepping u and v linearly in 16.16
// fixed point and copying 8-bit texels from the texture to the destination row.
//   Read from Tmap: fx_u, fx_v (initial coordinates), fx_du_dx, fx_dv_dx
//   (per-pixel steps), src_offset (multiplied by the integer part of v to
//   reach a texture row), pixptr (texture base), dest_row_data, loop_count.
//   Tmap.uv_delta[] holds two precomputed texture-pointer steps: byte offset 4
//   is the whole u+v step when the v fraction does not carry, byte offset 0 is
//   that step plus one more src_offset for when the v fraction carries.
// NOTE(review): the body of the GR_ALPHABLEND_FILTER branch is not visible
// from here - confirm what that mode does before relying on it.
2810 void tmapscan_lnt8()
2812 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER ) {
2829 ; setup delta values
2830 mov eax, Tmap.fx_dv_dx // get v 16.16 step
2831 mov ebx, eax // copy it
2832 sar eax, 16 // get v int step
2833 shl ebx, 16 // get v frac step
2834 mov Tmap.DeltaVFrac, ebx // store it
2835 imul eax, Tmap.src_offset // calc texture step for v int step
2837 mov ebx, Tmap.fx_du_dx // get u 16.16 step
2838 mov ecx, ebx // copy it
2839 sar ebx, 16 // get the u int step
2840 shl ecx, 16 // get the u frac step
2841 mov Tmap.DeltaUFrac, ecx // store it
2842 add eax, ebx // calc uint + vint step
2844 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
2845 add eax, Tmap.src_offset // calc whole step + v carry
2846 mov Tmap.uv_delta[0], eax // save in v-carry slot
2848 ; setup initial coordinates
2849 mov esi, Tmap.fx_u // get u 16.16
2850 mov ebx, esi // copy it
2851 sar esi, 16 // get integer part
2852 shl ebx, 16 // get fractional part
2854 mov ecx, Tmap.fx_v // get v 16.16
2855 mov edx, ecx // copy it
2856 sar edx, 16 // get integer part
2857 shl ecx, 16 // get fractional part
2858 imul edx, Tmap.src_offset // calc texture scanline address
2859 add esi, edx // calc texture offset
2860 add esi, Tmap.pixptr // calc address
2862 ; set edi = address of first pixel to modify
2863 mov edi, Tmap.dest_row_data
2865 mov edx, Tmap.DeltaUFrac
2867 mov al,[edi] // preread the destination cache line
2868 mov al,[esi] // get texture pixel 0
// num_big_steps is decremented once per 4-pixel pass of the unrolled loop
// below; loop_count is reduced to the leftover pixel count (0-3).
2870 mov ebp, Tmap.loop_count
2875 mov Tmap.num_big_steps, ebp
2876 and Tmap.loop_count, 3
2880 // 4 pixel unrolled span code (stores to [edi+0]..[edi+3] per pass)
2881 // edi = dest dib bits at current pixel
2882 // esi = texture pointer at current u,v
2884 // ebx = u fraction 0.32
2885 // ecx = v fraction 0.32
2886 // edx = u frac step
2887 // ebp = v carry scratch
// Per pixel: adding DeltaVFrac sets the carry flag when the v fraction wraps;
// sbb turns that into ebp = -1 (carry) or 0 (no carry), so uv_delta[4*ebp+4]
// selects the carry / no-carry pointer step, and adc also folds in the carry
// produced by the u fraction add.
2889 add ecx,Tmap.DeltaVFrac // increment v fraction
2890 sbb ebp,ebp // get -1 if carry
2893 mov [edi+0],al // store pixel
2895 mov al,[esi] // get texture pixel
2896 add ebx,edx // increment u fraction
2897 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2899 add ecx,Tmap.DeltaVFrac // increment v fraction
2900 sbb ebp,ebp // get -1 if carry
2903 mov [edi+1],al // store pixel
2905 mov al,[esi] // get texture pixel
2906 add ebx,edx // increment u fraction
2907 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2909 add ecx,Tmap.DeltaVFrac // increment v fraction
2910 sbb ebp,ebp // get -1 if carry
2913 mov [edi+2],al // store pixel
2915 mov al,[esi] // get texture pixel
2916 add ebx,edx // increment u fraction
2917 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2919 add ecx,Tmap.DeltaVFrac // increment v fraction
2920 sbb ebp,ebp // get -1 if carry
2923 mov [edi+3],al // store pixel
2925 mov al,[esi] // get texture pixel
2926 add ebx,edx // increment u fraction
2927 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2930 dec Tmap.num_big_steps
// Leftover handling: loop_count now holds the remainder left by the
// "and Tmap.loop_count, 3" above. The pixel steps below use the same
// add / sbb / adc sequence as the main loop.
2936 mov ebp,Tmap.loop_count
2941 mov Tmap.loop_count, ebp
2946 mov al, [edi] // preread the destination cache line
2947 mov al, [esi] // Get first texel
2951 add ecx,Tmap.DeltaVFrac // increment v fraction
2952 sbb ebp,ebp // get -1 if carry
2955 mov [edi+0],al // store pixel
2957 mov al,[esi] // get texture pixel
2958 add ebx,edx // increment u fraction
2959 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2961 add ecx,Tmap.DeltaVFrac // increment v fraction
2962 sbb ebp,ebp // get -1 if carry
2965 mov [edi+1],al // store pixel
2967 mov al,[esi] // get texture pixel
2968 add ebx,edx // increment u fraction
2969 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2981 mov [edi],al // store pixel
// tmapscan_pln8_zbuffered_ppro: z-buffered, lit, 8-bit textured scanline.
//   The FPU interpolates 1/Z, U/Z and V/Z across the line (Tmap.l.sw/u/v plus
//   the fl_d?dx_wide increments) and recovers U and V at span boundaries with
//   a divide; between boundaries u and v are stepped linearly in fixed point.
//   Each pixel is depth-tested against the z-buffer entry addressed through
//   edx and, if it passes, the texel is run through gr_fade_table before
//   being stored to the destination row.
//   The width (Tmap.loop_count) is split into full 32-pixel spans
//   (Tmap.Subdivisions) plus a final span of 1..32 pixels
//   (Tmap.WidthModLength); when the width is an exact multiple of 32 the last
//   full span is treated as the final span.
//   The FPU control word is saved in Tmap.OldFPUCW on entry and reloaded at
//   the end.
2998 void tmapscan_pln8_zbuffered_ppro()
3013 // Put the FPU in low precision mode
3014 fstcw Tmap.OldFPUCW // store copy of CW
3015 mov ax,Tmap.OldFPUCW // get it in ax
3017 mov Tmap.FPUCW,ax // store it
3018 fldcw Tmap.FPUCW // load the FPU
3021 mov ecx, Tmap.loop_count // ecx = width
3022 mov edi, Tmap.dest_row_data // edi = dest pointer
3024 // edi = pointer to start pixel in dest dib
3027 mov eax,ecx // eax and ecx = width
3028 shr ecx,5 // ecx = width / subdivision length
3029 and eax,31 // eax = width mod subdivision length
3030 jnz some_left_over // any leftover?
3031 dec ecx // no, so special case last span
3032 mov eax,32 // it's 32 pixels long
3034 mov Tmap.Subdivisions,ecx // store widths
3035 mov Tmap.WidthModLength,eax
3037 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
3038 // st0 st1 st2 st3 st4 st5 st6 st7
3039 fld Tmap.l.v // V/ZL
3040 fld Tmap.l.u // U/ZL V/ZL
3041 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
3042 fld1 // 1 1/ZL U/ZL V/ZL
3043 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
3044 fld st // ZL ZL 1/ZL U/ZL V/ZL
3045 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
3046 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
3047 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
3049 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
3050 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
3052 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
3054 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
3055 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
3056 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
3057 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
3058 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
3060 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
3062 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
3063 // @todo overlap this guy
3064 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
3065 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
3066 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
3067 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
3068 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
3070 cmp ecx,0 // check for any full spans
3071 jle HandleLeftoverPixels
3075 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
3076 // UR VR V/ZR 1/ZR U/ZR UL VL
3078 // convert left side coords
3080 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
3081 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
3082 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
3084 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
3085 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
3086 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
3088 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
3090 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
3091 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
3092 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
3093 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
3095 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
3096 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
3098 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
3099 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
3100 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
3102 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
3103 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
3105 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
3106 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
3107 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
3108 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
3109 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
3110 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
3111 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
3114 // setup delta values
// uv_delta byte offset 4 = texture-pointer step without a v-fraction carry,
// byte offset 0 = the same step plus one src_offset (v-fraction carry).
3116 mov eax,Tmap.DeltaV // get v 16.16 step
3117 mov ebx,eax // copy it
3118 sar eax,16 // get v int step
3119 shl ebx,16 // get v frac step
3120 mov Tmap.DeltaVFrac,ebx // store it
3121 imul eax,Tmap.src_offset // calculate texture step for v int step
3123 mov ebx,Tmap.DeltaU // get u 16.16 step
3124 mov ecx,ebx // copy it
3125 sar ebx,16 // get u int step
3126 shl ecx,16 // get u frac step
3127 mov Tmap.DeltaUFrac,ecx // store it
3128 add eax,ebx // calculate uint + vint step
3129 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
3130 add eax,Tmap.src_offset // calculate whole step + v carry
3131 mov Tmap.uv_delta[0],eax // save in v-carry slot
3133 // setup initial coordinates
3134 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
3136 mov ebx,esi // copy it
3137 sar esi,16 // get integer part
3138 shl ebx,16 // get fractional part
3140 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
3142 mov edx,ecx // copy it
3143 sar edx,16 // get integer part
3144 shl ecx,16 // get fractional part
3145 imul edx,Tmap.src_offset // calc texture scanline address
3146 add esi,edx // calc texture offset
3147 add esi,Tmap.pixptr // calc address
3149 // set up affine registers
3155 mov ebp, Tmap.fx_dl_dx
3164 mov edx, Tmap.DeltaUFrac
3166 mov Tmap.DeltaUFrac, edx
3169 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
3170 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
3171 // This divide should happen while the pixel span is drawn.
3172 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
3175 // 32 pixel span code, drawn 4 pixels per InnerLooper pass
3176 // edi = dest dib bits at current pixel
3177 // esi = texture pointer at current u,v
3179 // ebx = u fraction 0.32 (bits 8-15 are also used as the gr_fade_table row offset below)
3180 // ecx = v fraction 0.32
3181 // edx = z-buffer address for the current pixels (u frac step is added from Tmap.DeltaUFrac)
3182 // ebp = depth value tested against the z-buffer; eax indexes Tmap.uv_delta in the adc
3184 mov al,[edi] // preread the destination cache line
3185 mov al,[esi] // get texture pixel 0
3187 mov Tmap.InnerLooper, 32/4 // Set up loop counter
// NOTE(review): eax here is an offset relative to Tmap.pScreenBits, presumably
// used to derive the z-buffer address kept in edx - confirm.
3193 sub eax, Tmap.pScreenBits
3201 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
3202 jle Skip0 // If pixel is covered, skip drawing
3203 mov [edx+0], ebp // Write new Z value
3205 // Get pixel and light it
3207 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3208 mov al, [esi] // Get texel into AL
3209 and ebx, 0ff00h // Clear out fractional part of EBX
3210 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3213 mov [edi+0],al // Write new pixel
3215 Skip0: add ecx,Tmap.DeltaVFrac
3217 add ebp,Tmap.fx_dwdx
3218 add ebx,Tmap.DeltaUFrac
3219 adc esi,Tmap.uv_delta[4*eax+4]
3223 cmp ebp, [edx+4] // Compare the Z depth of this pixel with zbuffer
3224 jle Skip1 // If pixel is covered, skip drawing
3225 mov [edx+4], ebp // Write new Z value
3227 // Get pixel and light it
3229 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3230 mov al, [esi] // Get texel into AL
3231 and ebx, 0ff00h // Clear out fractional part of EBX
3232 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3235 mov [edi+1],al // Write new pixel
3237 Skip1: add ecx,Tmap.DeltaVFrac
3239 add ebp,Tmap.fx_dwdx
3240 add ebx,Tmap.DeltaUFrac
3241 adc esi,Tmap.uv_delta[4*eax+4]
3245 cmp ebp, [edx+8] // Compare the Z depth of this pixel with zbuffer
3246 jle Skip2 // If pixel is covered, skip drawing
3247 mov [edx+8], ebp // Write new Z value
3249 // Get pixel and light it
3251 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3252 mov al, [esi] // Get texel into AL
3253 and ebx, 0ff00h // Clear out fractional part of EBX
3254 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3257 mov [edi+2],al // Write new pixel
3259 Skip2: add ecx,Tmap.DeltaVFrac
3261 add ebp,Tmap.fx_dwdx
3262 add ebx,Tmap.DeltaUFrac
3263 adc esi,Tmap.uv_delta[4*eax+4]
3267 cmp ebp, [edx+12] // Compare the Z depth of this pixel with zbuffer
3268 jle Skip3 // If pixel is covered, skip drawing
3269 mov [edx+12], ebp // Write new Z value
3271 // Get pixel and light it
3273 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3274 mov al, [esi] // Get texel into AL
3275 and ebx, 0ff00h // Clear out fractional part of EBX
3276 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3279 mov [edi+3],al // Write new pixel
3281 Skip3: add ecx,Tmap.DeltaVFrac
3283 add ebp,Tmap.fx_dwdx
3284 add ebx,Tmap.DeltaUFrac
3285 adc esi,Tmap.uv_delta[4*eax+4]
3291 dec Tmap.InnerLooper
3296 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
3297 // ZR V/ZR 1/ZR U/ZR UL VL
3299 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
3300 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
3301 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
3302 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
3304 dec Tmap.Subdivisions // decrement span count
3305 jnz SpanLoop // loop back
// Final span of Tmap.WidthModLength pixels: the right-edge U/Z, V/Z and 1/Z are
// taken from Tmap.r minus Tmap.deltas instead of the fl_d?dx_wide increments,
// and the per-pixel deltas are divided by the step count (WidthModLength - 1).
3308 HandleLeftoverPixels:
3310 mov esi,Tmap.pixptr // load texture pointer
3312 // edi = dest dib bits
3313 // esi = current texture dib bits
3314 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
3315 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
3317 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
3318 jz FPUReturn ; nope, pop the FPU and bail
3320 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
3322 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
3323 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
3324 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
3326 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
3327 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
3328 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
3330 dec Tmap.WidthModLength ; calc how many steps to take
3331 jz OnePixelSpan ; just one, do not do deltas
3333 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
3336 // @todo rearrange things so we don't need these two instructions
3337 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
3338 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
3340 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
3341 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
3342 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
3343 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
3344 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
3345 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
3347 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
3349 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
3350 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
3352 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
3354 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
3355 fxch st(1) ; VR UR inv. inv. inv. dU VL
3356 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
3357 fxch st(6) ; dV UR inv. inv. inv. dU VR
3359 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
3360 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
3361 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
3363 fxch st(4) ; dU inv. inv. inv. UR VR
3364 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
3365 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
3366 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
3368 // @todo gross! these are to line up with the other loop
3369 fld st(1) ; inv. inv. inv. inv. UR VR
3370 fld st(2) ; inv. inv. inv. inv. inv. UR VR
3373 // setup delta values
3374 mov eax, Tmap.DeltaV // get v 16.16 step
3375 mov ebx, eax // copy it
3376 sar eax, 16 // get v int step
3377 shl ebx, 16 // get v frac step
3378 mov Tmap.DeltaVFrac, ebx // store it
3379 imul eax, Tmap.src_offset // calc texture step for v int step
3381 mov ebx, Tmap.DeltaU // get u 16.16 step
3382 mov ecx, ebx // copy it
3383 sar ebx, 16 // get the u int step
3384 shl ecx, 16 // get the u frac step
3385 mov Tmap.DeltaUFrac, ecx // store it
3386 add eax, ebx // calc uint + vint step
3387 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
3388 add eax, Tmap.src_offset // calc whole step + v carry
3389 mov Tmap.uv_delta[0], eax // save in v-carry slot
3394 ; setup initial coordinates
3395 mov esi, Tmap.UFixed // get u 16.16
3396 mov ebx, esi // copy it
3397 sar esi, 16 // get integer part
3398 shl ebx, 16 // get fractional part
3400 mov ecx, Tmap.VFixed // get v 16.16
3401 mov edx, ecx // copy it
3402 sar edx, 16 // get integer part
3403 shl ecx, 16 // get fractional part
3404 imul edx, Tmap.src_offset // calc texture scanline address
3405 add esi, edx // calc texture offset
3406 add esi, Tmap.pixptr // calc address
3413 // mov edx, Tmap.DeltaUFrac
3415 cmp Tmap.WidthModLength, 1
3420 mov ebx, Tmap.fx_l_right
3426 mov eax, Tmap.fx_dl_dx
3429 mov edx, Tmap.DeltaUFrac
3431 mov Tmap.DeltaUFrac, edx
3440 sub eax, Tmap.pScreenBits
3445 inc Tmap.WidthModLength
3446 mov eax,Tmap.WidthModLength
3450 mov Tmap.WidthModLength, eax
3454 mov al,[edi] // preread the destination cache line
// Leftover pixels are drawn two per pass (Skip0a / Skip1a) with
// Tmap.WidthModLength as the pass counter.
3461 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
3462 jle Skip0a // If pixel is covered, skip drawing
3463 mov [edx+0], ebp // Write new Z value
3465 // Get pixel and light it
3467 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3468 mov al, [esi] // Get texel into AL
3469 and ebx, 0ff00h // Clear out fractional part of EBX
3470 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3473 mov [edi+0],al // Write new pixel
3475 Skip0a: add ecx,Tmap.DeltaVFrac
3478 //add edx, 4 // Go to next
3479 add ebp,Tmap.fx_dwdx
3481 add ebx,Tmap.DeltaUFrac
3483 adc esi,Tmap.uv_delta[4*eax+4]
3487 cmp ebp, [edx+4] // Compare the Z depth of this pixel with zbuffer
3488 jle Skip1a // If pixel is covered, skip drawing
3489 mov [edx+4], ebp // Write new Z value
3491 // Get pixel and light it
3493 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3494 mov al, [esi] // Get texel into AL
3495 and ebx, 0ff00h // Clear out fractional part of EBX
3496 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3499 mov [edi+1],al // Write new pixel
3501 Skip1a: add ecx,Tmap.DeltaVFrac
3504 //add edx, 4 // Go to next
3505 add ebp,Tmap.fx_dwdx
3507 add ebx,Tmap.DeltaUFrac
3509 adc esi,Tmap.uv_delta[4*eax+4]
3514 dec Tmap.WidthModLength
// One more depth-tested pixel with no coordinate stepping after it -
// presumably the final (odd) pixel of the span; confirm against the labels.
3522 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
3523 jle Skip0c // If pixel is covered, skip drawing
3524 mov [edx+0], ebp // Write new Z value
3526 // Get pixel and light it
3528 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3529 mov al, [esi] // Get texel into AL
3530 and ebx, 0ff00h // Clear out fractional part of EBX
3531 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3534 mov [edi+0],al // Write new pixel
3540 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
3541 // xxx xxx xxx xxx xxx xxx xxx
3550 fldcw Tmap.OldFPUCW // restore the FPU
// tmapscan_pln8_zbuffered_pentium: z-buffered, lit, 8-bit textured scanline
// with the same structure as tmapscan_pln8_zbuffered_ppro.
//   The FPU interpolates 1/Z, U/Z and V/Z (Tmap.l.sw/u/v plus the fl_d?dx_wide
//   increments) and recovers U and V at span boundaries with a divide; between
//   boundaries u and v are stepped linearly in fixed point.
//   Per pixel, this variant copies ebx to eax, masks it to 16 bits and loads
//   the texel into AL before the depth-test branch, then indexes
//   gr_fade_table[eax] with a byte load if the pixel passes.
//   The width (Tmap.loop_count) is split into full 32-pixel spans
//   (Tmap.Subdivisions) plus a final span of 1..32 pixels
//   (Tmap.WidthModLength); when the width is an exact multiple of 32 the last
//   full span is treated as the final span.
//   The FPU control word is saved in Tmap.OldFPUCW on entry and reloaded at
//   the end.
3563 void tmapscan_pln8_zbuffered_pentium()
3578 // Put the FPU in low precision mode
3579 fstcw Tmap.OldFPUCW // store copy of CW
3580 mov ax,Tmap.OldFPUCW // get it in ax
3582 mov Tmap.FPUCW,ax // store it
3583 fldcw Tmap.FPUCW // load the FPU
3586 mov ecx, Tmap.loop_count // ecx = width
3587 mov edi, Tmap.dest_row_data // edi = dest pointer
3589 // edi = pointer to start pixel in dest dib
3592 mov eax,ecx // eax and ecx = width
3593 shr ecx,5 // ecx = width / subdivision length
3594 and eax,31 // eax = width mod subdivision length
3595 jnz some_left_over // any leftover?
3596 dec ecx // no, so special case last span
3597 mov eax,32 // it's 32 pixels long
3599 mov Tmap.Subdivisions,ecx // store widths
3600 mov Tmap.WidthModLength,eax
3602 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
3603 // st0 st1 st2 st3 st4 st5 st6 st7
3604 fld Tmap.l.v // V/ZL
3605 fld Tmap.l.u // U/ZL V/ZL
3606 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
3607 fld1 // 1 1/ZL U/ZL V/ZL
3608 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
3609 fld st // ZL ZL 1/ZL U/ZL V/ZL
3610 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
3611 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
3612 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
3614 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
3615 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
3617 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
3619 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
3620 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
3621 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
3622 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
3623 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
3625 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
3627 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
3628 // @todo overlap this guy
3629 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
3630 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
3631 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
3632 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
3633 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
3635 cmp ecx,0 // check for any full spans
3636 jle HandleLeftoverPixels
3640 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
3641 // UR VR V/ZR 1/ZR U/ZR UL VL
3643 // convert left side coords
3645 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
3646 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
3647 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
3649 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
3650 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
3651 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
3653 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
3655 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
3656 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
3657 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
3658 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
3660 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
3661 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
3663 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
3664 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
3665 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
3667 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
3668 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
3670 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
3671 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
3672 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
3673 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
3674 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
3675 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
3676 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
3679 // setup delta values
// uv_delta byte offset 4 = texture-pointer step without a v-fraction carry,
// byte offset 0 = the same step plus one src_offset (v-fraction carry).
3681 mov eax,Tmap.DeltaV // get v 16.16 step
3682 mov ebx,eax // copy it
3683 sar eax,16 // get v int step
3684 shl ebx,16 // get v frac step
3685 mov Tmap.DeltaVFrac,ebx // store it
3686 imul eax,Tmap.src_offset // calculate texture step for v int step
3688 mov ebx,Tmap.DeltaU // get u 16.16 step
3689 mov ecx,ebx // copy it
3690 sar ebx,16 // get u int step
3691 shl ecx,16 // get u frac step
3692 mov Tmap.DeltaUFrac,ecx // store it
3693 add eax,ebx // calculate uint + vint step
3694 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
3695 add eax,Tmap.src_offset // calculate whole step + v carry
3696 mov Tmap.uv_delta[0],eax // save in v-carry slot
3698 // setup initial coordinates
3699 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
3701 mov ebx,esi // copy it
3702 sar esi,16 // get integer part
3703 shl ebx,16 // get fractional part
3705 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
3707 mov edx,ecx // copy it
3708 sar edx,16 // get integer part
3709 shl ecx,16 // get fractional part
3710 imul edx,Tmap.src_offset // calc texture scanline address
3711 add esi,edx // calc texture offset
3712 add esi,Tmap.pixptr // calc address
3714 // set up affine registers
3720 mov ebp, Tmap.fx_dl_dx
3729 mov edx, Tmap.DeltaUFrac
3731 mov Tmap.DeltaUFrac, edx
3734 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
3735 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
3736 // This divide should happen while the pixel span is drawn.
3737 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
3740 // 32 pixel span code, drawn 4 pixels per InnerLooper pass
3741 // edi = dest dib bits at current pixel
3742 // esi = texture pointer at current u,v
3744 // ebx = u fraction 0.32 (its low 16 bits are copied to eax as the lighting index below)
3745 // ecx = v fraction 0.32
3746 // edx = z-buffer address for the current pixels (u frac step is added from Tmap.DeltaUFrac)
3747 // ebp = depth value tested against the z-buffer; eax indexes Tmap.uv_delta in the adc
3749 mov al,[edi] // preread the destination cache line
3750 mov al,[esi] // get texture pixel 0
3752 mov Tmap.InnerLooper, 32/4 // Set up loop counter
// NOTE(review): eax here is an offset relative to Tmap.pScreenBits, presumably
// used to derive the z-buffer address kept in edx - confirm.
3759 sub eax, Tmap.pScreenBits
3766 mov eax, ebx // Get lighting value from BH into AH
3767 and eax, 0ffffh; // Clear upper bits of EAX
3769 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
3770 mov al, [esi] // Get texel into AL
3771 jle Skip0 // If pixel is covered, skip drawing
3773 mov [edx+0], ebp // Write new Z value
3775 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
3776 mov [edi+0],al // Write new pixel
3778 Skip0: add ecx,Tmap.DeltaVFrac
3781 //add edx, 4 // Go to next
3782 add ebp,Tmap.fx_dwdx
3784 add ebx,Tmap.DeltaUFrac
3786 adc esi,Tmap.uv_delta[4*eax+4]
3790 mov eax, ebx // Get lighting value from BH into AH
3791 and eax, 0ffffh; // Clear upper bits of EAX
3793 cmp ebp, [edx+4] // Compare the Z depth of this pixel with zbuffer
3794 mov al, [esi] // Get texel into AL
3795 jle Skip1 // If pixel is covered, skip drawing
3798 mov [edx+4], ebp // Write new Z value
3800 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
3801 mov [edi+1],al // Write new pixel
3803 Skip1: add ecx,Tmap.DeltaVFrac
3806 //add edx, 4 // Go to next
3807 add ebp,Tmap.fx_dwdx
3809 add ebx,Tmap.DeltaUFrac
3811 adc esi,Tmap.uv_delta[4*eax+4]
3815 mov eax, ebx // Get lighting value from BH into AH
3816 and eax, 0ffffh; // Clear upper bits of EAX
3819 cmp ebp, [edx+8] // Compare the Z depth of this pixel with zbuffer
3820 mov al, [esi] // Get texel into AL
3821 jle Skip2 // If pixel is covered, skip drawing
3824 mov [edx+8], ebp // Write new Z value
3826 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
3827 mov [edi+2],al // Write new pixel
3829 Skip2: add ecx,Tmap.DeltaVFrac
3832 //add edx, 4 // Go to next
3833 add ebp,Tmap.fx_dwdx
3835 add ebx,Tmap.DeltaUFrac
3837 adc esi,Tmap.uv_delta[4*eax+4]
3840 mov eax, ebx // Get lighting value from BH into AH
3841 and eax, 0ffffh; // Clear upper bits of EAX
3844 cmp ebp, [edx+12] // Compare the Z depth of this pixel with zbuffer
3845 mov al, [esi] // Get texel into AL
3846 jle Skip3 // If pixel is covered, skip drawing
3849 mov [edx+12], ebp // Write new Z value
3851 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
3852 mov [edi+3],al // Write new pixel
3854 Skip3: add ecx,Tmap.DeltaVFrac
3857 //add edx, 4 // Go to next
3858 add ebp,Tmap.fx_dwdx
3860 add ebx,Tmap.DeltaUFrac
3862 adc esi,Tmap.uv_delta[4*eax+4]
3867 dec Tmap.InnerLooper
3872 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
3873 // ZR V/ZR 1/ZR U/ZR UL VL
3875 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
3876 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
3877 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
3878 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
3880 dec Tmap.Subdivisions // decrement span count
3881 jnz SpanLoop // loop back
// Final span of Tmap.WidthModLength pixels: the right-edge U/Z, V/Z and 1/Z are
// taken from Tmap.r minus Tmap.deltas instead of the fl_d?dx_wide increments,
// and the per-pixel deltas are divided by the step count (WidthModLength - 1).
3884 HandleLeftoverPixels:
3886 mov esi,Tmap.pixptr // load texture pointer
3888 // edi = dest dib bits
3889 // esi = current texture dib bits
3890 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
3891 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
3893 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
3894 jz FPUReturn ; nope, pop the FPU and bail
3896 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
3898 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
3899 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
3900 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
3902 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
3903 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
3904 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
3906 dec Tmap.WidthModLength ; calc how many steps to take
3907 jz OnePixelSpan ; just one, do not do deltas
3909 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
3912 // @todo rearrange things so we don't need these two instructions
3913 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
3914 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
3916 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
3917 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
3918 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
3919 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
3920 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
3921 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
3923 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
3925 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
3926 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
3928 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
3930 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
3931 fxch st(1) ; VR UR inv. inv. inv. dU VL
3932 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
3933 fxch st(6) ; dV UR inv. inv. inv. dU VR
3935 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
3936 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
3937 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
3939 fxch st(4) ; dU inv. inv. inv. UR VR
3940 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
3941 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
3942 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
3944 // @todo gross! these are to line up with the other loop
3945 fld st(1) ; inv. inv. inv. inv. UR VR
3946 fld st(2) ; inv. inv. inv. inv. inv. UR VR
3949 // setup delta values
3950 mov eax, Tmap.DeltaV // get v 16.16 step
3951 mov ebx, eax // copy it
3952 sar eax, 16 // get v int step
3953 shl ebx, 16 // get v frac step
3954 mov Tmap.DeltaVFrac, ebx // store it
3955 imul eax, Tmap.src_offset // calc texture step for v int step
3957 mov ebx, Tmap.DeltaU // get u 16.16 step
3958 mov ecx, ebx // copy it
3959 sar ebx, 16 // get the u int step
3960 shl ecx, 16 // get the u frac step
3961 mov Tmap.DeltaUFrac, ecx // store it
3962 add eax, ebx // calc uint + vint step
3963 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
3964 add eax, Tmap.src_offset // calc whole step + v carry
3965 mov Tmap.uv_delta[0], eax // save in v-carry slot
3970 ; setup initial coordinates
3971 mov esi, Tmap.UFixed // get u 16.16
3972 mov ebx, esi // copy it
3973 sar esi, 16 // get integer part
3974 shl ebx, 16 // get fractional part
3976 mov ecx, Tmap.VFixed // get v 16.16
3977 mov edx, ecx // copy it
3978 sar edx, 16 // get integer part
3979 shl ecx, 16 // get fractional part
3980 imul edx, Tmap.src_offset // calc texture scanline address
3981 add esi, edx // calc texture offset
3982 add esi, Tmap.pixptr // calc address
3989 // mov edx, Tmap.DeltaUFrac
3991 cmp Tmap.WidthModLength, 1
3996 mov ebx, Tmap.fx_l_right
4002 mov eax, Tmap.fx_dl_dx
4005 mov edx, Tmap.DeltaUFrac
4007 mov Tmap.DeltaUFrac, edx
4016 sub eax, Tmap.pScreenBits
4021 inc Tmap.WidthModLength
4022 mov eax,Tmap.WidthModLength
4026 mov Tmap.WidthModLength, eax
4030 mov al,[edi] // preread the destination cache line
// Leftover pixels are drawn two per pass (Skip0a / Skip1a) with
// Tmap.WidthModLength as the pass counter.
4037 mov eax, ebx // Get lighting value from BH into AH
4038 and eax, 0ffffh; // Clear upper bits of EAX
4040 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
4041 mov al, [esi] // Get texel into AL
4042 jle Skip0a // If pixel is covered, skip drawing
4045 mov [edx+0], ebp // Write new Z value
4047 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
4048 mov [edi+0],al // Write new pixel
4050 Skip0a: add ecx,Tmap.DeltaVFrac
4053 //add edx, 4 // Go to next
4054 add ebp,Tmap.fx_dwdx
4056 add ebx,Tmap.DeltaUFrac
4058 adc esi,Tmap.uv_delta[4*eax+4]
4062 mov eax, ebx // Get lighting value from BH into AH
4063 and eax, 0ffffh; // Clear upper bits of EAX
4066 cmp ebp, [edx+4] // Compare the Z depth of this pixel with zbuffer
4067 mov al, [esi] // Get texel into AL
4068 jle Skip1a // If pixel is covered, skip drawing
4070 mov [edx+4], ebp // Write new Z value
4072 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
4073 mov [edi+1],al // Write new pixel
4075 Skip1a: add ecx,Tmap.DeltaVFrac
4078 //add edx, 4 // Go to next
4079 add ebp,Tmap.fx_dwdx
4081 add ebx,Tmap.DeltaUFrac
4083 adc esi,Tmap.uv_delta[4*eax+4]
4088 dec Tmap.WidthModLength
// One more depth-tested pixel with no coordinate stepping after it -
// presumably the final (odd) pixel of the span; confirm against the labels.
// Here the fade-table lookup is done before the z value is written.
4096 mov eax, ebx // Get lighting value from BH into AH
4097 and eax, 0ffffh; // Clear upper bits of EAX
4099 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
4100 mov al, [esi] // Get texel into AL
4101 jle Skip0c // If pixel is covered, skip drawing
4103 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
4105 mov [edx+0], ebp // Write new Z value
4107 mov [edi+0],al // Write new pixel
4113 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
4114 // xxx xxx xxx xxx xxx xxx xxx
4123 fldcw Tmap.OldFPUCW // restore the FPU
// tmapscan_pln8_zbuffered: entry point that forwards to one of the two
// CPU-specific implementations of the z-buffered scanline routine.
// NOTE(review): the condition that chooses between the two calls is not
// visible from here - confirm how the CPU type is selected.
4136 void tmapscan_pln8_zbuffered()
4139 // Pentium Pro optimized code.
4140 tmapscan_pln8_zbuffered_ppro();
// Pentium variant of the same scanline routine.
4142 tmapscan_pln8_zbuffered_pentium();
// tmapscan_lnaa8_zbuffered
// Draws one scanline in which every texel is combined with the pixel that is
// already in the destination by indexing the Current_alphacolor lookup table,
// and only for pixels whose depth value passes the compare against the value
// read through edx.  Takes no arguments; everything is read from the global
// Tmap structure (fx_u, fx_v, fx_du_dx, fx_dv_dx, fx_dwdx, src_offset, pixptr,
// dest_row_data, pScreenBits, loop_count).
//
// Per pixel the visible code does:
//   1. cmp ebp,[edx+4*n] / jle SkipN  -- skip the pixel when ebp <= stored depth
//   2. al = destination pixel, ah = texel, eax += Tmap.lookup, al = [eax]
//   3. store al to the destination
//   4. step depth (ebp), v fraction (ecx), u fraction (ebx) and the texture
//      pointer (esi)
// The depth store is commented out in every section, so the visible code only
// reads depth values and never writes them.
//
// NOTE(review): step 2 needs the upper 16 bits of eax to be zero, but step 4
// of the previous pixel leaves eax = 0 or -1 (sbb eax,eax).  No masking
// instruction is visible in this excerpt -- confirm one precedes each
// "add eax, Tmap.lookup".
4146 void tmapscan_lnaa8_zbuffered()
4148 #ifndef HARDWARE_ONLY
// Base address of the blend table; the pixel code adds (texel << 8 | dest) to it.
4149 Tmap.lookup = (uint)&Current_alphacolor->table.lookup[0][0];
4163 ; setup delta values
4164 mov eax, Tmap.fx_dv_dx // get v 16.16 step
4165 mov ebx, eax // copy it
4166 sar eax, 16 // get v int step
4167 shl ebx, 16 // get v frac step (moved to the top 16 bits so overflow sets carry)
4168 mov Tmap.DeltaVFrac, ebx // store it
4169 imul eax, Tmap.src_offset // calc texture step for v int step
4171 mov ebx, Tmap.fx_du_dx // get u 16.16 step
4172 mov ecx, ebx // copy it
4173 sar ebx, 16 // get the u int step
4174 shl ecx, 16 // get the u frac step (moved to the top 16 bits so overflow sets carry)
4175 mov Tmap.DeltaUFrac, ecx // store it
4176 add eax, ebx // calc uint + vint step
// uv_delta is later addressed as uv_delta[4*c+4] with c = 0 or -1 taken from
// "sbb eax,eax": no v-fraction carry selects [4], a carry selects [0], which
// holds one additional src_offset.
4178 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
4179 add eax, Tmap.src_offset // calc whole step + v carry
4180 mov Tmap.uv_delta[0], eax // save in v-carry slot
4182 ; setup initial coordinates
4183 mov esi, Tmap.fx_u // get u 16.16
4184 mov ebx, esi // copy it
4185 sar esi, 16 // get integer part
4186 shl ebx, 16 // get fractional part
4188 mov ecx, Tmap.fx_v // get v 16.16
4189 mov edx, ecx // copy it
4190 sar edx, 16 // get integer part
4191 shl ecx, 16 // get fractional part
4192 imul edx, Tmap.src_offset // calc texture scanline address
4193 add esi, edx // calc texture offset
4194 add esi, Tmap.pixptr // calc address
4196 ; set edi = address of first pixel to modify
4197 mov edi, Tmap.dest_row_data
// The byte read below is not consumed by any visible instruction (eax is
// reloaded further down); presumably a cache-warming read -- confirm.
4200 mov al,[edi] // get the destination pixel
// NOTE(review): the instruction that loads eax before this subtract is not
// visible here; presumably eax holds the destination address, making the
// result an offset from the start of the screen buffer -- confirm.
4205 sub eax, Tmap.pScreenBits
4209 mov eax, Tmap.loop_count // eax = loop counter for this scanline
// Any instructions between the load above and the store below are not visible
// in this excerpt.
4214 mov Tmap.num_big_steps, eax // counter for the unrolled loop (decremented after the four pixel sections)
4215 and Tmap.loop_count, 3 // keep only the low two bits as the remainder count
// NOTE(review): the header below says 8 pixels, but only four pixel sections
// (destination offsets 0..3) are visible before num_big_steps is decremented.
4219 // 8 pixel span code
4220 // edi = dest dib bits at current pixel
4221 // esi = texture pointer at current u,v
4223 // ebx = u fraction 0.32
4224 // ecx = v fraction 0.32
4225 // edx = base address of the depth values compared below (4 bytes per pixel)
4226 // ebp = depth value of the current pixel, stepped by Tmap.fx_dwdx; eax = blend index / v carry scratch
// ---- unrolled loop, pixel 0 ----
4228 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
4229 jle Skip0a // If pixel is covered, skip drawing
4230 // mov [edx+4*0], ebp // Write new Z value
4231 mov al,[edi+0] // get the destination pixel
4232 mov ah,[esi] // get the texel for pixel 0
4234 add eax, Tmap.lookup // eax = address of the table entry for (texel, dest)
4235 mov al, [eax] // blend them
4236 mov [edi+0],al // store pixel
4238 add ebp, Tmap.fx_dwdx // step the depth value to the next pixel
4239 add ecx,Tmap.DeltaVFrac // increment v fraction
4240 sbb eax,eax // get -1 if carry
4241 add ebx,Tmap.DeltaUFrac // increment u fraction
4242 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
// ---- unrolled loop, pixel 1 ----
4245 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
4246 jle Skip1a // If pixel is covered, skip drawing
4247 // mov [edx+4*1], ebp // Write new Z value
4248 mov al,[edi+1] // get the destination pixel
4249 mov ah,[esi] // get the texel for pixel 1
4251 add eax, Tmap.lookup // eax = address of the table entry for (texel, dest)
4252 mov al, [eax] // blend them
4253 mov [edi+1],al // store pixel
4255 add ebp, Tmap.fx_dwdx // step the depth value to the next pixel
4256 add ecx,Tmap.DeltaVFrac // increment v fraction
4257 sbb eax,eax // get -1 if carry
4258 add ebx,Tmap.DeltaUFrac // increment u fraction
4259 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
// ---- unrolled loop, pixel 2 ----
4261 cmp ebp, [edx+4*2] // Compare the Z depth of this pixel with zbuffer
4262 jle Skip2a // If pixel is covered, skip drawing
4263 // mov [edx+4*2], ebp // Write new Z value
4264 mov al,[edi+2] // get the destination pixel
4265 mov ah,[esi] // get the texel for pixel 2
4267 add eax, Tmap.lookup // eax = address of the table entry for (texel, dest)
4268 mov al, [eax] // blend them
4269 mov [edi+2],al // store pixel
4271 add ebp, Tmap.fx_dwdx // step the depth value to the next pixel
4272 add ecx,Tmap.DeltaVFrac // increment v fraction
4273 sbb eax,eax // get -1 if carry
4274 add ebx,Tmap.DeltaUFrac // increment u fraction
4275 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
// ---- unrolled loop, pixel 3 ----
4277 cmp ebp, [edx+4*3] // Compare the Z depth of this pixel with zbuffer
4278 jle Skip3a // If pixel is covered, skip drawing
4279 // mov [edx+4*3], ebp // Write new Z value
4280 mov al,[edi+3] // get the destination pixel
4281 mov ah,[esi] // get the texel for pixel 3
4283 add eax, Tmap.lookup // eax = address of the table entry for (texel, dest)
4284 mov al, [eax] // blend them
4285 mov [edi+3],al // store pixel
4287 add ebp, Tmap.fx_dwdx // step the depth value to the next pixel
4288 add ecx,Tmap.DeltaVFrac // increment v fraction
4289 sbb eax,eax // get -1 if carry
4290 add ebx,Tmap.DeltaUFrac // increment u fraction
4291 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
4295 dec Tmap.num_big_steps // one unrolled pass done
// ---- remainder handling ----
// Two pixel sections (offsets 0 and 1) and then a single-pixel section follow.
// The tests and branches that select between them are not visible in this
// excerpt.
4301 mov eax,Tmap.loop_count // remainder count left by the "and ..., 3" above
4306 mov Tmap.loop_count, eax // store back (whatever modifies eax in between is not visible here)
4310 mov al,[edi] // get the destination pixel
// ---- remainder, pixel 0 of a pair ----
4314 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
4315 jle Skip0b // If pixel is covered, skip drawing
4316 // mov [edx+4*0], ebp // Write new Z value
4317 mov al,[edi+0] // get the destination pixel
4318 mov ah,[esi] // get the texel for pixel 0
4320 add eax, Tmap.lookup // eax = address of the table entry for (texel, dest)
4321 mov al, [eax] // blend them
4322 mov [edi+0],al // store pixel
4324 add ebp, Tmap.fx_dwdx // step the depth value to the next pixel
4325 add ecx,Tmap.DeltaVFrac // increment v fraction
4326 sbb eax,eax // get -1 if carry
4327 add ebx,Tmap.DeltaUFrac // increment u fraction
4328 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
// ---- remainder, pixel 1 of a pair ----
4330 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
4331 jle Skip1b // If pixel is covered, skip drawing
4332 // mov [edx+4*1], ebp // Write new Z value
4333 mov al,[edi+1] // get the destination pixel
4334 mov ah,[esi] // get the texel for pixel 1
4336 add eax, Tmap.lookup // eax = address of the table entry for (texel, dest)
4337 mov al, [eax] // blend them
4338 mov [edi+1],al // store pixel
4340 add ebp, Tmap.fx_dwdx // step the depth value to the next pixel
4341 add ecx,Tmap.DeltaVFrac // increment v fraction
4342 sbb eax,eax // get -1 if carry
4343 add ebx,Tmap.DeltaUFrac // increment u fraction
4344 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
// ---- final single pixel: no coordinate stepping is visible after the store ----
4355 cmp ebp, [edx] // Compare the Z depth of this pixel with zbuffer
4356 jle Skip0c // If pixel is covered, skip drawing
4357 // mov [edx], ebp // Write new Z value
4358 mov al,[edi] // get the destination pixel
4359 mov ah,[esi] // get the texel for this pixel
4361 add eax, Tmap.lookup // eax = address of the table entry for (texel, dest)
4362 mov al, [eax] // blend them
4363 mov [edi],al // store pixel
4381 void tmapscan_lnaa8()
4383 #ifndef HARDWARE_ONLY
4384 if (gr_zbuffering) {
4385 switch(gr_zbuffering_mode) {
4388 case GR_ZBUFF_FULL: // both
4389 case GR_ZBUFF_WRITE: // write only
4390 case GR_ZBUFF_READ: // read only
4391 tmapscan_lnaa8_zbuffered();
4397 Tmap.lookup = (uint)&Current_alphacolor->table.lookup[0][0];
4411 ; setup delta values
4412 mov eax, Tmap.fx_dv_dx // get v 16.16 step
4413 mov ebx, eax // copy it
4414 sar eax, 16 // get v int step
4415 shl ebx, 16 // get v frac step
4416 mov Tmap.DeltaVFrac, ebx // store it
4417 imul eax, Tmap.src_offset // calc texture step for v int step
4419 mov ebx, Tmap.fx_du_dx // get u 16.16 step
4420 mov ecx, ebx // copy it
4421 sar ebx, 16 // get the u int step
4422 shl ecx, 16 // get the u frac step
4423 mov Tmap.DeltaUFrac, ecx // store it
4424 add eax, ebx // calc uint + vint step
4426 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
4427 add eax, Tmap.src_offset // calc whole step + v carry
4428 mov Tmap.uv_delta[0], eax // save in v-carry slot
4430 ; setup initial coordinates
4431 mov esi, Tmap.fx_u // get u 16.16
4432 mov ebx, esi // copy it
4433 sar esi, 16 // get integer part
4434 shl ebx, 16 // get fractional part
4436 mov ecx, Tmap.fx_v // get v 16.16
4437 mov edx, ecx // copy it
4438 sar edx, 16 // get integer part
4439 shl ecx, 16 // get fractional part
4440 imul edx, Tmap.src_offset // calc texture scanline address
4441 add esi, edx // calc texture offset
4442 add esi, Tmap.pixptr // calc address
4444 ; set edi = address of first pixel to modify
4445 mov edi, Tmap.dest_row_data
4447 mov edx, Tmap.DeltaUFrac
4450 mov al,[edi] // get the destination pixel
4452 mov ebp, Tmap.loop_count
4457 mov Tmap.num_big_steps, ebp
4458 and Tmap.loop_count, 3
4463 // 8 pixel span code
4464 // edi = dest dib bits at current pixel
4465 // esi = texture pointer at current u,v
4467 // ebx = u fraction 0.32
4468 // ecx = v fraction 0.32
4469 // edx = u frac step
4470 // ebp = v carry scratch
4473 mov al,[edi+0] // get the destination pixel
4474 mov ah,[esi] // get texture pixel 0
4475 add ecx,Tmap.DeltaVFrac // increment v fraction
4476 sbb ebp,ebp // get -1 if carry
4477 add ebx,edx // increment u fraction
4478 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4479 add eax, Tmap.lookup
4480 mov al, [eax] // blend them
4481 mov [edi+0],al // store pixel
4483 mov al,[edi+1] // get the destination pixel
4484 mov ah,[esi] // get texture pixel 0
4485 add ecx,Tmap.DeltaVFrac // increment v fraction
4486 sbb ebp,ebp // get -1 if carry
4487 add ebx,edx // increment u fraction
4488 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4489 add eax, Tmap.lookup
4490 mov al, [eax] // blend them
4491 mov [edi+1],al // store pixel
4493 mov al,[edi+2] // get the destination pixel
4494 mov ah,[esi] // get texture pixel 0
4495 add ecx,Tmap.DeltaVFrac // increment v fraction
4496 sbb ebp,ebp // get -1 if carry
4497 add ebx,edx // increment u fraction
4498 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4499 add eax, Tmap.lookup
4500 mov al, [eax] // blend them
4501 mov [edi+2],al // store pixel
4503 mov al,[edi+3] // get the destination pixel
4504 mov ah,[esi] // get texture pixel 0
4505 add ecx,Tmap.DeltaVFrac // increment v fraction
4506 sbb ebp,ebp // get -1 if carry
4507 add ebx,edx // increment u fraction
4508 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4509 add eax, Tmap.lookup
4510 mov al, [eax] // blend them
4511 mov [edi+3],al // store pixel
4514 dec Tmap.num_big_steps
4520 mov ebp,Tmap.loop_count
4525 mov Tmap.loop_count, ebp
4529 mov al,[edi] // get the destination pixel
4533 mov al,[edi+0] // get the destination pixel
4534 mov ah,[esi] // get texture pixel 0
4535 add ecx,Tmap.DeltaVFrac // increment v fraction
4536 sbb ebp,ebp // get -1 if carry
4537 add ebx,edx // increment u fraction
4538 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4539 add eax, Tmap.lookup
4540 mov al, [eax] // blend them
4541 mov [edi+0],al // store pixel
4543 mov al,[edi+1] // get the destination pixel
4544 mov ah,[esi] // get texture pixel 0
4545 add ecx,Tmap.DeltaVFrac // increment v fraction
4546 sbb ebp,ebp // get -1 if carry
4547 add ebx,edx // increment u fraction
4548 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4549 add eax, Tmap.lookup
4550 mov al, [eax] // blend them
4551 mov [edi+1],al // store pixel
4561 mov al,[edi] // get the destination pixel
4562 mov ah,[esi] // get texture pixel 0
4563 add eax, Tmap.lookup
4564 mov al, [eax] // blend them
4565 mov [edi],al // store pixel