2 * $Logfile: /Freespace2/code/Graphics/TmapScanline.cpp $
7 * Routines to draw one texture mapped scanline.
10 * Revision 1.1 2002/05/03 03:28:09 root
14 * 5 12/02/98 5:47p Dave
15 * Put in interface xstr code. Converted barracks screen to new format.
17 * 4 11/30/98 5:31p Dave
18 * Fixed up Fred support for software mode.
20 * 3 11/30/98 1:07p Dave
21 * 16 bit conversion, first run.
23 * 2 10/07/98 10:53a Dave
26 * 1 10/07/98 10:49a Dave
28 * 21 4/20/98 4:44p John
29 * Fixed problems with black being xparent on model cache renders. Made
30 * model cache key off of detail level setting and framerate.
32 * 20 4/09/98 7:58p John
33 * Cleaned up tmapper code a bit. Put NDEBUG around some ndebug stuff.
34 * Took out XPARENT flag settings in all the alpha-blended texture stuff.
36 * 19 3/22/98 2:33p John
37 * Took out fx_v/v_right. Made fx_u etc get calculated in tmapper.
39 * 18 3/10/98 4:19p John
40 * Cleaned up graphics lib. Took out most unused gr functions. Made D3D
41 * & Glide have popups and print screen. Took out all >8bpp software
42 * support. Made Fred zbuffer. Made zbuffer allocate dynamically to
43 * support Fred. Made zbuffering key off of functions rather than one
46 * 17 12/10/96 10:37a John
47 * Restructured texture mapper to remove some overhead from each scanline
48 * setup. This gave about a 30% improvement drawing trans01.pof, which is
49 * a really complex model. In the process, I cleaned up the scanline
50 * functions and separated them into different modules for each pixel
53 * 16 12/02/96 4:03p John
54 * made texture divide pipeline better. 2.5% speedup.
56 * 15 11/26/96 6:50p John
57 * Added some more hicolor primitives. Made windowed mode run as current
58 * bpp, if bpp is 8,16,or 32.
60 * 14 11/18/96 9:58a John
63 * 13 11/07/96 6:19p John
64 * Added a bunch of 16bpp primitives so the game sort of runs in 16bpp
67 * 12 11/07/96 3:49p John
68 * Fixed some old 'c' inner loop code for testing.
70 * 11 11/07/96 2:17p John
71 * Took out the OldTmapper stuff.
73 * 10 11/05/96 4:05p John
74 * Added roller. Added code to draw a distant planet. Made bm_load
75 * return -1 if invalid bitmap.
77 * 9 10/31/96 7:20p John
78 * Added per,tiled tmapper. Made models tile if they use 64x64 textures.
80 * 8 10/26/96 1:40p John
81 * Added some new primitives to the 2d library and
82 * cleaned up some old ones.
89 #include "grinternal.h"
91 #include "tmapscanline.h"
97 // Stub needed only to keep warning 4725 away. See PsTypes.h for details why.
98 void disable_warning_4725_stub_ts32()
// Tiled scanline routines, one per square texture size. They are only declared
// here; tmapscan_pln8_tiled() selects among them by bitmap dimensions.
103 extern void tmapscan_pln8_tiled_256x256();
104 extern void tmapscan_pln8_tiled_128x128();
105 extern void tmapscan_pln8_tiled_64x64();
106 extern void tmapscan_pln8_tiled_32x32();
107 extern void tmapscan_pln8_tiled_16x16();
// Draws one tiled scanline by dispatching on the size of the current bitmap
// (Tmap.bp). Square textures of 256, 128, 64, 32 and 16 pixels each have a
// dedicated routine; width and height must both match for a routine to be used.
// NOTE(review): what happens for any other size is not visible in this excerpt
// beyond the trailing comment.
110 void tmapscan_pln8_tiled()
112 if ( (Tmap.bp->w == 256) && (Tmap.bp->h == 256) ) {
113 tmapscan_pln8_tiled_256x256();
114 } else if ( (Tmap.bp->w == 128) && (Tmap.bp->h == 128) ) {
115 tmapscan_pln8_tiled_128x128();
116 } else if ( (Tmap.bp->w == 64) && (Tmap.bp->h == 64) ) {
117 tmapscan_pln8_tiled_64x64();
118 } else if ( (Tmap.bp->w == 32) && (Tmap.bp->h == 32) ) {
119 tmapscan_pln8_tiled_32x32();
120 } else if ( (Tmap.bp->w == 16) && (Tmap.bp->h == 16) ) {
121 tmapscan_pln8_tiled_16x16();
123 // argh! write another texture mapper!
// Walks the current scanline (Tmap.loop_count pixels starting at
// Tmap.dest_row_data) alongside the matching z-buffer entries.
// NOTE(review): the per-pixel loop body is not visible in this excerpt; going
// by the function name it presumably stores z values - confirm.
129 void tmapscan_write_z()
135 dptr = (ubyte *)Tmap.dest_row_data;
// zbuf points at the gr_zbuffer entry whose index equals the destination
// pointer's offset from Tmap.pScreenBits.
// NOTE(review): the (uint) casts of pointers look like they assume pointers
// fit in a uint - confirm before building for a wider pointer size.
140 uint *zbuf = (uint *)&gr_zbuffer[(uint)dptr-(uint)Tmap.pScreenBits];
// One iteration per pixel of the scanline.
142 for (i=0; i<Tmap.loop_count; i++ ) {
// Z-buffered variant of the flat-colour, per-pixel-lit scanline.
// The colour is the current 8-bit colour (gr_screen.current_color.raw8); each
// stored pixel is looked up in gr_fade_table from the light level and colour.
// NOTE(review): the z test and the stepping of the light value l are not
// visible in this excerpt.
150 void tmapscan_flat_gouraud_zbuffered()
157 dptr = (ubyte *)Tmap.dest_row_data;
158 c = gr_screen.current_color.raw8;
// zbuf points at the gr_zbuffer entry whose index equals the destination
// pointer's offset from Tmap.pScreenBits.
166 uint *zbuf = (uint *)&gr_zbuffer[(uint)dptr-(uint)Tmap.pScreenBits];
168 for (i=0; i<Tmap.loop_count; i++ ) {
// Fade table index is (integer light level << 8) + colour, i.e. the light
// level selects a run of 256 entries and the colour selects within it.
171 *dptr = gr_fade_table[(f2i(l)<<8)+c];
180 // ADAM: Change Nebula colors here:
181 #define NEBULA_COLORS 20
// Fills one scanline with nebula colour indices derived from the blue
// component of the interpolated light (Tmap.l.b / Tmap.deltas.b).
// Two accumulators are kept in 8.8 fixed point: l1, and l2 which is offset by
// half a unit so that alternating pixels dither between adjacent indices.
// NOTE(review): the statements that advance l1, l2 and dptr are not visible
// in this excerpt.
183 void tmapscan_nebula8()
188 dptr = (ubyte *)Tmap.dest_row_data;
// Highest usable colour index, as a float.
190 float max_neb_color = i2fl(NEBULA_COLORS-1);
// Starting value scaled to 0..max_neb_color and shifted up by 8 bits.
192 l1 = (int)(Tmap.l.b*max_neb_color*256.0f);
193 l2 = l1 + 256/2; // dithering
// Step is scaled by 2.0: pixels are written in pairs, so each accumulator
// presumably advances once per two pixels.
194 dldx = (int)(Tmap.deltas.b*max_neb_color*2.0f*256.0f);
196 #ifdef USE_INLINE_ASM
197 // memset( dptr, 31, Tmap.loop_count );
217 _asm mov ecx, Tmap.loop_count
// C path: full pixel pairs first ...
248 if ( Tmap.loop_count > 1 ) {
249 for (i=0; i<Tmap.loop_count/2; i++ ) {
// Integer part (bits 8..15) of each accumulator is the colour index.
250 dptr[0] = (ubyte)((l1&0xFF00)>>8);
252 dptr[1] = (ubyte)((l2&0xFF00)>>8);
// ... then the single trailing pixel when the count is odd.
257 if ( Tmap.loop_count & 1 ) {
258 dptr[0] = (ubyte)((l1&0xFF00)>>8);
// Flat-colour scanline with per-pixel lighting.
// Every listed z-buffering mode (full, write-only, read-only) is handed to
// tmapscan_flat_gouraud_zbuffered(). Two non-z-buffered paths are visible:
// with Current_alphacolor set, the existing destination pixel is blended
// through the alpha colour's lookup table; otherwise the current 8-bit colour
// is shaded through gr_fade_table.
// NOTE(review): the break/return statements, the condition guarding the
// switch, and the stepping of l are not visible in this excerpt.
265 void tmapscan_flat_gouraud()
268 switch(gr_zbuffering_mode) {
271 case GR_ZBUFF_FULL: // both
272 tmapscan_flat_gouraud_zbuffered();
274 case GR_ZBUFF_WRITE: // write only
275 tmapscan_flat_gouraud_zbuffered();
277 case GR_ZBUFF_READ: // read only
278 tmapscan_flat_gouraud_zbuffered();
// Alpha-blended path.
284 if ( Current_alphacolor ) {
285 ubyte *lookup = &Current_alphacolor->table.lookup[0][0];
291 dptr = (ubyte *)Tmap.dest_row_data;
296 for (i=0; i<Tmap.loop_count; i++ ) {
// Row is chosen by l scaled by 16, column by the pixel already on screen.
297 *dptr = lookup[f2i(l*16)*256+*dptr];
// Opaque path.
308 dptr = (ubyte *)Tmap.dest_row_data;
309 c = gr_screen.current_color.raw8;
314 for (i=0; i<Tmap.loop_count; i++ ) {
// Row is chosen by l scaled by 32, column by the current colour.
315 *dptr = gr_fade_table[f2i(l*32)*256+c];
// Z-buffered flat-colour scanline in the current 8-bit colour.
// For each pixel, Tmap.fx_w is compared with the stored z-buffer value; when
// fx_w is greater, the stored value is replaced. fx_w advances by
// Tmap.fx_dwdx per pixel.
// NOTE(review): the pixel store and the dptr increment are not visible in
// this excerpt.
322 void tmapscan_flat8_zbuffered()
327 dptr = (ubyte *)Tmap.dest_row_data;
328 c = gr_screen.current_color.raw8;
331 for (i=0; i<Tmap.loop_count; i++ ) {
// Z-buffer index is the destination pointer's offset from the screen base.
332 int tmp = (uint)dptr-Tmap.pScreenBits;
333 if ( Tmap.fx_w > (int)gr_zbuffer[tmp] ) {
334 gr_zbuffer[tmp] = Tmap.fx_w;
337 Tmap.fx_w += Tmap.fx_dwdx;
// Flat-colour scanline in the current 8-bit colour.
// Full and read-only z-buffering modes go through tmapscan_flat8_zbuffered();
// without z-buffering the scanline is a plain memset of the colour.
// NOTE(review): the body of the write-only case, the break/return statements
// and the condition guarding the switch are not visible in this excerpt.
342 void tmapscan_flat8()
345 switch(gr_zbuffering_mode) {
348 case GR_ZBUFF_FULL: // both
349 tmapscan_flat8_zbuffered();
351 case GR_ZBUFF_WRITE: // write only
354 case GR_ZBUFF_READ: // read only
355 tmapscan_flat8_zbuffered();
// Fill Tmap.loop_count bytes of the destination row with the colour.
360 memset( (ubyte *)Tmap.dest_row_data, gr_screen.current_color.raw8, Tmap.loop_count );
// Forward declaration; called by tmapscan_pln8() for the z-buffered modes.
363 void tmapscan_pln8_zbuffered();
// Draws one perspective-corrected, lit, 8-bit textured scanline using inline
// x86/x87 assembly. tmapscan_pln8() chooses between this routine and
// tmapscan_pln8_pentium(); the visible difference is the shading lookup used
// in the 32-pixel span loop.
//
// Outline, as far as it is visible in this excerpt:
//  - The FPU control word is saved in Tmap.OldFPUCW, a modified copy is
//    loaded from Tmap.FPUCW, and the saved word is reloaded before returning.
//  - Tmap.loop_count pixels are split into full 32-pixel spans
//    (Tmap.Subdivisions) and a final span (Tmap.WidthModLength).
//  - At each span boundary U and V are recovered from U/Z, V/Z and 1/Z with
//    an fdiv, converted to 16.16 fixed point, and stepped linearly across
//    the span. The fdiv for the next span is issued before the pixel loop.
//  - Texel stepping keeps the u and v fractions in ebx and ecx. The carry out
//    of the v fraction selects Tmap.uv_delta[0] (whole step plus one texture
//    row) or Tmap.uv_delta[4] (whole step only); the carry out of the u
//    fraction is added by adc.
//  - Each texel is shaded through gr_fade_table before it is stored.
// NOTE(review): labels, the ebp save/restore and several setup instructions
// are not visible in this excerpt.
366 void tmapscan_pln8_ppro()
381 // Put the FPU in low precision mode
382 fstcw Tmap.OldFPUCW // store copy of CW
383 mov ax,Tmap.OldFPUCW // get it in ax
385 mov Tmap.FPUCW,ax // store it
386 fldcw Tmap.FPUCW // load the FPU
388 mov ecx, Tmap.loop_count // ecx = width
389 mov edi, Tmap.dest_row_data // edi = dest pointer
391 // edi = pointer to start pixel in dest dib
// Split the width into 32-pixel spans plus a last span.
// NOTE(review): a loop_count of 0 appears to take the "no leftover" path and
// end up with WidthModLength = 32 - presumably callers never pass 0; confirm.
394 mov eax,ecx // eax and ecx = width
395 shr ecx,5 // ecx = width / subdivision length
396 and eax,31 // eax = width mod subdivision length
397 jnz some_left_over // any leftover?
398 dec ecx // no, so special case last span
399 mov eax,32 // it's 32 pixels long
401 mov Tmap.Subdivisions,ecx // store widths
402 mov Tmap.WidthModLength,eax
404 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
405 // st0 st1 st2 st3 st4 st5 st6 st7
407 fld Tmap.l.u // U/ZL V/ZL
408 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
409 fld1 // 1 1/ZL U/ZL V/ZL
410 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
411 fld st // ZL ZL 1/ZL U/ZL V/ZL
412 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
413 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
414 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
416 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
417 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
419 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
421 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
422 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
423 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
424 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
425 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
427 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
429 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
430 // @todo overlap this guy
431 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
432 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
433 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
434 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
435 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
437 cmp ecx,0 // check for any full spans
438 jle HandleLeftoverPixels
442 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
443 // UR VR V/ZR 1/ZR U/ZR UL VL
445 // convert left side coords
447 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
448 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
449 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
451 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
452 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
453 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
455 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
457 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
458 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
459 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
460 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
462 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
463 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
465 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
466 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
467 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
469 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
470 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
472 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
473 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
474 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
475 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
476 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
477 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
478 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
481 // setup delta values
483 mov eax,Tmap.DeltaV // get v 16.16 step
484 mov ebx,eax // copy it
485 sar eax,16 // get v int step
486 shl ebx,16 // get v frac step
487 mov Tmap.DeltaVFrac,ebx // store it
488 imul eax,Tmap.src_offset // calculate texture step for v int step
490 mov ebx,Tmap.DeltaU // get u 16.16 step
491 mov ecx,ebx // copy it
492 sar ebx,16 // get u int step
493 shl ecx,16 // get u frac step
494 mov Tmap.DeltaUFrac,ecx // store it
495 add eax,ebx // calculate uint + vint step
496 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
497 add eax,Tmap.src_offset // calculate whole step + v carry
498 mov Tmap.uv_delta[0],eax // save in v-carry slot
500 // setup initial coordinates
501 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
503 mov ebx,esi // copy it
504 sar esi,16 // get integer part
505 shl ebx,16 // get fractional part
507 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
509 mov edx,ecx // copy it
510 sar edx,16 // get integer part
511 shl ecx,16 // get fractional part
512 imul edx,Tmap.src_offset // calc texture scanline address
513 add esi,edx // calc texture offset
514 add esi,Tmap.pixptr // calc address
516 // set up affine registers
517 mov edx,Tmap.DeltaUFrac // get register copy
523 mov ebp, Tmap.fx_dl_dx
534 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
535 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
536 // This divide should happen while the pixel span is drawn.
537 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
541 // edi = dest dib bits at current pixel
542 // esi = texture pointer at current u,v
544 // ebx = u fraction 0.32
545 // ecx = v fraction 0.32
547 // ebp = v carry scratch
550 mov al,[edi] // preread the destination cache line
552 mov al,[esi] // get texture pixel 0
554 mov Tmap.InnerLooper, 32/4 // Set up loop counter
// Pixel step, repeated four times per pass: look up the shaded pixel, store
// it, fetch the next texel, then advance the texture pointer. sbb turns the
// v-fraction carry into ebp = -1 or 0, so uv_delta[4*ebp+4] addresses the
// v-carry slot (offset 0) or the plain slot (offset 4); adc also adds the
// carry left by the u-fraction add.
// NOTE(review): the instructions that prepare eax/ebx for this dword lookup
// are not visible in this excerpt.
560 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Get shaded pixel
562 add ecx,Tmap.DeltaVFrac // increment v fraction
563 sbb ebp,ebp // get -1 if carry
564 mov [edi+0],al // store pixel
566 mov al,[esi] // get texture pixel
567 add ebx,edx // increment u fraction
568 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
572 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Get shaded pixel
574 add ecx,Tmap.DeltaVFrac // increment v fraction
575 sbb ebp,ebp // get -1 if carry
576 mov [edi+1],al // store pixel
578 mov al,[esi] // get texture pixel
579 add ebx,edx // increment u fraction
580 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
584 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Get shaded pixel
586 add ecx,Tmap.DeltaVFrac // increment v fraction
587 sbb ebp,ebp // get -1 if carry
588 mov [edi+2],al // store pixel
590 mov al,[esi] // get texture pixel
591 add ebx,edx // increment u fraction
592 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
596 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Get shaded pixel
598 add ecx,Tmap.DeltaVFrac // increment v fraction
599 sbb ebp,ebp // get -1 if carry
600 mov [edi+3],al // store pixel
602 mov al,[esi] // get texture pixel
603 add ebx,edx // increment u fraction
604 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
610 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
611 // ZR V/ZR 1/ZR U/ZR UL VL
613 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
614 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
615 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
616 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
618 dec Tmap.Subdivisions // decrement span count
619 jnz SpanLoop // loop back
// Final span: WidthModLength pixels. The right edge is taken from Tmap.r
// minus one Tmap.deltas step, and the per-pixel delta is the left-to-right
// difference divided by the number of steps (WidthModLength after the dec).
622 HandleLeftoverPixels:
624 mov esi,Tmap.pixptr // load texture pointer
626 // edi = dest dib bits
627 // esi = current texture dib bits
628 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
629 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
631 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
632 jz FPUReturn ; nope, pop the FPU and bail
634 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
636 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
637 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
638 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
640 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
641 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
642 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
644 dec Tmap.WidthModLength ; calc how many steps to take
645 jz OnePixelSpan ; just one, do not do deltas
647 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
650 // @todo rearrange things so we don't need these two instructions
651 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
652 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
654 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
655 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
656 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
657 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
658 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
659 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
661 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
663 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
664 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
666 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
668 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
669 fxch st(1) ; VR UR inv. inv. inv. dU VL
670 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
671 fxch st(6) ; dV UR inv. inv. inv. dU VR
673 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
674 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
675 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
677 fxch st(4) ; dU inv. inv. inv. UR VR
678 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
679 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
680 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
682 // @todo gross! these are to line up with the other loop
683 fld st(1) ; inv. inv. inv. inv. UR VR
684 fld st(2) ; inv. inv. inv. inv. inv. UR VR
687 // setup delta values
688 mov eax, Tmap.DeltaV // get v 16.16 step
689 mov ebx, eax // copy it
690 sar eax, 16 // get v int step
691 shl ebx, 16 // get v frac step
692 mov Tmap.DeltaVFrac, ebx // store it
693 imul eax, Tmap.src_offset // calc texture step for v int step
695 mov ebx, Tmap.DeltaU // get u 16.16 step
696 mov ecx, ebx // copy it
697 sar ebx, 16 // get the u int step
698 shl ecx, 16 // get the u frac step
699 mov Tmap.DeltaUFrac, ecx // store it
700 add eax, ebx // calc uint + vint step
701 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
702 add eax, Tmap.src_offset // calc whole step + v carry
703 mov Tmap.uv_delta[0], eax // save in v-carry slot
708 ; setup initial coordinates
709 mov esi, Tmap.UFixed // get u 16.16
710 mov ebx, esi // copy it
711 sar esi, 16 // get integer part
712 shl ebx, 16 // get fractional part
714 mov ecx, Tmap.VFixed // get v 16.16
715 mov edx, ecx // copy it
716 sar edx, 16 // get integer part
717 shl ecx, 16 // get fractional part
718 imul edx, Tmap.src_offset // calc texture scanline address
719 add esi, edx // calc texture offset
720 add esi, Tmap.pixptr // calc address
727 mov edx, Tmap.DeltaUFrac
729 cmp Tmap.WidthModLength, 1
734 mov ebx, Tmap.fx_l_right
740 mov eax, Tmap.fx_dl_dx
749 inc Tmap.WidthModLength
750 mov eax,Tmap.WidthModLength
754 mov Tmap.WidthModLength, eax
758 mov al,[edi] // preread the destination cache line
// Leftover pixels appear to be drawn two per pass with the same
// shade/store/fetch/advance step as the span loop.
762 mov ah, bh // move lighting value into place
763 mov al, gr_fade_table[eax] // Get shaded pixel
764 add ecx,Tmap.DeltaVFrac // increment v fraction
765 sbb ebp,ebp // get -1 if carry
766 mov [edi+0],al // store pixel
767 mov al,[esi] // get texture pixel
768 add ebx,edx // increment u fraction
769 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
771 mov ah, bh // move lighting value into place
772 mov al, gr_fade_table[eax] // Get shaded pixel
773 add ecx,Tmap.DeltaVFrac // increment v fraction
774 sbb ebp,ebp // get -1 if carry
775 mov [edi+1],al // store pixel
776 mov al,[esi] // get texture pixel
777 add ebx,edx // increment u fraction
778 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
781 dec Tmap.WidthModLength
789 mov al,[esi] // get texture pixel 2
791 mov al, gr_fade_table[eax]
792 mov [edi],al // store pixel 2
797 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
798 // xxx xxx xxx xxx xxx xxx xxx
807 fldcw Tmap.OldFPUCW // restore the FPU
// Draws one perspective-corrected, lit, 8-bit textured scanline using inline
// x86/x87 assembly. tmapscan_pln8() chooses between this routine and
// tmapscan_pln8_ppro(); the visible difference is that this one shades every
// pixel with the "mov ah,bh / mov al,gr_fade_table[eax]" byte lookup.
//
// Outline, as far as it is visible in this excerpt:
//  - The FPU control word is saved in Tmap.OldFPUCW, a modified copy is
//    loaded from Tmap.FPUCW, and the saved word is reloaded before returning.
//  - Tmap.loop_count pixels are split into full 32-pixel spans
//    (Tmap.Subdivisions) and a final span (Tmap.WidthModLength).
//  - At each span boundary U and V are recovered from U/Z, V/Z and 1/Z with
//    an fdiv, converted to 16.16 fixed point, and stepped linearly across
//    the span. The fdiv for the next span is issued before the pixel loop.
//  - Texel stepping keeps the u and v fractions in ebx and ecx. The carry out
//    of the v fraction selects Tmap.uv_delta[0] (whole step plus one texture
//    row) or Tmap.uv_delta[4] (whole step only); the carry out of the u
//    fraction is added by adc.
//  - Each texel is shaded through gr_fade_table before it is stored.
// NOTE(review): labels, the ebp save/restore and several setup instructions
// are not visible in this excerpt.
821 void tmapscan_pln8_pentium()
836 // Put the FPU in low precision mode
837 fstcw Tmap.OldFPUCW // store copy of CW
838 mov ax,Tmap.OldFPUCW // get it in ax
840 mov Tmap.FPUCW,ax // store it
841 fldcw Tmap.FPUCW // load the FPU
843 mov ecx, Tmap.loop_count // ecx = width
844 mov edi, Tmap.dest_row_data // edi = dest pointer
846 // edi = pointer to start pixel in dest dib
// Split the width into 32-pixel spans plus a last span.
// NOTE(review): a loop_count of 0 appears to take the "no leftover" path and
// end up with WidthModLength = 32 - presumably callers never pass 0; confirm.
849 mov eax,ecx // eax and ecx = width
850 shr ecx,5 // ecx = width / subdivision length
851 and eax,31 // eax = width mod subdivision length
852 jnz some_left_over // any leftover?
853 dec ecx // no, so special case last span
854 mov eax,32 // it's 32 pixels long
856 mov Tmap.Subdivisions,ecx // store widths
857 mov Tmap.WidthModLength,eax
859 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
860 // st0 st1 st2 st3 st4 st5 st6 st7
862 fld Tmap.l.u // U/ZL V/ZL
863 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
864 fld1 // 1 1/ZL U/ZL V/ZL
865 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
866 fld st // ZL ZL 1/ZL U/ZL V/ZL
867 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
868 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
869 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
871 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
872 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
874 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
876 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
877 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
878 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
879 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
880 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
882 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
884 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
885 // @todo overlap this guy
886 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
887 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
888 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
889 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
890 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
892 cmp ecx,0 // check for any full spans
893 jle HandleLeftoverPixels
897 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
898 // UR VR V/ZR 1/ZR U/ZR UL VL
900 // convert left side coords
902 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
903 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
904 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
906 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
907 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
908 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
910 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
912 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
913 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
914 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
915 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
917 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
918 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
920 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
921 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
922 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
924 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
925 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
927 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
928 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
929 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
930 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
931 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
932 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
933 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
936 // setup delta values
938 mov eax,Tmap.DeltaV // get v 16.16 step
939 mov ebx,eax // copy it
940 sar eax,16 // get v int step
941 shl ebx,16 // get v frac step
942 mov Tmap.DeltaVFrac,ebx // store it
943 imul eax,Tmap.src_offset // calculate texture step for v int step
945 mov ebx,Tmap.DeltaU // get u 16.16 step
946 mov ecx,ebx // copy it
947 sar ebx,16 // get u int step
948 shl ecx,16 // get u frac step
949 mov Tmap.DeltaUFrac,ecx // store it
950 add eax,ebx // calculate uint + vint step
951 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
952 add eax,Tmap.src_offset // calculate whole step + v carry
953 mov Tmap.uv_delta[0],eax // save in v-carry slot
955 // setup initial coordinates
956 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
958 mov ebx,esi // copy it
959 sar esi,16 // get integer part
960 shl ebx,16 // get fractional part
962 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
964 mov edx,ecx // copy it
965 sar edx,16 // get integer part
966 shl ecx,16 // get fractional part
967 imul edx,Tmap.src_offset // calc texture scanline address
968 add esi,edx // calc texture offset
969 add esi,Tmap.pixptr // calc address
971 // set up affine registers
972 mov edx,Tmap.DeltaUFrac // get register copy
978 mov ebp, Tmap.fx_dl_dx
989 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
990 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
991 // This divide should happen while the pixel span is drawn.
992 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
996 // edi = dest dib bits at current pixel
997 // esi = texture pointer at current u,v
999 // ebx = u fraction 0.32
1000 // ecx = v fraction 0.32
1001 // edx = u frac step
1002 // ebp = v carry scratch
1005 mov al,[edi] // preread the destination cache line
1007 mov al,[esi] // get texture pixel 0
1009 mov Tmap.InnerLooper, 32/4 // Set up loop counter
// Pixel step, repeated four times per pass: combine the lighting byte (ah)
// with the texel (al) to index gr_fade_table, store the shaded pixel, fetch
// the next texel, then advance the texture pointer. sbb turns the v-fraction
// carry into ebp = -1 or 0, so uv_delta[4*ebp+4] addresses the v-carry slot
// (offset 0) or the plain slot (offset 4); adc also adds the carry left by
// the u-fraction add.
1013 mov ah, bh // move lighting value into place
1014 mov al, gr_fade_table[eax] // Get shaded pixel
1017 add ecx,Tmap.DeltaVFrac // increment v fraction
1018 sbb ebp,ebp // get -1 if carry
1019 mov [edi+0],al // store pixel
1021 mov al,[esi] // get texture pixel
1022 add ebx,edx // increment u fraction
1023 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1025 mov ah, bh // move lighting value into place
1026 mov al, gr_fade_table[eax] // Get shaded pixel
1029 add ecx,Tmap.DeltaVFrac // increment v fraction
1030 sbb ebp,ebp // get -1 if carry
1031 mov [edi+1],al // store pixel
1033 mov al,[esi] // get texture pixel
1034 add ebx,edx // increment u fraction
1035 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1037 mov ah, bh // move lighting value into place
1038 mov al, gr_fade_table[eax] // Get shaded pixel
1041 add ecx,Tmap.DeltaVFrac // increment v fraction
1042 sbb ebp,ebp // get -1 if carry
1043 mov [edi+2],al // store pixel
1045 mov al,[esi] // get texture pixel
1046 add ebx,edx // increment u fraction
1047 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1049 mov ah, bh // move lighting value into place
1050 mov al, gr_fade_table[eax] // Get shaded pixel
1053 add ecx,Tmap.DeltaVFrac // increment v fraction
1054 sbb ebp,ebp // get -1 if carry
1055 mov [edi+3],al // store pixel
1057 mov al,[esi] // get texture pixel
1058 add ebx,edx // increment u fraction
1059 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1062 dec Tmap.InnerLooper
1065 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
1066 // ZR V/ZR 1/ZR U/ZR UL VL
1068 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
1069 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
1070 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
1071 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
1073 dec Tmap.Subdivisions // decrement span count
1074 jnz SpanLoop // loop back
// Final span: WidthModLength pixels. The right edge is taken from Tmap.r
// minus one Tmap.deltas step, and the per-pixel delta is the left-to-right
// difference divided by the number of steps (WidthModLength after the dec).
1077 HandleLeftoverPixels:
1079 mov esi,Tmap.pixptr // load texture pointer
1081 // edi = dest dib bits
1082 // esi = current texture dib bits
1083 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
1084 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
1086 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
1087 jz FPUReturn ; nope, pop the FPU and bail
1089 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
1091 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
1092 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
1093 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
1095 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
1096 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
1097 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
1099 dec Tmap.WidthModLength ; calc how many steps to take
1100 jz OnePixelSpan ; just one, do not do deltas
1102 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
1105 // @todo rearrange things so we don't need these two instructions
1106 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
1107 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
1109 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
1110 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
1111 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
1112 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
1113 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
1114 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
1116 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
1118 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
1119 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
1121 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
1123 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
1124 fxch st(1) ; VR UR inv. inv. inv. dU VL
1125 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
1126 fxch st(6) ; dV UR inv. inv. inv. dU VR
1128 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
1129 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
1130 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
1132 fxch st(4) ; dU inv. inv. inv. UR VR
1133 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
1134 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
1135 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
1137 // @todo gross! these are to line up with the other loop
1138 fld st(1) ; inv. inv. inv. inv. UR VR
1139 fld st(2) ; inv. inv. inv. inv. inv. UR VR
1142 // setup delta values
1143 mov eax, Tmap.DeltaV // get v 16.16 step
1144 mov ebx, eax // copy it
1145 sar eax, 16 // get v int step
1146 shl ebx, 16 // get v frac step
1147 mov Tmap.DeltaVFrac, ebx // store it
1148 imul eax, Tmap.src_offset // calc texture step for v int step
1150 mov ebx, Tmap.DeltaU // get u 16.16 step
1151 mov ecx, ebx // copy it
1152 sar ebx, 16 // get the u int step
1153 shl ecx, 16 // get the u frac step
1154 mov Tmap.DeltaUFrac, ecx // store it
1155 add eax, ebx // calc uint + vint step
1156 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1157 add eax, Tmap.src_offset // calc whole step + v carry
1158 mov Tmap.uv_delta[0], eax // save in v-carry slot
1163 ; setup initial coordinates
1164 mov esi, Tmap.UFixed // get u 16.16
1165 mov ebx, esi // copy it
1166 sar esi, 16 // get integer part
1167 shl ebx, 16 // get fractional part
1169 mov ecx, Tmap.VFixed // get v 16.16
1170 mov edx, ecx // copy it
1171 sar edx, 16 // get integer part
1172 shl ecx, 16 // get fractional part
1173 imul edx, Tmap.src_offset // calc texture scanline address
1174 add esi, edx // calc texture offset
1175 add esi, Tmap.pixptr // calc address
1182 mov edx, Tmap.DeltaUFrac
1184 cmp Tmap.WidthModLength, 1
1189 mov ebx, Tmap.fx_l_right
1195 mov eax, Tmap.fx_dl_dx
1204 inc Tmap.WidthModLength
1205 mov eax,Tmap.WidthModLength
1209 mov Tmap.WidthModLength, eax
1213 mov al,[edi] // preread the destination cache line
// Leftover pixels appear to be drawn two per pass with the same
// shade/store/fetch/advance step as the span loop.
1217 mov ah, bh // move lighting value into place
1218 mov al, gr_fade_table[eax] // Get shaded pixel
1219 add ecx,Tmap.DeltaVFrac // increment v fraction
1220 sbb ebp,ebp // get -1 if carry
1221 mov [edi+0],al // store pixel
1222 mov al,[esi] // get texture pixel
1223 add ebx,edx // increment u fraction
1224 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1226 mov ah, bh // move lighting value into place
1227 mov al, gr_fade_table[eax] // Get shaded pixel
1228 add ecx,Tmap.DeltaVFrac // increment v fraction
1229 sbb ebp,ebp // get -1 if carry
1230 mov [edi+1],al // store pixel
1231 mov al,[esi] // get texture pixel
1232 add ebx,edx // increment u fraction
1233 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1236 dec Tmap.WidthModLength
1244 mov al,[esi] // get texture pixel 2
1246 mov al, gr_fade_table[eax]
1247 mov [edi],al // store pixel 2
1252 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
1253 // xxx xxx xxx xxx xxx xxx xxx
1262 fldcw Tmap.OldFPUCW // restore the FPU
// Entry point for the "pln8" scanline: picks the implementation to run.
// With z-buffering enabled, the full and read-only modes go to
// tmapscan_pln8_zbuffered(). Otherwise either tmapscan_pln8_ppro() or
// tmapscan_pln8_pentium() is used.
// NOTE(review): the body of the write-only case, the break/return statements
// and the condition that selects ppro vs. pentium are not visible in this
// excerpt.
1278 void tmapscan_pln8()
1280 if (gr_zbuffering) {
1281 switch(gr_zbuffering_mode) {
1284 case GR_ZBUFF_FULL: // both
1285 tmapscan_pln8_zbuffered();
1287 case GR_ZBUFF_WRITE: // write only
1290 case GR_ZBUFF_READ: // read only
1291 tmapscan_pln8_zbuffered();
1298 tmapscan_pln8_ppro();
1300 tmapscan_pln8_pentium();
// Draws one lit, 8-bit textured scanline with linearly stepped texture
// coordinates: the 16.16 fixed-point start (Tmap.fx_u, Tmap.fx_v) and
// per-pixel steps (Tmap.fx_du_dx, Tmap.fx_dv_dx) are used directly, with no
// per-span divide.
//
// Outline, as far as it is visible in this excerpt:
//  - Each step is split into an integer part, folded into the two
//    Tmap.uv_delta slots as a texture address step, and a 0.32 fraction.
//  - Pixels are drawn four per pass for Tmap.num_big_steps passes; the
//    remaining (loop_count & 3) pixels are handled afterwards.
//  - The carry out of the v fraction selects Tmap.uv_delta[0] (whole step
//    plus one texture row) or Tmap.uv_delta[4] (whole step only); the carry
//    out of the u fraction is added by adc.
//  - Each texel is shaded through gr_fade_table before it is stored.
// NOTE(review): labels, branches, the ebp save/restore and the lighting setup
// are not visible in this excerpt.
1305 void tmapscan_lln8()
1319 ; setup delta values
1320 mov eax, Tmap.fx_dv_dx // get v 16.16 step
1321 mov ebx, eax // copy it
1322 sar eax, 16 // get v int step
1323 shl ebx, 16 // get v frac step
1324 mov Tmap.DeltaVFrac, ebx // store it
1325 imul eax, Tmap.src_offset // calc texture step for v int step
1327 mov ebx, Tmap.fx_du_dx // get u 16.16 step
1328 mov ecx, ebx // copy it
1329 sar ebx, 16 // get the u int step
1330 shl ecx, 16 // get the u frac step
1331 mov Tmap.DeltaUFrac, ecx // store it
1332 add eax, ebx // calc uint + vint step
1334 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1335 add eax, Tmap.src_offset // calc whole step + v carry
1336 mov Tmap.uv_delta[0], eax // save in v-carry slot
1338 ; setup initial coordinates
1339 mov esi, Tmap.fx_u // get u 16.16
1340 mov ebx, esi // copy it
1341 sar esi, 16 // get integer part
1342 shl ebx, 16 // get fractional part
1344 mov ecx, Tmap.fx_v // get v 16.16
1345 mov edx, ecx // copy it
1346 sar edx, 16 // get integer part
1347 shl ecx, 16 // get fractional part
1348 imul edx, Tmap.src_offset // calc texture scanline address
1349 add esi, edx // calc texture offset
1350 add esi, Tmap.pixptr // calc address
1352 ; set edi = address of first pixel to modify
1353 mov edi, Tmap.dest_row_data
1355 mov edx, Tmap.DeltaUFrac
// Split the pixel count into unrolled passes and a remainder of 0..3 pixels.
// NOTE(review): the instructions that derive the pass count in eax are not
// visible in this excerpt.
1357 mov eax, Tmap.loop_count
1361 mov Tmap.num_big_steps, eax
1362 and Tmap.loop_count, 3
1364 mov al,[edi] // preread the destination cache line
1365 mov al,[esi] // get texture pixel 0
1374 mov ebp, Tmap.fx_dl_dx
1387 // unrolled span code (4 pixels per pass as visible here; was labelled "8 pixel")
1388 // edi = dest dib bits at current pixel
1389 // esi = texture pointer at current u,v
1391 // ebx = u fraction 0.32
1392 // ecx = v fraction 0.32
1393 // edx = u frac step
1394 // ebp = v carry scratch
// Pixel step: shade the texel in al with the lighting byte in ah, store it,
// fetch the next texel, then advance the texture pointer. sbb turns the
// v-fraction carry into ebp = -1 or 0, so uv_delta[4*ebp+4] addresses the
// v-carry slot (offset 0) or the plain slot (offset 4).
1396 mov ah, bh // move lighting value into place
1397 mov al, gr_fade_table[eax] // Get shaded pixel
1398 add ecx,Tmap.DeltaVFrac // increment v fraction
1399 sbb ebp,ebp // get -1 if carry
1400 mov [edi+0],al // store pixel
1401 mov al,[esi] // get texture pixel
1402 add ebx,edx // increment u fraction
1403 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1405 mov ah, bh // move lighting value into place
1406 mov al, gr_fade_table[eax] // Get shaded pixel
1407 add ecx,Tmap.DeltaVFrac // increment v fraction
1408 sbb ebp,ebp // get -1 if carry
1409 mov [edi+1],al // store pixel
1410 mov al,[esi] // get texture pixel
1411 add ebx,edx // increment u fraction
1412 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1414 mov ah, bh // move lighting value into place
1415 mov al, gr_fade_table[eax] // Get shaded pixel
1416 add ecx,Tmap.DeltaVFrac // increment v fraction
1417 sbb ebp,ebp // get -1 if carry
1418 mov [edi+2],al // store pixel
1419 mov al,[esi] // get texture pixel
1420 add ebx,edx // increment u fraction
1421 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1423 mov ah, bh // move lighting value into place
1424 mov al, gr_fade_table[eax] // Get shaded pixel
1425 add ecx,Tmap.DeltaVFrac // increment v fraction
1426 sbb ebp,ebp // get -1 if carry
1427 mov [edi+3],al // store pixel
1428 mov al,[esi] // get texture pixel
1429 add ebx,edx // increment u fraction
1430 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1433 dec Tmap.num_big_steps
// Remainder: the leftover pixels appear to be drawn two per pass, with a
// single trailing pixel handled at the end.
1443 mov ebp, Tmap.fx_dl_dx
1447 mov eax,Tmap.loop_count
1452 mov Tmap.loop_count, eax
1457 mov al, [edi] // preread the destination cache line
1458 mov al, [esi] // Get first texel
1462 mov ah, bh // move lighting value into place
1463 mov al, gr_fade_table[eax] // Get shaded pixel
1464 add ecx,Tmap.DeltaVFrac // increment v fraction
1465 sbb ebp,ebp // get -1 if carry
1466 mov [edi+0],al // store pixel
1467 mov al,[esi] // get texture pixel
1468 add ebx,edx // increment u fraction
1469 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1471 mov ah, bh // move lighting value into place
1472 mov al, gr_fade_table[eax] // Get shaded pixel
1473 add ecx,Tmap.DeltaVFrac // increment v fraction
1474 sbb ebp,ebp // get -1 if carry
1475 mov [edi+1],al // store pixel
1476 mov al,[esi] // get texture pixel
1477 add ebx,edx // increment u fraction
1478 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
1489 mov al, [esi] // Get first texel
1491 mov al, gr_fade_table[eax]
1492 mov [edi],al // store pixel 2
// Z-buffered, alpha-blended 8-bit scanline mapper, Pentium Pro variant.
// u and v are stepped across the texture as 16.16 fixed point values split
// into an integer texture offset (esi) and 0.32 fractions (ebx, ecx).
// Each pixel's depth in ebp is compared with the z-buffer entry at [edx+4*k];
// pixels that fail the test (jle) are skipped, the others are written with a
// value fetched through Tmap.BlendLookup. The z-buffer store is commented
// out, so depth is only read here, never written.
1507 void tmapscan_lna8_zbuffered_ppro()
1521 ; setup delta values
1522 mov eax, Tmap.fx_dv_dx // get v 16.16 step
1523 mov ebx, eax // copy it
1524 sar eax, 16 // get v int step
1525 shl ebx, 16 // get v frac step
1526 mov Tmap.DeltaVFrac, ebx // store it
1527 imul eax, Tmap.src_offset // calc texture step for v int step
1529 mov ebx, Tmap.fx_du_dx // get u 16.16 step
1530 mov ecx, ebx // copy it
1531 sar ebx, 16 // get the u int step
1532 shl ecx, 16 // get the u frac step
1533 mov Tmap.DeltaUFrac, ecx // store it
1534 add eax, ebx // calc uint + vint step
// uv_delta[4] is the texture step without a v-fraction carry,
// uv_delta[0] is the same step plus one extra texture row (src_offset).
1536 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1537 add eax, Tmap.src_offset // calc whole step + v carry
1538 mov Tmap.uv_delta[0], eax // save in v-carry slot
1540 ; setup initial coordinates
1541 mov esi, Tmap.fx_u // get u 16.16
1542 mov ebx, esi // copy it
1543 sar esi, 16 // get integer part
1544 shl ebx, 16 // get fractional part
1546 mov ecx, Tmap.fx_v // get v 16.16
1547 mov edx, ecx // copy it
1548 sar edx, 16 // get integer part
1549 shl ecx, 16 // get fractional part
1550 imul edx, Tmap.src_offset // calc texture scanline address
1551 add esi, edx // calc texture offset
1552 add esi, Tmap.pixptr // calc address
1554 ; set edi = address of first pixel to modify
1555 mov edi, Tmap.dest_row_data
1558 mov al,[edi] // get the destination pixel
1563 sub eax, Tmap.pScreenBits
1567 mov eax, Tmap.loop_count
1572 mov Tmap.num_big_steps, eax
1573 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
1577 // 4 pixel span code (four stores, [edi+0]..[edi+3], per big step)
1579 // ebx = u fraction 0.32
1580 // ecx = v fraction 0.32
1581 // edx = zbuffer pointer
1582 // edi = dest dib bits at current pixel
1583 // esi = texture pointer at current u,v
1587 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
1588 jle Skip0a // If pixel is covered, skip drawing
1589 // mov [edx+4*0], ebp // Write new Z value
1591 // Get pixel and blend it
1595 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
1596 mov ah, [esi] // Get texel into AH
1597 add eax, Tmap.BlendLookup
1598 mov eax, [eax+ebx] // Look up the pixel through Tmap.BlendLookup
1601 mov [edi+0],al // store pixel
1603 add ebp,Tmap.fx_dwdx // increment z value
1604 add ecx,Tmap.DeltaVFrac // increment v fraction
// sbb leaves eax = 0 or -1, so uv_delta[4*eax+4] picks uv_delta[4] (no v
// carry) or uv_delta[0] (v carry); adc also folds in the u-fraction carry.
1605 sbb eax,eax // get -1 if carry
1606 add ebx,Tmap.DeltaUFrac // increment u fraction
1607 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1610 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
1611 jle Skip1a // If pixel is covered, skip drawing
1612 // mov [edx+4*1], ebp // Write new Z value
1614 // Get pixel and blend it
1618 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
1619 mov ah, [esi] // Get texel into AH
1620 add eax, Tmap.BlendLookup
1621 mov eax, [eax+ebx] // Look up the pixel through Tmap.BlendLookup
1624 mov [edi+1],al // store pixel
1626 add ebp, Tmap.fx_dwdx // increment z value
1627 add ecx,Tmap.DeltaVFrac // increment v fraction
1628 sbb eax,eax // get -1 if carry
1629 add ebx,Tmap.DeltaUFrac // increment u fraction
1630 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1632 cmp ebp, [edx+4*2] // Compare the Z depth of this pixel with zbuffer
1633 jle Skip2a // If pixel is covered, skip drawing
1634 // mov [edx+4*2], ebp // Write new Z value
1639 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
1640 mov ah, [esi] // Get texel into AH
1641 add eax, Tmap.BlendLookup
1642 mov eax, [eax+ebx] // Look up the pixel through Tmap.BlendLookup
1645 mov [edi+2],al // store pixel
1647 add ebp, Tmap.fx_dwdx // increment z value
1648 add ecx,Tmap.DeltaVFrac // increment v fraction
1649 sbb eax,eax // get -1 if carry
1650 add ebx,Tmap.DeltaUFrac // increment u fraction
1651 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1653 cmp ebp, [edx+4*3] // Compare the Z depth of this pixel with zbuffer
1654 jle Skip3a // If pixel is covered, skip drawing
1655 // mov [edx+4*3], ebp // Write new Z value
1660 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
1661 mov ah, [esi] // Get texel into AH
1662 add eax, Tmap.BlendLookup
1663 mov eax, [eax+ebx] // Look up the pixel through Tmap.BlendLookup
1666 mov [edi+3],al // store pixel
1668 add ebp, Tmap.fx_dwdx // increment z value
1669 add ecx,Tmap.DeltaVFrac // increment v fraction
1670 sbb eax,eax // get -1 if carry
1671 add ebx,Tmap.DeltaUFrac // increment u fraction
1672 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1676 dec Tmap.num_big_steps
// Leftover pixels: Tmap.loop_count now holds the 0..3 remainder.
1682 mov eax,Tmap.loop_count
1687 mov Tmap.loop_count, eax
1691 mov al,[edi] // get the destination pixel
1695 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
1696 jle Skip0b // If pixel is covered, skip drawing
1697 // mov [edx+4*0], ebp // Write new Z value
1698 mov al,[edi+0] // get the destination pixel
1699 mov ah,[esi] // get texture pixel 0
1701 add eax, Tmap.BlendLookup
1702 mov al, [eax] // blend them
1703 mov [edi+0],al // store pixel
1705 add ebp, Tmap.fx_dwdx // increment z value
1706 add ecx,Tmap.DeltaVFrac // increment v fraction
1707 sbb eax,eax // get -1 if carry
1708 add ebx,Tmap.DeltaUFrac // increment u fraction
1709 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1711 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
1712 jle Skip1b // If pixel is covered, skip drawing
1713 // mov [edx+4*1], ebp // Write new Z value
1714 mov al,[edi+1] // get the destination pixel
1715 mov ah,[esi] // get texture pixel 1
1717 add eax, Tmap.BlendLookup
1718 mov al, [eax] // blend them
1719 mov [edi+1],al // store pixel
1721 add ebp, Tmap.fx_dwdx // increment z value
1722 add ecx,Tmap.DeltaVFrac // increment v fraction
1723 sbb eax,eax // get -1 if carry
1724 add ebx,Tmap.DeltaUFrac // increment u fraction
1725 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1736 cmp ebp, [edx] // Compare the Z depth of this pixel with zbuffer
1737 jle Skip0c // If pixel is covered, skip drawing
1738 // mov [edx], ebp // Write new Z value
1739 mov al,[edi] // get the destination pixel
1740 mov ah,[esi] // get texture pixel
1742 add eax, Tmap.BlendLookup
1743 mov al, [eax] // blend them
1744 mov [edi],al // store pixel
// Z-buffered, alpha-blended 8-bit scanline mapper, Pentium variant.
// For every pixel whose depth (ebp) passes the compare against the z-buffer
// entry at [edx+4*k], the destination byte (al) and the texel (ah) are
// combined into an offset that is added to Tmap.BlendLookup, and the byte
// found there is stored back to the destination. The z-buffer store is
// commented out, so depth is only read here, never written.
1759 void tmapscan_lna8_zbuffered_pentium()
1773 ; setup delta values
1774 mov eax, Tmap.fx_dv_dx // get v 16.16 step
1775 mov ebx, eax // copy it
1776 sar eax, 16 // get v int step
1777 shl ebx, 16 // get v frac step
1778 mov Tmap.DeltaVFrac, ebx // store it
1779 imul eax, Tmap.src_offset // calc texture step for v int step
1781 mov ebx, Tmap.fx_du_dx // get u 16.16 step
1782 mov ecx, ebx // copy it
1783 sar ebx, 16 // get the u int step
1784 shl ecx, 16 // get the u frac step
1785 mov Tmap.DeltaUFrac, ecx // store it
1786 add eax, ebx // calc uint + vint step
// uv_delta[4] is the texture step without a v-fraction carry,
// uv_delta[0] is the same step plus one extra texture row (src_offset).
1788 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
1789 add eax, Tmap.src_offset // calc whole step + v carry
1790 mov Tmap.uv_delta[0], eax // save in v-carry slot
1792 ; setup initial coordinates
1793 mov esi, Tmap.fx_u // get u 16.16
1794 mov ebx, esi // copy it
1795 sar esi, 16 // get integer part
1796 shl ebx, 16 // get fractional part
1798 mov ecx, Tmap.fx_v // get v 16.16
1799 mov edx, ecx // copy it
1800 sar edx, 16 // get integer part
1801 shl ecx, 16 // get fractional part
1802 imul edx, Tmap.src_offset // calc texture scanline address
1803 add esi, edx // calc texture offset
1804 add esi, Tmap.pixptr // calc address
1806 ; set edi = address of first pixel to modify
1807 mov edi, Tmap.dest_row_data
1810 mov al,[edi] // get the destination pixel
1815 sub eax, Tmap.pScreenBits
1819 mov eax, Tmap.loop_count
1824 mov Tmap.num_big_steps, eax
1825 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
1829 // 4 pixel span code (four stores, [edi+0]..[edi+3], per big step)
1831 // ebx = u fraction 0.32
1832 // ecx = v fraction 0.32
1833 // edx = zbuffer pointer
1834 // edi = dest dib bits at current pixel
1835 // esi = texture pointer at current u,v
1839 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
1840 jle Skip0a // If pixel is covered, skip drawing
1841 // mov [edx+4*0], ebp // Write new Z value
1842 mov al,[edi+0] // get the destination pixel
1843 mov ah,[esi] // get texture pixel 0
1849 add eax, Tmap.BlendLookup
1850 mov al, [eax] // blend them
1853 mov [edi+0],al // store pixel
1855 add ebp,Tmap.fx_dwdx // increment z value
1856 add ecx,Tmap.DeltaVFrac // increment v fraction
// sbb leaves eax = 0 or -1, so uv_delta[4*eax+4] picks uv_delta[4] (no v
// carry) or uv_delta[0] (v carry); adc also folds in the u-fraction carry.
1857 sbb eax,eax // get -1 if carry
1858 add ebx,Tmap.DeltaUFrac // increment u fraction
1859 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1862 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
1863 jle Skip1a // If pixel is covered, skip drawing
1864 // mov [edx+4*1], ebp // Write new Z value
1865 mov al,[edi+1] // get the destination pixel
1866 mov ah,[esi] // get texture pixel 1
1872 add eax, Tmap.BlendLookup
1873 mov al, [eax] // blend them
1876 mov [edi+1],al // store pixel
1878 add ebp, Tmap.fx_dwdx // increment z value
1879 add ecx,Tmap.DeltaVFrac // increment v fraction
1880 sbb eax,eax // get -1 if carry
1881 add ebx,Tmap.DeltaUFrac // increment u fraction
1882 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1884 cmp ebp, [edx+4*2] // Compare the Z depth of this pixel with zbuffer
1885 jle Skip2a // If pixel is covered, skip drawing
1886 // mov [edx+4*2], ebp // Write new Z value
1887 mov al,[edi+2] // get the destination pixel
1888 mov ah,[esi] // get texture pixel 2
1893 add eax, Tmap.BlendLookup
1894 mov al, [eax] // blend them
1897 mov [edi+2],al // store pixel
1899 add ebp, Tmap.fx_dwdx // increment z value
1900 add ecx,Tmap.DeltaVFrac // increment v fraction
1901 sbb eax,eax // get -1 if carry
1902 add ebx,Tmap.DeltaUFrac // increment u fraction
1903 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1905 cmp ebp, [edx+4*3] // Compare the Z depth of this pixel with zbuffer
1906 jle Skip3a // If pixel is covered, skip drawing
1907 // mov [edx+4*3], ebp // Write new Z value
1908 mov al,[edi+3] // get the destination pixel
1909 mov ah,[esi] // get texture pixel 3
1914 add eax, Tmap.BlendLookup
1915 mov al, [eax] // blend them
1918 mov [edi+3],al // store pixel
1920 add ebp, Tmap.fx_dwdx // increment z value
1921 add ecx,Tmap.DeltaVFrac // increment v fraction
1922 sbb eax,eax // get -1 if carry
1923 add ebx,Tmap.DeltaUFrac // increment u fraction
1924 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1928 dec Tmap.num_big_steps
// Leftover pixels: Tmap.loop_count now holds the 0..3 remainder.
1934 mov eax,Tmap.loop_count
1939 mov Tmap.loop_count, eax
1943 mov al,[edi] // get the destination pixel
1947 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
1948 jle Skip0b // If pixel is covered, skip drawing
1949 // mov [edx+4*0], ebp // Write new Z value
1950 mov al,[edi+0] // get the destination pixel
1951 mov ah,[esi] // get texture pixel 0
1953 add eax, Tmap.BlendLookup
1954 mov al, [eax] // blend them
1955 mov [edi+0],al // store pixel
1957 add ebp, Tmap.fx_dwdx // increment z value
1958 add ecx,Tmap.DeltaVFrac // increment v fraction
1959 sbb eax,eax // get -1 if carry
1960 add ebx,Tmap.DeltaUFrac // increment u fraction
1961 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1963 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
1964 jle Skip1b // If pixel is covered, skip drawing
1965 // mov [edx+4*1], ebp // Write new Z value
1966 mov al,[edi+1] // get the destination pixel
1967 mov ah,[esi] // get texture pixel 1
1969 add eax, Tmap.BlendLookup
1970 mov al, [eax] // blend them
1971 mov [edi+1],al // store pixel
1973 add ebp, Tmap.fx_dwdx // increment z value
1974 add ecx,Tmap.DeltaVFrac // increment v fraction
1975 sbb eax,eax // get -1 if carry
1976 add ebx,Tmap.DeltaUFrac // increment u fraction
1977 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
1988 cmp ebp, [edx] // Compare the Z depth of this pixel with zbuffer
1989 jle Skip0c // If pixel is covered, skip drawing
1990 // mov [edx], ebp // Write new Z value
1991 mov al,[edi] // get the destination pixel
1992 mov ah,[esi] // get texture pixel
1994 add eax, Tmap.BlendLookup
1995 mov al, [eax] // blend them
1996 mov [edi],al // store pixel
// Entry point for the z-buffered alpha-blended 8-bit scanline mapper.
// Hands the scanline to one of the two CPU-specific implementations,
// tmapscan_lna8_zbuffered_ppro() or tmapscan_lna8_zbuffered_pentium().
2011 void tmapscan_lna8_zbuffered()
2014 tmapscan_lna8_zbuffered_ppro(); // Pentium Pro tuned version
2016 tmapscan_lna8_zbuffered_pentium(); // Pentium tuned version
2022 extern float Tmap_clipped_left;
// Alpha-blended 8-bit scanline mapper.
// When gr_zbuffering is set, the FULL, WRITE and READ z-buffer modes all
// call tmapscan_lna8_zbuffered(). The inline assembly below blends without
// any depth test: for each pixel the destination byte (al) and the texel
// (ah) form an offset that is added to Tmap.BlendLookup, and the byte found
// there replaces the destination pixel.
2024 void tmapscan_lna8()
2026 if (gr_zbuffering) {
2027 switch(gr_zbuffering_mode) {
2030 case GR_ZBUFF_FULL: // both
2031 case GR_ZBUFF_WRITE: // write only
2032 case GR_ZBUFF_READ: // read only
2033 tmapscan_lna8_zbuffered();
2051 ; setup delta values
2052 mov eax, Tmap.fx_dv_dx // get v 16.16 step
2053 mov ebx, eax // copy it
2054 sar eax, 16 // get v int step
2055 shl ebx, 16 // get v frac step
2056 mov Tmap.DeltaVFrac, ebx // store it
2057 imul eax, Tmap.src_offset // calc texture step for v int step
2059 mov ebx, Tmap.fx_du_dx // get u 16.16 step
2060 mov ecx, ebx // copy it
2061 sar ebx, 16 // get the u int step
2062 shl ecx, 16 // get the u frac step
2063 mov Tmap.DeltaUFrac, ecx // store it
2064 add eax, ebx // calc uint + vint step
// uv_delta[4] is the texture step without a v-fraction carry,
// uv_delta[0] is the same step plus one extra texture row (src_offset).
2066 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
2067 add eax, Tmap.src_offset // calc whole step + v carry
2068 mov Tmap.uv_delta[0], eax // save in v-carry slot
2070 ; setup initial coordinates
2071 mov esi, Tmap.fx_u // get u 16.16
2072 mov ebx, esi // copy it
2073 sar esi, 16 // get integer part
2074 shl ebx, 16 // get fractional part
2076 mov ecx, Tmap.fx_v // get v 16.16
2077 mov edx, ecx // copy it
2078 sar edx, 16 // get integer part
2079 shl ecx, 16 // get fractional part
2080 imul edx, Tmap.src_offset // calc texture scanline address
2081 add esi, edx // calc texture offset
2082 add esi, Tmap.pixptr // calc address
2084 ; set edi = address of first pixel to modify
2085 mov edi, Tmap.dest_row_data
2087 mov edx, Tmap.DeltaUFrac
2090 mov al,[edi] // get the destination pixel
2092 mov ebp, Tmap.loop_count
2097 mov Tmap.num_big_steps, ebp
2098 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
2104 // 4 pixel span code (four stores, [edi+0]..[edi+3], per big step)
2105 // edi = dest dib bits at current pixel
2106 // esi = texture pointer at current u,v
2108 // ebx = u fraction 0.32
2109 // ecx = v fraction 0.32
2110 // edx = u frac step
2111 // ebp = v carry scratch
2115 mov al,[edi+0] // get the destination pixel
2116 mov ah,[esi] // get texture pixel 0
2117 add ecx,Tmap.DeltaVFrac // increment v fraction
// sbb leaves ebp = 0 or -1, so uv_delta[4*ebp+4] picks uv_delta[4] (no v
// carry) or uv_delta[0] (v carry); adc also folds in the u-fraction carry.
2118 sbb ebp,ebp // get -1 if carry
2119 add ebx,edx // increment u fraction
2120 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2121 add eax, Tmap.BlendLookup
2122 mov al, [eax] // blend them
2123 mov [edi+0],al // store pixel
2126 mov al,[edi+1] // get the destination pixel
2127 mov ah,[esi] // get texture pixel 1
2128 add ecx,Tmap.DeltaVFrac // increment v fraction
2129 sbb ebp,ebp // get -1 if carry
2130 add ebx,edx // increment u fraction
2131 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2132 add eax, Tmap.BlendLookup
2133 mov al, [eax] // blend them
2134 mov [edi+1],al // store pixel
2137 mov al,[edi+2] // get the destination pixel
2138 mov ah,[esi] // get texture pixel 2
2139 add ecx,Tmap.DeltaVFrac // increment v fraction
2140 sbb ebp,ebp // get -1 if carry
2141 add ebx,edx // increment u fraction
2142 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2143 add eax, Tmap.BlendLookup
2144 mov al, [eax] // blend them
2145 mov [edi+2],al // store pixel
2148 mov al,[edi+3] // get the destination pixel
2149 mov ah,[esi] // get texture pixel 3
2150 add ecx,Tmap.DeltaVFrac // increment v fraction
2151 sbb ebp,ebp // get -1 if carry
2152 add ebx,edx // increment u fraction
2153 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2154 add eax, Tmap.BlendLookup
2155 mov al, [eax] // blend them
2156 mov [edi+3],al // store pixel
2159 dec Tmap.num_big_steps
// Leftover pixels: Tmap.loop_count now holds the 0..3 remainder.
2165 mov ebp,Tmap.loop_count
2170 mov Tmap.loop_count, ebp
2173 mov al,[edi] // get the destination pixel
2178 mov al,[edi+0] // get the destination pixel
2179 mov ah,[esi] // get texture pixel 0
2180 add ecx,Tmap.DeltaVFrac // increment v fraction
2181 sbb ebp,ebp // get -1 if carry
2182 add ebx,edx // increment u fraction
2183 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2184 add eax, Tmap.BlendLookup
2185 mov al, [eax] // blend them
2186 mov [edi+0],al // store pixel
2189 mov al,[edi+1] // get the destination pixel
2190 mov ah,[esi] // get texture pixel 1
2191 add ecx,Tmap.DeltaVFrac // increment v fraction
2192 sbb ebp,ebp // get -1 if carry
2193 add ebx,edx // increment u fraction
2194 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2195 add eax, Tmap.BlendLookup
2196 mov al, [eax] // blend them
2197 mov [edi+1],al // store pixel
2208 mov al,[edi] // get the destination pixel
2209 mov ah,[esi] // get texture pixel
2210 add eax, Tmap.BlendLookup
2211 mov al, [eax] // blend them
2212 mov [edi],al // store pixel
2226 // HACKED IN SYSTEM FOR DOING MODEL CACHING
2227 int Tmap_scan_read = 0; // 0 = normal mapper, 1=read, 2=write
2229 // HACKED IN SYSTEM FOR DOING MODEL CACHING
// "Read" pass of the model cache scanline mapper: the data flow is the
// reverse of a normal mapper. Pixels are read from the destination row
// (Tmap.dest_row_data) and stored INTO the texture (Tmap.pixptr) at the
// interpolated u,v position. Both a C loop and an inline assembly version
// of that copy appear in this function.
2230 void tmapscan_lnn8_read()
// Convert the float start coordinates and per-pixel deltas to fixed point.
2232 Tmap.fx_u = fl2f(Tmap.l.u);
2233 Tmap.fx_v = fl2f(Tmap.l.v);
2234 Tmap.fx_du_dx = fl2f(Tmap.deltas.u);
2235 Tmap.fx_dv_dx = fl2f(Tmap.deltas.v);
2240 ubyte * src = (ubyte *)Tmap.pixptr; // texture memory (written to below)
2241 ubyte * dst = (ubyte *)Tmap.dest_row_data; // destination row (read from below)
2243 for (i=0; i<Tmap.loop_count; i++ ) {
2248 src[u+v*Tmap.src_offset] = *dst++; // copy destination pixel into the texture
2250 Tmap.fx_u += Tmap.fx_du_dx;
2251 Tmap.fx_v += Tmap.fx_dv_dx;
2267 ; setup delta values
2268 mov eax, Tmap.fx_dv_dx // get v 16.16 step
2269 mov ebx, eax // copy it
2270 sar eax, 16 // get v int step
2271 shl ebx, 16 // get v frac step
2272 mov Tmap.DeltaVFrac, ebx // store it
2273 imul eax, Tmap.src_offset // calc texture step for v int step
2275 mov ebx, Tmap.fx_du_dx // get u 16.16 step
2276 mov ecx, ebx // copy it
2277 sar ebx, 16 // get the u int step
2278 shl ecx, 16 // get the u frac step
2279 mov Tmap.DeltaUFrac, ecx // store it
2280 add eax, ebx // calc uint + vint step
// uv_delta[4] is the texture step without a v-fraction carry,
// uv_delta[0] is the same step plus one extra texture row (src_offset).
2282 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
2283 add eax, Tmap.src_offset // calc whole step + v carry
2284 mov Tmap.uv_delta[0], eax // save in v-carry slot
2286 ; setup initial coordinates
2287 mov esi, Tmap.fx_u // get u 16.16
2288 mov ebx, esi // copy it
2289 sar esi, 16 // get integer part
2290 shl ebx, 16 // get fractional part
2292 mov ecx, Tmap.fx_v // get v 16.16
2293 mov edx, ecx // copy it
2294 sar edx, 16 // get integer part
2295 shl ecx, 16 // get fractional part
2296 imul edx, Tmap.src_offset // calc texture scanline address
2297 add esi, edx // calc texture offset
2298 add esi, Tmap.pixptr // calc address
2300 ; set edi = address of first pixel to read from the destination row
2301 mov edi, Tmap.dest_row_data
2303 mov edx, Tmap.DeltaUFrac
2305 mov al,[edi] // preread the destination cache line
2307 mov ebp, Tmap.loop_count
2312 mov Tmap.num_big_steps, ebp
2313 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
2318 // 4 pixel span code (four pixels, [edi+0]..[edi+3], per big step)
2319 // edi = dest dib bits at current pixel
2320 // esi = texture pointer at current u,v
2322 // ebx = u fraction 0.32
2323 // ecx = v fraction 0.32
2324 // edx = u frac step
2325 // ebp = v carry scratch
2327 add ecx,Tmap.DeltaVFrac // increment v fraction
// sbb leaves ebp = 0 or -1, so uv_delta[4*ebp+4] picks uv_delta[4] (no v
// carry) or uv_delta[0] (v carry); adc also folds in the u-fraction carry.
2328 sbb ebp,ebp // get -1 if carry
2329 mov al,[edi+0] // get destination pixel
2330 mov [esi],al // store it into the texture
2331 add ebx,edx // increment u fraction
2332 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2334 add ecx,Tmap.DeltaVFrac // increment v fraction
2335 sbb ebp,ebp // get -1 if carry
2336 mov al,[edi+1] // get destination pixel
2337 mov [esi],al // store it into the texture
2338 add ebx,edx // increment u fraction
2339 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2341 add ecx,Tmap.DeltaVFrac // increment v fraction
2342 sbb ebp,ebp // get -1 if carry
2343 mov al,[edi+2] // get destination pixel
2344 mov [esi],al // store it into the texture
2345 add ebx,edx // increment u fraction
2346 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2348 add ecx,Tmap.DeltaVFrac // increment v fraction
2349 sbb ebp,ebp // get -1 if carry
2350 mov al,[edi+3] // get destination pixel
2351 mov [esi],al // store it into the texture
2352 add ebx,edx // increment u fraction
2353 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2356 dec Tmap.num_big_steps
// Leftover pixels: Tmap.loop_count now holds the 0..3 remainder.
2362 mov ebp,Tmap.loop_count
2367 mov Tmap.loop_count, ebp
2372 mov al, [edi] // preread the destination cache line
2376 add ecx,Tmap.DeltaVFrac // increment v fraction
2377 sbb ebp,ebp // get -1 if carry
2378 mov al,[edi+0] // get destination pixel
2379 mov [esi],al // store it into the texture
2380 add ebx,edx // increment u fraction
2381 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2383 add ecx,Tmap.DeltaVFrac // increment v fraction
2384 sbb ebp,ebp // get -1 if carry
2385 mov al,[edi+1] // get destination pixel
2386 mov [esi],al // store it into the texture
2387 add ebx,edx // increment u fraction
2388 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2398 mov al,[edi] // get destination pixel
2399 mov [esi],al // store the last pixel into the texture
2414 // HACKED IN SYSTEM FOR DOING MODEL CACHING
// "Write" pass of the model cache scanline mapper: texels are fetched from
// the texture (Tmap.pixptr) at the interpolated u,v position and written to
// the destination row (Tmap.dest_row_data). Both a C loop and an inline
// assembly version appear in this function.
2415 void tmapscan_lnn8_write()
// Convert the float start coordinates and per-pixel deltas to fixed point.
2417 Tmap.fx_u = fl2f(Tmap.l.u);
2418 Tmap.fx_v = fl2f(Tmap.l.v);
2419 Tmap.fx_du_dx = fl2f(Tmap.deltas.u);
2420 Tmap.fx_dv_dx = fl2f(Tmap.deltas.v);
2425 ubyte * src = (ubyte *)Tmap.pixptr; // texture memory
2426 ubyte * dst = (ubyte *)Tmap.dest_row_data; // destination row
2428 for (i=0; i<Tmap.loop_count; i++ ) {
2433 ubyte c = src[u+v*Tmap.src_offset]; // fetch the texel at (u,v)
2439 Tmap.fx_u += Tmap.fx_du_dx;
2440 Tmap.fx_v += Tmap.fx_dv_dx;
2456 ; setup delta values
2457 mov eax, Tmap.fx_dv_dx // get v 16.16 step
2458 mov ebx, eax // copy it
2459 sar eax, 16 // get v int step
2460 shl ebx, 16 // get v frac step
2461 mov Tmap.DeltaVFrac, ebx // store it
2462 imul eax, Tmap.src_offset // calc texture step for v int step
2464 mov ebx, Tmap.fx_du_dx // get u 16.16 step
2465 mov ecx, ebx // copy it
2466 sar ebx, 16 // get the u int step
2467 shl ecx, 16 // get the u frac step
2468 mov Tmap.DeltaUFrac, ecx // store it
2469 add eax, ebx // calc uint + vint step
// uv_delta[4] is the texture step without a v-fraction carry,
// uv_delta[0] is the same step plus one extra texture row (src_offset).
2471 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
2472 add eax, Tmap.src_offset // calc whole step + v carry
2473 mov Tmap.uv_delta[0], eax // save in v-carry slot
2475 ; setup initial coordinates
2476 mov esi, Tmap.fx_u // get u 16.16
2477 mov ebx, esi // copy it
2478 sar esi, 16 // get integer part
2479 shl ebx, 16 // get fractional part
2481 mov ecx, Tmap.fx_v // get v 16.16
2482 mov edx, ecx // copy it
2483 sar edx, 16 // get integer part
2484 shl ecx, 16 // get fractional part
2485 imul edx, Tmap.src_offset // calc texture scanline address
2486 add esi, edx // calc texture offset
2487 add esi, Tmap.pixptr // calc address
2489 ; set edi = address of first pixel to modify
2490 mov edi, Tmap.dest_row_data
2492 mov edx, Tmap.DeltaUFrac
2494 mov al,[edi] // preread the destination cache line
2496 mov ebp, Tmap.loop_count
2501 mov Tmap.num_big_steps, ebp
2502 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
2507 // 4 pixel span code (four stores, [edi+0]..[edi+3], per big step)
2508 // edi = dest dib bits at current pixel
2509 // esi = texture pointer at current u,v
2511 // ebx = u fraction 0.32
2512 // ecx = v fraction 0.32
2513 // edx = u frac step
2514 // ebp = v carry scratch
2516 add ecx,Tmap.DeltaVFrac // increment v fraction
// sbb leaves ebp = 0 or -1, so uv_delta[4*ebp+4] picks uv_delta[4] (no v
// carry) or uv_delta[0] (v carry); adc also folds in the u-fraction carry.
2517 sbb ebp,ebp // get -1 if carry
2518 mov al,[esi] // get texture pixel
2519 add ebx,edx // increment u fraction
2520 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2523 mov [edi+0],al // store pixel
2526 add ecx,Tmap.DeltaVFrac // increment v fraction
2527 sbb ebp,ebp // get -1 if carry
2528 mov al,[esi] // get texture pixel
2529 add ebx,edx // increment u fraction
2530 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2533 mov [edi+1],al // store pixel
2536 add ecx,Tmap.DeltaVFrac // increment v fraction
2537 sbb ebp,ebp // get -1 if carry
2538 mov al,[esi] // get texture pixel
2539 add ebx,edx // increment u fraction
2540 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2543 mov [edi+2],al // store pixel
2546 add ecx,Tmap.DeltaVFrac // increment v fraction
2547 sbb ebp,ebp // get -1 if carry
2548 mov al,[esi] // get texture pixel
2549 add ebx,edx // increment u fraction
2550 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2553 mov [edi+3],al // store pixel
2557 dec Tmap.num_big_steps
// Leftover pixels: Tmap.loop_count now holds the 0..3 remainder.
2563 mov ebp,Tmap.loop_count
2568 mov Tmap.loop_count, ebp
2573 mov al, [edi] // touch the destination (value in al is overwritten below)
2577 add ecx,Tmap.DeltaVFrac // increment v fraction
2578 sbb ebp,ebp // get -1 if carry
2579 mov al,[esi] // get texture pixel
2580 add ebx,edx // increment u fraction
2581 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2584 mov [edi+0],al // store pixel
2587 add ecx,Tmap.DeltaVFrac // increment v fraction
2588 sbb ebp,ebp // get -1 if carry
2589 mov al,[esi] // get texture pixel
2590 add ebx,edx // increment u fraction
2591 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2594 mov [edi+1],al // store pixel
2605 mov al,[esi] // get texture pixel
2608 mov [edi],al // store the last pixel
// Plain 8-bit scanline mapper: copies texels to the destination row.
// Tmap_scan_read selects the model caching variants first (1 calls
// tmapscan_lnn8_read(), 2 calls tmapscan_lnn8_write()), and the
// GR_ALPHABLEND_FILTER alpha blend mode is tested before the copy code.
// The inline assembly keeps one texel in flight: al is stored to the
// destination and then reloaded with the texel for the following pixel.
2623 void tmapscan_lnn8()
2625 // HACKED IN SYSTEM FOR DOING MODEL CACHING
2626 if ( Tmap_scan_read==1 ) {
2627 tmapscan_lnn8_read();
2629 } else if ( Tmap_scan_read==2 ) {
2630 tmapscan_lnn8_write();
2635 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER ) {
2652 ; setup delta values
2653 mov eax, Tmap.fx_dv_dx // get v 16.16 step
2654 mov ebx, eax // copy it
2655 sar eax, 16 // get v int step
2656 shl ebx, 16 // get v frac step
2657 mov Tmap.DeltaVFrac, ebx // store it
2658 imul eax, Tmap.src_offset // calc texture step for v int step
2660 mov ebx, Tmap.fx_du_dx // get u 16.16 step
2661 mov ecx, ebx // copy it
2662 sar ebx, 16 // get the u int step
2663 shl ecx, 16 // get the u frac step
2664 mov Tmap.DeltaUFrac, ecx // store it
2665 add eax, ebx // calc uint + vint step
// uv_delta[4] is the texture step without a v-fraction carry,
// uv_delta[0] is the same step plus one extra texture row (src_offset).
2667 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
2668 add eax, Tmap.src_offset // calc whole step + v carry
2669 mov Tmap.uv_delta[0], eax // save in v-carry slot
2671 ; setup initial coordinates
2672 mov esi, Tmap.fx_u // get u 16.16
2673 mov ebx, esi // copy it
2674 sar esi, 16 // get integer part
2675 shl ebx, 16 // get fractional part
2677 mov ecx, Tmap.fx_v // get v 16.16
2678 mov edx, ecx // copy it
2679 sar edx, 16 // get integer part
2680 shl ecx, 16 // get fractional part
2681 imul edx, Tmap.src_offset // calc texture scanline address
2682 add esi, edx // calc texture offset
2683 add esi, Tmap.pixptr // calc address
2685 ; set edi = address of first pixel to modify
2686 mov edi, Tmap.dest_row_data
2688 mov edx, Tmap.DeltaUFrac
2690 mov al,[edi] // preread the destination cache line
2691 mov al,[esi] // get texture pixel 0
2693 mov ebp, Tmap.loop_count
2698 mov Tmap.num_big_steps, ebp
2699 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
2704 // 4 pixel span code (four stores, [edi+0]..[edi+3], per big step)
2705 // edi = dest dib bits at current pixel
2706 // esi = texture pointer at current u,v
2708 // ebx = u fraction 0.32
2709 // ecx = v fraction 0.32
2710 // edx = u frac step
2711 // ebp = v carry scratch
2713 add ecx,Tmap.DeltaVFrac // increment v fraction
// sbb leaves ebp = 0 or -1, so uv_delta[4*ebp+4] picks uv_delta[4] (no v
// carry) or uv_delta[0] (v carry); adc also folds in the u-fraction carry.
2714 sbb ebp,ebp // get -1 if carry
2715 mov [edi+0],al // store pixel
2716 mov al,[esi] // get texture pixel
2717 add ebx,edx // increment u fraction
2718 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2720 add ecx,Tmap.DeltaVFrac // increment v fraction
2721 sbb ebp,ebp // get -1 if carry
2722 mov [edi+1],al // store pixel
2723 mov al,[esi] // get texture pixel
2724 add ebx,edx // increment u fraction
2725 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2727 add ecx,Tmap.DeltaVFrac // increment v fraction
2728 sbb ebp,ebp // get -1 if carry
2729 mov [edi+2],al // store pixel
2730 mov al,[esi] // get texture pixel
2731 add ebx,edx // increment u fraction
2732 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2734 add ecx,Tmap.DeltaVFrac // increment v fraction
2735 sbb ebp,ebp // get -1 if carry
2736 mov [edi+3],al // store pixel
2737 mov al,[esi] // get texture pixel
2738 add ebx,edx // increment u fraction
2739 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2742 dec Tmap.num_big_steps
// Leftover pixels: Tmap.loop_count now holds the 0..3 remainder.
2748 mov ebp,Tmap.loop_count
2753 mov Tmap.loop_count, ebp
2758 mov al, [edi] // preread the destination cache line
2759 mov al, [esi] // Get first texel
2763 add ecx,Tmap.DeltaVFrac // increment v fraction
2764 sbb ebp,ebp // get -1 if carry
2765 mov [edi+0],al // store pixel
2766 mov al,[esi] // get texture pixel
2767 add ebx,edx // increment u fraction
2768 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2770 add ecx,Tmap.DeltaVFrac // increment v fraction
2771 sbb ebp,ebp // get -1 if carry
2772 mov [edi+1],al // store pixel
2773 mov al,[esi] // get texture pixel
2774 add ebx,edx // increment u fraction
2775 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2785 mov [edi],al // store the last pixel
// 8-bit scanline mapper, "lnt8" variant.
// Tests for the GR_ALPHABLEND_FILTER alpha blend mode first; the inline
// assembly then walks the texture in 16.16 fixed point (integer offset in
// esi, 0.32 fractions in ebx/ecx) and stores texels to the destination row.
// NOTE(review): the "t" in the name presumably means transparent texels are
// handled specially - confirm against the complete function body.
2799 void tmapscan_lnt8()
2801 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER ) {
2818 ; setup delta values
2819 mov eax, Tmap.fx_dv_dx // get v 16.16 step
2820 mov ebx, eax // copy it
2821 sar eax, 16 // get v int step
2822 shl ebx, 16 // get v frac step
2823 mov Tmap.DeltaVFrac, ebx // store it
2824 imul eax, Tmap.src_offset // calc texture step for v int step
2826 mov ebx, Tmap.fx_du_dx // get u 16.16 step
2827 mov ecx, ebx // copy it
2828 sar ebx, 16 // get the u int step
2829 shl ecx, 16 // get the u frac step
2830 mov Tmap.DeltaUFrac, ecx // store it
2831 add eax, ebx // calc uint + vint step
// uv_delta[4] is the texture step without a v-fraction carry,
// uv_delta[0] is the same step plus one extra texture row (src_offset).
2833 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
2834 add eax, Tmap.src_offset // calc whole step + v carry
2835 mov Tmap.uv_delta[0], eax // save in v-carry slot
2837 ; setup initial coordinates
2838 mov esi, Tmap.fx_u // get u 16.16
2839 mov ebx, esi // copy it
2840 sar esi, 16 // get integer part
2841 shl ebx, 16 // get fractional part
2843 mov ecx, Tmap.fx_v // get v 16.16
2844 mov edx, ecx // copy it
2845 sar edx, 16 // get integer part
2846 shl ecx, 16 // get fractional part
2847 imul edx, Tmap.src_offset // calc texture scanline address
2848 add esi, edx // calc texture offset
2849 add esi, Tmap.pixptr // calc address
2851 ; set edi = address of first pixel to modify
2852 mov edi, Tmap.dest_row_data
2854 mov edx, Tmap.DeltaUFrac
2856 mov al,[edi] // preread the destination cache line
2857 mov al,[esi] // get texture pixel 0
2859 mov ebp, Tmap.loop_count
2864 mov Tmap.num_big_steps, ebp
2865 and Tmap.loop_count, 3 // keep only the 0..3 leftover pixels
2869 // 4 pixel span code (four stores, [edi+0]..[edi+3], per big step)
2870 // edi = dest dib bits at current pixel
2871 // esi = texture pointer at current u,v
2873 // ebx = u fraction 0.32
2874 // ecx = v fraction 0.32
2875 // edx = u frac step
2876 // ebp = v carry scratch
2878 add ecx,Tmap.DeltaVFrac // increment v fraction
// sbb leaves ebp = 0 or -1, so uv_delta[4*ebp+4] picks uv_delta[4] (no v
// carry) or uv_delta[0] (v carry); adc also folds in the u-fraction carry.
2879 sbb ebp,ebp // get -1 if carry
2882 mov [edi+0],al // store pixel
2884 mov al,[esi] // get texture pixel
2885 add ebx,edx // increment u fraction
2886 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2888 add ecx,Tmap.DeltaVFrac // increment v fraction
2889 sbb ebp,ebp // get -1 if carry
2892 mov [edi+1],al // store pixel
2894 mov al,[esi] // get texture pixel
2895 add ebx,edx // increment u fraction
2896 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2898 add ecx,Tmap.DeltaVFrac // increment v fraction
2899 sbb ebp,ebp // get -1 if carry
2902 mov [edi+2],al // store pixel
2904 mov al,[esi] // get texture pixel
2905 add ebx,edx // increment u fraction
2906 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2908 add ecx,Tmap.DeltaVFrac // increment v fraction
2909 sbb ebp,ebp // get -1 if carry
2912 mov [edi+3],al // store pixel
2914 mov al,[esi] // get texture pixel
2915 add ebx,edx // increment u fraction
2916 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2919 dec Tmap.num_big_steps
// Leftover pixels: Tmap.loop_count now holds the 0..3 remainder.
2925 mov ebp,Tmap.loop_count
2930 mov Tmap.loop_count, ebp
2935 mov al, [edi] // preread the destination cache line
2936 mov al, [esi] // Get first texel
2940 add ecx,Tmap.DeltaVFrac // increment v fraction
2941 sbb ebp,ebp // get -1 if carry
2944 mov [edi+0],al // store pixel
2946 mov al,[esi] // get texture pixel
2947 add ebx,edx // increment u fraction
2948 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2950 add ecx,Tmap.DeltaVFrac // increment v fraction
2951 sbb ebp,ebp // get -1 if carry
2954 mov [edi+1],al // store pixel
2956 mov al,[esi] // get texture pixel
2957 add ebx,edx // increment u fraction
2958 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
2970 mov [edi],al // store the last pixel
// tmapscan_pln8_zbuffered_ppro: z-buffered, lit, textured scanline.
// The row is cut into 32-pixel subdivisions (width >> 5, remainder width & 31).
// At each subdivision boundary the FPU turns the U/Z, V/Z and 1/Z terms into
// u,v; the integer loop steps u,v linearly in between, tests each pixel against
// the z-buffer and looks the texel up in gr_fade_table before storing it.
// The FPU control word is saved in Tmap.OldFPUCW on entry and reloaded on exit.
2987 void tmapscan_pln8_zbuffered_ppro()
3002 // Put the FPU in low precision mode
3003 fstcw Tmap.OldFPUCW // store copy of CW
3004 mov ax,Tmap.OldFPUCW // get it in ax
3006 mov Tmap.FPUCW,ax // store it
3007 fldcw Tmap.FPUCW // load the FPU
3010 mov ecx, Tmap.loop_count // ecx = width
3011 mov edi, Tmap.dest_row_data // edi = dest pointer
3013 // edi = pointer to start pixel in dest dib
3016 mov eax,ecx // eax and ecx = width
3017 shr ecx,5 // ecx = width / subdivision length
3018 and eax,31 // eax = width mod subdivision length
3019 jnz some_left_over // any leftover?
3020 dec ecx // no, so special case last span
3021 mov eax,32 // it's 32 pixels long (one full subdivision)
3023 mov Tmap.Subdivisions,ecx // store widths
3024 mov Tmap.WidthModLength,eax
3026 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
3027 // st0 st1 st2 st3 st4 st5 st6 st7
3028 fld Tmap.l.v // V/ZL
3029 fld Tmap.l.u // U/ZL V/ZL
3030 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
3031 fld1 // 1 1/ZL U/ZL V/ZL
3032 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
3033 fld st // ZL ZL 1/ZL U/ZL V/ZL
3034 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
3035 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
3036 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
3038 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
3039 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
3041 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
3043 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
3044 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
3045 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
3046 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
3047 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
3049 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
3051 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
3052 // @todo overlap this guy
3053 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
3054 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
3055 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
3056 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
3057 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
3059 cmp ecx,0 // check for any full spans
3060 jle HandleLeftoverPixels
3064 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
3065 // UR VR V/ZR 1/ZR U/ZR UL VL
3067 // convert left side coords
3069 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
3070 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
3071 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
3073 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
3074 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
3075 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
3077 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
3079 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
3080 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
3081 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
3082 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
3084 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
3085 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
3087 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
3088 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
3089 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
3091 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
3092 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
3094 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
3095 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
3096 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
3097 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
3098 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
3099 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
3100 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
3103 // setup delta values
3105 mov eax,Tmap.DeltaV // get v 16.16 step
3106 mov ebx,eax // copy it
3107 sar eax,16 // get v int step
3108 shl ebx,16 // get v frac step
3109 mov Tmap.DeltaVFrac,ebx // store it
3110 imul eax,Tmap.src_offset // calculate texture step for v int step
3112 mov ebx,Tmap.DeltaU // get u 16.16 step
3113 mov ecx,ebx // copy it
3114 sar ebx,16 // get u int step
3115 shl ecx,16 // get u frac step
3116 mov Tmap.DeltaUFrac,ecx // store it
3117 add eax,ebx // calculate uint + vint step
3118 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
3119 add eax,Tmap.src_offset // calculate whole step + v carry
3120 mov Tmap.uv_delta[0],eax // save in v-carry slot
3122 // setup initial coordinates
3123 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
3125 mov ebx,esi // copy it
3126 sar esi,16 // get integer part
3127 shl ebx,16 // get fractional part
3129 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
3131 mov edx,ecx // copy it
3132 sar edx,16 // get integer part
3133 shl ecx,16 // get fractional part
3134 imul edx,Tmap.src_offset // calc texture scanline address
3135 add esi,edx // calc texture offset
3136 add esi,Tmap.pixptr // calc address
3138 // set up affine registers
3144 mov ebp, Tmap.fx_dl_dx
3153 mov edx, Tmap.DeltaUFrac
3155 mov Tmap.DeltaUFrac, edx
3158 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
3159 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
3160 // This divide should happen while the pixel span is drawn.
3161 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
3164 // 32 pixel span code (InnerLooper passes of 4 pixels each)
3165 // edi = dest dib bits at current pixel
3166 // esi = texture pointer at current u,v
3168 // ebx = u fraction 0.32 (bits 8..15 are masked out below to index gr_fade_table)
3169 // ecx = v fraction 0.32
3170 // edx = base pointer for the z-buffer compares/writes ([edx+0], [edx+4], ...)
3171 // ebp = depth value tested against the z-buffer, stepped by Tmap.fx_dwdx; eax indexes uv_delta
3173 mov al,[edi] // preread the destination cache line
3174 mov al,[esi] // get texture pixel 0
3176 mov Tmap.InnerLooper, 32/4 // Set up loop counter
3182 sub eax, Tmap.pScreenBits
3190 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
3191 jle Skip0 // If pixel is covered, skip drawing
3192 mov [edx+0], ebp // Write new Z value
3194 // Get pixel and light it
3196 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3197 mov al, [esi] // Get texel into AL
// NOTE(review): this mask also clears the u fraction held in the upper bits of
// EBX; confirm EBX is saved and restored around the lookup.
3198 and ebx, 0ff00h // Clear out fractional part of EBX
3199 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3202 mov [edi+0],al // Write new pixel
3204 Skip0: add ecx,Tmap.DeltaVFrac
3206 add ebp,Tmap.fx_dwdx
3207 add ebx,Tmap.DeltaUFrac
3208 adc esi,Tmap.uv_delta[4*eax+4]
3212 cmp ebp, [edx+4] // Compare the Z depth of this pixel with zbuffer
3213 jle Skip1 // If pixel is covered, skip drawing
3214 mov [edx+4], ebp // Write new Z value
3216 // Get pixel and light it
3218 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3219 mov al, [esi] // Get texel into AL
3220 and ebx, 0ff00h // Clear out fractional part of EBX
3221 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3224 mov [edi+1],al // Write new pixel
3226 Skip1: add ecx,Tmap.DeltaVFrac
3228 add ebp,Tmap.fx_dwdx
3229 add ebx,Tmap.DeltaUFrac
3230 adc esi,Tmap.uv_delta[4*eax+4]
3234 cmp ebp, [edx+8] // Compare the Z depth of this pixel with zbuffer
3235 jle Skip2 // If pixel is covered, skip drawing
3236 mov [edx+8], ebp // Write new Z value
3238 // Get pixel and light it
3240 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3241 mov al, [esi] // Get texel into AL
3242 and ebx, 0ff00h // Clear out fractional part of EBX
3243 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3246 mov [edi+2],al // Write new pixel
3248 Skip2: add ecx,Tmap.DeltaVFrac
3250 add ebp,Tmap.fx_dwdx
3251 add ebx,Tmap.DeltaUFrac
3252 adc esi,Tmap.uv_delta[4*eax+4]
3256 cmp ebp, [edx+12] // Compare the Z depth of this pixel with zbuffer
3257 jle Skip3 // If pixel is covered, skip drawing
3258 mov [edx+12], ebp // Write new Z value
3260 // Get pixel and light it
3262 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3263 mov al, [esi] // Get texel into AL
3264 and ebx, 0ff00h // Clear out fractional part of EBX
3265 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3268 mov [edi+3],al // Write new pixel
3270 Skip3: add ecx,Tmap.DeltaVFrac
3272 add ebp,Tmap.fx_dwdx
3273 add ebx,Tmap.DeltaUFrac
3274 adc esi,Tmap.uv_delta[4*eax+4]
3280 dec Tmap.InnerLooper
3285 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
3286 // ZR V/ZR 1/ZR U/ZR UL VL
3288 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
3289 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
3290 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
3291 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
3293 dec Tmap.Subdivisions // decrement span count
3294 jnz SpanLoop // loop back
3297 HandleLeftoverPixels:
3299 mov esi,Tmap.pixptr // load texture pointer
3301 // edi = dest dib bits
3302 // esi = current texture dib bits
3303 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
3304 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
3306 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
3307 jz FPUReturn ; nope, pop the FPU and bail
3309 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
3311 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
3312 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
3313 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
3315 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
3316 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
3317 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
3319 dec Tmap.WidthModLength ; calc how many steps to take
3320 jz OnePixelSpan ; just one, do not do deltas
3322 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
3325 // @todo rearrange things so we don't need these two instructions
3326 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
3327 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
3329 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
3330 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
3331 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
3332 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
3333 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
3334 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
3336 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
3338 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
3339 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
3341 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
3343 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
3344 fxch st(1) ; VR UR inv. inv. inv. dU VL
3345 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
3346 fxch st(6) ; dV UR inv. inv. inv. dU VR
3348 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
3349 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
3350 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
3352 fxch st(4) ; dU inv. inv. inv. UR VR
3353 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
3354 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
3355 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
3357 // @todo gross! these are to line up with the other loop
3358 fld st(1) ; inv. inv. inv. inv. UR VR
3359 fld st(2) ; inv. inv. inv. inv. inv. UR VR
3362 // setup delta values
3363 mov eax, Tmap.DeltaV // get v 16.16 step
3364 mov ebx, eax // copy it
3365 sar eax, 16 // get v int step
3366 shl ebx, 16 // get v frac step
3367 mov Tmap.DeltaVFrac, ebx // store it
3368 imul eax, Tmap.src_offset // calc texture step for v int step
3370 mov ebx, Tmap.DeltaU // get u 16.16 step
3371 mov ecx, ebx // copy it
3372 sar ebx, 16 // get the u int step
3373 shl ecx, 16 // get the u frac step
3374 mov Tmap.DeltaUFrac, ecx // store it
3375 add eax, ebx // calc uint + vint step
3376 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
3377 add eax, Tmap.src_offset // calc whole step + v carry
3378 mov Tmap.uv_delta[0], eax // save in v-carry slot
3383 ; setup initial coordinates
3384 mov esi, Tmap.UFixed // get u 16.16
3385 mov ebx, esi // copy it
3386 sar esi, 16 // get integer part
3387 shl ebx, 16 // get fractional part
3389 mov ecx, Tmap.VFixed // get v 16.16
3390 mov edx, ecx // copy it
3391 sar edx, 16 // get integer part
3392 shl ecx, 16 // get fractional part
3393 imul edx, Tmap.src_offset // calc texture scanline address
3394 add esi, edx // calc texture offset
3395 add esi, Tmap.pixptr // calc address
3402 // mov edx, Tmap.DeltaUFrac
3404 cmp Tmap.WidthModLength, 1
3409 mov ebx, Tmap.fx_l_right
3415 mov eax, Tmap.fx_dl_dx
3418 mov edx, Tmap.DeltaUFrac
3420 mov Tmap.DeltaUFrac, edx
3429 sub eax, Tmap.pScreenBits
3434 inc Tmap.WidthModLength
3435 mov eax,Tmap.WidthModLength
3439 mov Tmap.WidthModLength, eax
3443 mov al,[edi] // preread the destination cache line
3450 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
3451 jle Skip0a // If pixel is covered, skip drawing
3452 mov [edx+0], ebp // Write new Z value
3454 // Get pixel and light it
3456 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3457 mov al, [esi] // Get texel into AL
3458 and ebx, 0ff00h // Clear out fractional part of EBX
3459 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3462 mov [edi+0],al // Write new pixel
3464 Skip0a: add ecx,Tmap.DeltaVFrac
3467 //add edx, 4 // Go to next
3468 add ebp,Tmap.fx_dwdx
3470 add ebx,Tmap.DeltaUFrac
3472 adc esi,Tmap.uv_delta[4*eax+4]
3476 cmp ebp, [edx+4] // Compare the Z depth of this pixel with zbuffer
3477 jle Skip1a // If pixel is covered, skip drawing
3478 mov [edx+4], ebp // Write new Z value
3480 // Get pixel and light it
3482 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3483 mov al, [esi] // Get texel into AL
3484 and ebx, 0ff00h // Clear out fractional part of EBX
3485 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3488 mov [edi+1],al // Write new pixel
3490 Skip1a: add ecx,Tmap.DeltaVFrac
3493 //add edx, 4 // Go to next
3494 add ebp,Tmap.fx_dwdx
3496 add ebx,Tmap.DeltaUFrac
3498 adc esi,Tmap.uv_delta[4*eax+4]
3503 dec Tmap.WidthModLength
3511 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
3512 jle Skip0c // If pixel is covered, skip drawing
3513 mov [edx+0], ebp // Write new Z value
3515 // Get pixel and light it
3517 xor eax, eax // Clear all bits of EAX. This avoids a partial register stall on Pentium Pros
3518 mov al, [esi] // Get texel into AL
3519 and ebx, 0ff00h // Clear out fractional part of EBX
3520 mov eax, DWORD PTR gr_fade_table[eax+ebx] // Lookup pixel in lighting table
3523 mov [edi+0],al // Write new pixel
3529 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
3530 // xxx xxx xxx xxx xxx xxx xxx
3539 fldcw Tmap.OldFPUCW // restore the FPU
// tmapscan_pln8_zbuffered_pentium: z-buffered, lit, textured scanline.
// The row is cut into 32-pixel subdivisions (width >> 5, remainder width & 31).
// At each subdivision boundary the FPU turns the U/Z, V/Z and 1/Z terms into
// u,v; the integer loop steps u,v linearly in between and tests each pixel
// against the z-buffer. Lighting uses a byte-wide gr_fade_table lookup indexed
// by the low 16 bits of EBX with the texel placed in AL.
// The FPU control word is saved in Tmap.OldFPUCW on entry and reloaded on exit.
3552 void tmapscan_pln8_zbuffered_pentium()
3567 // Put the FPU in low precision mode
3568 fstcw Tmap.OldFPUCW // store copy of CW
3569 mov ax,Tmap.OldFPUCW // get it in ax
3571 mov Tmap.FPUCW,ax // store it
3572 fldcw Tmap.FPUCW // load the FPU
3575 mov ecx, Tmap.loop_count // ecx = width
3576 mov edi, Tmap.dest_row_data // edi = dest pointer
3578 // edi = pointer to start pixel in dest dib
3581 mov eax,ecx // eax and ecx = width
3582 shr ecx,5 // ecx = width / subdivision length
3583 and eax,31 // eax = width mod subdivision length
3584 jnz some_left_over // any leftover?
3585 dec ecx // no, so special case last span
3586 mov eax,32 // it's 32 pixels long (one full subdivision)
3588 mov Tmap.Subdivisions,ecx // store widths
3589 mov Tmap.WidthModLength,eax
3591 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
3592 // st0 st1 st2 st3 st4 st5 st6 st7
3593 fld Tmap.l.v // V/ZL
3594 fld Tmap.l.u // U/ZL V/ZL
3595 fld Tmap.l.sw // 1/ZL U/ZL V/ZL
3596 fld1 // 1 1/ZL U/ZL V/ZL
3597 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
3598 fld st // ZL ZL 1/ZL U/ZL V/ZL
3599 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
3600 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
3601 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
3603 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
3604 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
3606 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
3608 fadd Tmap.fl_dwdx_wide // 1/ZR U/ZL V/ZL UL VL
3609 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
3610 fadd Tmap.fl_dudx_wide // U/ZR 1/ZR V/ZL UL VL
3611 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
3612 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZR U/ZR UL VL
3614 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
3616 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
3617 // @todo overlap this guy
3618 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
3619 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
3620 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
3621 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
3622 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
3624 cmp ecx,0 // check for any full spans
3625 jle HandleLeftoverPixels
3629 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
3630 // UR VR V/ZR 1/ZR U/ZR UL VL
3632 // convert left side coords
3634 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
3635 fmul Tmap.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
3636 fistp Tmap.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
3638 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
3639 fmul Tmap.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
3640 fistp Tmap.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
3642 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
3644 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
3645 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
3646 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
3647 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
3649 fmul Tmap.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
3650 fistp Tmap.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
3652 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
3653 fmul Tmap.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
3654 fistp Tmap.DeltaU ; V/ZR 1/ZR U/ZR UR VR
3656 // increment terms for next span // st0 st1 st2 st3 st4 st5 st6 st7
3657 // Right terms become Left terms--->// V/ZL 1/ZL U/ZL UL VL
3659 fadd Tmap.fl_dvdx_wide // V/ZR 1/ZL U/ZL UL VL
3660 fxch st(1) // 1/ZL V/ZR U/ZL UL VL
3661 fadd Tmap.fl_dwdx_wide // 1/ZR V/ZR U/ZL UL VL
3662 fxch st(2) // U/ZL V/ZR 1/ZR UL VL
3663 fadd Tmap.fl_dudx_wide // U/ZR V/ZR 1/ZR UL VL
3664 fxch st(2) // 1/ZR V/ZR U/ZR UL VL
3665 fxch st(1) // V/ZR 1/ZR U/ZR UL VL
3668 // setup delta values
3670 mov eax,Tmap.DeltaV // get v 16.16 step
3671 mov ebx,eax // copy it
3672 sar eax,16 // get v int step
3673 shl ebx,16 // get v frac step
3674 mov Tmap.DeltaVFrac,ebx // store it
3675 imul eax,Tmap.src_offset // calculate texture step for v int step
3677 mov ebx,Tmap.DeltaU // get u 16.16 step
3678 mov ecx,ebx // copy it
3679 sar ebx,16 // get u int step
3680 shl ecx,16 // get u frac step
3681 mov Tmap.DeltaUFrac,ecx // store it
3682 add eax,ebx // calculate uint + vint step
3683 mov Tmap.uv_delta[4],eax // save whole step in non-v-carry slot
3684 add eax,Tmap.src_offset // calculate whole step + v carry
3685 mov Tmap.uv_delta[0],eax // save in v-carry slot
3687 // setup initial coordinates
3688 mov esi,Tmap.UFixed // get u 16.16 fixedpoint coordinate
3690 mov ebx,esi // copy it
3691 sar esi,16 // get integer part
3692 shl ebx,16 // get fractional part
3694 mov ecx,Tmap.VFixed // get v 16.16 fixedpoint coordinate
3696 mov edx,ecx // copy it
3697 sar edx,16 // get integer part
3698 shl ecx,16 // get fractional part
3699 imul edx,Tmap.src_offset // calc texture scanline address
3700 add esi,edx // calc texture offset
3701 add esi,Tmap.pixptr // calc address
3703 // set up affine registers
3709 mov ebp, Tmap.fx_dl_dx
3718 mov edx, Tmap.DeltaUFrac
3720 mov Tmap.DeltaUFrac, edx
3723 // calculate right side coords st0 st1 st2 st3 st4 st5 st6 st7
3724 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
3725 // This divide should happen while the pixel span is drawn.
3726 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
3729 // 32 pixel span code (InnerLooper passes of 4 pixels each)
3730 // edi = dest dib bits at current pixel
3731 // esi = texture pointer at current u,v
3733 // ebx = u fraction 0.32 (its low 16 bits are copied to EAX for the gr_fade_table lookup)
3734 // ecx = v fraction 0.32
3735 // edx = base pointer for the z-buffer compares/writes ([edx+0], [edx+4], ...)
3736 // ebp = depth value tested against the z-buffer, stepped by Tmap.fx_dwdx; eax indexes uv_delta
3738 mov al,[edi] // preread the destination cache line
3739 mov al,[esi] // get texture pixel 0
3741 mov Tmap.InnerLooper, 32/4 // Set up loop counter
3748 sub eax, Tmap.pScreenBits
3755 mov eax, ebx // Get lighting value from BH into AH
3756 and eax, 0ffffh; // Clear upper bits of EAX
3758 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
3759 mov al, [esi] // Get texel into AL
3760 jle Skip0 // If pixel is covered, skip drawing
3762 mov [edx+0], ebp // Write new Z value
3764 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
3765 mov [edi+0],al // Write new pixel
3767 Skip0: add ecx,Tmap.DeltaVFrac
3770 //add edx, 4 // Go to next
3771 add ebp,Tmap.fx_dwdx
3773 add ebx,Tmap.DeltaUFrac
3775 adc esi,Tmap.uv_delta[4*eax+4]
3779 mov eax, ebx // Get lighting value from BH into AH
3780 and eax, 0ffffh; // Clear upper bits of EAX
3782 cmp ebp, [edx+4] // Compare the Z depth of this pixel with zbuffer
3783 mov al, [esi] // Get texel into AL
3784 jle Skip1 // If pixel is covered, skip drawing
3787 mov [edx+4], ebp // Write new Z value
3789 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
3790 mov [edi+1],al // Write new pixel
3792 Skip1: add ecx,Tmap.DeltaVFrac
3795 //add edx, 4 // Go to next
3796 add ebp,Tmap.fx_dwdx
3798 add ebx,Tmap.DeltaUFrac
3800 adc esi,Tmap.uv_delta[4*eax+4]
3804 mov eax, ebx // Get lighting value from BH into AH
3805 and eax, 0ffffh; // Clear upper bits of EAX
3808 cmp ebp, [edx+8] // Compare the Z depth of this pixel with zbuffer
3809 mov al, [esi] // Get texel into AL
3810 jle Skip2 // If pixel is covered, skip drawing
3813 mov [edx+8], ebp // Write new Z value
3815 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
3816 mov [edi+2],al // Write new pixel
3818 Skip2: add ecx,Tmap.DeltaVFrac
3821 //add edx, 4 // Go to next
3822 add ebp,Tmap.fx_dwdx
3824 add ebx,Tmap.DeltaUFrac
3826 adc esi,Tmap.uv_delta[4*eax+4]
3829 mov eax, ebx // Get lighting value from BH into AH
3830 and eax, 0ffffh; // Clear upper bits of EAX
3833 cmp ebp, [edx+12] // Compare the Z depth of this pixel with zbuffer
3834 mov al, [esi] // Get texel into AL
3835 jle Skip3 // If pixel is covered, skip drawing
3838 mov [edx+12], ebp // Write new Z value
3840 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
3841 mov [edi+3],al // Write new pixel
3843 Skip3: add ecx,Tmap.DeltaVFrac
3846 //add edx, 4 // Go to next
3847 add ebp,Tmap.fx_dwdx
3849 add ebx,Tmap.DeltaUFrac
3851 adc esi,Tmap.uv_delta[4*eax+4]
3856 dec Tmap.InnerLooper
3861 // the fdiv is done, finish right // st0 st1 st2 st3 st4 st5 st6 st7
3862 // ZR V/ZR 1/ZR U/ZR UL VL
3864 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
3865 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
3866 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
3867 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
3869 dec Tmap.Subdivisions // decrement span count
3870 jnz SpanLoop // loop back
3873 HandleLeftoverPixels:
3875 mov esi,Tmap.pixptr // load texture pointer
3877 // edi = dest dib bits
3878 // esi = current texture dib bits
3879 // at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
3880 // inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
3882 cmp Tmap.WidthModLength,0 ; are there remaining pixels to draw?
3883 jz FPUReturn ; nope, pop the FPU and bail
3885 // convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
3887 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
3888 fmul Tmap.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
3889 fistp Tmap.UFixed ; inv. inv. inv. inv. inv. UL VL
3891 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
3892 fmul Tmap.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
3893 fistp Tmap.VFixed ; inv. inv. inv. inv. inv. UL VL
3895 dec Tmap.WidthModLength ; calc how many steps to take
3896 jz OnePixelSpan ; just one, do not do deltas
3898 // calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
3901 // @todo rearrange things so we don't need these two instructions
3902 fstp Tmap.FloatTemp ; inv. inv. inv. inv. UL VL
3903 fstp Tmap.FloatTemp ; inv. inv. inv. UL VL
3905 fld Tmap.r.v ; V/Zr inv. inv. inv. UL VL
3906 fsub Tmap.deltas.v ; V/ZR inv. inv. inv. UL VL
3907 fld Tmap.r.u ; U/Zr V/ZR inv. inv. inv. UL VL
3908 fsub Tmap.deltas.u ; U/ZR V/ZR inv. inv. inv. UL VL
3909 fld Tmap.r.sw ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
3910 fsub Tmap.deltas.sw ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
3912 fdivr Tmap.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
3914 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
3915 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
3917 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
3919 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
3920 fxch st(1) ; VR UR inv. inv. inv. dU VL
3921 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
3922 fxch st(6) ; dV UR inv. inv. inv. dU VR
3924 fidiv Tmap.WidthModLength ; dv UR inv. inv. inv. dU VR
3925 fmul Tmap.FixedScale ; dv16 UR inv. inv. inv. dU VR
3926 fistp Tmap.DeltaV ; UR inv. inv. inv. dU VR
3928 fxch st(4) ; dU inv. inv. inv. UR VR
3929 fidiv Tmap.WidthModLength ; du inv. inv. inv. UR VR
3930 fmul Tmap.FixedScale ; du16 inv. inv. inv. UR VR
3931 fistp Tmap.DeltaU ; inv. inv. inv. UR VR
3933 // @todo gross! these are to line up with the other loop
3934 fld st(1) ; inv. inv. inv. inv. UR VR
3935 fld st(2) ; inv. inv. inv. inv. inv. UR VR
3938 // setup delta values
3939 mov eax, Tmap.DeltaV // get v 16.16 step
3940 mov ebx, eax // copy it
3941 sar eax, 16 // get v int step
3942 shl ebx, 16 // get v frac step
3943 mov Tmap.DeltaVFrac, ebx // store it
3944 imul eax, Tmap.src_offset // calc texture step for v int step
3946 mov ebx, Tmap.DeltaU // get u 16.16 step
3947 mov ecx, ebx // copy it
3948 sar ebx, 16 // get the u int step
3949 shl ecx, 16 // get the u frac step
3950 mov Tmap.DeltaUFrac, ecx // store it
3951 add eax, ebx // calc uint + vint step
3952 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
3953 add eax, Tmap.src_offset // calc whole step + v carry
3954 mov Tmap.uv_delta[0], eax // save in v-carry slot
3959 ; setup initial coordinates
3960 mov esi, Tmap.UFixed // get u 16.16
3961 mov ebx, esi // copy it
3962 sar esi, 16 // get integer part
3963 shl ebx, 16 // get fractional part
3965 mov ecx, Tmap.VFixed // get v 16.16
3966 mov edx, ecx // copy it
3967 sar edx, 16 // get integer part
3968 shl ecx, 16 // get fractional part
3969 imul edx, Tmap.src_offset // calc texture scanline address
3970 add esi, edx // calc texture offset
3971 add esi, Tmap.pixptr // calc address
3978 // mov edx, Tmap.DeltaUFrac
3980 cmp Tmap.WidthModLength, 1
3985 mov ebx, Tmap.fx_l_right
3991 mov eax, Tmap.fx_dl_dx
3994 mov edx, Tmap.DeltaUFrac
3996 mov Tmap.DeltaUFrac, edx
4005 sub eax, Tmap.pScreenBits
4010 inc Tmap.WidthModLength
4011 mov eax,Tmap.WidthModLength
4015 mov Tmap.WidthModLength, eax
4019 mov al,[edi] // preread the destination cache line
4026 mov eax, ebx // Get lighting value from BH into AH
4027 and eax, 0ffffh; // Clear upper bits of EAX
4029 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
4030 mov al, [esi] // Get texel into AL
4031 jle Skip0a // If pixel is covered, skip drawing
4034 mov [edx+0], ebp // Write new Z value
4036 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
4037 mov [edi+0],al // Write new pixel
4039 Skip0a: add ecx,Tmap.DeltaVFrac
4042 //add edx, 4 // Go to next
4043 add ebp,Tmap.fx_dwdx
4045 add ebx,Tmap.DeltaUFrac
4047 adc esi,Tmap.uv_delta[4*eax+4]
4051 mov eax, ebx // Get lighting value from BH into AH
4052 and eax, 0ffffh; // Clear upper bits of EAX
4055 cmp ebp, [edx+4] // Compare the Z depth of this pixel with zbuffer
4056 mov al, [esi] // Get texel into AL
4057 jle Skip1a // If pixel is covered, skip drawing
4059 mov [edx+4], ebp // Write new Z value
4061 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
4062 mov [edi+1],al // Write new pixel
4064 Skip1a: add ecx,Tmap.DeltaVFrac
4067 //add edx, 4 // Go to next
4068 add ebp,Tmap.fx_dwdx
4070 add ebx,Tmap.DeltaUFrac
4072 adc esi,Tmap.uv_delta[4*eax+4]
4077 dec Tmap.WidthModLength
4085 mov eax, ebx // Get lighting value from BH into AH
4086 and eax, 0ffffh; // Clear upper bits of EAX
4088 cmp ebp, [edx+0] // Compare the Z depth of this pixel with zbuffer
4089 mov al, [esi] // Get texel into AL
4090 jle Skip0c // If pixel is covered, skip drawing
4092 mov al, gr_fade_table[eax] // Lookup pixel in lighting table
4094 mov [edx+0], ebp // Write new Z value
4096 mov [edi+0],al // Write new pixel
4102 // busy FPU registers: // st0 st1 st2 st3 st4 st5 st6 st7
4103 // xxx xxx xxx xxx xxx xxx xxx
4112 fldcw Tmap.OldFPUCW // restore the FPU
// tmapscan_pln8_zbuffered: entry point for the z-buffered pln8 scanline.
// Dispatches to the Pentium Pro-tuned implementation or to the Pentium-tuned
// one; both take no arguments and work from the shared Tmap state.
4125 void tmapscan_pln8_zbuffered()
4128 // Pentium Pro optimized code.
4129 tmapscan_pln8_zbuffered_ppro();
4131 tmapscan_pln8_zbuffered_pentium();
// Draws one scanline of an 8-bit texture, blending every texel with the
// destination pixel through a lookup table and testing each pixel against
// the z-buffer first.  All inputs are read from the global Tmap structure
// (fx_u, fx_v, fx_du_dx, fx_dv_dx, fx_dwdx, src_offset, pixptr,
// dest_row_data, pScreenBits, loop_count).
//
// Per-pixel pattern used throughout:
//   - compare ebp with the z-buffer entry at [edx+4*n]; skip the pixel on
//     "less or equal" (signed).  The z-buffer write is commented out, so
//     this routine only reads the z-buffer.
//   - al = destination pixel, ah = texel; adding Tmap.lookup turns that
//     pair into the address of the blended colour (offset texel*256 + dest).
//   - step the u/v fractions; the carries select the integer texture step.
4135 void tmapscan_lnaa8_zbuffered()
4137 #ifndef HARDWARE_ONLY
// Base address of the blend table belonging to the current alpha colour.
4138 Tmap.lookup = (uint)&Current_alphacolor->table.lookup[0][0];
4152 ; setup delta values
4153 mov eax, Tmap.fx_dv_dx // get v 16.16 step
4154 mov ebx, eax // copy it
4155 sar eax, 16 // get v int step
4156 shl ebx, 16 // get v frac step
4157 mov Tmap.DeltaVFrac, ebx // store it
4158 imul eax, Tmap.src_offset // calc texture step for v int step
4160 mov ebx, Tmap.fx_du_dx // get u 16.16 step
4161 mov ecx, ebx // copy it
4162 sar ebx, 16 // get the u int step
4163 shl ecx, 16 // get the u frac step
4164 mov Tmap.DeltaUFrac, ecx // store it
4165 add eax, ebx // calc uint + vint step
// uv_delta[4] is used when the v fraction did not carry, uv_delta[0] (one
// extra src_offset) when it did; see the "sbb eax,eax" / "adc esi" pairs.
4167 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
4168 add eax, Tmap.src_offset // calc whole step + v carry
4169 mov Tmap.uv_delta[0], eax // save in v-carry slot
4171 ; setup initial coordinates
4172 mov esi, Tmap.fx_u // get u 16.16
4173 mov ebx, esi // copy it
4174 sar esi, 16 // get integer part
4175 shl ebx, 16 // get fractional part
4177 mov ecx, Tmap.fx_v // get v 16.16
4178 mov edx, ecx // copy it
4179 sar edx, 16 // get integer part
4180 shl ecx, 16 // get fractional part
4181 imul edx, Tmap.src_offset // calc texture scanline address
4182 add esi, edx // calc texture offset
4183 add esi, Tmap.pixptr // calc address
4185 ; set edi = address of first pixel to modify
4186 mov edi, Tmap.dest_row_data
4189 mov al,[edi] // get the destination pixel
// NOTE(review): eax has the screen base subtracted here, presumably while
// deriving the z-buffer pointer kept in edx; the surrounding instructions
// are not visible in this excerpt -- confirm.
4194 sub eax, Tmap.pScreenBits
// num_big_steps is stored from eax after loop_count is loaded (any
// instruction in between is not visible here); loop_count then keeps only
// its low two bits, i.e. the 0..3 leftover pixels.
4198 mov eax, Tmap.loop_count
4203 mov Tmap.num_big_steps, eax
4204 and Tmap.loop_count, 3
4208 // 4 pixel span code (pixels +0..+3 are unrolled below)
4209 // edi = dest dib bits at current pixel
4210 // esi = texture pointer at current u,v
4212 // ebx = u fraction 0.32
4213 // ecx = v fraction 0.32
4214 // edx = base address for the z-buffer compares ([edx+4*n])
4215 // ebp = depth value of the current pixel, stepped by Tmap.fx_dwdx (eax is the v carry scratch)
// ---- pixel 0 of the span ----
4217 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
4218 jle Skip0a // If pixel is covered, skip drawing
4219 // mov [edx+4*0], ebp // Write new Z value
4220 mov al,[edi+0] // get the destination pixel
4221 mov ah,[esi] // get texture pixel 0
// NOTE(review): the add below needs the upper 16 bits of eax to be zero;
// the instruction that clears them is not visible in this excerpt.
4223 add eax, Tmap.lookup
4224 mov al, [eax] // blend them
4225 mov [edi+0],al // store pixel
4227 add ebp, Tmap.fx_dwdx
4228 add ecx,Tmap.DeltaVFrac // increment v fraction
4229 sbb eax,eax // get -1 if carry
4230 add ebx,Tmap.DeltaUFrac // increment u fraction
4231 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
// ---- pixel 1 of the span ----
4234 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
4235 jle Skip1a // If pixel is covered, skip drawing
4236 // mov [edx+4*1], ebp // Write new Z value
4237 mov al,[edi+1] // get the destination pixel
4238 mov ah,[esi] // get texture pixel 1
4240 add eax, Tmap.lookup
4241 mov al, [eax] // blend them
4242 mov [edi+1],al // store pixel
4244 add ebp, Tmap.fx_dwdx
4245 add ecx,Tmap.DeltaVFrac // increment v fraction
4246 sbb eax,eax // get -1 if carry
4247 add ebx,Tmap.DeltaUFrac // increment u fraction
4248 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
// ---- pixel 2 of the span ----
4250 cmp ebp, [edx+4*2] // Compare the Z depth of this pixel with zbuffer
4251 jle Skip2a // If pixel is covered, skip drawing
4252 // mov [edx+4*2], ebp // Write new Z value
4253 mov al,[edi+2] // get the destination pixel
4254 mov ah,[esi] // get texture pixel 2
4256 add eax, Tmap.lookup
4257 mov al, [eax] // blend them
4258 mov [edi+2],al // store pixel
4260 add ebp, Tmap.fx_dwdx
4261 add ecx,Tmap.DeltaVFrac // increment v fraction
4262 sbb eax,eax // get -1 if carry
4263 add ebx,Tmap.DeltaUFrac // increment u fraction
4264 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
// ---- pixel 3 of the span ----
4266 cmp ebp, [edx+4*3] // Compare the Z depth of this pixel with zbuffer
4267 jle Skip3a // If pixel is covered, skip drawing
4268 // mov [edx+4*3], ebp // Write new Z value
4269 mov al,[edi+3] // get the destination pixel
4270 mov ah,[esi] // get texture pixel 3
4272 add eax, Tmap.lookup
4273 mov al, [eax] // blend them
4274 mov [edi+3],al // store pixel
4276 add ebp, Tmap.fx_dwdx
4277 add ecx,Tmap.DeltaVFrac // increment v fraction
4278 sbb eax,eax // get -1 if carry
4279 add ebx,Tmap.DeltaUFrac // increment u fraction
4280 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
// One 4-pixel span done; count it off.
4284 dec Tmap.num_big_steps
// Leftover pixels (loop_count was masked to 0..3 above): a two-pixel block
// followed by a single-pixel block.  The branches that choose between them
// are not visible in this excerpt.
4290 mov eax,Tmap.loop_count
4295 mov Tmap.loop_count, eax
4299 mov al,[edi] // get the destination pixel
// ---- leftover pair, pixel 0 ----
4303 cmp ebp, [edx+4*0] // Compare the Z depth of this pixel with zbuffer
4304 jle Skip0b // If pixel is covered, skip drawing
4305 // mov [edx+4*0], ebp // Write new Z value
4306 mov al,[edi+0] // get the destination pixel
4307 mov ah,[esi] // get texture pixel 0
4309 add eax, Tmap.lookup
4310 mov al, [eax] // blend them
4311 mov [edi+0],al // store pixel
4313 add ebp, Tmap.fx_dwdx
4314 add ecx,Tmap.DeltaVFrac // increment v fraction
4315 sbb eax,eax // get -1 if carry
4316 add ebx,Tmap.DeltaUFrac // increment u fraction
4317 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
// ---- leftover pair, pixel 1 ----
4319 cmp ebp, [edx+4*1] // Compare the Z depth of this pixel with zbuffer
4320 jle Skip1b // If pixel is covered, skip drawing
4321 // mov [edx+4*1], ebp // Write new Z value
4322 mov al,[edi+1] // get the destination pixel
4323 mov ah,[esi] // get texture pixel 1
4325 add eax, Tmap.lookup
4326 mov al, [eax] // blend them
4327 mov [edi+1],al // store pixel
4329 add ebp, Tmap.fx_dwdx
4330 add ecx,Tmap.DeltaVFrac // increment v fraction
4331 sbb eax,eax // get -1 if carry
4332 add ebx,Tmap.DeltaUFrac // increment u fraction
4333 adc esi,Tmap.uv_delta[4*eax+4] // add in step ints & carries
// ---- final single pixel: no coordinate stepping follows it ----
4344 cmp ebp, [edx] // Compare the Z depth of this pixel with zbuffer
4345 jle Skip0c // If pixel is covered, skip drawing
4346 // mov [edx], ebp // Write new Z value
4347 mov al,[edi] // get the destination pixel
4348 mov ah,[esi] // get the last texture pixel
4350 add eax, Tmap.lookup
4351 mov al, [eax] // blend them
4352 mov [edi],al // store pixel
// Draws one scanline of an 8-bit texture, blending every texel with the
// destination pixel through the current alpha colour's lookup table.
// When gr_zbuffering is set, the FULL, WRITE and READ z-buffer modes all
// hand the work to tmapscan_lnaa8_zbuffered(); the code below is the
// version without any z test.  All inputs come from the global Tmap
// structure.
// NOTE(review): what happens after the zbuffered call (return / break) and
// the end of this function are not visible in this excerpt.
4370 void tmapscan_lnaa8()
4372 #ifndef HARDWARE_ONLY
4373 if (gr_zbuffering) {
4374 switch(gr_zbuffering_mode) {
4377 case GR_ZBUFF_FULL: // both
4378 case GR_ZBUFF_WRITE: // write only
4379 case GR_ZBUFF_READ: // read only
4380 tmapscan_lnaa8_zbuffered();
// Base address of the blend table belonging to the current alpha colour.
4386 Tmap.lookup = (uint)&Current_alphacolor->table.lookup[0][0];
4400 ; setup delta values
4401 mov eax, Tmap.fx_dv_dx // get v 16.16 step
4402 mov ebx, eax // copy it
4403 sar eax, 16 // get v int step
4404 shl ebx, 16 // get v frac step
4405 mov Tmap.DeltaVFrac, ebx // store it
4406 imul eax, Tmap.src_offset // calc texture step for v int step
4408 mov ebx, Tmap.fx_du_dx // get u 16.16 step
4409 mov ecx, ebx // copy it
4410 sar ebx, 16 // get the u int step
4411 shl ecx, 16 // get the u frac step
4412 mov Tmap.DeltaUFrac, ecx // store it
4413 add eax, ebx // calc uint + vint step
// uv_delta[4] is used when the v fraction did not carry, uv_delta[0] (one
// extra src_offset) when it did; see the "sbb ebp,ebp" / "adc esi" pairs.
4415 mov Tmap.uv_delta[4], eax // save whole step in non-v-carry slot
4416 add eax, Tmap.src_offset // calc whole step + v carry
4417 mov Tmap.uv_delta[0], eax // save in v-carry slot
4419 ; setup initial coordinates
4420 mov esi, Tmap.fx_u // get u 16.16
4421 mov ebx, esi // copy it
4422 sar esi, 16 // get integer part
4423 shl ebx, 16 // get fractional part
4425 mov ecx, Tmap.fx_v // get v 16.16
4426 mov edx, ecx // copy it
4427 sar edx, 16 // get integer part
4428 shl ecx, 16 // get fractional part
4429 imul edx, Tmap.src_offset // calc texture scanline address
4430 add esi, edx // calc texture offset
4431 add esi, Tmap.pixptr // calc address
4433 ; set edi = address of first pixel to modify
4434 mov edi, Tmap.dest_row_data
// Keep the u fraction step in a register for the inner loops.
4436 mov edx, Tmap.DeltaUFrac
4439 mov al,[edi] // get the destination pixel
// num_big_steps is stored from ebp after loop_count is loaded (any
// instruction in between is not visible here); loop_count then keeps only
// its low two bits, i.e. the 0..3 leftover pixels.
4441 mov ebp, Tmap.loop_count
4446 mov Tmap.num_big_steps, ebp
4447 and Tmap.loop_count, 3
4452 // 4 pixel span code (pixels +0..+3 are unrolled below)
4453 // edi = dest dib bits at current pixel
4454 // esi = texture pointer at current u,v
4456 // ebx = u fraction 0.32
4457 // ecx = v fraction 0.32
4458 // edx = u frac step
4459 // ebp = v carry scratch
// Each pixel reads dest (al) and texel (ah) first, then advances the
// texture coordinates, and only afterwards does the table lookup and store.
// NOTE(review): the lookup add needs the upper 16 bits of eax to be zero;
// the instruction that clears them is not visible in this excerpt.
4462 mov al,[edi+0] // get the destination pixel
4463 mov ah,[esi] // get texture pixel 0
4464 add ecx,Tmap.DeltaVFrac // increment v fraction
4465 sbb ebp,ebp // get -1 if carry
4466 add ebx,edx // increment u fraction
4467 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4468 add eax, Tmap.lookup
4469 mov al, [eax] // blend them
4470 mov [edi+0],al // store pixel
4472 mov al,[edi+1] // get the destination pixel
4473 mov ah,[esi] // get texture pixel 1
4474 add ecx,Tmap.DeltaVFrac // increment v fraction
4475 sbb ebp,ebp // get -1 if carry
4476 add ebx,edx // increment u fraction
4477 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4478 add eax, Tmap.lookup
4479 mov al, [eax] // blend them
4480 mov [edi+1],al // store pixel
4482 mov al,[edi+2] // get the destination pixel
4483 mov ah,[esi] // get texture pixel 2
4484 add ecx,Tmap.DeltaVFrac // increment v fraction
4485 sbb ebp,ebp // get -1 if carry
4486 add ebx,edx // increment u fraction
4487 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4488 add eax, Tmap.lookup
4489 mov al, [eax] // blend them
4490 mov [edi+2],al // store pixel
4492 mov al,[edi+3] // get the destination pixel
4493 mov ah,[esi] // get texture pixel 3
4494 add ecx,Tmap.DeltaVFrac // increment v fraction
4495 sbb ebp,ebp // get -1 if carry
4496 add ebx,edx // increment u fraction
4497 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4498 add eax, Tmap.lookup
4499 mov al, [eax] // blend them
4500 mov [edi+3],al // store pixel
// One 4-pixel span done; count it off.
4503 dec Tmap.num_big_steps
// Leftover pixels (loop_count was masked to 0..3 above): a two-pixel block
// followed by a single-pixel block.  The branches that choose between them
// are not visible in this excerpt.
4509 mov ebp,Tmap.loop_count
4514 mov Tmap.loop_count, ebp
4518 mov al,[edi] // get the destination pixel
4522 mov al,[edi+0] // get the destination pixel
4523 mov ah,[esi] // get texture pixel 0
4524 add ecx,Tmap.DeltaVFrac // increment v fraction
4525 sbb ebp,ebp // get -1 if carry
4526 add ebx,edx // increment u fraction
4527 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4528 add eax, Tmap.lookup
4529 mov al, [eax] // blend them
4530 mov [edi+0],al // store pixel
4532 mov al,[edi+1] // get the destination pixel
4533 mov ah,[esi] // get texture pixel 1
4534 add ecx,Tmap.DeltaVFrac // increment v fraction
4535 sbb ebp,ebp // get -1 if carry
4536 add ebx,edx // increment u fraction
4537 adc esi,Tmap.uv_delta[4*ebp+4] // add in step ints & carries
4538 add eax, Tmap.lookup
4539 mov al, [eax] // blend them
4540 mov [edi+1],al // store pixel
// Final single pixel: no coordinate stepping follows it.
4550 mov al,[edi] // get the destination pixel
4551 mov ah,[esi] // get the last texture pixel
4552 add eax, Tmap.lookup
4553 mov al, [eax] // blend them
4554 mov [edi],al // store pixel