2 * Copyright (C) Volition, Inc. 1999. All rights reserved.
4 * All source code herein is the property of Volition, Inc. You may not sell
5 * or otherwise commercially exploit the source or things you created based on
10 * $Logfile: /Freespace2/code/Graphics/TmapGenericScans.cpp $
15 * Some code for generic scanlines. This isn't used; it is just
16 * basically a dump area for inner loops I was experimenting with.
17 * This entire file is #if 0'd out.
20 * Revision 1.3 2002/06/09 04:41:18 relnev
21 * added copyright header
23 * Revision 1.2 2002/05/07 03:16:45 theoddone33
24 * The Great Newline Fix
26 * Revision 1.1.1.1 2002/05/03 03:28:09 root
30 * 2 10/07/98 10:53a Dave
33 * 1 10/07/98 10:49a Dave
35 * 7 4/24/97 4:45p John
36 * Added tiled texture mappers for 64x64, 128x128, and 256x256 textures.
38 * 6 4/24/97 3:01p John
39 * added code to not crash on non-256x256 textures.
41 * 5 3/14/97 3:55p John
42 * Made tiled tmapper not always be zbuffered.
44 * 4 3/13/97 10:32a John
45 * Added code for tiled 256x256 textures in certain models.
47 * 3 3/10/97 5:20p John
48 * Differentiated between Gouraud and Flat shading. Since we only do flat
49 * shading as of now, we don't need to interpolate L in the outer loop.
50 * This should save a few percent.
52 * 2 12/10/96 10:37a John
53 * Restructured texture mapper to remove some overhead from each scanline
54 * setup. This gave about a 30% improvement drawing trans01.pof, which is
55 * a really complex model. In the process, I cleaned up the scanline
56 * functions and separated them into different modules for each pixel
65 #include "tmapscanline.h"
70 #pragma warning(disable:4410)
78 #include "tmapscanline.h"
83 #pragma warning(disable:4410)
85 // These must be global because I use them in assembly
86 // code that uses the EBP register, so the variables
87 // can't be accessed off the stack.
88 int _fx_u, _fx_v, _fx_w, _fx_l;
89 int _fx_u_right, _fx_v_right, _fx_w_right;
90 int _fx_du, _fx_dv, _fx_dw, _fx_dl;
91 uint _fx_destptr,_fx_srcptr, light_table;
92 int V0, U0, DU1, DV1, DZ1;
93 int _loop_count,num_big_steps;
96 int rgbtable_inited = 0;
105 for (i=0; i<512; i++ ) {
108 else if ( v > 255 ) v = 255;
111 rgbtable3[i] = v<<16;
116 void asm_tmap_scanline_lln();
117 void asm_tmap_scanline_lln_tiled();
119 void tmapscan_lln8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
121 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
122 Tmap1.loop_count = rx - lx;
123 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
124 Tmap1.bp = tmap_bitmap;
125 Tmap1.src_offset = tmap_bitmap->w;
127 Tmap1.fx_u = fl2f(p->u);
128 Tmap1.fx_v = fl2f(p->v);
129 Tmap1.fx_l = fl2f(p->l*32.0);
130 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
131 Tmap1.fx_du_dx = fl2f(dp->u);
132 Tmap1.fx_dv_dx = fl2f(dp->v);
133 Tmap1.fx_u_right = fl2f(rp->u);
134 Tmap1.fx_v_right = fl2f(rp->v);
136 asm_tmap_scanline_lln();
139 extern void asm_tmap_scanline_lnt();
141 void tmapscan_lnt8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
143 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
144 Tmap1.loop_count = rx - lx;
145 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
146 Tmap1.bp = tmap_bitmap;
147 Tmap1.src_offset = tmap_bitmap->w;
149 Tmap1.fx_u = fl2f(p->u);
150 Tmap1.fx_v = fl2f(p->v);
151 Tmap1.fx_du_dx = fl2f(dp->u);
152 Tmap1.fx_dv_dx = fl2f(dp->v);
153 Tmap1.fx_u_right = fl2f(rp->u);
154 Tmap1.fx_v_right = fl2f(rp->v);
156 asm_tmap_scanline_lnt();
159 extern void asm_tmap_scanline_lnn();
161 void tmapscan_lnn8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
163 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
164 Tmap1.loop_count = rx - lx;
165 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
166 Tmap1.bp = tmap_bitmap;
167 Tmap1.src_offset = tmap_bitmap->w;
169 Tmap1.fx_u = fl2f(p->u);
170 Tmap1.fx_v = fl2f(p->v);
171 Tmap1.fx_du_dx = fl2f(dp->u);
172 Tmap1.fx_dv_dx = fl2f(dp->v);
173 Tmap1.fx_u_right = fl2f(rp->u);
174 Tmap1.fx_v_right = fl2f(rp->v);
176 asm_tmap_scanline_lnn();
180 void tmapscan_lln8_tiled( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
182 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
183 Tmap1.loop_count = rx - lx;
184 Tmap1.fx_u = fl2f(p->u);
185 Tmap1.fx_v = fl2f(p->v);
186 Tmap1.fx_l = fl2f(p->l*32.0);
187 Tmap1.fx_du_dx = fl2f(dp->u);
188 Tmap1.fx_dv_dx = fl2f(dp->v);
189 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
190 Tmap1.fx_u_right = fl2f(rp->u);
191 Tmap1.fx_v_right = fl2f(rp->v);
192 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
193 Tmap1.bp = tmap_bitmap;
194 Tmap1.src_offset = tmap_bitmap->w;
196 Tmap1.BitmapWidth = tmap_bitmap->w;
197 Tmap1.BitmapHeight = tmap_bitmap->h;
200 // asm_tmap_scanline_lln_tiled();
207 void c_tmap_scanline_per_sub_new();
209 void tmapscan_pln8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
211 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
212 Tmap1.loop_count = rx - lx;
213 Tmap1.fx_l = fl2f(p->l*32.0);
214 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
218 Tmap1.OneOverZ = p->sw;
220 Tmap1.dUOverZdX8 = dp->u*32.0f;
221 Tmap1.dVOverZdX8 = dp->v*32.0f;
222 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
224 Tmap1.dUOverZdX = dp->u;
225 Tmap1.dVOverZdX = dp->v;
226 Tmap1.dOneOverZdX = dp->sw;
228 Tmap1.RightUOverZ = rp->u;
229 Tmap1.RightVOverZ = rp->v;
230 Tmap1.RightOneOverZ = rp->sw;
232 if ( Tmap1.fx_dl_dx < 0 ) {
233 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
234 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
235 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
237 // SDL_assert( Tmap1.fx_l > 31*F1_0 );
238 // SDL_assert( Tmap1.fx_l < 66*F1_0 );
239 // SDL_assert( Tmap1.fx_dl_dx >= 0 );
240 // SDL_assert( Tmap1.fx_dl_dx < 31*F1_0 );
252 dldx = Tmap1.fx_dl_dx;
253 dest = Tmap1.dest_row_data;
255 for (x=Tmap1.loop_count; x >= 0; x-- ) {
256 //*dest++ = gr_fade_table[ ((l>>8)&(0xff00)) + 35 ];
276 // put the FPU in 32 bit mode
277 // @todo move this out of here!
279 fstcw Tmap1.OldFPUCW // store copy of CW
280 mov ax,Tmap1.OldFPUCW // get it in ax
282 mov Tmap1.FPUCW,ax // store it
283 fldcw Tmap1.FPUCW // load the FPU
285 mov ecx, Tmap1.loop_count // ecx = width
287 mov edi, Tmap1.dest_row_data // edi = dest pointer
289 // edi = pointer to start pixel in dest dib
292 mov eax,ecx // eax and ecx = width
293 shr ecx,5 // ecx = width / subdivision length
294 and eax,31 // eax = width mod subdivision length
295 jnz some_left_over // any leftover?
297 dec ecx // no, so special case last span
298 mov eax,32 // it's 8 pixels long
300 mov Tmap1.Subdivisions,ecx // store widths
301 mov Tmap1.WidthModLength,eax
303 // mov ebx,pLeft ; get left edge pointer
304 // mov edx,pGradients ; get gradients pointer
306 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
307 // st0 st1 st2 st3 st4 st5 st6 st7
308 fld Tmap1.VOverZ // V/ZL
309 fld Tmap1.UOverZ // U/ZL V/ZL
310 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
311 fld1 // 1 1/ZL U/ZL V/ZL
312 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
313 fld st // ZL ZL 1/ZL U/ZL V/ZL
314 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
315 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
316 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
318 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
319 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
321 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
323 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
324 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
325 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
326 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
327 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
329 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
331 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
332 // @todo overlap this guy
333 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
334 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
335 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
336 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
337 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
339 cmp ecx,0 // check for any full spans
340 jle HandleLeftoverPixels
344 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
345 // UR VR V/ZR 1/ZR U/ZR UL VL
347 // convert left side coords
349 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
350 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
351 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
353 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
354 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
355 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
357 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
359 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
360 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
361 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
362 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
364 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
365 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
367 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
368 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
369 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
371 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
372 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
374 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
375 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
376 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
377 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
378 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
379 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
380 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
383 ; set up affine registers
387 mov eax,Tmap1.DeltaV ; get v 16.16 step
388 mov ebx,eax ; copy it
389 sar eax,16 ; get v int step
390 shl ebx,16 ; get v frac step
391 mov Tmap1.DeltaVFrac,ebx ; store it
392 imul eax,Tmap1.src_offset ; calculate texture step for v int step
394 mov ebx,Tmap1.DeltaU ; get u 16.16 step
395 mov ecx,ebx ; copy it
396 sar ebx,16 ; get u int step
397 shl ecx,16 ; get u frac step
398 mov Tmap1.DeltaUFrac,ecx ; store it
399 add eax,ebx ; calculate uint + vint step
400 mov Tmap1.UVintVfracStepVNoCarry,eax; save whole step in non-v-carry slot
401 add eax,Tmap1.src_offset ; calculate whole step + v carry
402 mov Tmap1.UVintVfracStepVCarry,eax ; save in v-carry slot
404 ; setup initial coordinates
405 mov esi,Tmap1.UFixed ; get u 16.16 fixedpoint coordinate
407 mov ebx,esi ; copy it
408 sar esi,16 ; get integer part
409 shl ebx,16 ; get fractional part
411 mov ecx,Tmap1.VFixed ; get v 16.16 fixedpoint coordinate
413 mov edx,ecx ; copy it
414 sar edx,16 ; get integer part
415 shl ecx,16 ; get fractional part
416 imul edx,Tmap1.src_offset ; calc texture scanline address
417 add esi,edx ; calc texture offset
418 add esi,Tmap1.pixptr ; calc address
420 mov edx,Tmap1.DeltaUFrac ; get register copy
426 mov ebp, Tmap1.fx_dl_dx
438 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
440 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
441 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
445 // edi = dest dib bits at current pixel
446 // esi = texture pointer at current u,v
448 // ebx = u fraction 0.32
449 // ecx = v fraction 0.32
451 // ebp = v carry scratch
453 mov al,[edi] // preread the destination cache line
456 mov al,[esi] // get texture pixel 0
458 mov al, gr_fade_table[eax]
460 add ecx,Tmap1.DeltaVFrac // increment v fraction
461 sbb ebp,ebp // get -1 if carry
462 add ebx,edx // increment u fraction
464 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
465 add ecx,Tmap1.DeltaVFrac // increment v fraction
467 sbb ebp,ebp // get -1 if carry
468 // mov al, 0 // Uncomment this line to show divisions
469 mov [edi+0],al // store pixel 0
471 add ebx,edx // increment u fraction
472 mov al,[esi] // get texture pixel 1
474 mov al, gr_fade_table[eax]
476 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
477 add ecx,Tmap1.DeltaVFrac // increment v fraction
479 sbb ebp,ebp // get -1 if carry
480 mov [edi+1],al // store pixel 1
482 add ebx,edx // increment u fraction
483 mov al,[esi] // get texture pixel 2
485 mov al, gr_fade_table[eax]
487 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
488 add ecx,Tmap1.DeltaVFrac // increment v fraction
490 sbb ebp,ebp // get -1 if carry
491 mov [edi+2],al // store pixel 2
493 add ebx,edx // increment u fraction
494 mov al,[esi] // get texture pixel 3
496 mov al, gr_fade_table[eax]
498 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
499 add ecx,Tmap1.DeltaVFrac // increment v fraction
501 sbb ebp,ebp // get -1 if carry
502 mov [edi+3],al // store pixel 3
504 add ebx,edx // increment u fraction
505 mov al,[esi] // get texture pixel 4
507 mov al, gr_fade_table[eax]
508 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
509 add ecx,Tmap1.DeltaVFrac // increment v fraction
511 sbb ebp,ebp // get -1 if carry
512 mov [edi+4],al // store pixel 3
514 add ebx,edx // increment u fraction
515 mov al,[esi] // get texture pixel 4
517 mov al, gr_fade_table[eax]
518 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
519 add ecx,Tmap1.DeltaVFrac // increment v fraction
521 sbb ebp,ebp // get -1 if carry
522 mov [edi+5],al // store pixel 3
524 add ebx,edx // increment u fraction
525 mov al,[esi] // get texture pixel 4
527 mov al, gr_fade_table[eax]
528 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
529 add ecx,Tmap1.DeltaVFrac // increment v fraction
531 sbb ebp,ebp // get -1 if carry
532 mov [edi+6],al // store pixel 3
534 add ebx,edx // increment u fraction
535 mov al,[esi] // get texture pixel 4
537 mov al, gr_fade_table[eax]
538 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
539 add ecx,Tmap1.DeltaVFrac // increment v fraction
541 sbb ebp,ebp // get -1 if carry
542 mov [edi+7],al // store pixel 3
544 add ebx,edx // increment u fraction
545 mov al,[esi] // get texture pixel 4
547 mov al, gr_fade_table[eax]
548 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
549 add ecx,Tmap1.DeltaVFrac // increment v fraction
551 sbb ebp,ebp // get -1 if carry
552 mov [edi+8],al // store pixel 3
554 add ebx,edx // increment u fraction
555 mov al,[esi] // get texture pixel 4
557 mov al, gr_fade_table[eax]
558 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
559 add ecx,Tmap1.DeltaVFrac // increment v fraction
561 sbb ebp,ebp // get -1 if carry
562 mov [edi+9],al // store pixel 3
564 add ebx,edx // increment u fraction
565 mov al,[esi] // get texture pixel 4
567 mov al, gr_fade_table[eax]
568 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
569 add ecx,Tmap1.DeltaVFrac // increment v fraction
571 sbb ebp,ebp // get -1 if carry
572 mov [edi+10],al // store pixel 3
574 add ebx,edx // increment u fraction
575 mov al,[esi] // get texture pixel 4
577 mov al, gr_fade_table[eax]
580 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
581 add ecx,Tmap1.DeltaVFrac // increment v fraction
583 sbb ebp,ebp // get -1 if carry
584 mov [edi+11],al // store pixel 3
586 add ebx,edx // increment u fraction
587 mov al,[esi] // get texture pixel 4
589 mov al, gr_fade_table[eax]
592 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
593 add ecx,Tmap1.DeltaVFrac // increment v fraction
595 sbb ebp,ebp // get -1 if carry
596 mov [edi+12],al // store pixel 3
598 add ebx,edx // increment u fraction
599 mov al,[esi] // get texture pixel 4
601 mov al, gr_fade_table[eax]
604 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
605 add ecx,Tmap1.DeltaVFrac // increment v fraction
607 sbb ebp,ebp // get -1 if carry
608 mov [edi+13],al // store pixel 3
610 add ebx,edx // increment u fraction
611 mov al,[esi] // get texture pixel 4
613 mov al, gr_fade_table[eax]
616 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
617 add ecx,Tmap1.DeltaVFrac // increment v fraction
619 sbb ebp,ebp // get -1 if carry
620 mov [edi+14],al // store pixel 3
622 add ebx,edx // increment u fraction
623 mov al,[esi] // get texture pixel 4
625 mov al, gr_fade_table[eax]
628 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
629 add ecx,Tmap1.DeltaVFrac // increment v fraction
631 sbb ebp,ebp // get -1 if carry
632 mov [edi+15],al // store pixel 3
634 add ebx,edx // increment u fraction
635 mov al,[esi] // get texture pixel 4
637 mov al, gr_fade_table[eax]
640 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
641 add ecx,Tmap1.DeltaVFrac // increment v fraction
643 sbb ebp,ebp // get -1 if carry
644 mov [edi+16],al // store pixel 3
646 add ebx,edx // increment u fraction
647 mov al,[esi] // get texture pixel 4
649 mov al, gr_fade_table[eax]
652 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
653 add ecx,Tmap1.DeltaVFrac // increment v fraction
655 sbb ebp,ebp // get -1 if carry
656 mov [edi+17],al // store pixel 3
658 add ebx,edx // increment u fraction
659 mov al,[esi] // get texture pixel 4
661 mov al, gr_fade_table[eax]
664 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
665 add ecx,Tmap1.DeltaVFrac // increment v fraction
667 sbb ebp,ebp // get -1 if carry
668 mov [edi+18],al // store pixel 3
670 add ebx,edx // increment u fraction
671 mov al,[esi] // get texture pixel 4
673 mov al, gr_fade_table[eax]
676 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
677 add ecx,Tmap1.DeltaVFrac // increment v fraction
679 sbb ebp,ebp // get -1 if carry
680 mov [edi+19],al // store pixel 3
682 add ebx,edx // increment u fraction
683 mov al,[esi] // get texture pixel 4
685 mov al, gr_fade_table[eax]
688 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
689 add ecx,Tmap1.DeltaVFrac // increment v fraction
691 sbb ebp,ebp // get -1 if carry
692 mov [edi+20],al // store pixel 3
694 add ebx,edx // increment u fraction
695 mov al,[esi] // get texture pixel 4
697 mov al, gr_fade_table[eax]
700 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
701 add ecx,Tmap1.DeltaVFrac // increment v fraction
703 sbb ebp,ebp // get -1 if carry
704 mov [edi+21],al // store pixel 3
706 add ebx,edx // increment u fraction
707 mov al,[esi] // get texture pixel 4
709 mov al, gr_fade_table[eax]
712 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
713 add ecx,Tmap1.DeltaVFrac // increment v fraction
715 sbb ebp,ebp // get -1 if carry
716 mov [edi+22],al // store pixel 3
718 add ebx,edx // increment u fraction
719 mov al,[esi] // get texture pixel 4
721 mov al, gr_fade_table[eax]
724 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
725 add ecx,Tmap1.DeltaVFrac // increment v fraction
727 sbb ebp,ebp // get -1 if carry
728 mov [edi+23],al // store pixel 3
730 add ebx,edx // increment u fraction
731 mov al,[esi] // get texture pixel 4
733 mov al, gr_fade_table[eax]
736 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
737 add ecx,Tmap1.DeltaVFrac // increment v fraction
739 sbb ebp,ebp // get -1 if carry
740 mov [edi+24],al // store pixel 3
742 add ebx,edx // increment u fraction
743 mov al,[esi] // get texture pixel 4
745 mov al, gr_fade_table[eax]
748 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
749 add ecx,Tmap1.DeltaVFrac // increment v fraction
751 sbb ebp,ebp // get -1 if carry
752 mov [edi+25],al // store pixel 3
754 add ebx,edx // increment u fraction
755 mov al,[esi] // get texture pixel 4
757 mov al, gr_fade_table[eax]
760 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
761 add ecx,Tmap1.DeltaVFrac // increment v fraction
765 sbb ebp,ebp // get -1 if carry
766 mov [edi+26],al // store pixel 3
768 add ebx,edx // increment u fraction
769 mov al,[esi] // get texture pixel 4
771 mov al, gr_fade_table[eax]
774 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
775 add ecx,Tmap1.DeltaVFrac // increment v fraction
777 sbb ebp,ebp // get -1 if carry
778 mov [edi+27],al // store pixel 3
780 add ebx,edx // increment u fraction
781 mov al,[esi] // get texture pixel 4
783 mov al, gr_fade_table[eax]
785 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
786 add ecx,Tmap1.DeltaVFrac // increment v fraction
788 sbb ebp,ebp // get -1 if carry
789 mov [edi+28],al // store pixel 4
791 add ebx,edx // increment u fraction
792 mov al,[esi] // get texture pixel 5
794 mov al, gr_fade_table[eax]
796 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
797 add ecx,Tmap1.DeltaVFrac // increment v fraction
799 sbb ebp,ebp // get -1 if carry
800 mov [edi+29],al // store pixel 5
802 add ebx,edx // increment u fraction
803 mov al,[esi] // get texture pixel 6
805 mov al, gr_fade_table[eax]
807 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
808 add ecx,Tmap1.DeltaVFrac // increment v fraction
810 sbb ebp,ebp // get -1 if carry
811 mov [edi+30],al // store pixel 6
813 add ebx,edx // increment u fraction
815 mov al,[esi] // get texture pixel 7
817 mov al, gr_fade_table[eax]
819 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
821 mov [edi+31],al // store pixel 7
825 ; ************** Okay to Access Stack Frame ****************
826 ; ************** Okay to Access Stack Frame ****************
827 ; ************** Okay to Access Stack Frame ****************
830 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
831 ; ZR V/ZR 1/ZR U/ZR UL VL
833 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
834 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
835 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
836 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
838 add edi,32 ; increment to next span
839 dec Tmap1.Subdivisions ; decrement span count
840 jnz SpanLoop ; loop back
842 // save new lighting values
845 // mov Tmap1.fx_l, eax
849 // mov Tmap1.fx_dl_dx, eax
851 HandleLeftoverPixels:
854 mov esi,Tmap1.pixptr ; load texture pointer
856 ; edi = dest dib bits
857 ; esi = current texture dib bits
858 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
859 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
861 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
862 jz FPUReturn ; nope, pop the FPU and bail
864 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
866 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
867 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
868 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
870 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
871 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
872 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
874 dec Tmap1.WidthModLength ; calc how many steps to take
875 jz OnePixelSpan ; just one, don't do deltas
877 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
880 ; @todo rearrange things so we don't need these two instructions
881 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
882 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
884 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
885 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
886 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
887 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
888 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
889 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
891 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
893 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
894 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
896 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
898 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
899 fxch st(1) ; VR UR inv. inv. inv. dU VL
900 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
901 fxch st(6) ; dV UR inv. inv. inv. dU VR
903 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
904 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
905 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
907 fxch st(4) ; dU inv. inv. inv. UR VR
908 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
909 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
910 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
912 ; @todo gross! these are to line up with the other loop
913 fld st(1) ; inv. inv. inv. inv. UR VR
914 fld st(2) ; inv. inv. inv. inv. inv. UR VR
920 mov eax, Tmap1.DeltaV // get v 16.16 step
921 mov ebx, eax // copy it
922 sar eax, 16 // get v int step
923 shl ebx, 16 // get v frac step
924 mov Tmap1.DeltaVFrac, ebx // store it
925 imul eax, Tmap1.src_offset // calc texture step for v int step
927 mov ebx, Tmap1.DeltaU // get u 16.16 step
928 mov ecx, ebx // copy it
929 sar ebx, 16 // get the u int step
930 shl ecx, 16 // get the u frac step
931 mov Tmap1.DeltaUFrac, ecx // store it
932 add eax, ebx // calc uint + vint step
933 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
934 add eax, Tmap1.src_offset // calc whole step + v carry
935 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
942 ; check coordinate ranges
943 mov eax, Tmap1.UFixed
944 cmp eax, Tmap1.MinUFixed
946 mov eax, Tmap1.MinUFixed
947 mov Tmap1.UFixed, eax
950 cmp eax, Tmap1.MaxUFixed
952 mov eax, Tmap1.MaxUFixed
953 mov Tmap1.UFixed, eax
955 mov eax, Tmap1.VFixed
956 cmp eax, Tmap1.MinVFixed
958 mov eax, Tmap1.MinVFixed
959 mov Tmap1.VFixed, eax
962 cmp eax, Tmap1.MaxVFixed
964 mov eax, Tmap1.MaxVFixed
965 mov Tmap1.VFixed, eax
972 ; setup initial coordinates
973 mov esi, Tmap1.UFixed // get u 16.16
974 mov ebx, esi // copy it
975 sar esi, 16 // get integer part
976 shl ebx, 16 // get fractional part
978 mov ecx, Tmap1.VFixed // get v 16.16
979 mov edx, ecx // copy it
980 sar edx, 16 // get integer part
981 shl ecx, 16 // get fractional part
982 imul edx, Tmap1.src_offset // calc texture scanline address
983 add esi, edx // calc texture offset
984 add esi, Tmap1.pixptr // calc address
986 ; set edi = address of first pixel to modify
987 ; mov edi, Tmap1.dest_row_data
996 mov edx, Tmap1.DeltaUFrac
998 cmp Tmap1.WidthModLength, 1
1003 mov ebx, Tmap1.fx_l_right
1010 // slow but maybe better
1013 mov ebx, Tmap1.WidthModLength
1018 mov eax, Tmap1.fx_dl_dx
1028 inc Tmap1.WidthModLength
1029 mov eax,Tmap1.WidthModLength
1033 mov Tmap1.WidthModLength, eax
1037 mov al,[edi] // preread the destination cache line
1040 mov al,[esi] // get texture pixel 0
1042 mov al, gr_fade_table[eax]
1044 add ecx,Tmap1.DeltaVFrac // increment v fraction
1045 sbb ebp,ebp // get -1 if carry
1046 add ebx,edx // increment u fraction
1047 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
1048 mov [edi+0],al // store pixel 0
1050 add ecx,Tmap1.DeltaVFrac // increment v fraction
1051 sbb ebp,ebp // get -1 if carry
1052 add ebx,edx // increment u fraction
1053 mov al,[esi] // get texture pixel 1
1055 mov al, gr_fade_table[eax]
1057 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
1058 mov [edi+1],al // store pixel 1
1061 dec Tmap1.WidthModLength
1069 mov al,[esi] // get texture pixel 2
1071 mov al, gr_fade_table[eax]
1072 mov [edi],al // store pixel 2
1087 OldWay: // This is 6% slower than above
1089 mov ebx,Tmap1.UFixed ; get starting coordinates
1090 mov ecx,Tmap1.VFixed ; for span
1092 ; leftover pixels loop
1093 ; edi = dest dib bits
1094 ; esi = texture dib bits
1100 mov eax,ecx ; copy v
1102 imul eax,Tmap1.src_offset ; scan offset
1103 mov edx,ebx ; copy u
1105 add eax,edx ; texture offset
1106 mov al,[esi+eax] ; get source pixel
1108 mov [edi],al ; store it
1110 add ebx,Tmap1.DeltaU ; increment u coordinate
1111 add ecx,Tmap1.DeltaV ; increment v coordinate
1113 dec Tmap1.WidthModLength ; decrement loop count
1114 jl FPUReturn ; finish up
1118 mov eax,ecx ; copy v
1120 imul eax,Tmap1.src_offset ; scan offset
1121 mov edx,ebx ; copy u
1123 add eax,edx ; texture offset
1124 mov al,[esi+eax] ; get source pixel
1125 mov [edi],al ; store it
1127 add ebx,Tmap1.DeltaU ; increment u coordinate
1128 add ecx,Tmap1.DeltaV ; increment v coordinate
1130 dec Tmap1.WidthModLength ; decrement loop count
1131 jge LeftoverLoop ; finish up
1136 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
1137 ; xxx xxx xxx xxx xxx xxx xxx
1146 fldcw Tmap1.OldFPUCW // restore the FPU
1161 void tmapscan_lln8_old( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1163 _fx_srcptr = (uint)tmap_bitmap->data;
1164 _fx_destptr = (uint)GR_SCREEN_PTR(ubyte,lx,y);
1165 _loop_count = rx - lx;
1166 _fx_u = fl2f(p->u*64.0f);
1167 _fx_v = fl2f(p->v*64.0f);
1168 _fx_l = fl2f(p->l*32.0+1.0);
1169 _fx_du = fl2f(dp->u*64.0f);
1170 _fx_dv = fl2f(dp->v*64.0f);
1171 _fx_dl = fl2f(dp->l*32.0);
1172 light_table = (uint)&gr_fade_table[0];
1183 ; set edi = address of first pixel to modify
1184 mov edi, _fx_destptr
1191 mov dx, ax ; EDX=U:V in 6.10 format
1197 mov si, ax ; ESI=DU:DV in 6.10 format
1206 mov eax, _loop_count
1208 mov _loop_count, eax
1213 mov num_big_steps, eax
1226 mov al, gr_fade_table[eax]
1238 mov al, gr_fade_table[eax]
1250 mov al, gr_fade_table[eax]
1262 mov al, gr_fade_table[eax]
1274 mov al, gr_fade_table[eax]
1286 mov al, gr_fade_table[eax]
1298 mov al, gr_fade_table[eax]
1310 mov al, gr_fade_table[eax]
1324 mov _loop_count, eax
1337 mov al, gr_fade_table[eax]
1348 mov al, gr_fade_table[eax]
1366 mov al, gr_fade_table[eax]
1383 void tmapscan_flat16( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1388 pDestBits = GR_SCREEN_PTR(ushort,lx,y);
1390 for (i=0; i<(rx-lx+1); i++ )
1391 *pDestBits++ = gr_screen.current_color.raw16;
1394 float tmap_max_z = 0.0f;
1396 void tmapscan_lln8_z( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1399 ubyte *pDestBits, tmp;
1400 float u, dudx, v, dvdx, l, dldx;
1403 pDestBits = GR_SCREEN_PTR(ubyte,lx,y);
1405 ubyte * cdata = (ubyte *)tmap_bitmap->data;
1416 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1417 if ( z < tmap_max_z ) {
1418 tmp = cdata[fl2i(v)*tmap_bitmap->w+fl2i(u)];
1419 *pDestBits = gr_fade_table[ fl2i(l)*256+tmp ];
1430 void tmapscan_generic8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1433 ubyte *pDestBits, tmp;
1434 int u, dudx, v, dvdx, w, dwdx, l, dldx;
1436 pDestBits = GR_SCREEN_PTR(ubyte,lx,y);
1438 if ( Tmap1.flags & TMAP_FLAG_TEXTURED ) {
1439 ubyte * cdata = (ubyte *)tmap_bitmap->data;
1440 if ( flags & TMAP_FLAG_RAMP ) {
1441 if ( Tmap1.flags & TMAP_FLAG_CORRECT ) {
1442 float fu, fv, fw, fdu, fdv, fdw;
1444 tmapscan_pln8( lx, rx, y, p, dp, rp,Tmap1.flags );
1451 l = fl2f(p->l*32.0f);
1456 dldx = fl2f(dp->l*32.0f);
1458 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1461 tmp = cdata[v*tmap_bitmap->w+u];
1462 *pDestBits++ = tmp; //gr_fade_table[ (l>>16)*256+tmp ];
1464 //*pDestBits++ = tmp+1;
1473 tmapscan_lln8( lx, rx, y, p, dp, rp, flags );
1475 u = fl2f(p->u*64.0f);
1476 v = fl2f(p->v*64.0f);
1477 l = fl2f(p->l*32.0f);
1478 dudx = fl2f(dp->u*64.0f);
1479 dvdx = fl2f(dp->v*64.0f);
1480 dldx = fl2f(dp->l*32.0f);
1482 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1484 //tmp = cdata[((v>>16)&63)*64+((u>>16)&63)];
1485 //*pDestBits++ = ;//gr_fade_table[ (l>>16)*256+tmp ];
1495 if ( flags & TMAP_FLAG_CORRECT ) {
1496 u = fl2f(p->u*64.0f);
1497 v = fl2f(p->v*64.0f);
1498 w = fl2f(p->sw*16.0f);
1500 dudx = fl2f(dp->u*64.0f);
1501 dvdx = fl2f(dp->v*64.0f);
1502 dwdx = fl2f(dp->sw*16.0f);
1504 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1505 tmp = cdata[((v/w)&63)*64+((u/w)&63)];
1512 u = fl2f(p->u*64.0f);
1513 v = fl2f(p->v*64.0f);
1514 dudx = fl2f(dp->u*64.0f);
1515 dvdx = fl2f(dp->v*64.0f);
1517 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1518 tmp = cdata[((v>>16)&63)*64+((u>>16)&63)];
1526 if ( Tmap1.flags & TMAP_FLAG_RAMP ) {
1527 l = fl2f(p->l*32.0f);
1528 dldx = fl2f(dp->l*32.0f);
1530 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1531 *pDestBits++ = gr_fade_table[ (l>>16)*256+gr_screen.current_color.raw8 ];
1535 memset( pDestBits, gr_screen.current_color.raw8, (rx-lx+1) );
1541 uint fsave_area[64];
1543 unsigned __int64 packrgb( int r, int g, int b )
1545 unsigned __int64 tmp;
1550 tmps = (unsigned int *)&r;
1551 tmp |= *tmps & 0xFFFF;
1554 tmps = (unsigned int *)&g;
1555 tmp |= *tmps & 0xFFFF;
1558 tmps = (unsigned int *)&b;
1559 tmp |= *tmps & 0xFFFF;
// Draws one span of 32-bit pixels, rx - lx + 1 pixels long, starting at the
// address returned by GR_SCREEN_PTR(uint,lx,y).
//   p   - values at the left end of the span (u, v, r, g, b are read here)
//   dp  - per-pixel deltas of those values (u, v, sw, r, g, b are read here)
//   rp, flags - not referenced in the lines visible in this excerpt
// The drawing path is selected from Tmap1.flags:
//   TEXTURED + GOURAUD + CORRECT : per-pixel divide by w, lit through rgbtable1..3
//   TEXTURED + GOURAUD           : affine lookup, lit with MMX (pmulhw by light)
//   TEXTURED + CORRECT           : per-pixel divide by w, unlit
//   TEXTURED                     : affine lookup, unlit
//   GOURAUD only                 : interpolated r/g/b packed into 0x00RRGGBB
//   otherwise                    : solid fill with gr_screen.current_color.raw32
// Texture lookups wrap with "&63" and use a row pitch of 64, i.e. they assume
// a 64x64 texture of 32-bit texels.
// NOTE(review): the initialisation of w, the per-pixel increments and the
// closing braces are not visible in this excerpt.
1566 void tmapscan_generic( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1569 uint *pDestBits, tmp, tmp1;
1570 int u, dudx, v, dvdx, w, dwdx;
1571 int r, g, b, dr, dg, db;
1573 if ( !rgbtable_inited )
1576 pDestBits = GR_SCREEN_PTR(uint,lx,y);
1578 if ( Tmap1.flags & TMAP_FLAG_TEXTURED ) {
1579 uint * cdata = (uint *)tmap_bitmap->data;
1581 if ( Tmap1.flags & TMAP_FLAG_GOURAUD ) {
1582 if ( Tmap1.flags & TMAP_FLAG_CORRECT ) {
// Perspective-correct, table-lit path: texture coordinates scaled by 64,
// colour components scaled by 255, all converted to fixed point by fl2f.
1583 u = fl2f(p->u*64.0f);
1584 v = fl2f(p->v*64.0f);
1587 r = fl2f(p->r*255.0f);
1588 g = fl2f(p->g*255.0f);
1589 b = fl2f(p->b*255.0f);
1591 dr = fl2f(dp->r*255.0f);
1592 dg = fl2f(dp->g*255.0f);
1593 db = fl2f(dp->b*255.0f);
1595 dudx = fl2f(dp->u*64.0f);
1596 dvdx = fl2f(dp->v*64.0f);
1597 dwdx = fl2f(dp->sw);
1599 for ( count = rx - lx + 1 ; count > 0; count-- ) {
// Fetch the texel, then combine each 8-bit channel with the integer part of
// the matching light value through its lookup table.
1600 tmp = cdata[((v/w)&63)*64+((u/w)&63)];
1601 tmp1 = rgbtable1[ (tmp & 0xFF)+ (b>>16) ];
1602 tmp1 |= rgbtable2[ ((tmp>>8) & 0xFF)+ (g>>16) ];
1603 tmp1 |= rgbtable3[ ((tmp>>16) & 0xFF)+ (r>>16) ];
1604 *pDestBits++ = tmp1;
// Affine, MMX-lit path.
1614 __int64 light, deltalight;
1616 u = fl2f(p->u*64.0f);
1617 v = fl2f(p->v*64.0f);
1618 dudx = fl2f(dp->u*64.0f);
1619 dvdx = fl2f(dp->v*64.0f);
1622 r = fl2f(p->r*255.0f)>>8;
1623 g = fl2f(p->g*255.0f)>>8;
1624 b = fl2f(p->b*255.0f)>>8;
1626 dr = fl2f(dp->r*255.0f)>>8;
1627 dg = fl2f(dp->g*255.0f)>>8;
1628 db = fl2f(dp->b*255.0f)>>8;
// NOTE(review): these assignments follow the ones above with different
// scaling; presumably the two groups sit in alternative preprocessor
// branches that are not visible here -- confirm.
1634 dr = fl2f(dp->r)>>7;
1635 dg = fl2f(dp->g)>>7;
1636 db = fl2f(dp->b)>>7;
1644 light = packrgb( r, g, b );
1645 deltalight = packrgb( dr, dg, db );
// NOTE(review): fstenv stores only the x87 environment while the matching
// frstor below reloads a full state image from the same buffer; confirm this
// pairing is intentional (emms is the usual way to leave MMX mode).
1647 _asm fstenv fsave_area
1648 _asm movq mm3, light
1649 _asm movq mm4, deltalight
1650 _asm pxor mm2, mm2 ; mm2 = 0
1652 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1653 testpixel = cdata[((v>>16)&63)*64+((u>>16)&63)];
1655 _asm punpcklbw mm2, testpixel ; mm2 = 8.8,8.8, 8.8 rgb
1656 _asm pmulhw mm2, mm3 ;
1657 _asm paddsw mm3, mm4 ; light += deltalight
1658 _asm packuswb mm2, mm2 ;mm2 is who cares
1659 _asm movd testpixel, mm2 ; load tmp
1660 _asm pxor mm2, mm2 ; mm2 = 0
1662 *pDestBits++ = testpixel;
1667 _asm frstor fsave_area
// Textured, unlit paths.
1670 if ( Tmap1.flags & TMAP_FLAG_CORRECT ) {
1671 u = fl2f(p->u*64.0f);
1672 v = fl2f(p->v*64.0f);
1674 dudx = fl2f(dp->u*64.0f);
1675 dvdx = fl2f(dp->v*64.0f);
1676 dwdx = fl2f(dp->sw);
1678 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1679 *pDestBits++ = cdata[((v/w)&63)*64+((u/w)&63)];
1685 u = fl2f(p->u*64.0f);
1686 v = fl2f(p->v*64.0f);
1687 dudx = fl2f(dp->u*64.0f);
1688 dvdx = fl2f(dp->v*64.0f);
1690 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1691 *pDestBits++ = cdata[((v>>16)&63)*64+((u>>16)&63)];
// Untextured Gouraud path: pack the integer parts of r, g, b into one pixel.
1697 } else if ( Tmap1.flags & TMAP_FLAG_GOURAUD ) {
1699 r = fl2f(p->r*255.0f);
1700 g = fl2f(p->g*255.0f);
1701 b = fl2f(p->b*255.0f);
1703 dr = fl2f(dp->r*255.0f);
1704 dg = fl2f(dp->g*255.0f);
1705 db = fl2f(dp->b*255.0f);
1707 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1708 *pDestBits++ = (r&0xFF0000)|((g>>8)&0xFF00)|(b>>16);
1712 //*pDestBits++ = 100;
// Solid fill.  memset() replicates only the low byte of its value argument,
// so it cannot be used to write a 32-bit colour; store whole pixels instead.
1715 for ( count = 0; count <= rx - lx; count++ ) pDestBits[count] = gr_screen.current_color.raw32;
// Fills a run of 32-bit pixels, starting at the address returned by
// GR_SCREEN_PTR(uint,lx,y), with gr_screen.current_color.raw32.
// Two implementations are present: inline assembly under USE_INLINE_ASM
// (colour in eax, destination in edi) and a plain C loop over w pixels.
// NOTE(review): the computation of w, the store instruction of the asm
// variant and the #else/#endif lines are not visible in this excerpt; w is
// presumably the span width derived from lx and rx -- confirm.
1719 void tmapscan_flat( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1724 pDestBits = GR_SCREEN_PTR(uint,lx,y);
1726 #ifdef USE_INLINE_ASM
1727 _asm mov eax, gr_screen.current_color.raw32
1729 _asm mov edi, pDestBits
1733 for (i=0; i<w; i++ ) {
1734 *pDestBits++ = gr_screen.current_color.raw32;
// Per-pixel float array with 640*480 entries; tmapscan_flat_z indexes it as
// zbuffer[y*640 + x].
1739 float zbuffer[640*480];
// Sets every zbuffer entry to 10000.0f.
1741 void zbuffer_clear()
1744 for (i=0; i<640*480; i++ )
1745 zbuffer[i] = 10000.0f;
// Flat-colour span fill that also walks the zbuffer: tz points at the zbuffer
// entry for (lx, y) and pDestBits at the matching 32-bit destination pixel.
// NOTE(review): the depth comparison, the zbuffer update, the pointer
// increments and the computation of w are not visible in this excerpt; the
// "#ifdef USE_INLINE_ASM" guard is commented out in the source.
1748 void tmapscan_flat_z( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1755 tz = &zbuffer[y*640+lx];
1756 pDestBits = GR_SCREEN_PTR(uint,lx,y);
1760 //#ifdef USE_INLINE_ASM
1762 _asm mov eax, gr_screen.current_color.raw32
1764 _asm mov edi, pDestBits
1769 for (i=0; i<w; i++ ) {
// Plain store (no post-increment here); the advance happens on a line that is
// not visible in this excerpt.
1772 *pDestBits = gr_screen.current_color.raw32;
// Scratch buffer used by the fstenv/frstor pairs in tmapscan_pln and
// tmapscan_lln.
1786 uint fsave_area1[64];
// Perspective-correct, MMX-lit span drawer for 32-bit pixels.
//   p  - left-end values (u, v, sw read here), dp - per-pixel deltas
//        (u, v, sw, r, g, b read here), rp - right-end values (u, v, sw).
// Inputs are converted to fixed point and parked in the _fx_* globals because
// the assembly uses EBP and therefore cannot reach stack locals.
// Strategy, as shown by the visible comments and instructions:
//   * u/w and v/w are computed exactly with idiv at subdivision boundaries,
//   * inside a 2^NBITS-pixel subdivision V:U and DV:DU are packed as
//     6.10:6.10 fixed point in one register and stepped linearly,
//   * left-over pixels interpolate towards the _fx_*_right values, dividing
//     the difference by num_left_over,
//   * each texel is widened with punpcklbw, has light (mm3) advanced by
//     deltalight (mm4) and is packed back with packuswb before the store.
// NOTE(review): several asm comments are copy-paste leftovers (for example
// "eax = (v/z)" under the "compute initial u coordinate" heading, and
// "ebx = V1" on a mov into ebp); trust the instructions, not those comments.
// NOTE(review): the assignments of r, g and b and many instructions are not
// visible in this excerpt.
1788 void tmapscan_pln( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1790 __int64 light, deltalight;
1791 int r, g, b, dr, dg, db;
1792 _fx_srcptr = (uint)tmap_bitmap->data;
1793 _fx_destptr = (uint)GR_SCREEN_PTR(uint,lx,y);
// One less than the pixel count used by the C loops (rx - lx + 1).
1794 _loop_count = rx - lx;
1795 _fx_u = fl2f(p->u*64.0f);
1796 _fx_v = fl2f(p->v*64.0f);
1797 _fx_w = fl2f(p->sw*16.0);
1798 _fx_du = fl2f(dp->u*64.0f);
1799 _fx_dv = fl2f(dp->v*64.0f);
1800 _fx_dw = fl2f(dp->sw*16.0);
1802 _fx_u_right = fl2f(rp->u*64.0f);
1803 _fx_v_right = fl2f(rp->v*64.0f);
1804 _fx_w_right = fl2f(rp->sw*16.0);
1810 dr = fl2f(dp->r)>>7;
1811 dg = fl2f(dp->g)>>7;
1812 db = fl2f(dp->b)>>7;
// Pack the three channels 16 bits apart (r at bit 32, g at bit 16, b at bit 0).
1814 light = ((__int64)r<<32)|((__int64)g<<16)|(__int64)b;
1815 deltalight = ((__int64)dr<<32)|((__int64)dg<<16)|(__int64)db;
// NOTE(review): fstenv stores only the x87 environment while the matching
// frstor at the end reloads a full state image -- confirm this is intended.
1817 _asm fstenv fsave_area1
1818 _asm movq mm3, light
1819 _asm movq mm4, deltalight
1835 ; compute initial v coordinate
1840 idiv ecx ; eax = (v/z)
1843 ; compute initial u coordinate
1848 idiv ecx ; eax = (v/z)
1853 ; find number of subdivisions
1854 mov eax, _loop_count
1859 mov num_left_over, esi
1860 jz DoEndPixels ;there are no 2^NBITS chunks, do divide/pixel for whole scanline
1861 mov _loop_count, eax
1863 ; Set deltas to NPIXS pixel increments
1880 ; Done with ebx, ebp, ecx until next iteration
1892 idiv ecx ; eax = (v/z)
1893 mov ebx, eax ; ebx = U1 until pop's
1900 idiv ecx ; eax = (v/z)
1901 mov ebp, eax ; ebx = V1 until pop's
1903 ; Get last correct U,Vs
1904 mov ecx, U0 ; ecx = U0 until pop's
1905 mov edi, V0 ; edi = V0 until pop's
1907 ; Make ESI = V0:U0 in 6:10,6:10 format
1914 ; Make EDX = DV:DU in 6:10,6:10 format
1920 shl edx, 10-NBITS ; EDX = V1-V0/ 4 in 6:10 int:frac
1921 mov dx, ax ; put delta u in low word
1923 ; Save the U1 and V1 so we don't have to divide on the next iteration
1927 pop edi ; Restore EDI before using it
// Per-pixel body of the subdivision loop: fetch a 32-bit texel, light it,
// store it.
1938 movd mm1, [eax*4+ecx]
1939 pxor mm2, mm2 ; mm2 = 0
1940 punpcklbw mm2, mm1 ; mm0 = 8.8,8.8, 8.8 rgb
1942 paddsw mm3, mm4 ; light += deltalight
1943 packuswb mm2, mm2 ;mm2 is who cares
1944 movd [edi], mm2 ; load tmp
1957 test num_left_over, -1
1960 cmp num_left_over, 4
1963 ; If less than 4, then just keep interpolating without
1964 ; calculating a new DU:DV.
1968 ; ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
1972 mov ecx, _fx_w_right
1975 mov eax, _fx_u_right
1979 idiv ecx ; eax = (v/z)
1980 mov ebx, eax ; ebx = U1 until pop's
1983 mov eax, _fx_v_right
1987 idiv ecx ; eax = (v/z)
1988 mov ebp, eax ; ebp = V1 until pop's
1990 mov ecx, U0 ; ecx = U0 until pop's
1991 mov edi, V0 ; edi = V0 until pop's
1993 ; Make EDX = DV:DU in 6:10,6:10 format
1996 mov edx, eax ; These two lines are faster than cdq
1998 idiv num_left_over ; eax = (v1-v0)/num_left_over
1999 shl eax, 16-6 ; go from 16.16 to 6.10, and move into high 16 bits
2000 mov esi, eax ; esi = dvdx<<16
2004 mov edx, eax ; These two lines are faster than cdq
2006 idiv num_left_over ; eax = (u1-u0)/num_left_over
2007 sar eax, 6 ; go from 16.16 to 6.10 (ax=dvdx in 6.10)
2008 mov si, ax ; esi = dvdx:dudx
2011 ; Make ESI = V0:U0 in 6:10,6:10 format
2018 pop edi ; Restore EDI before using it
2029 ; mov eax, [eax*4+ecx]
// Per-pixel body of the left-over loop (same texel/light/store sequence).
2031 movd mm1, [eax*4+ecx]
2032 pxor mm2, mm2 ; mm2 = 0
2033 punpcklbw mm2, mm1 ; mm0 = 8.8,8.8, 8.8 rgb
2035 paddsw mm3, mm4 ; light += deltalight
2036 packuswb mm2, mm2 ;mm2 is who cares
2037 movd [edi], mm2 ; load tmp
2055 _asm frstor fsave_area1
// Affine (no perspective divide), MMX-lit span drawer for 32-bit pixels.
//   p  - left-end values (u, v read here), dp - per-pixel deltas
//        (u, v, r, g, b read here); rp and flags are not referenced in the
//        lines visible in this excerpt.
// Texture coordinates are scaled by 64, converted with fl2f and stored in the
// _fx_* globals; the asm packs V:U and DV:DU as 6.10:6.10 fixed point and,
// for each pixel, fetches a 32-bit texel, advances light (mm3) by deltalight
// (mm4), packs the result and stores it at [edi].
// NOTE(review): the assignments of r, g and b, the multiply by the light
// value and the loop control are not visible in this excerpt.
2059 void tmapscan_lln( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
2061 __int64 light, deltalight;
2062 int r, g, b, dr, dg, db;
2063 _fx_srcptr = (uint)tmap_bitmap->data;
2064 _fx_destptr = (uint)GR_SCREEN_PTR(uint,lx,y);
2065 _loop_count = rx - lx;
2066 _fx_u = fl2f(p->u*64.0f);
2067 _fx_v = fl2f(p->v*64.0f);
2068 _fx_du = fl2f(dp->u*64.0f);
2069 _fx_dv = fl2f(dp->v*64.0f);
2075 dr = fl2f(dp->r)>>7;
2076 dg = fl2f(dp->g)>>7;
2077 db = fl2f(dp->b)>>7;
// Pack the three channels 16 bits apart (r at bit 32, g at bit 16, b at bit 0).
2079 light = ((__int64)r<<32)|((__int64)g<<16)|(__int64)b;
2080 deltalight = ((__int64)dr<<32)|((__int64)dg<<16)|(__int64)db;
// NOTE(review): fstenv stores only the x87 environment while the matching
// frstor at the end reloads a full state image -- confirm this is intended.
2082 _asm fstenv fsave_area1
2083 _asm movq mm3, light
2084 _asm movq mm4, deltalight
2099 ; find number of subdivisions
2100 mov eax, _loop_count
2103 mov _loop_count, eax
2105 ; Make ESI = V0:U0 in 6:10,6:10 format
2112 ; Make EDX = DV:DU in 6:10,6:10 format
2118 shl edx, 10-NBITS ; EDX = V1-V0/ 4 in 6:10 int:frac
2119 mov dx, ax ; put delta u in low word
2122 mov ebx, _loop_count
// Per-pixel body: texel fetch, light update, pack, store.
2130 movd mm1, [eax*4+ecx]
2131 pxor mm2, mm2 ; mm2 = 0
2132 punpcklbw mm2, mm1 ; mm0 = 8.8,8.8, 8.8 rgb
2134 paddsw mm3, mm4 ; light += deltalight
2135 packuswb mm2, mm2 ;mm2 is who cares
2136 movd [edi], mm2 ; load tmp
2153 _asm frstor fsave_area1
// Perspective-correct, lit span drawer for 8-bit pixels and tiled 64x64
// textures.
//   p  - left-end values (u, v, sw, l read here)
//   dp - per-pixel deltas (u, v, sw, l)
//   rp - right-end values (u, v, sw)
// All working state is copied into the global Tmap1 structure first.  The
// function returns without drawing unless the bitmap is exactly 64x64.
// Strategy, as shown by the visible code and comments:
//   * the x87 control word is saved, modified and restored on exit,
//   * the span is cut into 32-pixel subdivisions (shr 5 / and 31); exact U,V
//     are computed on the FPU at subdivision ends (1/(1/Z) times U/Z, V/Z),
//     with the next divide issued before the integer pixel loop runs,
//   * inside a subdivision U,V are stepped linearly in packed 6.10 fixed point
//     and every texel is lit through gr_fade_table,
//   * the remaining 1..32 pixels interpolate towards the Right* values.
// NOTE(review): most integer instructions of the pixel loops are not visible
// in this excerpt; only the gr_fade_table lookups are.
2159 void tmapscan_pln8_tiled( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
2161 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
// One less than the pixel count used by the C loops (rx - lx + 1).
2162 Tmap1.loop_count = rx - lx;
2163 Tmap1.fx_u = fl2f(p->u);
2164 Tmap1.fx_v = fl2f(p->v);
2165 Tmap1.fx_du_dx = fl2f(dp->u);
2166 Tmap1.fx_dv_dx = fl2f(dp->v);
2168 Tmap1.fx_l = fl2f(p->l*32.0);
2169 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
2171 Tmap1.fx_u_right = fl2f(rp->u);
2172 Tmap1.fx_v_right = fl2f(rp->v);
2173 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
2174 Tmap1.bp = tmap_bitmap;
2175 Tmap1.src_offset = tmap_bitmap->w;
// Float scale factors for the fistp conversions: 65536 = 2^16 for 16.16
// values, 2048 = 2^16 / 32 for a delta spread over a 32-pixel subdivision.
2178 Tmap1.FixedScale = 65536.0f;
2179 Tmap1.FixedScale8 = 2048.0f; //8192.0f; // 2^16 / 8
2183 Tmap1.UOverZ = p->u;
2184 Tmap1.VOverZ = p->v;
2185 Tmap1.OneOverZ = p->sw;
// Gradients pre-multiplied by the subdivision length (32 pixels).
2187 Tmap1.dUOverZdX8 = dp->u*32.0f;
2188 Tmap1.dVOverZdX8 = dp->v*32.0f;
2189 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
2191 Tmap1.dUOverZdX = dp->u;
2192 Tmap1.dVOverZdX = dp->v;
2193 Tmap1.dOneOverZdX = dp->sw;
2195 Tmap1.RightUOverZ = rp->u;
2196 Tmap1.RightVOverZ = rp->v;
2197 Tmap1.RightOneOverZ = rp->sw;
2199 Tmap1.BitmapWidth = Tmap1.bp->w;
2200 Tmap1.BitmapHeight = Tmap1.bp->h;
// Only 64x64 textures are handled; anything else draws nothing.
2202 if (Tmap1.BitmapWidth!=64) return;
2203 if (Tmap1.BitmapHeight!=64) return;
// Make the light delta non-negative by negating it and reflecting the light
// values about 67*F1_0.
// NOTE(review): no assignment to Tmap1.fx_l_right is visible in this
// function before it is reflected here -- confirm it is set by the caller.
2207 if ( Tmap1.fx_dl_dx < 0 ) {
2208 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
2209 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
2210 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
2225 // put the FPU in 32 bit mode
2226 // @todo move this out of here!
2228 fstcw Tmap1.OldFPUCW // store copy of CW
2229 mov ax,Tmap1.OldFPUCW // get it in ax
2230 //hh and eax,NOT 1100000000y // 24 bit precision
2232 mov Tmap1.FPUCW,ax // store it
2233 fldcw Tmap1.FPUCW // load the FPU
2235 mov ecx, Tmap1.loop_count // ecx = width
2237 mov edi, Tmap1.dest_row_data // edi = dest pointer
2239 // edi = pointer to start pixel in dest dib
2242 mov eax,ecx // eax and ecx = width
2243 shr ecx,5 // ecx = width / subdivision length
2244 and eax,31 // eax = width mod subdivision length
2245 jnz some_left_over // any leftover?
2247 dec ecx // no, so special case last span
2248 mov eax,32 // it's 32 pixels long
2250 mov Tmap1.Subdivisions,ecx // store widths
2251 mov Tmap1.WidthModLength,eax
2253 // mov ebx,pLeft ; get left edge pointer
2254 // mov edx,pGradients ; get gradients pointer
2256 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
2257 // st0 st1 st2 st3 st4 st5 st6 st7
2258 fld Tmap1.VOverZ // V/ZL
2259 fld Tmap1.UOverZ // U/ZL V/ZL
2260 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
2261 fld1 // 1 1/ZL U/ZL V/ZL
2262 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
2263 fld st // ZL ZL 1/ZL U/ZL V/ZL
2264 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
2265 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
2266 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
2268 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
2269 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
2271 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
2273 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
2274 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
2275 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
2276 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
2277 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
2279 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
2281 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
2282 // @todo overlap this guy
2283 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
2284 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
2285 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
2286 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
2287 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
2289 cmp ecx,0 // check for any full spans
2290 jle HandleLeftoverPixels
2294 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
2295 // UR VR V/ZR 1/ZR U/ZR UL VL
2297 // convert left side coords
2299 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
2300 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
2301 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
2303 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
2304 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
2305 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
2307 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
2309 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
2310 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
2311 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
2312 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
2314 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
2315 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
2317 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
2318 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
2319 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
2321 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
2322 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
2324 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
2325 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
2326 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
2327 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
2328 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
2329 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
2330 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
2332 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
2334 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
2335 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
2338 ; ************** Can't Access Stack Frame ******************
2339 ; ************** Can't Access Stack Frame ******************
2340 ; ************** Can't Access Stack Frame ******************
// NOTE(review): the first register list below appears to be inherited from a
// different inner loop; the second list (ecx/edx/esi/edi/ebp) is the one that
// matches the loads that follow it.
2342 // 32 pixel span code
2343 // edi = dest dib bits at current pixel
2344 // esi = texture pointer at current u,v
2346 // ebx = u fraction 0.32
2347 // ecx = v fraction 0.32
2348 // edx = u frac step
2349 // ebp = v carry scratch
2354 // ecx = source pixels
2355 // edx = u v in 6.10 6.10
2356 // esi = du dv in 6.10 6.10
2357 // edi = dest pixels
2358 // ebp = dldx in 24.8
2365 mov ebp, Tmap1.fx_dl_dx
2374 mov ecx, Tmap1.pixptr // ecx = source pixels
2376 ; Make ESI = DV:DU in 6:10,6:10 format
2377 mov eax, Tmap1.DeltaU
2379 mov esi, Tmap1.DeltaV
// The heading below says DV:DU, but the loads that follow read UFixed and
// VFixed, i.e. the start coordinates.
2383 ; Make EDX = DV:DU in 6:10,6:10 format
2385 mov eax, Tmap1.UFixed
2387 mov edx, Tmap1.VFixed
// Unrolled pixel loop: one gr_fade_table lookup per pixel, 32 in total.
2402 mov al, gr_fade_table[eax]
2414 mov al, gr_fade_table[eax]
2426 mov al, gr_fade_table[eax]
2438 mov al, gr_fade_table[eax]
2450 mov al, gr_fade_table[eax]
2462 mov al, gr_fade_table[eax]
2474 mov al, gr_fade_table[eax]
2486 mov al, gr_fade_table[eax]
2498 mov al, gr_fade_table[eax]
2510 mov al, gr_fade_table[eax]
2522 mov al, gr_fade_table[eax]
2534 mov al, gr_fade_table[eax]
2546 mov al, gr_fade_table[eax]
2558 mov al, gr_fade_table[eax]
2570 mov al, gr_fade_table[eax]
2582 mov al, gr_fade_table[eax]
2594 mov al, gr_fade_table[eax]
2606 mov al, gr_fade_table[eax]
2618 mov al, gr_fade_table[eax]
2630 mov al, gr_fade_table[eax]
2642 mov al, gr_fade_table[eax]
2654 mov al, gr_fade_table[eax]
2666 mov al, gr_fade_table[eax]
2678 mov al, gr_fade_table[eax]
2690 mov al, gr_fade_table[eax]
2702 mov al, gr_fade_table[eax]
2714 mov al, gr_fade_table[eax]
2726 mov al, gr_fade_table[eax]
2738 mov al, gr_fade_table[eax]
2750 mov al, gr_fade_table[eax]
2762 mov al, gr_fade_table[eax]
2774 mov al, gr_fade_table[eax]
2778 ; ************** Okay to Access Stack Frame ****************
2779 ; ************** Okay to Access Stack Frame ****************
2780 ; ************** Okay to Access Stack Frame ****************
2783 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
2784 ; ZR V/ZR 1/ZR U/ZR UL VL
2786 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
2787 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
2788 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
2789 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
2791 add edi,32 ; increment to next span
2792 dec Tmap1.Subdivisions ; decrement span count
2793 jnz SpanLoop ; loop back
2795 HandleLeftoverPixels:
2797 mov esi,Tmap1.pixptr ; load texture pointer
2799 ; edi = dest dib bits
2800 ; esi = current texture dib bits
2801 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
2802 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
2804 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
2805 jz FPUReturn ; nope, pop the FPU and bail
2807 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
2809 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
2810 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
2811 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
2813 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
2814 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
2815 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
2817 dec Tmap1.WidthModLength ; calc how many steps to take
2818 jz OnePixelSpan ; just one, don't do deltas
2820 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
2823 ; @todo rearrange things so we don't need these two instructions
2824 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
2825 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
// Right-edge terms are stepped back by one pixel's gradient before the divide.
2827 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
2828 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
2829 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
2830 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
2831 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
2832 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
// NOTE(review): no assignment to Tmap1.One is visible in this function;
// presumably it holds 1.0f -- confirm it is initialised elsewhere.
2834 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
2836 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
2837 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
2839 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
2841 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
2842 fxch st(1) ; VR UR inv. inv. inv. dU VL
2843 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
2844 fxch st(6) ; dV UR inv. inv. inv. dU VR
2846 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
2847 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
2848 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
2850 fxch st(4) ; dU inv. inv. inv. UR VR
2851 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
2852 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
2853 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
2855 ; @todo gross! these are to line up with the other loop
2856 fld st(1) ; inv. inv. inv. inv. UR VR
2857 fld st(2) ; inv. inv. inv. inv. inv. UR VR
2865 mov ebp, Tmap1.fx_dl_dx
2875 ; Make ESI = DV:DU in 6:10,6:10 format
2876 mov eax, Tmap1.DeltaU
2878 mov esi, Tmap1.DeltaV
2882 ; Make EDX = DV:DU in 6:10,6:10 format
2884 mov eax, Tmap1.UFixed
2886 mov edx, Tmap1.VFixed
2890 mov ecx, Tmap1.pixptr // ecx = source pixels
// WidthModLength was decremented above to count steps; restore the pixel count.
2892 inc Tmap1.WidthModLength
2893 mov eax,Tmap1.WidthModLength
2897 mov Tmap1.WidthModLength, eax
2913 mov al, gr_fade_table[eax]
2925 mov al, gr_fade_table[eax]
2930 dec Tmap1.WidthModLength
2948 mov al, gr_fade_table[eax]
2953 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
2954 ; xxx xxx xxx xxx xxx xxx xxx
2965 fldcw Tmap1.OldFPUCW // restore the FPU
// Fills the current span with a solid colour, dispatching on
// gr_screen.bits_per_pixel.
// NOTE(review): the case labels, the store instructions of the asm variants
// and the preprocessor lines that choose between the memset and the C loop
// are not visible in this excerpt.
2979 void c_tmap_scanline_flat()
2981 switch( gr_screen.bits_per_pixel ) {
// Byte fill.
// NOTE(review): this writes loop_count bytes while the C loop below runs
// loop_count + 1 iterations (x >= 0) -- confirm which count is intended.
2984 memset( Tmap1.dest_row_data, gr_screen.current_color.raw8, Tmap1.loop_count );
2989 dest = Tmap1.dest_row_data;
2991 for (x=Tmap1.loop_count; x >= 0; x-- ) {
2993 *dest++ = Tmap1.tmap_flat_color;
// Fill with the 16-bit colour: count in ecx, value in ax, destination in edi.
2999 _asm mov ecx, Tmap1.loop_count
3000 _asm mov ax, gr_screen.current_color.raw16;
3001 _asm mov edi, Tmap1.dest_row_data16
// Second group with the same 16-bit setup.
// NOTE(review): the case label it belongs to is not visible -- confirm that
// a 16-bit fill is correct for that pixel depth.
3006 _asm mov ecx, Tmap1.loop_count
3007 _asm mov ax, gr_screen.current_color.raw16;
3008 _asm mov edi, Tmap1.dest_row_data16
// Fill with the 32-bit colour.
3013 _asm mov ecx, Tmap1.loop_count
3014 _asm mov eax, gr_screen.current_color.raw32;
3015 _asm mov edi, Tmap1.dest_row_data32
// Remaps the pixels already present in the current span, in place: every
// destination byte c is replaced by gr_fade_table[(shade << 8) | c], where
// shade is Tmap1.tmap_flat_shade_value.
// Processes Tmap1.loop_count + 1 pixels (the loop runs while x >= 0).
3023 void c_tmap_scanline_shaded()
3029 dest = Tmap1.dest_row_data;
// Pre-shift the shade value so it can be OR-ed with the 8-bit pixel to form
// the table index.
3031 fade = Tmap1.tmap_flat_shade_value<<8;
3032 for (x=Tmap1.loop_count; x >= 0; x-- ) {
// Read and write through the same pointer value, then advance.  The former
// "*dest++ = table[ fade | (*dest) ]" modified dest and read it in the same
// expression without sequencing, which is undefined behaviour before C++17.
3033 *dest = gr_fade_table[ fade |(*dest)]; dest++;
// Affine, unlit C span drawer for 64x64 textures.
// v is stepped in units of 64 (dvdx is pre-multiplied by 64), so that
// f2i(v) & (64*63) yields the texel row offset directly and f2i(u) & 63 the
// column; both masks make the lookup wrap inside the 64x64 tile.
// The first loop copies every texel; the second is the transparency variant.
// NOTE(review): the initialisation of u and v, the per-pixel increments and
// the transparency test on c are not visible in this excerpt.
3037 void c_tmap_scanline_lin_nolight()
3046 dudx = Tmap1.fx_du_dx;
3047 dvdx = Tmap1.fx_dv_dx*64;
3049 dest = Tmap1.dest_row_data;
3051 if (!Tmap1.Transparency_on) {
3052 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3053 *dest++ = (uint)Tmap1.pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
3058 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3059 c = (uint)Tmap1.pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
// NOTE(review): only the signature is visible in this excerpt; by its name it
// is presumably the lit counterpart of c_tmap_scanline_lin_nolight -- confirm
// against the full body.
3070 void c_tmap_scanline_lin()
// Perspective-correct, unlit C span drawer for 64x64 textures: u and v are
// divided by z for every pixel.
// dvdx is pre-multiplied by 64 so that (v/z) & (64*63) is the texel row
// offset; (u/z) & 63 is the column.  Both masks wrap inside the 64x64 tile.
// The first loop copies every texel; the second is the transparency variant.
// NOTE(review): the initialisation of u, v and z, the per-pixel increments
// and the transparency test on c are not visible in this excerpt; no guard
// against z == 0 is visible either.
3077 void c_tmap_scanline_per_nolight()
3082 fix u,v,z,dudx, dvdx, dzdx;
3087 dudx = Tmap1.fx_du_dx;
3088 dvdx = Tmap1.fx_dv_dx*64;
3089 dzdx = Tmap1.fx_dz_dx;
3091 dest = Tmap1.dest_row_data;
3093 if (!Tmap1.Transparency_on) {
3094 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3095 *dest++ = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
3101 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3102 c = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
// Perspective-correct, lit C span drawer for 64x64 textures.
// Same texel addressing as c_tmap_scanline_per_nolight, but each texel is
// passed through gr_fade_table: (l & 0xff00) selects the table row and the
// texel value the column.
// The first loop writes every pixel; the second is the transparency variant.
// NOTE(review): the initialisation of u, v, z and l, the per-pixel
// increments and the transparency test on c are not visible in this excerpt.
3113 void c_tmap_scanline_per1()
3118 fix u,v,z,l,dudx, dvdx, dzdx, dldx;
3123 dudx = Tmap1.fx_du_dx;
3124 dvdx = Tmap1.fx_dv_dx*64;
3125 dzdx = Tmap1.fx_dz_dx;
3128 dldx = Tmap1.fx_dl_dx;
3129 dest = Tmap1.dest_row_data;
3131 if (!Tmap1.Transparency_on) {
3132 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3133 *dest++ = gr_fade_table[ (l&(0xff00)) + (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
3140 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3141 c = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
3143 *dest = gr_fade_table[ (l&(0xff00)) + c ];
// Span drawer variant with the same setup as c_tmap_scanline_per_nolight
// (u, v, z gradients, dvdx pre-multiplied by 64, 64x64 wrap masks).
// In the non-transparent loop the texel fetch is commented out; the
// transparent loop still reads the texel into c.
// NOTE(review): what is actually written to dest in either loop is not
// visible in this excerpt.
3155 void c_tmap_scanline_editor()
3160 fix u,v,z,dudx, dvdx, dzdx;
3165 dudx = Tmap1.fx_du_dx;
3166 dvdx = Tmap1.fx_dv_dx*64;
3167 dzdx = Tmap1.fx_dz_dx;
3169 dest = Tmap1.dest_row_data;
3171 if (!Tmap1.Transparency_on) {
3172 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3174 //(uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
3180 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3181 c = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
// Affine, lit span drawer for 8-bit pixels and tiled 64x64 textures, written
// in inline assembly.  Reads its inputs from the global Tmap1 structure and
// returns without drawing unless the bitmap is exactly 64x64.
// Register use, as shown by the visible loads and comments:
//   edi = destination pointer, ecx = texture base (Tmap1.pixptr),
//   edx = packed U:V in 6.10, esi = packed DU:DV in 6.10, ebp = light delta.
// Pixels are processed in unrolled groups of 8 (Tmap1.num_big_steps groups),
// followed by loop_count & 7 left-over pixels; every pixel goes through a
// gr_fade_table lookup.
// NOTE(review): most instructions of the pixel loops are not visible in this
// excerpt.
3192 void asm_tmap_scanline_lln_tiled()
3194 if ( Tmap1.BitmapWidth != 64 ) return;
3195 if ( Tmap1.BitmapHeight != 64 ) return;
3206 ; set edi = address of first pixel to modify
3207 mov edi, Tmap1.dest_row_data
3213 mov dx, ax ; EDX=U:V in 6.10 format
3215 mov eax, Tmap1.fx_dv_dx
3217 mov esi, Tmap1.fx_du_dx
3219 mov si, ax ; ESI=DU:DV in 6.10 format
3223 mov ebp, Tmap1.fx_dl_dx
3226 mov ecx, Tmap1.pixptr
3228 mov eax, Tmap1.loop_count
3230 mov Tmap1.loop_count, eax
3235 mov Tmap1.num_big_steps, eax
3236 and Tmap1.loop_count, 7
// Unrolled group: eight lookups, one per pixel.
3248 mov al, gr_fade_table[eax]
3260 mov al, gr_fade_table[eax]
3272 mov al, gr_fade_table[eax]
3284 mov al, gr_fade_table[eax]
3296 mov al, gr_fade_table[eax]
3308 mov al, gr_fade_table[eax]
3320 mov al, gr_fade_table[eax]
3332 mov al, gr_fade_table[eax]
3336 dec Tmap1.num_big_steps
// Left-over pixels.
3341 mov eax,Tmap1.loop_count
3346 mov Tmap1.loop_count, eax
3359 mov al, gr_fade_table[eax]
3370 mov al, gr_fade_table[eax]
3375 dec Tmap1.loop_count
3388 mov al, gr_fade_table[eax]
// Forward declaration of the 32-bit variant defined further down.
3403 void asm_tmap_scanline_lln32();
// Affine, lit span drawer for 8-bit pixels, written in inline assembly.
// Reads its inputs from the global Tmap1 structure.
//   * Tiled textures are handed to asm_tmap_scanline_lln_tiled().
//   * The span is dropped if the integer part of the start or end u/v is
//     >= the bitmap width/height.
//   * The light delta is made non-negative by negating it and reflecting the
//     light values about 67*F1_0.
//   * u and v are split into integer and 32-bit fractional parts; the
//     fractions are stepped with add, and the carries select and adjust the
//     integer texture step through the UVintVfracStep table.
//   * Pixels are drawn in unrolled groups of 8 (Tmap1.num_big_steps groups),
//     then loop_count & 7 left-over pixels, each lit through gr_fade_table.
// NOTE(review): only the upper bound of each coordinate is tested; negative
// coordinates are not rejected by the visible checks.
// NOTE(review): the return after the tiled dispatch and several instructions
// of the pixel loops are not visible in this excerpt.
3405 void asm_tmap_scanline_lln()
3410 if ( Tmap1.tmap_flags & TMAP_FLAG_TILED ) {
3411 asm_tmap_scanline_lln_tiled();
3415 end = f2i(Tmap1.fx_u);
3416 if ( end >= Tmap1.bp->w ) return;
3418 end = f2i(Tmap1.fx_v);
3419 if ( end >= Tmap1.bp->h ) return;
3421 end = f2i(Tmap1.fx_u_right);
3422 if ( end >= Tmap1.bp->w ) return;
3424 end = f2i(Tmap1.fx_v_right);
3425 if ( end >= Tmap1.bp->h ) return;
3427 if ( Tmap1.fx_dl_dx < 0 ) {
3428 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
3429 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
3430 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
3432 // SDL_assert( Tmap1.fx_l > 31*F1_0 );
3433 // SDL_assert( Tmap1.fx_l < 66*F1_0 );
3434 // SDL_assert( Tmap1.fx_dl_dx >= 0 );
3435 // SDL_assert( Tmap1.fx_dl_dx < 31*F1_0 );
3448 ; setup delta values
3449 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
3450 mov ebx, eax // copy it
3451 sar eax, 16 // get v int step
3452 shl ebx, 16 // get v frac step
3453 mov Tmap1.DeltaVFrac, ebx // store it
3454 imul eax, Tmap1.src_offset // calc texture step for v int step
3456 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
3457 mov ecx, ebx // copy it
3458 sar ebx, 16 // get the u int step
3459 shl ecx, 16 // get the u frac step
3460 mov Tmap1.DeltaUFrac, ecx // store it
3461 add eax, ebx // calc uint + vint step
3462 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
3463 add eax, Tmap1.src_offset // calc whole step + v carry
3464 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
3466 ; setup initial coordinates
3467 mov esi, Tmap1.fx_u // get u 16.16
3468 mov ebx, esi // copy it
3469 sar esi, 16 // get integer part
3470 shl ebx, 16 // get fractional part
3472 mov ecx, Tmap1.fx_v // get v 16.16
3473 mov edx, ecx // copy it
3474 sar edx, 16 // get integer part
3475 shl ecx, 16 // get fractional part
3476 imul edx, Tmap1.src_offset // calc texture scanline address
3477 add esi, edx // calc texture offset
3478 add esi, Tmap1.pixptr // calc address
3480 ; set edi = address of first pixel to modify
3481 mov edi, Tmap1.dest_row_data
3483 mov edx, Tmap1.DeltaUFrac
3485 mov eax, Tmap1.loop_count
3487 mov Tmap1.loop_count, eax
3492 mov Tmap1.num_big_steps, eax
3493 and Tmap1.loop_count, 7
3502 mov ebp, Tmap1.fx_dl_dx
3514 // 8 pixel span code
3515 // edi = dest dib bits at current pixel
3516 // esi = texture pointer at current u,v
3518 // ebx = u fraction 0.32
3519 // ecx = v fraction 0.32
3520 // edx = u frac step
3521 // ebp = v carry scratch
3523 mov al,[edi] // preread the destination cache line
3525 mov al,[esi] // get texture pixel 0
3527 mov al, gr_fade_table[eax]
// Stepping idiom used for every pixel below: the v-fraction add leaves its
// carry in CF, sbb turns that into 0 / -1 in ebp, the u-fraction add leaves
// its own carry in CF, and adc adds the table entry selected by ebp plus the
// u carry to the texture pointer.
3529 add ecx,Tmap1.DeltaVFrac // increment v fraction
3530 sbb ebp,ebp // get -1 if carry
3531 add ebx,edx // increment u fraction
3533 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3534 add ecx,Tmap1.DeltaVFrac // increment v fraction
3536 sbb ebp,ebp // get -1 if carry
3537 mov [edi+0],al // store pixel 0
3539 add ebx,edx // increment u fraction
3540 mov al,[esi] // get texture pixel 1
3542 mov al, gr_fade_table[eax]
3544 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3545 add ecx,Tmap1.DeltaVFrac // increment v fraction
3547 sbb ebp,ebp // get -1 if carry
3548 mov [edi+1],al // store pixel 1
3550 add ebx,edx // increment u fraction
3551 mov al,[esi] // get texture pixel 2
3553 mov al, gr_fade_table[eax]
3555 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3556 add ecx,Tmap1.DeltaVFrac // increment v fraction
3558 sbb ebp,ebp // get -1 if carry
3559 mov [edi+2],al // store pixel 2
3561 add ebx,edx // increment u fraction
3562 mov al,[esi] // get texture pixel 3
3564 mov al, gr_fade_table[eax]
3566 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3567 add ecx,Tmap1.DeltaVFrac // increment v fraction
3569 sbb ebp,ebp // get -1 if carry
3570 mov [edi+3],al // store pixel 3
3572 add ebx,edx // increment u fraction
3573 mov al,[esi] // get texture pixel 4
3575 mov al, gr_fade_table[eax]
3577 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3578 add ecx,Tmap1.DeltaVFrac // increment v fraction
3580 sbb ebp,ebp // get -1 if carry
3581 mov [edi+4],al // store pixel 4
3583 add ebx,edx // increment u fraction
3584 mov al,[esi] // get texture pixel 5
3586 mov al, gr_fade_table[eax]
3588 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3589 add ecx,Tmap1.DeltaVFrac // increment v fraction
3591 sbb ebp,ebp // get -1 if carry
3592 mov [edi+5],al // store pixel 5
3594 add ebx,edx // increment u fraction
3595 mov al,[esi] // get texture pixel 6
3597 mov al, gr_fade_table[eax]
3599 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3600 add ecx,Tmap1.DeltaVFrac // increment v fraction
3602 sbb ebp,ebp // get -1 if carry
3603 mov [edi+6],al // store pixel 6
3605 add ebx,edx // increment u fraction
3607 mov al,[esi] // get texture pixel 7
3609 mov al, gr_fade_table[eax]
3611 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3613 mov [edi+7],al // store pixel 7
3619 dec Tmap1.num_big_steps
// Left-over pixels (loop_count & 7), drawn two per iteration plus a final
// single pixel.
3625 mov eax,Tmap1.loop_count
3630 mov Tmap1.loop_count, eax
3640 mov ebp, Tmap1.fx_dl_dx
3644 mov al,[edi] // preread the destination cache line
3645 // add ebx,edx // increment u fraction
3649 mov al,[esi] // get texture pixel 0
3651 mov al, gr_fade_table[eax]
3653 add ecx,Tmap1.DeltaVFrac // increment v fraction
3654 sbb ebp,ebp // get -1 if carry
3655 add ebx,edx // increment u fraction
3656 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3657 mov [edi+0],al // store pixel 0
3659 add ecx,Tmap1.DeltaVFrac // increment v fraction
3660 sbb ebp,ebp // get -1 if carry
3661 add ebx,edx // increment u fraction
3662 mov al,[esi] // get texture pixel 1
3664 mov al, gr_fade_table[eax]
3666 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3667 mov [edi+1],al // store pixel 1
3670 dec Tmap1.loop_count
3678 mov al,[esi] // get texture pixel 2
3680 mov al, gr_fade_table[eax]
3681 mov [edi],al // store pixel 2
3695 void asm_tmap_scanline_lln32()
3699 end = f2i(Tmap1.fx_u);
3700 if ( end >= Tmap1.bp->w ) return;
3702 end = f2i(Tmap1.fx_v);
3703 if ( end >= Tmap1.bp->h ) return;
3705 end = f2i(Tmap1.fx_u_right);
3706 if ( end >= Tmap1.bp->w ) return;
3708 end = f2i(Tmap1.fx_v_right);
3709 if ( end >= Tmap1.bp->h ) return;
3720 ; setup delta values
3721 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
3722 mov ebx, eax // copy it
3723 sar eax, 16 // get v int step
3724 shl ebx, 16 // get v frac step
3725 mov Tmap1.DeltaVFrac, ebx // store it
3726 imul eax, Tmap1.src_offset // calc texture step for v int step
3728 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
3729 mov ecx, ebx // copy it
3730 sar ebx, 16 // get the u int step
3731 shl ecx, 16 // get the u frac step
3732 mov Tmap1.DeltaUFrac, ecx // store it
3733 add eax, ebx // calc uint + vint step
3734 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
3735 add eax, Tmap1.src_offset // calc whole step + v carry
3736 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
3738 ; setup initial coordinates
3739 mov esi, Tmap1.fx_u // get u 16.16
3740 mov ebx, esi // copy it
3741 sar esi, 16 // get integer part
3742 shl ebx, 16 // get fractional part
3744 mov ecx, Tmap1.fx_v // get v 16.16
3745 mov edx, ecx // copy it
3746 sar edx, 16 // get integer part
3747 shl ecx, 16 // get fractional part
3748 imul edx, Tmap1.src_offset // calc texture scanline address
3749 add esi, edx // calc texture offset
3750 add esi, Tmap1.pixptr // calc address
3752 ; set edi = address of first pixel to modify
3753 mov edi, Tmap1.dest_row_data32
3755 mov edx, Tmap1.DeltaUFrac
3757 mov eax, Tmap1.fx_l // use bx and dx to do lighting
3759 mov eax, Tmap1.fx_dl_dx // use bx and dx to do lighting
3762 mov eax, Tmap1.loop_count
3764 mov Tmap1.loop_count, eax
3769 mov Tmap1.num_big_steps, eax
3770 and Tmap1.loop_count, 7
3775 // 8 pixel span code
3776 // edi = dest dib bits at current pixel
3777 // esi = texture pointer at current u,v
3779 // ebx = u fraction 0.32
3780 // ecx = v fraction 0.32
3781 // edx = u frac step
3782 // ebp = v carry scratch
3784 mov al,[edi] // preread the destination cache line
3786 mov al,[esi] // get texture pixel 0
3788 mov eax, gr_fade_table32[eax*4]
3790 add ecx,Tmap1.DeltaVFrac // increment v fraction
3791 sbb ebp,ebp // get -1 if carry
3792 add ebx,edx // increment u fraction
3794 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3795 add ecx,Tmap1.DeltaVFrac // increment v fraction
3797 sbb ebp,ebp // get -1 if carry
3798 mov [edi+0],eax // store pixel 0
3800 add ebx,edx // increment u fraction
3801 mov al,[esi] // get texture pixel 1
3803 mov eax, gr_fade_table32[eax*4]
3805 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3806 add ecx,Tmap1.DeltaVFrac // increment v fraction
3808 sbb ebp,ebp // get -1 if carry
3809 mov [edi+4],al // store pixel 1
3811 add ebx,edx // increment u fraction
3812 mov al,[esi] // get texture pixel 2
3814 mov eax, gr_fade_table32[eax*4]
3816 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3817 add ecx,Tmap1.DeltaVFrac // increment v fraction
3819 sbb ebp,ebp // get -1 if carry
3820 mov [edi+8],eax // store pixel 2
3822 add ebx,edx // increment u fraction
3823 mov al,[esi] // get texture pixel 3
3825 mov eax, gr_fade_table32[eax*4]
3827 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3828 add ecx,Tmap1.DeltaVFrac // increment v fraction
3830 sbb ebp,ebp // get -1 if carry
3831 mov [edi+12],eax // store pixel 3
3833 add ebx,edx // increment u fraction
3834 mov al,[esi] // get texture pixel 4
3836 mov eax, gr_fade_table32[eax*4]
3838 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3839 add ecx,Tmap1.DeltaVFrac // increment v fraction
3841 sbb ebp,ebp // get -1 if carry
3842 mov [edi+16],eax // store pixel 4
3844 add ebx,edx // increment u fraction
3845 mov al,[esi] // get texture pixel 5
3847 mov eax, gr_fade_table32[eax*4]
3849 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3850 add ecx,Tmap1.DeltaVFrac // increment v fraction
3852 sbb ebp,ebp // get -1 if carry
3853 mov [edi+20],eax // store pixel 5
3855 add ebx,edx // increment u fraction
3856 mov al,[esi] // get texture pixel 6
3858 mov eax, gr_fade_table32[eax*4]
3860 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3861 add ecx,Tmap1.DeltaVFrac // increment v fraction
3863 sbb ebp,ebp // get -1 if carry
3864 mov [edi+24],eax // store pixel 6
3866 add ebx,edx // increment u fraction
3868 mov al,[esi] // get texture pixel 7
3870 mov eax, gr_fade_table32[eax]
3872 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3874 mov [edi+28],eax // store pixel 7
3880 dec Tmap1.num_big_steps
3886 mov eax,Tmap1.loop_count
3891 mov Tmap1.loop_count, eax
3897 mov al,[edi] // preread the destination cache line
3899 mov al,[esi] // get texture pixel 0
3901 mov eax, gr_fade_table32[eax*4]
3903 add ecx,Tmap1.DeltaVFrac // increment v fraction
3904 sbb ebp,ebp // get -1 if carry
3905 add ebx,edx // increment u fraction
3906 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3907 mov [edi+0],eax // store pixel 0
3909 add ecx,Tmap1.DeltaVFrac // increment v fraction
3910 sbb ebp,ebp // get -1 if carry
3911 add ebx,edx // increment u fraction
3912 mov al,[esi] // get texture pixel 1
3914 mov eax, gr_fade_table32[eax*4]
3916 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3917 mov [edi+1],al // store pixel 1
3920 dec Tmap1.loop_count
3928 mov al,[esi] // get texture pixel 2
3930 mov eax, gr_fade_table32[eax*4]
3931 mov [edi],eax // store pixel 2
// asm_tmap_scanline_lnt: affine (linear) texture-mapping inner loop that copies
// one texture byte per destination pixel, with no lighting-table lookup.
// It takes no parameters and returns nothing; all inputs come from the global
// Tmap1 state (fx_u/fx_v, fx_du_dx/fx_dv_dx, src_offset, pixptr,
// dest_row_data, loop_count).
//  - Returns before touching the destination if the integer part of the left
//    or right u/v coordinate is >= the bitmap width/height. Only upper-bound
//    checks are visible here.
//  - Splits the 16.16 u and v steps into integer and fractional parts. The
//    fractional parts are accumulated in ebx/ecx and their carries advance the
//    texture pointer in esi.
// NOTE(review): this listing omits some lines (labels and jumps are not
// visible). The visible instructions appear identical to
// asm_tmap_scanline_lnn, so any difference between the two routines lies in
// the lines not shown here.
3944 void asm_tmap_scanline_lnt()
3948 end = f2i(Tmap1.fx_u); // f2i presumably converts 16.16 fixed point to int - TODO confirm
3949 if ( end >= Tmap1.bp->w ) return; // left u is at or past the bitmap width
3951 end = f2i(Tmap1.fx_v);
3952 if ( end >= Tmap1.bp->h ) return; // left v is at or past the bitmap height
3954 end = f2i(Tmap1.fx_u_right);
3955 if ( end >= Tmap1.bp->w ) return; // right u is at or past the bitmap width
3957 end = f2i(Tmap1.fx_v_right);
3958 if ( end >= Tmap1.bp->h ) return; // right v is at or past the bitmap height
3970 ; setup delta values
3971 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
3972 mov ebx, eax // copy it
3973 sar eax, 16 // get v int step
3974 shl ebx, 16 // get v frac step
3975 mov Tmap1.DeltaVFrac, ebx // store it
3976 imul eax, Tmap1.src_offset // calc texture step for v int step
3978 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
3979 mov ecx, ebx // copy it
3980 sar ebx, 16 // get the u int step
3981 shl ecx, 16 // get the u frac step
3982 mov Tmap1.DeltaUFrac, ecx // store it
3983 add eax, ebx // calc uint + vint step
3984 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
3985 add eax, Tmap1.src_offset // calc whole step + v carry
3986 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
3988 ; setup initial coordinates
3989 mov esi, Tmap1.fx_u // get u 16.16
3990 mov ebx, esi // copy it
3991 sar esi, 16 // get integer part
3992 shl ebx, 16 // get fractional part
3994 mov ecx, Tmap1.fx_v // get v 16.16
3995 mov edx, ecx // copy it
3996 sar edx, 16 // get integer part
3997 shl ecx, 16 // get fractional part
3998 imul edx, Tmap1.src_offset // calc texture scanline address
3999 add esi, edx // calc texture offset
4000 add esi, Tmap1.pixptr // calc address
4002 ; set edi = address of first pixel to modify
4003 mov edi, Tmap1.dest_row_data
4005 mov edx, Tmap1.DeltaUFrac
4007 mov eax, Tmap1.loop_count
4009 mov Tmap1.loop_count, eax
4014 mov Tmap1.num_big_steps, eax // presumably the count of 8-pixel groups; how eax is derived is not visible in this listing
4015 and Tmap1.loop_count, 7 // keep only loop_count mod 8
4020 // 8 pixel span code
4021 // edi = dest dib bits at current pixel
4022 // esi = texture pointer at current u,v
4024 // ebx = u fraction 0.32
4025 // ecx = v fraction 0.32
4026 // edx = u frac step
4027 // ebp = v carry scratch
4029 mov al,[edi] // preread the destination cache line
4031 mov al,[esi] // get texture pixel 0
4033 add ecx,Tmap1.DeltaVFrac // increment v fraction
4034 sbb ebp,ebp // get -1 if carry
4035 add ebx,edx // increment u fraction
4037 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4038 add ecx,Tmap1.DeltaVFrac // increment v fraction
4040 sbb ebp,ebp // get -1 if carry
4043 mov [edi+0],al // store pixel 0
4046 add ebx,edx // increment u fraction
4047 mov al,[esi] // get texture pixel 1
4049 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4050 add ecx,Tmap1.DeltaVFrac // increment v fraction
4052 sbb ebp,ebp // get -1 if carry
4055 mov [edi+1],al // store pixel 1
4058 add ebx,edx // increment u fraction
4059 mov al,[esi] // get texture pixel 2
4061 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4062 add ecx,Tmap1.DeltaVFrac // increment v fraction
4064 sbb ebp,ebp // get -1 if carry
4067 mov [edi+2],al // store pixel 2
4070 add ebx,edx // increment u fraction
4071 mov al,[esi] // get texture pixel 3
4073 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4074 add ecx,Tmap1.DeltaVFrac // increment v fraction
4076 sbb ebp,ebp // get -1 if carry
4079 mov [edi+3],al // store pixel 3
4082 add ebx,edx // increment u fraction
4083 mov al,[esi] // get texture pixel 4
4085 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4086 add ecx,Tmap1.DeltaVFrac // increment v fraction
4088 sbb ebp,ebp // get -1 if carry
4091 mov [edi+4],al // store pixel 4
4094 add ebx,edx // increment u fraction
4095 mov al,[esi] // get texture pixel 5
4097 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4098 add ecx,Tmap1.DeltaVFrac // increment v fraction
4100 sbb ebp,ebp // get -1 if carry
4103 mov [edi+5],al // store pixel 5
4106 add ebx,edx // increment u fraction
4107 mov al,[esi] // get texture pixel 6
4109 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4110 add ecx,Tmap1.DeltaVFrac // increment v fraction
4112 sbb ebp,ebp // get -1 if carry
4115 mov [edi+6],al // store pixel 6
4118 add ebx,edx // increment u fraction
4120 mov al,[esi] // get texture pixel 7
4122 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4126 mov [edi+7],al // store pixel 7
4133 dec Tmap1.num_big_steps // count down the 8-pixel groups
4139 mov eax,Tmap1.loop_count
4144 mov Tmap1.loop_count, eax
4149 mov al,[edi] // preread the destination cache line
4150 // add ebx,edx // increment u fraction
4154 mov al,[esi] // get texture pixel 0
4156 add ecx,Tmap1.DeltaVFrac // increment v fraction
4157 sbb ebp,ebp // get -1 if carry
4158 add ebx,edx // increment u fraction
4159 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4162 mov [edi+0],al // store pixel 0
4165 add ecx,Tmap1.DeltaVFrac // increment v fraction
4166 sbb ebp,ebp // get -1 if carry
4167 add ebx,edx // increment u fraction
4168 mov al,[esi] // get texture pixel 1
4170 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4173 mov [edi+1],al // store pixel 1
4177 dec Tmap1.loop_count // count down the leftover-pixel counter
4185 mov al,[esi] // get texture pixel 2
4188 mov [edi],al // store that texel at the current destination address
// asm_tmap_scanline_lnn: affine (linear) texture-mapping inner loop that copies
// one texture byte per destination pixel, with no lighting-table lookup.
// It takes no parameters and returns nothing; all inputs come from the global
// Tmap1 state (fx_u/fx_v, fx_du_dx/fx_dv_dx, src_offset, pixptr,
// dest_row_data, loop_count).
//  - Returns before touching the destination if the integer part of the left
//    or right u/v coordinate is >= the bitmap width/height. Only upper-bound
//    checks are visible here.
//  - Splits the 16.16 u and v steps into integer and fractional parts. The
//    fractional parts are accumulated in ebx/ecx and their carries advance the
//    texture pointer in esi.
// NOTE(review): this listing omits some lines (labels and jumps are not
// visible), so the loop structure around the unrolled stores cannot be
// confirmed from here.
4203 void asm_tmap_scanline_lnn()
4207 end = f2i(Tmap1.fx_u); // f2i presumably converts 16.16 fixed point to int - TODO confirm
4208 if ( end >= Tmap1.bp->w ) return; // left u is at or past the bitmap width
4210 end = f2i(Tmap1.fx_v);
4211 if ( end >= Tmap1.bp->h ) return; // left v is at or past the bitmap height
4213 end = f2i(Tmap1.fx_u_right);
4214 if ( end >= Tmap1.bp->w ) return; // right u is at or past the bitmap width
4216 end = f2i(Tmap1.fx_v_right);
4217 if ( end >= Tmap1.bp->h ) return; // right v is at or past the bitmap height
4229 ; setup delta values
4230 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
4231 mov ebx, eax // copy it
4232 sar eax, 16 // get v int step
4233 shl ebx, 16 // get v frac step
4234 mov Tmap1.DeltaVFrac, ebx // store it
4235 imul eax, Tmap1.src_offset // calc texture step for v int step
4237 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
4238 mov ecx, ebx // copy it
4239 sar ebx, 16 // get the u int step
4240 shl ecx, 16 // get the u frac step
4241 mov Tmap1.DeltaUFrac, ecx // store it
4242 add eax, ebx // calc uint + vint step
4243 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
4244 add eax, Tmap1.src_offset // calc whole step + v carry
4245 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
4247 ; setup initial coordinates
4248 mov esi, Tmap1.fx_u // get u 16.16
4249 mov ebx, esi // copy it
4250 sar esi, 16 // get integer part
4251 shl ebx, 16 // get fractional part
4253 mov ecx, Tmap1.fx_v // get v 16.16
4254 mov edx, ecx // copy it
4255 sar edx, 16 // get integer part
4256 shl ecx, 16 // get fractional part
4257 imul edx, Tmap1.src_offset // calc texture scanline address
4258 add esi, edx // calc texture offset
4259 add esi, Tmap1.pixptr // calc address
4261 ; set edi = address of first pixel to modify
4262 mov edi, Tmap1.dest_row_data
4264 mov edx, Tmap1.DeltaUFrac
4266 mov eax, Tmap1.loop_count
4268 mov Tmap1.loop_count, eax
4273 mov Tmap1.num_big_steps, eax // presumably the count of 8-pixel groups; how eax is derived is not visible in this listing
4274 and Tmap1.loop_count, 7 // keep only loop_count mod 8
4279 // 8 pixel span code
4280 // edi = dest dib bits at current pixel
4281 // esi = texture pointer at current u,v
4283 // ebx = u fraction 0.32
4284 // ecx = v fraction 0.32
4285 // edx = u frac step
4286 // ebp = v carry scratch
4288 mov al,[edi] // preread the destination cache line
4290 mov al,[esi] // get texture pixel 0
4292 add ecx,Tmap1.DeltaVFrac // increment v fraction
4293 sbb ebp,ebp // get -1 if carry
4294 add ebx,edx // increment u fraction
4296 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4297 add ecx,Tmap1.DeltaVFrac // increment v fraction
4299 sbb ebp,ebp // get -1 if carry
4300 mov [edi+0],al // store pixel 0
4302 add ebx,edx // increment u fraction
4303 mov al,[esi] // get texture pixel 1
4305 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4306 add ecx,Tmap1.DeltaVFrac // increment v fraction
4308 sbb ebp,ebp // get -1 if carry
4309 mov [edi+1],al // store pixel 1
4311 add ebx,edx // increment u fraction
4312 mov al,[esi] // get texture pixel 2
4314 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4315 add ecx,Tmap1.DeltaVFrac // increment v fraction
4317 sbb ebp,ebp // get -1 if carry
4318 mov [edi+2],al // store pixel 2
4320 add ebx,edx // increment u fraction
4321 mov al,[esi] // get texture pixel 3
4323 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4324 add ecx,Tmap1.DeltaVFrac // increment v fraction
4326 sbb ebp,ebp // get -1 if carry
4327 mov [edi+3],al // store pixel 3
4329 add ebx,edx // increment u fraction
4330 mov al,[esi] // get texture pixel 4
4332 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4333 add ecx,Tmap1.DeltaVFrac // increment v fraction
4335 sbb ebp,ebp // get -1 if carry
4336 mov [edi+4],al // store pixel 4
4338 add ebx,edx // increment u fraction
4339 mov al,[esi] // get texture pixel 5
4341 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4342 add ecx,Tmap1.DeltaVFrac // increment v fraction
4344 sbb ebp,ebp // get -1 if carry
4345 mov [edi+5],al // store pixel 5
4347 add ebx,edx // increment u fraction
4348 mov al,[esi] // get texture pixel 6
4350 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4351 add ecx,Tmap1.DeltaVFrac // increment v fraction
4353 sbb ebp,ebp // get -1 if carry
4354 mov [edi+6],al // store pixel 6
4356 add ebx,edx // increment u fraction
4358 mov al,[esi] // get texture pixel 7
4360 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4362 mov [edi+7],al // store pixel 7
4368 dec Tmap1.num_big_steps // count down the 8-pixel groups
4374 mov eax,Tmap1.loop_count
4379 mov Tmap1.loop_count, eax
4384 mov al,[edi] // preread the destination cache line
4385 // add ebx,edx // increment u fraction
4389 mov al,[esi] // get texture pixel 0
4391 add ecx,Tmap1.DeltaVFrac // increment v fraction
4392 sbb ebp,ebp // get -1 if carry
4393 add ebx,edx // increment u fraction
4394 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4395 mov [edi+0],al // store pixel 0
4397 add ecx,Tmap1.DeltaVFrac // increment v fraction
4398 sbb ebp,ebp // get -1 if carry
4399 add ebx,edx // increment u fraction
4400 mov al,[esi] // get texture pixel 1
4402 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4403 mov [edi+1],al // store pixel 1
4406 dec Tmap1.loop_count // count down the leftover-pixel counter
4414 mov al,[esi] // get texture pixel 2
4415 mov [edi],al // store that texel at the current destination address
4428 void tmapscan_pln16( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
4430 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(ushort,lx,y);
4431 Tmap1.loop_count = rx - lx;
4432 Tmap1.fx_u = fl2f(p->u);
4433 Tmap1.fx_v = fl2f(p->v);
4434 Tmap1.fx_du_dx = fl2f(dp->u);
4435 Tmap1.fx_dv_dx = fl2f(dp->v);
4437 Tmap1.fx_l = fl2f(p->l*32.0);
4438 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
4440 Tmap1.fx_u_right = fl2f(rp->u);
4441 Tmap1.fx_v_right = fl2f(rp->v);
4442 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
4443 Tmap1.bp = tmap_bitmap;
4444 Tmap1.src_offset = tmap_bitmap->w;
4447 Tmap1.FixedScale = 65536.0f;
4448 Tmap1.FixedScale8 = 2048.0f; //8192.0f; // 2^16 / 8
4452 Tmap1.UOverZ = p->u;
4453 Tmap1.VOverZ = p->v;
4454 Tmap1.OneOverZ = p->sw;
4456 Tmap1.dUOverZdX8 = dp->u*32.0f;
4457 Tmap1.dVOverZdX8 = dp->v*32.0f;
4458 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
4460 Tmap1.dUOverZdX = dp->u;
4461 Tmap1.dVOverZdX = dp->v;
4462 Tmap1.dOneOverZdX = dp->sw;
4464 Tmap1.RightUOverZ = rp->u;
4465 Tmap1.RightVOverZ = rp->v;
4466 Tmap1.RightOneOverZ = rp->sw;
4470 Tmap1.BitmapWidth = Tmap1.bp->w;
4471 Tmap1.BitmapHeight = Tmap1.bp->h;
4474 if ( Tmap1.fx_dl_dx < 0 ) {
4475 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
4476 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
4477 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
4479 // SDL_assert( Tmap1.fx_l > 31*F1_0 );
4480 // SDL_assert( Tmap1.fx_l < 66*F1_0 );
4481 // SDL_assert( Tmap1.fx_dl_dx >= 0 );
4482 // SDL_assert( Tmap1.fx_dl_dx < 31*F1_0 );
4500 // put the FPU in 32 bit mode
4501 // @todo move this out of here!
4503 fstcw Tmap1.OldFPUCW // store copy of CW
4504 mov ax,Tmap1.OldFPUCW // get it in ax
4505 //hh and eax,NOT 1100000000y // 24 bit precision
4507 mov Tmap1.FPUCW,ax // store it
4508 fldcw Tmap1.FPUCW // load the FPU
4510 mov ecx, Tmap1.loop_count // ecx = width
4512 mov edi, Tmap1.dest_row_data // edi = dest pointer
4514 // edi = pointer to start pixel in dest dib
4517 mov eax,ecx // eax and ecx = width
4518 shr ecx,5 // ecx = width / subdivision length
4519 and eax,31 // eax = width mod subdivision length
4520 jnz some_left_over // any leftover?
4522 dec ecx // no, so special case last span
4523 mov eax,32 // it's 8 pixels long
4525 mov Tmap1.Subdivisions,ecx // store widths
4526 mov Tmap1.WidthModLength,eax
4528 // mov ebx,pLeft ; get left edge pointer
4529 // mov edx,pGradients ; get gradients pointer
4531 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
4532 // st0 st1 st2 st3 st4 st5 st6 st7
4533 fld Tmap1.VOverZ // V/ZL
4534 fld Tmap1.UOverZ // U/ZL V/ZL
4535 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
4536 fld1 // 1 1/ZL U/ZL V/ZL
4537 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
4538 fld st // ZL ZL 1/ZL U/ZL V/ZL
4539 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
4540 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
4541 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
4543 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
4544 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
4546 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
4548 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
4549 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
4550 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
4551 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
4552 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
4554 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
4556 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
4557 // @todo overlap this guy
4558 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
4559 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
4560 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
4561 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
4562 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
4564 cmp ecx,0 // check for any full spans
4565 jle HandleLeftoverPixels
4569 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
4570 // UR VR V/ZR 1/ZR U/ZR UL VL
4572 // convert left side coords
4574 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
4575 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
4576 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
4578 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
4579 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
4580 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
4582 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
4584 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
4585 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
4586 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
4587 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
4589 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
4590 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
4592 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
4593 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
4594 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
4596 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
4597 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
4599 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
4600 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
4601 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
4602 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
4603 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
4604 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
4605 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
4607 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
4609 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
4610 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
4613 ; set up affine registers
4615 ; setup delta values
4617 mov eax,Tmap1.DeltaV ; get v 16.16 step
4618 mov ebx,eax ; copy it
4619 sar eax,16 ; get v int step
4620 shl ebx,16 ; get v frac step
4621 mov Tmap1.DeltaVFrac,ebx ; store it
4622 imul eax,Tmap1.src_offset ; calculate texture step for v int step
4624 mov ebx,Tmap1.DeltaU ; get u 16.16 step
4625 mov ecx,ebx ; copy it
4626 sar ebx,16 ; get u int step
4627 shl ecx,16 ; get u frac step
4628 mov Tmap1.DeltaUFrac,ecx ; store it
4629 add eax,ebx ; calculate uint + vint step
4630 mov Tmap1.UVintVfracStepVNoCarry,eax; save whole step in non-v-carry slot
4631 add eax,Tmap1.src_offset ; calculate whole step + v carry
4632 mov Tmap1.UVintVfracStepVCarry,eax ; save in v-carry slot
4636 ; check coordinate ranges
4637 mov eax, Tmap1.UFixed
4638 cmp eax, Tmap1.MinUFixed
4640 mov eax, Tmap1.MinUFixed
4641 mov Tmap1.UFixed, eax
4644 cmp eax, Tmap1.MaxUFixed
4646 mov eax, Tmap1.MaxUFixed
4647 mov Tmap1.UFixed, eax
4649 mov eax, Tmap1.VFixed
4650 cmp eax, Tmap1.MinVFixed
4652 mov eax, Tmap1.MinVFixed
4653 mov Tmap1.VFixed, eax
4656 cmp eax, Tmap1.MaxVFixed
4658 mov eax, Tmap1.MaxVFixed
4659 mov Tmap1.VFixed, eax
4663 ; setup initial coordinates
4664 mov esi,Tmap1.UFixed ; get u 16.16 fixedpoint coordinate
4666 mov ebx,esi ; copy it
4667 sar esi,16 ; get integer part
4668 shl ebx,16 ; get fractional part
4670 mov ecx,Tmap1.VFixed ; get v 16.16 fixedpoint coordinate
4672 mov edx,ecx ; copy it
4673 sar edx,16 ; get integer part
4674 shl ecx,16 ; get fractional part
4675 imul edx,Tmap1.src_offset ; calc texture scanline address
4676 add esi,edx ; calc texture offset
4677 add esi,Tmap1.pixptr ; calc address
4679 mov edx,Tmap1.DeltaUFrac ; get register copy
4685 mov ebp, Tmap1.fx_dl_dx
4696 // add Tmap1.fx_l, eax
4699 // mov eax, Tmap1.fx_l // use bx and dx to do lighting
4702 // mov eax, Tmap1.fx_dl_dx // use bx and dx to do lighting
4708 ; ************** Can't Access Stack Frame ******************
4709 ; ************** Can't Access Stack Frame ******************
4710 ; ************** Can't Access Stack Frame ******************
4712 // 8 pixel span code
4713 // edi = dest dib bits at current pixel
4714 // esi = texture pointer at current u,v
4716 // ebx = u fraction 0.32
4717 // ecx = v fraction 0.32
4718 // edx = u frac step
4719 // ebp = v carry scratch
4721 mov al,[edi] // preread the destination cache line
4724 mov al,[esi] // get texture pixel 0
4726 mov ax, gr_fade_table16[eax*2]
4728 add ecx,Tmap1.DeltaVFrac // increment v fraction
4729 sbb ebp,ebp // get -1 if carry
4730 add ebx,edx // increment u fraction
4732 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4733 add ecx,Tmap1.DeltaVFrac // increment v fraction
4735 sbb ebp,ebp // get -1 if carry
4736 // mov al, 0 // Uncomment this line to show divisions
4737 mov [edi+0],ax // store pixel 0
4739 add ebx,edx // increment u fraction
4740 mov al,[esi] // get texture pixel 1
4742 mov ax, gr_fade_table16[eax*2]
4744 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4745 add ecx,Tmap1.DeltaVFrac // increment v fraction
4747 sbb ebp,ebp // get -1 if carry
4748 mov [edi+2],ax // store pixel 1
4750 add ebx,edx // increment u fraction
4751 mov al,[esi] // get texture pixel 2
4753 mov ax, gr_fade_table16[eax*2]
4755 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4756 add ecx,Tmap1.DeltaVFrac // increment v fraction
4758 sbb ebp,ebp // get -1 if carry
4759 mov [edi+4],ax // store pixel 2
4761 add ebx,edx // increment u fraction
4762 mov al,[esi] // get texture pixel 3
4764 mov ax, gr_fade_table16[eax*2]
4766 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4767 add ecx,Tmap1.DeltaVFrac // increment v fraction
4769 sbb ebp,ebp // get -1 if carry
4770 mov [edi+6],ax // store pixel 3
4772 add ebx,edx // increment u fraction
4773 mov al,[esi] // get texture pixel 4
4775 mov ax, gr_fade_table16[eax*2]
4776 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4777 add ecx,Tmap1.DeltaVFrac // increment v fraction
4779 sbb ebp,ebp // get -1 if carry
4780 mov [edi+8],ax // store pixel 3
4782 add ebx,edx // increment u fraction
4783 mov al,[esi] // get texture pixel 4
4785 mov ax, gr_fade_table16[eax*2]
4786 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4787 add ecx,Tmap1.DeltaVFrac // increment v fraction
4789 sbb ebp,ebp // get -1 if carry
4790 mov [edi+10],ax // store pixel 3
4792 add ebx,edx // increment u fraction
4793 mov al,[esi] // get texture pixel 4
4795 mov ax, gr_fade_table16[eax*2]
4796 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4797 add ecx,Tmap1.DeltaVFrac // increment v fraction
4799 sbb ebp,ebp // get -1 if carry
4800 mov [edi+12],ax // store pixel 3
4802 add ebx,edx // increment u fraction
4803 mov al,[esi] // get texture pixel 4
4805 mov ax, gr_fade_table16[eax*2]
4806 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4807 add ecx,Tmap1.DeltaVFrac // increment v fraction
4809 sbb ebp,ebp // get -1 if carry
4810 mov [edi+14],ax // store pixel 3
4812 add ebx,edx // increment u fraction
4813 mov al,[esi] // get texture pixel 4
4815 mov ax, gr_fade_table16[eax*2]
4816 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4817 add ecx,Tmap1.DeltaVFrac // increment v fraction
4819 sbb ebp,ebp // get -1 if carry
4820 mov [edi+16],ax // store pixel 3
4822 add ebx,edx // increment u fraction
4823 mov al,[esi] // get texture pixel 4
4825 mov ax, gr_fade_table16[eax*2]
4826 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4827 add ecx,Tmap1.DeltaVFrac // increment v fraction
4829 sbb ebp,ebp // get -1 if carry
4830 mov [edi+18],ax // store pixel 3
4832 add ebx,edx // increment u fraction
4833 mov al,[esi] // get texture pixel 4
4835 mov ax, gr_fade_table16[eax*2]
4836 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4837 add ecx,Tmap1.DeltaVFrac // increment v fraction
4839 sbb ebp,ebp // get -1 if carry
4840 mov [edi+20],ax // store pixel 3
4842 add ebx,edx // increment u fraction
4843 mov al,[esi] // get texture pixel 4
4845 mov ax, gr_fade_table16[eax*2]
4848 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4849 add ecx,Tmap1.DeltaVFrac // increment v fraction
4851 sbb ebp,ebp // get -1 if carry
4852 mov [edi+22],ax // store pixel 3
4854 add ebx,edx // increment u fraction
4855 mov al,[esi] // get texture pixel 4
4857 mov ax, gr_fade_table16[eax*2]
4860 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4861 add ecx,Tmap1.DeltaVFrac // increment v fraction
4863 sbb ebp,ebp // get -1 if carry
4864 mov [edi+24],ax // store pixel 3
4866 add ebx,edx // increment u fraction
4867 mov al,[esi] // get texture pixel 4
4869 mov ax, gr_fade_table16[eax*2]
4872 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4873 add ecx,Tmap1.DeltaVFrac // increment v fraction
4875 sbb ebp,ebp // get -1 if carry
4876 mov [edi+26],ax // store pixel 3
4878 add ebx,edx // increment u fraction
4879 mov al,[esi] // get texture pixel 4
4881 mov ax, gr_fade_table16[eax*2]
4884 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4885 add ecx,Tmap1.DeltaVFrac // increment v fraction
4887 sbb ebp,ebp // get -1 if carry
4888 mov [edi+28],ax // store pixel 3
4890 add ebx,edx // increment u fraction
4891 mov al,[esi] // get texture pixel 4
4893 mov ax, gr_fade_table16[eax*2]
4896 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4897 add ecx,Tmap1.DeltaVFrac // increment v fraction
4899 sbb ebp,ebp // get -1 if carry
4900 mov [edi+30],ax // store pixel 3
4902 add ebx,edx // increment u fraction
4903 mov al,[esi] // get texture pixel 4
4905 mov ax, gr_fade_table16[eax*2]
4908 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4909 add ecx,Tmap1.DeltaVFrac // increment v fraction
4911 sbb ebp,ebp // get -1 if carry
4912 mov [edi+32],ax // store pixel 3
4914 add ebx,edx // increment u fraction
4915 mov al,[esi] // get texture pixel 4
4917 mov ax, gr_fade_table16[eax*2]
4920 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4921 add ecx,Tmap1.DeltaVFrac // increment v fraction
4923 sbb ebp,ebp // get -1 if carry
4924 mov [edi+34],ax // store pixel 3
4926 add ebx,edx // increment u fraction
4927 mov al,[esi] // get texture pixel 4
4929 mov ax, gr_fade_table16[eax*2]
4932 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4933 add ecx,Tmap1.DeltaVFrac // increment v fraction
4935 sbb ebp,ebp // get -1 if carry
4936 mov [edi+36],ax // store pixel 3
4938 add ebx,edx // increment u fraction
4939 mov al,[esi] // get texture pixel 4
4941 mov ax, gr_fade_table16[eax*2]
4944 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4945 add ecx,Tmap1.DeltaVFrac // increment v fraction
4947 sbb ebp,ebp // get -1 if carry
4948 mov [edi+38],ax // store pixel 3
4950 add ebx,edx // increment u fraction
4951 mov al,[esi] // get texture pixel 4
4953 mov ax, gr_fade_table16[eax*2]
4956 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4957 add ecx,Tmap1.DeltaVFrac // increment v fraction
4959 sbb ebp,ebp // get -1 if carry
4960 mov [edi+40],ax // store pixel 3
4962 add ebx,edx // increment u fraction
4963 mov al,[esi] // get texture pixel 4
4965 mov ax, gr_fade_table16[eax*2]
4968 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4969 add ecx,Tmap1.DeltaVFrac // increment v fraction
4971 sbb ebp,ebp // get -1 if carry
4972 mov [edi+42],ax // store pixel 3
4974 add ebx,edx // increment u fraction
4975 mov al,[esi] // get texture pixel 4
4977 mov ax, gr_fade_table16[eax*2]
4980 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4981 add ecx,Tmap1.DeltaVFrac // increment v fraction
4983 sbb ebp,ebp // get -1 if carry
4984 mov [edi+44],ax // store pixel 3
4986 add ebx,edx // increment u fraction
4987 mov al,[esi] // get texture pixel 4
4989 mov ax, gr_fade_table16[eax*2]
4992 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4993 add ecx,Tmap1.DeltaVFrac // increment v fraction
4995 sbb ebp,ebp // get -1 if carry
4996 mov [edi+46],ax // store pixel 3
4998 add ebx,edx // increment u fraction
4999 mov al,[esi] // get texture pixel 4
5001 mov ax, gr_fade_table16[eax*2]
5004 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5005 add ecx,Tmap1.DeltaVFrac // increment v fraction
5007 sbb ebp,ebp // get -1 if carry
5008 mov [edi+48],ax // store pixel 3
5010 add ebx,edx // increment u fraction
5011 mov al,[esi] // get texture pixel 4
5013 mov ax, gr_fade_table16[eax*2]
5016 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5017 add ecx,Tmap1.DeltaVFrac // increment v fraction
5019 sbb ebp,ebp // get -1 if carry
5020 mov [edi+50],ax // store pixel 3
5022 add ebx,edx // increment u fraction
5023 mov al,[esi] // get texture pixel 4
5025 mov ax, gr_fade_table16[eax*2]
5028 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5029 add ecx,Tmap1.DeltaVFrac // increment v fraction
5033 sbb ebp,ebp // get -1 if carry
5034 mov [edi+52],ax // store pixel 3
5036 add ebx,edx // increment u fraction
5037 mov al,[esi] // get texture pixel 4
5039 mov ax, gr_fade_table16[eax*2]
5042 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5043 add ecx,Tmap1.DeltaVFrac // increment v fraction
5045 sbb ebp,ebp // get -1 if carry
5046 mov [edi+54],ax // store pixel 3
5048 add ebx,edx // increment u fraction
5049 mov al,[esi] // get texture pixel 4
5051 mov ax, gr_fade_table16[eax*2]
5053 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5054 add ecx,Tmap1.DeltaVFrac // increment v fraction
5056 sbb ebp,ebp // get -1 if carry
5057 mov [edi+56],ax // store pixel 4
5059 add ebx,edx // increment u fraction
5060 mov al,[esi] // get texture pixel 5
5062 mov ax, gr_fade_table16[eax*2]
5064 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5065 add ecx,Tmap1.DeltaVFrac // increment v fraction
5067 sbb ebp,ebp // get -1 if carry
5068 mov [edi+58],ax // store pixel 5
5070 add ebx,edx // increment u fraction
5071 mov al,[esi] // get texture pixel 6
5073 mov ax, gr_fade_table16[eax*2]
5075 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5076 add ecx,Tmap1.DeltaVFrac // increment v fraction
5078 sbb ebp,ebp // get -1 if carry
5079 mov [edi+60],ax // store pixel 6
5081 add ebx,edx // increment u fraction
5083 mov al,[esi] // get texture pixel 7
5085 mov ax, gr_fade_table16[eax*2]
5087 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5089 mov [edi+62],ax // store pixel 7
5093 ; ************** Okay to Access Stack Frame ****************
5094 ; ************** Okay to Access Stack Frame ****************
5095 ; ************** Okay to Access Stack Frame ****************
5098 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
5099 ; ZR V/ZR 1/ZR U/ZR UL VL
5101 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
5102 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
5103 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
5104 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
5106 add edi,64 ; increment to next span
5107 dec Tmap1.Subdivisions ; decrement span count
5108 jnz SpanLoop ; loop back
5110 // save new lighting values
5113 // mov Tmap1.fx_l, eax
5117 // mov Tmap1.fx_dl_dx, eax
5119 HandleLeftoverPixels:
5122 mov esi,Tmap1.pixptr ; load texture pointer
5124 ; edi = dest dib bits
5125 ; esi = current texture dib bits
5126 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
5127 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
5129 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
5130 jz FPUReturn ; nope, pop the FPU and bail
5132 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
5134 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
5135 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
5136 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
5138 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
5139 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
5140 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
5142 dec Tmap1.WidthModLength ; calc how many steps to take
5143 jz OnePixelSpan ; just one, don't do deltas
5145 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
5148 ; @todo rearrange things so we don't need these two instructions
5149 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
5150 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
5152 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
5153 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
5154 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
5155 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
5156 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
5157 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
5159 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
5161 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
5162 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
5164 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
5166 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
5167 fxch st(1) ; VR UR inv. inv. inv. dU VL
5168 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
5169 fxch st(6) ; dV UR inv. inv. inv. dU VR
5171 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
5172 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
5173 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
5175 fxch st(4) ; dU inv. inv. inv. UR VR
5176 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
5177 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
5178 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
5180 ; @todo gross! these are to line up with the other loop
5181 fld st(1) ; inv. inv. inv. inv. UR VR
5182 fld st(2) ; inv. inv. inv. inv. inv. UR VR
5187 ; setup delta values
5188 mov eax, Tmap1.DeltaV // get v 16.16 step
5189 mov ebx, eax // copy it
5190 sar eax, 16 // get v int step
5191 shl ebx, 16 // get v frac step
5192 mov Tmap1.DeltaVFrac, ebx // store it
5193 imul eax, Tmap1.src_offset // calc texture step for v int step
5195 mov ebx, Tmap1.DeltaU // get u 16.16 step
5196 mov ecx, ebx // copy it
5197 sar ebx, 16 // get the u int step
5198 shl ecx, 16 // get the u frac step
5199 mov Tmap1.DeltaUFrac, ecx // store it
5200 add eax, ebx // calc uint + vint step
5201 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
5202 add eax, Tmap1.src_offset // calc whole step + v carry
5203 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
5210 ; check coordinate ranges
5211 mov eax, Tmap1.UFixed
5212 cmp eax, Tmap1.MinUFixed
5214 mov eax, Tmap1.MinUFixed
5215 mov Tmap1.UFixed, eax
5218 cmp eax, Tmap1.MaxUFixed
5220 mov eax, Tmap1.MaxUFixed
5221 mov Tmap1.UFixed, eax
5223 mov eax, Tmap1.VFixed
5224 cmp eax, Tmap1.MinVFixed
5226 mov eax, Tmap1.MinVFixed
5227 mov Tmap1.VFixed, eax
5230 cmp eax, Tmap1.MaxVFixed
5232 mov eax, Tmap1.MaxVFixed
5233 mov Tmap1.VFixed, eax
5240 ; setup initial coordinates
5241 mov esi, Tmap1.UFixed // get u 16.16
5242 mov ebx, esi // copy it
5243 sar esi, 16 // get integer part
5244 shl ebx, 16 // get fractional part
5246 mov ecx, Tmap1.VFixed // get v 16.16
5247 mov edx, ecx // copy it
5248 sar edx, 16 // get integer part
5249 shl ecx, 16 // get fractional part
5250 imul edx, Tmap1.src_offset // calc texture scanline address
5251 add esi, edx // calc texture offset
5252 add esi, Tmap1.pixptr // calc address
5254 ; set edi = address of first pixel to modify
5255 ; mov edi, Tmap1.dest_row_data
5264 mov edx, Tmap1.DeltaUFrac
5266 cmp Tmap1.WidthModLength, 1
5271 mov ebx, Tmap1.fx_l_right
5278 // slow but maybe better
5281 mov ebx, Tmap1.WidthModLength
5286 mov eax, Tmap1.fx_dl_dx
5296 inc Tmap1.WidthModLength
5297 mov eax,Tmap1.WidthModLength
5301 mov Tmap1.WidthModLength, eax
5305 mov al,[edi] // preread the destination cache line
5308 mov al,[esi] // get texture pixel 0
5310 mov ax, gr_fade_table16[eax*2]
5312 add ecx,Tmap1.DeltaVFrac // increment v fraction
5313 sbb ebp,ebp // get -1 if carry
5314 add ebx,edx // increment u fraction
5315 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5316 mov [edi+0],ax // store pixel 0
5318 add ecx,Tmap1.DeltaVFrac // increment v fraction
5319 sbb ebp,ebp // get -1 if carry
5320 add ebx,edx // increment u fraction
5321 mov al,[esi] // get texture pixel 1
5323 mov ax, gr_fade_table16[eax*2]
5325 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5326 mov [edi+2],ax // store pixel 1
5329 dec Tmap1.WidthModLength
5337 mov al,[esi] // get texture pixel 2
5339 mov ax, gr_fade_table16[eax*2]
5340 mov [edi],ax // store pixel 2
5355 OldWay: // This is 6% slower than above
5357 mov ebx,Tmap1.UFixed ; get starting coordinates
5358 mov ecx,Tmap1.VFixed ; for span
5360 ; leftover pixels loop
5361 ; edi = dest dib bits
5362 ; esi = texture dib bits
5368 mov eax,ecx ; copy v
5370 imul eax,Tmap1.src_offset ; scan offset
5371 mov edx,ebx ; copy u
5373 add eax,edx ; texture offset
5374 mov al,[esi+eax] ; get source pixel
5376 mov [edi],al ; store it
5378 add ebx,Tmap1.DeltaU ; increment u coordinate
5379 add ecx,Tmap1.DeltaV ; increment v coordinate
5381 dec Tmap1.WidthModLength ; decrement loop count
5382 jl FPUReturn ; finish up
5386 mov eax,ecx ; copy v
5388 imul eax,Tmap1.src_offset ; scan offset
5389 mov edx,ebx ; copy u
5391 add eax,edx ; texture offset
5392 mov al,[esi+eax] ; get source pixel
5393 mov [edi],al ; store it
5395 add ebx,Tmap1.DeltaU ; increment u coordinate
5396 add ecx,Tmap1.DeltaV ; increment v coordinate
5398 dec Tmap1.WidthModLength ; decrement loop count
5399 jge LeftoverLoop ; finish up
5404 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
5405 ; xxx xxx xxx xxx xxx xxx xxx
5416 fldcw Tmap1.OldFPUCW // restore the FPU
// tmapscan_lnn16: draws one horizontal scanline of an 8-bit texture into a
// 16-bit destination. u and v advance by a constant 16.16 step per pixel
// (no perspective divide in this routine) and every texel is translated
// through palman_8_16_xlat before it is stored.
//   lx, rx : left/right x of the span; rx - lx is stored as the loop count
//   y      : row handed to GR_SCREEN_PTR to locate the first destination pixel
//   p      : values at the left end of the span (u and v are read)
//   dp     : per-pixel deltas (u and v are read)
//   rp     : values at the right end (u and v are read, only for the range test)
//   flags  : not referenced by any statement visible in this routine
// Working state is kept in Tmap1; the texture is taken from tmap_bitmap.
// Returns without drawing when a start or end coordinate is past the bitmap.
5433 void tmapscan_lnn16( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
// Destination is addressed as ushort pixels, the texture as bytes with a
// row pitch equal to the bitmap width.
5435 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(ushort,lx,y);
5436 Tmap1.loop_count = rx - lx;
5437 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
5438 Tmap1.bp = tmap_bitmap;
5439 Tmap1.src_offset = tmap_bitmap->w;
// Convert the float inputs with fl2f; the assembly below treats the results
// as 16.16 fixed point.
5441 Tmap1.fx_u = fl2f(p->u);
5442 Tmap1.fx_v = fl2f(p->v);
5443 Tmap1.fx_du_dx = fl2f(dp->u);
5444 Tmap1.fx_dv_dx = fl2f(dp->v);
5445 Tmap1.fx_u_right = fl2f(rp->u);
5446 Tmap1.fx_v_right = fl2f(rp->v);
// Reject the scanline if either end point lies at or beyond the bitmap's
// width/height (f2i presumably yields the integer texel coordinate).
// NOTE(review): only the upper bounds are tested here; negative coordinates
// are not rejected by these checks - confirm callers never pass them.
5450 end = f2i(Tmap1.fx_u);
5451 if ( end >= Tmap1.bp->w ) return;
5453 end = f2i(Tmap1.fx_v);
5454 if ( end >= Tmap1.bp->h ) return;
5456 end = f2i(Tmap1.fx_u_right);
5457 if ( end >= Tmap1.bp->w ) return;
5459 end = f2i(Tmap1.fx_v_right);
5460 if ( end >= Tmap1.bp->h ) return;
// Split each 16.16 step into a whole-texel part and a fraction. The fraction
// is shifted into the top 16 bits of a dword so that overflow of the running
// fraction shows up in the carry flag. The whole-texel parts of u and v are
// folded into a single byte offset, using src_offset as the row pitch.
5472 ; setup delta values
5473 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
5474 mov ebx, eax // copy it
5475 sar eax, 16 // get v int step
5476 shl ebx, 16 // get v frac step
5477 mov Tmap1.DeltaVFrac, ebx // store it
5478 imul eax, Tmap1.src_offset // calc texture step for v int step
5480 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
5481 mov ecx, ebx // copy it
5482 sar ebx, 16 // get the u int step
5483 shl ecx, 16 // get the u frac step
5484 mov Tmap1.DeltaUFrac, ecx // store it
5485 add eax, ebx // calc uint + vint step
// Two texture steps are precomputed: the plain one, and one with an extra
// row added for pixels where the v fraction carries.
5486 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
5487 add eax, Tmap1.src_offset // calc whole step + v carry
5488 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
// After this block: esi = pixptr + int(v)*src_offset + int(u),
// ebx = u fraction and ecx = v fraction, each in the top 16 bits.
5490 ; setup initial coordinates
5491 mov esi, Tmap1.fx_u // get u 16.16
5492 mov ebx, esi // copy it
5493 sar esi, 16 // get integer part
5494 shl ebx, 16 // get fractional part
5496 mov ecx, Tmap1.fx_v // get v 16.16
5497 mov edx, ecx // copy it
5498 sar edx, 16 // get integer part
5499 shl ecx, 16 // get fractional part
5500 imul edx, Tmap1.src_offset // calc texture scanline address
5501 add esi, edx // calc texture offset
5502 add esi, Tmap1.pixptr // calc address
5504 ; set edi = address of first pixel to modify
5505 mov edi, Tmap1.dest_row_data
// edx holds the u fraction step for the whole scanline.
5507 mov edx, Tmap1.DeltaUFrac
// The pixel count passes through eax, is saved as num_big_steps, and
// loop_count is then masked to its low three bits (0..7): the pixels left
// over once whole 8-pixel spans are drawn.
// NOTE(review): presumably num_big_steps = count / 8 - confirm the
// arithmetic applied to eax between these stores.
5509 mov eax, Tmap1.loop_count
5511 mov Tmap1.loop_count, eax
5516 mov Tmap1.num_big_steps, eax
5517 and Tmap1.loop_count, 7
5523 // 8 pixel span code
5524 // edi = dest dib bits at current pixel
5525 // esi = texture pointer at current u,v
5527 // ebx = u fraction 0.32
5528 // ecx = v fraction 0.32
5529 // edx = u frac step
5530 // ebp = v carry scratch
// Per-pixel stepping pattern used throughout:
//   add ecx,DeltaVFrac  - advance the v fraction; carry set on overflow
//   sbb ebp,ebp         - ebp = -1 if that carry was set, else 0
//   add ebx,edx         - advance the u fraction; carry set on overflow
//   adc esi,UVintVfracStep[4*ebp]
//                       - add the step selected by ebp plus the u carry
// The mov/movzx instructions placed between an add and its adc do not
// modify flags, which is why the steps can be interleaved with the pixel
// loads and stores. Instruction order matters here; do not reorder.
// NOTE(review): index -1 presumably selects the v-carry slot stored just
// before the no-carry slot - confirm against the Tmap1 layout.
// ebp is overwritten here, so Tmap1 must be reachable without the frame
// pointer (presumably a global - confirm).
5532 mov al,[edi] // preread the destination cache line
5534 movzx eax,byte ptr [esi] // get texture pixel 0
5536 add ecx,Tmap1.DeltaVFrac // increment v fraction
5537 sbb ebp,ebp // get -1 if carry
5538 add ebx,edx // increment u fraction
5540 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5541 add ecx,Tmap1.DeltaVFrac // increment v fraction
5543 sbb ebp,ebp // get -1 if carry
5544 mov ax, palman_8_16_xlat[eax*2] // translate 8-bit texel to 16-bit pixel
5545 mov [edi+0],ax // store pixel 0
5547 add ebx,edx // increment u fraction
5548 movzx eax,byte ptr [esi] // get texture pixel 1
5550 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5551 add ecx,Tmap1.DeltaVFrac // increment v fraction
5553 sbb ebp,ebp // get -1 if carry
5554 mov ax, palman_8_16_xlat[eax*2]
5555 mov [edi+2],ax // store pixel 1
5557 add ebx,edx // increment u fraction
5558 movzx eax,byte ptr [esi] // get texture pixel 2
5560 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5561 add ecx,Tmap1.DeltaVFrac // increment v fraction
5563 sbb ebp,ebp // get -1 if carry
5564 mov ax, palman_8_16_xlat[eax*2]
5565 mov [edi+4],ax // store pixel 2
5567 add ebx,edx // increment u fraction
5568 movzx eax,byte ptr [esi] // get texture pixel 3
5570 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5571 add ecx,Tmap1.DeltaVFrac // increment v fraction
5573 sbb ebp,ebp // get -1 if carry
5574 mov ax, palman_8_16_xlat[eax*2]
5575 mov [edi+6],ax // store pixel 3
5577 add ebx,edx // increment u fraction
5578 movzx eax,byte ptr [esi] // get texture pixel 4
5580 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5581 add ecx,Tmap1.DeltaVFrac // increment v fraction
5583 sbb ebp,ebp // get -1 if carry
5584 mov ax, palman_8_16_xlat[eax*2]
5585 mov [edi+8],ax // store pixel 4
5587 add ebx,edx // increment u fraction
5588 movzx eax,byte ptr [esi] // get texture pixel 5
5590 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5591 add ecx,Tmap1.DeltaVFrac // increment v fraction
5593 sbb ebp,ebp // get -1 if carry
5594 mov ax, palman_8_16_xlat[eax*2]
5595 mov [edi+10],ax // store pixel 5
5597 add ebx,edx // increment u fraction
5598 movzx eax,byte ptr [esi] // get texture pixel 6
5600 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5601 add ecx,Tmap1.DeltaVFrac // increment v fraction
5603 sbb ebp,ebp // get -1 if carry
5604 mov ax, palman_8_16_xlat[eax*2]
5605 mov [edi+12],ax // store pixel 6
5607 add ebx,edx // increment u fraction
5609 movzx eax,byte ptr [esi] // get texture pixel 7
5611 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5613 mov ax, palman_8_16_xlat[eax*2]
5614 mov [edi+14],ax // store pixel 7
// One 8-pixel span finished; count down the remaining spans.
5620 dec Tmap1.num_big_steps
// Leftover pixels (loop_count was masked to 0..7 above).
// NOTE(review): looks like the leftovers are drawn two per pass with one
// final single pixel - confirm against the branch structure.
5626 mov eax,Tmap1.loop_count
5631 mov Tmap1.loop_count, eax
5636 mov al,[edi] // preread the destination cache line
5637 // add ebx,edx // increment u fraction
5641 movzx eax,byte ptr [esi] // get texture pixel 0
5643 add ecx,Tmap1.DeltaVFrac // increment v fraction
5644 sbb ebp,ebp // get -1 if carry
5645 add ebx,edx // increment u fraction
5646 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5647 mov ax, palman_8_16_xlat[eax*2]
5648 mov [edi+0],ax // store pixel 0
5650 add ecx,Tmap1.DeltaVFrac // increment v fraction
5651 sbb ebp,ebp // get -1 if carry
5652 add ebx,edx // increment u fraction
5653 movzx eax,byte ptr [esi] // get texture pixel 1
5655 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5656 mov ax, palman_8_16_xlat[eax*2]
5657 mov [edi+2],ax // store pixel 1
5660 dec Tmap1.loop_count
// Single pixel: fetch, translate, store; no further stepping is done.
5668 movzx eax,byte ptr [esi] // get texture pixel 0
5669 mov ax, palman_8_16_xlat[eax*2]
5670 mov [edi],ax // store pixel 0
// tmapscan_lnn32: draws one horizontal scanline of an 8-bit texture into a
// 32-bit destination. u and v advance by a constant 16.16 step per pixel
// (no perspective divide in this routine) and every texel is translated
// through palman_8_32_xlat before it is stored.
//   lx, rx : left/right x of the span; rx - lx is stored as the loop count
//   y      : row handed to GR_SCREEN_PTR to locate the first destination pixel
//   p      : values at the left end of the span (u and v are read)
//   dp     : per-pixel deltas (u and v are read)
//   rp     : values at the right end (u and v are read, only for the range test)
//   flags  : not referenced by any statement visible in this routine
// Working state is kept in Tmap1; the texture is taken from tmap_bitmap.
// Returns without drawing when a start or end coordinate is past the bitmap.
5685 void tmapscan_lnn32( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
// Destination is addressed as uint pixels, the texture as bytes with a
// row pitch equal to the bitmap width.
5687 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(uint,lx,y);
5688 Tmap1.loop_count = rx - lx;
5689 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
5690 Tmap1.bp = tmap_bitmap;
5691 Tmap1.src_offset = tmap_bitmap->w;
// Convert the float inputs with fl2f; the assembly below treats the results
// as 16.16 fixed point.
5693 Tmap1.fx_u = fl2f(p->u);
5694 Tmap1.fx_v = fl2f(p->v);
5695 Tmap1.fx_du_dx = fl2f(dp->u);
5696 Tmap1.fx_dv_dx = fl2f(dp->v);
5697 Tmap1.fx_u_right = fl2f(rp->u);
5698 Tmap1.fx_v_right = fl2f(rp->v);
// Reject the scanline if either end point lies at or beyond the bitmap's
// width/height (f2i presumably yields the integer texel coordinate).
// NOTE(review): only the upper bounds are tested here; negative coordinates
// are not rejected by these checks - confirm callers never pass them.
5702 end = f2i(Tmap1.fx_u);
5703 if ( end >= Tmap1.bp->w ) return;
5705 end = f2i(Tmap1.fx_v);
5706 if ( end >= Tmap1.bp->h ) return;
5708 end = f2i(Tmap1.fx_u_right);
5709 if ( end >= Tmap1.bp->w ) return;
5711 end = f2i(Tmap1.fx_v_right);
5712 if ( end >= Tmap1.bp->h ) return;
// Split each 16.16 step into a whole-texel part and a fraction. The fraction
// is shifted into the top 16 bits of a dword so that overflow of the running
// fraction shows up in the carry flag. The whole-texel parts of u and v are
// folded into a single byte offset, using src_offset as the row pitch.
5724 ; setup delta values
5725 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
5726 mov ebx, eax // copy it
5727 sar eax, 16 // get v int step
5728 shl ebx, 16 // get v frac step
5729 mov Tmap1.DeltaVFrac, ebx // store it
5730 imul eax, Tmap1.src_offset // calc texture step for v int step
5732 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
5733 mov ecx, ebx // copy it
5734 sar ebx, 16 // get the u int step
5735 shl ecx, 16 // get the u frac step
5736 mov Tmap1.DeltaUFrac, ecx // store it
5737 add eax, ebx // calc uint + vint step
// Two texture steps are precomputed: the plain one, and one with an extra
// row added for pixels where the v fraction carries.
5738 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
5739 add eax, Tmap1.src_offset // calc whole step + v carry
5740 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
// After this block: esi = pixptr + int(v)*src_offset + int(u),
// ebx = u fraction and ecx = v fraction, each in the top 16 bits.
5742 ; setup initial coordinates
5743 mov esi, Tmap1.fx_u // get u 16.16
5744 mov ebx, esi // copy it
5745 sar esi, 16 // get integer part
5746 shl ebx, 16 // get fractional part
5748 mov ecx, Tmap1.fx_v // get v 16.16
5749 mov edx, ecx // copy it
5750 sar edx, 16 // get integer part
5751 shl ecx, 16 // get fractional part
5752 imul edx, Tmap1.src_offset // calc texture scanline address
5753 add esi, edx // calc texture offset
5754 add esi, Tmap1.pixptr // calc address
5756 ; set edi = address of first pixel to modify
5757 mov edi, Tmap1.dest_row_data
// edx holds the u fraction step for the whole scanline.
5759 mov edx, Tmap1.DeltaUFrac
// The pixel count passes through eax, is saved as num_big_steps, and
// loop_count is then masked to its low three bits (0..7): the pixels left
// over once whole 8-pixel spans are drawn.
// NOTE(review): presumably num_big_steps = count / 8 - confirm the
// arithmetic applied to eax between these stores.
5761 mov eax, Tmap1.loop_count
5763 mov Tmap1.loop_count, eax
5768 mov Tmap1.num_big_steps, eax
5769 and Tmap1.loop_count, 7
5775 // 8 pixel span code
5776 // edi = dest dib bits at current pixel
5777 // esi = texture pointer at current u,v
5779 // ebx = u fraction 0.32
5780 // ecx = v fraction 0.32
5781 // edx = u frac step
5782 // ebp = v carry scratch
// Per-pixel stepping pattern used throughout:
//   add ecx,DeltaVFrac  - advance the v fraction; carry set on overflow
//   sbb ebp,ebp         - ebp = -1 if that carry was set, else 0
//   add ebx,edx         - advance the u fraction; carry set on overflow
//   adc esi,UVintVfracStep[4*ebp]
//                       - add the step selected by ebp plus the u carry
// The mov/movzx instructions placed between an add and its adc do not
// modify flags, which is why the steps can be interleaved with the pixel
// loads and stores. Instruction order matters here; do not reorder.
// NOTE(review): index -1 presumably selects the v-carry slot stored just
// before the no-carry slot - confirm against the Tmap1 layout.
// ebp is overwritten here, so Tmap1 must be reachable without the frame
// pointer (presumably a global - confirm).
// Destination pixels are 4 bytes apart, so pixel k is stored at [edi+4*k].
5784 mov al,[edi] // preread the destination cache line
5786 movzx eax,byte ptr [esi] // get texture pixel 0
5788 add ecx,Tmap1.DeltaVFrac // increment v fraction
5789 sbb ebp,ebp // get -1 if carry
5790 add ebx,edx // increment u fraction
5792 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5793 add ecx,Tmap1.DeltaVFrac // increment v fraction
5795 sbb ebp,ebp // get -1 if carry
5796 mov eax, palman_8_32_xlat[eax*4] // translate 8-bit texel to 32-bit pixel
5797 mov [edi+0],eax // store pixel 0
5799 add ebx,edx // increment u fraction
5800 movzx eax,byte ptr [esi] // get texture pixel 1
5802 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5803 add ecx,Tmap1.DeltaVFrac // increment v fraction
5805 sbb ebp,ebp // get -1 if carry
5806 mov eax, palman_8_32_xlat[eax*4]
5807 mov [edi+4],eax // store pixel 1
5809 add ebx,edx // increment u fraction
5810 movzx eax,byte ptr [esi] // get texture pixel 2
5812 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5813 add ecx,Tmap1.DeltaVFrac // increment v fraction
5815 sbb ebp,ebp // get -1 if carry
5816 mov eax, palman_8_32_xlat[eax*4]
5817 mov [edi+8],eax // store pixel 2
5819 add ebx,edx // increment u fraction
5820 movzx eax,byte ptr [esi] // get texture pixel 3
5822 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5823 add ecx,Tmap1.DeltaVFrac // increment v fraction
5825 sbb ebp,ebp // get -1 if carry
5826 mov eax, palman_8_32_xlat[eax*4]
5827 mov [edi+12],eax // store pixel 3
5829 add ebx,edx // increment u fraction
5830 movzx eax,byte ptr [esi] // get texture pixel 4
5832 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5833 add ecx,Tmap1.DeltaVFrac // increment v fraction
5835 sbb ebp,ebp // get -1 if carry
5836 mov eax, palman_8_32_xlat[eax*4]
5837 mov [edi+16],eax // store pixel 4
5839 add ebx,edx // increment u fraction
5840 movzx eax,byte ptr [esi] // get texture pixel 5
5842 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5843 add ecx,Tmap1.DeltaVFrac // increment v fraction
5845 sbb ebp,ebp // get -1 if carry
5846 mov eax, palman_8_32_xlat[eax*4]
5847 mov [edi+20],eax // store pixel 5
5849 add ebx,edx // increment u fraction
5850 movzx eax,byte ptr [esi] // get texture pixel 6
5852 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5853 add ecx,Tmap1.DeltaVFrac // increment v fraction
5855 sbb ebp,ebp // get -1 if carry
5856 mov eax, palman_8_32_xlat[eax*4]
5857 mov [edi+24],eax // store pixel 6
5859 add ebx,edx // increment u fraction
5861 movzx eax,byte ptr [esi] // get texture pixel 7
5863 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5865 mov eax, palman_8_32_xlat[eax*4]
5866 mov [edi+28],eax // store pixel 7
// One 8-pixel span finished; count down the remaining spans.
5872 dec Tmap1.num_big_steps
// Leftover pixels (loop_count was masked to 0..7 above).
// NOTE(review): looks like the leftovers are drawn two per pass with one
// final single pixel - confirm against the branch structure.
5878 mov eax,Tmap1.loop_count
5883 mov Tmap1.loop_count, eax
5888 mov al,[edi] // preread the destination cache line
5889 // add ebx,edx // increment u fraction
5893 movzx eax,byte ptr [esi] // get texture pixel 0
5895 add ecx,Tmap1.DeltaVFrac // increment v fraction
5896 sbb ebp,ebp // get -1 if carry
5897 add ebx,edx // increment u fraction
5898 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5899 mov eax, palman_8_32_xlat[eax*4]
5900 mov [edi+0],eax // store pixel 0
5902 add ecx,Tmap1.DeltaVFrac // increment v fraction
5903 sbb ebp,ebp // get -1 if carry
5904 add ebx,edx // increment u fraction
5905 movzx eax,byte ptr [esi] // get texture pixel 1
5907 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5908 mov eax, palman_8_32_xlat[eax*4]
5909 mov [edi+4],eax // store pixel 1
5912 dec Tmap1.loop_count
// Single pixel: fetch, translate, store; no further stepping is done.
5920 movzx eax,byte ptr [esi] // get texture pixel 0
5921 mov eax, palman_8_32_xlat[eax*4]
5922 mov [edi],eax // store pixel 0
5936 void tmapscan_pln32( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
5938 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(uint,lx,y);
5939 Tmap1.loop_count = rx - lx;
5940 Tmap1.fx_u = fl2f(p->u);
5941 Tmap1.fx_v = fl2f(p->v);
5942 Tmap1.fx_du_dx = fl2f(dp->u);
5943 Tmap1.fx_dv_dx = fl2f(dp->v);
5945 Tmap1.fx_l = fl2f(p->l*32.0);
5946 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
5948 Tmap1.fx_u_right = fl2f(rp->u);
5949 Tmap1.fx_v_right = fl2f(rp->v);
5950 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
5951 Tmap1.bp = tmap_bitmap;
5952 Tmap1.src_offset = tmap_bitmap->w;
5955 Tmap1.FixedScale = 65536.0f;
5956 Tmap1.FixedScale8 = 2048.0f; //8192.0f; // 2^16 / 8
5960 Tmap1.UOverZ = p->u;
5961 Tmap1.VOverZ = p->v;
5962 Tmap1.OneOverZ = p->sw;
5964 Tmap1.dUOverZdX8 = dp->u*32.0f;
5965 Tmap1.dVOverZdX8 = dp->v*32.0f;
5966 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
5968 Tmap1.dUOverZdX = dp->u;
5969 Tmap1.dVOverZdX = dp->v;
5970 Tmap1.dOneOverZdX = dp->sw;
5972 Tmap1.RightUOverZ = rp->u;
5973 Tmap1.RightVOverZ = rp->v;
5974 Tmap1.RightOneOverZ = rp->sw;
5977 Tmap1.BitmapWidth = Tmap1.bp->w;
5978 Tmap1.BitmapHeight = Tmap1.bp->h;
5981 if ( Tmap1.fx_dl_dx < 0 ) {
5982 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
5983 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
5984 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
5986 // SDL_assert( Tmap1.fx_l > 31*F1_0 );
5987 // SDL_assert( Tmap1.fx_l < 66*F1_0 );
5988 // SDL_assert( Tmap1.fx_dl_dx >= 0 );
5989 // SDL_assert( Tmap1.fx_dl_dx < 31*F1_0 );
6007 // put the FPU in 32 bit mode
6008 // @todo move this out of here!
6010 fstcw Tmap1.OldFPUCW // store copy of CW
6011 mov ax,Tmap1.OldFPUCW // get it in ax
6012 //hh and eax,NOT 1100000000y // 24 bit precision
6014 mov Tmap1.FPUCW,ax // store it
6015 fldcw Tmap1.FPUCW // load the FPU
6017 mov ecx, Tmap1.loop_count // ecx = width
6019 mov edi, Tmap1.dest_row_data // edi = dest pointer
6021 // edi = pointer to start pixel in dest dib
6024 mov eax,ecx // eax and ecx = width
6025 shr ecx,5 // ecx = width / subdivision length
6026 and eax,31 // eax = width mod subdivision length
6027 jnz some_left_over // any leftover?
6029 dec ecx // no, so special case last span
6030 mov eax,32 // it's 8 pixels long
6032 mov Tmap1.Subdivisions,ecx // store widths
6033 mov Tmap1.WidthModLength,eax
6035 // mov ebx,pLeft ; get left edge pointer
6036 // mov edx,pGradients ; get gradients pointer
6038 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
6039 // st0 st1 st2 st3 st4 st5 st6 st7
6040 fld Tmap1.VOverZ // V/ZL
6041 fld Tmap1.UOverZ // U/ZL V/ZL
6042 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
6043 fld1 // 1 1/ZL U/ZL V/ZL
6044 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
6045 fld st // ZL ZL 1/ZL U/ZL V/ZL
6046 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
6047 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
6048 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
6050 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
6051 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
6053 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
6055 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
6056 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
6057 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
6058 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
6059 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
6061 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
6063 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
6064 // @todo overlap this guy
6065 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
6066 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
6067 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
6068 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
6069 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
6071 cmp ecx,0 // check for any full spans
6072 jle HandleLeftoverPixels
6076 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
6077 // UR VR V/ZR 1/ZR U/ZR UL VL
6079 // convert left side coords
6081 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
6082 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
6083 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
6085 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
6086 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
6087 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
6089 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
6091 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
6092 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
6093 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
6094 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
6096 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
6097 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
6099 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
6100 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
6101 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
6103 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
6104 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
6106 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
6107 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
6108 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
6109 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
6110 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
6111 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
6112 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
6114 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
6116 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
6117 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
6120 ; set up affine registers
6122 ; setup delta values
6124 mov eax,Tmap1.DeltaV ; get v 16.16 step
6125 mov ebx,eax ; copy it
6126 sar eax,16 ; get v int step
6127 shl ebx,16 ; get v frac step
6128 mov Tmap1.DeltaVFrac,ebx ; store it
6129 imul eax,Tmap1.src_offset ; calculate texture step for v int step
6131 mov ebx,Tmap1.DeltaU ; get u 16.16 step
6132 mov ecx,ebx ; copy it
6133 sar ebx,16 ; get u int step
6134 shl ecx,16 ; get u frac step
6135 mov Tmap1.DeltaUFrac,ecx ; store it
6136 add eax,ebx ; calculate uint + vint step
6137 mov Tmap1.UVintVfracStepVNoCarry,eax; save whole step in non-v-carry slot
6138 add eax,Tmap1.src_offset ; calculate whole step + v carry
6139 mov Tmap1.UVintVfracStepVCarry,eax ; save in v-carry slot
6143 ; check coordinate ranges
6144 mov eax, Tmap1.UFixed
6145 cmp eax, Tmap1.MinUFixed
6147 mov eax, Tmap1.MinUFixed
6148 mov Tmap1.UFixed, eax
6151 cmp eax, Tmap1.MaxUFixed
6153 mov eax, Tmap1.MaxUFixed
6154 mov Tmap1.UFixed, eax
6156 mov eax, Tmap1.VFixed
6157 cmp eax, Tmap1.MinVFixed
6159 mov eax, Tmap1.MinVFixed
6160 mov Tmap1.VFixed, eax
6163 cmp eax, Tmap1.MaxVFixed
6165 mov eax, Tmap1.MaxVFixed
6166 mov Tmap1.VFixed, eax
6170 ; setup initial coordinates
6171 mov esi,Tmap1.UFixed ; get u 16.16 fixedpoint coordinate
6173 mov ebx,esi ; copy it
6174 sar esi,16 ; get integer part
6175 shl ebx,16 ; get fractional part
6177 mov ecx,Tmap1.VFixed ; get v 16.16 fixedpoint coordinate
6179 mov edx,ecx ; copy it
6180 sar edx,16 ; get integer part
6181 shl ecx,16 ; get fractional part
6182 imul edx,Tmap1.src_offset ; calc texture scanline address
6183 add esi,edx ; calc texture offset
6184 add esi,Tmap1.pixptr ; calc address
6186 mov edx,Tmap1.DeltaUFrac ; get register copy
6192 mov ebp, Tmap1.fx_dl_dx
6203 // add Tmap1.fx_l, eax
6206 // mov eax, Tmap1.fx_l // use bx and dx to do lighting
6209 // mov eax, Tmap1.fx_dl_dx // use bx and dx to do lighting
6215 ; ************** Can't Access Stack Frame ******************
6216 ; ************** Can't Access Stack Frame ******************
6217 ; ************** Can't Access Stack Frame ******************
6219 // 8 pixel span code
6220 // edi = dest dib bits at current pixel
6221 // esi = texture pointer at current u,v
6223 // ebx = u fraction 0.32
6224 // ecx = v fraction 0.32
6225 // edx = u frac step
6226 // ebp = v carry scratch
6228 mov al,[edi] // preread the destination cache line
6231 movzx eax,byte ptr [esi] // get texture pixel 0
6233 mov eax, gr_fade_table32[eax*4]
6235 add ecx,Tmap1.DeltaVFrac // increment v fraction
6236 sbb ebp,ebp // get -1 if carry
6237 add ebx,edx // increment u fraction
6239 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6240 add ecx,Tmap1.DeltaVFrac // increment v fraction
6242 sbb ebp,ebp // get -1 if carry
6243 // mov al, 0 // Uncomment this line to show divisions
6244 mov [edi+0],eax // store pixel 0
6246 add ebx,edx // increment u fraction
6247 movzx eax,byte ptr [esi] // get texture pixel 0
6249 mov eax, gr_fade_table32[eax*4]
6251 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6252 add ecx,Tmap1.DeltaVFrac // increment v fraction
6254 sbb ebp,ebp // get -1 if carry
6255 mov [edi+4],eax // store pixel 1
6257 add ebx,edx // increment u fraction
6258 movzx eax,byte ptr [esi] // get texture pixel 0
6260 mov eax, gr_fade_table32[eax*4]
6262 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6263 add ecx,Tmap1.DeltaVFrac // increment v fraction
6265 sbb ebp,ebp // get -1 if carry
6266 mov [edi+8],eax // store pixel 2
6268 add ebx,edx // increment u fraction
6269 movzx eax,byte ptr [esi] // get texture pixel 0
6271 mov eax, gr_fade_table32[eax*4]
6273 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6274 add ecx,Tmap1.DeltaVFrac // increment v fraction
6276 sbb ebp,ebp // get -1 if carry
6277 mov [edi+12],eax // store pixel 3
6279 add ebx,edx // increment u fraction
6280 movzx eax,byte ptr [esi] // get texture pixel 0
6282 mov eax, gr_fade_table32[eax*4]
6283 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6284 add ecx,Tmap1.DeltaVFrac // increment v fraction
6286 sbb ebp,ebp // get -1 if carry
6287 mov [edi+16],eax // store pixel 3
6289 add ebx,edx // increment u fraction
6290 movzx eax,byte ptr [esi] // get texture pixel 0
6292 mov eax, gr_fade_table32[eax*4]
6293 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6294 add ecx,Tmap1.DeltaVFrac // increment v fraction
6296 sbb ebp,ebp // get -1 if carry
6297 mov [edi+20],eax // store pixel 3
6299 add ebx,edx // increment u fraction
6300 movzx eax,byte ptr [esi] // get texture pixel 0
6302 mov eax, gr_fade_table32[eax*4]
6303 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6304 add ecx,Tmap1.DeltaVFrac // increment v fraction
6306 sbb ebp,ebp // get -1 if carry
6307 mov [edi+24],eax // store pixel 3
6309 add ebx,edx // increment u fraction
6310 movzx eax,byte ptr [esi] // get texture pixel 0
6312 mov eax, gr_fade_table32[eax*4]
6313 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6314 add ecx,Tmap1.DeltaVFrac // increment v fraction
6316 sbb ebp,ebp // get -1 if carry
6317 mov [edi+28],eax // store pixel 3
6319 add ebx,edx // increment u fraction
6320 movzx eax,byte ptr [esi] // get texture pixel 0
6322 mov eax, gr_fade_table32[eax*4]
6323 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6324 add ecx,Tmap1.DeltaVFrac // increment v fraction
6326 sbb ebp,ebp // get -1 if carry
6327 mov [edi+32],eax // store pixel 3
6329 add ebx,edx // increment u fraction
6330 movzx eax,byte ptr [esi] // get texture pixel 0
6332 mov eax, gr_fade_table32[eax*4]
6333 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6334 add ecx,Tmap1.DeltaVFrac // increment v fraction
6336 sbb ebp,ebp // get -1 if carry
6337 mov [edi+36],eax // store pixel 3
6339 add ebx,edx // increment u fraction
6340 movzx eax,byte ptr [esi] // get texture pixel 0
6342 mov eax, gr_fade_table32[eax*4]
6343 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6344 add ecx,Tmap1.DeltaVFrac // increment v fraction
6346 sbb ebp,ebp // get -1 if carry
6347 mov [edi+40],eax // store pixel 3
6349 add ebx,edx // increment u fraction
6350 movzx eax,byte ptr [esi] // get texture pixel 0
6352 mov eax, gr_fade_table32[eax*4]
6355 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6356 add ecx,Tmap1.DeltaVFrac // increment v fraction
6358 sbb ebp,ebp // get -1 if carry
6359 mov [edi+44],eax // store pixel 3
6361 add ebx,edx // increment u fraction
6362 movzx eax,byte ptr [esi] // get texture pixel 0
6364 mov eax, gr_fade_table32[eax*4]
6367 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6368 add ecx,Tmap1.DeltaVFrac // increment v fraction
6370 sbb ebp,ebp // get -1 if carry
6371 mov [edi+48],eax // store pixel 3
6373 add ebx,edx // increment u fraction
6374 movzx eax,byte ptr [esi] // get texture pixel 0
6376 mov eax, gr_fade_table32[eax*4]
6379 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6380 add ecx,Tmap1.DeltaVFrac // increment v fraction
6382 sbb ebp,ebp // get -1 if carry
6383 mov [edi+52],eax // store pixel 3
6385 add ebx,edx // increment u fraction
6386 movzx eax,byte ptr [esi] // get texture pixel 0
6388 mov eax, gr_fade_table32[eax*4]
6391 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6392 add ecx,Tmap1.DeltaVFrac // increment v fraction
6394 sbb ebp,ebp // get -1 if carry
6395 mov [edi+56],eax // store pixel 3
6397 add ebx,edx // increment u fraction
6398 movzx eax,byte ptr [esi] // get texture pixel 0
6400 mov eax, gr_fade_table32[eax*4]
6403 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6404 add ecx,Tmap1.DeltaVFrac // increment v fraction
6406 sbb ebp,ebp // get -1 if carry
6407 mov [edi+60],eax // store pixel 3
6409 add ebx,edx // increment u fraction
6410 movzx eax,byte ptr [esi] // get texture pixel 0
6412 mov eax, gr_fade_table32[eax*4]
6415 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6416 add ecx,Tmap1.DeltaVFrac // increment v fraction
6418 sbb ebp,ebp // get -1 if carry
6419 mov [edi+64],eax // store pixel 3
6421 add ebx,edx // increment u fraction
6422 movzx eax,byte ptr [esi] // get texture pixel 0
6424 mov eax, gr_fade_table32[eax*4]
6427 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6428 add ecx,Tmap1.DeltaVFrac // increment v fraction
6430 sbb ebp,ebp // get -1 if carry
6431 mov [edi+68],eax // store pixel 3
6433 add ebx,edx // increment u fraction
6434 movzx eax,byte ptr [esi] // get texture pixel 0
6436 mov eax, gr_fade_table32[eax*4]
6439 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6440 add ecx,Tmap1.DeltaVFrac // increment v fraction
6442 sbb ebp,ebp // get -1 if carry
6443 mov [edi+72],eax // store pixel 3
6445 add ebx,edx // increment u fraction
6446 movzx eax,byte ptr [esi] // get texture pixel 0
6448 mov eax, gr_fade_table32[eax*4]
6451 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6452 add ecx,Tmap1.DeltaVFrac // increment v fraction
6454 sbb ebp,ebp // get -1 if carry
6455 mov [edi+76],eax // store pixel 3
6457 add ebx,edx // increment u fraction
6458 movzx eax,byte ptr [esi] // get texture pixel 0
6460 mov eax, gr_fade_table32[eax*4]
6463 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6464 add ecx,Tmap1.DeltaVFrac // increment v fraction
6466 sbb ebp,ebp // get -1 if carry
6467 mov [edi+80],eax // store pixel 3
6469 add ebx,edx // increment u fraction
6470 movzx eax,byte ptr [esi] // get texture pixel 0
6472 mov eax, gr_fade_table32[eax*4]
6475 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6476 add ecx,Tmap1.DeltaVFrac // increment v fraction
6478 sbb ebp,ebp // get -1 if carry
6479 mov [edi+84],eax // store pixel 3
6481 add ebx,edx // increment u fraction
6482 movzx eax,byte ptr [esi] // get texture pixel 0
6484 mov eax, gr_fade_table32[eax*4]
6487 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6488 add ecx,Tmap1.DeltaVFrac // increment v fraction
6490 sbb ebp,ebp // get -1 if carry
6491 mov [edi+88],eax // store pixel 3
6493 add ebx,edx // increment u fraction
6494 movzx eax,byte ptr [esi] // get texture pixel 0
6496 mov eax, gr_fade_table32[eax*4]
6499 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6500 add ecx,Tmap1.DeltaVFrac // increment v fraction
6502 sbb ebp,ebp // get -1 if carry
6503 mov [edi+92],eax // store pixel 3
6505 add ebx,edx // increment u fraction
6506 movzx eax,byte ptr [esi] // get texture pixel 0
6508 mov eax, gr_fade_table32[eax*4]
6511 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6512 add ecx,Tmap1.DeltaVFrac // increment v fraction
6514 sbb ebp,ebp // get -1 if carry
6515 mov [edi+96],eax // store pixel 3
6517 add ebx,edx // increment u fraction
6518 movzx eax,byte ptr [esi] // get texture pixel 0
6520 mov eax, gr_fade_table32[eax*4]
6523 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6524 add ecx,Tmap1.DeltaVFrac // increment v fraction
6526 sbb ebp,ebp // get -1 if carry
6527 mov [edi+100],eax // store pixel 3
6529 add ebx,edx // increment u fraction
6530 movzx eax,byte ptr [esi] // get texture pixel 0
6532 mov eax, gr_fade_table32[eax*4]
6535 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6536 add ecx,Tmap1.DeltaVFrac // increment v fraction
6540 sbb ebp,ebp // get -1 if carry
6541 mov [edi+104],eax // store pixel 3
6543 add ebx,edx // increment u fraction
6544 movzx eax,byte ptr [esi] // get texture pixel 0
6546 mov eax, gr_fade_table32[eax*4]
6549 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6550 add ecx,Tmap1.DeltaVFrac // increment v fraction
6552 sbb ebp,ebp // get -1 if carry
6553 mov [edi+108],eax // store pixel 3
6555 add ebx,edx // increment u fraction
6556 movzx eax,byte ptr [esi] // get texture pixel 0
6558 mov eax, gr_fade_table32[eax*4]
6560 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6561 add ecx,Tmap1.DeltaVFrac // increment v fraction
6563 sbb ebp,ebp // get -1 if carry
6564 mov [edi+112],eax // store pixel 4
6566 add ebx,edx // increment u fraction
6567 movzx eax,byte ptr [esi] // get texture pixel 0
6569 mov eax, gr_fade_table32[eax*4]
6571 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6572 add ecx,Tmap1.DeltaVFrac // increment v fraction
6574 sbb ebp,ebp // get -1 if carry
6575 mov [edi+116],eax // store pixel 5
6577 add ebx,edx // increment u fraction
6578 movzx eax,byte ptr [esi] // get texture pixel 0
6580 mov eax, gr_fade_table32[eax*4]
6582 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6583 add ecx,Tmap1.DeltaVFrac // increment v fraction
6585 sbb ebp,ebp // get -1 if carry
6586 mov [edi+120],eax // store pixel 6
6588 add ebx,edx // increment u fraction
6590 movzx eax,byte ptr [esi] // get texture pixel 0
6592 mov eax, gr_fade_table32[eax*4]
6594 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6596 mov [edi+124],eax // store pixel 7
6600 ; ************** Okay to Access Stack Frame ****************
6601 ; ************** Okay to Access Stack Frame ****************
6602 ; ************** Okay to Access Stack Frame ****************
6605 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
6606 ; ZR V/ZR 1/ZR U/ZR UL VL
6608 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
6609 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
6610 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
6611 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
6613 add edi,128 ; increment to next span
6614 dec Tmap1.Subdivisions ; decrement span count
6615 jnz SpanLoop ; loop back
6617 // save new lighting values
6620 // mov Tmap1.fx_l, eax
6624 // mov Tmap1.fx_dl_dx, eax
6626 HandleLeftoverPixels:
6629 mov esi,Tmap1.pixptr ; load texture pointer
6631 ; edi = dest dib bits
6632 ; esi = current texture dib bits
6633 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
6634 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
6636 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
6637 jz FPUReturn ; nope, pop the FPU and bail
6639 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
6641 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
6642 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
6643 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
6645 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
6646 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
6647 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
6649 dec Tmap1.WidthModLength ; calc how many steps to take
6650 jz OnePixelSpan ; just one, don't do deltas
6652 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
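6655 ; @todo rearrange things so we don't need these two fstp instructions (they only pop two invalid entries into FloatTemp to free FPU stack slots for the three loads below)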
6656 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
6657 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
6659 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
6660 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
6661 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
6662 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
6663 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
6664 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
6666 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
6668 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
6669 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
6671 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
6673 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
6674 fxch st(1) ; VR UR inv. inv. inv. dU VL
6675 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
6676 fxch st(6) ; dV UR inv. inv. inv. dU VR
6678 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
6679 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
6680 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
6682 fxch st(4) ; dU inv. inv. inv. UR VR
6683 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
6684 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
6685 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
6687 ; @todo gross! these two dummy loads only pad the FPU stack back to seven entries so it lines up with the other loop
6688 fld st(1) ; inv. inv. inv. inv. UR VR
6689 fld st(2) ; inv. inv. inv. inv. inv. UR VR
6694 ; setup delta values
6695 mov eax, Tmap1.DeltaV // get v 16.16 step
6696 mov ebx, eax // copy it
6697 sar eax, 16 // get v int step
6698 shl ebx, 16 // get v frac step
6699 mov Tmap1.DeltaVFrac, ebx // store it
6700 imul eax, Tmap1.src_offset // calc texture step for v int step
6702 mov ebx, Tmap1.DeltaU // get u 16.16 step
6703 mov ecx, ebx // copy it
6704 sar ebx, 16 // get the u int step
6705 shl ecx, 16 // get the u frac step
6706 mov Tmap1.DeltaUFrac, ecx // store it
6707 add eax, ebx // calc uint + vint step
6708 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
6709 add eax, Tmap1.src_offset // calc whole step + v carry
6710 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
6717 ; check coordinate ranges
6718 mov eax, Tmap1.UFixed
6719 cmp eax, Tmap1.MinUFixed
6721 mov eax, Tmap1.MinUFixed
6722 mov Tmap1.UFixed, eax
6725 cmp eax, Tmap1.MaxUFixed
6727 mov eax, Tmap1.MaxUFixed
6728 mov Tmap1.UFixed, eax
6730 mov eax, Tmap1.VFixed
6731 cmp eax, Tmap1.MinVFixed
6733 mov eax, Tmap1.MinVFixed
6734 mov Tmap1.VFixed, eax
6737 cmp eax, Tmap1.MaxVFixed
6739 mov eax, Tmap1.MaxVFixed
6740 mov Tmap1.VFixed, eax
6747 ; setup initial coordinates
6748 mov esi, Tmap1.UFixed // get u 16.16
6749 mov ebx, esi // copy it
6750 sar esi, 16 // get integer part
6751 shl ebx, 16 // get fractional part
6753 mov ecx, Tmap1.VFixed // get v 16.16
6754 mov edx, ecx // copy it
6755 sar edx, 16 // get integer part
6756 shl ecx, 16 // get fractional part
6757 imul edx, Tmap1.src_offset // calc texture scanline address
6758 add esi, edx // calc texture offset
6759 add esi, Tmap1.pixptr // calc address
6761 ; set edi = address of first pixel to modify
6762 ; mov edi, Tmap1.dest_row_data
6771 mov edx, Tmap1.DeltaUFrac
6773 cmp Tmap1.WidthModLength, 1
6778 mov ebx, Tmap1.fx_l_right
6785 // slow but maybe better
6788 mov ebx, Tmap1.WidthModLength
6793 mov eax, Tmap1.fx_dl_dx
6803 inc Tmap1.WidthModLength
6804 mov eax,Tmap1.WidthModLength
6808 mov Tmap1.WidthModLength, eax
6812 mov al,[edi] // preread the destination cache line
6815 movzx eax,byte ptr [esi] // get texture pixel 0
6817 mov eax, gr_fade_table32[eax*4]
6819 add ecx,Tmap1.DeltaVFrac // increment v fraction
6820 sbb ebp,ebp // get -1 if carry
6821 add ebx,edx // increment u fraction
6822 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6823 mov [edi+0],eax // store pixel 0
6825 add ecx,Tmap1.DeltaVFrac // increment v fraction
6826 sbb ebp,ebp // get -1 if carry
6827 add ebx,edx // increment u fraction
6828 movzx eax,byte ptr [esi] // get texture pixel 0
6830 mov eax, gr_fade_table32[eax*4]
6832 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6833 mov [edi+4],eax // store pixel 1
6836 dec Tmap1.WidthModLength
6844 movzx eax,byte ptr [esi] // get texture pixel 0
6846 mov eax, gr_fade_table32[eax*4]
6847 mov [edi],eax // store pixel 2
6862 OldWay: // This is 6% slower than above
6864 mov ebx,Tmap1.UFixed ; get starting coordinates
6865 mov ecx,Tmap1.VFixed ; for span
6867 ; leftover pixels loop
6868 ; edi = dest dib bits
6869 ; esi = texture dib bits
6875 mov eax,ecx ; copy v
6877 imul eax,Tmap1.src_offset ; scan offset
6878 mov edx,ebx ; copy u
6880 add eax,edx ; texture offset
6881 mov al,[esi+eax] ; get source pixel
6883 mov [edi],al ; store it
6885 add ebx,Tmap1.DeltaU ; increment u coordinate
6886 add ecx,Tmap1.DeltaV ; increment v coordinate
6888 dec Tmap1.WidthModLength ; decrement loop count
6889 jl FPUReturn ; finish up
6893 mov eax,ecx ; copy v
6895 imul eax,Tmap1.src_offset ; scan offset
6896 mov edx,ebx ; copy u
6898 add eax,edx ; texture offset
6899 mov al,[esi+eax] ; get source pixel
6900 mov [edi],al ; store it
6902 add ebx,Tmap1.DeltaU ; increment u coordinate
6903 add ecx,Tmap1.DeltaV ; increment v coordinate
6905 dec Tmap1.WidthModLength ; decrement loop count
6906 jge LeftoverLoop ; finish up
6911 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
6912 ; xxx xxx xxx xxx xxx xxx xxx
6923 fldcw Tmap1.OldFPUCW // restore the FPU
6940 add edx,DeltaVFrac ; Add in 0.32 DeltaVFrac to VFrac
6941 sbb ebp,ebp ; ebp will equal -1 if there was a carry
6942 mov BYTE PTR [edi], al ; blit destination pixel
6943 mov al, BYTE PTR [esi] ; get next texel
6944 add ecx,ebx ; add 0.32 DeltaUFrac to UFrac, plus light
6945 adc esi, [UVStepCarry1+(ebp*4)]
6946 mov ah, ch ; move lighting value into place
6947 mov al, ShadeTable[eax] ; Get shaded pixel