2 * $Logfile: /Freespace2/code/Graphics/TmapGenericScans.cpp $
7 * Some code for generic scanlines. This isn't used, it is just
8 * basically a dump area for inner loops I was experimenting with.
9 * This entire file is #if 0'd out.
12 * Revision 1.2 2002/05/07 03:16:45 theoddone33
13 * The Great Newline Fix
15 * Revision 1.1.1.1 2002/05/03 03:28:09 root
19 * 2 10/07/98 10:53a Dave
22 * 1 10/07/98 10:49a Dave
24 * 7 4/24/97 4:45p John
25 * Added tiled texture mappers for 64x64, 128x128, and 256x256 textures.
27 * 6 4/24/97 3:01p John
28 * added code to not crash on non-256x256 textures.
30 * 5 3/14/97 3:55p John
31 * Made tiled tmapper not always be zbuffered.
33 * 4 3/13/97 10:32a John
34 * Added code for tiled 256x256 textures in certain models.
36 * 3 3/10/97 5:20p John
37 * Differentiated between Gouraud and Flat shading. Since we only do flat
38 * shading as of now, we don't need to interpolate L in the outer loop.
39 * This should save a few percent.
41 * 2 12/10/96 10:37a John
42 * Restructured texture mapper to remove some overhead from each scanline
43 * setup. This gave about a 30% improvement drawing trans01.pof, which is
44 * a really complex model. In the process, I cleaned up the scanline
45 * functions and separated them into different modules for each pixel
54 #include "tmapscanline.h"
59 #pragma warning(disable:4410)
67 #include "tmapscanline.h"
72 #pragma warning(disable:4410)
74 // These must be global because I use them in assembly
75 // code that uses the EBP register, so the variables
76 // can't be accessed off the stack.
77 int _fx_u, _fx_v, _fx_w, _fx_l;
78 int _fx_u_right, _fx_v_right, _fx_w_right;
79 int _fx_du, _fx_dv, _fx_dw, _fx_dl;
80 uint _fx_destptr,_fx_srcptr, light_table;
81 int V0, U0, DU1, DV1, DZ1;
82 int _loop_count,num_big_steps;
85 int rgbtable_inited = 0;
94 for (i=0; i<512; i++ ) {
97 else if ( v > 255 ) v = 255;
100 rgbtable3[i] = v<<16;
105 void asm_tmap_scanline_lln();
106 void asm_tmap_scanline_lln_tiled();
// Loads the global Tmap1 scanline state for the span lx..rx on row y and
// then runs asm_tmap_scanline_lln() on it.
//   p     - left-end values; u, v and l are read
//   dp    - values stored into the per-x step fields (fx_du_dx, fx_dv_dx, fx_dl_dx)
//   rp    - right-end values; u and v are read
//   flags - not read in the lines visible here
// NOTE(review): fl2f is defined elsewhere; presumably float -> fixed point - confirm.
108 void tmapscan_lln8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
110 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
// loop_count is stored as rx - lx, without the +1 used by the C loops later in this file
111 Tmap1.loop_count = rx - lx;
112 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
113 Tmap1.bp = tmap_bitmap;
// src_offset is the bitmap width; the asm in this file multiplies the integer v by it
114 Tmap1.src_offset = tmap_bitmap->w;
116 Tmap1.fx_u = fl2f(p->u);
117 Tmap1.fx_v = fl2f(p->v);
// lighting value and its step are scaled by 32 before conversion
118 Tmap1.fx_l = fl2f(p->l*32.0);
119 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
120 Tmap1.fx_du_dx = fl2f(dp->u);
121 Tmap1.fx_dv_dx = fl2f(dp->v);
122 Tmap1.fx_u_right = fl2f(rp->u);
123 Tmap1.fx_v_right = fl2f(rp->v);
125 asm_tmap_scanline_lln();
128 extern void asm_tmap_scanline_lnt();
// Loads the global Tmap1 scanline state for the span lx..rx on row y and
// then runs asm_tmap_scanline_lnt(). Same setup as tmapscan_lln8 except that
// no lighting fields (fx_l, fx_dl_dx) are written.
//   p     - left-end values; u and v are read
//   dp    - values stored into fx_du_dx and fx_dv_dx
//   rp    - right-end values; u and v are read
//   flags - not read in the lines visible here
130 void tmapscan_lnt8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
132 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
133 Tmap1.loop_count = rx - lx;
134 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
135 Tmap1.bp = tmap_bitmap;
136 Tmap1.src_offset = tmap_bitmap->w;
138 Tmap1.fx_u = fl2f(p->u);
139 Tmap1.fx_v = fl2f(p->v);
140 Tmap1.fx_du_dx = fl2f(dp->u);
141 Tmap1.fx_dv_dx = fl2f(dp->v);
142 Tmap1.fx_u_right = fl2f(rp->u);
143 Tmap1.fx_v_right = fl2f(rp->v);
145 asm_tmap_scanline_lnt();
148 extern void asm_tmap_scanline_lnn();
// Loads the global Tmap1 scanline state for the span lx..rx on row y and
// then runs asm_tmap_scanline_lnn(). The field setup is line-for-line the
// same as tmapscan_lnt8; only the assembly routine called differs.
//   p     - left-end values; u and v are read
//   dp    - values stored into fx_du_dx and fx_dv_dx
//   rp    - right-end values; u and v are read
//   flags - not read in the lines visible here
150 void tmapscan_lnn8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
152 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
153 Tmap1.loop_count = rx - lx;
154 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
155 Tmap1.bp = tmap_bitmap;
156 Tmap1.src_offset = tmap_bitmap->w;
158 Tmap1.fx_u = fl2f(p->u);
159 Tmap1.fx_v = fl2f(p->v);
160 Tmap1.fx_du_dx = fl2f(dp->u);
161 Tmap1.fx_dv_dx = fl2f(dp->v);
162 Tmap1.fx_u_right = fl2f(rp->u);
163 Tmap1.fx_v_right = fl2f(rp->v);
165 asm_tmap_scanline_lnn();
// Loads the global Tmap1 scanline state for a tiled, lit span lx..rx on row y.
// In addition to the fields tmapscan_lln8 sets, BitmapWidth and BitmapHeight
// are filled in from tmap_bitmap.
//   p     - left-end values; u, v and l are read
//   dp    - values stored into fx_du_dx, fx_dv_dx and fx_dl_dx
//   rp    - right-end values; u and v are read
//   flags - not read in the lines visible here
// NOTE(review): the call to asm_tmap_scanline_lln_tiled() is commented out
// below, so in the lines visible here nothing is drawn - confirm intent.
169 void tmapscan_lln8_tiled( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
171 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
172 Tmap1.loop_count = rx - lx;
173 Tmap1.fx_u = fl2f(p->u);
174 Tmap1.fx_v = fl2f(p->v);
// lighting value and its step are scaled by 32 before conversion
175 Tmap1.fx_l = fl2f(p->l*32.0);
176 Tmap1.fx_du_dx = fl2f(dp->u);
177 Tmap1.fx_dv_dx = fl2f(dp->v);
178 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
179 Tmap1.fx_u_right = fl2f(rp->u);
180 Tmap1.fx_v_right = fl2f(rp->v);
181 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
182 Tmap1.bp = tmap_bitmap;
183 Tmap1.src_offset = tmap_bitmap->w;
185 Tmap1.BitmapWidth = tmap_bitmap->w;
186 Tmap1.BitmapHeight = tmap_bitmap->h;
189 // asm_tmap_scanline_lln_tiled();
196 void c_tmap_scanline_per_sub_new();
// Draws one lit, textured 8-bit scanline from lx to rx on row y.
// U/Z, V/Z and 1/Z are carried on the x87 stack; a true divide is done once
// per 32-pixel span, and u,v are stepped linearly in 16.16 fixed point inside
// each span (see the FPU stack comments below). Pixels that do not fill a
// whole span are drawn by the code after HandleLeftoverPixels.
//   p     - left-end values; l and sw are read in the lines visible here
//   dp    - per-x deltas; u, v, sw and l are read
//   rp    - right-end values; u, v and sw are read
//   flags - not read in the lines visible here
// NOTE(review): several jump targets used below (some_left_over, SpanLoop,
// FPUReturn, OnePixelSpan, LeftoverLoop) are not visible in this excerpt.
198 void tmapscan_pln8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
200 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
201 Tmap1.loop_count = rx - lx;
202 Tmap1.fx_l = fl2f(p->l*32.0);
203 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
207 Tmap1.OneOverZ = p->sw;
// the *8-suffixed fields hold the delta across one 32-pixel span (delta * 32)
209 Tmap1.dUOverZdX8 = dp->u*32.0f;
210 Tmap1.dVOverZdX8 = dp->v*32.0f;
211 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
213 Tmap1.dUOverZdX = dp->u;
214 Tmap1.dVOverZdX = dp->v;
215 Tmap1.dOneOverZdX = dp->sw;
217 Tmap1.RightUOverZ = rp->u;
218 Tmap1.RightVOverZ = rp->v;
219 Tmap1.RightOneOverZ = rp->sw;
// a negative lighting step is negated, and fx_l / fx_l_right are replaced by
// (67*F1_0) - value, so the step used from here on is never negative
221 if ( Tmap1.fx_dl_dx < 0 ) {
222 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
223 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
224 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
226 // Assert( Tmap1.fx_l > 31*F1_0 );
227 // Assert( Tmap1.fx_l < 66*F1_0 );
228 // Assert( Tmap1.fx_dl_dx >= 0 );
229 // Assert( Tmap1.fx_dl_dx < 31*F1_0 );
241 dldx = Tmap1.fx_dl_dx;
242 dest = Tmap1.dest_row_data;
244 for (x=Tmap1.loop_count; x >= 0; x-- ) {
245 //*dest++ = gr_fade_table[ ((l>>8)&(0xff00)) + 35 ];
265 // put the FPU in 32 bit mode
266 // @todo move this out of here!
// the old control word is kept in Tmap1.OldFPUCW and reloaded by the final
// fldcw of this function; the instruction that edits ax is not visible here
268 fstcw Tmap1.OldFPUCW // store copy of CW
269 mov ax,Tmap1.OldFPUCW // get it in ax
271 mov Tmap1.FPUCW,ax // store it
272 fldcw Tmap1.FPUCW // load the FPU
274 mov ecx, Tmap1.loop_count // ecx = width
276 mov edi, Tmap1.dest_row_data // edi = dest pointer
278 // edi = pointer to start pixel in dest dib
// split the width into whole 32-pixel spans (ecx) and a remainder (eax)
281 mov eax,ecx // eax and ecx = width
282 shr ecx,5 // ecx = width / subdivision length
283 and eax,31 // eax = width mod subdivision length
284 jnz some_left_over // any leftover?
286 dec ecx // no, so special case last span
287 mov eax,32 // it's 32 pixels long
289 mov Tmap1.Subdivisions,ecx // store widths
290 mov Tmap1.WidthModLength,eax
292 // mov ebx,pLeft ; get left edge pointer
293 // mov edx,pGradients ; get gradients pointer
295 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
296 // st0 st1 st2 st3 st4 st5 st6 st7
297 fld Tmap1.VOverZ // V/ZL
298 fld Tmap1.UOverZ // U/ZL V/ZL
299 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
300 fld1 // 1 1/ZL U/ZL V/ZL
301 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
302 fld st // ZL ZL 1/ZL U/ZL V/ZL
303 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
304 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
305 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
307 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
308 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
310 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
312 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
313 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
314 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
315 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
316 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
318 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
320 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
321 // @todo overlap this guy
322 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
323 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
324 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
325 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
326 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
328 cmp ecx,0 // check for any full spans
329 jle HandleLeftoverPixels
333 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
334 // UR VR V/ZR 1/ZR U/ZR UL VL
336 // convert left side coords
338 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
339 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
340 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
342 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
343 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
344 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
346 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
348 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
349 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
350 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
351 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
353 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
354 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
356 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
357 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
358 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
360 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
361 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
363 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
364 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
365 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
366 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
367 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
368 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
369 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
372 ; set up affine registers
// each 16.16 step is split into an integer part (folded into one combined
// texture-address step) and a fraction part (moved to the top 16 bits);
// two address steps are stored, one without and one with an extra
// src_offset for when the v fraction carries
376 mov eax,Tmap1.DeltaV ; get v 16.16 step
377 mov ebx,eax ; copy it
378 sar eax,16 ; get v int step
379 shl ebx,16 ; get v frac step
380 mov Tmap1.DeltaVFrac,ebx ; store it
381 imul eax,Tmap1.src_offset ; calculate texture step for v int step
383 mov ebx,Tmap1.DeltaU ; get u 16.16 step
384 mov ecx,ebx ; copy it
385 sar ebx,16 ; get u int step
386 shl ecx,16 ; get u frac step
387 mov Tmap1.DeltaUFrac,ecx ; store it
388 add eax,ebx ; calculate uint + vint step
389 mov Tmap1.UVintVfracStepVNoCarry,eax; save whole step in non-v-carry slot
390 add eax,Tmap1.src_offset ; calculate whole step + v carry
391 mov Tmap1.UVintVfracStepVCarry,eax ; save in v-carry slot
393 ; setup initial coordinates
394 mov esi,Tmap1.UFixed ; get u 16.16 fixedpoint coordinate
396 mov ebx,esi ; copy it
397 sar esi,16 ; get integer part
398 shl ebx,16 ; get fractional part
400 mov ecx,Tmap1.VFixed ; get v 16.16 fixedpoint coordinate
402 mov edx,ecx ; copy it
403 sar edx,16 ; get integer part
404 shl ecx,16 ; get fractional part
405 imul edx,Tmap1.src_offset ; calc texture scanline address
406 add esi,edx ; calc texture offset
407 add esi,Tmap1.pixptr ; calc address
409 mov edx,Tmap1.DeltaUFrac ; get register copy
415 mov ebp, Tmap1.fx_dl_dx
427 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
429 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
430 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
434 // edi = dest dib bits at current pixel
435 // esi = texture pointer at current u,v
437 // ebx = u fraction 0.32
438 // ecx = v fraction 0.32
440 // ebp = v carry scratch
// 32 unrolled pixels follow. Per pixel: the texel at [esi] is looked up
// through gr_fade_table and stored at [edi+N]; adding DeltaVFrac to ecx sets
// carry, sbb turns that into ebp = 0 or -1, which selects the no-carry or
// carry address step; adding edx to ebx supplies the u carry for the adc.
// NOTE(review): the instructions that set the upper bytes of eax before each
// gr_fade_table[eax] lookup are not visible in this excerpt.
442 mov al,[edi] // preread the destination cache line
445 mov al,[esi] // get texture pixel 0
447 mov al, gr_fade_table[eax]
449 add ecx,Tmap1.DeltaVFrac // increment v fraction
450 sbb ebp,ebp // get -1 if carry
451 add ebx,edx // increment u fraction
453 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
454 add ecx,Tmap1.DeltaVFrac // increment v fraction
456 sbb ebp,ebp // get -1 if carry
457 // mov al, 0 // Uncomment this line to show divisions
458 mov [edi+0],al // store pixel 0
460 add ebx,edx // increment u fraction
461 mov al,[esi] // get texture pixel 1
463 mov al, gr_fade_table[eax]
465 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
466 add ecx,Tmap1.DeltaVFrac // increment v fraction
468 sbb ebp,ebp // get -1 if carry
469 mov [edi+1],al // store pixel 1
471 add ebx,edx // increment u fraction
472 mov al,[esi] // get texture pixel 2
474 mov al, gr_fade_table[eax]
476 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
477 add ecx,Tmap1.DeltaVFrac // increment v fraction
479 sbb ebp,ebp // get -1 if carry
480 mov [edi+2],al // store pixel 2
482 add ebx,edx // increment u fraction
483 mov al,[esi] // get texture pixel 3
485 mov al, gr_fade_table[eax]
487 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
488 add ecx,Tmap1.DeltaVFrac // increment v fraction
490 sbb ebp,ebp // get -1 if carry
491 mov [edi+3],al // store pixel 3
493 add ebx,edx // increment u fraction
494 mov al,[esi] // get texture pixel 4
496 mov al, gr_fade_table[eax]
497 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
498 add ecx,Tmap1.DeltaVFrac // increment v fraction
500 sbb ebp,ebp // get -1 if carry
501 mov [edi+4],al // store pixel 4
503 add ebx,edx // increment u fraction
504 mov al,[esi] // get texture pixel 5
506 mov al, gr_fade_table[eax]
507 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
508 add ecx,Tmap1.DeltaVFrac // increment v fraction
510 sbb ebp,ebp // get -1 if carry
511 mov [edi+5],al // store pixel 5
513 add ebx,edx // increment u fraction
514 mov al,[esi] // get texture pixel 6
516 mov al, gr_fade_table[eax]
517 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
518 add ecx,Tmap1.DeltaVFrac // increment v fraction
520 sbb ebp,ebp // get -1 if carry
521 mov [edi+6],al // store pixel 6
523 add ebx,edx // increment u fraction
524 mov al,[esi] // get texture pixel 7
526 mov al, gr_fade_table[eax]
527 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
528 add ecx,Tmap1.DeltaVFrac // increment v fraction
530 sbb ebp,ebp // get -1 if carry
531 mov [edi+7],al // store pixel 7
533 add ebx,edx // increment u fraction
534 mov al,[esi] // get texture pixel 8
536 mov al, gr_fade_table[eax]
537 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
538 add ecx,Tmap1.DeltaVFrac // increment v fraction
540 sbb ebp,ebp // get -1 if carry
541 mov [edi+8],al // store pixel 8
543 add ebx,edx // increment u fraction
544 mov al,[esi] // get texture pixel 9
546 mov al, gr_fade_table[eax]
547 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
548 add ecx,Tmap1.DeltaVFrac // increment v fraction
550 sbb ebp,ebp // get -1 if carry
551 mov [edi+9],al // store pixel 9
553 add ebx,edx // increment u fraction
554 mov al,[esi] // get texture pixel 10
556 mov al, gr_fade_table[eax]
557 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
558 add ecx,Tmap1.DeltaVFrac // increment v fraction
560 sbb ebp,ebp // get -1 if carry
561 mov [edi+10],al // store pixel 10
563 add ebx,edx // increment u fraction
564 mov al,[esi] // get texture pixel 11
566 mov al, gr_fade_table[eax]
569 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
570 add ecx,Tmap1.DeltaVFrac // increment v fraction
572 sbb ebp,ebp // get -1 if carry
573 mov [edi+11],al // store pixel 11
575 add ebx,edx // increment u fraction
576 mov al,[esi] // get texture pixel 12
578 mov al, gr_fade_table[eax]
581 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
582 add ecx,Tmap1.DeltaVFrac // increment v fraction
584 sbb ebp,ebp // get -1 if carry
585 mov [edi+12],al // store pixel 12
587 add ebx,edx // increment u fraction
588 mov al,[esi] // get texture pixel 13
590 mov al, gr_fade_table[eax]
593 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
594 add ecx,Tmap1.DeltaVFrac // increment v fraction
596 sbb ebp,ebp // get -1 if carry
597 mov [edi+13],al // store pixel 13
599 add ebx,edx // increment u fraction
600 mov al,[esi] // get texture pixel 14
602 mov al, gr_fade_table[eax]
605 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
606 add ecx,Tmap1.DeltaVFrac // increment v fraction
608 sbb ebp,ebp // get -1 if carry
609 mov [edi+14],al // store pixel 14
611 add ebx,edx // increment u fraction
612 mov al,[esi] // get texture pixel 15
614 mov al, gr_fade_table[eax]
617 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
618 add ecx,Tmap1.DeltaVFrac // increment v fraction
620 sbb ebp,ebp // get -1 if carry
621 mov [edi+15],al // store pixel 15
623 add ebx,edx // increment u fraction
624 mov al,[esi] // get texture pixel 16
626 mov al, gr_fade_table[eax]
629 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
630 add ecx,Tmap1.DeltaVFrac // increment v fraction
632 sbb ebp,ebp // get -1 if carry
633 mov [edi+16],al // store pixel 16
635 add ebx,edx // increment u fraction
636 mov al,[esi] // get texture pixel 17
638 mov al, gr_fade_table[eax]
641 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
642 add ecx,Tmap1.DeltaVFrac // increment v fraction
644 sbb ebp,ebp // get -1 if carry
645 mov [edi+17],al // store pixel 17
647 add ebx,edx // increment u fraction
648 mov al,[esi] // get texture pixel 18
650 mov al, gr_fade_table[eax]
653 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
654 add ecx,Tmap1.DeltaVFrac // increment v fraction
656 sbb ebp,ebp // get -1 if carry
657 mov [edi+18],al // store pixel 18
659 add ebx,edx // increment u fraction
660 mov al,[esi] // get texture pixel 19
662 mov al, gr_fade_table[eax]
665 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
666 add ecx,Tmap1.DeltaVFrac // increment v fraction
668 sbb ebp,ebp // get -1 if carry
669 mov [edi+19],al // store pixel 19
671 add ebx,edx // increment u fraction
672 mov al,[esi] // get texture pixel 20
674 mov al, gr_fade_table[eax]
677 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
678 add ecx,Tmap1.DeltaVFrac // increment v fraction
680 sbb ebp,ebp // get -1 if carry
681 mov [edi+20],al // store pixel 20
683 add ebx,edx // increment u fraction
684 mov al,[esi] // get texture pixel 21
686 mov al, gr_fade_table[eax]
689 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
690 add ecx,Tmap1.DeltaVFrac // increment v fraction
692 sbb ebp,ebp // get -1 if carry
693 mov [edi+21],al // store pixel 21
695 add ebx,edx // increment u fraction
696 mov al,[esi] // get texture pixel 22
698 mov al, gr_fade_table[eax]
701 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
702 add ecx,Tmap1.DeltaVFrac // increment v fraction
704 sbb ebp,ebp // get -1 if carry
705 mov [edi+22],al // store pixel 22
707 add ebx,edx // increment u fraction
708 mov al,[esi] // get texture pixel 23
710 mov al, gr_fade_table[eax]
713 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
714 add ecx,Tmap1.DeltaVFrac // increment v fraction
716 sbb ebp,ebp // get -1 if carry
717 mov [edi+23],al // store pixel 23
719 add ebx,edx // increment u fraction
720 mov al,[esi] // get texture pixel 24
722 mov al, gr_fade_table[eax]
725 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
726 add ecx,Tmap1.DeltaVFrac // increment v fraction
728 sbb ebp,ebp // get -1 if carry
729 mov [edi+24],al // store pixel 24
731 add ebx,edx // increment u fraction
732 mov al,[esi] // get texture pixel 25
734 mov al, gr_fade_table[eax]
737 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
738 add ecx,Tmap1.DeltaVFrac // increment v fraction
740 sbb ebp,ebp // get -1 if carry
741 mov [edi+25],al // store pixel 25
743 add ebx,edx // increment u fraction
744 mov al,[esi] // get texture pixel 26
746 mov al, gr_fade_table[eax]
749 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
750 add ecx,Tmap1.DeltaVFrac // increment v fraction
754 sbb ebp,ebp // get -1 if carry
755 mov [edi+26],al // store pixel 26
757 add ebx,edx // increment u fraction
758 mov al,[esi] // get texture pixel 27
760 mov al, gr_fade_table[eax]
763 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
764 add ecx,Tmap1.DeltaVFrac // increment v fraction
766 sbb ebp,ebp // get -1 if carry
767 mov [edi+27],al // store pixel 27
769 add ebx,edx // increment u fraction
770 mov al,[esi] // get texture pixel 28
772 mov al, gr_fade_table[eax]
774 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
775 add ecx,Tmap1.DeltaVFrac // increment v fraction
777 sbb ebp,ebp // get -1 if carry
778 mov [edi+28],al // store pixel 28
780 add ebx,edx // increment u fraction
781 mov al,[esi] // get texture pixel 29
783 mov al, gr_fade_table[eax]
785 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
786 add ecx,Tmap1.DeltaVFrac // increment v fraction
788 sbb ebp,ebp // get -1 if carry
789 mov [edi+29],al // store pixel 29
791 add ebx,edx // increment u fraction
792 mov al,[esi] // get texture pixel 30
794 mov al, gr_fade_table[eax]
796 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
797 add ecx,Tmap1.DeltaVFrac // increment v fraction
799 sbb ebp,ebp // get -1 if carry
800 mov [edi+30],al // store pixel 30
802 add ebx,edx // increment u fraction
804 mov al,[esi] // get texture pixel 31
806 mov al, gr_fade_table[eax]
808 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
810 mov [edi+31],al // store pixel 31
814 ; ************** Okay to Access Stack Frame ****************
815 ; ************** Okay to Access Stack Frame ****************
816 ; ************** Okay to Access Stack Frame ****************
819 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
820 ; ZR V/ZR 1/ZR U/ZR UL VL
822 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
823 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
824 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
825 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
827 add edi,32 ; increment to next span
828 dec Tmap1.Subdivisions ; decrement span count
829 jnz SpanLoop ; loop back
831 // save new lighting values
834 // mov Tmap1.fx_l, eax
838 // mov Tmap1.fx_dl_dx, eax
// Remainder path: draws the WidthModLength pixels left after the whole spans
840 HandleLeftoverPixels:
843 mov esi,Tmap1.pixptr ; load texture pointer
845 ; edi = dest dib bits
846 ; esi = current texture dib bits
847 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
848 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
850 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
851 jz FPUReturn ; nope, pop the FPU and bail
853 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
855 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
856 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
857 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
859 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
860 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
861 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
863 dec Tmap1.WidthModLength ; calc how many steps to take
864 jz OnePixelSpan ; just one, don't do deltas
866 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
869 ; @todo rearrange things so we don't need these two instructions
870 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
871 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
// the right-edge terms are taken from the Right* fields minus one per-pixel delta
873 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
874 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
875 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
876 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
877 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
878 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
880 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
882 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
883 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
885 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
887 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
888 fxch st(1) ; VR UR inv. inv. inv. dU VL
889 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
890 fxch st(6) ; dV UR inv. inv. inv. dU VR
892 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
893 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
894 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
896 fxch st(4) ; dU inv. inv. inv. UR VR
897 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
898 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
899 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
901 ; @todo gross! these are to line up with the other loop
902 fld st(1) ; inv. inv. inv. inv. UR VR
903 fld st(2) ; inv. inv. inv. inv. inv. UR VR
// same integer/fraction split of DeltaU and DeltaV as in the full-span setup above
909 mov eax, Tmap1.DeltaV // get v 16.16 step
910 mov ebx, eax // copy it
911 sar eax, 16 // get v int step
912 shl ebx, 16 // get v frac step
913 mov Tmap1.DeltaVFrac, ebx // store it
914 imul eax, Tmap1.src_offset // calc texture step for v int step
916 mov ebx, Tmap1.DeltaU // get u 16.16 step
917 mov ecx, ebx // copy it
918 sar ebx, 16 // get the u int step
919 shl ecx, 16 // get the u frac step
920 mov Tmap1.DeltaUFrac, ecx // store it
921 add eax, ebx // calc uint + vint step
922 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
923 add eax, Tmap1.src_offset // calc whole step + v carry
924 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
// NOTE(review): this looks like a clamp of UFixed / VFixed to the
// Min*/Max* bounds, but the conditional jumps between the compares and the
// stores are not visible in this excerpt - confirm against the full file.
931 ; check coordinate ranges
932 mov eax, Tmap1.UFixed
933 cmp eax, Tmap1.MinUFixed
935 mov eax, Tmap1.MinUFixed
936 mov Tmap1.UFixed, eax
939 cmp eax, Tmap1.MaxUFixed
941 mov eax, Tmap1.MaxUFixed
942 mov Tmap1.UFixed, eax
944 mov eax, Tmap1.VFixed
945 cmp eax, Tmap1.MinVFixed
947 mov eax, Tmap1.MinVFixed
948 mov Tmap1.VFixed, eax
951 cmp eax, Tmap1.MaxVFixed
953 mov eax, Tmap1.MaxVFixed
954 mov Tmap1.VFixed, eax
961 ; setup initial coordinates
962 mov esi, Tmap1.UFixed // get u 16.16
963 mov ebx, esi // copy it
964 sar esi, 16 // get integer part
965 shl ebx, 16 // get fractional part
967 mov ecx, Tmap1.VFixed // get v 16.16
968 mov edx, ecx // copy it
969 sar edx, 16 // get integer part
970 shl ecx, 16 // get fractional part
971 imul edx, Tmap1.src_offset // calc texture scanline address
972 add esi, edx // calc texture offset
973 add esi, Tmap1.pixptr // calc address
975 ; set edi = address of first pixel to modify
976 ; mov edi, Tmap1.dest_row_data
985 mov edx, Tmap1.DeltaUFrac
987 cmp Tmap1.WidthModLength, 1
992 mov ebx, Tmap1.fx_l_right
999 // slow but maybe better
1002 mov ebx, Tmap1.WidthModLength
1007 mov eax, Tmap1.fx_dl_dx
1017 inc Tmap1.WidthModLength
1018 mov eax,Tmap1.WidthModLength
1022 mov Tmap1.WidthModLength, eax
// leftover pixels use the same per-pixel step sequence as the unrolled span code
1026 mov al,[edi] // preread the destination cache line
1029 mov al,[esi] // get texture pixel 0
1031 mov al, gr_fade_table[eax]
1033 add ecx,Tmap1.DeltaVFrac // increment v fraction
1034 sbb ebp,ebp // get -1 if carry
1035 add ebx,edx // increment u fraction
1036 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
1037 mov [edi+0],al // store pixel 0
1039 add ecx,Tmap1.DeltaVFrac // increment v fraction
1040 sbb ebp,ebp // get -1 if carry
1041 add ebx,edx // increment u fraction
1042 mov al,[esi] // get texture pixel 1
1044 mov al, gr_fade_table[eax]
1046 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
1047 mov [edi+1],al // store pixel 1
1050 dec Tmap1.WidthModLength
1058 mov al,[esi] // get texture pixel 2
1060 mov al, gr_fade_table[eax]
1061 mov [edi],al // store pixel 2
// Older leftover loop: recomputes the texture offset from ebx (u) and ecx (v)
// for every pixel, and stores the texel without the gr_fade_table lookup
1076 OldWay: // This is 6% slower than above
1078 mov ebx,Tmap1.UFixed ; get starting coordinates
1079 mov ecx,Tmap1.VFixed ; for span
1081 ; leftover pixels loop
1082 ; edi = dest dib bits
1083 ; esi = texture dib bits
1089 mov eax,ecx ; copy v
1091 imul eax,Tmap1.src_offset ; scan offset
1092 mov edx,ebx ; copy u
1094 add eax,edx ; texture offset
1095 mov al,[esi+eax] ; get source pixel
1097 mov [edi],al ; store it
1099 add ebx,Tmap1.DeltaU ; increment u coordinate
1100 add ecx,Tmap1.DeltaV ; increment v coordinate
1102 dec Tmap1.WidthModLength ; decrement loop count
1103 jl FPUReturn ; finish up
1107 mov eax,ecx ; copy v
1109 imul eax,Tmap1.src_offset ; scan offset
1110 mov edx,ebx ; copy u
1112 add eax,edx ; texture offset
1113 mov al,[esi+eax] ; get source pixel
1114 mov [edi],al ; store it
1116 add ebx,Tmap1.DeltaU ; increment u coordinate
1117 add ecx,Tmap1.DeltaV ; increment v coordinate
1119 dec Tmap1.WidthModLength ; decrement loop count
1120 jge LeftoverLoop ; finish up
1125 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
1126 ; xxx xxx xxx xxx xxx xxx xxx
// reload the control word that fstcw saved at the top of the function
1135 fldcw Tmap1.OldFPUCW // restore the FPU
// Older lit scanline routine. Unlike the Tmap1-based functions above, it
// passes its state to the inline assembly through the file-level globals
// (_fx_srcptr, _fx_destptr, _loop_count, _fx_u, _fx_v, _fx_l, _fx_du, _fx_dv,
// _fx_dl, light_table).
//   p     - left-end values; u, v and l are read
//   dp    - per-x deltas; u, v and l are read
//   rp, flags - not read in the lines visible here
// NOTE(review): most of the assembly body is not visible in this excerpt;
// only the lines shown are described.
1150 void tmapscan_lln8_old( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1152 _fx_srcptr = (uint)tmap_bitmap->data;
1153 _fx_destptr = (uint)GR_SCREEN_PTR(ubyte,lx,y);
1154 _loop_count = rx - lx;
// u and v (and their deltas) are scaled by 64, lighting by 32; the starting
// light value additionally has 1.0 added before conversion
1155 _fx_u = fl2f(p->u*64.0f);
1156 _fx_v = fl2f(p->v*64.0f);
1157 _fx_l = fl2f(p->l*32.0+1.0);
1158 _fx_du = fl2f(dp->u*64.0f);
1159 _fx_dv = fl2f(dp->v*64.0f);
1160 _fx_dl = fl2f(dp->l*32.0);
1161 light_table = (uint)&gr_fade_table[0];
1172 ; set edi = address of first pixel to modify
1173 mov edi, _fx_destptr
1180 mov dx, ax ; EDX=U:V in 6.10 format
1186 mov si, ax ; ESI=DU:DV in 6.10 format
1195 mov eax, _loop_count
1197 mov _loop_count, eax
1202 mov num_big_steps, eax
// each visible lookup below maps a value through gr_fade_table (one per pixel)
1215 mov al, gr_fade_table[eax]
1227 mov al, gr_fade_table[eax]
1239 mov al, gr_fade_table[eax]
1251 mov al, gr_fade_table[eax]
1263 mov al, gr_fade_table[eax]
1275 mov al, gr_fade_table[eax]
1287 mov al, gr_fade_table[eax]
1299 mov al, gr_fade_table[eax]
1313 mov _loop_count, eax
1326 mov al, gr_fade_table[eax]
1337 mov al, gr_fade_table[eax]
1355 mov al, gr_fade_table[eax]
// Flat-fills a 16-bit scanline: writes gr_screen.current_color.raw16 to the
// rx-lx+1 ushort pixels starting at (lx, y).
// p, dp, rp and flags are not read in the lines visible here.
1372 void tmapscan_flat16( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1377 pDestBits = GR_SCREEN_PTR(ushort,lx,y);
1379 for (i=0; i<(rx-lx+1); i++ )
1380 *pDestBits++ = gr_screen.current_color.raw16;
1383 float tmap_max_z = 0.0f;
// Floating-point C version of a lit, textured 8-bit scanline with a z test
// against the global tmap_max_z.
// NOTE(review): the lines that initialise and step u, v, l and z are not
// visible in this excerpt; only the per-pixel body shown is described.
1385 void tmapscan_lln8_z( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1388 ubyte *pDestBits, tmp;
1389 float u, dudx, v, dvdx, l, dldx;
1392 pDestBits = GR_SCREEN_PTR(ubyte,lx,y);
1394 ubyte * cdata = (ubyte *)tmap_bitmap->data;
// rx - lx + 1 iterations: both end pixels are included
1405 for ( count = rx - lx + 1 ; count > 0; count-- ) {
// a pixel is written only when z is below tmap_max_z
1406 if ( z < tmap_max_z ) {
// texel at row fl2i(v), column fl2i(u); then shaded via gr_fade_table,
// which is indexed as light*256 + texel
1407 tmp = cdata[fl2i(v)*tmap_bitmap->w+fl2i(u)];
1408 *pDestBits = gr_fade_table[ fl2i(l)*256+tmp ];
// Generic 8-bit scanline drawer: picks a drawing path from the
// TMAP_FLAG_TEXTURED / TMAP_FLAG_RAMP / TMAP_FLAG_CORRECT bits and draws the
// span lx..rx (rx-lx+1 pixels) on row y.
// NOTE(review): some tests read the `flags` parameter and others read
// Tmap1.flags; confirm the two always hold the same value.
// NOTE(review): braces, else-branches and the per-pixel stepping lines are
// not visible in this excerpt, so the nesting of the paths is not described.
1419 void tmapscan_generic8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1422 ubyte *pDestBits, tmp;
1423 int u, dudx, v, dvdx, w, dwdx, l, dldx;
1425 pDestBits = GR_SCREEN_PTR(ubyte,lx,y);
1427 if ( Tmap1.flags & TMAP_FLAG_TEXTURED ) {
1428 ubyte * cdata = (ubyte *)tmap_bitmap->data;
1429 if ( flags & TMAP_FLAG_RAMP ) {
1430 if ( Tmap1.flags & TMAP_FLAG_CORRECT ) {
1431 float fu, fv, fw, fdu, fdv, fdw;
// the span is handed to the assembly-based tmapscan_pln8
1433 tmapscan_pln8( lx, rx, y, p, dp, rp,Tmap1.flags );
1440 l = fl2f(p->l*32.0f);
1445 dldx = fl2f(dp->l*32.0f);
1447 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1450 tmp = cdata[v*tmap_bitmap->w+u];
// the fade-table lookup is commented out here, so the raw texel is written
1451 *pDestBits++ = tmp; //gr_fade_table[ (l>>16)*256+tmp ];
1453 //*pDestBits++ = tmp+1;
1462 tmapscan_lln8( lx, rx, y, p, dp, rp, flags );
1464 u = fl2f(p->u*64.0f);
1465 v = fl2f(p->v*64.0f);
1466 l = fl2f(p->l*32.0f);
1467 dudx = fl2f(dp->u*64.0f);
1468 dvdx = fl2f(dp->v*64.0f);
1469 dldx = fl2f(dp->l*32.0f);
1471 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1473 //tmp = cdata[((v>>16)&63)*64+((u>>16)&63)];
1474 //*pDestBits++ = ;//gr_fade_table[ (l>>16)*256+tmp ];
1484 if ( flags & TMAP_FLAG_CORRECT ) {
1485 u = fl2f(p->u*64.0f);
1486 v = fl2f(p->v*64.0f);
1487 w = fl2f(p->sw*16.0f);
1489 dudx = fl2f(dp->u*64.0f);
1490 dvdx = fl2f(dp->v*64.0f);
1491 dwdx = fl2f(dp->sw*16.0f);
1493 for ( count = rx - lx + 1 ; count > 0; count-- ) {
// per-pixel divide by w; the &63 and *64 indexing wraps u and v into a 64x64 texel grid
1494 tmp = cdata[((v/w)&63)*64+((u/w)&63)];
1501 u = fl2f(p->u*64.0f);
1502 v = fl2f(p->v*64.0f);
1503 dudx = fl2f(dp->u*64.0f);
1504 dvdx = fl2f(dp->v*64.0f);
1506 for ( count = rx - lx + 1 ; count > 0; count-- ) {
// no divide here: the value above bit 16 of u and v is used, again wrapped to 64x64
1507 tmp = cdata[((v>>16)&63)*64+((u>>16)&63)];
1515 if ( Tmap1.flags & TMAP_FLAG_RAMP ) {
1516 l = fl2f(p->l*32.0f);
1517 dldx = fl2f(dp->l*32.0f);
1519 for ( count = rx - lx + 1 ; count > 0; count-- ) {
// untextured but lit: the current colour is shaded through gr_fade_table
1520 *pDestBits++ = gr_fade_table[ (l>>16)*256+gr_screen.current_color.raw8 ];
// untextured and unlit: plain fill with the current 8-bit colour
1524 memset( pDestBits, gr_screen.current_color.raw8, (rx-lx+1) );
1530 uint fsave_area[64];
// Packs r, g and b into one 64-bit value: the low 16 bits of each argument
// are OR-ed into tmp in the order r, g, b.
// NOTE(review): the initialisation of tmp, the shifts between the OR steps
// and the return statement are not visible in this excerpt, so the final bit
// layout cannot be stated from here - confirm against the full file.
1532 unsigned __int64 packrgb( int r, int g, int b )
1534 unsigned __int64 tmp;
// each argument is re-read through an unsigned int pointer and masked to 16 bits
1539 tmps = (unsigned int *)&r;
1540 tmp |= *tmps & 0xFFFF;
1543 tmps = (unsigned int *)&g;
1544 tmp |= *tmps & 0xFFFF;
1547 tmps = (unsigned int *)&b;
1548 tmp |= *tmps & 0xFFFF;
// Draws one horizontal span of row y, from column lx to rx inclusive
// (rx - lx + 1 pixels), into a uint-per-pixel destination.
//
//   p  - values at the left end of the span (u, v, sw, r, g, b are read)
//   dp - per-pixel deltas of the same quantities
//
// The path is selected from Tmap1.flags:
//   TEXTURED + GOURAUD + CORRECT : per-pixel u/w, v/w lookup, lit through
//                                  the rgbtable1/2/3 lookup tables
//   TEXTURED + GOURAUD           : linear u,v lookup, lit with MMX
//   TEXTURED + CORRECT           : per-pixel u/w, v/w lookup, unlit
//   TEXTURED                     : linear u,v lookup, unlit
//   GOURAUD only                 : interpolated r,g,b written directly
//   neither                      : solid fill with the current 32-bit colour
//
// Texture coordinates are scaled by 64 and masked with 63, i.e. the texel
// index is ((v)&63)*64 + ((u)&63).
1555 void tmapscan_generic( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1558 uint *pDestBits, tmp, tmp1;
1559 int u, dudx, v, dvdx, w, dwdx;
1560 int r, g, b, dr, dg, db;
1562 if ( !rgbtable_inited )
1565 pDestBits = GR_SCREEN_PTR(uint,lx,y);
1567 if ( Tmap1.flags & TMAP_FLAG_TEXTURED ) {
1568 uint * cdata = (uint *)tmap_bitmap->data;
1570 if ( Tmap1.flags & TMAP_FLAG_GOURAUD ) {
1571 if ( Tmap1.flags & TMAP_FLAG_CORRECT ) {
1572 u = fl2f(p->u*64.0f);
1573 v = fl2f(p->v*64.0f);
1576 r = fl2f(p->r*255.0f);
1577 g = fl2f(p->g*255.0f);
1578 b = fl2f(p->b*255.0f);
1580 dr = fl2f(dp->r*255.0f);
1581 dg = fl2f(dp->g*255.0f);
1582 db = fl2f(dp->b*255.0f);
1584 dudx = fl2f(dp->u*64.0f);
1585 dvdx = fl2f(dp->v*64.0f);
1586 dwdx = fl2f(dp->sw);
1588 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1589 tmp = cdata[((v/w)&63)*64+((u/w)&63)];
// Each texel byte is offset by the integer part of the matching light
// component and mapped through a lookup table; the three results are OR-ed
// into one pixel.
1590 tmp1 = rgbtable1[ (tmp & 0xFF)+ (b>>16) ];
1591 tmp1 |= rgbtable2[ ((tmp>>8) & 0xFF)+ (g>>16) ];
1592 tmp1 |= rgbtable3[ ((tmp>>16) & 0xFF)+ (r>>16) ];
1593 *pDestBits++ = tmp1;
1603 __int64 light, deltalight;
1605 u = fl2f(p->u*64.0f);
1606 v = fl2f(p->v*64.0f);
1607 dudx = fl2f(dp->u*64.0f);
1608 dvdx = fl2f(dp->v*64.0f);
1611 r = fl2f(p->r*255.0f)>>8;
1612 g = fl2f(p->g*255.0f)>>8;
1613 b = fl2f(p->b*255.0f)>>8;
1615 dr = fl2f(dp->r*255.0f)>>8;
1616 dg = fl2f(dp->g*255.0f)>>8;
1617 db = fl2f(dp->b*255.0f)>>8;
1623 dr = fl2f(dp->r)>>7;
1624 dg = fl2f(dp->g)>>7;
1625 db = fl2f(dp->b)>>7;
1633 light = packrgb( r, g, b );
1634 deltalight = packrgb( dr, dg, db );
// NOTE(review): fstenv saves only the FPU environment, whereas the frstor
// after the loop reloads a full FPU state image from the same buffer --
// confirm this pairing is intended.
1636 _asm fstenv fsave_area
1637 _asm movq mm3, light
1638 _asm movq mm4, deltalight
1639 _asm pxor mm2, mm2 ; mm2 = 0
1641 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1642 testpixel = cdata[((v>>16)&63)*64+((u>>16)&63)];
1644 _asm punpcklbw mm2, testpixel ; mm2 = 8.8,8.8, 8.8 rgb
1645 _asm pmulhw mm2, mm3 ;
1646 _asm paddsw mm3, mm4 ; light += deltalight
1647 _asm packuswb mm2, mm2 ;mm2 is who cares
1648 _asm movd testpixel, mm2 ; load tmp
1649 _asm pxor mm2, mm2 ; mm2 = 0
1651 *pDestBits++ = testpixel;
1656 _asm frstor fsave_area
1659 if ( Tmap1.flags & TMAP_FLAG_CORRECT ) {
1660 u = fl2f(p->u*64.0f);
1661 v = fl2f(p->v*64.0f);
1663 dudx = fl2f(dp->u*64.0f);
1664 dvdx = fl2f(dp->v*64.0f);
1665 dwdx = fl2f(dp->sw);
1667 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1668 *pDestBits++ = cdata[((v/w)&63)*64+((u/w)&63)];
1674 u = fl2f(p->u*64.0f);
1675 v = fl2f(p->v*64.0f);
1676 dudx = fl2f(dp->u*64.0f);
1677 dvdx = fl2f(dp->v*64.0f);
1679 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1680 *pDestBits++ = cdata[((v>>16)&63)*64+((u>>16)&63)];
1686 } else if ( Tmap1.flags & TMAP_FLAG_GOURAUD ) {
1688 r = fl2f(p->r*255.0f);
1689 g = fl2f(p->g*255.0f);
1690 b = fl2f(p->b*255.0f);
1692 dr = fl2f(dp->r*255.0f);
1693 dg = fl2f(dp->g*255.0f);
1694 db = fl2f(dp->b*255.0f);
1696 for ( count = rx - lx + 1 ; count > 0; count-- ) {
// r, g, b are 16.16 fixed point: take the integer byte of each and place it
// in bits 16-23, 8-15 and 0-7 respectively.
1697 *pDestBits++ = (r&0xFF0000)|((g>>8)&0xFF00)|(b>>16);
1701 //*pDestBits++ = 100;
// Solid fill. memset() would replicate only the low byte of raw32 into every
// byte of the span, so write whole 32-bit pixels instead.
1704 for ( count = rx - lx + 1 ; count > 0; count-- ) *pDestBits++ = gr_screen.current_color.raw32;
// Fills a span on row y, starting at column lx, with the current 32-bit
// colour (gr_screen.current_color.raw32). An inline-assembly path is used
// when USE_INLINE_ASM is defined; the C loop below writes w pixels.
1708 void tmapscan_flat( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1713 pDestBits = GR_SCREEN_PTR(uint,lx,y);
1715 #ifdef USE_INLINE_ASM
// eax = fill colour, edi = destination pointer.
1716 _asm mov eax, gr_screen.current_color.raw32
1718 _asm mov edi, pDestBits
1722 for (i=0; i<w; i++ ) {
1723 *pDestBits++ = gr_screen.current_color.raw32;
// Float depth buffer sized for a fixed 640x480 layout; tmapscan_flat_z
// addresses it as zbuffer[y*640 + x].
1728 float zbuffer[640*480];
// Resets every entry of the 640x480 zbuffer to 10000.0f.
1730 void zbuffer_clear()
1733 for (i=0; i<640*480; i++ )
1734 zbuffer[i] = 10000.0f;
// Flat-colour span fill that also walks the zbuffer row for this span:
// tz points at zbuffer[y*640 + lx] and pDestBits at the matching screen
// pixel. Pixels are written with gr_screen.current_color.raw32.
// NOTE(review): presumably each pixel is depth-tested against *tz before the
// store -- confirm against the loop body.
1737 void tmapscan_flat_z( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1744 tz = &zbuffer[y*640+lx];
1745 pDestBits = GR_SCREEN_PTR(uint,lx,y);
1749 //#ifdef USE_INLINE_ASM
// eax = fill colour, edi = destination pointer.
1751 _asm mov eax, gr_screen.current_color.raw32
1753 _asm mov edi, pDestBits
1758 for (i=0; i<w; i++ ) {
1761 *pDestBits = gr_screen.current_color.raw32;
// Scratch buffer shared by tmapscan_pln and tmapscan_lln: written with
// fstenv before their MMX loops and read back with frstor afterwards.
1775 uint fsave_area1[64];
// Textured, MMX-lit scanline for a uint-per-pixel destination that divides
// by w (idiv) at subdivision boundaries and interpolates u,v in 6.10 fixed
// point in between.
//
//   p  - values at the left end of the span (u, v, sw read)
//   dp - per-pixel deltas (u, v, sw, r, g, b read)
//   rp - values at the right end of the span (u, v, sw read)
//
// State is passed to the assembly through file-level globals (_fx_*,
// _loop_count, U0, V0, ...) rather than locals.
1777 void tmapscan_pln( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1779 __int64 light, deltalight;
1780 int r, g, b, dr, dg, db;
1781 _fx_srcptr = (uint)tmap_bitmap->data;
1782 _fx_destptr = (uint)GR_SCREEN_PTR(uint,lx,y);
// _loop_count is rx - lx, i.e. one less than the number of pixels in [lx, rx].
1783 _loop_count = rx - lx;
// u and v are scaled by 64 and w by 16 before conversion to fixed point.
1784 _fx_u = fl2f(p->u*64.0f);
1785 _fx_v = fl2f(p->v*64.0f);
1786 _fx_w = fl2f(p->sw*16.0);
1787 _fx_du = fl2f(dp->u*64.0f);
1788 _fx_dv = fl2f(dp->v*64.0f);
1789 _fx_dw = fl2f(dp->sw*16.0);
1791 _fx_u_right = fl2f(rp->u*64.0f);
1792 _fx_v_right = fl2f(rp->v*64.0f);
1793 _fx_w_right = fl2f(rp->sw*16.0);
1799 dr = fl2f(dp->r)>>7;
1800 dg = fl2f(dp->g)>>7;
1801 db = fl2f(dp->b)>>7;
// Pack the light and its per-pixel delta as 16-bit lanes: r in bits 32-47,
// g in bits 16-31, b in bits 0-15.
1803 light = ((__int64)r<<32)|((__int64)g<<16)|(__int64)b;
1804 deltalight = ((__int64)dr<<32)|((__int64)dg<<16)|(__int64)db;
// mm3 holds the running light, mm4 its per-pixel increment.
1806 _asm fstenv fsave_area1
1807 _asm movq mm3, light
1808 _asm movq mm4, deltalight
1824 ; compute initial v coordinate
1829 idiv ecx ; eax = (v/z)
1832 ; compute initial u coordinate
1837 idiv ecx ; eax = (v/z)
1842 ; find number of subdivisions
1843 mov eax, _loop_count
1848 mov num_left_over, esi
1849 jz DoEndPixels ;there are no 2^NBITS chunks, do divide/pixel for whole scanline
1850 mov _loop_count, eax
1852 ; Set deltas to NPIXS pixel increments
1869 ; Done with ebx, ebp, ecx until next iteration
1881 idiv ecx ; eax = (v/z)
1882 mov ebx, eax ; ebx = U1 until pop's
1889 idiv ecx ; eax = (v/z)
// NOTE(review): the trailing comment on the next line names ebx, but the
// destination register is ebp.
1890 mov ebp, eax ; ebx = V1 until pop's
1892 ; Get last correct U,Vs
1893 mov ecx, U0 ; ecx = U0 until pop's
1894 mov edi, V0 ; edi = V0 until pop's
1896 ; Make ESI = V0:U0 in 6:10,6:10 format
1903 ; Make EDX = DV:DU in 6:10,6:10 format
1909 shl edx, 10-NBITS ; EDX = V1-V0/ 4 in 6:10 int:frac
1910 mov dx, ax ; put delta u in low word
1912 ; Save the U1 and V1 so we don't have to divide on the next iteration
1916 pop edi ; Restore EDI before using it
// Per pixel: fetch a 4-byte texel, widen its bytes into the high halves of
// mm2's 16-bit lanes, advance the light, pack back to bytes and store.
1927 movd mm1, [eax*4+ecx]
1928 pxor mm2, mm2 ; mm2 = 0
1929 punpcklbw mm2, mm1 ; mm0 = 8.8,8.8, 8.8 rgb
1931 paddsw mm3, mm4 ; light += deltalight
1932 packuswb mm2, mm2 ;mm2 is who cares
1933 movd [edi], mm2 ; load tmp
1946 test num_left_over, -1
1949 cmp num_left_over, 4
1952 ; If less than 4, then just keep interpolating without
1953 ; calculating a new DU:DV.
1957 ; ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
// The leftover run is interpolated towards the right-edge values
// (_fx_u_right, _fx_v_right divided by _fx_w_right).
1961 mov ecx, _fx_w_right
1964 mov eax, _fx_u_right
1968 idiv ecx ; eax = (v/z)
1969 mov ebx, eax ; ebx = U1 until pop's
1972 mov eax, _fx_v_right
1976 idiv ecx ; eax = (v/z)
1977 mov ebp, eax ; ebp = V1 until pop's
1979 mov ecx, U0 ; ecx = U0 until pop's
1980 mov edi, V0 ; edi = V0 until pop's
1982 ; Make EDX = DV:DU in 6:10,6:10 format
1985 mov edx, eax ; These two lines are faster than cdq
1987 idiv num_left_over ; eax = (v1-v0)/num_left_over
1988 shl eax, 16-6 ; go from 16.16 to 6.10, and move into high 16 bits
1989 mov esi, eax ; esi = dvdx<<16
1993 mov edx, eax ; These two lines are faster than cdq
1995 idiv num_left_over ; eax = (u1-u0)/num_left_over
1996 sar eax, 6 ; go from 16.16 to 6.10 (ax=dvdx in 6.10)
1997 mov si, ax ; esi = dvdx:dudx
2000 ; Make ESI = V0:U0 in 6:10,6:10 format
2007 pop edi ; Restore EDI before using it
2018 ; mov eax, [eax*4+ecx]
2020 movd mm1, [eax*4+ecx]
2021 pxor mm2, mm2 ; mm2 = 0
2022 punpcklbw mm2, mm1 ; mm0 = 8.8,8.8, 8.8 rgb
2024 paddsw mm3, mm4 ; light += deltalight
2025 packuswb mm2, mm2 ;mm2 is who cares
2026 movd [edi], mm2 ; load tmp
// NOTE(review): fstenv saved only the FPU environment; frstor reloads a full
// state image from the same buffer -- confirm this pairing is intended.
2044 _asm frstor fsave_area1
// Textured, MMX-lit scanline for a uint-per-pixel destination that steps
// u and v linearly (no w term is set up here).
//
//   p  - values at the left end of the span (u, v read)
//   dp - per-pixel deltas (u, v, r, g, b read)
//
// State is passed to the assembly through file-level globals (_fx_*,
// _loop_count).
2048 void tmapscan_lln( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
2050 __int64 light, deltalight;
2051 int r, g, b, dr, dg, db;
2052 _fx_srcptr = (uint)tmap_bitmap->data;
2053 _fx_destptr = (uint)GR_SCREEN_PTR(uint,lx,y);
// _loop_count is rx - lx, i.e. one less than the number of pixels in [lx, rx].
2054 _loop_count = rx - lx;
2055 _fx_u = fl2f(p->u*64.0f);
2056 _fx_v = fl2f(p->v*64.0f);
2057 _fx_du = fl2f(dp->u*64.0f);
2058 _fx_dv = fl2f(dp->v*64.0f);
2064 dr = fl2f(dp->r)>>7;
2065 dg = fl2f(dp->g)>>7;
2066 db = fl2f(dp->b)>>7;
// Pack the light and its per-pixel delta as 16-bit lanes: r in bits 32-47,
// g in bits 16-31, b in bits 0-15.
2068 light = ((__int64)r<<32)|((__int64)g<<16)|(__int64)b;
2069 deltalight = ((__int64)dr<<32)|((__int64)dg<<16)|(__int64)db;
// mm3 holds the running light, mm4 its per-pixel increment.
2071 _asm fstenv fsave_area1
2072 _asm movq mm3, light
2073 _asm movq mm4, deltalight
2088 ; find number of subdivisions
2089 mov eax, _loop_count
2092 mov _loop_count, eax
2094 ; Make ESI = V0:U0 in 6:10,6:10 format
2101 ; Make EDX = DV:DU in 6:10,6:10 format
2107 shl edx, 10-NBITS ; EDX = V1-V0/ 4 in 6:10 int:frac
2108 mov dx, ax ; put delta u in low word
2111 mov ebx, _loop_count
// Per pixel: fetch a 4-byte texel, widen its bytes into the high halves of
// mm2's 16-bit lanes, advance the light, pack back to bytes and store.
2119 movd mm1, [eax*4+ecx]
2120 pxor mm2, mm2 ; mm2 = 0
2121 punpcklbw mm2, mm1 ; mm0 = 8.8,8.8, 8.8 rgb
2123 paddsw mm3, mm4 ; light += deltalight
2124 packuswb mm2, mm2 ;mm2 is who cares
2125 movd [edi], mm2 ; load tmp
// NOTE(review): fstenv saved only the FPU environment; frstor reloads a full
// state image from the same buffer -- confirm this pairing is intended.
2142 _asm frstor fsave_area1
// Tiled, lit scanline for an 8-bit destination that uses the FPU to compute
// 1/(1/z) once per 32-pixel subdivision and interpolates u,v linearly inside
// each subdivision.
//
//   p  - values at the left end of the span (u, v, sw, l read)
//   dp - per-pixel deltas (u, v, sw, l read)
//   rp - values at the right end of the span (u, v, sw read)
//
// All working state lives in the Tmap1 structure. The routine returns
// without drawing unless the bitmap is exactly 64x64. Output pixels are
// looked up through gr_fade_table.
2148 void tmapscan_pln8_tiled( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
2150 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
2151 Tmap1.loop_count = rx - lx;
2152 Tmap1.fx_u = fl2f(p->u);
2153 Tmap1.fx_v = fl2f(p->v);
2154 Tmap1.fx_du_dx = fl2f(dp->u);
2155 Tmap1.fx_dv_dx = fl2f(dp->v);
2157 Tmap1.fx_l = fl2f(p->l*32.0);
2158 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
2160 Tmap1.fx_u_right = fl2f(rp->u);
2161 Tmap1.fx_v_right = fl2f(rp->v);
2162 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
2163 Tmap1.bp = tmap_bitmap;
2164 Tmap1.src_offset = tmap_bitmap->w;
// Float multipliers used with fmul/fistp below to produce fixed-point values.
2167 Tmap1.FixedScale = 65536.0f;
2168 Tmap1.FixedScale8 = 2048.0f; // 2^16 / 32 (previously 8192.0f = 2^16 / 8)
2172 Tmap1.UOverZ = p->u;
2173 Tmap1.VOverZ = p->v;
2174 Tmap1.OneOverZ = p->sw;
// Per-subdivision steps. Despite the "8" suffix these are 32-pixel deltas,
// matching the 32-pixel subdivision length used by the assembly.
2176 Tmap1.dUOverZdX8 = dp->u*32.0f;
2177 Tmap1.dVOverZdX8 = dp->v*32.0f;
2178 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
2180 Tmap1.dUOverZdX = dp->u;
2181 Tmap1.dVOverZdX = dp->v;
2182 Tmap1.dOneOverZdX = dp->sw;
2184 Tmap1.RightUOverZ = rp->u;
2185 Tmap1.RightVOverZ = rp->v;
2186 Tmap1.RightOneOverZ = rp->sw;
2188 Tmap1.BitmapWidth = Tmap1.bp->w;
2189 Tmap1.BitmapHeight = Tmap1.bp->h;
// Only 64x64 textures are handled; anything else is silently not drawn.
2191 if (Tmap1.BitmapWidth!=64) return;
2192 if (Tmap1.BitmapHeight!=64) return;
// The light delta is kept non-negative: when it is negative, negate it and
// mirror the light values about 67*F1_0.
2196 if ( Tmap1.fx_dl_dx < 0 ) {
2197 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
2198 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
2199 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
2214 // put the FPU in 32 bit mode
2215 // @todo move this out of here!
2217 fstcw Tmap1.OldFPUCW // store copy of CW
2218 mov ax,Tmap1.OldFPUCW // get it in ax
2219 //hh and eax,NOT 1100000000y // 24 bit precision
2221 mov Tmap1.FPUCW,ax // store it
2222 fldcw Tmap1.FPUCW // load the FPU
2224 mov ecx, Tmap1.loop_count // ecx = width
2226 mov edi, Tmap1.dest_row_data // edi = dest pointer
2228 // edi = pointer to start pixel in dest dib
2231 mov eax,ecx // eax and ecx = width
2232 shr ecx,5 // ecx = width / subdivision length
2233 and eax,31 // eax = width mod subdivision length
2234 jnz some_left_over // any leftover?
2236 dec ecx // no, so special case last span
2237 mov eax,32 // it's 32 pixels long (one full subdivision)
2239 mov Tmap1.Subdivisions,ecx // store widths
2240 mov Tmap1.WidthModLength,eax
2242 // mov ebx,pLeft ; get left edge pointer
2243 // mov edx,pGradients ; get gradients pointer
2245 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
2246 // st0 st1 st2 st3 st4 st5 st6 st7
2247 fld Tmap1.VOverZ // V/ZL
2248 fld Tmap1.UOverZ // U/ZL V/ZL
2249 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
2250 fld1 // 1 1/ZL U/ZL V/ZL
2251 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
2252 fld st // ZL ZL 1/ZL U/ZL V/ZL
2253 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
2254 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
2255 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
2257 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
2258 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
2260 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
2262 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
2263 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
2264 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
2265 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
2266 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
2268 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
2270 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
2271 // @todo overlap this guy
2272 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
2273 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
2274 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
2275 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
2276 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
2278 cmp ecx,0 // check for any full spans
2279 jle HandleLeftoverPixels
2283 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
2284 // UR VR V/ZR 1/ZR U/ZR UL VL
2286 // convert left side coords
2288 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
2289 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
2290 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
2292 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
2293 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
2294 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
2296 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
2298 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
2299 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
2300 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
2301 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
2303 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
2304 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
2306 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
2307 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
2308 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
2310 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
2311 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
2313 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
2314 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
2315 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
2316 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
2317 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
2318 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
2319 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
2321 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
// The divide for the next subdivision is issued here and its result is only
// consumed after the pixel loop ("the fdiv is done, finish right" below).
2323 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
2324 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
2327 ; ************** Can't Access Stack Frame ******************
2328 ; ************** Can't Access Stack Frame ******************
2329 ; ************** Can't Access Stack Frame ******************
2331 // 32 pixel span code
2332 // edi = dest dib bits at current pixel
2333 // esi = texture pointer at current u,v
2335 // ebx = u fraction 0.32
2336 // ecx = v fraction 0.32
2337 // edx = u frac step
2338 // ebp = v carry scratch
2343 // ecx = source pixels
2344 // edx = u v in 6.10 6.10
2345 // esi = du dv in 6.10 6.10
2346 // edi = dest pixels
2347 // ebp = dldx in 24.8
2354 mov ebp, Tmap1.fx_dl_dx
2363 mov ecx, Tmap1.pixptr // ecx = source pixels
2365 ; Make ESI = DV:DU in 6:10,6:10 format
2366 mov eax, Tmap1.DeltaU
2368 mov esi, Tmap1.DeltaV
2372 ; Make EDX = DV:DU in 6:10,6:10 format
2374 mov eax, Tmap1.UFixed
2376 mov edx, Tmap1.VFixed
// Unrolled body: 32 fade-table lookups, one per pixel of the subdivision.
2391 mov al, gr_fade_table[eax]
2403 mov al, gr_fade_table[eax]
2415 mov al, gr_fade_table[eax]
2427 mov al, gr_fade_table[eax]
2439 mov al, gr_fade_table[eax]
2451 mov al, gr_fade_table[eax]
2463 mov al, gr_fade_table[eax]
2475 mov al, gr_fade_table[eax]
2487 mov al, gr_fade_table[eax]
2499 mov al, gr_fade_table[eax]
2511 mov al, gr_fade_table[eax]
2523 mov al, gr_fade_table[eax]
2535 mov al, gr_fade_table[eax]
2547 mov al, gr_fade_table[eax]
2559 mov al, gr_fade_table[eax]
2571 mov al, gr_fade_table[eax]
2583 mov al, gr_fade_table[eax]
2595 mov al, gr_fade_table[eax]
2607 mov al, gr_fade_table[eax]
2619 mov al, gr_fade_table[eax]
2631 mov al, gr_fade_table[eax]
2643 mov al, gr_fade_table[eax]
2655 mov al, gr_fade_table[eax]
2667 mov al, gr_fade_table[eax]
2679 mov al, gr_fade_table[eax]
2691 mov al, gr_fade_table[eax]
2703 mov al, gr_fade_table[eax]
2715 mov al, gr_fade_table[eax]
2727 mov al, gr_fade_table[eax]
2739 mov al, gr_fade_table[eax]
2751 mov al, gr_fade_table[eax]
2763 mov al, gr_fade_table[eax]
2767 ; ************** Okay to Access Stack Frame ****************
2768 ; ************** Okay to Access Stack Frame ****************
2769 ; ************** Okay to Access Stack Frame ****************
2772 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
2773 ; ZR V/ZR 1/ZR U/ZR UL VL
2775 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
2776 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
2777 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
2778 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
2780 add edi,32 ; increment to next span
2781 dec Tmap1.Subdivisions ; decrement span count
2782 jnz SpanLoop ; loop back
2784 HandleLeftoverPixels:
2786 mov esi,Tmap1.pixptr ; load texture pointer
2788 ; edi = dest dib bits
2789 ; esi = current texture dib bits
2790 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
2791 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
2793 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
2794 jz FPUReturn ; nope, pop the FPU and bail
2796 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
2798 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
2799 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
2800 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
2802 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
2803 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
2804 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
2806 dec Tmap1.WidthModLength ; calc how many steps to take
2807 jz OnePixelSpan ; just one, don't do deltas
2809 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
2812 ; @todo rearrange things so we don't need these two instructions
2813 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
2814 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
// The leftover run ends one pixel step before the stored right-edge values,
// hence the subtraction of the single-pixel deltas.
2816 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
2817 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
2818 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
2819 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
2820 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
2821 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
2823 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
2825 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
2826 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
2828 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
2830 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
2831 fxch st(1) ; VR UR inv. inv. inv. dU VL
2832 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
2833 fxch st(6) ; dV UR inv. inv. inv. dU VR
2835 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
2836 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
2837 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
2839 fxch st(4) ; dU inv. inv. inv. UR VR
2840 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
2841 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
2842 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
2844 ; @todo gross! these are to line up with the other loop
2845 fld st(1) ; inv. inv. inv. inv. UR VR
2846 fld st(2) ; inv. inv. inv. inv. inv. UR VR
2854 mov ebp, Tmap1.fx_dl_dx
2864 ; Make ESI = DV:DU in 6:10,6:10 format
2865 mov eax, Tmap1.DeltaU
2867 mov esi, Tmap1.DeltaV
2871 ; Make EDX = DV:DU in 6:10,6:10 format
2873 mov eax, Tmap1.UFixed
2875 mov edx, Tmap1.VFixed
2879 mov ecx, Tmap1.pixptr // ecx = source pixels
// WidthModLength was decremented above to obtain the step count; restore the
// pixel count before the leftover loop.
2881 inc Tmap1.WidthModLength
2882 mov eax,Tmap1.WidthModLength
2886 mov Tmap1.WidthModLength, eax
2902 mov al, gr_fade_table[eax]
2914 mov al, gr_fade_table[eax]
2919 dec Tmap1.WidthModLength
2937 mov al, gr_fade_table[eax]
2942 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
2943 ; xxx xxx xxx xxx xxx xxx xxx
2954 fldcw Tmap1.OldFPUCW // restore the FPU
// Fills the current span (described by Tmap1) with a single colour, choosing
// the store width from gr_screen.bits_per_pixel.
// NOTE(review): the memset writes Tmap1.loop_count bytes, while the C loop
// below iterates loop_count+1 times (x runs from loop_count down to 0
// inclusive) -- confirm which pixel count the callers expect.
2968 void c_tmap_scanline_flat()
2970 switch( gr_screen.bits_per_pixel ) {
2973 memset( Tmap1.dest_row_data, gr_screen.current_color.raw8, Tmap1.loop_count );
2978 dest = Tmap1.dest_row_data;
2980 for (x=Tmap1.loop_count; x >= 0; x-- ) {
2982 *dest++ = Tmap1.tmap_flat_color;
// 16-bit store setup: ecx = count, ax = colour, edi = destination.
2988 _asm mov ecx, Tmap1.loop_count
2989 _asm mov ax, gr_screen.current_color.raw16;
2990 _asm mov edi, Tmap1.dest_row_data16
2995 _asm mov ecx, Tmap1.loop_count
2996 _asm mov ax, gr_screen.current_color.raw16;
2997 _asm mov edi, Tmap1.dest_row_data16
// 32-bit store setup: ecx = count, eax = colour, edi = destination.
3002 _asm mov ecx, Tmap1.loop_count
3003 _asm mov eax, gr_screen.current_color.raw32;
3004 _asm mov edi, Tmap1.dest_row_data32
// Re-shades the pixels already in the destination span: each existing byte
// is replaced by gr_fade_table[(tmap_flat_shade_value << 8) | pixel].
// The loop runs Tmap1.loop_count + 1 times (x from loop_count down to 0).
3012 void c_tmap_scanline_shaded()
3018 dest = Tmap1.dest_row_data;
// The shade value selects a 256-entry row of gr_fade_table.
3020 fade = Tmap1.tmap_flat_shade_value<<8;
3021 for (x=Tmap1.loop_count; x >= 0; x-- ) {
// Read and write the pixel first, then advance: combining "*dest++ =" with a
// read of *dest in one expression is unsequenced before C++17.
3022 *dest = gr_fade_table[ fade |(*dest)]; dest++;
// Unlit scanline for a 64x64 texture with linearly stepped u,v.
// The v step is pre-multiplied by 64, so masking f2i(v) with 64*63 directly
// yields the row offset and f2i(u)&63 the column. The loop runs
// Tmap1.loop_count + 1 times.
3026 void c_tmap_scanline_lin_nolight()
3035 dudx = Tmap1.fx_du_dx;
3036 dvdx = Tmap1.fx_dv_dx*64;
3038 dest = Tmap1.dest_row_data;
// Opaque path: every texel is written.
3040 if (!Tmap1.Transparency_on) {
3041 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3042 *dest++ = (uint)Tmap1.pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
// Transparency path: the texel is fetched into c before being stored.
// NOTE(review): presumably c is compared against a transparent index before
// the store -- confirm.
3047 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3048 c = (uint)Tmap1.pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
// NOTE(review): presumably the lit counterpart of
// c_tmap_scanline_lin_nolight (judging by the name only) -- confirm.
3059 void c_tmap_scanline_lin()
// Unlit scanline for a 64x64 texture that divides u and v by z at every
// pixel. The v step is pre-multiplied by 64, so (v/z)&(64*63) is the row
// offset and (u/z)&63 the column. The loop runs Tmap1.loop_count + 1 times.
3066 void c_tmap_scanline_per_nolight()
3071 fix u,v,z,dudx, dvdx, dzdx;
3076 dudx = Tmap1.fx_du_dx;
3077 dvdx = Tmap1.fx_dv_dx*64;
3078 dzdx = Tmap1.fx_dz_dx;
3080 dest = Tmap1.dest_row_data;
// Opaque path: every texel is written.
3082 if (!Tmap1.Transparency_on) {
3083 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3084 *dest++ = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
// Transparency path: the texel is fetched into c before being stored.
3090 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3091 c = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
// Lit scanline for a 64x64 texture that divides u and v by z at every pixel
// and shades the texel through gr_fade_table, using bits 8-15 of the light
// value l as the table row. The loop runs Tmap1.loop_count + 1 times.
3102 void c_tmap_scanline_per1()
3107 fix u,v,z,l,dudx, dvdx, dzdx, dldx;
3112 dudx = Tmap1.fx_du_dx;
3113 dvdx = Tmap1.fx_dv_dx*64;
3114 dzdx = Tmap1.fx_dz_dx;
3117 dldx = Tmap1.fx_dl_dx;
3118 dest = Tmap1.dest_row_data;
// Opaque path: every texel is shaded and written.
3120 if (!Tmap1.Transparency_on) {
3121 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3122 *dest++ = gr_fade_table[ (l&(0xff00)) + (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
// Transparency path: the texel is fetched into c first, then shaded.
3129 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3130 c = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
3132 *dest = gr_fade_table[ (l&(0xff00)) + c ];
// Scanline variant with the same u, v, z setup as
// c_tmap_scanline_per_nolight. In the opaque path the texture fetch is
// commented out; the transparency path still fetches the texel into c.
// The loops run Tmap1.loop_count + 1 times.
3144 void c_tmap_scanline_editor()
3149 fix u,v,z,dudx, dvdx, dzdx;
3154 dudx = Tmap1.fx_du_dx;
3155 dvdx = Tmap1.fx_dv_dx*64;
3156 dzdx = Tmap1.fx_dz_dx;
3158 dest = Tmap1.dest_row_data;
3160 if (!Tmap1.Transparency_on) {
3161 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3163 //(uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
3169 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3170 c = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
// Tiled, lit, linearly interpolated 8-bit scanline driven entirely by Tmap1.
// Returns without drawing unless the bitmap is exactly 64x64. u,v and their
// deltas are packed as two 6.10 values in one register; pixels are shaded
// through gr_fade_table. The span is processed in groups of eight pixels
// (Tmap1.num_big_steps) followed by the remaining loop_count & 7.
3181 void asm_tmap_scanline_lln_tiled()
3183 if ( Tmap1.BitmapWidth != 64 ) return;
3184 if ( Tmap1.BitmapHeight != 64 ) return;
3195 ; set edi = address of first pixel to modify
3196 mov edi, Tmap1.dest_row_data
3202 mov dx, ax ; EDX=U:V in 6.10 format
3204 mov eax, Tmap1.fx_dv_dx
3206 mov esi, Tmap1.fx_du_dx
3208 mov si, ax ; ESI=DU:DV in 6.10 format
3212 mov ebp, Tmap1.fx_dl_dx
3215 mov ecx, Tmap1.pixptr
3217 mov eax, Tmap1.loop_count
3219 mov Tmap1.loop_count, eax
3224 mov Tmap1.num_big_steps, eax
3225 and Tmap1.loop_count, 7
// Unrolled body: eight fade-table lookups per big step.
3237 mov al, gr_fade_table[eax]
3249 mov al, gr_fade_table[eax]
3261 mov al, gr_fade_table[eax]
3273 mov al, gr_fade_table[eax]
3285 mov al, gr_fade_table[eax]
3297 mov al, gr_fade_table[eax]
3309 mov al, gr_fade_table[eax]
3321 mov al, gr_fade_table[eax]
3325 dec Tmap1.num_big_steps
// Leftover pixels after the last full group of eight.
3330 mov eax,Tmap1.loop_count
3335 mov Tmap1.loop_count, eax
3348 mov al, gr_fade_table[eax]
3359 mov al, gr_fade_table[eax]
3364 dec Tmap1.loop_count
3377 mov al, gr_fade_table[eax]
// Forward declaration; the definition appears after asm_tmap_scanline_lln.
3392 void asm_tmap_scanline_lln32();
// Lit, linearly interpolated 8-bit scanline driven entirely by Tmap1.
//
// - Tiled textures are handed to asm_tmap_scanline_lln_tiled.
// - The span is rejected (nothing drawn) if the integer part of u or v at
//   either end is at or beyond the bitmap width/height.
// - A negative light delta is negated and the light values mirrored about
//   67*F1_0, so the delta used by the loop is non-negative.
// - u and v are split into integer and 0.32 fractional parts; the texture
//   pointer in esi is advanced with adc using a two-entry step table indexed
//   by the v-fraction carry (ebp = -1 or 0).
// - Pixels are shaded through gr_fade_table, eight per big step, followed by
//   the remaining loop_count & 7.
3394 void asm_tmap_scanline_lln()
3399 if ( Tmap1.tmap_flags & TMAP_FLAG_TILED ) {
3400 asm_tmap_scanline_lln_tiled();
// NOTE(review): only the upper bound is checked here; negative coordinates
// are not rejected by these tests.
3404 end = f2i(Tmap1.fx_u);
3405 if ( end >= Tmap1.bp->w ) return;
3407 end = f2i(Tmap1.fx_v);
3408 if ( end >= Tmap1.bp->h ) return;
3410 end = f2i(Tmap1.fx_u_right);
3411 if ( end >= Tmap1.bp->w ) return;
3413 end = f2i(Tmap1.fx_v_right);
3414 if ( end >= Tmap1.bp->h ) return;
3416 if ( Tmap1.fx_dl_dx < 0 ) {
3417 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
3418 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
3419 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
3421 // Assert( Tmap1.fx_l > 31*F1_0 );
3422 // Assert( Tmap1.fx_l < 66*F1_0 );
3423 // Assert( Tmap1.fx_dl_dx >= 0 );
3424 // Assert( Tmap1.fx_dl_dx < 31*F1_0 );
3437 ; setup delta values
3438 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
3439 mov ebx, eax // copy it
3440 sar eax, 16 // get v int step
3441 shl ebx, 16 // get v frac step
3442 mov Tmap1.DeltaVFrac, ebx // store it
3443 imul eax, Tmap1.src_offset // calc texture step for v int step
3445 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
3446 mov ecx, ebx // copy it
3447 sar ebx, 16 // get the u int step
3448 shl ecx, 16 // get the u frac step
3449 mov Tmap1.DeltaUFrac, ecx // store it
3450 add eax, ebx // calc uint + vint step
// NOTE(review): the two step values are stored into Tmap1 members here, but
// the pixel loop reads them through the symbol UVintVfracStep -- confirm
// both refer to the same storage.
3451 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
3452 add eax, Tmap1.src_offset // calc whole step + v carry
3453 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
3455 ; setup initial coordinates
3456 mov esi, Tmap1.fx_u // get u 16.16
3457 mov ebx, esi // copy it
3458 sar esi, 16 // get integer part
3459 shl ebx, 16 // get fractional part
3461 mov ecx, Tmap1.fx_v // get v 16.16
3462 mov edx, ecx // copy it
3463 sar edx, 16 // get integer part
3464 shl ecx, 16 // get fractional part
3465 imul edx, Tmap1.src_offset // calc texture scanline address
3466 add esi, edx // calc texture offset
3467 add esi, Tmap1.pixptr // calc address
3469 ; set edi = address of first pixel to modify
3470 mov edi, Tmap1.dest_row_data
3472 mov edx, Tmap1.DeltaUFrac
3474 mov eax, Tmap1.loop_count
3476 mov Tmap1.loop_count, eax
3481 mov Tmap1.num_big_steps, eax
3482 and Tmap1.loop_count, 7
3491 mov ebp, Tmap1.fx_dl_dx
3503 // 8 pixel span code
3504 // edi = dest dib bits at current pixel
3505 // esi = texture pointer at current u,v
3507 // ebx = u fraction 0.32
3508 // ecx = v fraction 0.32
3509 // edx = u frac step
3510 // ebp = v carry scratch
3512 mov al,[edi] // preread the destination cache line
3514 mov al,[esi] // get texture pixel 0
3516 mov al, gr_fade_table[eax]
3518 add ecx,Tmap1.DeltaVFrac // increment v fraction
3519 sbb ebp,ebp // get -1 if carry
3520 add ebx,edx // increment u fraction
3522 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3523 add ecx,Tmap1.DeltaVFrac // increment v fraction
3525 sbb ebp,ebp // get -1 if carry
3526 mov [edi+0],al // store pixel 0
3528 add ebx,edx // increment u fraction
3529 mov al,[esi] // get texture pixel 1
3531 mov al, gr_fade_table[eax]
3533 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3534 add ecx,Tmap1.DeltaVFrac // increment v fraction
3536 sbb ebp,ebp // get -1 if carry
3537 mov [edi+1],al // store pixel 1
3539 add ebx,edx // increment u fraction
3540 mov al,[esi] // get texture pixel 2
3542 mov al, gr_fade_table[eax]
3544 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3545 add ecx,Tmap1.DeltaVFrac // increment v fraction
3547 sbb ebp,ebp // get -1 if carry
3548 mov [edi+2],al // store pixel 2
3550 add ebx,edx // increment u fraction
3551 mov al,[esi] // get texture pixel 3
3553 mov al, gr_fade_table[eax]
3555 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3556 add ecx,Tmap1.DeltaVFrac // increment v fraction
3558 sbb ebp,ebp // get -1 if carry
3559 mov [edi+3],al // store pixel 3
3561 add ebx,edx // increment u fraction
3562 mov al,[esi] // get texture pixel 4
3564 mov al, gr_fade_table[eax]
3566 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3567 add ecx,Tmap1.DeltaVFrac // increment v fraction
3569 sbb ebp,ebp // get -1 if carry
3570 mov [edi+4],al // store pixel 4
3572 add ebx,edx // increment u fraction
3573 mov al,[esi] // get texture pixel 5
3575 mov al, gr_fade_table[eax]
3577 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3578 add ecx,Tmap1.DeltaVFrac // increment v fraction
3580 sbb ebp,ebp // get -1 if carry
3581 mov [edi+5],al // store pixel 5
3583 add ebx,edx // increment u fraction
3584 mov al,[esi] // get texture pixel 6
3586 mov al, gr_fade_table[eax]
3588 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3589 add ecx,Tmap1.DeltaVFrac // increment v fraction
3591 sbb ebp,ebp // get -1 if carry
3592 mov [edi+6],al // store pixel 6
3594 add ebx,edx // increment u fraction
3596 mov al,[esi] // get texture pixel 7
3598 mov al, gr_fade_table[eax]
3600 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3602 mov [edi+7],al // store pixel 7
3608 dec Tmap1.num_big_steps
// Leftover pixels after the last full group of eight.
3614 mov eax,Tmap1.loop_count
3619 mov Tmap1.loop_count, eax
3629 mov ebp, Tmap1.fx_dl_dx
3633 mov al,[edi] // preread the destination cache line
3634 // add ebx,edx // increment u fraction
3638 mov al,[esi] // get texture pixel 0
3640 mov al, gr_fade_table[eax]
3642 add ecx,Tmap1.DeltaVFrac // increment v fraction
3643 sbb ebp,ebp // get -1 if carry
3644 add ebx,edx // increment u fraction
3645 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3646 mov [edi+0],al // store pixel 0
3648 add ecx,Tmap1.DeltaVFrac // increment v fraction
3649 sbb ebp,ebp // get -1 if carry
3650 add ebx,edx // increment u fraction
3651 mov al,[esi] // get texture pixel 1
3653 mov al, gr_fade_table[eax]
3655 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3656 mov [edi+1],al // store pixel 1
3659 dec Tmap1.loop_count
3667 mov al,[esi] // get texture pixel 2
3669 mov al, gr_fade_table[eax]
3670 mov [edi],al // store pixel 2
// 32-bit-destination counterpart of asm_tmap_scanline_lln, driven entirely
// by Tmap1.
//
// - The span is rejected (nothing drawn) if the integer part of u or v at
//   either end is at or beyond the bitmap width/height.
// - u and v are split into integer and 0.32 fractional parts; the texture
//   pointer in esi is advanced with adc using a two-entry step table indexed
//   by the v-fraction carry (ebp = -1 or 0).
// - Each output pixel is a 4-byte entry of gr_fade_table32, so every lookup
//   scales its index by 4 and every store writes eax at a 4-byte stride.
// - Eight pixels are produced per big step, followed by the remaining
//   loop_count & 7.
3684 void asm_tmap_scanline_lln32()
3688 end = f2i(Tmap1.fx_u);
3689 if ( end >= Tmap1.bp->w ) return;
3691 end = f2i(Tmap1.fx_v);
3692 if ( end >= Tmap1.bp->h ) return;
3694 end = f2i(Tmap1.fx_u_right);
3695 if ( end >= Tmap1.bp->w ) return;
3697 end = f2i(Tmap1.fx_v_right);
3698 if ( end >= Tmap1.bp->h ) return;
3709 ; setup delta values
3710 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
3711 mov ebx, eax // copy it
3712 sar eax, 16 // get v int step
3713 shl ebx, 16 // get v frac step
3714 mov Tmap1.DeltaVFrac, ebx // store it
3715 imul eax, Tmap1.src_offset // calc texture step for v int step
3717 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
3718 mov ecx, ebx // copy it
3719 sar ebx, 16 // get the u int step
3720 shl ecx, 16 // get the u frac step
3721 mov Tmap1.DeltaUFrac, ecx // store it
3722 add eax, ebx // calc uint + vint step
3723 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
3724 add eax, Tmap1.src_offset // calc whole step + v carry
3725 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
3727 ; setup initial coordinates
3728 mov esi, Tmap1.fx_u // get u 16.16
3729 mov ebx, esi // copy it
3730 sar esi, 16 // get integer part
3731 shl ebx, 16 // get fractional part
3733 mov ecx, Tmap1.fx_v // get v 16.16
3734 mov edx, ecx // copy it
3735 sar edx, 16 // get integer part
3736 shl ecx, 16 // get fractional part
3737 imul edx, Tmap1.src_offset // calc texture scanline address
3738 add esi, edx // calc texture offset
3739 add esi, Tmap1.pixptr // calc address
3741 ; set edi = address of first pixel to modify
3742 mov edi, Tmap1.dest_row_data32
3744 mov edx, Tmap1.DeltaUFrac
3746 mov eax, Tmap1.fx_l // use bx and dx to do lighting
3748 mov eax, Tmap1.fx_dl_dx // use bx and dx to do lighting
3751 mov eax, Tmap1.loop_count
3753 mov Tmap1.loop_count, eax
3758 mov Tmap1.num_big_steps, eax
3759 and Tmap1.loop_count, 7
3764 // 8 pixel span code
3765 // edi = dest dib bits at current pixel
3766 // esi = texture pointer at current u,v
3768 // ebx = u fraction 0.32
3769 // ecx = v fraction 0.32
3770 // edx = u frac step
3771 // ebp = v carry scratch
3773 mov al,[edi] // preread the destination cache line
3775 mov al,[esi] // get texture pixel 0
3777 mov eax, gr_fade_table32[eax*4]
3779 add ecx,Tmap1.DeltaVFrac // increment v fraction
3780 sbb ebp,ebp // get -1 if carry
3781 add ebx,edx // increment u fraction
3783 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3784 add ecx,Tmap1.DeltaVFrac // increment v fraction
3786 sbb ebp,ebp // get -1 if carry
3787 mov [edi+0],eax // store pixel 0
3789 add ebx,edx // increment u fraction
3790 mov al,[esi] // get texture pixel 1
3792 mov eax, gr_fade_table32[eax*4]
3794 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3795 add ecx,Tmap1.DeltaVFrac // increment v fraction
3797 sbb ebp,ebp // get -1 if carry
3798 mov [edi+4],eax // store pixel 1 (full 32-bit pixel, like its neighbours)
3800 add ebx,edx // increment u fraction
3801 mov al,[esi] // get texture pixel 2
3803 mov eax, gr_fade_table32[eax*4]
3805 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3806 add ecx,Tmap1.DeltaVFrac // increment v fraction
3808 sbb ebp,ebp // get -1 if carry
3809 mov [edi+8],eax // store pixel 2
3811 add ebx,edx // increment u fraction
3812 mov al,[esi] // get texture pixel 3
3814 mov eax, gr_fade_table32[eax*4]
3816 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3817 add ecx,Tmap1.DeltaVFrac // increment v fraction
3819 sbb ebp,ebp // get -1 if carry
3820 mov [edi+12],eax // store pixel 3
3822 add ebx,edx // increment u fraction
3823 mov al,[esi] // get texture pixel 4
3825 mov eax, gr_fade_table32[eax*4]
3827 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3828 add ecx,Tmap1.DeltaVFrac // increment v fraction
3830 sbb ebp,ebp // get -1 if carry
3831 mov [edi+16],eax // store pixel 4
3833 add ebx,edx // increment u fraction
3834 mov al,[esi] // get texture pixel 5
3836 mov eax, gr_fade_table32[eax*4]
3838 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3839 add ecx,Tmap1.DeltaVFrac // increment v fraction
3841 sbb ebp,ebp // get -1 if carry
3842 mov [edi+20],eax // store pixel 5
3844 add ebx,edx // increment u fraction
3845 mov al,[esi] // get texture pixel 6
3847 mov eax, gr_fade_table32[eax*4]
3849 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3850 add ecx,Tmap1.DeltaVFrac // increment v fraction
3852 sbb ebp,ebp // get -1 if carry
3853 mov [edi+24],eax // store pixel 6
3855 add ebx,edx // increment u fraction
3857 mov al,[esi] // get texture pixel 7
3859 mov eax, gr_fade_table32[eax*4] // index scaled by 4, as for pixels 0-6
3861 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3863 mov [edi+28],eax // store pixel 7
3869 dec Tmap1.num_big_steps
// Leftover pixels after the last full group of eight.
3875 mov eax,Tmap1.loop_count
3880 mov Tmap1.loop_count, eax
3886 mov al,[edi] // preread the destination cache line
3888 mov al,[esi] // get texture pixel 0
3890 mov eax, gr_fade_table32[eax*4]
3892 add ecx,Tmap1.DeltaVFrac // increment v fraction
3893 sbb ebp,ebp // get -1 if carry
3894 add ebx,edx // increment u fraction
3895 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3896 mov [edi+0],eax // store pixel 0
3898 add ecx,Tmap1.DeltaVFrac // increment v fraction
3899 sbb ebp,ebp // get -1 if carry
3900 add ebx,edx // increment u fraction
3901 mov al,[esi] // get texture pixel 1
3903 mov eax, gr_fade_table32[eax*4]
3905 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
// Pixel 1 lives 4 bytes after pixel 0; a byte store at +1 would corrupt
// pixel 0 and leave pixel 1 unwritten.
// NOTE(review): the destination advance that follows this pair must step by
// two 4-byte pixels -- confirm.
3906 mov [edi+4],eax // store pixel 1
3909 dec Tmap1.loop_count
3917 mov al,[esi] // get texture pixel 2
3919 mov eax, gr_fade_table32[eax*4]
3920 mov [edi],eax // store pixel 2
// asm_tmap_scanline_lnt
//
// Draws one scanline by stepping u and v linearly across the texture. The
// visible code reads one byte per texel through esi and writes that byte
// unchanged to the destination through edi (no lighting-table lookup appears).
//
// Reads from Tmap1: fx_u, fx_v, fx_u_right, fx_v_right, bp->w, bp->h,
//   fx_du_dx, fx_dv_dx, src_offset, pixptr, dest_row_data, loop_count.
// Writes to Tmap1: DeltaVFrac, DeltaUFrac, UVintVfracStepVNoCarry,
//   UVintVfracStepVCarry, loop_count, num_big_steps.
//
// NOTE(review): this listing does not contain the braces, the asm wrapper,
// the loop labels/branches, or any instruction that advances edi; the
// control flow described in the comments below is therefore an assumption
// to confirm. Around every store in this routine the embedded line numbers
// skip more lines than in asm_tmap_scanline_lnn, so additional per-pixel
// instructions (presumably a transparency test) may be missing -- confirm.
3933 void asm_tmap_scanline_lnt()
// Return without drawing if f2i() of any start or end texture coordinate
// is at or beyond the bitmap width (u) or height (v).
// NOTE(review): only an upper-bound test is visible; whether negative
// coordinates are rejected depends on the type of `end`, declared elsewhere.
3937 end = f2i(Tmap1.fx_u);
3938 if ( end >= Tmap1.bp->w ) return;
3940 end = f2i(Tmap1.fx_v);
3941 if ( end >= Tmap1.bp->h ) return;
3943 end = f2i(Tmap1.fx_u_right);
3944 if ( end >= Tmap1.bp->w ) return;
3946 end = f2i(Tmap1.fx_v_right);
3947 if ( end >= Tmap1.bp->h ) return;
// Split each 16.16 step into an integer part (sar 16) and a fraction moved
// into the top 16 bits of a 32-bit register (shl 16), so that overflow of
// the fraction shows up in the carry flag when it is accumulated.
3959 ; setup delta values
3960 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
3961 mov ebx, eax // copy it
3962 sar eax, 16 // get v int step
3963 shl ebx, 16 // get v frac step
3964 mov Tmap1.DeltaVFrac, ebx // store it
3965 imul eax, Tmap1.src_offset // calc texture step for v int step
3967 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
3968 mov ecx, ebx // copy it
3969 sar ebx, 16 // get the u int step
3970 shl ecx, 16 // get the u frac step
3971 mov Tmap1.DeltaUFrac, ecx // store it
// Two precomputed pointer steps: (vint*src_offset + uint) for "no v carry",
// and the same plus one extra src_offset for "v fraction carried".
3972 add eax, ebx // calc uint + vint step
3973 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
3974 add eax, Tmap1.src_offset // calc whole step + v carry
3975 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
// After this section: esi = pixptr + vint*src_offset + uint (address of the
// first texel), ebx = u fraction, ecx = v fraction.
3977 ; setup initial coordinates
3978 mov esi, Tmap1.fx_u // get u 16.16
3979 mov ebx, esi // copy it
3980 sar esi, 16 // get integer part
3981 shl ebx, 16 // get fractional part
3983 mov ecx, Tmap1.fx_v // get v 16.16
3984 mov edx, ecx // copy it
3985 sar edx, 16 // get integer part
3986 shl ecx, 16 // get fractional part
3987 imul edx, Tmap1.src_offset // calc texture scanline address
3988 add esi, edx // calc texture offset
3989 add esi, Tmap1.pixptr // calc address
3991 ; set edi = address of first pixel to modify
3992 mov edi, Tmap1.dest_row_data
// edx holds the u fraction step for the whole inner loop.
3994 mov edx, Tmap1.DeltaUFrac
// Split the pixel count: loop_count keeps only its low 3 bits (count mod 8).
// NOTE(review): the instructions that adjust eax before each store are not
// visible here; presumably num_big_steps ends up as count / 8 -- confirm.
3996 mov eax, Tmap1.loop_count
3998 mov Tmap1.loop_count, eax
4003 mov Tmap1.num_big_steps, eax
4004 and Tmap1.loop_count, 7
// Per-pixel pattern used below:
//   add ecx,DeltaVFrac   -> carry set when the v fraction overflows
//   sbb ebp,ebp          -> ebp = -1 on that carry, otherwise 0
//   add ebx,edx          -> carry set when the u fraction overflows
//   adc esi,UVintVfracStep[4*ebp]
//                        -> adds the dword at byte offset 0 or -4 from
//                           UVintVfracStep, plus the u carry, to the pointer
// The mov instructions placed between the add and the adc do not change
// flags, so the u carry is still intact when adc executes.
// NOTE(review): presumably UVintVfracStep and the dword just before it are
// the UVintVfracStepVNoCarry / UVintVfracStepVCarry slots filled in above;
// that layout is not shown here -- confirm.
4009 // 8 pixel span code
4010 // edi = dest dib bits at current pixel
4011 // esi = texture pointer at current u,v
4013 // ebx = u fraction 0.32
4014 // ecx = v fraction 0.32
4015 // edx = u frac step
4016 // ebp = v carry scratch
4018 mov al,[edi] // preread the destination cache line
4020 mov al,[esi] // get texture pixel 0
4022 add ecx,Tmap1.DeltaVFrac // increment v fraction
4023 sbb ebp,ebp // get -1 if carry
4024 add ebx,edx // increment u fraction
4026 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4027 add ecx,Tmap1.DeltaVFrac // increment v fraction
4029 sbb ebp,ebp // get -1 if carry
4032 mov [edi+0],al // store pixel 0
4035 add ebx,edx // increment u fraction
4036 mov al,[esi] // get texture pixel 1
4038 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4039 add ecx,Tmap1.DeltaVFrac // increment v fraction
4041 sbb ebp,ebp // get -1 if carry
4044 mov [edi+1],al // store pixel 1
4047 add ebx,edx // increment u fraction
4048 mov al,[esi] // get texture pixel 2
4050 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4051 add ecx,Tmap1.DeltaVFrac // increment v fraction
4053 sbb ebp,ebp // get -1 if carry
4056 mov [edi+2],al // store pixel 2
4059 add ebx,edx // increment u fraction
4060 mov al,[esi] // get texture pixel 3
4062 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4063 add ecx,Tmap1.DeltaVFrac // increment v fraction
4065 sbb ebp,ebp // get -1 if carry
4068 mov [edi+3],al // store pixel 3
4071 add ebx,edx // increment u fraction
4072 mov al,[esi] // get texture pixel 4
4074 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4075 add ecx,Tmap1.DeltaVFrac // increment v fraction
4077 sbb ebp,ebp // get -1 if carry
4080 mov [edi+4],al // store pixel 4
4083 add ebx,edx // increment u fraction
4084 mov al,[esi] // get texture pixel 5
4086 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4087 add ecx,Tmap1.DeltaVFrac // increment v fraction
4089 sbb ebp,ebp // get -1 if carry
4092 mov [edi+5],al // store pixel 5
4095 add ebx,edx // increment u fraction
4096 mov al,[esi] // get texture pixel 6
4098 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4099 add ecx,Tmap1.DeltaVFrac // increment v fraction
4101 sbb ebp,ebp // get -1 if carry
4104 mov [edi+6],al // store pixel 6
4107 add ebx,edx // increment u fraction
4109 mov al,[esi] // get texture pixel 7
4111 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4115 mov [edi+7],al // store pixel 7
// One 8-pixel span finished; count it off.
// NOTE(review): the branch back to the span start and the edi advance are
// not visible in this listing.
4122 dec Tmap1.num_big_steps
// Leftover pixels (the count mod 8 saved earlier).
// NOTE(review): whatever adjusts eax between this load and store is not shown.
4128 mov eax,Tmap1.loop_count
4133 mov Tmap1.loop_count, eax
// Leftover loop body: two pixels are written (edi+0 and edi+1) for each
// decrement of loop_count.
4138 mov al,[edi] // preread the destination cache line
4139 // add ebx,edx // increment u fraction
4143 mov al,[esi] // get texture pixel 0
4145 add ecx,Tmap1.DeltaVFrac // increment v fraction
4146 sbb ebp,ebp // get -1 if carry
4147 add ebx,edx // increment u fraction
4148 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4151 mov [edi+0],al // store pixel 0
4154 add ecx,Tmap1.DeltaVFrac // increment v fraction
4155 sbb ebp,ebp // get -1 if carry
4156 add ebx,edx // increment u fraction
4157 mov al,[esi] // get texture pixel 1
4159 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4162 mov [edi+1],al // store pixel 1
4166 dec Tmap1.loop_count
// Final single texel copy; no further u/v stepping follows it.
// NOTE(review): presumably reached only when one pixel remains -- the
// guarding branch is not visible.
4174 mov al,[esi] // get texture pixel 2
4177 mov [edi],al // store pixel 2
// asm_tmap_scanline_lnn
//
// Draws one scanline by stepping u and v linearly across the texture. The
// visible code reads one byte per texel through esi and writes that byte
// unchanged to the destination through edi (no lighting-table lookup appears).
//
// Reads from Tmap1: fx_u, fx_v, fx_u_right, fx_v_right, bp->w, bp->h,
//   fx_du_dx, fx_dv_dx, src_offset, pixptr, dest_row_data, loop_count.
// Writes to Tmap1: DeltaVFrac, DeltaUFrac, UVintVfracStepVNoCarry,
//   UVintVfracStepVCarry, loop_count, num_big_steps.
//
// NOTE(review): this listing does not contain the braces, the asm wrapper,
// the loop labels/branches, or any instruction that advances edi; the
// control flow described in the comments below is therefore an assumption
// to confirm.
4192 void asm_tmap_scanline_lnn()
// Return without drawing if f2i() of any start or end texture coordinate
// is at or beyond the bitmap width (u) or height (v).
// NOTE(review): only an upper-bound test is visible; whether negative
// coordinates are rejected depends on the type of `end`, declared elsewhere.
4196 end = f2i(Tmap1.fx_u);
4197 if ( end >= Tmap1.bp->w ) return;
4199 end = f2i(Tmap1.fx_v);
4200 if ( end >= Tmap1.bp->h ) return;
4202 end = f2i(Tmap1.fx_u_right);
4203 if ( end >= Tmap1.bp->w ) return;
4205 end = f2i(Tmap1.fx_v_right);
4206 if ( end >= Tmap1.bp->h ) return;
// Split each 16.16 step into an integer part (sar 16) and a fraction moved
// into the top 16 bits of a 32-bit register (shl 16), so that overflow of
// the fraction shows up in the carry flag when it is accumulated.
4218 ; setup delta values
4219 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
4220 mov ebx, eax // copy it
4221 sar eax, 16 // get v int step
4222 shl ebx, 16 // get v frac step
4223 mov Tmap1.DeltaVFrac, ebx // store it
4224 imul eax, Tmap1.src_offset // calc texture step for v int step
4226 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
4227 mov ecx, ebx // copy it
4228 sar ebx, 16 // get the u int step
4229 shl ecx, 16 // get the u frac step
4230 mov Tmap1.DeltaUFrac, ecx // store it
// Two precomputed pointer steps: (vint*src_offset + uint) for "no v carry",
// and the same plus one extra src_offset for "v fraction carried".
4231 add eax, ebx // calc uint + vint step
4232 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
4233 add eax, Tmap1.src_offset // calc whole step + v carry
4234 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
// After this section: esi = pixptr + vint*src_offset + uint (address of the
// first texel), ebx = u fraction, ecx = v fraction.
4236 ; setup initial coordinates
4237 mov esi, Tmap1.fx_u // get u 16.16
4238 mov ebx, esi // copy it
4239 sar esi, 16 // get integer part
4240 shl ebx, 16 // get fractional part
4242 mov ecx, Tmap1.fx_v // get v 16.16
4243 mov edx, ecx // copy it
4244 sar edx, 16 // get integer part
4245 shl ecx, 16 // get fractional part
4246 imul edx, Tmap1.src_offset // calc texture scanline address
4247 add esi, edx // calc texture offset
4248 add esi, Tmap1.pixptr // calc address
4250 ; set edi = address of first pixel to modify
4251 mov edi, Tmap1.dest_row_data
// edx holds the u fraction step for the whole inner loop.
4253 mov edx, Tmap1.DeltaUFrac
// Split the pixel count: loop_count keeps only its low 3 bits (count mod 8).
// NOTE(review): the instructions that adjust eax before each store are not
// visible here; presumably num_big_steps ends up as count / 8 -- confirm.
4255 mov eax, Tmap1.loop_count
4257 mov Tmap1.loop_count, eax
4262 mov Tmap1.num_big_steps, eax
4263 and Tmap1.loop_count, 7
// Per-pixel pattern used below:
//   add ecx,DeltaVFrac   -> carry set when the v fraction overflows
//   sbb ebp,ebp          -> ebp = -1 on that carry, otherwise 0
//   add ebx,edx          -> carry set when the u fraction overflows
//   adc esi,UVintVfracStep[4*ebp]
//                        -> adds the dword at byte offset 0 or -4 from
//                           UVintVfracStep, plus the u carry, to the pointer
// The mov instructions placed between the add and the adc do not change
// flags, so the u carry is still intact when adc executes.
// NOTE(review): presumably UVintVfracStep and the dword just before it are
// the UVintVfracStepVNoCarry / UVintVfracStepVCarry slots filled in above;
// that layout is not shown here -- confirm.
4268 // 8 pixel span code
4269 // edi = dest dib bits at current pixel
4270 // esi = texture pointer at current u,v
4272 // ebx = u fraction 0.32
4273 // ecx = v fraction 0.32
4274 // edx = u frac step
4275 // ebp = v carry scratch
4277 mov al,[edi] // preread the destination cache line
4279 mov al,[esi] // get texture pixel 0
4281 add ecx,Tmap1.DeltaVFrac // increment v fraction
4282 sbb ebp,ebp // get -1 if carry
4283 add ebx,edx // increment u fraction
4285 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4286 add ecx,Tmap1.DeltaVFrac // increment v fraction
4288 sbb ebp,ebp // get -1 if carry
4289 mov [edi+0],al // store pixel 0
4291 add ebx,edx // increment u fraction
4292 mov al,[esi] // get texture pixel 1
4294 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4295 add ecx,Tmap1.DeltaVFrac // increment v fraction
4297 sbb ebp,ebp // get -1 if carry
4298 mov [edi+1],al // store pixel 1
4300 add ebx,edx // increment u fraction
4301 mov al,[esi] // get texture pixel 2
4303 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4304 add ecx,Tmap1.DeltaVFrac // increment v fraction
4306 sbb ebp,ebp // get -1 if carry
4307 mov [edi+2],al // store pixel 2
4309 add ebx,edx // increment u fraction
4310 mov al,[esi] // get texture pixel 3
4312 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4313 add ecx,Tmap1.DeltaVFrac // increment v fraction
4315 sbb ebp,ebp // get -1 if carry
4316 mov [edi+3],al // store pixel 3
4318 add ebx,edx // increment u fraction
4319 mov al,[esi] // get texture pixel 4
4321 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4322 add ecx,Tmap1.DeltaVFrac // increment v fraction
4324 sbb ebp,ebp // get -1 if carry
4325 mov [edi+4],al // store pixel 4
4327 add ebx,edx // increment u fraction
4328 mov al,[esi] // get texture pixel 5
4330 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4331 add ecx,Tmap1.DeltaVFrac // increment v fraction
4333 sbb ebp,ebp // get -1 if carry
4334 mov [edi+5],al // store pixel 5
4336 add ebx,edx // increment u fraction
4337 mov al,[esi] // get texture pixel 6
4339 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4340 add ecx,Tmap1.DeltaVFrac // increment v fraction
4342 sbb ebp,ebp // get -1 if carry
4343 mov [edi+6],al // store pixel 6
4345 add ebx,edx // increment u fraction
4347 mov al,[esi] // get texture pixel 7
4349 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4351 mov [edi+7],al // store pixel 7
// One 8-pixel span finished; count it off.
// NOTE(review): the branch back to the span start and the edi advance are
// not visible in this listing.
4357 dec Tmap1.num_big_steps
// Leftover pixels (the count mod 8 saved earlier).
// NOTE(review): whatever adjusts eax between this load and store is not shown.
4363 mov eax,Tmap1.loop_count
4368 mov Tmap1.loop_count, eax
// Leftover loop body: two pixels are written (edi+0 and edi+1) for each
// decrement of loop_count.
4373 mov al,[edi] // preread the destination cache line
4374 // add ebx,edx // increment u fraction
4378 mov al,[esi] // get texture pixel 0
4380 add ecx,Tmap1.DeltaVFrac // increment v fraction
4381 sbb ebp,ebp // get -1 if carry
4382 add ebx,edx // increment u fraction
4383 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4384 mov [edi+0],al // store pixel 0
4386 add ecx,Tmap1.DeltaVFrac // increment v fraction
4387 sbb ebp,ebp // get -1 if carry
4388 add ebx,edx // increment u fraction
4389 mov al,[esi] // get texture pixel 1
4391 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4392 mov [edi+1],al // store pixel 1
4395 dec Tmap1.loop_count
// Final single texel copy; no further u/v stepping follows it.
// NOTE(review): presumably reached only when one pixel remains -- the
// guarding branch is not visible.
4403 mov al,[esi] // get texture pixel 2
4404 mov [edi],al // store pixel 2
4417 void tmapscan_pln16( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
4419 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(ushort,lx,y);
4420 Tmap1.loop_count = rx - lx;
4421 Tmap1.fx_u = fl2f(p->u);
4422 Tmap1.fx_v = fl2f(p->v);
4423 Tmap1.fx_du_dx = fl2f(dp->u);
4424 Tmap1.fx_dv_dx = fl2f(dp->v);
4426 Tmap1.fx_l = fl2f(p->l*32.0);
4427 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
4429 Tmap1.fx_u_right = fl2f(rp->u);
4430 Tmap1.fx_v_right = fl2f(rp->v);
4431 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
4432 Tmap1.bp = tmap_bitmap;
4433 Tmap1.src_offset = tmap_bitmap->w;
4436 Tmap1.FixedScale = 65536.0f;
4437 Tmap1.FixedScale8 = 2048.0f; //8192.0f; // 2^16 / 8
4441 Tmap1.UOverZ = p->u;
4442 Tmap1.VOverZ = p->v;
4443 Tmap1.OneOverZ = p->sw;
4445 Tmap1.dUOverZdX8 = dp->u*32.0f;
4446 Tmap1.dVOverZdX8 = dp->v*32.0f;
4447 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
4449 Tmap1.dUOverZdX = dp->u;
4450 Tmap1.dVOverZdX = dp->v;
4451 Tmap1.dOneOverZdX = dp->sw;
4453 Tmap1.RightUOverZ = rp->u;
4454 Tmap1.RightVOverZ = rp->v;
4455 Tmap1.RightOneOverZ = rp->sw;
4459 Tmap1.BitmapWidth = Tmap1.bp->w;
4460 Tmap1.BitmapHeight = Tmap1.bp->h;
4463 if ( Tmap1.fx_dl_dx < 0 ) {
4464 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
4465 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
4466 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
4468 // Assert( Tmap1.fx_l > 31*F1_0 );
4469 // Assert( Tmap1.fx_l < 66*F1_0 );
4470 // Assert( Tmap1.fx_dl_dx >= 0 );
4471 // Assert( Tmap1.fx_dl_dx < 31*F1_0 );
4489 // put the FPU in 32 bit mode
4490 // @todo move this out of here!
4492 fstcw Tmap1.OldFPUCW // store copy of CW
4493 mov ax,Tmap1.OldFPUCW // get it in ax
4494 //hh and eax,NOT 1100000000y // 24 bit precision
4496 mov Tmap1.FPUCW,ax // store it
4497 fldcw Tmap1.FPUCW // load the FPU
4499 mov ecx, Tmap1.loop_count // ecx = width
4501 mov edi, Tmap1.dest_row_data // edi = dest pointer
4503 // edi = pointer to start pixel in dest dib
4506 mov eax,ecx // eax and ecx = width
4507 shr ecx,5 // ecx = width / subdivision length
4508 and eax,31 // eax = width mod subdivision length
4509 jnz some_left_over // any leftover?
4511 dec ecx // no, so special case last span
4512 mov eax,32 // it's 8 pixels long
4514 mov Tmap1.Subdivisions,ecx // store widths
4515 mov Tmap1.WidthModLength,eax
4517 // mov ebx,pLeft ; get left edge pointer
4518 // mov edx,pGradients ; get gradients pointer
4520 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
4521 // st0 st1 st2 st3 st4 st5 st6 st7
4522 fld Tmap1.VOverZ // V/ZL
4523 fld Tmap1.UOverZ // U/ZL V/ZL
4524 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
4525 fld1 // 1 1/ZL U/ZL V/ZL
4526 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
4527 fld st // ZL ZL 1/ZL U/ZL V/ZL
4528 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
4529 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
4530 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
4532 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
4533 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
4535 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
4537 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
4538 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
4539 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
4540 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
4541 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
4543 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
4545 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
4546 // @todo overlap this guy
4547 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
4548 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
4549 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
4550 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
4551 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
4553 cmp ecx,0 // check for any full spans
4554 jle HandleLeftoverPixels
4558 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
4559 // UR VR V/ZR 1/ZR U/ZR UL VL
4561 // convert left side coords
4563 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
4564 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
4565 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
4567 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
4568 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
4569 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
4571 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
4573 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
4574 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
4575 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
4576 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
4578 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
4579 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
4581 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
4582 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
4583 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
4585 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
4586 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
4588 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
4589 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
4590 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
4591 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
4592 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
4593 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
4594 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
4596 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
4598 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
4599 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
4602 ; set up affine registers
4604 ; setup delta values
4606 mov eax,Tmap1.DeltaV ; get v 16.16 step
4607 mov ebx,eax ; copy it
4608 sar eax,16 ; get v int step
4609 shl ebx,16 ; get v frac step
4610 mov Tmap1.DeltaVFrac,ebx ; store it
4611 imul eax,Tmap1.src_offset ; calculate texture step for v int step
4613 mov ebx,Tmap1.DeltaU ; get u 16.16 step
4614 mov ecx,ebx ; copy it
4615 sar ebx,16 ; get u int step
4616 shl ecx,16 ; get u frac step
4617 mov Tmap1.DeltaUFrac,ecx ; store it
4618 add eax,ebx ; calculate uint + vint step
4619 mov Tmap1.UVintVfracStepVNoCarry,eax; save whole step in non-v-carry slot
4620 add eax,Tmap1.src_offset ; calculate whole step + v carry
4621 mov Tmap1.UVintVfracStepVCarry,eax ; save in v-carry slot
4625 ; check coordinate ranges
4626 mov eax, Tmap1.UFixed
4627 cmp eax, Tmap1.MinUFixed
4629 mov eax, Tmap1.MinUFixed
4630 mov Tmap1.UFixed, eax
4633 cmp eax, Tmap1.MaxUFixed
4635 mov eax, Tmap1.MaxUFixed
4636 mov Tmap1.UFixed, eax
4638 mov eax, Tmap1.VFixed
4639 cmp eax, Tmap1.MinVFixed
4641 mov eax, Tmap1.MinVFixed
4642 mov Tmap1.VFixed, eax
4645 cmp eax, Tmap1.MaxVFixed
4647 mov eax, Tmap1.MaxVFixed
4648 mov Tmap1.VFixed, eax
4652 ; setup initial coordinates
4653 mov esi,Tmap1.UFixed ; get u 16.16 fixedpoint coordinate
4655 mov ebx,esi ; copy it
4656 sar esi,16 ; get integer part
4657 shl ebx,16 ; get fractional part
4659 mov ecx,Tmap1.VFixed ; get v 16.16 fixedpoint coordinate
4661 mov edx,ecx ; copy it
4662 sar edx,16 ; get integer part
4663 shl ecx,16 ; get fractional part
4664 imul edx,Tmap1.src_offset ; calc texture scanline address
4665 add esi,edx ; calc texture offset
4666 add esi,Tmap1.pixptr ; calc address
4668 mov edx,Tmap1.DeltaUFrac ; get register copy
4674 mov ebp, Tmap1.fx_dl_dx
4685 // add Tmap1.fx_l, eax
4688 // mov eax, Tmap1.fx_l // use bx and dx to do lighting
4691 // mov eax, Tmap1.fx_dl_dx // use bx and dx to do lighting
4697 ; ************** Can't Access Stack Frame ******************
4698 ; ************** Can't Access Stack Frame ******************
4699 ; ************** Can't Access Stack Frame ******************
4701 // 8 pixel span code
4702 // edi = dest dib bits at current pixel
4703 // esi = texture pointer at current u,v
4705 // ebx = u fraction 0.32
4706 // ecx = v fraction 0.32
4707 // edx = u frac step
4708 // ebp = v carry scratch
4710 mov al,[edi] // preread the destination cache line
4713 mov al,[esi] // get texture pixel 0
4715 mov ax, gr_fade_table16[eax*2]
4717 add ecx,Tmap1.DeltaVFrac // increment v fraction
4718 sbb ebp,ebp // get -1 if carry
4719 add ebx,edx // increment u fraction
4721 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4722 add ecx,Tmap1.DeltaVFrac // increment v fraction
4724 sbb ebp,ebp // get -1 if carry
4725 // mov al, 0 // Uncomment this line to show divisions
4726 mov [edi+0],ax // store pixel 0
4728 add ebx,edx // increment u fraction
4729 mov al,[esi] // get texture pixel 1
4731 mov ax, gr_fade_table16[eax*2]
4733 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4734 add ecx,Tmap1.DeltaVFrac // increment v fraction
4736 sbb ebp,ebp // get -1 if carry
4737 mov [edi+2],ax // store pixel 1
4739 add ebx,edx // increment u fraction
4740 mov al,[esi] // get texture pixel 2
4742 mov ax, gr_fade_table16[eax*2]
4744 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4745 add ecx,Tmap1.DeltaVFrac // increment v fraction
4747 sbb ebp,ebp // get -1 if carry
4748 mov [edi+4],ax // store pixel 2
4750 add ebx,edx // increment u fraction
4751 mov al,[esi] // get texture pixel 3
4753 mov ax, gr_fade_table16[eax*2]
4755 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4756 add ecx,Tmap1.DeltaVFrac // increment v fraction
4758 sbb ebp,ebp // get -1 if carry
4759 mov [edi+6],ax // store pixel 3
4761 add ebx,edx // increment u fraction
4762 mov al,[esi] // get texture pixel 4
4764 mov ax, gr_fade_table16[eax*2]
4765 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4766 add ecx,Tmap1.DeltaVFrac // increment v fraction
4768 sbb ebp,ebp // get -1 if carry
4769 mov [edi+8],ax // store pixel 3
4771 add ebx,edx // increment u fraction
4772 mov al,[esi] // get texture pixel 4
4774 mov ax, gr_fade_table16[eax*2]
4775 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4776 add ecx,Tmap1.DeltaVFrac // increment v fraction
4778 sbb ebp,ebp // get -1 if carry
4779 mov [edi+10],ax // store pixel 3
4781 add ebx,edx // increment u fraction
4782 mov al,[esi] // get texture pixel 4
4784 mov ax, gr_fade_table16[eax*2]
4785 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4786 add ecx,Tmap1.DeltaVFrac // increment v fraction
4788 sbb ebp,ebp // get -1 if carry
4789 mov [edi+12],ax // store pixel 3
4791 add ebx,edx // increment u fraction
4792 mov al,[esi] // get texture pixel 4
4794 mov ax, gr_fade_table16[eax*2]
4795 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4796 add ecx,Tmap1.DeltaVFrac // increment v fraction
4798 sbb ebp,ebp // get -1 if carry
4799 mov [edi+14],ax // store pixel 3
4801 add ebx,edx // increment u fraction
4802 mov al,[esi] // get texture pixel 4
4804 mov ax, gr_fade_table16[eax*2]
4805 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4806 add ecx,Tmap1.DeltaVFrac // increment v fraction
4808 sbb ebp,ebp // get -1 if carry
4809 mov [edi+16],ax // store pixel 3
4811 add ebx,edx // increment u fraction
4812 mov al,[esi] // get texture pixel 4
4814 mov ax, gr_fade_table16[eax*2]
4815 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4816 add ecx,Tmap1.DeltaVFrac // increment v fraction
4818 sbb ebp,ebp // get -1 if carry
4819 mov [edi+18],ax // store pixel 3
4821 add ebx,edx // increment u fraction
4822 mov al,[esi] // get texture pixel 4
4824 mov ax, gr_fade_table16[eax*2]
4825 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4826 add ecx,Tmap1.DeltaVFrac // increment v fraction
4828 sbb ebp,ebp // get -1 if carry
4829 mov [edi+20],ax // store pixel 3
4831 add ebx,edx // increment u fraction
4832 mov al,[esi] // get texture pixel 4
4834 mov ax, gr_fade_table16[eax*2]
4837 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4838 add ecx,Tmap1.DeltaVFrac // increment v fraction
4840 sbb ebp,ebp // get -1 if carry
4841 mov [edi+22],ax // store pixel 3
4843 add ebx,edx // increment u fraction
4844 mov al,[esi] // get texture pixel 4
4846 mov ax, gr_fade_table16[eax*2]
4849 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4850 add ecx,Tmap1.DeltaVFrac // increment v fraction
4852 sbb ebp,ebp // get -1 if carry
4853 mov [edi+24],ax // store pixel 3
4855 add ebx,edx // increment u fraction
4856 mov al,[esi] // get texture pixel 4
4858 mov ax, gr_fade_table16[eax*2]
4861 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4862 add ecx,Tmap1.DeltaVFrac // increment v fraction
4864 sbb ebp,ebp // get -1 if carry
4865 mov [edi+26],ax // store pixel 3
4867 add ebx,edx // increment u fraction
4868 mov al,[esi] // get texture pixel 4
4870 mov ax, gr_fade_table16[eax*2]
4873 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4874 add ecx,Tmap1.DeltaVFrac // increment v fraction
4876 sbb ebp,ebp // get -1 if carry
4877 mov [edi+28],ax // store pixel 3
4879 add ebx,edx // increment u fraction
4880 mov al,[esi] // get texture pixel 4
4882 mov ax, gr_fade_table16[eax*2]
4885 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4886 add ecx,Tmap1.DeltaVFrac // increment v fraction
4888 sbb ebp,ebp // get -1 if carry
4889 mov [edi+30],ax // store pixel 3
4891 add ebx,edx // increment u fraction
4892 mov al,[esi] // get texture pixel 4
4894 mov ax, gr_fade_table16[eax*2]
4897 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4898 add ecx,Tmap1.DeltaVFrac // increment v fraction
4900 sbb ebp,ebp // get -1 if carry
4901 mov [edi+32],ax // store pixel 3
4903 add ebx,edx // increment u fraction
4904 mov al,[esi] // get texture pixel 4
4906 mov ax, gr_fade_table16[eax*2]
4909 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4910 add ecx,Tmap1.DeltaVFrac // increment v fraction
4912 sbb ebp,ebp // get -1 if carry
4913 mov [edi+34],ax // store pixel 3
4915 add ebx,edx // increment u fraction
4916 mov al,[esi] // get texture pixel 4
4918 mov ax, gr_fade_table16[eax*2]
4921 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4922 add ecx,Tmap1.DeltaVFrac // increment v fraction
4924 sbb ebp,ebp // get -1 if carry
4925 mov [edi+36],ax // store pixel 3
4927 add ebx,edx // increment u fraction
4928 mov al,[esi] // get texture pixel 4
4930 mov ax, gr_fade_table16[eax*2]
4933 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4934 add ecx,Tmap1.DeltaVFrac // increment v fraction
4936 sbb ebp,ebp // get -1 if carry
4937 mov [edi+38],ax // store pixel 3
4939 add ebx,edx // increment u fraction
4940 mov al,[esi] // get texture pixel 4
4942 mov ax, gr_fade_table16[eax*2]
4945 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4946 add ecx,Tmap1.DeltaVFrac // increment v fraction
4948 sbb ebp,ebp // get -1 if carry
4949 mov [edi+40],ax // store pixel 3
4951 add ebx,edx // increment u fraction
4952 mov al,[esi] // get texture pixel 4
4954 mov ax, gr_fade_table16[eax*2]
4957 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4958 add ecx,Tmap1.DeltaVFrac // increment v fraction
4960 sbb ebp,ebp // get -1 if carry
4961 mov [edi+42],ax // store pixel 3
4963 add ebx,edx // increment u fraction
4964 mov al,[esi] // get texture pixel 4
4966 mov ax, gr_fade_table16[eax*2]
4969 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4970 add ecx,Tmap1.DeltaVFrac // increment v fraction
4972 sbb ebp,ebp // get -1 if carry
4973 mov [edi+44],ax // store pixel 3
4975 add ebx,edx // increment u fraction
4976 mov al,[esi] // get texture pixel 4
4978 mov ax, gr_fade_table16[eax*2]
4981 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4982 add ecx,Tmap1.DeltaVFrac // increment v fraction
4984 sbb ebp,ebp // get -1 if carry
4985 mov [edi+46],ax // store pixel 3
4987 add ebx,edx // increment u fraction
4988 mov al,[esi] // get texture pixel 4
4990 mov ax, gr_fade_table16[eax*2]
4993 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4994 add ecx,Tmap1.DeltaVFrac // increment v fraction
4996 sbb ebp,ebp // get -1 if carry
4997 mov [edi+48],ax // store pixel 3
4999 add ebx,edx // increment u fraction
5000 mov al,[esi] // get texture pixel 4
5002 mov ax, gr_fade_table16[eax*2]
5005 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5006 add ecx,Tmap1.DeltaVFrac // increment v fraction
5008 sbb ebp,ebp // get -1 if carry
5009 mov [edi+50],ax // store pixel 3
5011 add ebx,edx // increment u fraction
5012 mov al,[esi] // get texture pixel 4
5014 mov ax, gr_fade_table16[eax*2]
5017 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5018 add ecx,Tmap1.DeltaVFrac // increment v fraction
5022 sbb ebp,ebp // get -1 if carry
5023 mov [edi+52],ax // store pixel 3
5025 add ebx,edx // increment u fraction
5026 mov al,[esi] // get texture pixel 4
5028 mov ax, gr_fade_table16[eax*2]
5031 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5032 add ecx,Tmap1.DeltaVFrac // increment v fraction
5034 sbb ebp,ebp // get -1 if carry
5035 mov [edi+54],ax // store pixel 3
5037 add ebx,edx // increment u fraction
5038 mov al,[esi] // get texture pixel 4
5040 mov ax, gr_fade_table16[eax*2]
5042 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5043 add ecx,Tmap1.DeltaVFrac // increment v fraction
5045 sbb ebp,ebp // get -1 if carry
5046 mov [edi+56],ax // store pixel 4
5048 add ebx,edx // increment u fraction
5049 mov al,[esi] // get texture pixel 5
5051 mov ax, gr_fade_table16[eax*2]
5053 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5054 add ecx,Tmap1.DeltaVFrac // increment v fraction
5056 sbb ebp,ebp // get -1 if carry
5057 mov [edi+58],ax // store pixel 5
5059 add ebx,edx // increment u fraction
5060 mov al,[esi] // get texture pixel 6
5062 mov ax, gr_fade_table16[eax*2]
5064 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5065 add ecx,Tmap1.DeltaVFrac // increment v fraction
5067 sbb ebp,ebp // get -1 if carry
5068 mov [edi+60],ax // store pixel 6
5070 add ebx,edx // increment u fraction
5072 mov al,[esi] // get texture pixel 7
5074 mov ax, gr_fade_table16[eax*2]
5076 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5078 mov [edi+62],ax // store pixel 7
5082 ; ************** Okay to Access Stack Frame ****************
5083 ; ************** Okay to Access Stack Frame ****************
5084 ; ************** Okay to Access Stack Frame ****************
5087 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
5088 ; ZR V/ZR 1/ZR U/ZR UL VL
5090 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
5091 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
5092 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
5093 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
5095 add edi,64 ; increment to next span
5096 dec Tmap1.Subdivisions ; decrement span count
5097 jnz SpanLoop ; loop back
5099 // save new lighting values
5102 // mov Tmap1.fx_l, eax
5106 // mov Tmap1.fx_dl_dx, eax
5108 HandleLeftoverPixels:
5111 mov esi,Tmap1.pixptr ; load texture pointer
5113 ; edi = dest dib bits
5114 ; esi = current texture dib bits
5115 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
5116 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
5118 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
5119 jz FPUReturn ; nope, pop the FPU and bail
5121 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
5123 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
5124 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
5125 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
5127 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
5128 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
5129 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
5131 dec Tmap1.WidthModLength ; calc how many steps to take
5132 jz OnePixelSpan ; just one, don't do deltas
5134 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
5137 ; @todo rearrange things so we don't need these two instructions
5138 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
5139 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
5141 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
5142 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
5143 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
5144 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
5145 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
5146 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
5148 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
5150 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
5151 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
5153 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
5155 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
5156 fxch st(1) ; VR UR inv. inv. inv. dU VL
5157 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
5158 fxch st(6) ; dV UR inv. inv. inv. dU VR
5160 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
5161 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
5162 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
5164 fxch st(4) ; dU inv. inv. inv. UR VR
5165 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
5166 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
5167 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
5169 ; @todo gross! these are to line up with the other loop
5170 fld st(1) ; inv. inv. inv. inv. UR VR
5171 fld st(2) ; inv. inv. inv. inv. inv. UR VR
5176 ; setup delta values
5177 mov eax, Tmap1.DeltaV // get v 16.16 step
5178 mov ebx, eax // copy it
5179 sar eax, 16 // get v int step
5180 shl ebx, 16 // get v frac step
5181 mov Tmap1.DeltaVFrac, ebx // store it
5182 imul eax, Tmap1.src_offset // calc texture step for v int step
5184 mov ebx, Tmap1.DeltaU // get u 16.16 step
5185 mov ecx, ebx // copy it
5186 sar ebx, 16 // get the u int step
5187 shl ecx, 16 // get the u frac step
5188 mov Tmap1.DeltaUFrac, ecx // store it
5189 add eax, ebx // calc uint + vint step
5190 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
5191 add eax, Tmap1.src_offset // calc whole step + v carry
5192 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
5199 ; check coordinate ranges
5200 mov eax, Tmap1.UFixed
5201 cmp eax, Tmap1.MinUFixed
5203 mov eax, Tmap1.MinUFixed
5204 mov Tmap1.UFixed, eax
5207 cmp eax, Tmap1.MaxUFixed
5209 mov eax, Tmap1.MaxUFixed
5210 mov Tmap1.UFixed, eax
5212 mov eax, Tmap1.VFixed
5213 cmp eax, Tmap1.MinVFixed
5215 mov eax, Tmap1.MinVFixed
5216 mov Tmap1.VFixed, eax
5219 cmp eax, Tmap1.MaxVFixed
5221 mov eax, Tmap1.MaxVFixed
5222 mov Tmap1.VFixed, eax
5229 ; setup initial coordinates
5230 mov esi, Tmap1.UFixed // get u 16.16
5231 mov ebx, esi // copy it
5232 sar esi, 16 // get integer part
5233 shl ebx, 16 // get fractional part
5235 mov ecx, Tmap1.VFixed // get v 16.16
5236 mov edx, ecx // copy it
5237 sar edx, 16 // get integer part
5238 shl ecx, 16 // get fractional part
5239 imul edx, Tmap1.src_offset // calc texture scanline address
5240 add esi, edx // calc texture offset
5241 add esi, Tmap1.pixptr // calc address
5243 ; set edi = address of first pixel to modify
5244 ; mov edi, Tmap1.dest_row_data
5253 mov edx, Tmap1.DeltaUFrac
5255 cmp Tmap1.WidthModLength, 1
5260 mov ebx, Tmap1.fx_l_right
5267 // slow but maybe better
5270 mov ebx, Tmap1.WidthModLength
5275 mov eax, Tmap1.fx_dl_dx
5285 inc Tmap1.WidthModLength
5286 mov eax,Tmap1.WidthModLength
5290 mov Tmap1.WidthModLength, eax
5294 mov al,[edi] // preread the destination cache line
5297 mov al,[esi] // get texture pixel 0
5299 mov ax, gr_fade_table16[eax*2]
5301 add ecx,Tmap1.DeltaVFrac // increment v fraction
5302 sbb ebp,ebp // get -1 if carry
5303 add ebx,edx // increment u fraction
5304 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5305 mov [edi+0],ax // store pixel 0
5307 add ecx,Tmap1.DeltaVFrac // increment v fraction
5308 sbb ebp,ebp // get -1 if carry
5309 add ebx,edx // increment u fraction
5310 mov al,[esi] // get texture pixel 1
5312 mov ax, gr_fade_table16[eax*2]
5314 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5315 mov [edi+2],ax // store pixel 1
5318 dec Tmap1.WidthModLength
5326 mov al,[esi] // get texture pixel 2
5328 mov ax, gr_fade_table16[eax*2]
5329 mov [edi],ax // store pixel 2
5344 OldWay: // This is 6% slower than above
5346 mov ebx,Tmap1.UFixed ; get starting coordinates
5347 mov ecx,Tmap1.VFixed ; for span
5349 ; leftover pixels loop
5350 ; edi = dest dib bits
5351 ; esi = texture dib bits
5357 mov eax,ecx ; copy v
5359 imul eax,Tmap1.src_offset ; scan offset
5360 mov edx,ebx ; copy u
5362 add eax,edx ; texture offset
5363 mov al,[esi+eax] ; get source pixel
5365 mov [edi],al ; store it
5367 add ebx,Tmap1.DeltaU ; increment u coordinate
5368 add ecx,Tmap1.DeltaV ; increment v coordinate
5370 dec Tmap1.WidthModLength ; decrement loop count
5371 jl FPUReturn ; finish up
5375 mov eax,ecx ; copy v
5377 imul eax,Tmap1.src_offset ; scan offset
5378 mov edx,ebx ; copy u
5380 add eax,edx ; texture offset
5381 mov al,[esi+eax] ; get source pixel
5382 mov [edi],al ; store it
5384 add ebx,Tmap1.DeltaU ; increment u coordinate
5385 add ecx,Tmap1.DeltaV ; increment v coordinate
5387 dec Tmap1.WidthModLength ; decrement loop count
5388 jge LeftoverLoop ; finish up
5393 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
5394 ; xxx xxx xxx xxx xxx xxx xxx
5405 fldcw Tmap1.OldFPUCW // restore the FPU
// tmapscan_lnn16 -- draws one horizontal span of an 8-bit texture into a
// destination row of 16-bit pixels. u and v advance by a constant 16.16
// fixed-point step per pixel, and every texel is translated through
// palman_8_16_xlat before it is stored.
//   lx, rx : span start / end x; rx - lx is stored in Tmap1.loop_count
//   y      : destination row
//   p      : supplies the starting u,v
//   dp     : supplies the per-pixel u,v deltas
//   rp     : supplies the right-edge u,v (only read by the range checks below)
//   flags  : not referenced by the statements in this block
// Returns early, drawing nothing, when f2i() of any left/right u or v is
// >= the bitmap width/height.
5422 void tmapscan_lnn16( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
5424 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(ushort,lx,y); // destination address for (lx,y); pixels are written 2 bytes apart below
5425 Tmap1.loop_count = rx - lx; // span length as given by the caller
5426 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data; // texture is read one byte per texel
5427 Tmap1.bp = tmap_bitmap;
5428 Tmap1.src_offset = tmap_bitmap->w; // bytes per texture row (used as the v stride)
5430 Tmap1.fx_u = fl2f(p->u); // starting u, treated as 16.16 by the asm below
5431 Tmap1.fx_v = fl2f(p->v); // starting v, treated as 16.16 by the asm below
5432 Tmap1.fx_du_dx = fl2f(dp->u); // per-pixel u step
5433 Tmap1.fx_dv_dx = fl2f(dp->v); // per-pixel v step
5434 Tmap1.fx_u_right = fl2f(rp->u); // right-edge u, only used for the range check
5435 Tmap1.fx_v_right = fl2f(rp->v); // right-edge v, only used for the range check
// Range checks: skip the whole span if either end samples at or beyond the
// bitmap's width/height. NOTE(review): only the upper bound is tested here;
// presumably f2i() yields the integer part of a 16.16 value -- confirm.
5439 end = f2i(Tmap1.fx_u);
5440 if ( end >= Tmap1.bp->w ) return; // left u outside the bitmap
5442 end = f2i(Tmap1.fx_v);
5443 if ( end >= Tmap1.bp->h ) return; // left v outside the bitmap
5445 end = f2i(Tmap1.fx_u_right);
5446 if ( end >= Tmap1.bp->w ) return; // right u outside the bitmap
5448 end = f2i(Tmap1.fx_v_right);
5449 if ( end >= Tmap1.bp->h ) return; // right v outside the bitmap
5461 ; setup delta values
5462 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
5463 mov ebx, eax // copy it
5464 sar eax, 16 // get v int step (arithmetic shift keeps the sign)
5465 shl ebx, 16 // get v frac step, moved into the top 16 bits so adds produce a carry
5466 mov Tmap1.DeltaVFrac, ebx // store it
5467 imul eax, Tmap1.src_offset // calc texture step for v int step (rows * row width)
5469 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
5470 mov ecx, ebx // copy it
5471 sar ebx, 16 // get the u int step
5472 shl ecx, 16 // get the u frac step, moved into the top 16 bits
5473 mov Tmap1.DeltaUFrac, ecx // store it
5474 add eax, ebx // calc uint + vint step
5475 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
5476 add eax, Tmap1.src_offset // calc whole step + v carry (one extra texture row)
5477 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
5479 ; setup initial coordinates
5480 mov esi, Tmap1.fx_u // get u 16.16
5481 mov ebx, esi // copy it
5482 sar esi, 16 // get integer part
5483 shl ebx, 16 // get fractional part (ebx = u fraction in the top 16 bits)
5485 mov ecx, Tmap1.fx_v // get v 16.16
5486 mov edx, ecx // copy it
5487 sar edx, 16 // get integer part
5488 shl ecx, 16 // get fractional part (ecx = v fraction in the top 16 bits)
5489 imul edx, Tmap1.src_offset // calc texture scanline address
5490 add esi, edx // calc texture offset
5491 add esi, Tmap1.pixptr // calc address (esi = pointer to the first texel)
5493 ; set edi = address of first pixel to modify
5494 mov edi, Tmap1.dest_row_data
5496 mov edx, Tmap1.DeltaUFrac // keep the u fraction step in a register for the inner loops
5498 mov eax, Tmap1.loop_count // eax = span pixel count
5500 mov Tmap1.loop_count, eax // write the count back (any adjustment made in between is not shown here)
5505 mov Tmap1.num_big_steps, eax // count of 8-pixel groups -- presumably eax was divided by 8 first; confirm
5506 and Tmap1.loop_count, 7 // keep count mod 8 for the leftover code
5512 // 8 pixel span code
5513 // edi = dest dib bits at current pixel
5514 // esi = texture pointer at current u,v
5516 // ebx = u fraction 0.32
5517 // ecx = v fraction 0.32
5518 // edx = u frac step
5519 // ebp = v carry scratch
// Per pixel: add the v fraction (sbb turns its carry into ebp = 0 or -1), add
// the u fraction, then adc advances esi by the step selected by ebp plus the
// u carry. The texel fetch (movzx) and table lookup (mov) sit between the
// add and the adc; neither modifies flags, so the carry survives.
// NOTE(review): with ebp = -1 the operand UVintVfracStep[4*ebp] addresses the
// dword just before UVintVfracStep -- presumably the v-carry slot; confirm
// against the Tmap1 layout.
5521 mov al,[edi] // preread the destination cache line
5523 movzx eax,byte ptr [esi] // get texture pixel 0
5525 add ecx,Tmap1.DeltaVFrac // increment v fraction
5526 sbb ebp,ebp // get -1 if carry
5527 add ebx,edx // increment u fraction
5529 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5530 add ecx,Tmap1.DeltaVFrac // increment v fraction
5532 sbb ebp,ebp // get -1 if carry
5533 mov ax, palman_8_16_xlat[eax*2] // translate the 8-bit texel through the 2-byte-entry table
5534 mov [edi+0],ax // store pixel 0
5536 add ebx,edx // increment u fraction
5537 movzx eax,byte ptr [esi] // get texture pixel 1
5539 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5540 add ecx,Tmap1.DeltaVFrac // increment v fraction
5542 sbb ebp,ebp // get -1 if carry
5543 mov ax, palman_8_16_xlat[eax*2] // translate texel 1
5544 mov [edi+2],ax // store pixel 1
5546 add ebx,edx // increment u fraction
5547 movzx eax,byte ptr [esi] // get texture pixel 2
5549 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5550 add ecx,Tmap1.DeltaVFrac // increment v fraction
5552 sbb ebp,ebp // get -1 if carry
5553 mov ax, palman_8_16_xlat[eax*2] // translate texel 2
5554 mov [edi+4],ax // store pixel 2
5556 add ebx,edx // increment u fraction
5557 movzx eax,byte ptr [esi] // get texture pixel 3
5559 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5560 add ecx,Tmap1.DeltaVFrac // increment v fraction
5562 sbb ebp,ebp // get -1 if carry
5563 mov ax, palman_8_16_xlat[eax*2] // translate texel 3
5564 mov [edi+6],ax // store pixel 3
5566 add ebx,edx // increment u fraction
5567 movzx eax,byte ptr [esi] // get texture pixel 4
5569 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5570 add ecx,Tmap1.DeltaVFrac // increment v fraction
5572 sbb ebp,ebp // get -1 if carry
5573 mov ax, palman_8_16_xlat[eax*2] // translate texel 4
5574 mov [edi+8],ax // store pixel 4
5576 add ebx,edx // increment u fraction
5577 movzx eax,byte ptr [esi] // get texture pixel 5
5579 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5580 add ecx,Tmap1.DeltaVFrac // increment v fraction
5582 sbb ebp,ebp // get -1 if carry
5583 mov ax, palman_8_16_xlat[eax*2] // translate texel 5
5584 mov [edi+10],ax // store pixel 5
5586 add ebx,edx // increment u fraction
5587 movzx eax,byte ptr [esi] // get texture pixel 6
5589 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5590 add ecx,Tmap1.DeltaVFrac // increment v fraction
5592 sbb ebp,ebp // get -1 if carry
5593 mov ax, palman_8_16_xlat[eax*2] // translate texel 6
5594 mov [edi+12],ax // store pixel 6
5596 add ebx,edx // increment u fraction
5598 movzx eax,byte ptr [esi] // get texture pixel 7
5600 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries (esi now points at the texel after this group)
5602 mov ax, palman_8_16_xlat[eax*2] // translate texel 7
5603 mov [edi+14],ax // store pixel 7
5609 dec Tmap1.num_big_steps // one 8-pixel group finished
// Leftover pixels: a two-pixel body followed by a single trailing pixel.
// NOTE(review): the labels and branches that sequence these are not shown here.
5615 mov eax,Tmap1.loop_count // eax = leftover count (loop_count was masked with 7 above)
5620 mov Tmap1.loop_count, eax // write it back (any adjustment made in between is not shown here)
5625 mov al,[edi] // preread the destination cache line
5626 // add ebx,edx // increment u fraction
5630 movzx eax,byte ptr [esi] // get texture pixel 0
5632 add ecx,Tmap1.DeltaVFrac // increment v fraction
5633 sbb ebp,ebp // get -1 if carry
5634 add ebx,edx // increment u fraction
5635 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5636 mov ax, palman_8_16_xlat[eax*2] // translate texel 0
5637 mov [edi+0],ax // store pixel 0
5639 add ecx,Tmap1.DeltaVFrac // increment v fraction
5640 sbb ebp,ebp // get -1 if carry
5641 add ebx,edx // increment u fraction
5642 movzx eax,byte ptr [esi] // get texture pixel 1
5644 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5645 mov ax, palman_8_16_xlat[eax*2] // translate texel 1
5646 mov [edi+2],ax // store pixel 1
5649 dec Tmap1.loop_count // one two-pixel step finished
5657 movzx eax,byte ptr [esi] // get the texel for the trailing single pixel
5658 mov ax, palman_8_16_xlat[eax*2] // translate it
5659 mov [edi],ax // store the trailing pixel
// tmapscan_lnn32 -- draws one horizontal span of an 8-bit texture into a
// destination row of 32-bit pixels. u and v advance by a constant 16.16
// fixed-point step per pixel, and every texel is translated through
// palman_8_32_xlat before it is stored.
//   lx, rx : span start / end x; rx - lx is stored in Tmap1.loop_count
//   y      : destination row
//   p      : supplies the starting u,v
//   dp     : supplies the per-pixel u,v deltas
//   rp     : supplies the right-edge u,v (only read by the range checks below)
//   flags  : not referenced by the statements in this block
// Returns early, drawing nothing, when f2i() of any left/right u or v is
// >= the bitmap width/height.
5674 void tmapscan_lnn32( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
5676 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(uint,lx,y); // destination address for (lx,y); pixels are written 4 bytes apart below
5677 Tmap1.loop_count = rx - lx; // span length as given by the caller
5678 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data; // texture is read one byte per texel
5679 Tmap1.bp = tmap_bitmap;
5680 Tmap1.src_offset = tmap_bitmap->w; // bytes per texture row (used as the v stride)
5682 Tmap1.fx_u = fl2f(p->u); // starting u, treated as 16.16 by the asm below
5683 Tmap1.fx_v = fl2f(p->v); // starting v, treated as 16.16 by the asm below
5684 Tmap1.fx_du_dx = fl2f(dp->u); // per-pixel u step
5685 Tmap1.fx_dv_dx = fl2f(dp->v); // per-pixel v step
5686 Tmap1.fx_u_right = fl2f(rp->u); // right-edge u, only used for the range check
5687 Tmap1.fx_v_right = fl2f(rp->v); // right-edge v, only used for the range check
// Range checks: skip the whole span if either end samples at or beyond the
// bitmap's width/height. NOTE(review): only the upper bound is tested here;
// presumably f2i() yields the integer part of a 16.16 value -- confirm.
5691 end = f2i(Tmap1.fx_u);
5692 if ( end >= Tmap1.bp->w ) return; // left u outside the bitmap
5694 end = f2i(Tmap1.fx_v);
5695 if ( end >= Tmap1.bp->h ) return; // left v outside the bitmap
5697 end = f2i(Tmap1.fx_u_right);
5698 if ( end >= Tmap1.bp->w ) return; // right u outside the bitmap
5700 end = f2i(Tmap1.fx_v_right);
5701 if ( end >= Tmap1.bp->h ) return; // right v outside the bitmap
5713 ; setup delta values
5714 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
5715 mov ebx, eax // copy it
5716 sar eax, 16 // get v int step (arithmetic shift keeps the sign)
5717 shl ebx, 16 // get v frac step, moved into the top 16 bits so adds produce a carry
5718 mov Tmap1.DeltaVFrac, ebx // store it
5719 imul eax, Tmap1.src_offset // calc texture step for v int step (rows * row width)
5721 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
5722 mov ecx, ebx // copy it
5723 sar ebx, 16 // get the u int step
5724 shl ecx, 16 // get the u frac step, moved into the top 16 bits
5725 mov Tmap1.DeltaUFrac, ecx // store it
5726 add eax, ebx // calc uint + vint step
5727 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
5728 add eax, Tmap1.src_offset // calc whole step + v carry (one extra texture row)
5729 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
5731 ; setup initial coordinates
5732 mov esi, Tmap1.fx_u // get u 16.16
5733 mov ebx, esi // copy it
5734 sar esi, 16 // get integer part
5735 shl ebx, 16 // get fractional part (ebx = u fraction in the top 16 bits)
5737 mov ecx, Tmap1.fx_v // get v 16.16
5738 mov edx, ecx // copy it
5739 sar edx, 16 // get integer part
5740 shl ecx, 16 // get fractional part (ecx = v fraction in the top 16 bits)
5741 imul edx, Tmap1.src_offset // calc texture scanline address
5742 add esi, edx // calc texture offset
5743 add esi, Tmap1.pixptr // calc address (esi = pointer to the first texel)
5745 ; set edi = address of first pixel to modify
5746 mov edi, Tmap1.dest_row_data
5748 mov edx, Tmap1.DeltaUFrac // keep the u fraction step in a register for the inner loops
5750 mov eax, Tmap1.loop_count // eax = span pixel count
5752 mov Tmap1.loop_count, eax // write the count back (any adjustment made in between is not shown here)
5757 mov Tmap1.num_big_steps, eax // count of 8-pixel groups -- presumably eax was divided by 8 first; confirm
5758 and Tmap1.loop_count, 7 // keep count mod 8 for the leftover code
5764 // 8 pixel span code
5765 // edi = dest dib bits at current pixel
5766 // esi = texture pointer at current u,v
5768 // ebx = u fraction 0.32
5769 // ecx = v fraction 0.32
5770 // edx = u frac step
5771 // ebp = v carry scratch
// Per pixel: add the v fraction (sbb turns its carry into ebp = 0 or -1), add
// the u fraction, then adc advances esi by the step selected by ebp plus the
// u carry. The texel fetch (movzx) and table lookup (mov) sit between the
// add and the adc; neither modifies flags, so the carry survives.
// NOTE(review): with ebp = -1 the operand UVintVfracStep[4*ebp] addresses the
// dword just before UVintVfracStep -- presumably the v-carry slot; confirm
// against the Tmap1 layout.
5773 mov al,[edi] // preread the destination cache line
5775 movzx eax,byte ptr [esi] // get texture pixel 0
5777 add ecx,Tmap1.DeltaVFrac // increment v fraction
5778 sbb ebp,ebp // get -1 if carry
5779 add ebx,edx // increment u fraction
5781 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5782 add ecx,Tmap1.DeltaVFrac // increment v fraction
5784 sbb ebp,ebp // get -1 if carry
5785 mov eax, palman_8_32_xlat[eax*4] // translate the 8-bit texel through the 4-byte-entry table
5786 mov [edi+0],eax // store pixel 0
5788 add ebx,edx // increment u fraction
5789 movzx eax,byte ptr [esi] // get texture pixel 1
5791 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5792 add ecx,Tmap1.DeltaVFrac // increment v fraction
5794 sbb ebp,ebp // get -1 if carry
5795 mov eax, palman_8_32_xlat[eax*4] // translate texel 1
5796 mov [edi+4],eax // store pixel 1
5798 add ebx,edx // increment u fraction
5799 movzx eax,byte ptr [esi] // get texture pixel 2
5801 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5802 add ecx,Tmap1.DeltaVFrac // increment v fraction
5804 sbb ebp,ebp // get -1 if carry
5805 mov eax, palman_8_32_xlat[eax*4] // translate texel 2
5806 mov [edi+8],eax // store pixel 2
5808 add ebx,edx // increment u fraction
5809 movzx eax,byte ptr [esi] // get texture pixel 3
5811 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5812 add ecx,Tmap1.DeltaVFrac // increment v fraction
5814 sbb ebp,ebp // get -1 if carry
5815 mov eax, palman_8_32_xlat[eax*4] // translate texel 3
5816 mov [edi+12],eax // store pixel 3
5818 add ebx,edx // increment u fraction
5819 movzx eax,byte ptr [esi] // get texture pixel 4
5821 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5822 add ecx,Tmap1.DeltaVFrac // increment v fraction
5824 sbb ebp,ebp // get -1 if carry
5825 mov eax, palman_8_32_xlat[eax*4] // translate texel 4
5826 mov [edi+16],eax // store pixel 4
5828 add ebx,edx // increment u fraction
5829 movzx eax,byte ptr [esi] // get texture pixel 5
5831 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5832 add ecx,Tmap1.DeltaVFrac // increment v fraction
5834 sbb ebp,ebp // get -1 if carry
5835 mov eax, palman_8_32_xlat[eax*4] // translate texel 5
5836 mov [edi+20],eax // store pixel 5
5838 add ebx,edx // increment u fraction
5839 movzx eax,byte ptr [esi] // get texture pixel 6
5841 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5842 add ecx,Tmap1.DeltaVFrac // increment v fraction
5844 sbb ebp,ebp // get -1 if carry
5845 mov eax, palman_8_32_xlat[eax*4] // translate texel 6
5846 mov [edi+24],eax // store pixel 6
5848 add ebx,edx // increment u fraction
5850 movzx eax,byte ptr [esi] // get texture pixel 7
5852 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries (esi now points at the texel after this group)
5854 mov eax, palman_8_32_xlat[eax*4] // translate texel 7
5855 mov [edi+28],eax // store pixel 7
5861 dec Tmap1.num_big_steps // one 8-pixel group finished
// Leftover pixels: a two-pixel body followed by a single trailing pixel.
// NOTE(review): the labels and branches that sequence these are not shown here.
5867 mov eax,Tmap1.loop_count // eax = leftover count (loop_count was masked with 7 above)
5872 mov Tmap1.loop_count, eax // write it back (any adjustment made in between is not shown here)
5877 mov al,[edi] // preread the destination cache line
5878 // add ebx,edx // increment u fraction
5882 movzx eax,byte ptr [esi] // get texture pixel 0
5884 add ecx,Tmap1.DeltaVFrac // increment v fraction
5885 sbb ebp,ebp // get -1 if carry
5886 add ebx,edx // increment u fraction
5887 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5888 mov eax, palman_8_32_xlat[eax*4] // translate texel 0
5889 mov [edi+0],eax // store pixel 0
5891 add ecx,Tmap1.DeltaVFrac // increment v fraction
5892 sbb ebp,ebp // get -1 if carry
5893 add ebx,edx // increment u fraction
5894 movzx eax,byte ptr [esi] // get texture pixel 1
5896 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5897 mov eax, palman_8_32_xlat[eax*4] // translate texel 1
5898 mov [edi+4],eax // store pixel 1
5901 dec Tmap1.loop_count // one two-pixel step finished
5909 movzx eax,byte ptr [esi] // get the texel for the trailing single pixel
5910 mov eax, palman_8_32_xlat[eax*4] // translate it
5911 mov [edi],eax // store the trailing pixel
5925 void tmapscan_pln32( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
5927 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(uint,lx,y);
5928 Tmap1.loop_count = rx - lx;
5929 Tmap1.fx_u = fl2f(p->u);
5930 Tmap1.fx_v = fl2f(p->v);
5931 Tmap1.fx_du_dx = fl2f(dp->u);
5932 Tmap1.fx_dv_dx = fl2f(dp->v);
5934 Tmap1.fx_l = fl2f(p->l*32.0);
5935 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
5937 Tmap1.fx_u_right = fl2f(rp->u);
5938 Tmap1.fx_v_right = fl2f(rp->v);
5939 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
5940 Tmap1.bp = tmap_bitmap;
5941 Tmap1.src_offset = tmap_bitmap->w;
5944 Tmap1.FixedScale = 65536.0f;
5945 Tmap1.FixedScale8 = 2048.0f; //8192.0f; // 2^16 / 8
5949 Tmap1.UOverZ = p->u;
5950 Tmap1.VOverZ = p->v;
5951 Tmap1.OneOverZ = p->sw;
5953 Tmap1.dUOverZdX8 = dp->u*32.0f;
5954 Tmap1.dVOverZdX8 = dp->v*32.0f;
5955 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
5957 Tmap1.dUOverZdX = dp->u;
5958 Tmap1.dVOverZdX = dp->v;
5959 Tmap1.dOneOverZdX = dp->sw;
5961 Tmap1.RightUOverZ = rp->u;
5962 Tmap1.RightVOverZ = rp->v;
5963 Tmap1.RightOneOverZ = rp->sw;
5966 Tmap1.BitmapWidth = Tmap1.bp->w;
5967 Tmap1.BitmapHeight = Tmap1.bp->h;
5970 if ( Tmap1.fx_dl_dx < 0 ) {
5971 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
5972 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
5973 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
5975 // Assert( Tmap1.fx_l > 31*F1_0 );
5976 // Assert( Tmap1.fx_l < 66*F1_0 );
5977 // Assert( Tmap1.fx_dl_dx >= 0 );
5978 // Assert( Tmap1.fx_dl_dx < 31*F1_0 );
5996 // put the FPU in 32 bit mode
5997 // @todo move this out of here!
5999 fstcw Tmap1.OldFPUCW // store copy of CW
6000 mov ax,Tmap1.OldFPUCW // get it in ax
6001 //hh and eax,NOT 1100000000y // 24 bit precision
6003 mov Tmap1.FPUCW,ax // store it
6004 fldcw Tmap1.FPUCW // load the FPU
6006 mov ecx, Tmap1.loop_count // ecx = width
6008 mov edi, Tmap1.dest_row_data // edi = dest pointer
6010 // edi = pointer to start pixel in dest dib
6013 mov eax,ecx // eax and ecx = width
6014 shr ecx,5 // ecx = width / subdivision length
6015 and eax,31 // eax = width mod subdivision length
6016 jnz some_left_over // any leftover?
6018 dec ecx // no, so special case last span
6019 mov eax,32 // it's 8 pixels long
6021 mov Tmap1.Subdivisions,ecx // store widths
6022 mov Tmap1.WidthModLength,eax
6024 // mov ebx,pLeft ; get left edge pointer
6025 // mov edx,pGradients ; get gradients pointer
6027 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
6028 // st0 st1 st2 st3 st4 st5 st6 st7
6029 fld Tmap1.VOverZ // V/ZL
6030 fld Tmap1.UOverZ // U/ZL V/ZL
6031 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
6032 fld1 // 1 1/ZL U/ZL V/ZL
6033 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
6034 fld st // ZL ZL 1/ZL U/ZL V/ZL
6035 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
6036 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
6037 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
6039 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
6040 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
6042 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
6044 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
6045 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
6046 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
6047 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
6048 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
6050 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
6052 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
6053 // @todo overlap this guy
6054 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
6055 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
6056 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
6057 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
6058 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
6060 cmp ecx,0 // check for any full spans
6061 jle HandleLeftoverPixels
6065 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
6066 // UR VR V/ZR 1/ZR U/ZR UL VL
6068 // convert left side coords
6070 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
6071 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
6072 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
6074 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
6075 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
6076 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
6078 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
6080 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
6081 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
6082 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
6083 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
6085 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
6086 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
6088 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
6089 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
6090 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
6092 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
6093 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
6095 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
6096 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
6097 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
6098 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
6099 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
6100 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
6101 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
6103 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
6105 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
6106 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
6109 ; set up affine registers
6111 ; setup delta values
6113 mov eax,Tmap1.DeltaV ; get v 16.16 step
6114 mov ebx,eax ; copy it
6115 sar eax,16 ; get v int step
6116 shl ebx,16 ; get v frac step
6117 mov Tmap1.DeltaVFrac,ebx ; store it
6118 imul eax,Tmap1.src_offset ; calculate texture step for v int step
6120 mov ebx,Tmap1.DeltaU ; get u 16.16 step
6121 mov ecx,ebx ; copy it
6122 sar ebx,16 ; get u int step
6123 shl ecx,16 ; get u frac step
6124 mov Tmap1.DeltaUFrac,ecx ; store it
6125 add eax,ebx ; calculate uint + vint step
6126 mov Tmap1.UVintVfracStepVNoCarry,eax; save whole step in non-v-carry slot
6127 add eax,Tmap1.src_offset ; calculate whole step + v carry
6128 mov Tmap1.UVintVfracStepVCarry,eax ; save in v-carry slot
6132 ; check coordinate ranges
6133 mov eax, Tmap1.UFixed
6134 cmp eax, Tmap1.MinUFixed
6136 mov eax, Tmap1.MinUFixed
6137 mov Tmap1.UFixed, eax
6140 cmp eax, Tmap1.MaxUFixed
6142 mov eax, Tmap1.MaxUFixed
6143 mov Tmap1.UFixed, eax
6145 mov eax, Tmap1.VFixed
6146 cmp eax, Tmap1.MinVFixed
6148 mov eax, Tmap1.MinVFixed
6149 mov Tmap1.VFixed, eax
6152 cmp eax, Tmap1.MaxVFixed
6154 mov eax, Tmap1.MaxVFixed
6155 mov Tmap1.VFixed, eax
6159 ; setup initial coordinates
6160 mov esi,Tmap1.UFixed ; get u 16.16 fixedpoint coordinate
6162 mov ebx,esi ; copy it
6163 sar esi,16 ; get integer part
6164 shl ebx,16 ; get fractional part
6166 mov ecx,Tmap1.VFixed ; get v 16.16 fixedpoint coordinate
6168 mov edx,ecx ; copy it
6169 sar edx,16 ; get integer part
6170 shl ecx,16 ; get fractional part
6171 imul edx,Tmap1.src_offset ; calc texture scanline address
6172 add esi,edx ; calc texture offset
6173 add esi,Tmap1.pixptr ; calc address
6175 mov edx,Tmap1.DeltaUFrac ; get register copy
6181 mov ebp, Tmap1.fx_dl_dx
6192 // add Tmap1.fx_l, eax
6195 // mov eax, Tmap1.fx_l // use bx and dx to do lighting
6198 // mov eax, Tmap1.fx_dl_dx // use bx and dx to do lighting
6204 ; ************** Can't Access Stack Frame ******************
6205 ; ************** Can't Access Stack Frame ******************
6206 ; ************** Can't Access Stack Frame ******************
6208 // 8 pixel span code
6209 // edi = dest dib bits at current pixel
6210 // esi = texture pointer at current u,v
6212 // ebx = u fraction 0.32
6213 // ecx = v fraction 0.32
6214 // edx = u frac step
6215 // ebp = v carry scratch
6217 mov al,[edi] // preread the destination cache line
6220 movzx eax,byte ptr [esi] // get texture pixel 0
6222 mov eax, gr_fade_table32[eax*4]
6224 add ecx,Tmap1.DeltaVFrac // increment v fraction
6225 sbb ebp,ebp // get -1 if carry
6226 add ebx,edx // increment u fraction
6228 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6229 add ecx,Tmap1.DeltaVFrac // increment v fraction
6231 sbb ebp,ebp // get -1 if carry
6232 // mov al, 0 // Uncomment this line to show divisions
6233 mov [edi+0],eax // store pixel 0
6235 add ebx,edx // increment u fraction
6236 movzx eax,byte ptr [esi] // get texture pixel 0
6238 mov eax, gr_fade_table32[eax*4]
6240 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6241 add ecx,Tmap1.DeltaVFrac // increment v fraction
6243 sbb ebp,ebp // get -1 if carry
6244 mov [edi+4],eax // store pixel 1
6246 add ebx,edx // increment u fraction
6247 movzx eax,byte ptr [esi] // get texture pixel 0
6249 mov eax, gr_fade_table32[eax*4]
6251 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6252 add ecx,Tmap1.DeltaVFrac // increment v fraction
6254 sbb ebp,ebp // get -1 if carry
6255 mov [edi+8],eax // store pixel 2
6257 add ebx,edx // increment u fraction
6258 movzx eax,byte ptr [esi] // get texture pixel 0
6260 mov eax, gr_fade_table32[eax*4]
6262 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6263 add ecx,Tmap1.DeltaVFrac // increment v fraction
6265 sbb ebp,ebp // get -1 if carry
6266 mov [edi+12],eax // store pixel 3
6268 add ebx,edx // increment u fraction
6269 movzx eax,byte ptr [esi] // get texture pixel 0
6271 mov eax, gr_fade_table32[eax*4]
6272 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6273 add ecx,Tmap1.DeltaVFrac // increment v fraction
6275 sbb ebp,ebp // get -1 if carry
6276 mov [edi+16],eax // store pixel 3
6278 add ebx,edx // increment u fraction
6279 movzx eax,byte ptr [esi] // get texture pixel 0
6281 mov eax, gr_fade_table32[eax*4]
6282 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6283 add ecx,Tmap1.DeltaVFrac // increment v fraction
6285 sbb ebp,ebp // get -1 if carry
6286 mov [edi+20],eax // store pixel 3
6288 add ebx,edx // increment u fraction
6289 movzx eax,byte ptr [esi] // get texture pixel 0
6291 mov eax, gr_fade_table32[eax*4]
6292 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6293 add ecx,Tmap1.DeltaVFrac // increment v fraction
6295 sbb ebp,ebp // get -1 if carry
6296 mov [edi+24],eax // store pixel 3
6298 add ebx,edx // increment u fraction
6299 movzx eax,byte ptr [esi] // get texture pixel 0
6301 mov eax, gr_fade_table32[eax*4]
6302 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6303 add ecx,Tmap1.DeltaVFrac // increment v fraction
6305 sbb ebp,ebp // get -1 if carry
6306 mov [edi+28],eax // store pixel 3
6308 add ebx,edx // increment u fraction
6309 movzx eax,byte ptr [esi] // get texture pixel 0
6311 mov eax, gr_fade_table32[eax*4]
6312 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6313 add ecx,Tmap1.DeltaVFrac // increment v fraction
6315 sbb ebp,ebp // get -1 if carry
6316 mov [edi+32],eax // store pixel 3
6318 add ebx,edx // increment u fraction
6319 movzx eax,byte ptr [esi] // get texture pixel 0
6321 mov eax, gr_fade_table32[eax*4]
6322 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6323 add ecx,Tmap1.DeltaVFrac // increment v fraction
6325 sbb ebp,ebp // get -1 if carry
6326 mov [edi+36],eax // store pixel 3
6328 add ebx,edx // increment u fraction
6329 movzx eax,byte ptr [esi] // get texture pixel 0
6331 mov eax, gr_fade_table32[eax*4]
6332 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6333 add ecx,Tmap1.DeltaVFrac // increment v fraction
6335 sbb ebp,ebp // get -1 if carry
6336 mov [edi+40],eax // store pixel 3
6338 add ebx,edx // increment u fraction
6339 movzx eax,byte ptr [esi] // get texture pixel 0
6341 mov eax, gr_fade_table32[eax*4]
6344 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6345 add ecx,Tmap1.DeltaVFrac // increment v fraction
6347 sbb ebp,ebp // get -1 if carry
6348 mov [edi+44],eax // store pixel 3
6350 add ebx,edx // increment u fraction
6351 movzx eax,byte ptr [esi] // get texture pixel 0
6353 mov eax, gr_fade_table32[eax*4]
6356 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6357 add ecx,Tmap1.DeltaVFrac // increment v fraction
6359 sbb ebp,ebp // get -1 if carry
6360 mov [edi+48],eax // store pixel 3
6362 add ebx,edx // increment u fraction
6363 movzx eax,byte ptr [esi] // get texture pixel 0
6365 mov eax, gr_fade_table32[eax*4]
6368 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6369 add ecx,Tmap1.DeltaVFrac // increment v fraction
6371 sbb ebp,ebp // get -1 if carry
6372 mov [edi+52],eax // store pixel 3
6374 add ebx,edx // increment u fraction
6375 movzx eax,byte ptr [esi] // get texture pixel 0
6377 mov eax, gr_fade_table32[eax*4]
6380 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6381 add ecx,Tmap1.DeltaVFrac // increment v fraction
6383 sbb ebp,ebp // get -1 if carry
6384 mov [edi+56],eax // store pixel 3
6386 add ebx,edx // increment u fraction
6387 movzx eax,byte ptr [esi] // get texture pixel 0
6389 mov eax, gr_fade_table32[eax*4]
6392 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6393 add ecx,Tmap1.DeltaVFrac // increment v fraction
6395 sbb ebp,ebp // get -1 if carry
6396 mov [edi+60],eax // store pixel 3
6398 add ebx,edx // increment u fraction
6399 movzx eax,byte ptr [esi] // get texture pixel 0
6401 mov eax, gr_fade_table32[eax*4]
6404 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6405 add ecx,Tmap1.DeltaVFrac // increment v fraction
6407 sbb ebp,ebp // get -1 if carry
6408 mov [edi+64],eax // store pixel 3
6410 add ebx,edx // increment u fraction
6411 movzx eax,byte ptr [esi] // get texture pixel 0
6413 mov eax, gr_fade_table32[eax*4]
6416 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6417 add ecx,Tmap1.DeltaVFrac // increment v fraction
6419 sbb ebp,ebp // get -1 if carry
6420 mov [edi+68],eax // store pixel 3
6422 add ebx,edx // increment u fraction
6423 movzx eax,byte ptr [esi] // get texture pixel 0
6425 mov eax, gr_fade_table32[eax*4]
6428 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6429 add ecx,Tmap1.DeltaVFrac // increment v fraction
6431 sbb ebp,ebp // get -1 if carry
6432 mov [edi+72],eax // store pixel 3
6434 add ebx,edx // increment u fraction
6435 movzx eax,byte ptr [esi] // get texture pixel 0
6437 mov eax, gr_fade_table32[eax*4]
6440 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6441 add ecx,Tmap1.DeltaVFrac // increment v fraction
6443 sbb ebp,ebp // get -1 if carry
6444 mov [edi+76],eax // store pixel 3
6446 add ebx,edx // increment u fraction
6447 movzx eax,byte ptr [esi] // get texture pixel 0
6449 mov eax, gr_fade_table32[eax*4]
6452 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6453 add ecx,Tmap1.DeltaVFrac // increment v fraction
6455 sbb ebp,ebp // get -1 if carry
6456 mov [edi+80],eax // store pixel 3
6458 add ebx,edx // increment u fraction
6459 movzx eax,byte ptr [esi] // get texture pixel 0
6461 mov eax, gr_fade_table32[eax*4]
6464 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6465 add ecx,Tmap1.DeltaVFrac // increment v fraction
6467 sbb ebp,ebp // get -1 if carry
6468 mov [edi+84],eax // store pixel 3
6470 add ebx,edx // increment u fraction
6471 movzx eax,byte ptr [esi] // get texture pixel 0
6473 mov eax, gr_fade_table32[eax*4]
6476 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6477 add ecx,Tmap1.DeltaVFrac // increment v fraction
6479 sbb ebp,ebp // get -1 if carry
6480 mov [edi+88],eax // store pixel 3
6482 add ebx,edx // increment u fraction
6483 movzx eax,byte ptr [esi] // get texture pixel 0
6485 mov eax, gr_fade_table32[eax*4]
6488 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6489 add ecx,Tmap1.DeltaVFrac // increment v fraction
6491 sbb ebp,ebp // get -1 if carry
6492 mov [edi+92],eax // store pixel 3
6494 add ebx,edx // increment u fraction
6495 movzx eax,byte ptr [esi] // get texture pixel 0
6497 mov eax, gr_fade_table32[eax*4]
6500 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6501 add ecx,Tmap1.DeltaVFrac // increment v fraction
6503 sbb ebp,ebp // get -1 if carry
6504 mov [edi+96],eax // store pixel 3
6506 add ebx,edx // increment u fraction
6507 movzx eax,byte ptr [esi] // get texture pixel 0
6509 mov eax, gr_fade_table32[eax*4]
6512 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6513 add ecx,Tmap1.DeltaVFrac // increment v fraction
6515 sbb ebp,ebp // get -1 if carry
6516 mov [edi+100],eax // store pixel 3
6518 add ebx,edx // increment u fraction
6519 movzx eax,byte ptr [esi] // get texture pixel 0
6521 mov eax, gr_fade_table32[eax*4]
6524 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6525 add ecx,Tmap1.DeltaVFrac // increment v fraction
6529 sbb ebp,ebp // get -1 if carry
6530 mov [edi+104],eax // store pixel 3
6532 add ebx,edx // increment u fraction
6533 movzx eax,byte ptr [esi] // get texture pixel 0
6535 mov eax, gr_fade_table32[eax*4]
6538 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6539 add ecx,Tmap1.DeltaVFrac // increment v fraction
6541 sbb ebp,ebp // get -1 if carry
6542 mov [edi+108],eax // store pixel 3
6544 add ebx,edx // increment u fraction
6545 movzx eax,byte ptr [esi] // get texture pixel 0
6547 mov eax, gr_fade_table32[eax*4]
6549 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6550 add ecx,Tmap1.DeltaVFrac // increment v fraction
6552 sbb ebp,ebp // get -1 if carry
6553 mov [edi+112],eax // store pixel 4
6555 add ebx,edx // increment u fraction
6556 movzx eax,byte ptr [esi] // get texture pixel 0
6558 mov eax, gr_fade_table32[eax*4]
6560 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6561 add ecx,Tmap1.DeltaVFrac // increment v fraction
6563 sbb ebp,ebp // get -1 if carry
6564 mov [edi+116],eax // store pixel 5
6566 add ebx,edx // increment u fraction
6567 movzx eax,byte ptr [esi] // get texture pixel 0
6569 mov eax, gr_fade_table32[eax*4]
6571 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6572 add ecx,Tmap1.DeltaVFrac // increment v fraction
6574 sbb ebp,ebp // get -1 if carry
6575 mov [edi+120],eax // store pixel 6
6577 add ebx,edx // increment u fraction
6579 movzx eax,byte ptr [esi] // get texture pixel 0
6581 mov eax, gr_fade_table32[eax*4]
6583 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6585 mov [edi+124],eax // store pixel 7
6589 ; ************** Okay to Access Stack Frame ****************
6590 ; ************** Okay to Access Stack Frame ****************
6591 ; ************** Okay to Access Stack Frame ****************
6594 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
6595 ; ZR V/ZR 1/ZR U/ZR UL VL
6597 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
6598 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
6599 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
6600 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
6602 add edi,128 ; increment to next span
6603 dec Tmap1.Subdivisions ; decrement span count
6604 jnz SpanLoop ; loop back
6606 // save new lighting values
6609 // mov Tmap1.fx_l, eax
6613 // mov Tmap1.fx_dl_dx, eax
6615 HandleLeftoverPixels:
6618 mov esi,Tmap1.pixptr ; load texture pointer
6620 ; edi = dest dib bits
6621 ; esi = current texture dib bits
6622 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
6623 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
6625 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
6626 jz FPUReturn ; nope, pop the FPU and bail
6628 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
6630 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
6631 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
6632 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
6634 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
6635 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
6636 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
6638 dec Tmap1.WidthModLength ; calc how many steps to take
6639 jz OnePixelSpan ; just one, don't do deltas
6641 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
6644 ; @todo rearrange things so we don't need these two instructions
6645 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
6646 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
6648 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
6649 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
6650 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
6651 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
6652 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
6653 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
6655 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
6657 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
6658 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
6660 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
6662 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
6663 fxch st(1) ; VR UR inv. inv. inv. dU VL
6664 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
6665 fxch st(6) ; dV UR inv. inv. inv. dU VR
6667 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
6668 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
6669 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
6671 fxch st(4) ; dU inv. inv. inv. UR VR
6672 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
6673 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
6674 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
6676 ; @todo gross! these are to line up with the other loop
6677 fld st(1) ; inv. inv. inv. inv. UR VR
6678 fld st(2) ; inv. inv. inv. inv. inv. UR VR
6683 ; setup delta values
6684 mov eax, Tmap1.DeltaV // get v 16.16 step
6685 mov ebx, eax // copy it
6686 sar eax, 16 // get v int step
6687 shl ebx, 16 // get v frac step
6688 mov Tmap1.DeltaVFrac, ebx // store it
6689 imul eax, Tmap1.src_offset // calc texture step for v int step
6691 mov ebx, Tmap1.DeltaU // get u 16.16 step
6692 mov ecx, ebx // copy it
6693 sar ebx, 16 // get the u int step
6694 shl ecx, 16 // get the u frac step
6695 mov Tmap1.DeltaUFrac, ecx // store it
6696 add eax, ebx // calc uint + vint step
6697 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
6698 add eax, Tmap1.src_offset // calc whole step + v carry
6699 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
6706 ; check coordinate ranges
6707 mov eax, Tmap1.UFixed
6708 cmp eax, Tmap1.MinUFixed
6710 mov eax, Tmap1.MinUFixed
6711 mov Tmap1.UFixed, eax
6714 cmp eax, Tmap1.MaxUFixed
6716 mov eax, Tmap1.MaxUFixed
6717 mov Tmap1.UFixed, eax
6719 mov eax, Tmap1.VFixed
6720 cmp eax, Tmap1.MinVFixed
6722 mov eax, Tmap1.MinVFixed
6723 mov Tmap1.VFixed, eax
6726 cmp eax, Tmap1.MaxVFixed
6728 mov eax, Tmap1.MaxVFixed
6729 mov Tmap1.VFixed, eax
6736 ; setup initial coordinates
6737 mov esi, Tmap1.UFixed // get u 16.16
6738 mov ebx, esi // copy it
6739 sar esi, 16 // get integer part
6740 shl ebx, 16 // get fractional part
6742 mov ecx, Tmap1.VFixed // get v 16.16
6743 mov edx, ecx // copy it
6744 sar edx, 16 // get integer part
6745 shl ecx, 16 // get fractional part
6746 imul edx, Tmap1.src_offset // calc texture scanline address
6747 add esi, edx // calc texture offset
6748 add esi, Tmap1.pixptr // calc address
6750 ; set edi = address of first pixel to modify
6751 ; mov edi, Tmap1.dest_row_data
6760 mov edx, Tmap1.DeltaUFrac
6762 cmp Tmap1.WidthModLength, 1
6767 mov ebx, Tmap1.fx_l_right
6774 // slow but maybe better
6777 mov ebx, Tmap1.WidthModLength
6782 mov eax, Tmap1.fx_dl_dx
6792 inc Tmap1.WidthModLength
6793 mov eax,Tmap1.WidthModLength
6797 mov Tmap1.WidthModLength, eax
6801 mov al,[edi] // preread the destination cache line
6804 movzx eax,byte ptr [esi] // get texture pixel 0
6806 mov eax, gr_fade_table32[eax*4]
6808 add ecx,Tmap1.DeltaVFrac // increment v fraction
6809 sbb ebp,ebp // get -1 if carry
6810 add ebx,edx // increment u fraction
6811 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6812 mov [edi+0],eax // store pixel 0
6814 add ecx,Tmap1.DeltaVFrac // increment v fraction
6815 sbb ebp,ebp // get -1 if carry
6816 add ebx,edx // increment u fraction
6817 movzx eax,byte ptr [esi] // get texture pixel 0
6819 mov eax, gr_fade_table32[eax*4]
6821 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6822 mov [edi+4],eax // store pixel 1
6825 dec Tmap1.WidthModLength
6833 movzx eax,byte ptr [esi] // get texture pixel 0
6835 mov eax, gr_fade_table32[eax*4]
6836 mov [edi],eax // store pixel 2
6851 OldWay: // This is 6% slower than above
6853 mov ebx,Tmap1.UFixed ; get starting coordinates
6854 mov ecx,Tmap1.VFixed ; for span
6856 ; leftover pixels loop
6857 ; edi = dest dib bits
6858 ; esi = texture dib bits
6864 mov eax,ecx ; copy v
6866 imul eax,Tmap1.src_offset ; scan offset
6867 mov edx,ebx ; copy u
6869 add eax,edx ; texture offset
6870 mov al,[esi+eax] ; get source pixel
6872 mov [edi],al ; store it
6874 add ebx,Tmap1.DeltaU ; increment u coordinate
6875 add ecx,Tmap1.DeltaV ; increment v coordinate
6877 dec Tmap1.WidthModLength ; decrement loop count
6878 jl FPUReturn ; finish up
6882 mov eax,ecx ; copy v
6884 imul eax,Tmap1.src_offset ; scan offset
6885 mov edx,ebx ; copy u
6887 add eax,edx ; texture offset
6888 mov al,[esi+eax] ; get source pixel
6889 mov [edi],al ; store it
6891 add ebx,Tmap1.DeltaU ; increment u coordinate
6892 add ecx,Tmap1.DeltaV ; increment v coordinate
6894 dec Tmap1.WidthModLength ; decrement loop count
6895 jge LeftoverLoop ; finish up
6900 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
6901 ; xxx xxx xxx xxx xxx xxx xxx
6912 fldcw Tmap1.OldFPUCW // restore the FPU
6929 add edx,DeltaVFrac ; Add in 0.32 DeltaVFrac to VFrac
6930 sbb ebp,ebp ; ebp will equal -1 if there was a carry
6931 mov BYTE PTR [edi], al ; blit destination pixel
6932 mov al, BYTE PTR [esi] ; get next texel
6933 add ecx,ebx ; add 0.32 DeltaUFrac to UFrac, plus light
6934 adc esi, [UVStepCarry1+(ebp*4)]
6935 mov ah, ch ; move lighting value into place
6936 mov al, ShadeTable[eax] ; Get shaded pixel