2 * $Logfile: /Freespace2/code/Graphics/TmapGenericScans.cpp $
7 * Some code for generic scanlines. This isn't used, it is just
8 * basically a dump area for inner loops I was experimenting with.
9 * this entire file is #ifdef 0'd out.
12 * Revision 1.1 2002/05/03 03:28:09 root
16 * 2 10/07/98 10:53a Dave
19 * 1 10/07/98 10:49a Dave
21 * 7 4/24/97 4:45p John
22 * Added tiled texture mappers for 64x64, 128x128, and 256x256 textures.
24 * 6 4/24/97 3:01p John
25 * added code to not crash on non-256x256 textures.
27 * 5 3/14/97 3:55p John
28 * Made tiled tmapper not always be zbuffered.
30 * 4 3/13/97 10:32a John
31 * Added code for tiled 256x256 textures in certain models.
33 * 3 3/10/97 5:20p John
34 * Differentiated between Gouraud and Flat shading. Since we only do flat
35 * shading as of now, we don't need to interpolate L in the outer loop.
36 * This should save a few percent.
38 * 2 12/10/96 10:37a John
39 * Restructured texture mapper to remove some overhead from each scanline
40 * setup. This gave about a 30% improvement drawing trans01.pof, which is
41 * a really complex model. In the process, I cleaned up the scanline
42 * functions and separated them into different modules for each pixel
51 #include "tmapscanline.h"
56 #pragma warning(disable:4410)
64 #include "tmapscanline.h"
69 #pragma warning(disable:4410)
71 // These must be global because I use them in assembly
72 // code that uses the EBP register, so the variables
73 // can't be accessed off the stack.
// Scanline start values. tmapscan_lln8_old() fills _fx_u, _fx_v and _fx_l
// from its `p` vertex.
74 int _fx_u, _fx_v, _fx_w, _fx_l;
75 int _fx_u_right, _fx_v_right, _fx_w_right;
// Per-pixel deltas. tmapscan_lln8_old() fills _fx_du, _fx_dv and _fx_dl
// from its `dp` vertex.
76 int _fx_du, _fx_dv, _fx_dw, _fx_dl;
// Destination pointer, texture data pointer and fade-table address, each
// stored as a pointer cast to uint by tmapscan_lln8_old().
77 uint _fx_destptr,_fx_srcptr, light_table;
78 int V0, U0, DU1, DV1, DZ1;
// _loop_count is set to rx - lx by tmapscan_lln8_old(); its assembly also
// reads and rewrites _loop_count and stores into num_big_steps.
79 int _loop_count,num_big_steps;
// Tested at the top of tmapscan_generic() before the rgbtable lookups.
82 int rgbtable_inited = 0;
91 for (i=0; i<512; i++ ) {
94 else if ( v > 255 ) v = 255;
102 void asm_tmap_scanline_lln();
103 void asm_tmap_scanline_lln_tiled();
// tmapscan_lln8: fills the shared Tmap1 scanline state for row y, starting at
// column lx, and then calls asm_tmap_scanline_lln() to draw it.
//   lx, rx - span columns; loop_count is stored as rx - lx
//   y      - screen row
//   p      - start-of-span values (u, v, l are read); presumably the left edge
//   dp     - per-pixel deltas, stored in the fx_*_dx fields
//   rp     - values stored in the fx_*_right fields (u, v are read)
//   flags  - not referenced in the lines shown here
// NOTE(review): fl2f is presumably a float -> fixed-point conversion; confirm
// against its definition.
105 void tmapscan_lln8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
// Destination pointer for the pixel at (lx, y).
107 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
108 Tmap1.loop_count = rx - lx;
// Texture source: raw data pointer, owning bitmap, and the bitmap width as
// the row offset.
109 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
110 Tmap1.bp = tmap_bitmap;
111 Tmap1.src_offset = tmap_bitmap->w;
113 Tmap1.fx_u = fl2f(p->u);
114 Tmap1.fx_v = fl2f(p->v);
// Lighting value and its delta are scaled by 32 before conversion.
115 Tmap1.fx_l = fl2f(p->l*32.0);
116 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
117 Tmap1.fx_du_dx = fl2f(dp->u);
118 Tmap1.fx_dv_dx = fl2f(dp->v);
119 Tmap1.fx_u_right = fl2f(rp->u);
120 Tmap1.fx_v_right = fl2f(rp->v);
122 asm_tmap_scanline_lln();
125 extern void asm_tmap_scanline_lnt();
// tmapscan_lnt8: fills Tmap1 for one scanline and calls
// asm_tmap_scanline_lnt(). Same setup as tmapscan_lln8() except that no
// lighting fields (fx_l, fx_dl_dx) are written.
//   p  - start-of-span u, v
//   dp - per-pixel u, v deltas
//   rp - u, v stored in the fx_*_right fields
//   flags - not referenced in the lines shown here
127 void tmapscan_lnt8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
129 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
130 Tmap1.loop_count = rx - lx;
131 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
132 Tmap1.bp = tmap_bitmap;
// Bitmap width doubles as the texture row offset.
133 Tmap1.src_offset = tmap_bitmap->w;
135 Tmap1.fx_u = fl2f(p->u);
136 Tmap1.fx_v = fl2f(p->v);
137 Tmap1.fx_du_dx = fl2f(dp->u);
138 Tmap1.fx_dv_dx = fl2f(dp->v);
139 Tmap1.fx_u_right = fl2f(rp->u);
140 Tmap1.fx_v_right = fl2f(rp->v);
142 asm_tmap_scanline_lnt();
145 extern void asm_tmap_scanline_lnn();
// tmapscan_lnn8: fills Tmap1 for one scanline and calls
// asm_tmap_scanline_lnn(). The field setup is identical to tmapscan_lnt8();
// only the assembly routine invoked at the end differs.
//   p  - start-of-span u, v
//   dp - per-pixel u, v deltas
//   rp - u, v stored in the fx_*_right fields
//   flags - not referenced in the lines shown here
147 void tmapscan_lnn8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
149 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
150 Tmap1.loop_count = rx - lx;
151 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
152 Tmap1.bp = tmap_bitmap;
// Bitmap width doubles as the texture row offset.
153 Tmap1.src_offset = tmap_bitmap->w;
155 Tmap1.fx_u = fl2f(p->u);
156 Tmap1.fx_v = fl2f(p->v);
157 Tmap1.fx_du_dx = fl2f(dp->u);
158 Tmap1.fx_dv_dx = fl2f(dp->v);
159 Tmap1.fx_u_right = fl2f(rp->u);
160 Tmap1.fx_v_right = fl2f(rp->v);
162 asm_tmap_scanline_lnn();
// tmapscan_lln8_tiled: fills Tmap1 for a lit scanline of a tiled texture.
// In addition to the fields tmapscan_lln8() sets, it records the bitmap
// width and height.
// NOTE(review): the only scanline call visible here is commented out, so as
// shown this routine prepares state without drawing; confirm whether another
// call follows.
//   p  - start-of-span u, v, l
//   dp - per-pixel u, v, l deltas
//   rp - u, v stored in the fx_*_right fields
//   flags - not referenced in the lines shown here
166 void tmapscan_lln8_tiled( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
168 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
169 Tmap1.loop_count = rx - lx;
170 Tmap1.fx_u = fl2f(p->u);
171 Tmap1.fx_v = fl2f(p->v);
// Lighting value and delta are scaled by 32 before conversion.
172 Tmap1.fx_l = fl2f(p->l*32.0);
173 Tmap1.fx_du_dx = fl2f(dp->u);
174 Tmap1.fx_dv_dx = fl2f(dp->v);
175 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
176 Tmap1.fx_u_right = fl2f(rp->u);
177 Tmap1.fx_v_right = fl2f(rp->v);
178 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
179 Tmap1.bp = tmap_bitmap;
180 Tmap1.src_offset = tmap_bitmap->w;
// Texture dimensions, stored only by this tiled variant.
182 Tmap1.BitmapWidth = tmap_bitmap->w;
183 Tmap1.BitmapHeight = tmap_bitmap->h;
186 // asm_tmap_scanline_lln_tiled();
193 void c_tmap_scanline_per_sub_new();
// tmapscan_pln8: draws one 8-bit scanline with perspective-corrected texture
// coordinates.
//
// The C prologue copies the span description into Tmap1. The assembly then
// splits the span into 32-pixel subdivisions plus a leftover run. At each
// subdivision boundary it recovers U and V by dividing the interpolated U/Z
// and V/Z by 1/Z on the FPU, and it steps U and V linearly (16.16 fixed
// point) between boundaries. Texels are remapped through gr_fade_table
// before being stored.
//
//   lx, rx - span columns; loop_count is stored as rx - lx
//   y      - screen row
//   p      - start-of-span values (l and sw are read in the lines shown)
//   dp     - per-pixel gradients (u, v, sw, l)
//   rp     - right-edge values (u, v, sw)
//   flags  - not referenced in the lines shown here
195 void tmapscan_pln8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
197 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
198 Tmap1.loop_count = rx - lx;
199 Tmap1.fx_l = fl2f(p->l*32.0);
200 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
204 Tmap1.OneOverZ = p->sw;
// Gradients pre-multiplied by 32: the step across one full subdivision.
// (The "8" in the field names does not match the factor used here.)
206 Tmap1.dUOverZdX8 = dp->u*32.0f;
207 Tmap1.dVOverZdX8 = dp->v*32.0f;
208 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
// Unscaled per-pixel gradients, used by the leftover-pixel path.
210 Tmap1.dUOverZdX = dp->u;
211 Tmap1.dVOverZdX = dp->v;
212 Tmap1.dOneOverZdX = dp->sw;
214 Tmap1.RightUOverZ = rp->u;
215 Tmap1.RightVOverZ = rp->v;
216 Tmap1.RightOneOverZ = rp->sw;
// A negative lighting delta is made positive and the lighting values are
// mirrored around 67*F1_0.
// NOTE(review): fx_l_right is adjusted here, but its initial assignment is
// not visible in this excerpt; confirm it is set beforehand.
218 if ( Tmap1.fx_dl_dx < 0 ) {
219 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
220 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
221 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
223 // Assert( Tmap1.fx_l > 31*F1_0 );
224 // Assert( Tmap1.fx_l < 66*F1_0 );
225 // Assert( Tmap1.fx_dl_dx >= 0 );
226 // Assert( Tmap1.fx_dl_dx < 31*F1_0 );
238 dldx = Tmap1.fx_dl_dx;
239 dest = Tmap1.dest_row_data;
// Runs loop_count + 1 times (x counts down to 0 inclusive).
241 for (x=Tmap1.loop_count; x >= 0; x-- ) {
242 //*dest++ = gr_fade_table[ ((l>>8)&(0xff00)) + 35 ];
262 // put the FPU in 32 bit mode
263 // @todo move this out of here!
// Save the caller's control word in OldFPUCW and load the working copy from
// FPUCW; OldFPUCW is reloaded at the end of the routine.
265 fstcw Tmap1.OldFPUCW // store copy of CW
266 mov ax,Tmap1.OldFPUCW // get it in ax
268 mov Tmap1.FPUCW,ax // store it
269 fldcw Tmap1.FPUCW // load the FPU
271 mov ecx, Tmap1.loop_count // ecx = width
273 mov edi, Tmap1.dest_row_data // edi = dest pointer
275 // edi = pointer to start pixel in dest dib
// Split the width into full 32-pixel subdivisions (ecx = width >> 5) and a
// remainder (eax = width & 31). When the remainder is zero, one subdivision
// is taken back out of ecx and handed to the leftover path as a 32-pixel run.
278 mov eax,ecx // eax and ecx = width
279 shr ecx,5 // ecx = width / subdivision length
280 and eax,31 // eax = width mod subdivision length
281 jnz some_left_over // any leftover?
283 dec ecx // no, so special case last span
284 mov eax,32 // it's 32 pixels long
286 mov Tmap1.Subdivisions,ecx // store widths
287 mov Tmap1.WidthModLength,eax
289 // mov ebx,pLeft ; get left edge pointer
290 // mov edx,pGradients ; get gradients pointer
// Left-edge U and V are recovered as (U/Z)*Z and (V/Z)*Z, where Z = 1/(1/Z).
292 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
293 // st0 st1 st2 st3 st4 st5 st6 st7
294 fld Tmap1.VOverZ // V/ZL
295 fld Tmap1.UOverZ // U/ZL V/ZL
296 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
297 fld1 // 1 1/ZL U/ZL V/ZL
298 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
299 fld st // ZL ZL 1/ZL U/ZL V/ZL
300 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
301 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
302 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
304 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
305 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
// Step the three over-Z terms by one subdivision to reach the right side of
// the first span.
307 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
309 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
310 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
311 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
312 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
313 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
315 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
317 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
318 // @todo overlap this guy
319 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
320 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
321 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
322 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
323 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
// With no full subdivisions, go straight to the leftover-pixel path.
325 cmp ecx,0 // check for any full spans
326 jle HandleLeftoverPixels
330 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
331 // UR VR V/ZR 1/ZR U/ZR UL VL
// Convert left-edge U and V to integers scaled by FixedScale and store them
// in UFixed / VFixed.
333 // convert left side coords
335 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
336 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
337 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
339 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
340 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
341 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
// Per-pixel deltas across this subdivision: (right - left) scaled by
// FixedScale8, stored as integers in DeltaU / DeltaV.
343 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
345 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
346 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
347 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
348 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
350 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
351 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
353 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
354 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
355 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
357 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
358 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
360 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
361 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
362 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
363 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
364 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
365 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
366 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
// Split the 16.16 DeltaV / DeltaU into fractional parts (kept in
// DeltaVFrac / DeltaUFrac) and integer parts. The integer parts are combined
// into one texture-address step, v_int * src_offset + u_int. A second copy
// with one extra src_offset is stored for the case where the V fraction
// carries.
369 ; set up affine registers
373 mov eax,Tmap1.DeltaV ; get v 16.16 step
374 mov ebx,eax ; copy it
375 sar eax,16 ; get v int step
376 shl ebx,16 ; get v frac step
377 mov Tmap1.DeltaVFrac,ebx ; store it
378 imul eax,Tmap1.src_offset ; calculate texture step for v int step
380 mov ebx,Tmap1.DeltaU ; get u 16.16 step
381 mov ecx,ebx ; copy it
382 sar ebx,16 ; get u int step
383 shl ecx,16 ; get u frac step
384 mov Tmap1.DeltaUFrac,ecx ; store it
385 add eax,ebx ; calculate uint + vint step
386 mov Tmap1.UVintVfracStepVNoCarry,eax; save whole step in non-v-carry slot
387 add eax,Tmap1.src_offset ; calculate whole step + v carry
388 mov Tmap1.UVintVfracStepVCarry,eax ; save in v-carry slot
// Starting texel address: esi = pixptr + v_int * src_offset + u_int.
// ebx and ecx keep the U and V fractions in their upper 16 bits.
390 ; setup initial coordinates
391 mov esi,Tmap1.UFixed ; get u 16.16 fixedpoint coordinate
393 mov ebx,esi ; copy it
394 sar esi,16 ; get integer part
395 shl ebx,16 ; get fractional part
397 mov ecx,Tmap1.VFixed ; get v 16.16 fixedpoint coordinate
399 mov edx,ecx ; copy it
400 sar edx,16 ; get integer part
401 shl ecx,16 ; get fractional part
402 imul edx,Tmap1.src_offset ; calc texture scanline address
403 add esi,edx ; calc texture offset
404 add esi,Tmap1.pixptr ; calc address
406 mov edx,Tmap1.DeltaUFrac ; get register copy
412 mov ebp, Tmap1.fx_dl_dx
// Start the divide for the next span's right edge here; its result is
// consumed after the unrolled pixel loop ("the fdiv is done" below).
424 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
426 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
427 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
// Unrolled 32-pixel affine run. For each pixel: read the texel through esi,
// remap it via gr_fade_table[eax], add DeltaVFrac to ecx and capture the carry
// in ebp (0 or -1), add the U fraction step, then advance esi by
// UVintVfracStep[4*ebp] plus the U-fraction carry.
// NOTE(review): UVintVfracStep[4*ebp] presumably selects between the
// no-carry and V-carry steps stored above; confirm the field layout. The
// upper bytes of eax used in the gr_fade_table index are set outside the
// lines shown here.
431 // edi = dest dib bits at current pixel
432 // esi = texture pointer at current u,v
434 // ebx = u fraction 0.32
435 // ecx = v fraction 0.32
437 // ebp = v carry scratch
439 mov al,[edi] // preread the destination cache line
442 mov al,[esi] // get texture pixel 0
444 mov al, gr_fade_table[eax]
446 add ecx,Tmap1.DeltaVFrac // increment v fraction
447 sbb ebp,ebp // get -1 if carry
448 add ebx,edx // increment u fraction
450 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
451 add ecx,Tmap1.DeltaVFrac // increment v fraction
453 sbb ebp,ebp // get -1 if carry
454 // mov al, 0 // Uncomment this line to show divisions
455 mov [edi+0],al // store pixel 0
457 add ebx,edx // increment u fraction
458 mov al,[esi] // get texture pixel 1
460 mov al, gr_fade_table[eax]
462 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
463 add ecx,Tmap1.DeltaVFrac // increment v fraction
465 sbb ebp,ebp // get -1 if carry
466 mov [edi+1],al // store pixel 1
468 add ebx,edx // increment u fraction
469 mov al,[esi] // get texture pixel 2
471 mov al, gr_fade_table[eax]
473 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
474 add ecx,Tmap1.DeltaVFrac // increment v fraction
476 sbb ebp,ebp // get -1 if carry
477 mov [edi+2],al // store pixel 2
479 add ebx,edx // increment u fraction
480 mov al,[esi] // get texture pixel 3
482 mov al, gr_fade_table[eax]
484 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
485 add ecx,Tmap1.DeltaVFrac // increment v fraction
487 sbb ebp,ebp // get -1 if carry
488 mov [edi+3],al // store pixel 3
490 add ebx,edx // increment u fraction
491 mov al,[esi] // get texture pixel 4
493 mov al, gr_fade_table[eax]
494 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
495 add ecx,Tmap1.DeltaVFrac // increment v fraction
497 sbb ebp,ebp // get -1 if carry
498 mov [edi+4],al // store pixel 4
500 add ebx,edx // increment u fraction
501 mov al,[esi] // get texture pixel 5
503 mov al, gr_fade_table[eax]
504 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
505 add ecx,Tmap1.DeltaVFrac // increment v fraction
507 sbb ebp,ebp // get -1 if carry
508 mov [edi+5],al // store pixel 5
510 add ebx,edx // increment u fraction
511 mov al,[esi] // get texture pixel 6
513 mov al, gr_fade_table[eax]
514 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
515 add ecx,Tmap1.DeltaVFrac // increment v fraction
517 sbb ebp,ebp // get -1 if carry
518 mov [edi+6],al // store pixel 6
520 add ebx,edx // increment u fraction
521 mov al,[esi] // get texture pixel 7
523 mov al, gr_fade_table[eax]
524 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
525 add ecx,Tmap1.DeltaVFrac // increment v fraction
527 sbb ebp,ebp // get -1 if carry
528 mov [edi+7],al // store pixel 7
530 add ebx,edx // increment u fraction
531 mov al,[esi] // get texture pixel 8
533 mov al, gr_fade_table[eax]
534 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
535 add ecx,Tmap1.DeltaVFrac // increment v fraction
537 sbb ebp,ebp // get -1 if carry
538 mov [edi+8],al // store pixel 8
540 add ebx,edx // increment u fraction
541 mov al,[esi] // get texture pixel 9
543 mov al, gr_fade_table[eax]
544 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
545 add ecx,Tmap1.DeltaVFrac // increment v fraction
547 sbb ebp,ebp // get -1 if carry
548 mov [edi+9],al // store pixel 9
550 add ebx,edx // increment u fraction
551 mov al,[esi] // get texture pixel 10
553 mov al, gr_fade_table[eax]
554 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
555 add ecx,Tmap1.DeltaVFrac // increment v fraction
557 sbb ebp,ebp // get -1 if carry
558 mov [edi+10],al // store pixel 10
560 add ebx,edx // increment u fraction
561 mov al,[esi] // get texture pixel 11
563 mov al, gr_fade_table[eax]
566 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
567 add ecx,Tmap1.DeltaVFrac // increment v fraction
569 sbb ebp,ebp // get -1 if carry
570 mov [edi+11],al // store pixel 11
572 add ebx,edx // increment u fraction
573 mov al,[esi] // get texture pixel 12
575 mov al, gr_fade_table[eax]
578 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
579 add ecx,Tmap1.DeltaVFrac // increment v fraction
581 sbb ebp,ebp // get -1 if carry
582 mov [edi+12],al // store pixel 12
584 add ebx,edx // increment u fraction
585 mov al,[esi] // get texture pixel 13
587 mov al, gr_fade_table[eax]
590 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
591 add ecx,Tmap1.DeltaVFrac // increment v fraction
593 sbb ebp,ebp // get -1 if carry
594 mov [edi+13],al // store pixel 13
596 add ebx,edx // increment u fraction
597 mov al,[esi] // get texture pixel 14
599 mov al, gr_fade_table[eax]
602 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
603 add ecx,Tmap1.DeltaVFrac // increment v fraction
605 sbb ebp,ebp // get -1 if carry
606 mov [edi+14],al // store pixel 14
608 add ebx,edx // increment u fraction
609 mov al,[esi] // get texture pixel 15
611 mov al, gr_fade_table[eax]
614 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
615 add ecx,Tmap1.DeltaVFrac // increment v fraction
617 sbb ebp,ebp // get -1 if carry
618 mov [edi+15],al // store pixel 15
620 add ebx,edx // increment u fraction
621 mov al,[esi] // get texture pixel 16
623 mov al, gr_fade_table[eax]
626 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
627 add ecx,Tmap1.DeltaVFrac // increment v fraction
629 sbb ebp,ebp // get -1 if carry
630 mov [edi+16],al // store pixel 16
632 add ebx,edx // increment u fraction
633 mov al,[esi] // get texture pixel 17
635 mov al, gr_fade_table[eax]
638 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
639 add ecx,Tmap1.DeltaVFrac // increment v fraction
641 sbb ebp,ebp // get -1 if carry
642 mov [edi+17],al // store pixel 17
644 add ebx,edx // increment u fraction
645 mov al,[esi] // get texture pixel 18
647 mov al, gr_fade_table[eax]
650 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
651 add ecx,Tmap1.DeltaVFrac // increment v fraction
653 sbb ebp,ebp // get -1 if carry
654 mov [edi+18],al // store pixel 18
656 add ebx,edx // increment u fraction
657 mov al,[esi] // get texture pixel 19
659 mov al, gr_fade_table[eax]
662 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
663 add ecx,Tmap1.DeltaVFrac // increment v fraction
665 sbb ebp,ebp // get -1 if carry
666 mov [edi+19],al // store pixel 19
668 add ebx,edx // increment u fraction
669 mov al,[esi] // get texture pixel 20
671 mov al, gr_fade_table[eax]
674 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
675 add ecx,Tmap1.DeltaVFrac // increment v fraction
677 sbb ebp,ebp // get -1 if carry
678 mov [edi+20],al // store pixel 20
680 add ebx,edx // increment u fraction
681 mov al,[esi] // get texture pixel 21
683 mov al, gr_fade_table[eax]
686 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
687 add ecx,Tmap1.DeltaVFrac // increment v fraction
689 sbb ebp,ebp // get -1 if carry
690 mov [edi+21],al // store pixel 21
692 add ebx,edx // increment u fraction
693 mov al,[esi] // get texture pixel 22
695 mov al, gr_fade_table[eax]
698 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
699 add ecx,Tmap1.DeltaVFrac // increment v fraction
701 sbb ebp,ebp // get -1 if carry
702 mov [edi+22],al // store pixel 22
704 add ebx,edx // increment u fraction
705 mov al,[esi] // get texture pixel 23
707 mov al, gr_fade_table[eax]
710 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
711 add ecx,Tmap1.DeltaVFrac // increment v fraction
713 sbb ebp,ebp // get -1 if carry
714 mov [edi+23],al // store pixel 23
716 add ebx,edx // increment u fraction
717 mov al,[esi] // get texture pixel 24
719 mov al, gr_fade_table[eax]
722 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
723 add ecx,Tmap1.DeltaVFrac // increment v fraction
725 sbb ebp,ebp // get -1 if carry
726 mov [edi+24],al // store pixel 24
728 add ebx,edx // increment u fraction
729 mov al,[esi] // get texture pixel 25
731 mov al, gr_fade_table[eax]
734 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
735 add ecx,Tmap1.DeltaVFrac // increment v fraction
737 sbb ebp,ebp // get -1 if carry
738 mov [edi+25],al // store pixel 25
740 add ebx,edx // increment u fraction
741 mov al,[esi] // get texture pixel 26
743 mov al, gr_fade_table[eax]
746 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
747 add ecx,Tmap1.DeltaVFrac // increment v fraction
751 sbb ebp,ebp // get -1 if carry
752 mov [edi+26],al // store pixel 26
754 add ebx,edx // increment u fraction
755 mov al,[esi] // get texture pixel 27
757 mov al, gr_fade_table[eax]
760 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
761 add ecx,Tmap1.DeltaVFrac // increment v fraction
763 sbb ebp,ebp // get -1 if carry
764 mov [edi+27],al // store pixel 27
766 add ebx,edx // increment u fraction
767 mov al,[esi] // get texture pixel 28
769 mov al, gr_fade_table[eax]
771 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
772 add ecx,Tmap1.DeltaVFrac // increment v fraction
774 sbb ebp,ebp // get -1 if carry
775 mov [edi+28],al // store pixel 28
777 add ebx,edx // increment u fraction
778 mov al,[esi] // get texture pixel 29
780 mov al, gr_fade_table[eax]
782 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
783 add ecx,Tmap1.DeltaVFrac // increment v fraction
785 sbb ebp,ebp // get -1 if carry
786 mov [edi+29],al // store pixel 29
788 add ebx,edx // increment u fraction
789 mov al,[esi] // get texture pixel 30
791 mov al, gr_fade_table[eax]
793 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
794 add ecx,Tmap1.DeltaVFrac // increment v fraction
796 sbb ebp,ebp // get -1 if carry
797 mov [edi+30],al // store pixel 30
799 add ebx,edx // increment u fraction
801 mov al,[esi] // get texture pixel 31
803 mov al, gr_fade_table[eax]
805 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
807 mov [edi+31],al // store pixel 31
811 ; ************** Okay to Access Stack Frame ****************
812 ; ************** Okay to Access Stack Frame ****************
813 ; ************** Okay to Access Stack Frame ****************
816 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
817 ; ZR V/ZR 1/ZR U/ZR UL VL
819 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
820 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
821 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
822 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
// Advance the destination by one subdivision and repeat while any remain.
824 add edi,32 ; increment to next span
825 dec Tmap1.Subdivisions ; decrement span count
826 jnz SpanLoop ; loop back
828 // save new lighting values
831 // mov Tmap1.fx_l, eax
835 // mov Tmap1.fx_dl_dx, eax
// Leftover path: draws the final WidthModLength pixels (1..32) of the span.
837 HandleLeftoverPixels:
840 mov esi,Tmap1.pixptr ; load texture pointer
842 ; edi = dest dib bits
843 ; esi = current texture dib bits
844 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
845 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
847 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
848 jz FPUReturn ; nope, pop the FPU and bail
850 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
852 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
853 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
854 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
856 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
857 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
858 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
// WidthModLength becomes the number of steps between the first and last
// leftover pixel; it is the divisor for the deltas below.
860 dec Tmap1.WidthModLength ; calc how many steps to take
861 jz OnePixelSpan ; just one, don't do deltas
// The right-edge terms come from the Right*OverZ values supplied by the
// caller, each moved back by one per-pixel gradient.
863 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
866 ; @todo rearrange things so we don't need these two instructions
867 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
868 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
870 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
871 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
872 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
873 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
874 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
875 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
877 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
879 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
880 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
// Deltas for the leftover run: (right - left) / steps, scaled by FixedScale.
882 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
884 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
885 fxch st(1) ; VR UR inv. inv. inv. dU VL
886 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
887 fxch st(6) ; dV UR inv. inv. inv. dU VR
889 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
890 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
891 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
893 fxch st(4) ; dU inv. inv. inv. UR VR
894 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
895 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
896 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
898 ; @todo gross! these are to line up with the other loop
899 fld st(1) ; inv. inv. inv. inv. UR VR
900 fld st(2) ; inv. inv. inv. inv. inv. UR VR
// Same integer/fraction split of DeltaV / DeltaU as in the full-span setup.
906 mov eax, Tmap1.DeltaV // get v 16.16 step
907 mov ebx, eax // copy it
908 sar eax, 16 // get v int step
909 shl ebx, 16 // get v frac step
910 mov Tmap1.DeltaVFrac, ebx // store it
911 imul eax, Tmap1.src_offset // calc texture step for v int step
913 mov ebx, Tmap1.DeltaU // get u 16.16 step
914 mov ecx, ebx // copy it
915 sar ebx, 16 // get the u int step
916 shl ecx, 16 // get the u frac step
917 mov Tmap1.DeltaUFrac, ecx // store it
918 add eax, ebx // calc uint + vint step
919 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
920 add eax, Tmap1.src_offset // calc whole step + v carry
921 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
// NOTE(review): reads as a clamp of UFixed / VFixed to the
// [MinUFixed, MaxUFixed] and [MinVFixed, MaxVFixed] ranges; confirm against
// the conditional jumps around each compare.
928 ; check coordinate ranges
929 mov eax, Tmap1.UFixed
930 cmp eax, Tmap1.MinUFixed
932 mov eax, Tmap1.MinUFixed
933 mov Tmap1.UFixed, eax
936 cmp eax, Tmap1.MaxUFixed
938 mov eax, Tmap1.MaxUFixed
939 mov Tmap1.UFixed, eax
941 mov eax, Tmap1.VFixed
942 cmp eax, Tmap1.MinVFixed
944 mov eax, Tmap1.MinVFixed
945 mov Tmap1.VFixed, eax
948 cmp eax, Tmap1.MaxVFixed
950 mov eax, Tmap1.MaxVFixed
951 mov Tmap1.VFixed, eax
// Starting texel address and fractions, computed as in the full-span setup.
958 ; setup initial coordinates
959 mov esi, Tmap1.UFixed // get u 16.16
960 mov ebx, esi // copy it
961 sar esi, 16 // get integer part
962 shl ebx, 16 // get fractional part
964 mov ecx, Tmap1.VFixed // get v 16.16
965 mov edx, ecx // copy it
966 sar edx, 16 // get integer part
967 shl ecx, 16 // get fractional part
968 imul edx, Tmap1.src_offset // calc texture scanline address
969 add esi, edx // calc texture offset
970 add esi, Tmap1.pixptr // calc address
972 ; set edi = address of first pixel to modify
973 ; mov edi, Tmap1.dest_row_data
982 mov edx, Tmap1.DeltaUFrac
984 cmp Tmap1.WidthModLength, 1
989 mov ebx, Tmap1.fx_l_right
996 // slow but maybe better
999 mov ebx, Tmap1.WidthModLength
1004 mov eax, Tmap1.fx_dl_dx
1014 inc Tmap1.WidthModLength
1015 mov eax,Tmap1.WidthModLength
1019 mov Tmap1.WidthModLength, eax
// Leftover pixels, two per pass, using the same fetch / remap / step pattern
// as the unrolled loop above.
1023 mov al,[edi] // preread the destination cache line
1026 mov al,[esi] // get texture pixel 0
1028 mov al, gr_fade_table[eax]
1030 add ecx,Tmap1.DeltaVFrac // increment v fraction
1031 sbb ebp,ebp // get -1 if carry
1032 add ebx,edx // increment u fraction
1033 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
1034 mov [edi+0],al // store pixel 0
1036 add ecx,Tmap1.DeltaVFrac // increment v fraction
1037 sbb ebp,ebp // get -1 if carry
1038 add ebx,edx // increment u fraction
1039 mov al,[esi] // get texture pixel 1
1041 mov al, gr_fade_table[eax]
1043 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
1044 mov [edi+1],al // store pixel 1
1047 dec Tmap1.WidthModLength
1055 mov al,[esi] // get texture pixel 2
1057 mov al, gr_fade_table[eax]
1058 mov [edi],al // store pixel 2
// Older leftover loop: recomputes the texel offset from the full U and V
// values for every pixel and, in the lines shown, stores the texel without a
// gr_fade_table lookup.
1073 OldWay: // This is 6% slower than above
1075 mov ebx,Tmap1.UFixed ; get starting coordinates
1076 mov ecx,Tmap1.VFixed ; for span
1078 ; leftover pixels loop
1079 ; edi = dest dib bits
1080 ; esi = texture dib bits
1086 mov eax,ecx ; copy v
1088 imul eax,Tmap1.src_offset ; scan offset
1089 mov edx,ebx ; copy u
1091 add eax,edx ; texture offset
1092 mov al,[esi+eax] ; get source pixel
1094 mov [edi],al ; store it
1096 add ebx,Tmap1.DeltaU ; increment u coordinate
1097 add ecx,Tmap1.DeltaV ; increment v coordinate
1099 dec Tmap1.WidthModLength ; decrement loop count
1100 jl FPUReturn ; finish up
1104 mov eax,ecx ; copy v
1106 imul eax,Tmap1.src_offset ; scan offset
1107 mov edx,ebx ; copy u
1109 add eax,edx ; texture offset
1110 mov al,[esi+eax] ; get source pixel
1111 mov [edi],al ; store it
1113 add ebx,Tmap1.DeltaU ; increment u coordinate
1114 add ecx,Tmap1.DeltaV ; increment v coordinate
1116 dec Tmap1.WidthModLength ; decrement loop count
1117 jge LeftoverLoop ; finish up
1122 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
1123 ; xxx xxx xxx xxx xxx xxx xxx
// Reload the control word that was saved on entry.
1132 fldcw Tmap1.OldFPUCW // restore the FPU
// tmapscan_lln8_old: older lit scanline drawer that passes its state to the
// inline assembly through the file-scope _fx_* globals instead of Tmap1.
//   p  - start-of-span u, v (scaled by 64) and l (scaled by 32, plus 1)
//   dp - per-pixel u, v (scaled by 64) and l (scaled by 32) deltas
//   rp, flags - not referenced in the lines shown here
// NOTE(review): pointers are cast to uint, which assumes 32-bit pointers.
1147 void tmapscan_lln8_old( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1149 _fx_srcptr = (uint)tmap_bitmap->data;
1150 _fx_destptr = (uint)GR_SCREEN_PTR(ubyte,lx,y);
1151 _loop_count = rx - lx;
1152 _fx_u = fl2f(p->u*64.0f);
1153 _fx_v = fl2f(p->v*64.0f);
1154 _fx_l = fl2f(p->l*32.0+1.0);
1155 _fx_du = fl2f(dp->u*64.0f);
1156 _fx_dv = fl2f(dp->v*64.0f);
1157 _fx_dl = fl2f(dp->l*32.0);
// light_table receives the address of gr_fade_table; the assembly lines
// shown below index gr_fade_table directly.
1158 light_table = (uint)&gr_fade_table[0];
1169 ; set edi = address of first pixel to modify
1170 mov edi, _fx_destptr
// U and V are packed into one register, and so are their deltas.
1177 mov dx, ax ; EDX=U:V in 6.10 format
1183 mov si, ax ; ESI=DU:DV in 6.10 format
// _loop_count is read, rewritten, and a value is stored in num_big_steps.
// NOTE(review): presumably the count of 8-pixel unrolled groups, matching
// the eight fade-table lookups below - confirm against the arithmetic
// between these moves.
1192 mov eax, _loop_count
1194 mov _loop_count, eax
1199 mov num_big_steps, eax
// Eight fade-table lookups, one per pixel of the unrolled group.
1212 mov al, gr_fade_table[eax]
1224 mov al, gr_fade_table[eax]
1236 mov al, gr_fade_table[eax]
1248 mov al, gr_fade_table[eax]
1260 mov al, gr_fade_table[eax]
1272 mov al, gr_fade_table[eax]
1284 mov al, gr_fade_table[eax]
1296 mov al, gr_fade_table[eax]
1310 mov _loop_count, eax
// Further fade-table lookups after the unrolled group.
1323 mov al, gr_fade_table[eax]
1334 mov al, gr_fade_table[eax]
1352 mov al, gr_fade_table[eax]
// tmapscan_flat16: fills the 16-bit pixels from lx through rx (inclusive) on
// row y with gr_screen.current_color.raw16.
//   p, dp, rp, flags - not referenced in the lines shown here
1369 void tmapscan_flat16( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1374 pDestBits = GR_SCREEN_PTR(ushort,lx,y);
// rx - lx + 1 iterations: both end columns are written.
1376 for (i=0; i<(rx-lx+1); i++ )
1377 *pDestBits++ = gr_screen.current_color.raw16;
// Threshold compared against z in tmapscan_lln8_z(); a pixel is written only
// while z < tmap_max_z.
1380 float tmap_max_z = 0.0f;
// tmapscan_lln8_z: floating-point reference version of the lit 8-bit
// scanline with a per-pixel z test. It walks rx - lx + 1 pixels and writes a
// pixel only when z < tmap_max_z.
// NOTE(review): the initialisation and stepping of u, v, l and z are not
// visible in this excerpt.
1382 void tmapscan_lln8_z( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1385 ubyte *pDestBits, tmp;
1386 float u, dudx, v, dvdx, l, dldx;
1389 pDestBits = GR_SCREEN_PTR(ubyte,lx,y);
1391 ubyte * cdata = (ubyte *)tmap_bitmap->data;
1402 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1403 if ( z < tmap_max_z ) {
// Texel at (u, v) using the bitmap width as the row stride, then remapped
// through the fade-table row selected by the integer lighting level.
1404 tmp = cdata[fl2i(v)*tmap_bitmap->w+fl2i(u)];
1405 *pDestBits = gr_fade_table[ fl2i(l)*256+tmp ];
// tmapscan_generic8: generic 8-bit scanline drawer that dispatches on the
// TMAP_FLAG_TEXTURED / TMAP_FLAG_RAMP / TMAP_FLAG_CORRECT bits. Some paths
// forward to tmapscan_pln8() or tmapscan_lln8(); the rest are plain C loops
// over rx - lx + 1 pixels.
// NOTE(review): the tests mix the `flags` parameter with Tmap1.flags; confirm
// whether the two are always equal or whether one of them is intended.
// NOTE(review): the C loops that index cdata with "& 63" and "* 64" assume a
// 64x64 texture.
1416 void tmapscan_generic8( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1419 ubyte *pDestBits, tmp;
1420 int u, dudx, v, dvdx, w, dwdx, l, dldx;
1422 pDestBits = GR_SCREEN_PTR(ubyte,lx,y);
1424 if ( Tmap1.flags & TMAP_FLAG_TEXTURED ) {
1425 ubyte * cdata = (ubyte *)tmap_bitmap->data;
1426 if ( flags & TMAP_FLAG_RAMP ) {
1427 if ( Tmap1.flags & TMAP_FLAG_CORRECT ) {
1428 float fu, fv, fw, fdu, fdv, fdw;
// Textured + ramp + perspective-correct: handed to tmapscan_pln8().
1430 tmapscan_pln8( lx, rx, y, p, dp, rp,Tmap1.flags );
1437 l = fl2f(p->l*32.0f);
1442 dldx = fl2f(dp->l*32.0f);
1444 for ( count = rx - lx + 1 ; count > 0; count-- ) {
// The fade-table lookup is commented out here; the raw texel is written.
1447 tmp = cdata[v*tmap_bitmap->w+u];
1448 *pDestBits++ = tmp; //gr_fade_table[ (l>>16)*256+tmp ];
1450 //*pDestBits++ = tmp+1;
// Textured + ramp, not perspective-correct: handed to tmapscan_lln8().
1459 tmapscan_lln8( lx, rx, y, p, dp, rp, flags );
1461 u = fl2f(p->u*64.0f);
1462 v = fl2f(p->v*64.0f);
1463 l = fl2f(p->l*32.0f);
1464 dudx = fl2f(dp->u*64.0f);
1465 dvdx = fl2f(dp->v*64.0f);
1466 dldx = fl2f(dp->l*32.0f);
1468 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1470 //tmp = cdata[((v>>16)&63)*64+((u>>16)&63)];
1471 //*pDestBits++ = ;//gr_fade_table[ (l>>16)*256+tmp ];
// Textured without ramp: per-pixel divide by w when perspective-correct.
1481 if ( flags & TMAP_FLAG_CORRECT ) {
1482 u = fl2f(p->u*64.0f);
1483 v = fl2f(p->v*64.0f);
1484 w = fl2f(p->sw*16.0f);
1486 dudx = fl2f(dp->u*64.0f);
1487 dvdx = fl2f(dp->v*64.0f);
1488 dwdx = fl2f(dp->sw*16.0f);
1490 for ( count = rx - lx + 1 ; count > 0; count-- ) {
// NOTE(review): integer divide by w; no guard against w == 0 is visible here.
1491 tmp = cdata[((v/w)&63)*64+((u/w)&63)];
1498 u = fl2f(p->u*64.0f);
1499 v = fl2f(p->v*64.0f);
1500 dudx = fl2f(dp->u*64.0f);
1501 dvdx = fl2f(dp->v*64.0f);
1503 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1504 tmp = cdata[((v>>16)&63)*64+((u>>16)&63)];
// Untextured: a lit flat colour when ramped, otherwise a plain memset fill.
1512 if ( Tmap1.flags & TMAP_FLAG_RAMP ) {
1513 l = fl2f(p->l*32.0f);
1514 dldx = fl2f(dp->l*32.0f);
1516 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1517 *pDestBits++ = gr_fade_table[ (l>>16)*256+gr_screen.current_color.raw8 ];
1521 memset( pDestBits, gr_screen.current_color.raw8, (rx-lx+1) );
1527 uint fsave_area[64];
// packrgb: ORs the low 16 bits of r, g and b (in that order) into a 64-bit
// value. tmapscan_generic() loads the results into MMX registers as the
// packed light and delta-light values.
// NOTE(review): the initialisation of tmp and the shifts between the OR
// steps are not visible in this excerpt, so the final lane order is
// unconfirmed.
1529 unsigned __int64 packrgb( int r, int g, int b )
1531 unsigned __int64 tmp;
// Each component is read through an unsigned pointer and masked to 16 bits,
// so a negative delta contributes its low 16 two's-complement bits.
1536 tmps = (unsigned int *)&r;
1537 tmp |= *tmps & 0xFFFF;
1540 tmps = (unsigned int *)&g;
1541 tmp |= *tmps & 0xFFFF;
1544 tmps = (unsigned int *)&b;
1545 tmp |= *tmps & 0xFFFF;
// tmapscan_generic: draws one scanline, columns lx..rx (inclusive) of row y,
// into a 32-bit destination obtained from GR_SCREEN_PTR(uint,lx,y).
//   p  - values at the left end of the span (u, v, sw, r, g, b are read)
//   dp - per-pixel deltas of the same fields
//   rp, flags - not referenced in the lines visible here; the path is chosen
//               from Tmap1.flags.
// Paths, as far as the visible lines show:
//   TEXTURED + GOURAUD + CORRECT : divide by w per pixel, lit via rgbtable1/2/3
//   TEXTURED + GOURAUD           : affine 64x64 lookup, lit with MMX
//   TEXTURED + CORRECT           : divide by w per pixel, unlit
//   TEXTURED                     : affine 64x64 lookup, unlit
//   GOURAUD                      : interpolated r/g/b only
//   otherwise                    : solid fill with current_color.raw32
// FIX: the solid fill used memset() with a 32-bit colour.  memset() converts
// its value to unsigned char, so only the low byte of raw32 was replicated
// into every byte of the span.  It now stores whole 32-bit pixels, the same
// way tmapscan_flat() does.
// NOTE(review): the MMX path brackets its work with fstenv ... frstor on
// fsave_area; confirm that pairing is intended (fstenv saves only the FPU
// environment, frstor reloads the full state image).
1552 void tmapscan_generic( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1555 uint *pDestBits, tmp, tmp1;
1556 int u, dudx, v, dvdx, w, dwdx;
1557 int r, g, b, dr, dg, db;
1559 if ( !rgbtable_inited )
1562 pDestBits = GR_SCREEN_PTR(uint,lx,y);
1564 if ( Tmap1.flags & TMAP_FLAG_TEXTURED ) {
1565 uint * cdata = (uint *)tmap_bitmap->data;
1567 if ( Tmap1.flags & TMAP_FLAG_GOURAUD ) {
1568 if ( Tmap1.flags & TMAP_FLAG_CORRECT ) {
1569 u = fl2f(p->u*64.0f);
1570 v = fl2f(p->v*64.0f);
1573 r = fl2f(p->r*255.0f);
1574 g = fl2f(p->g*255.0f);
1575 b = fl2f(p->b*255.0f);
1577 dr = fl2f(dp->r*255.0f);
1578 dg = fl2f(dp->g*255.0f);
1579 db = fl2f(dp->b*255.0f);
1581 dudx = fl2f(dp->u*64.0f);
1582 dvdx = fl2f(dp->v*64.0f);
1583 dwdx = fl2f(dp->sw);
1585 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1586 tmp = cdata[((v/w)&63)*64+((u/w)&63)]; // texel from the 64x64 map
1587 tmp1 = rgbtable1[ (tmp & 0xFF)+ (b>>16) ]; // low byte of texel + b
1588 tmp1 |= rgbtable2[ ((tmp>>8) & 0xFF)+ (g>>16) ]; // middle byte + g
1589 tmp1 |= rgbtable3[ ((tmp>>16) & 0xFF)+ (r>>16) ]; // high byte + r
1590 *pDestBits++ = tmp1;
1600 __int64 light, deltalight;
1602 u = fl2f(p->u*64.0f);
1603 v = fl2f(p->v*64.0f);
1604 dudx = fl2f(dp->u*64.0f);
1605 dvdx = fl2f(dp->v*64.0f);
1608 r = fl2f(p->r*255.0f)>>8;
1609 g = fl2f(p->g*255.0f)>>8;
1610 b = fl2f(p->b*255.0f)>>8;
1612 dr = fl2f(dp->r*255.0f)>>8;
1613 dg = fl2f(dp->g*255.0f)>>8;
1614 db = fl2f(dp->b*255.0f)>>8;
1620 dr = fl2f(dp->r)>>7;
1621 dg = fl2f(dp->g)>>7;
1622 db = fl2f(dp->b)>>7;
1630 light = packrgb( r, g, b );
1631 deltalight = packrgb( dr, dg, db );
1633 _asm fstenv fsave_area
1634 _asm movq mm3, light
1635 _asm movq mm4, deltalight
1636 _asm pxor mm2, mm2 ; mm2 = 0
1638 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1639 testpixel = cdata[((v>>16)&63)*64+((u>>16)&63)];
1641 _asm punpcklbw mm2, testpixel ; mm2 = 8.8,8.8, 8.8 rgb
1642 _asm pmulhw mm2, mm3 ;
1643 _asm paddsw mm3, mm4 ; light += deltalight
1644 _asm packuswb mm2, mm2 ;mm2 is who cares
1645 _asm movd testpixel, mm2 ; store lit pixel back to testpixel
1646 _asm pxor mm2, mm2 ; mm2 = 0
1648 *pDestBits++ = testpixel;
1653 _asm frstor fsave_area
1656 if ( Tmap1.flags & TMAP_FLAG_CORRECT ) {
1657 u = fl2f(p->u*64.0f);
1658 v = fl2f(p->v*64.0f);
1660 dudx = fl2f(dp->u*64.0f);
1661 dvdx = fl2f(dp->v*64.0f);
1662 dwdx = fl2f(dp->sw);
1664 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1665 *pDestBits++ = cdata[((v/w)&63)*64+((u/w)&63)];
1671 u = fl2f(p->u*64.0f);
1672 v = fl2f(p->v*64.0f);
1673 dudx = fl2f(dp->u*64.0f);
1674 dvdx = fl2f(dp->v*64.0f);
1676 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1677 *pDestBits++ = cdata[((v>>16)&63)*64+((u>>16)&63)];
1683 } else if ( Tmap1.flags & TMAP_FLAG_GOURAUD ) {
1685 r = fl2f(p->r*255.0f);
1686 g = fl2f(p->g*255.0f);
1687 b = fl2f(p->b*255.0f);
1689 dr = fl2f(dp->r*255.0f);
1690 dg = fl2f(dp->g*255.0f);
1691 db = fl2f(dp->b*255.0f);
1693 for ( count = rx - lx + 1 ; count > 0; count-- ) {
1694 *pDestBits++ = (r&0xFF0000)|((g>>8)&0xFF00)|(b>>16); // 16.16 r,g,b packed into one pixel
1698 //*pDestBits++ = 100;
1701 for ( count = rx - lx + 1 ; count > 0; count-- ) *pDestBits++ = gr_screen.current_color.raw32; // whole 32-bit pixels; memset would replicate only the low byte
// tmapscan_flat: fills part of row y, starting at column lx, with
// gr_screen.current_color.raw32 (32-bit destination via GR_SCREEN_PTR).
// Two implementations are visible: an inline-asm one under USE_INLINE_ASM
// and a plain C loop over w pixels.  How w is derived from lx/rx is not
// visible in this excerpt; p, dp, rp and flags are not referenced in the
// visible lines.
1705 void tmapscan_flat( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1710 pDestBits = GR_SCREEN_PTR(uint,lx,y);
1712 #ifdef USE_INLINE_ASM
1713 _asm mov eax, gr_screen.current_color.raw32 // eax = fill colour
1715 _asm mov edi, pDestBits // edi = destination pointer
1719 for (i=0; i<w; i++ ) {
1720 *pDestBits++ = gr_screen.current_color.raw32;
// Depth buffer with one float per pixel, sized for a 640x480 screen.
1725 float zbuffer[640*480];
// zbuffer_clear: sets every entry of zbuffer to 10000.0f.
1727 void zbuffer_clear()
1730 for (i=0; i<640*480; i++ )
1731 zbuffer[i] = 10000.0f;
// tmapscan_flat_z: flat-colour scanline that also walks the depth buffer.
// tz points at the zbuffer entry for (lx, y), using a fixed row pitch of
// 640 entries.  The depth comparison and the pointer advances inside the
// loop are not visible in this excerpt.
1734 void tmapscan_flat_z( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1741 tz = &zbuffer[y*640+lx];
1742 pDestBits = GR_SCREEN_PTR(uint,lx,y);
// The USE_INLINE_ASM guard is commented out here, so the lines below are
// not conditional on it.
1746 //#ifdef USE_INLINE_ASM
1748 _asm mov eax, gr_screen.current_color.raw32
1750 _asm mov edi, pDestBits
1755 for (i=0; i<w; i++ ) {
1758 *pDestBits = gr_screen.current_color.raw32;
1772 uint fsave_area1[64];
// tmapscan_pln: perspective-corrected, MMX-lit scanline for row y.
// Setup copies the span parameters into the file-level _fx_* globals (the
// asm reuses EBP, so stack locals are unreachable there):
//   p  -> _fx_u/_fx_v (scaled by 64) and _fx_w (scaled by 16)
//   dp -> _fx_du/_fx_dv/_fx_dw, same scaling
//   rp -> _fx_u_right/_fx_v_right/_fx_w_right, same scaling
// light / deltalight pack r,g,b and dr,dg,db as 16-bit lanes for mm3 / mm4.
// Per its own comments, the asm divides u and v by w once per 2^NBITS-pixel
// chunk, interpolates in 6.10 fixed point in between, and handles the
// remaining pixels against the right-edge values.
// NOTE(review): the assignments to r, g, b are not visible in this excerpt.
// NOTE(review): fstenv is paired with frstor on fsave_area1; confirm intent.
1774 void tmapscan_pln( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
1776 __int64 light, deltalight;
1777 int r, g, b, dr, dg, db;
1778 _fx_srcptr = (uint)tmap_bitmap->data;
1779 _fx_destptr = (uint)GR_SCREEN_PTR(uint,lx,y);
1780 _loop_count = rx - lx;
1781 _fx_u = fl2f(p->u*64.0f);
1782 _fx_v = fl2f(p->v*64.0f);
1783 _fx_w = fl2f(p->sw*16.0);
1784 _fx_du = fl2f(dp->u*64.0f);
1785 _fx_dv = fl2f(dp->v*64.0f);
1786 _fx_dw = fl2f(dp->sw*16.0);
1788 _fx_u_right = fl2f(rp->u*64.0f);
1789 _fx_v_right = fl2f(rp->v*64.0f);
1790 _fx_w_right = fl2f(rp->sw*16.0);
1796 dr = fl2f(dp->r)>>7;
1797 dg = fl2f(dp->g)>>7;
1798 db = fl2f(dp->b)>>7;
1800 light = ((__int64)r<<32)|((__int64)g<<16)|(__int64)b;
1801 deltalight = ((__int64)dr<<32)|((__int64)dg<<16)|(__int64)db;
1803 _asm fstenv fsave_area1
1804 _asm movq mm3, light
1805 _asm movq mm4, deltalight
1821 ; compute initial v coordinate
1826 idiv ecx ; eax = (v/z)
1829 ; compute initial u coordinate
// NOTE: under the "u coordinate" heading this divide yields u/z; the
// "(v/z)" remark on the next line looks like a copy-paste leftover.
1834 idiv ecx ; eax = (v/z)
1839 ; find number of subdivisions
1840 mov eax, _loop_count
1845 mov num_left_over, esi
1846 jz DoEndPixels ;there are no 2^NBITS chunks, do divide/pixel for whole scanline
1847 mov _loop_count, eax
1849 ; Set deltas to NPIXS pixel increments
1866 ; Done with ebx, ebp, ecx until next iteration
// NOTE: the result of this divide is stored as U1, so it is u/z.
1878 idiv ecx ; eax = (v/z)
1879 mov ebx, eax ; ebx = U1 until pop's
1886 idiv ecx ; eax = (v/z)
// NOTE: V1 is kept in ebp here (the remark on the next line says ebx).
1887 mov ebp, eax ; ebx = V1 until pop's
1889 ; Get last correct U,Vs
1890 mov ecx, U0 ; ecx = U0 until pop's
1891 mov edi, V0 ; edi = V0 until pop's
1893 ; Make ESI = V0:U0 in 6:10,6:10 format
1900 ; Make EDX = DV:DU in 6:10,6:10 format
1906 shl edx, 10-NBITS ; EDX = V1-V0/ 4 in 6:10 int:frac
1907 mov dx, ax ; put delta u in low word
1909 ; Save the U1 and V1 so we don't have to divide on the next iteration
1913 pop edi ; Restore EDI before using it
// Per pixel: fetch a 32-bit texel, expand its bytes to words in mm2, scale
// by the running light in mm3 (the multiply itself is not visible in this
// excerpt), repack to bytes and store.
1924 movd mm1, [eax*4+ecx]
1925 pxor mm2, mm2 ; mm2 = 0
1926 punpcklbw mm2, mm1 ; mm0 = 8.8,8.8, 8.8 rgb
1928 paddsw mm3, mm4 ; light += deltalight
1929 packuswb mm2, mm2 ;mm2 is who cares
1930 movd [edi], mm2 ; load tmp
1943 test num_left_over, -1
1946 cmp num_left_over, 4
1949 ; If less than 4, then just keep interpolating without
1950 ; calculating a new DU:DV.
1954 ; ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
1958 mov ecx, _fx_w_right
1961 mov eax, _fx_u_right
// NOTE: eax was loaded from _fx_u_right, so this divide yields u/z.
1965 idiv ecx ; eax = (v/z)
1966 mov ebx, eax ; ebx = U1 until pop's
1969 mov eax, _fx_v_right
1973 idiv ecx ; eax = (v/z)
1974 mov ebp, eax ; ebp = V1 until pop's
1976 mov ecx, U0 ; ecx = U0 until pop's
1977 mov edi, V0 ; edi = V0 until pop's
1979 ; Make EDX = DV:DU in 6:10,6:10 format
1982 mov edx, eax ; These two lines are faster than cdq
1984 idiv num_left_over ; eax = (v1-v0)/num_left_over
1985 shl eax, 16-6 ; go from 16.16 to 6.10, and move into high 16 bits
1986 mov esi, eax ; esi = dvdx<<16
1990 mov edx, eax ; These two lines are faster than cdq
1992 idiv num_left_over ; eax = (u1-u0)/num_left_over
// NOTE: after the (u1-u0) divide above, ax holds dudx, not dvdx.
1993 sar eax, 6 ; go from 16.16 to 6.10 (ax=dvdx in 6.10)
1994 mov si, ax ; esi = dvdx:dudx
1997 ; Make ESI = V0:U0 in 6:10,6:10 format
2004 pop edi ; Restore EDI before using it
2015 ; mov eax, [eax*4+ecx]
2017 movd mm1, [eax*4+ecx]
2018 pxor mm2, mm2 ; mm2 = 0
2019 punpcklbw mm2, mm1 ; mm0 = 8.8,8.8, 8.8 rgb
2021 paddsw mm3, mm4 ; light += deltalight
2022 packuswb mm2, mm2 ;mm2 is who cares
2023 movd [edi], mm2 ; load tmp
2041 _asm frstor fsave_area1
// tmapscan_lln: affine (no divide by w) MMX-lit scanline for row y.
// Setup copies the span parameters into the file-level _fx_* globals:
//   p  -> _fx_u/_fx_v (scaled by 64), dp -> _fx_du/_fx_dv (scaled by 64).
// light / deltalight pack r,g,b and dr,dg,db as 16-bit lanes for mm3 / mm4.
// The asm steps u,v in packed 6.10 fixed point (per its comments) and lights
// each 32-bit texel with MMX before storing it.
// rp and flags are not referenced in the visible lines.
// NOTE(review): the assignments to r, g, b are not visible in this excerpt.
// NOTE(review): fstenv is paired with frstor on fsave_area1; confirm intent.
2045 void tmapscan_lln( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
2047 __int64 light, deltalight;
2048 int r, g, b, dr, dg, db;
2049 _fx_srcptr = (uint)tmap_bitmap->data;
2050 _fx_destptr = (uint)GR_SCREEN_PTR(uint,lx,y);
2051 _loop_count = rx - lx;
2052 _fx_u = fl2f(p->u*64.0f);
2053 _fx_v = fl2f(p->v*64.0f);
2054 _fx_du = fl2f(dp->u*64.0f);
2055 _fx_dv = fl2f(dp->v*64.0f);
2061 dr = fl2f(dp->r)>>7;
2062 dg = fl2f(dp->g)>>7;
2063 db = fl2f(dp->b)>>7;
2065 light = ((__int64)r<<32)|((__int64)g<<16)|(__int64)b;
2066 deltalight = ((__int64)dr<<32)|((__int64)dg<<16)|(__int64)db;
2068 _asm fstenv fsave_area1
2069 _asm movq mm3, light
2070 _asm movq mm4, deltalight
2085 ; find number of subdivisions
2086 mov eax, _loop_count
2089 mov _loop_count, eax
2091 ; Make ESI = V0:U0 in 6:10,6:10 format
2098 ; Make EDX = DV:DU in 6:10,6:10 format
2104 shl edx, 10-NBITS ; EDX = V1-V0/ 4 in 6:10 int:frac
2105 mov dx, ax ; put delta u in low word
2108 mov ebx, _loop_count
// Per pixel: fetch a 32-bit texel, expand its bytes to words in mm2,
// advance the light, repack to bytes and store.
2116 movd mm1, [eax*4+ecx]
2117 pxor mm2, mm2 ; mm2 = 0
2118 punpcklbw mm2, mm1 ; mm0 = 8.8,8.8, 8.8 rgb
2120 paddsw mm3, mm4 ; light += deltalight
2121 packuswb mm2, mm2 ;mm2 is who cares
2122 movd [edi], mm2 ; load tmp
2139 _asm frstor fsave_area1
// tmapscan_pln8_tiled: perspective-corrected, fade-table-lit scanline for
// an 8-bit destination and a tiled 64x64 texture.
//   p  - left-edge u, v, sw (1/z) and light l
//   dp - per-pixel deltas of the same fields
//   rp - right-edge u, v, sw
// All working state lives in the Tmap1 structure.  The routine returns
// without drawing when the bitmap is not exactly 64x64.
// Method (from the visible code): the span is cut into 32-pixel
// subdivisions (shr ecx,5 / and eax,31).  The FPU computes exact u,v at both
// ends of each subdivision and the integer unit interpolates in between in
// packed 6.10 fixed point, with one gr_fade_table lookup per pixel.  The
// divide for the next subdivision is started before the pixel loop so that
// it overlaps the integer work.  Leftover pixels are interpolated towards
// the right-edge values.
// flags is not referenced in the visible lines.
2145 void tmapscan_pln8_tiled( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
2147 Tmap1.dest_row_data = GR_SCREEN_PTR(ubyte,lx,y);
2148 Tmap1.loop_count = rx - lx;
2149 Tmap1.fx_u = fl2f(p->u);
2150 Tmap1.fx_v = fl2f(p->v);
2151 Tmap1.fx_du_dx = fl2f(dp->u);
2152 Tmap1.fx_dv_dx = fl2f(dp->v);
2154 Tmap1.fx_l = fl2f(p->l*32.0);
2155 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
2157 Tmap1.fx_u_right = fl2f(rp->u);
2158 Tmap1.fx_v_right = fl2f(rp->v);
2159 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
2160 Tmap1.bp = tmap_bitmap;
2161 Tmap1.src_offset = tmap_bitmap->w;
2164 Tmap1.FixedScale = 65536.0f;
2165 Tmap1.FixedScale8 = 2048.0f; //8192.0f; // 2^16 / 8 -- the active value 2048.0f is 2^16 / 32
2169 Tmap1.UOverZ = p->u;
2170 Tmap1.VOverZ = p->v;
2171 Tmap1.OneOverZ = p->sw;
// The "dX8" members hold the change across one 32-pixel subdivision.
2173 Tmap1.dUOverZdX8 = dp->u*32.0f;
2174 Tmap1.dVOverZdX8 = dp->v*32.0f;
2175 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
2177 Tmap1.dUOverZdX = dp->u;
2178 Tmap1.dVOverZdX = dp->v;
2179 Tmap1.dOneOverZdX = dp->sw;
2181 Tmap1.RightUOverZ = rp->u;
2182 Tmap1.RightVOverZ = rp->v;
2183 Tmap1.RightOneOverZ = rp->sw;
2185 Tmap1.BitmapWidth = Tmap1.bp->w;
2186 Tmap1.BitmapHeight = Tmap1.bp->h;
// This tiled mapper only handles 64x64 textures.
2188 if (Tmap1.BitmapWidth!=64) return;
2189 if (Tmap1.BitmapHeight!=64) return;
// A negative light delta is made positive, and both light endpoints are
// reflected about 67*F1_0.
2193 if ( Tmap1.fx_dl_dx < 0 ) {
2194 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
2195 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
2196 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
2211 // put the FPU in 32 bit mode
2212 // @todo move this out of here!
2214 fstcw Tmap1.OldFPUCW // store copy of CW
2215 mov ax,Tmap1.OldFPUCW // get it in ax
2216 //hh and eax,NOT 1100000000y // 24 bit precision
2218 mov Tmap1.FPUCW,ax // store it
2219 fldcw Tmap1.FPUCW // load the FPU
2221 mov ecx, Tmap1.loop_count // ecx = width
2223 mov edi, Tmap1.dest_row_data // edi = dest pointer
2225 // edi = pointer to start pixel in dest dib
2228 mov eax,ecx // eax and ecx = width
2229 shr ecx,5 // ecx = width / subdivision length
2230 and eax,31 // eax = width mod subdivision length
2231 jnz some_left_over // any leftover?
2233 dec ecx // no, so special case last span
2234 mov eax,32 // it's 32 pixels long
2236 mov Tmap1.Subdivisions,ecx // store widths
2237 mov Tmap1.WidthModLength,eax
2239 // mov ebx,pLeft ; get left edge pointer
2240 // mov edx,pGradients ; get gradients pointer
2242 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
2243 // st0 st1 st2 st3 st4 st5 st6 st7
2244 fld Tmap1.VOverZ // V/ZL
2245 fld Tmap1.UOverZ // U/ZL V/ZL
2246 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
2247 fld1 // 1 1/ZL U/ZL V/ZL
2248 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
2249 fld st // ZL ZL 1/ZL U/ZL V/ZL
2250 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
2251 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
2252 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
2254 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
2255 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
2257 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
2259 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
2260 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
2261 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
2262 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
2263 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
2265 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
2267 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
2268 // @todo overlap this guy
2269 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
2270 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
2271 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
2272 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
2273 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
2275 cmp ecx,0 // check for any full spans
2276 jle HandleLeftoverPixels
2280 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
2281 // UR VR V/ZR 1/ZR U/ZR UL VL
2283 // convert left side coords
2285 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
2286 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
2287 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
2289 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
2290 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
2291 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
2293 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
2295 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
2296 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
2297 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
2298 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
2300 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
2301 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
2303 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
2304 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
2305 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
2307 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
2308 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
2310 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
2311 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
2312 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
2313 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
2314 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
2315 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
2316 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
2318 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
2320 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
2321 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
2324 ; ************** Can't Access Stack Frame ******************
2325 ; ************** Can't Access Stack Frame ******************
2326 ; ************** Can't Access Stack Frame ******************
2328 // 32 pixel span code (one gr_fade_table lookup per pixel below)
// NOTE(review): two register maps follow.  The second one (ecx = source
// pixels, ebp = dldx) matches the loads visible below; the first looks
// like it was carried over from another scanline routine.
2329 // edi = dest dib bits at current pixel
2330 // esi = texture pointer at current u,v
2332 // ebx = u fraction 0.32
2333 // ecx = v fraction 0.32
2334 // edx = u frac step
2335 // ebp = v carry scratch
2340 // ecx = source pixels
2341 // edx = u v in 6.10 6.10
2342 // esi = du dv in 6.10 6.10
2343 // edi = dest pixels
2344 // ebp = dldx in 24.8
2351 mov ebp, Tmap1.fx_dl_dx
2360 mov ecx, Tmap1.pixptr // ecx = source pixels
2362 ; Make ESI = DV:DU in 6:10,6:10 format
2363 mov eax, Tmap1.DeltaU
2365 mov esi, Tmap1.DeltaV
// NOTE: the loads below build the start coordinates (VFixed:UFixed) in
// EDX, not deltas; the "DV:DU" wording on the next line is a leftover.
2369 ; Make EDX = DV:DU in 6:10,6:10 format
2371 mov eax, Tmap1.UFixed
2373 mov edx, Tmap1.VFixed
2388 mov al, gr_fade_table[eax]
2400 mov al, gr_fade_table[eax]
2412 mov al, gr_fade_table[eax]
2424 mov al, gr_fade_table[eax]
2436 mov al, gr_fade_table[eax]
2448 mov al, gr_fade_table[eax]
2460 mov al, gr_fade_table[eax]
2472 mov al, gr_fade_table[eax]
2484 mov al, gr_fade_table[eax]
2496 mov al, gr_fade_table[eax]
2508 mov al, gr_fade_table[eax]
2520 mov al, gr_fade_table[eax]
2532 mov al, gr_fade_table[eax]
2544 mov al, gr_fade_table[eax]
2556 mov al, gr_fade_table[eax]
2568 mov al, gr_fade_table[eax]
2580 mov al, gr_fade_table[eax]
2592 mov al, gr_fade_table[eax]
2604 mov al, gr_fade_table[eax]
2616 mov al, gr_fade_table[eax]
2628 mov al, gr_fade_table[eax]
2640 mov al, gr_fade_table[eax]
2652 mov al, gr_fade_table[eax]
2664 mov al, gr_fade_table[eax]
2676 mov al, gr_fade_table[eax]
2688 mov al, gr_fade_table[eax]
2700 mov al, gr_fade_table[eax]
2712 mov al, gr_fade_table[eax]
2724 mov al, gr_fade_table[eax]
2736 mov al, gr_fade_table[eax]
2748 mov al, gr_fade_table[eax]
2760 mov al, gr_fade_table[eax]
2764 ; ************** Okay to Access Stack Frame ****************
2765 ; ************** Okay to Access Stack Frame ****************
2766 ; ************** Okay to Access Stack Frame ****************
2769 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
2770 ; ZR V/ZR 1/ZR U/ZR UL VL
2772 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
2773 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
2774 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
2775 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
2777 add edi,32 ; increment to next span
2778 dec Tmap1.Subdivisions ; decrement span count
2779 jnz SpanLoop ; loop back
2781 HandleLeftoverPixels:
2783 mov esi,Tmap1.pixptr ; load texture pointer
2785 ; edi = dest dib bits
2786 ; esi = current texture dib bits
2787 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
2788 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
2790 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
2791 jz FPUReturn ; nope, pop the FPU and bail
2793 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
2795 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
2796 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
2797 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
2799 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
2800 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
2801 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
2803 dec Tmap1.WidthModLength ; calc how many steps to take
2804 jz OnePixelSpan ; just one, don't do deltas
2806 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
2809 ; @todo rearrange things so we don't need these two instructions
2810 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
2811 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
// The right-edge values are stepped back by one pixel delta before the
// divide.
2813 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
2814 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
2815 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
2816 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
2817 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
2818 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
2820 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
2822 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
2823 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
2825 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
2827 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
2828 fxch st(1) ; VR UR inv. inv. inv. dU VL
2829 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
2830 fxch st(6) ; dV UR inv. inv. inv. dU VR
2832 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
2833 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
2834 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
2836 fxch st(4) ; dU inv. inv. inv. UR VR
2837 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
2838 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
2839 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
2841 ; @todo gross! these are to line up with the other loop
2842 fld st(1) ; inv. inv. inv. inv. UR VR
2843 fld st(2) ; inv. inv. inv. inv. inv. UR VR
2851 mov ebp, Tmap1.fx_dl_dx
2861 ; Make ESI = DV:DU in 6:10,6:10 format
2862 mov eax, Tmap1.DeltaU
2864 mov esi, Tmap1.DeltaV
// NOTE: as in the span loop, EDX receives the start coordinates
// (VFixed:UFixed), not deltas.
2868 ; Make EDX = DV:DU in 6:10,6:10 format
2870 mov eax, Tmap1.UFixed
2872 mov edx, Tmap1.VFixed
2876 mov ecx, Tmap1.pixptr // ecx = source pixels
2878 inc Tmap1.WidthModLength
2879 mov eax,Tmap1.WidthModLength
2883 mov Tmap1.WidthModLength, eax
2899 mov al, gr_fade_table[eax]
2911 mov al, gr_fade_table[eax]
2916 dec Tmap1.WidthModLength
2934 mov al, gr_fade_table[eax]
2939 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
2940 ; xxx xxx xxx xxx xxx xxx xxx
2951 fldcw Tmap1.OldFPUCW // restore the FPU
// c_tmap_scanline_flat: fills the current Tmap1 span with a flat colour,
// switching on gr_screen.bits_per_pixel.  The case labels themselves are
// not visible in this excerpt.  The visible bodies are: an 8-bit memset,
// an 8-bit C loop, two 16-bit asm fills and one 32-bit asm fill.
// NOTE(review): the memset writes loop_count bytes while the C loop
// (x = loop_count down to 0) writes loop_count + 1; confirm which is meant.
2965 void c_tmap_scanline_flat()
2967 switch( gr_screen.bits_per_pixel ) {
2970 memset( Tmap1.dest_row_data, gr_screen.current_color.raw8, Tmap1.loop_count );
2975 dest = Tmap1.dest_row_data;
2977 for (x=Tmap1.loop_count; x >= 0; x-- ) {
2979 *dest++ = Tmap1.tmap_flat_color;
// 16-bit fill: count in ecx, colour in ax, destination in edi.
2985 _asm mov ecx, Tmap1.loop_count
2986 _asm mov ax, gr_screen.current_color.raw16;
2987 _asm mov edi, Tmap1.dest_row_data16
// Second 16-bit fill with the same register setup.
2992 _asm mov ecx, Tmap1.loop_count
2993 _asm mov ax, gr_screen.current_color.raw16;
2994 _asm mov edi, Tmap1.dest_row_data16
// 32-bit fill: count in ecx, colour in eax, destination in edi.
2999 _asm mov ecx, Tmap1.loop_count
3000 _asm mov eax, gr_screen.current_color.raw32;
3001 _asm mov edi, Tmap1.dest_row_data32
// c_tmap_scanline_shaded: re-shades the pixels already present in the
// current Tmap1 span.  Each destination byte is used as the low 8 bits of
// an index into gr_fade_table, with Tmap1.tmap_flat_shade_value selecting
// the 256-entry row (shade << 8).  loop_count + 1 pixels are processed.
// FIX: the original wrote `*dest++ = gr_fade_table[ fade |(*dest)];`, which
// both reads and increments dest in one expression with no sequencing
// between them (undefined behaviour before C++17).  The lookup and store
// now finish before the pointer advances.
3009 void c_tmap_scanline_shaded()
3015 dest = Tmap1.dest_row_data;
3017 fade = Tmap1.tmap_flat_shade_value<<8;
3018 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3019 *dest = gr_fade_table[ fade |(*dest)]; dest++; // shade in place, then advance
// c_tmap_scanline_lin_nolight: affine, unlit scanline from a 64x64 texture.
// dvdx is pre-multiplied by 64 so that f2i(v) & (64*63) is directly the row
// offset and f2i(u) & 63 the column.  Two loops are visible: one when
// Tmap1.Transparency_on is clear that stores every texel, and a second one
// that first reads the texel into c.  What the second loop does with c, and
// the u/v initialisation and stepping, are not visible in this excerpt.
3023 void c_tmap_scanline_lin_nolight()
3032 dudx = Tmap1.fx_du_dx;
3033 dvdx = Tmap1.fx_dv_dx*64;
3035 dest = Tmap1.dest_row_data;
3037 if (!Tmap1.Transparency_on) {
3038 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3039 *dest++ = (uint)Tmap1.pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
3044 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3045 c = (uint)Tmap1.pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
// c_tmap_scanline_lin: only the signature is visible in this excerpt.
// NOTE(review): presumably the lit counterpart of
// c_tmap_scanline_lin_nolight -- confirm against the full file.
3056 void c_tmap_scanline_lin()
// c_tmap_scanline_per_nolight: perspective-corrected, unlit scanline from a
// 64x64 texture.  Every pixel divides u and v by z; dvdx is pre-multiplied
// by 64 so that (v/z) & (64*63) is directly the row offset.  Two loops are
// visible: one when Tmap1.Transparency_on is clear that stores every texel,
// and a second one that first reads the texel into c.  What the second loop
// does with c, and the u/v/z initialisation and stepping, are not visible
// in this excerpt.
3063 void c_tmap_scanline_per_nolight()
3068 fix u,v,z,dudx, dvdx, dzdx;
3073 dudx = Tmap1.fx_du_dx;
3074 dvdx = Tmap1.fx_dv_dx*64;
3075 dzdx = Tmap1.fx_dz_dx;
3077 dest = Tmap1.dest_row_data;
3079 if (!Tmap1.Transparency_on) {
3080 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3081 *dest++ = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
3087 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3088 c = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
// c_tmap_scanline_per1: perspective-corrected scanline from a 64x64
// texture, lit through gr_fade_table.  The table index is (l & 0xff00) plus
// the texel, so bits 8..15 of l select the fade row.  As in the unlit
// variant, u and v are divided by z per pixel and dvdx is pre-multiplied by
// 64.  The second loop reads the texel into c before the lookup; any test
// on c, and the u/v/z/l initialisation and stepping, are not visible in
// this excerpt.
3099 void c_tmap_scanline_per1()
3104 fix u,v,z,l,dudx, dvdx, dzdx, dldx;
3109 dudx = Tmap1.fx_du_dx;
3110 dvdx = Tmap1.fx_dv_dx*64;
3111 dzdx = Tmap1.fx_dz_dx;
3114 dldx = Tmap1.fx_dl_dx;
3115 dest = Tmap1.dest_row_data;
3117 if (!Tmap1.Transparency_on) {
3118 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3119 *dest++ = gr_fade_table[ (l&(0xff00)) + (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
3126 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3127 c = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
3129 *dest = gr_fade_table[ (l&(0xff00)) + c ];
// c_tmap_scanline_editor: scanline variant with the same setup as
// c_tmap_scanline_per_nolight (dvdx pre-multiplied by 64, per-pixel divide
// by z in the second loop).  In the first loop the texture lookup is
// commented out; what is written instead is not visible in this excerpt.
3141 void c_tmap_scanline_editor()
3146 fix u,v,z,dudx, dvdx, dzdx;
3151 dudx = Tmap1.fx_du_dx;
3152 dvdx = Tmap1.fx_dv_dx*64;
3153 dzdx = Tmap1.fx_dz_dx;
3155 dest = Tmap1.dest_row_data;
3157 if (!Tmap1.Transparency_on) {
3158 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3160 //(uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
3166 for (x=Tmap1.loop_count; x >= 0; x-- ) {
3167 c = (uint)Tmap1.pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
// asm_tmap_scanline_lln_tiled: affine, fade-table-lit scanline for a tiled
// 64x64 texture, 8-bit destination.  Returns without drawing for any other
// bitmap size.  u,v and their deltas are packed into EDX and ESI in 6.10
// fixed point (per the asm comments), EBP holds the light delta and ECX the
// texture base.  The span is drawn as num_big_steps groups of 8 pixels (8
// gr_fade_table lookups) followed by a loop over the remaining
// loop_count & 7 pixels.
3178 void asm_tmap_scanline_lln_tiled()
3180 if ( Tmap1.BitmapWidth != 64 ) return;
3181 if ( Tmap1.BitmapHeight != 64 ) return;
3192 ; set edi = address of first pixel to modify
3193 mov edi, Tmap1.dest_row_data
3199 mov dx, ax ; EDX=U:V in 6.10 format
3201 mov eax, Tmap1.fx_dv_dx
3203 mov esi, Tmap1.fx_du_dx
3205 mov si, ax ; ESI=DU:DV in 6.10 format
3209 mov ebp, Tmap1.fx_dl_dx
3212 mov ecx, Tmap1.pixptr
3214 mov eax, Tmap1.loop_count
3216 mov Tmap1.loop_count, eax
3221 mov Tmap1.num_big_steps, eax
3222 and Tmap1.loop_count, 7
// Unrolled group of 8 pixels.
3234 mov al, gr_fade_table[eax]
3246 mov al, gr_fade_table[eax]
3258 mov al, gr_fade_table[eax]
3270 mov al, gr_fade_table[eax]
3282 mov al, gr_fade_table[eax]
3294 mov al, gr_fade_table[eax]
3306 mov al, gr_fade_table[eax]
3318 mov al, gr_fade_table[eax]
3322 dec Tmap1.num_big_steps
// Remaining pixels.
3327 mov eax,Tmap1.loop_count
3332 mov Tmap1.loop_count, eax
3345 mov al, gr_fade_table[eax]
3356 mov al, gr_fade_table[eax]
3361 dec Tmap1.loop_count
3374 mov al, gr_fade_table[eax]
3389 void asm_tmap_scanline_lln32();
// asm_tmap_scanline_lln: affine, fade-table-lit scanline, 8-bit destination.
//  - When Tmap1.tmap_flags has TMAP_FLAG_TILED, the tiled routine is called
//    (whether control returns afterwards is not visible in this excerpt).
//  - Returns without drawing when the integer part of the left or right
//    u / v is at or beyond the bitmap width / height.
//  - A negative light delta is made positive, and both light endpoints are
//    reflected about 67*F1_0.
// The asm splits the 16.16 u and v steps into integer and fractional parts.
// The integer parts are folded into a single texture-pointer step, stored
// both without and with one extra src_offset for the v-fraction carry.
// Each pixel adds the v fraction, turns the carry into 0 / -1 with sbb,
// adds the u fraction and uses adc with the selected step to advance ESI.
// Pixels are drawn in unrolled groups of 8, then loop_count & 7 leftovers.
3391 void asm_tmap_scanline_lln()
3396 if ( Tmap1.tmap_flags & TMAP_FLAG_TILED ) {
3397 asm_tmap_scanline_lln_tiled();
3401 end = f2i(Tmap1.fx_u);
3402 if ( end >= Tmap1.bp->w ) return;
3404 end = f2i(Tmap1.fx_v);
3405 if ( end >= Tmap1.bp->h ) return;
3407 end = f2i(Tmap1.fx_u_right);
3408 if ( end >= Tmap1.bp->w ) return;
3410 end = f2i(Tmap1.fx_v_right);
3411 if ( end >= Tmap1.bp->h ) return;
3413 if ( Tmap1.fx_dl_dx < 0 ) {
3414 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
3415 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
3416 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
3418 // Assert( Tmap1.fx_l > 31*F1_0 );
3419 // Assert( Tmap1.fx_l < 66*F1_0 );
3420 // Assert( Tmap1.fx_dl_dx >= 0 );
3421 // Assert( Tmap1.fx_dl_dx < 31*F1_0 );
3434 ; setup delta values
3435 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
3436 mov ebx, eax // copy it
3437 sar eax, 16 // get v int step
3438 shl ebx, 16 // get v frac step
3439 mov Tmap1.DeltaVFrac, ebx // store it
3440 imul eax, Tmap1.src_offset // calc texture step for v int step
3442 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
3443 mov ecx, ebx // copy it
3444 sar ebx, 16 // get the u int step
3445 shl ecx, 16 // get the u frac step
3446 mov Tmap1.DeltaUFrac, ecx // store it
3447 add eax, ebx // calc uint + vint step
3448 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
3449 add eax, Tmap1.src_offset // calc whole step + v carry
3450 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
3452 ; setup initial coordinates
3453 mov esi, Tmap1.fx_u // get u 16.16
3454 mov ebx, esi // copy it
3455 sar esi, 16 // get integer part
3456 shl ebx, 16 // get fractional part
3458 mov ecx, Tmap1.fx_v // get v 16.16
3459 mov edx, ecx // copy it
3460 sar edx, 16 // get integer part
3461 shl ecx, 16 // get fractional part
3462 imul edx, Tmap1.src_offset // calc texture scanline address
3463 add esi, edx // calc texture offset
3464 add esi, Tmap1.pixptr // calc address
3466 ; set edi = address of first pixel to modify
3467 mov edi, Tmap1.dest_row_data
3469 mov edx, Tmap1.DeltaUFrac
3471 mov eax, Tmap1.loop_count
3473 mov Tmap1.loop_count, eax
3478 mov Tmap1.num_big_steps, eax
3479 and Tmap1.loop_count, 7
3488 mov ebp, Tmap1.fx_dl_dx
3500 // 8 pixel span code
3501 // edi = dest dib bits at current pixel
3502 // esi = texture pointer at current u,v
3504 // ebx = u fraction 0.32
3505 // ecx = v fraction 0.32
3506 // edx = u frac step
3507 // ebp = v carry scratch
3509 mov al,[edi] // preread the destination cache line
3511 mov al,[esi] // get texture pixel 0
3513 mov al, gr_fade_table[eax]
3515 add ecx,Tmap1.DeltaVFrac // increment v fraction
3516 sbb ebp,ebp // get -1 if carry
3517 add ebx,edx // increment u fraction
// NOTE(review): the step table is indexed with ebp = 0 or -1, i.e. at
// byte offset 0 or -4 from UVintVfracStep; its layout relative to the
// two Tmap1 step slots written above is not visible in this excerpt.
3519 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3520 add ecx,Tmap1.DeltaVFrac // increment v fraction
3522 sbb ebp,ebp // get -1 if carry
3523 mov [edi+0],al // store pixel 0
3525 add ebx,edx // increment u fraction
3526 mov al,[esi] // get texture pixel 1
3528 mov al, gr_fade_table[eax]
3530 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3531 add ecx,Tmap1.DeltaVFrac // increment v fraction
3533 sbb ebp,ebp // get -1 if carry
3534 mov [edi+1],al // store pixel 1
3536 add ebx,edx // increment u fraction
3537 mov al,[esi] // get texture pixel 2
3539 mov al, gr_fade_table[eax]
3541 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3542 add ecx,Tmap1.DeltaVFrac // increment v fraction
3544 sbb ebp,ebp // get -1 if carry
3545 mov [edi+2],al // store pixel 2
3547 add ebx,edx // increment u fraction
3548 mov al,[esi] // get texture pixel 3
3550 mov al, gr_fade_table[eax]
3552 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3553 add ecx,Tmap1.DeltaVFrac // increment v fraction
3555 sbb ebp,ebp // get -1 if carry
3556 mov [edi+3],al // store pixel 3
3558 add ebx,edx // increment u fraction
3559 mov al,[esi] // get texture pixel 4
3561 mov al, gr_fade_table[eax]
3563 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3564 add ecx,Tmap1.DeltaVFrac // increment v fraction
3566 sbb ebp,ebp // get -1 if carry
3567 mov [edi+4],al // store pixel 4
3569 add ebx,edx // increment u fraction
3570 mov al,[esi] // get texture pixel 5
3572 mov al, gr_fade_table[eax]
3574 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3575 add ecx,Tmap1.DeltaVFrac // increment v fraction
3577 sbb ebp,ebp // get -1 if carry
3578 mov [edi+5],al // store pixel 5
3580 add ebx,edx // increment u fraction
3581 mov al,[esi] // get texture pixel 6
3583 mov al, gr_fade_table[eax]
3585 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3586 add ecx,Tmap1.DeltaVFrac // increment v fraction
3588 sbb ebp,ebp // get -1 if carry
3589 mov [edi+6],al // store pixel 6
3591 add ebx,edx // increment u fraction
3593 mov al,[esi] // get texture pixel 7
3595 mov al, gr_fade_table[eax]
3597 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3599 mov [edi+7],al // store pixel 7
3605 dec Tmap1.num_big_steps
// Leftover pixels (loop_count & 7).
3611 mov eax,Tmap1.loop_count
3616 mov Tmap1.loop_count, eax
3626 mov ebp, Tmap1.fx_dl_dx
3630 mov al,[edi] // preread the destination cache line
3631 // add ebx,edx // increment u fraction
3635 mov al,[esi] // get texture pixel 0
3637 mov al, gr_fade_table[eax]
3639 add ecx,Tmap1.DeltaVFrac // increment v fraction
3640 sbb ebp,ebp // get -1 if carry
3641 add ebx,edx // increment u fraction
3642 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3643 mov [edi+0],al // store pixel 0
3645 add ecx,Tmap1.DeltaVFrac // increment v fraction
3646 sbb ebp,ebp // get -1 if carry
3647 add ebx,edx // increment u fraction
3648 mov al,[esi] // get texture pixel 1
3650 mov al, gr_fade_table[eax]
3652 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3653 mov [edi+1],al // store pixel 1
3656 dec Tmap1.loop_count
3664 mov al,[esi] // get texture pixel 2
3666 mov al, gr_fade_table[eax]
3667 mov [edi],al // store pixel 2
// asm_tmap_scanline_lln32: 32-bit-destination version of the affine,
// fade-table-lit scanline.  Texels are still 8-bit (mov al,[esi]); each is
// translated through gr_fade_table32 (4 bytes per entry, hence the eax*4
// index) and stored as a dword at Tmap1.dest_row_data32.
// Returns without drawing when the integer part of the left or right u / v
// is at or beyond the bitmap width / height.
// u,v stepping works as in the 8-bit routine: fractional parts in EBX/ECX,
// carry of the v fraction turned into 0 / -1 in EBP, and adc with the
// selected whole-step value advances the texture pointer in ESI.
// FIXES (all three are slips carried over from the 8-bit routine):
//   - pixel 1 of the 8-pixel span was stored with `al` (one byte) instead
//     of `eax`;
//   - pixel 7 indexed gr_fade_table32[eax] without the *4 scale every other
//     lookup uses;
//   - pixel 1 of the leftover loop was stored as a byte at [edi+1] instead
//     of a dword at [edi+4].
// NOTE(review): the edi advance in the leftover loop is not visible in this
// excerpt; confirm it steps by 8 bytes (two 32-bit pixels).
3681 void asm_tmap_scanline_lln32()
3685 end = f2i(Tmap1.fx_u);
3686 if ( end >= Tmap1.bp->w ) return;
3688 end = f2i(Tmap1.fx_v);
3689 if ( end >= Tmap1.bp->h ) return;
3691 end = f2i(Tmap1.fx_u_right);
3692 if ( end >= Tmap1.bp->w ) return;
3694 end = f2i(Tmap1.fx_v_right);
3695 if ( end >= Tmap1.bp->h ) return;
3706 ; setup delta values
3707 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
3708 mov ebx, eax // copy it
3709 sar eax, 16 // get v int step
3710 shl ebx, 16 // get v frac step
3711 mov Tmap1.DeltaVFrac, ebx // store it
3712 imul eax, Tmap1.src_offset // calc texture step for v int step
3714 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
3715 mov ecx, ebx // copy it
3716 sar ebx, 16 // get the u int step
3717 shl ecx, 16 // get the u frac step
3718 mov Tmap1.DeltaUFrac, ecx // store it
3719 add eax, ebx // calc uint + vint step
3720 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
3721 add eax, Tmap1.src_offset // calc whole step + v carry
3722 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
3724 ; setup initial coordinates
3725 mov esi, Tmap1.fx_u // get u 16.16
3726 mov ebx, esi // copy it
3727 sar esi, 16 // get integer part
3728 shl ebx, 16 // get fractional part
3730 mov ecx, Tmap1.fx_v // get v 16.16
3731 mov edx, ecx // copy it
3732 sar edx, 16 // get integer part
3733 shl ecx, 16 // get fractional part
3734 imul edx, Tmap1.src_offset // calc texture scanline address
3735 add esi, edx // calc texture offset
3736 add esi, Tmap1.pixptr // calc address
3738 ; set edi = address of first pixel to modify
3739 mov edi, Tmap1.dest_row_data32
3741 mov edx, Tmap1.DeltaUFrac
3743 mov eax, Tmap1.fx_l // use bx and dx to do lighting
3745 mov eax, Tmap1.fx_dl_dx // use bx and dx to do lighting
3748 mov eax, Tmap1.loop_count
3750 mov Tmap1.loop_count, eax
3755 mov Tmap1.num_big_steps, eax
3756 and Tmap1.loop_count, 7
3761 // 8 pixel span code
3762 // edi = dest dib bits at current pixel
3763 // esi = texture pointer at current u,v
3765 // ebx = u fraction 0.32
3766 // ecx = v fraction 0.32
3767 // edx = u frac step
3768 // ebp = v carry scratch
3770 mov al,[edi] // preread the destination cache line
3772 mov al,[esi] // get texture pixel 0
3774 mov eax, gr_fade_table32[eax*4]
3776 add ecx,Tmap1.DeltaVFrac // increment v fraction
3777 sbb ebp,ebp // get -1 if carry
3778 add ebx,edx // increment u fraction
3780 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3781 add ecx,Tmap1.DeltaVFrac // increment v fraction
3783 sbb ebp,ebp // get -1 if carry
3784 mov [edi+0],eax // store pixel 0
3786 add ebx,edx // increment u fraction
3787 mov al,[esi] // get texture pixel 1
3789 mov eax, gr_fade_table32[eax*4]
3791 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3792 add ecx,Tmap1.DeltaVFrac // increment v fraction
3794 sbb ebp,ebp // get -1 if carry
3795 mov [edi+4],eax // store pixel 1 (full dword, was al)
3797 add ebx,edx // increment u fraction
3798 mov al,[esi] // get texture pixel 2
3800 mov eax, gr_fade_table32[eax*4]
3802 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3803 add ecx,Tmap1.DeltaVFrac // increment v fraction
3805 sbb ebp,ebp // get -1 if carry
3806 mov [edi+8],eax // store pixel 2
3808 add ebx,edx // increment u fraction
3809 mov al,[esi] // get texture pixel 3
3811 mov eax, gr_fade_table32[eax*4]
3813 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3814 add ecx,Tmap1.DeltaVFrac // increment v fraction
3816 sbb ebp,ebp // get -1 if carry
3817 mov [edi+12],eax // store pixel 3
3819 add ebx,edx // increment u fraction
3820 mov al,[esi] // get texture pixel 4
3822 mov eax, gr_fade_table32[eax*4]
3824 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3825 add ecx,Tmap1.DeltaVFrac // increment v fraction
3827 sbb ebp,ebp // get -1 if carry
3828 mov [edi+16],eax // store pixel 4
3830 add ebx,edx // increment u fraction
3831 mov al,[esi] // get texture pixel 5
3833 mov eax, gr_fade_table32[eax*4]
3835 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3836 add ecx,Tmap1.DeltaVFrac // increment v fraction
3838 sbb ebp,ebp // get -1 if carry
3839 mov [edi+20],eax // store pixel 5
3841 add ebx,edx // increment u fraction
3842 mov al,[esi] // get texture pixel 6
3844 mov eax, gr_fade_table32[eax*4]
3846 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3847 add ecx,Tmap1.DeltaVFrac // increment v fraction
3849 sbb ebp,ebp // get -1 if carry
3850 mov [edi+24],eax // store pixel 6
3852 add ebx,edx // increment u fraction
3854 mov al,[esi] // get texture pixel 7
3856 mov eax, gr_fade_table32[eax*4] // dword table: index must be scaled by 4
3858 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3860 mov [edi+28],eax // store pixel 7
3866 dec Tmap1.num_big_steps
3872 mov eax,Tmap1.loop_count
3877 mov Tmap1.loop_count, eax
3883 mov al,[edi] // preread the destination cache line
3885 mov al,[esi] // get texture pixel 0
3887 mov eax, gr_fade_table32[eax*4]
3889 add ecx,Tmap1.DeltaVFrac // increment v fraction
3890 sbb ebp,ebp // get -1 if carry
3891 add ebx,edx // increment u fraction
3892 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3893 mov [edi+0],eax // store pixel 0
3895 add ecx,Tmap1.DeltaVFrac // increment v fraction
3896 sbb ebp,ebp // get -1 if carry
3897 add ebx,edx // increment u fraction
3898 mov al,[esi] // get texture pixel 1
3900 mov eax, gr_fade_table32[eax*4]
3902 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
3903 mov [edi+4],eax // store pixel 1 (dword at +4, was a byte at +1)
3906 dec Tmap1.loop_count
3914 mov al,[esi] // get texture pixel 2
3916 mov eax, gr_fade_table32[eax*4]
3917 mov [edi],eax // store pixel 2
// asm_tmap_scanline_lnt
// Draws one scanline by stepping linearly through the texture in 16.16 fixed
// point: each pass fetches one texture byte through esi and writes a byte to
// the destination row through edi.
//
// Inputs read from Tmap1: fx_u, fx_v, fx_u_right, fx_v_right, fx_du_dx,
// fx_dv_dx, src_offset, pixptr, dest_row_data, loop_count, bp->w, bp->h.
// Scratch written in Tmap1: DeltaUFrac, DeltaVFrac, UVintVfracStepVNoCarry,
// UVintVfracStepVCarry, loop_count, num_big_steps.
//
// Stepping trick used by every pixel: "add ecx,DeltaVFrac / sbb ebp,ebp"
// leaves ebp = -1 when the v fraction overflowed and 0 otherwise; then
// "add ebx,edx / adc esi,UVintVfracStep[4*ebp]" adds the step selected by
// ebp plus the carry out of the u fraction. The mov instructions placed in
// between do not modify the flags.
// Presumably UVintVfracStep[-1] is the VCarry slot and [0] the NoCarry slot
// -- TODO confirm against the Tmap1 layout.
// ebp is used as scratch inside the loop, so stack-frame locals cannot be
// addressed there.
//
// NOTE(review): the declaration of `end`, the braces, and the loop
// labels/branches are not shown in this section -- confirm control flow
// against the complete routine. The routine name and the spacing around each
// store suggest an additional per-pixel test (possibly transparency) that is
// not visible here -- confirm.
3930 void asm_tmap_scanline_lnt()
// Give up on the scanline when the integer part of either endpoint's u or v
// is at or beyond the bitmap width/height. Only the upper bound is tested in
// the lines shown here.
3934 end = f2i(Tmap1.fx_u);
3935 if ( end >= Tmap1.bp->w ) return;
3937 end = f2i(Tmap1.fx_v);
3938 if ( end >= Tmap1.bp->h ) return;
3940 end = f2i(Tmap1.fx_u_right);
3941 if ( end >= Tmap1.bp->w ) return;
3943 end = f2i(Tmap1.fx_v_right);
3944 if ( end >= Tmap1.bp->h ) return;
3956 ; setup delta values
3957 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
3958 mov ebx, eax // copy it
3959 sar eax, 16 // get v int step
3960 shl ebx, 16 // get v frac step (moved to the top 16 bits)
3961 mov Tmap1.DeltaVFrac, ebx // store it
3962 imul eax, Tmap1.src_offset // calc texture step for v int step
3964 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
3965 mov ecx, ebx // copy it
3966 sar ebx, 16 // get the u int step
3967 shl ecx, 16 // get the u frac step (moved to the top 16 bits)
3968 mov Tmap1.DeltaUFrac, ecx // store it
3969 add eax, ebx // calc uint + vint step
3970 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
3971 add eax, Tmap1.src_offset // calc whole step + v carry (one extra texture row)
3972 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
3974 ; setup initial coordinates
3975 mov esi, Tmap1.fx_u // get u 16.16
3976 mov ebx, esi // copy it
3977 sar esi, 16 // get integer part
3978 shl ebx, 16 // get fractional part
3980 mov ecx, Tmap1.fx_v // get v 16.16
3981 mov edx, ecx // copy it
3982 sar edx, 16 // get integer part
3983 shl ecx, 16 // get fractional part
3984 imul edx, Tmap1.src_offset // calc texture scanline address
3985 add esi, edx // calc texture offset
3986 add esi, Tmap1.pixptr // calc address
3988 ; set edi = address of first pixel to modify
3989 mov edi, Tmap1.dest_row_data
3991 mov edx, Tmap1.DeltaUFrac
// Split the pixel count: loop_count keeps its low 3 bits (the remainder of a
// division by 8) and num_big_steps receives the value left in eax.
// Presumably eax is adjusted and shifted right by 3 in lines not shown here
// -- TODO confirm.
3993 mov eax, Tmap1.loop_count
3995 mov Tmap1.loop_count, eax
4000 mov Tmap1.num_big_steps, eax
4001 and Tmap1.loop_count, 7
4006 // 8 pixel span code
4007 // edi = dest dib bits at current pixel
4008 // esi = texture pointer at current u,v
4010 // ebx = u fraction 0.32
4011 // ecx = v fraction 0.32
4012 // edx = u frac step
4013 // ebp = v carry scratch
4015 mov al,[edi] // preread the destination cache line
4017 mov al,[esi] // get texture pixel 0
4019 add ecx,Tmap1.DeltaVFrac // increment v fraction
4020 sbb ebp,ebp // get -1 if carry
4021 add ebx,edx // increment u fraction
4023 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4024 add ecx,Tmap1.DeltaVFrac // increment v fraction
4026 sbb ebp,ebp // get -1 if carry
4029 mov [edi+0],al // store pixel 0
4032 add ebx,edx // increment u fraction
4033 mov al,[esi] // get texture pixel 1
4035 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4036 add ecx,Tmap1.DeltaVFrac // increment v fraction
4038 sbb ebp,ebp // get -1 if carry
4041 mov [edi+1],al // store pixel 1
4044 add ebx,edx // increment u fraction
4045 mov al,[esi] // get texture pixel 2
4047 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4048 add ecx,Tmap1.DeltaVFrac // increment v fraction
4050 sbb ebp,ebp // get -1 if carry
4053 mov [edi+2],al // store pixel 2
4056 add ebx,edx // increment u fraction
4057 mov al,[esi] // get texture pixel 3
4059 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4060 add ecx,Tmap1.DeltaVFrac // increment v fraction
4062 sbb ebp,ebp // get -1 if carry
4065 mov [edi+3],al // store pixel 3
4068 add ebx,edx // increment u fraction
4069 mov al,[esi] // get texture pixel 4
4071 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4072 add ecx,Tmap1.DeltaVFrac // increment v fraction
4074 sbb ebp,ebp // get -1 if carry
4077 mov [edi+4],al // store pixel 4
4080 add ebx,edx // increment u fraction
4081 mov al,[esi] // get texture pixel 5
4083 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4084 add ecx,Tmap1.DeltaVFrac // increment v fraction
4086 sbb ebp,ebp // get -1 if carry
4089 mov [edi+5],al // store pixel 5
4092 add ebx,edx // increment u fraction
4093 mov al,[esi] // get texture pixel 6
4095 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4096 add ecx,Tmap1.DeltaVFrac // increment v fraction
4098 sbb ebp,ebp // get -1 if carry
4101 mov [edi+6],al // store pixel 6
4104 add ebx,edx // increment u fraction
4106 mov al,[esi] // get texture pixel 7
4108 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4112 mov [edi+7],al // store pixel 7
4119 dec Tmap1.num_big_steps
// Leftover pixels: loop_count was masked to 0..7 above. The code below draws
// two pixels and then decrements loop_count once, so loop_count is presumably
// converted to a pair count in lines not shown here -- TODO confirm.
4125 mov eax,Tmap1.loop_count
4130 mov Tmap1.loop_count, eax
4135 mov al,[edi] // preread the destination cache line
4136 // add ebx,edx // increment u fraction
4140 mov al,[esi] // get texture pixel 0
4142 add ecx,Tmap1.DeltaVFrac // increment v fraction
4143 sbb ebp,ebp // get -1 if carry
4144 add ebx,edx // increment u fraction
4145 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4148 mov [edi+0],al // store pixel 0
4151 add ecx,Tmap1.DeltaVFrac // increment v fraction
4152 sbb ebp,ebp // get -1 if carry
4153 add ebx,edx // increment u fraction
4154 mov al,[esi] // get texture pixel 1
4156 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4159 mov [edi+1],al // store pixel 1
4163 dec Tmap1.loop_count
4171 mov al,[esi] // get texture pixel 2
4174 mov [edi],al // store it at the current dest pixel
// asm_tmap_scanline_lnn
// Draws one scanline by stepping linearly through the texture in 16.16 fixed
// point: each pass fetches one texture byte through esi and stores it
// unchanged in the destination row through edi.
//
// Inputs read from Tmap1: fx_u, fx_v, fx_u_right, fx_v_right, fx_du_dx,
// fx_dv_dx, src_offset, pixptr, dest_row_data, loop_count, bp->w, bp->h.
// Scratch written in Tmap1: DeltaUFrac, DeltaVFrac, UVintVfracStepVNoCarry,
// UVintVfracStepVCarry, loop_count, num_big_steps.
//
// Stepping trick used by every pixel: "add ecx,DeltaVFrac / sbb ebp,ebp"
// leaves ebp = -1 when the v fraction overflowed and 0 otherwise; then
// "add ebx,edx / adc esi,UVintVfracStep[4*ebp]" adds the step selected by
// ebp plus the carry out of the u fraction. The mov instructions placed in
// between do not modify the flags.
// Presumably UVintVfracStep[-1] is the VCarry slot and [0] the NoCarry slot
// -- TODO confirm against the Tmap1 layout.
// ebp is used as scratch inside the loop, so stack-frame locals cannot be
// addressed there.
//
// NOTE(review): the declaration of `end`, the braces, and the loop
// labels/branches are not shown in this section -- confirm control flow
// against the complete routine.
4189 void asm_tmap_scanline_lnn()
// Give up on the scanline when the integer part of either endpoint's u or v
// is at or beyond the bitmap width/height. Only the upper bound is tested in
// the lines shown here.
4193 end = f2i(Tmap1.fx_u);
4194 if ( end >= Tmap1.bp->w ) return;
4196 end = f2i(Tmap1.fx_v);
4197 if ( end >= Tmap1.bp->h ) return;
4199 end = f2i(Tmap1.fx_u_right);
4200 if ( end >= Tmap1.bp->w ) return;
4202 end = f2i(Tmap1.fx_v_right);
4203 if ( end >= Tmap1.bp->h ) return;
4215 ; setup delta values
4216 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
4217 mov ebx, eax // copy it
4218 sar eax, 16 // get v int step
4219 shl ebx, 16 // get v frac step (moved to the top 16 bits)
4220 mov Tmap1.DeltaVFrac, ebx // store it
4221 imul eax, Tmap1.src_offset // calc texture step for v int step
4223 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
4224 mov ecx, ebx // copy it
4225 sar ebx, 16 // get the u int step
4226 shl ecx, 16 // get the u frac step (moved to the top 16 bits)
4227 mov Tmap1.DeltaUFrac, ecx // store it
4228 add eax, ebx // calc uint + vint step
4229 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
4230 add eax, Tmap1.src_offset // calc whole step + v carry (one extra texture row)
4231 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
4233 ; setup initial coordinates
4234 mov esi, Tmap1.fx_u // get u 16.16
4235 mov ebx, esi // copy it
4236 sar esi, 16 // get integer part
4237 shl ebx, 16 // get fractional part
4239 mov ecx, Tmap1.fx_v // get v 16.16
4240 mov edx, ecx // copy it
4241 sar edx, 16 // get integer part
4242 shl ecx, 16 // get fractional part
4243 imul edx, Tmap1.src_offset // calc texture scanline address
4244 add esi, edx // calc texture offset
4245 add esi, Tmap1.pixptr // calc address
4247 ; set edi = address of first pixel to modify
4248 mov edi, Tmap1.dest_row_data
4250 mov edx, Tmap1.DeltaUFrac
// Split the pixel count: loop_count keeps its low 3 bits (the remainder of a
// division by 8) and num_big_steps receives the value left in eax.
// Presumably eax is adjusted and shifted right by 3 in lines not shown here
// -- TODO confirm.
4252 mov eax, Tmap1.loop_count
4254 mov Tmap1.loop_count, eax
4259 mov Tmap1.num_big_steps, eax
4260 and Tmap1.loop_count, 7
4265 // 8 pixel span code
4266 // edi = dest dib bits at current pixel
4267 // esi = texture pointer at current u,v
4269 // ebx = u fraction 0.32
4270 // ecx = v fraction 0.32
4271 // edx = u frac step
4272 // ebp = v carry scratch
4274 mov al,[edi] // preread the destination cache line
4276 mov al,[esi] // get texture pixel 0
4278 add ecx,Tmap1.DeltaVFrac // increment v fraction
4279 sbb ebp,ebp // get -1 if carry
4280 add ebx,edx // increment u fraction
4282 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4283 add ecx,Tmap1.DeltaVFrac // increment v fraction
4285 sbb ebp,ebp // get -1 if carry
4286 mov [edi+0],al // store pixel 0
4288 add ebx,edx // increment u fraction
4289 mov al,[esi] // get texture pixel 1
4291 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4292 add ecx,Tmap1.DeltaVFrac // increment v fraction
4294 sbb ebp,ebp // get -1 if carry
4295 mov [edi+1],al // store pixel 1
4297 add ebx,edx // increment u fraction
4298 mov al,[esi] // get texture pixel 2
4300 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4301 add ecx,Tmap1.DeltaVFrac // increment v fraction
4303 sbb ebp,ebp // get -1 if carry
4304 mov [edi+2],al // store pixel 2
4306 add ebx,edx // increment u fraction
4307 mov al,[esi] // get texture pixel 3
4309 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4310 add ecx,Tmap1.DeltaVFrac // increment v fraction
4312 sbb ebp,ebp // get -1 if carry
4313 mov [edi+3],al // store pixel 3
4315 add ebx,edx // increment u fraction
4316 mov al,[esi] // get texture pixel 4
4318 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4319 add ecx,Tmap1.DeltaVFrac // increment v fraction
4321 sbb ebp,ebp // get -1 if carry
4322 mov [edi+4],al // store pixel 4
4324 add ebx,edx // increment u fraction
4325 mov al,[esi] // get texture pixel 5
4327 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4328 add ecx,Tmap1.DeltaVFrac // increment v fraction
4330 sbb ebp,ebp // get -1 if carry
4331 mov [edi+5],al // store pixel 5
4333 add ebx,edx // increment u fraction
4334 mov al,[esi] // get texture pixel 6
4336 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4337 add ecx,Tmap1.DeltaVFrac // increment v fraction
4339 sbb ebp,ebp // get -1 if carry
4340 mov [edi+6],al // store pixel 6
4342 add ebx,edx // increment u fraction
4344 mov al,[esi] // get texture pixel 7
4346 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4348 mov [edi+7],al // store pixel 7
4354 dec Tmap1.num_big_steps
// Leftover pixels: loop_count was masked to 0..7 above. The code below draws
// two pixels and then decrements loop_count once, so loop_count is presumably
// converted to a pair count in lines not shown here -- TODO confirm.
4360 mov eax,Tmap1.loop_count
4365 mov Tmap1.loop_count, eax
4370 mov al,[edi] // preread the destination cache line
4371 // add ebx,edx // increment u fraction
4375 mov al,[esi] // get texture pixel 0
4377 add ecx,Tmap1.DeltaVFrac // increment v fraction
4378 sbb ebp,ebp // get -1 if carry
4379 add ebx,edx // increment u fraction
4380 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4381 mov [edi+0],al // store pixel 0
4383 add ecx,Tmap1.DeltaVFrac // increment v fraction
4384 sbb ebp,ebp // get -1 if carry
4385 add ebx,edx // increment u fraction
4386 mov al,[esi] // get texture pixel 1
4388 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4389 mov [edi+1],al // store pixel 1
4392 dec Tmap1.loop_count
4400 mov al,[esi] // get texture pixel 2
4401 mov [edi],al // store it at the current dest pixel
4414 void tmapscan_pln16( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
4416 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(ushort,lx,y);
4417 Tmap1.loop_count = rx - lx;
4418 Tmap1.fx_u = fl2f(p->u);
4419 Tmap1.fx_v = fl2f(p->v);
4420 Tmap1.fx_du_dx = fl2f(dp->u);
4421 Tmap1.fx_dv_dx = fl2f(dp->v);
4423 Tmap1.fx_l = fl2f(p->l*32.0);
4424 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
4426 Tmap1.fx_u_right = fl2f(rp->u);
4427 Tmap1.fx_v_right = fl2f(rp->v);
4428 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
4429 Tmap1.bp = tmap_bitmap;
4430 Tmap1.src_offset = tmap_bitmap->w;
4433 Tmap1.FixedScale = 65536.0f;
4434 Tmap1.FixedScale8 = 2048.0f; //8192.0f; // 2^16 / 8
4438 Tmap1.UOverZ = p->u;
4439 Tmap1.VOverZ = p->v;
4440 Tmap1.OneOverZ = p->sw;
4442 Tmap1.dUOverZdX8 = dp->u*32.0f;
4443 Tmap1.dVOverZdX8 = dp->v*32.0f;
4444 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
4446 Tmap1.dUOverZdX = dp->u;
4447 Tmap1.dVOverZdX = dp->v;
4448 Tmap1.dOneOverZdX = dp->sw;
4450 Tmap1.RightUOverZ = rp->u;
4451 Tmap1.RightVOverZ = rp->v;
4452 Tmap1.RightOneOverZ = rp->sw;
4456 Tmap1.BitmapWidth = Tmap1.bp->w;
4457 Tmap1.BitmapHeight = Tmap1.bp->h;
4460 if ( Tmap1.fx_dl_dx < 0 ) {
4461 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
4462 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
4463 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
4465 // Assert( Tmap1.fx_l > 31*F1_0 );
4466 // Assert( Tmap1.fx_l < 66*F1_0 );
4467 // Assert( Tmap1.fx_dl_dx >= 0 );
4468 // Assert( Tmap1.fx_dl_dx < 31*F1_0 );
4486 // put the FPU in 32 bit mode
4487 // @todo move this out of here!
4489 fstcw Tmap1.OldFPUCW // store copy of CW
4490 mov ax,Tmap1.OldFPUCW // get it in ax
4491 //hh and eax,NOT 1100000000y // 24 bit precision
4493 mov Tmap1.FPUCW,ax // store it
4494 fldcw Tmap1.FPUCW // load the FPU
4496 mov ecx, Tmap1.loop_count // ecx = width
4498 mov edi, Tmap1.dest_row_data // edi = dest pointer
4500 // edi = pointer to start pixel in dest dib
4503 mov eax,ecx // eax and ecx = width
4504 shr ecx,5 // ecx = width / subdivision length
4505 and eax,31 // eax = width mod subdivision length
4506 jnz some_left_over // any leftover?
4508 dec ecx // no, so special case last span
4509 mov eax,32 // it's 8 pixels long
4511 mov Tmap1.Subdivisions,ecx // store widths
4512 mov Tmap1.WidthModLength,eax
4514 // mov ebx,pLeft ; get left edge pointer
4515 // mov edx,pGradients ; get gradients pointer
4517 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
4518 // st0 st1 st2 st3 st4 st5 st6 st7
4519 fld Tmap1.VOverZ // V/ZL
4520 fld Tmap1.UOverZ // U/ZL V/ZL
4521 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
4522 fld1 // 1 1/ZL U/ZL V/ZL
4523 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
4524 fld st // ZL ZL 1/ZL U/ZL V/ZL
4525 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
4526 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
4527 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
4529 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
4530 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
4532 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
4534 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
4535 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
4536 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
4537 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
4538 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
4540 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
4542 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
4543 // @todo overlap this guy
4544 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
4545 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
4546 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
4547 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
4548 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
4550 cmp ecx,0 // check for any full spans
4551 jle HandleLeftoverPixels
4555 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
4556 // UR VR V/ZR 1/ZR U/ZR UL VL
4558 // convert left side coords
4560 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
4561 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
4562 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
4564 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
4565 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
4566 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
4568 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
4570 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
4571 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
4572 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
4573 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
4575 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
4576 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
4578 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
4579 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
4580 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
4582 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
4583 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
4585 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
4586 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
4587 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
4588 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
4589 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
4590 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
4591 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
4593 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
4595 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
4596 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
4599 ; set up affine registers
4601 ; setup delta values
4603 mov eax,Tmap1.DeltaV ; get v 16.16 step
4604 mov ebx,eax ; copy it
4605 sar eax,16 ; get v int step
4606 shl ebx,16 ; get v frac step
4607 mov Tmap1.DeltaVFrac,ebx ; store it
4608 imul eax,Tmap1.src_offset ; calculate texture step for v int step
4610 mov ebx,Tmap1.DeltaU ; get u 16.16 step
4611 mov ecx,ebx ; copy it
4612 sar ebx,16 ; get u int step
4613 shl ecx,16 ; get u frac step
4614 mov Tmap1.DeltaUFrac,ecx ; store it
4615 add eax,ebx ; calculate uint + vint step
4616 mov Tmap1.UVintVfracStepVNoCarry,eax; save whole step in non-v-carry slot
4617 add eax,Tmap1.src_offset ; calculate whole step + v carry
4618 mov Tmap1.UVintVfracStepVCarry,eax ; save in v-carry slot
4622 ; check coordinate ranges
4623 mov eax, Tmap1.UFixed
4624 cmp eax, Tmap1.MinUFixed
4626 mov eax, Tmap1.MinUFixed
4627 mov Tmap1.UFixed, eax
4630 cmp eax, Tmap1.MaxUFixed
4632 mov eax, Tmap1.MaxUFixed
4633 mov Tmap1.UFixed, eax
4635 mov eax, Tmap1.VFixed
4636 cmp eax, Tmap1.MinVFixed
4638 mov eax, Tmap1.MinVFixed
4639 mov Tmap1.VFixed, eax
4642 cmp eax, Tmap1.MaxVFixed
4644 mov eax, Tmap1.MaxVFixed
4645 mov Tmap1.VFixed, eax
4649 ; setup initial coordinates
4650 mov esi,Tmap1.UFixed ; get u 16.16 fixedpoint coordinate
4652 mov ebx,esi ; copy it
4653 sar esi,16 ; get integer part
4654 shl ebx,16 ; get fractional part
4656 mov ecx,Tmap1.VFixed ; get v 16.16 fixedpoint coordinate
4658 mov edx,ecx ; copy it
4659 sar edx,16 ; get integer part
4660 shl ecx,16 ; get fractional part
4661 imul edx,Tmap1.src_offset ; calc texture scanline address
4662 add esi,edx ; calc texture offset
4663 add esi,Tmap1.pixptr ; calc address
4665 mov edx,Tmap1.DeltaUFrac ; get register copy
4671 mov ebp, Tmap1.fx_dl_dx
4682 // add Tmap1.fx_l, eax
4685 // mov eax, Tmap1.fx_l // use bx and dx to do lighting
4688 // mov eax, Tmap1.fx_dl_dx // use bx and dx to do lighting
4694 ; ************** Can't Access Stack Frame ******************
4695 ; ************** Can't Access Stack Frame ******************
4696 ; ************** Can't Access Stack Frame ******************
4698 // 32 pixel span code (32 word stores per pass, [edi+0] .. [edi+62])
4699 // edi = dest dib bits at current pixel
4700 // esi = texture pointer at current u,v
4702 // ebx = u fraction 0.32
4703 // ecx = v fraction 0.32
4704 // edx = u frac step
4705 // ebp = v carry scratch
4707 mov al,[edi] // preread the destination cache line
4710 mov al,[esi] // get texture pixel 0
4712 mov ax, gr_fade_table16[eax*2]
4714 add ecx,Tmap1.DeltaVFrac // increment v fraction
4715 sbb ebp,ebp // get -1 if carry
4716 add ebx,edx // increment u fraction
4718 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4719 add ecx,Tmap1.DeltaVFrac // increment v fraction
4721 sbb ebp,ebp // get -1 if carry
4722 // mov al, 0 // Uncomment this line to show divisions
4723 mov [edi+0],ax // store pixel 0
4725 add ebx,edx // increment u fraction
4726 mov al,[esi] // get texture pixel 1
4728 mov ax, gr_fade_table16[eax*2]
4730 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4731 add ecx,Tmap1.DeltaVFrac // increment v fraction
4733 sbb ebp,ebp // get -1 if carry
4734 mov [edi+2],ax // store pixel 1
4736 add ebx,edx // increment u fraction
4737 mov al,[esi] // get texture pixel 2
4739 mov ax, gr_fade_table16[eax*2]
4741 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4742 add ecx,Tmap1.DeltaVFrac // increment v fraction
4744 sbb ebp,ebp // get -1 if carry
4745 mov [edi+4],ax // store pixel 2
4747 add ebx,edx // increment u fraction
4748 mov al,[esi] // get texture pixel 3
4750 mov ax, gr_fade_table16[eax*2]
4752 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4753 add ecx,Tmap1.DeltaVFrac // increment v fraction
4755 sbb ebp,ebp // get -1 if carry
4756 mov [edi+6],ax // store pixel 3
4758 add ebx,edx // increment u fraction
4759 mov al,[esi] // get texture pixel 4
4761 mov ax, gr_fade_table16[eax*2]
4762 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4763 add ecx,Tmap1.DeltaVFrac // increment v fraction
4765 sbb ebp,ebp // get -1 if carry
4766 mov [edi+8],ax // store pixel 3
4768 add ebx,edx // increment u fraction
4769 mov al,[esi] // get texture pixel 4
4771 mov ax, gr_fade_table16[eax*2]
4772 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4773 add ecx,Tmap1.DeltaVFrac // increment v fraction
4775 sbb ebp,ebp // get -1 if carry
4776 mov [edi+10],ax // store pixel 3
4778 add ebx,edx // increment u fraction
4779 mov al,[esi] // get texture pixel 4
4781 mov ax, gr_fade_table16[eax*2]
4782 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4783 add ecx,Tmap1.DeltaVFrac // increment v fraction
4785 sbb ebp,ebp // get -1 if carry
4786 mov [edi+12],ax // store pixel 3
4788 add ebx,edx // increment u fraction
4789 mov al,[esi] // get texture pixel 4
4791 mov ax, gr_fade_table16[eax*2]
4792 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4793 add ecx,Tmap1.DeltaVFrac // increment v fraction
4795 sbb ebp,ebp // get -1 if carry
4796 mov [edi+14],ax // store pixel 3
4798 add ebx,edx // increment u fraction
4799 mov al,[esi] // get texture pixel 4
4801 mov ax, gr_fade_table16[eax*2]
4802 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4803 add ecx,Tmap1.DeltaVFrac // increment v fraction
4805 sbb ebp,ebp // get -1 if carry
4806 mov [edi+16],ax // store pixel 3
4808 add ebx,edx // increment u fraction
4809 mov al,[esi] // get texture pixel 4
4811 mov ax, gr_fade_table16[eax*2]
4812 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4813 add ecx,Tmap1.DeltaVFrac // increment v fraction
4815 sbb ebp,ebp // get -1 if carry
4816 mov [edi+18],ax // store pixel 3
4818 add ebx,edx // increment u fraction
4819 mov al,[esi] // get texture pixel 4
4821 mov ax, gr_fade_table16[eax*2]
4822 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4823 add ecx,Tmap1.DeltaVFrac // increment v fraction
4825 sbb ebp,ebp // get -1 if carry
4826 mov [edi+20],ax // store pixel 3
4828 add ebx,edx // increment u fraction
4829 mov al,[esi] // get texture pixel 4
4831 mov ax, gr_fade_table16[eax*2]
4834 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4835 add ecx,Tmap1.DeltaVFrac // increment v fraction
4837 sbb ebp,ebp // get -1 if carry
4838 mov [edi+22],ax // store pixel 3
4840 add ebx,edx // increment u fraction
4841 mov al,[esi] // get texture pixel 4
4843 mov ax, gr_fade_table16[eax*2]
4846 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4847 add ecx,Tmap1.DeltaVFrac // increment v fraction
4849 sbb ebp,ebp // get -1 if carry
4850 mov [edi+24],ax // store pixel 3
4852 add ebx,edx // increment u fraction
4853 mov al,[esi] // get texture pixel 4
4855 mov ax, gr_fade_table16[eax*2]
4858 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4859 add ecx,Tmap1.DeltaVFrac // increment v fraction
4861 sbb ebp,ebp // get -1 if carry
4862 mov [edi+26],ax // store pixel 3
4864 add ebx,edx // increment u fraction
4865 mov al,[esi] // get texture pixel 4
4867 mov ax, gr_fade_table16[eax*2]
4870 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4871 add ecx,Tmap1.DeltaVFrac // increment v fraction
4873 sbb ebp,ebp // get -1 if carry
4874 mov [edi+28],ax // store pixel 3
4876 add ebx,edx // increment u fraction
4877 mov al,[esi] // get texture pixel 4
4879 mov ax, gr_fade_table16[eax*2]
4882 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4883 add ecx,Tmap1.DeltaVFrac // increment v fraction
4885 sbb ebp,ebp // get -1 if carry
4886 mov [edi+30],ax // store pixel 3
4888 add ebx,edx // increment u fraction
4889 mov al,[esi] // get texture pixel 4
4891 mov ax, gr_fade_table16[eax*2]
4894 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4895 add ecx,Tmap1.DeltaVFrac // increment v fraction
4897 sbb ebp,ebp // get -1 if carry
4898 mov [edi+32],ax // store pixel 3
4900 add ebx,edx // increment u fraction
4901 mov al,[esi] // get texture pixel 4
4903 mov ax, gr_fade_table16[eax*2]
4906 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4907 add ecx,Tmap1.DeltaVFrac // increment v fraction
4909 sbb ebp,ebp // get -1 if carry
4910 mov [edi+34],ax // store pixel 3
4912 add ebx,edx // increment u fraction
4913 mov al,[esi] // get texture pixel 4
4915 mov ax, gr_fade_table16[eax*2]
4918 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4919 add ecx,Tmap1.DeltaVFrac // increment v fraction
4921 sbb ebp,ebp // get -1 if carry
4922 mov [edi+36],ax // store pixel 3
4924 add ebx,edx // increment u fraction
4925 mov al,[esi] // get texture pixel 4
4927 mov ax, gr_fade_table16[eax*2]
4930 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4931 add ecx,Tmap1.DeltaVFrac // increment v fraction
4933 sbb ebp,ebp // get -1 if carry
4934 mov [edi+38],ax // store pixel 3
4936 add ebx,edx // increment u fraction
4937 mov al,[esi] // get texture pixel 4
4939 mov ax, gr_fade_table16[eax*2]
4942 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4943 add ecx,Tmap1.DeltaVFrac // increment v fraction
4945 sbb ebp,ebp // get -1 if carry
4946 mov [edi+40],ax // store pixel 3
4948 add ebx,edx // increment u fraction
4949 mov al,[esi] // get texture pixel 4
4951 mov ax, gr_fade_table16[eax*2]
4954 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4955 add ecx,Tmap1.DeltaVFrac // increment v fraction
4957 sbb ebp,ebp // get -1 if carry
4958 mov [edi+42],ax // store pixel 3
4960 add ebx,edx // increment u fraction
4961 mov al,[esi] // get texture pixel 4
4963 mov ax, gr_fade_table16[eax*2]
4966 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4967 add ecx,Tmap1.DeltaVFrac // increment v fraction
4969 sbb ebp,ebp // get -1 if carry
4970 mov [edi+44],ax // store pixel 3
4972 add ebx,edx // increment u fraction
4973 mov al,[esi] // get texture pixel 4
4975 mov ax, gr_fade_table16[eax*2]
4978 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4979 add ecx,Tmap1.DeltaVFrac // increment v fraction
4981 sbb ebp,ebp // get -1 if carry
4982 mov [edi+46],ax // store pixel 3
4984 add ebx,edx // increment u fraction
4985 mov al,[esi] // get texture pixel 4
4987 mov ax, gr_fade_table16[eax*2]
4990 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
4991 add ecx,Tmap1.DeltaVFrac // increment v fraction
4993 sbb ebp,ebp // get -1 if carry
4994 mov [edi+48],ax // store pixel 3
4996 add ebx,edx // increment u fraction
4997 mov al,[esi] // get texture pixel 4
4999 mov ax, gr_fade_table16[eax*2]
5002 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5003 add ecx,Tmap1.DeltaVFrac // increment v fraction
5005 sbb ebp,ebp // get -1 if carry
5006 mov [edi+50],ax // store pixel 3
5008 add ebx,edx // increment u fraction
5009 mov al,[esi] // get texture pixel 4
5011 mov ax, gr_fade_table16[eax*2]
5014 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5015 add ecx,Tmap1.DeltaVFrac // increment v fraction
5019 sbb ebp,ebp // get -1 if carry
5020 mov [edi+52],ax // store pixel 3
5022 add ebx,edx // increment u fraction
5023 mov al,[esi] // get texture pixel 4
5025 mov ax, gr_fade_table16[eax*2]
5028 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5029 add ecx,Tmap1.DeltaVFrac // increment v fraction
5031 sbb ebp,ebp // get -1 if carry
5032 mov [edi+54],ax // store pixel 3
5034 add ebx,edx // increment u fraction
5035 mov al,[esi] // get texture pixel 4
5037 mov ax, gr_fade_table16[eax*2]
5039 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5040 add ecx,Tmap1.DeltaVFrac // increment v fraction
5042 sbb ebp,ebp // get -1 if carry
5043 mov [edi+56],ax // store pixel 4
5045 add ebx,edx // increment u fraction
5046 mov al,[esi] // get texture pixel 5
5048 mov ax, gr_fade_table16[eax*2]
5050 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5051 add ecx,Tmap1.DeltaVFrac // increment v fraction
5053 sbb ebp,ebp // get -1 if carry
5054 mov [edi+58],ax // store pixel 5
5056 add ebx,edx // increment u fraction
5057 mov al,[esi] // get texture pixel 6
5059 mov ax, gr_fade_table16[eax*2]
5061 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5062 add ecx,Tmap1.DeltaVFrac // increment v fraction
5064 sbb ebp,ebp // get -1 if carry
5065 mov [edi+60],ax // store pixel 6
5067 add ebx,edx // increment u fraction
5069 mov al,[esi] // get texture pixel 7
5071 mov ax, gr_fade_table16[eax*2]
5073 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5075 mov [edi+62],ax // store pixel 7
5079 ; ************** Okay to Access Stack Frame ****************
5080 ; ************** Okay to Access Stack Frame ****************
5081 ; ************** Okay to Access Stack Frame ****************
5084 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
5085 ; ZR V/ZR 1/ZR U/ZR UL VL
5087 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
5088 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
5089 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
5090 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
5092 add edi,64 ; increment to next span
5093 dec Tmap1.Subdivisions ; decrement span count
5094 jnz SpanLoop ; loop back
5096 // save new lighting values
5099 // mov Tmap1.fx_l, eax
5103 // mov Tmap1.fx_dl_dx, eax
5105 HandleLeftoverPixels:
5108 mov esi,Tmap1.pixptr ; load texture pointer
5110 ; edi = dest dib bits
5111 ; esi = current texture dib bits
5112 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
5113 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
5115 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
5116 jz FPUReturn ; nope, pop the FPU and bail
5118 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
5120 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
5121 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
5122 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
5124 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
5125 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
5126 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
5128 dec Tmap1.WidthModLength ; calc how many steps to take
5129 jz OnePixelSpan ; just one, don't do deltas
5131 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
5134 ; @todo rearrange things so we don't need these two instructions
5135 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
5136 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
5138 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
5139 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
5140 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
5141 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
5142 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
5143 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
5145 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
5147 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
5148 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
5150 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
5152 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
5153 fxch st(1) ; VR UR inv. inv. inv. dU VL
5154 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
5155 fxch st(6) ; dV UR inv. inv. inv. dU VR
5157 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
5158 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
5159 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
5161 fxch st(4) ; dU inv. inv. inv. UR VR
5162 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
5163 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
5164 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
5166 ; @todo gross! these are to line up with the other loop
5167 fld st(1) ; inv. inv. inv. inv. UR VR
5168 fld st(2) ; inv. inv. inv. inv. inv. UR VR
5173 ; setup delta values
5174 mov eax, Tmap1.DeltaV // get v 16.16 step
5175 mov ebx, eax // copy it
5176 sar eax, 16 // get v int step
5177 shl ebx, 16 // get v frac step
5178 mov Tmap1.DeltaVFrac, ebx // store it
5179 imul eax, Tmap1.src_offset // calc texture step for v int step
5181 mov ebx, Tmap1.DeltaU // get u 16.16 step
5182 mov ecx, ebx // copy it
5183 sar ebx, 16 // get the u int step
5184 shl ecx, 16 // get the u frac step
5185 mov Tmap1.DeltaUFrac, ecx // store it
5186 add eax, ebx // calc uint + vint step
5187 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
5188 add eax, Tmap1.src_offset // calc whole step + v carry
5189 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
5196 ; check coordinate ranges
5197 mov eax, Tmap1.UFixed
5198 cmp eax, Tmap1.MinUFixed
5200 mov eax, Tmap1.MinUFixed
5201 mov Tmap1.UFixed, eax
5204 cmp eax, Tmap1.MaxUFixed
5206 mov eax, Tmap1.MaxUFixed
5207 mov Tmap1.UFixed, eax
5209 mov eax, Tmap1.VFixed
5210 cmp eax, Tmap1.MinVFixed
5212 mov eax, Tmap1.MinVFixed
5213 mov Tmap1.VFixed, eax
5216 cmp eax, Tmap1.MaxVFixed
5218 mov eax, Tmap1.MaxVFixed
5219 mov Tmap1.VFixed, eax
5226 ; setup initial coordinates
5227 mov esi, Tmap1.UFixed // get u 16.16
5228 mov ebx, esi // copy it
5229 sar esi, 16 // get integer part
5230 shl ebx, 16 // get fractional part
5232 mov ecx, Tmap1.VFixed // get v 16.16
5233 mov edx, ecx // copy it
5234 sar edx, 16 // get integer part
5235 shl ecx, 16 // get fractional part
5236 imul edx, Tmap1.src_offset // calc texture scanline address
5237 add esi, edx // calc texture offset
5238 add esi, Tmap1.pixptr // calc address
5240 ; set edi = address of first pixel to modify
5241 ; mov edi, Tmap1.dest_row_data
5250 mov edx, Tmap1.DeltaUFrac
5252 cmp Tmap1.WidthModLength, 1
5257 mov ebx, Tmap1.fx_l_right
5264 // slow but maybe better
5267 mov ebx, Tmap1.WidthModLength
5272 mov eax, Tmap1.fx_dl_dx
5282 inc Tmap1.WidthModLength
5283 mov eax,Tmap1.WidthModLength
5287 mov Tmap1.WidthModLength, eax
5291 mov al,[edi] // preread the destination cache line
5294 mov al,[esi] // get texture pixel 0
5296 mov ax, gr_fade_table16[eax*2]
5298 add ecx,Tmap1.DeltaVFrac // increment v fraction
5299 sbb ebp,ebp // get -1 if carry
5300 add ebx,edx // increment u fraction
5301 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5302 mov [edi+0],ax // store pixel 0
5304 add ecx,Tmap1.DeltaVFrac // increment v fraction
5305 sbb ebp,ebp // get -1 if carry
5306 add ebx,edx // increment u fraction
5307 mov al,[esi] // get texture pixel 1
5309 mov ax, gr_fade_table16[eax*2]
5311 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5312 mov [edi+2],ax // store pixel 1
5315 dec Tmap1.WidthModLength
5323 mov al,[esi] // get texture pixel 2
5325 mov ax, gr_fade_table16[eax*2]
5326 mov [edi],ax // store pixel 2
5341 OldWay: // This is 6% slower than above
5343 mov ebx,Tmap1.UFixed ; get starting coordinates
5344 mov ecx,Tmap1.VFixed ; for span
5346 ; leftover pixels loop
5347 ; edi = dest dib bits
5348 ; esi = texture dib bits
5354 mov eax,ecx ; copy v
5356 imul eax,Tmap1.src_offset ; scan offset
5357 mov edx,ebx ; copy u
5359 add eax,edx ; texture offset
5360 mov al,[esi+eax] ; get source pixel
5362 mov [edi],al ; store it
5364 add ebx,Tmap1.DeltaU ; increment u coordinate
5365 add ecx,Tmap1.DeltaV ; increment v coordinate
5367 dec Tmap1.WidthModLength ; decrement loop count
5368 jl FPUReturn ; finish up
5372 mov eax,ecx ; copy v
5374 imul eax,Tmap1.src_offset ; scan offset
5375 mov edx,ebx ; copy u
5377 add eax,edx ; texture offset
5378 mov al,[esi+eax] ; get source pixel
5379 mov [edi],al ; store it
5381 add ebx,Tmap1.DeltaU ; increment u coordinate
5382 add ecx,Tmap1.DeltaV ; increment v coordinate
5384 dec Tmap1.WidthModLength ; decrement loop count
5385 jge LeftoverLoop ; finish up
5390 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
5391 ; xxx xxx xxx xxx xxx xxx xxx
5402 fldcw Tmap1.OldFPUCW // restore the FPU
// tmapscan_lnn16
// Draws one horizontal span of an 8-bit texture into a 16-bit destination row,
// stepping u and v by a constant amount per pixel. Each texel byte is translated
// through palman_8_16_xlat before being stored.
//   lx, y  - screen position of the first destination pixel (via GR_SCREEN_PTR)
//   rx     - right end of the span; loop_count starts out as rx - lx
//   p      - u, v at the left end of the span
//   dp     - per-pixel change in u, v
//   rp     - u, v at the right end; in the code shown it only feeds the range check
//   flags  - not referenced in the code shown
// All working state lives in the global Tmap1 structure.
5419 void tmapscan_lnn16( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
5421 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(ushort,lx,y);
5422 Tmap1.loop_count = rx - lx;
5423 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
5424 Tmap1.bp = tmap_bitmap;
5425 Tmap1.src_offset = tmap_bitmap->w;
// Convert the floating point texture coordinates and steps with fl2f()
// (presumably float -> 16.16 fixed point, matching the "16.16" comments below).
5427 Tmap1.fx_u = fl2f(p->u);
5428 Tmap1.fx_v = fl2f(p->v);
5429 Tmap1.fx_du_dx = fl2f(dp->u);
5430 Tmap1.fx_dv_dx = fl2f(dp->v);
5431 Tmap1.fx_u_right = fl2f(rp->u);
5432 Tmap1.fx_v_right = fl2f(rp->v);
// Range check: draw nothing if f2i() of either end-point coordinate is at or
// beyond the bitmap width/height. Only the upper bound is tested here.
5436 end = f2i(Tmap1.fx_u);
5437 if ( end >= Tmap1.bp->w ) return;
5439 end = f2i(Tmap1.fx_v);
5440 if ( end >= Tmap1.bp->h ) return;
5442 end = f2i(Tmap1.fx_u_right);
5443 if ( end >= Tmap1.bp->w ) return;
5445 end = f2i(Tmap1.fx_v_right);
5446 if ( end >= Tmap1.bp->h ) return;
// Split each 16.16 step into an integer part (folded into a byte offset within
// the texture) and a fractional part shifted up to the top of a 32-bit register,
// so that fraction overflow shows up in the CPU carry flag.
5458 ; setup delta values
5459 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
5460 mov ebx, eax // copy it
5461 sar eax, 16 // get v int step
5462 shl ebx, 16 // get v frac step
5463 mov Tmap1.DeltaVFrac, ebx // store it
5464 imul eax, Tmap1.src_offset // calc texture step for v int step
5466 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
5467 mov ecx, ebx // copy it
5468 sar ebx, 16 // get the u int step
5469 shl ecx, 16 // get the u frac step
5470 mov Tmap1.DeltaUFrac, ecx // store it
5471 add eax, ebx // calc uint + vint step
5472 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
5473 add eax, Tmap1.src_offset // calc whole step + v carry
5474 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
5476 ; setup initial coordinates
5477 mov esi, Tmap1.fx_u // get u 16.16
5478 mov ebx, esi // copy it
5479 sar esi, 16 // get integer part
5480 shl ebx, 16 // get fractional part
5482 mov ecx, Tmap1.fx_v // get v 16.16
5483 mov edx, ecx // copy it
5484 sar edx, 16 // get integer part
5485 shl ecx, 16 // get fractional part
5486 imul edx, Tmap1.src_offset // calc texture scanline address
5487 add esi, edx // calc texture offset
5488 add esi, Tmap1.pixptr // calc address
5490 ; set edi = address of first pixel to modify
5491 mov edi, Tmap1.dest_row_data
5493 mov edx, Tmap1.DeltaUFrac
5495 mov eax, Tmap1.loop_count
5497 mov Tmap1.loop_count, eax
5502 mov Tmap1.num_big_steps, eax
5503 and Tmap1.loop_count, 7 // keep only the low 3 bits of the count for the leftover loop
5509 // 8 pixel span code
5510 // edi = dest dib bits at current pixel
5511 // esi = texture pointer at current u,v
5513 // ebx = u fraction 0.32
5514 // ecx = v fraction 0.32
5515 // edx = u frac step
5516 // ebp = v carry scratch
//
// Per-pixel stepping pattern used throughout:
//   add ecx,DeltaVFrac / sbb ebp,ebp  -> ebp = -1 if the v fraction wrapped, else 0
//   add ebx,edx                       -> carry flag set if the u fraction wrapped
//   adc esi,UVintVfracStep[4*ebp]     -> add the step selected by ebp plus the u carry
// The mov instructions placed between these do not modify flags, so the carry
// from "add ebx,edx" is still intact when the adc executes.
// NOTE(review): UVintVfracStep[0] and UVintVfracStep[-1] are presumably the
// VNoCarry / VCarry slots written above - confirm against the Tmap1 layout.
5518 mov al,[edi] // preread the destination cache line
5520 movzx eax,byte ptr [esi] // get texture pixel 0
5522 add ecx,Tmap1.DeltaVFrac // increment v fraction
5523 sbb ebp,ebp // get -1 if carry
5524 add ebx,edx // increment u fraction
5526 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5527 add ecx,Tmap1.DeltaVFrac // increment v fraction
5529 sbb ebp,ebp // get -1 if carry
5530 mov ax, palman_8_16_xlat[eax*2] // translate 8-bit texel to 16-bit pixel
5531 mov [edi+0],ax // store pixel 0
5533 add ebx,edx // increment u fraction
5534 movzx eax,byte ptr [esi] // get texture pixel 1
5536 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5537 add ecx,Tmap1.DeltaVFrac // increment v fraction
5539 sbb ebp,ebp // get -1 if carry
5540 mov ax, palman_8_16_xlat[eax*2] // translate 8-bit texel to 16-bit pixel
5541 mov [edi+2],ax // store pixel 1
5543 add ebx,edx // increment u fraction
5544 movzx eax,byte ptr [esi] // get texture pixel 2
5546 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5547 add ecx,Tmap1.DeltaVFrac // increment v fraction
5549 sbb ebp,ebp // get -1 if carry
5550 mov ax, palman_8_16_xlat[eax*2] // translate 8-bit texel to 16-bit pixel
5551 mov [edi+4],ax // store pixel 2
5553 add ebx,edx // increment u fraction
5554 movzx eax,byte ptr [esi] // get texture pixel 3
5556 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5557 add ecx,Tmap1.DeltaVFrac // increment v fraction
5559 sbb ebp,ebp // get -1 if carry
5560 mov ax, palman_8_16_xlat[eax*2] // translate 8-bit texel to 16-bit pixel
5561 mov [edi+6],ax // store pixel 3
5563 add ebx,edx // increment u fraction
5564 movzx eax,byte ptr [esi] // get texture pixel 4
5566 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5567 add ecx,Tmap1.DeltaVFrac // increment v fraction
5569 sbb ebp,ebp // get -1 if carry
5570 mov ax, palman_8_16_xlat[eax*2] // translate 8-bit texel to 16-bit pixel
5571 mov [edi+8],ax // store pixel 4
5573 add ebx,edx // increment u fraction
5574 movzx eax,byte ptr [esi] // get texture pixel 5
5576 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5577 add ecx,Tmap1.DeltaVFrac // increment v fraction
5579 sbb ebp,ebp // get -1 if carry
5580 mov ax, palman_8_16_xlat[eax*2] // translate 8-bit texel to 16-bit pixel
5581 mov [edi+10],ax // store pixel 5
5583 add ebx,edx // increment u fraction
5584 movzx eax,byte ptr [esi] // get texture pixel 6
5586 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5587 add ecx,Tmap1.DeltaVFrac // increment v fraction
5589 sbb ebp,ebp // get -1 if carry
5590 mov ax, palman_8_16_xlat[eax*2] // translate 8-bit texel to 16-bit pixel
5591 mov [edi+12],ax // store pixel 6
5593 add ebx,edx // increment u fraction
5595 movzx eax,byte ptr [esi] // get texture pixel 7
5597 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5599 mov ax, palman_8_16_xlat[eax*2] // translate 8-bit texel to 16-bit pixel
5600 mov [edi+14],ax // store pixel 7
5606 dec Tmap1.num_big_steps // one 8-pixel group done
// Leftover pixels: handled two per pass, using the same stepping pattern.
5612 mov eax,Tmap1.loop_count
5617 mov Tmap1.loop_count, eax
5622 mov al,[edi] // preread the destination cache line
5623 // add ebx,edx // increment u fraction
5627 movzx eax,byte ptr [esi] // get texture pixel 0
5629 add ecx,Tmap1.DeltaVFrac // increment v fraction
5630 sbb ebp,ebp // get -1 if carry
5631 add ebx,edx // increment u fraction
5632 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5633 mov ax, palman_8_16_xlat[eax*2] // translate 8-bit texel to 16-bit pixel
5634 mov [edi+0],ax // store pixel 0
5636 add ecx,Tmap1.DeltaVFrac // increment v fraction
5637 sbb ebp,ebp // get -1 if carry
5638 add ebx,edx // increment u fraction
5639 movzx eax,byte ptr [esi] // get texture pixel 1
5641 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5642 mov ax, palman_8_16_xlat[eax*2] // translate 8-bit texel to 16-bit pixel
5643 mov [edi+2],ax // store pixel 1
5646 dec Tmap1.loop_count
// Single trailing pixel: read, translate, store; no further stepping follows.
5654 movzx eax,byte ptr [esi] // get the texel at the current position
5655 mov ax, palman_8_16_xlat[eax*2]
5656 mov [edi],ax // store it at the current destination pixel
// tmapscan_lnn32
// Draws one horizontal span of an 8-bit texture into a 32-bit destination row,
// stepping u and v by a constant amount per pixel. Each texel byte is translated
// through palman_8_32_xlat before being stored as a dword.
//   lx, y  - screen position of the first destination pixel (via GR_SCREEN_PTR)
//   rx     - right end of the span; loop_count starts out as rx - lx
//   p      - u, v at the left end of the span
//   dp     - per-pixel change in u, v
//   rp     - u, v at the right end; in the code shown it only feeds the range check
//   flags  - not referenced in the code shown
// All working state lives in the global Tmap1 structure.
5671 void tmapscan_lnn32( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
5673 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(uint,lx,y);
5674 Tmap1.loop_count = rx - lx;
5675 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
5676 Tmap1.bp = tmap_bitmap;
5677 Tmap1.src_offset = tmap_bitmap->w;
// Convert the floating point texture coordinates and steps with fl2f()
// (presumably float -> 16.16 fixed point, matching the "16.16" comments below).
5679 Tmap1.fx_u = fl2f(p->u);
5680 Tmap1.fx_v = fl2f(p->v);
5681 Tmap1.fx_du_dx = fl2f(dp->u);
5682 Tmap1.fx_dv_dx = fl2f(dp->v);
5683 Tmap1.fx_u_right = fl2f(rp->u);
5684 Tmap1.fx_v_right = fl2f(rp->v);
// Range check: draw nothing if f2i() of either end-point coordinate is at or
// beyond the bitmap width/height. Only the upper bound is tested here.
5688 end = f2i(Tmap1.fx_u);
5689 if ( end >= Tmap1.bp->w ) return;
5691 end = f2i(Tmap1.fx_v);
5692 if ( end >= Tmap1.bp->h ) return;
5694 end = f2i(Tmap1.fx_u_right);
5695 if ( end >= Tmap1.bp->w ) return;
5697 end = f2i(Tmap1.fx_v_right);
5698 if ( end >= Tmap1.bp->h ) return;
// Split each 16.16 step into an integer part (folded into a byte offset within
// the texture) and a fractional part shifted up to the top of a 32-bit register,
// so that fraction overflow shows up in the CPU carry flag.
5710 ; setup delta values
5711 mov eax, Tmap1.fx_dv_dx // get v 16.16 step
5712 mov ebx, eax // copy it
5713 sar eax, 16 // get v int step
5714 shl ebx, 16 // get v frac step
5715 mov Tmap1.DeltaVFrac, ebx // store it
5716 imul eax, Tmap1.src_offset // calc texture step for v int step
5718 mov ebx, Tmap1.fx_du_dx // get u 16.16 step
5719 mov ecx, ebx // copy it
5720 sar ebx, 16 // get the u int step
5721 shl ecx, 16 // get the u frac step
5722 mov Tmap1.DeltaUFrac, ecx // store it
5723 add eax, ebx // calc uint + vint step
5724 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
5725 add eax, Tmap1.src_offset // calc whole step + v carry
5726 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
5728 ; setup initial coordinates
5729 mov esi, Tmap1.fx_u // get u 16.16
5730 mov ebx, esi // copy it
5731 sar esi, 16 // get integer part
5732 shl ebx, 16 // get fractional part
5734 mov ecx, Tmap1.fx_v // get v 16.16
5735 mov edx, ecx // copy it
5736 sar edx, 16 // get integer part
5737 shl ecx, 16 // get fractional part
5738 imul edx, Tmap1.src_offset // calc texture scanline address
5739 add esi, edx // calc texture offset
5740 add esi, Tmap1.pixptr // calc address
5742 ; set edi = address of first pixel to modify
5743 mov edi, Tmap1.dest_row_data
5745 mov edx, Tmap1.DeltaUFrac
5747 mov eax, Tmap1.loop_count
5749 mov Tmap1.loop_count, eax
5754 mov Tmap1.num_big_steps, eax
5755 and Tmap1.loop_count, 7 // keep only the low 3 bits of the count for the leftover loop
5761 // 8 pixel span code
5762 // edi = dest dib bits at current pixel
5763 // esi = texture pointer at current u,v
5765 // ebx = u fraction 0.32
5766 // ecx = v fraction 0.32
5767 // edx = u frac step
5768 // ebp = v carry scratch
//
// Per-pixel stepping pattern used throughout:
//   add ecx,DeltaVFrac / sbb ebp,ebp  -> ebp = -1 if the v fraction wrapped, else 0
//   add ebx,edx                       -> carry flag set if the u fraction wrapped
//   adc esi,UVintVfracStep[4*ebp]     -> add the step selected by ebp plus the u carry
// The mov instructions placed between these do not modify flags, so the carry
// from "add ebx,edx" is still intact when the adc executes.
// NOTE(review): UVintVfracStep[0] and UVintVfracStep[-1] are presumably the
// VNoCarry / VCarry slots written above - confirm against the Tmap1 layout.
5770 mov al,[edi] // preread the destination cache line
5772 movzx eax,byte ptr [esi] // get texture pixel 0
5774 add ecx,Tmap1.DeltaVFrac // increment v fraction
5775 sbb ebp,ebp // get -1 if carry
5776 add ebx,edx // increment u fraction
5778 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5779 add ecx,Tmap1.DeltaVFrac // increment v fraction
5781 sbb ebp,ebp // get -1 if carry
5782 mov eax, palman_8_32_xlat[eax*4] // translate 8-bit texel to 32-bit pixel
5783 mov [edi+0],eax // store pixel 0
5785 add ebx,edx // increment u fraction
5786 movzx eax,byte ptr [esi] // get texture pixel 1
5788 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5789 add ecx,Tmap1.DeltaVFrac // increment v fraction
5791 sbb ebp,ebp // get -1 if carry
5792 mov eax, palman_8_32_xlat[eax*4] // translate 8-bit texel to 32-bit pixel
5793 mov [edi+4],eax // store pixel 1
5795 add ebx,edx // increment u fraction
5796 movzx eax,byte ptr [esi] // get texture pixel 2
5798 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5799 add ecx,Tmap1.DeltaVFrac // increment v fraction
5801 sbb ebp,ebp // get -1 if carry
5802 mov eax, palman_8_32_xlat[eax*4] // translate 8-bit texel to 32-bit pixel
5803 mov [edi+8],eax // store pixel 2
5805 add ebx,edx // increment u fraction
5806 movzx eax,byte ptr [esi] // get texture pixel 3
5808 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5809 add ecx,Tmap1.DeltaVFrac // increment v fraction
5811 sbb ebp,ebp // get -1 if carry
5812 mov eax, palman_8_32_xlat[eax*4] // translate 8-bit texel to 32-bit pixel
5813 mov [edi+12],eax // store pixel 3
5815 add ebx,edx // increment u fraction
5816 movzx eax,byte ptr [esi] // get texture pixel 4
5818 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5819 add ecx,Tmap1.DeltaVFrac // increment v fraction
5821 sbb ebp,ebp // get -1 if carry
5822 mov eax, palman_8_32_xlat[eax*4] // translate 8-bit texel to 32-bit pixel
5823 mov [edi+16],eax // store pixel 4
5825 add ebx,edx // increment u fraction
5826 movzx eax,byte ptr [esi] // get texture pixel 5
5828 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5829 add ecx,Tmap1.DeltaVFrac // increment v fraction
5831 sbb ebp,ebp // get -1 if carry
5832 mov eax, palman_8_32_xlat[eax*4] // translate 8-bit texel to 32-bit pixel
5833 mov [edi+20],eax // store pixel 5
5835 add ebx,edx // increment u fraction
5836 movzx eax,byte ptr [esi] // get texture pixel 6
5838 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5839 add ecx,Tmap1.DeltaVFrac // increment v fraction
5841 sbb ebp,ebp // get -1 if carry
5842 mov eax, palman_8_32_xlat[eax*4] // translate 8-bit texel to 32-bit pixel
5843 mov [edi+24],eax // store pixel 6
5845 add ebx,edx // increment u fraction
5847 movzx eax,byte ptr [esi] // get texture pixel 7
5849 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5851 mov eax, palman_8_32_xlat[eax*4] // translate 8-bit texel to 32-bit pixel
5852 mov [edi+28],eax // store pixel 7
5858 dec Tmap1.num_big_steps // one 8-pixel group done
// Leftover pixels: handled two per pass, using the same stepping pattern.
5864 mov eax,Tmap1.loop_count
5869 mov Tmap1.loop_count, eax
5874 mov al,[edi] // preread the destination cache line
5875 // add ebx,edx // increment u fraction
5879 movzx eax,byte ptr [esi] // get texture pixel 0
5881 add ecx,Tmap1.DeltaVFrac // increment v fraction
5882 sbb ebp,ebp // get -1 if carry
5883 add ebx,edx // increment u fraction
5884 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5885 mov eax, palman_8_32_xlat[eax*4] // translate 8-bit texel to 32-bit pixel
5886 mov [edi+0],eax // store pixel 0
5888 add ecx,Tmap1.DeltaVFrac // increment v fraction
5889 sbb ebp,ebp // get -1 if carry
5890 add ebx,edx // increment u fraction
5891 movzx eax,byte ptr [esi] // get texture pixel 1
5893 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
5894 mov eax, palman_8_32_xlat[eax*4] // translate 8-bit texel to 32-bit pixel
5895 mov [edi+4],eax // store pixel 1
5898 dec Tmap1.loop_count
// Single trailing pixel: read, translate, store; no further stepping follows.
5906 movzx eax,byte ptr [esi] // get the texel at the current position
5907 mov eax, palman_8_32_xlat[eax*4]
5908 mov [edi],eax // store it at the current destination pixel
5922 void tmapscan_pln32( int lx, int rx, int y, vertex *p, vertex *dp, vertex * rp,uint flags )
5924 Tmap1.dest_row_data = (ubyte *)GR_SCREEN_PTR(uint,lx,y);
5925 Tmap1.loop_count = rx - lx;
5926 Tmap1.fx_u = fl2f(p->u);
5927 Tmap1.fx_v = fl2f(p->v);
5928 Tmap1.fx_du_dx = fl2f(dp->u);
5929 Tmap1.fx_dv_dx = fl2f(dp->v);
5931 Tmap1.fx_l = fl2f(p->l*32.0);
5932 Tmap1.fx_dl_dx = fl2f(dp->l*32.0);
5934 Tmap1.fx_u_right = fl2f(rp->u);
5935 Tmap1.fx_v_right = fl2f(rp->v);
5936 Tmap1.pixptr = (unsigned char *)tmap_bitmap->data;
5937 Tmap1.bp = tmap_bitmap;
5938 Tmap1.src_offset = tmap_bitmap->w;
5941 Tmap1.FixedScale = 65536.0f;
5942 Tmap1.FixedScale8 = 2048.0f; //8192.0f; // 2^16 / 8
5946 Tmap1.UOverZ = p->u;
5947 Tmap1.VOverZ = p->v;
5948 Tmap1.OneOverZ = p->sw;
5950 Tmap1.dUOverZdX8 = dp->u*32.0f;
5951 Tmap1.dVOverZdX8 = dp->v*32.0f;
5952 Tmap1.dOneOverZdX8 = dp->sw*32.0f;
5954 Tmap1.dUOverZdX = dp->u;
5955 Tmap1.dVOverZdX = dp->v;
5956 Tmap1.dOneOverZdX = dp->sw;
5958 Tmap1.RightUOverZ = rp->u;
5959 Tmap1.RightVOverZ = rp->v;
5960 Tmap1.RightOneOverZ = rp->sw;
5963 Tmap1.BitmapWidth = Tmap1.bp->w;
5964 Tmap1.BitmapHeight = Tmap1.bp->h;
5967 if ( Tmap1.fx_dl_dx < 0 ) {
5968 Tmap1.fx_dl_dx = -Tmap1.fx_dl_dx;
5969 Tmap1.fx_l = (67*F1_0)-Tmap1.fx_l;
5970 Tmap1.fx_l_right = (67*F1_0)-Tmap1.fx_l_right;
5972 // Assert( Tmap1.fx_l > 31*F1_0 );
5973 // Assert( Tmap1.fx_l < 66*F1_0 );
5974 // Assert( Tmap1.fx_dl_dx >= 0 );
5975 // Assert( Tmap1.fx_dl_dx < 31*F1_0 );
5993 // put the FPU in 32 bit mode
5994 // @todo move this out of here!
5996 fstcw Tmap1.OldFPUCW // store copy of CW
5997 mov ax,Tmap1.OldFPUCW // get it in ax
5998 //hh and eax,NOT 1100000000y // 24 bit precision
6000 mov Tmap1.FPUCW,ax // store it
6001 fldcw Tmap1.FPUCW // load the FPU
6003 mov ecx, Tmap1.loop_count // ecx = width
6005 mov edi, Tmap1.dest_row_data // edi = dest pointer
6007 // edi = pointer to start pixel in dest dib
6010 mov eax,ecx // eax and ecx = width
6011 shr ecx,5 // ecx = width / subdivision length
6012 and eax,31 // eax = width mod subdivision length
6013 jnz some_left_over // any leftover?
6015 dec ecx // no, so special case last span
6016 mov eax,32 // it's 8 pixels long
6018 mov Tmap1.Subdivisions,ecx // store widths
6019 mov Tmap1.WidthModLength,eax
6021 // mov ebx,pLeft ; get left edge pointer
6022 // mov edx,pGradients ; get gradients pointer
6024 // calculate ULeft and VLeft // FPU Stack (ZL = ZLeft)
6025 // st0 st1 st2 st3 st4 st5 st6 st7
6026 fld Tmap1.VOverZ // V/ZL
6027 fld Tmap1.UOverZ // U/ZL V/ZL
6028 fld Tmap1.OneOverZ // 1/ZL U/ZL V/ZL
6029 fld1 // 1 1/ZL U/ZL V/ZL
6030 fdiv st,st(1) // ZL 1/ZL U/ZL V/ZL
6031 fld st // ZL ZL 1/ZL U/ZL V/ZL
6032 fmul st,st(4) // VL ZL 1/ZL U/ZL V/ZL
6033 fxch st(1) // ZL VL 1/ZL U/ZL V/ZL
6034 fmul st,st(3) // UL VL 1/ZL U/ZL V/ZL
6036 fstp st(5) // VL 1/ZL U/ZL V/ZL UL
6037 fstp st(5) // 1/ZL U/ZL V/ZL UL VL
6039 // calculate right side OverZ terms ; st0 st1 st2 st3 st4 st5 st6 st7
6041 fadd Tmap1.dOneOverZdX8 // 1/ZR U/ZL V/ZL UL VL
6042 fxch st(1) // U/ZL 1/ZR V/ZL UL VL
6043 fadd Tmap1.dUOverZdX8 // U/ZR 1/ZR V/ZL UL VL
6044 fxch st(2) // V/ZL 1/ZR U/ZR UL VL
6045 fadd Tmap1.dVOverZdX8 // V/ZR 1/ZR U/ZR UL VL
6047 // calculate right side coords // st0 st1 st2 st3 st4 st5 st6 st7
6049 fld1 // 1 V/ZR 1/ZR U/ZR UL VL
6050 // @todo overlap this guy
6051 fdiv st,st(2) // ZR V/ZR 1/ZR U/ZR UL VL
6052 fld st // ZR ZR V/ZR 1/ZR U/ZR UL VL
6053 fmul st,st(2) // VR ZR V/ZR 1/ZR U/ZR UL VL
6054 fxch st(1) // ZR VR V/ZR 1/ZR U/ZR UL VL
6055 fmul st,st(4) // UR VR V/ZR 1/ZR U/ZR UL VL
6057 cmp ecx,0 // check for any full spans
6058 jle HandleLeftoverPixels
6062 // at this point the FPU contains // st0 st1 st2 st3 st4 st5 st6 st7
6063 // UR VR V/ZR 1/ZR U/ZR UL VL
6065 // convert left side coords
6067 fld st(5) ; UL UR VR V/ZR 1/ZR U/ZR UL VL
6068 fmul Tmap1.FixedScale ; UL16 UR VR V/ZR 1/ZR U/ZR UL VL
6069 fistp Tmap1.UFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
6071 fld st(6) ; VL UR VR V/ZR 1/ZR U/ZR UL VL
6072 fmul Tmap1.FixedScale ; VL16 UR VR V/ZR 1/ZR U/ZR UL VL
6073 fistp Tmap1.VFixed ; UR VR V/ZR 1/ZR U/ZR UL VL
6075 // calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
6077 fsubr st(5),st ; UR VR V/ZR 1/ZR U/ZR dU VL
6078 fxch st(1) ; VR UR V/ZR 1/ZR U/ZR dU VL
6079 fsubr st(6),st ; VR UR V/ZR 1/ZR U/ZR dU dV
6080 fxch st(6) ; dV UR V/ZR 1/ZR U/ZR dU VR
6082 fmul Tmap1.FixedScale8 ; dV8 UR V/ZR 1/ZR U/ZR dU VR
6083 fistp Tmap1.DeltaV ; UR V/ZR 1/ZR U/ZR dU VR
6085 fxch st(4) ; dU V/ZR 1/ZR U/ZR UR VR
6086 fmul Tmap1.FixedScale8 ; dU8 V/ZR 1/ZR U/ZR UR VR
6087 fistp Tmap1.DeltaU ; V/ZR 1/ZR U/ZR UR VR
6089 // increment terms for next span ; st0 st1 st2 st3 st4 st5 st6 st7
6090 // Right terms become Left terms---->; V/ZL 1/ZL U/ZL UL VL
6092 fadd Tmap1.dVOverZdX8 ; V/ZR 1/ZL U/ZL UL VL
6093 fxch st(1) ; 1/ZL V/ZR U/ZL UL VL
6094 fadd Tmap1.dOneOverZdX8 ; 1/ZR V/ZR U/ZL UL VL
6095 fxch st(2) ; U/ZL V/ZR 1/ZR UL VL
6096 fadd Tmap1.dUOverZdX8 ; U/ZR V/ZR 1/ZR UL VL
6097 fxch st(2) ; 1/ZR V/ZR U/ZR UL VL
6098 fxch st(1) ; V/ZR 1/ZR U/ZR UL VL
6100 ; calculate right side coords ; st0 st1 st2 st3 st4 st5 st6 st7
6102 fld1 ; 1 V/ZR 1/ZR U/ZR UL VL
6103 fdiv st,st(2) ; ZR V/ZR 1/ZR U/ZR UL VL
6106 ; set up affine registers
6108 ; setup delta values
6110 mov eax,Tmap1.DeltaV ; get v 16.16 step
6111 mov ebx,eax ; copy it
6112 sar eax,16 ; get v int step
6113 shl ebx,16 ; get v frac step
6114 mov Tmap1.DeltaVFrac,ebx ; store it
6115 imul eax,Tmap1.src_offset ; calculate texture step for v int step
6117 mov ebx,Tmap1.DeltaU ; get u 16.16 step
6118 mov ecx,ebx ; copy it
6119 sar ebx,16 ; get u int step
6120 shl ecx,16 ; get u frac step
6121 mov Tmap1.DeltaUFrac,ecx ; store it
6122 add eax,ebx ; calculate uint + vint step
6123 mov Tmap1.UVintVfracStepVNoCarry,eax; save whole step in non-v-carry slot
6124 add eax,Tmap1.src_offset ; calculate whole step + v carry
6125 mov Tmap1.UVintVfracStepVCarry,eax ; save in v-carry slot
6129 ; check coordinate ranges
6130 mov eax, Tmap1.UFixed
6131 cmp eax, Tmap1.MinUFixed
6133 mov eax, Tmap1.MinUFixed
6134 mov Tmap1.UFixed, eax
6137 cmp eax, Tmap1.MaxUFixed
6139 mov eax, Tmap1.MaxUFixed
6140 mov Tmap1.UFixed, eax
6142 mov eax, Tmap1.VFixed
6143 cmp eax, Tmap1.MinVFixed
6145 mov eax, Tmap1.MinVFixed
6146 mov Tmap1.VFixed, eax
6149 cmp eax, Tmap1.MaxVFixed
6151 mov eax, Tmap1.MaxVFixed
6152 mov Tmap1.VFixed, eax
6156 ; setup initial coordinates
6157 mov esi,Tmap1.UFixed ; get u 16.16 fixedpoint coordinate
6159 mov ebx,esi ; copy it
6160 sar esi,16 ; get integer part
6161 shl ebx,16 ; get fractional part
6163 mov ecx,Tmap1.VFixed ; get v 16.16 fixedpoint coordinate
6165 mov edx,ecx ; copy it
6166 sar edx,16 ; get integer part
6167 shl ecx,16 ; get fractional part
6168 imul edx,Tmap1.src_offset ; calc texture scanline address
6169 add esi,edx ; calc texture offset
6170 add esi,Tmap1.pixptr ; calc address
6172 mov edx,Tmap1.DeltaUFrac ; get register copy
6178 mov ebp, Tmap1.fx_dl_dx
6189 // add Tmap1.fx_l, eax
6192 // mov eax, Tmap1.fx_l // use bx and dx to do lighting
6195 // mov eax, Tmap1.fx_dl_dx // use bx and dx to do lighting
6201 ; ************** Can't Access Stack Frame ******************
6202 ; ************** Can't Access Stack Frame ******************
6203 ; ************** Can't Access Stack Frame ******************
6205 // 32 pixel span code
6206 // edi = dest dib bits at current pixel
6207 // esi = texture pointer at current u,v
6209 // ebx = u fraction 0.32
6210 // ecx = v fraction 0.32
6211 // edx = u frac step
6212 // ebp = v carry scratch
6214 mov al,[edi] // preread the destination cache line
6217 movzx eax,byte ptr [esi] // get texture pixel 0
6219 mov eax, gr_fade_table32[eax*4]
6221 add ecx,Tmap1.DeltaVFrac // increment v fraction
6222 sbb ebp,ebp // get -1 if carry
6223 add ebx,edx // increment u fraction
6225 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6226 add ecx,Tmap1.DeltaVFrac // increment v fraction
6228 sbb ebp,ebp // get -1 if carry
6229 // mov al, 0 // Uncomment this line to show divisions
6230 mov [edi+0],eax // store pixel 0
6232 add ebx,edx // increment u fraction
6233 movzx eax,byte ptr [esi] // get texture pixel 0
6235 mov eax, gr_fade_table32[eax*4]
6237 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6238 add ecx,Tmap1.DeltaVFrac // increment v fraction
6240 sbb ebp,ebp // get -1 if carry
6241 mov [edi+4],eax // store pixel 1
6243 add ebx,edx // increment u fraction
6244 movzx eax,byte ptr [esi] // get texture pixel 0
6246 mov eax, gr_fade_table32[eax*4]
6248 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6249 add ecx,Tmap1.DeltaVFrac // increment v fraction
6251 sbb ebp,ebp // get -1 if carry
6252 mov [edi+8],eax // store pixel 2
6254 add ebx,edx // increment u fraction
6255 movzx eax,byte ptr [esi] // get texture pixel 0
6257 mov eax, gr_fade_table32[eax*4]
6259 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6260 add ecx,Tmap1.DeltaVFrac // increment v fraction
6262 sbb ebp,ebp // get -1 if carry
6263 mov [edi+12],eax // store pixel 3
6265 add ebx,edx // increment u fraction
6266 movzx eax,byte ptr [esi] // get texture pixel 0
6268 mov eax, gr_fade_table32[eax*4]
6269 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6270 add ecx,Tmap1.DeltaVFrac // increment v fraction
6272 sbb ebp,ebp // get -1 if carry
6273 mov [edi+16],eax // store pixel 3
6275 add ebx,edx // increment u fraction
6276 movzx eax,byte ptr [esi] // get texture pixel 0
6278 mov eax, gr_fade_table32[eax*4]
6279 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6280 add ecx,Tmap1.DeltaVFrac // increment v fraction
6282 sbb ebp,ebp // get -1 if carry
6283 mov [edi+20],eax // store pixel 3
6285 add ebx,edx // increment u fraction
6286 movzx eax,byte ptr [esi] // get texture pixel 0
6288 mov eax, gr_fade_table32[eax*4]
6289 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6290 add ecx,Tmap1.DeltaVFrac // increment v fraction
6292 sbb ebp,ebp // get -1 if carry
6293 mov [edi+24],eax // store pixel 3
6295 add ebx,edx // increment u fraction
6296 movzx eax,byte ptr [esi] // get texture pixel 0
6298 mov eax, gr_fade_table32[eax*4]
6299 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6300 add ecx,Tmap1.DeltaVFrac // increment v fraction
6302 sbb ebp,ebp // get -1 if carry
6303 mov [edi+28],eax // store pixel 3
6305 add ebx,edx // increment u fraction
6306 movzx eax,byte ptr [esi] // get texture pixel 0
6308 mov eax, gr_fade_table32[eax*4]
6309 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6310 add ecx,Tmap1.DeltaVFrac // increment v fraction
6312 sbb ebp,ebp // get -1 if carry
6313 mov [edi+32],eax // store pixel 3
6315 add ebx,edx // increment u fraction
6316 movzx eax,byte ptr [esi] // get texture pixel 0
6318 mov eax, gr_fade_table32[eax*4]
6319 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6320 add ecx,Tmap1.DeltaVFrac // increment v fraction
6322 sbb ebp,ebp // get -1 if carry
6323 mov [edi+36],eax // store pixel 3
6325 add ebx,edx // increment u fraction
6326 movzx eax,byte ptr [esi] // get texture pixel 0
6328 mov eax, gr_fade_table32[eax*4]
6329 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6330 add ecx,Tmap1.DeltaVFrac // increment v fraction
6332 sbb ebp,ebp // get -1 if carry
6333 mov [edi+40],eax // store pixel 3
6335 add ebx,edx // increment u fraction
6336 movzx eax,byte ptr [esi] // get texture pixel 0
6338 mov eax, gr_fade_table32[eax*4]
6341 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6342 add ecx,Tmap1.DeltaVFrac // increment v fraction
6344 sbb ebp,ebp // get -1 if carry
6345 mov [edi+44],eax // store pixel 3
6347 add ebx,edx // increment u fraction
6348 movzx eax,byte ptr [esi] // get texture pixel 0
6350 mov eax, gr_fade_table32[eax*4]
6353 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6354 add ecx,Tmap1.DeltaVFrac // increment v fraction
6356 sbb ebp,ebp // get -1 if carry
6357 mov [edi+48],eax // store pixel 3
6359 add ebx,edx // increment u fraction
6360 movzx eax,byte ptr [esi] // get texture pixel 0
6362 mov eax, gr_fade_table32[eax*4]
6365 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6366 add ecx,Tmap1.DeltaVFrac // increment v fraction
6368 sbb ebp,ebp // get -1 if carry
6369 mov [edi+52],eax // store pixel 3
6371 add ebx,edx // increment u fraction
6372 movzx eax,byte ptr [esi] // get texture pixel 0
6374 mov eax, gr_fade_table32[eax*4]
6377 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6378 add ecx,Tmap1.DeltaVFrac // increment v fraction
6380 sbb ebp,ebp // get -1 if carry
6381 mov [edi+56],eax // store pixel 3
6383 add ebx,edx // increment u fraction
6384 movzx eax,byte ptr [esi] // get texture pixel 0
6386 mov eax, gr_fade_table32[eax*4]
6389 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6390 add ecx,Tmap1.DeltaVFrac // increment v fraction
6392 sbb ebp,ebp // get -1 if carry
6393 mov [edi+60],eax // store pixel 3
6395 add ebx,edx // increment u fraction
6396 movzx eax,byte ptr [esi] // get texture pixel 0
6398 mov eax, gr_fade_table32[eax*4]
6401 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6402 add ecx,Tmap1.DeltaVFrac // increment v fraction
6404 sbb ebp,ebp // get -1 if carry
6405 mov [edi+64],eax // store pixel 3
6407 add ebx,edx // increment u fraction
6408 movzx eax,byte ptr [esi] // get texture pixel 0
6410 mov eax, gr_fade_table32[eax*4]
6413 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6414 add ecx,Tmap1.DeltaVFrac // increment v fraction
6416 sbb ebp,ebp // get -1 if carry
6417 mov [edi+68],eax // store pixel 3
6419 add ebx,edx // increment u fraction
6420 movzx eax,byte ptr [esi] // get texture pixel 0
6422 mov eax, gr_fade_table32[eax*4]
6425 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6426 add ecx,Tmap1.DeltaVFrac // increment v fraction
6428 sbb ebp,ebp // get -1 if carry
6429 mov [edi+72],eax // store pixel 3
6431 add ebx,edx // increment u fraction
6432 movzx eax,byte ptr [esi] // get texture pixel 0
6434 mov eax, gr_fade_table32[eax*4]
6437 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6438 add ecx,Tmap1.DeltaVFrac // increment v fraction
6440 sbb ebp,ebp // get -1 if carry
6441 mov [edi+76],eax // store pixel 3
6443 add ebx,edx // increment u fraction
6444 movzx eax,byte ptr [esi] // get texture pixel 0
6446 mov eax, gr_fade_table32[eax*4]
6449 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6450 add ecx,Tmap1.DeltaVFrac // increment v fraction
6452 sbb ebp,ebp // get -1 if carry
6453 mov [edi+80],eax // store pixel 3
6455 add ebx,edx // increment u fraction
6456 movzx eax,byte ptr [esi] // get texture pixel 0
6458 mov eax, gr_fade_table32[eax*4]
6461 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6462 add ecx,Tmap1.DeltaVFrac // increment v fraction
6464 sbb ebp,ebp // get -1 if carry
6465 mov [edi+84],eax // store pixel 3
6467 add ebx,edx // increment u fraction
6468 movzx eax,byte ptr [esi] // get texture pixel 0
6470 mov eax, gr_fade_table32[eax*4]
6473 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6474 add ecx,Tmap1.DeltaVFrac // increment v fraction
6476 sbb ebp,ebp // get -1 if carry
6477 mov [edi+88],eax // store pixel 3
6479 add ebx,edx // increment u fraction
6480 movzx eax,byte ptr [esi] // get texture pixel 0
6482 mov eax, gr_fade_table32[eax*4]
6485 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6486 add ecx,Tmap1.DeltaVFrac // increment v fraction
6488 sbb ebp,ebp // get -1 if carry
6489 mov [edi+92],eax // store pixel 3
6491 add ebx,edx // increment u fraction
6492 movzx eax,byte ptr [esi] // get texture pixel 0
6494 mov eax, gr_fade_table32[eax*4]
6497 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6498 add ecx,Tmap1.DeltaVFrac // increment v fraction
6500 sbb ebp,ebp // get -1 if carry
6501 mov [edi+96],eax // store pixel 3
6503 add ebx,edx // increment u fraction
6504 movzx eax,byte ptr [esi] // get texture pixel 0
6506 mov eax, gr_fade_table32[eax*4]
6509 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6510 add ecx,Tmap1.DeltaVFrac // increment v fraction
6512 sbb ebp,ebp // get -1 if carry
6513 mov [edi+100],eax // store pixel 3
6515 add ebx,edx // increment u fraction
6516 movzx eax,byte ptr [esi] // get texture pixel 0
6518 mov eax, gr_fade_table32[eax*4]
6521 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6522 add ecx,Tmap1.DeltaVFrac // increment v fraction
6526 sbb ebp,ebp // get -1 if carry
6527 mov [edi+104],eax // store pixel 3
6529 add ebx,edx // increment u fraction
6530 movzx eax,byte ptr [esi] // get texture pixel 0
6532 mov eax, gr_fade_table32[eax*4]
6535 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6536 add ecx,Tmap1.DeltaVFrac // increment v fraction
6538 sbb ebp,ebp // get -1 if carry
6539 mov [edi+108],eax // store pixel 3
6541 add ebx,edx // increment u fraction
6542 movzx eax,byte ptr [esi] // get texture pixel 0
6544 mov eax, gr_fade_table32[eax*4]
6546 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6547 add ecx,Tmap1.DeltaVFrac // increment v fraction
6549 sbb ebp,ebp // get -1 if carry
6550 mov [edi+112],eax // store pixel 4
6552 add ebx,edx // increment u fraction
6553 movzx eax,byte ptr [esi] // get texture pixel 0
6555 mov eax, gr_fade_table32[eax*4]
6557 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6558 add ecx,Tmap1.DeltaVFrac // increment v fraction
6560 sbb ebp,ebp // get -1 if carry
6561 mov [edi+116],eax // store pixel 5
6563 add ebx,edx // increment u fraction
6564 movzx eax,byte ptr [esi] // get texture pixel 0
6566 mov eax, gr_fade_table32[eax*4]
6568 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6569 add ecx,Tmap1.DeltaVFrac // increment v fraction
6571 sbb ebp,ebp // get -1 if carry
6572 mov [edi+120],eax // store pixel 6
6574 add ebx,edx // increment u fraction
6576 movzx eax,byte ptr [esi] // get texture pixel 0
6578 mov eax, gr_fade_table32[eax*4]
6580 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6582 mov [edi+124],eax // store pixel 7
6586 ; ************** Okay to Access Stack Frame ****************
6587 ; ************** Okay to Access Stack Frame ****************
6588 ; ************** Okay to Access Stack Frame ****************
6591 ; the fdiv is done, finish right ; st0 st1 st2 st3 st4 st5 st6 st7
6592 ; ZR V/ZR 1/ZR U/ZR UL VL
6594 fld st ; ZR ZR V/ZR 1/ZR U/ZR UL VL
6595 fmul st,st(2) ; VR ZR V/ZR 1/ZR U/ZR UL VL
6596 fxch st(1) ; ZR VR V/ZR 1/ZR U/ZR UL VL
6597 fmul st,st(4) ; UR VR V/ZR 1/ZR U/ZR UL VL
6599 add edi,128 ; increment to next span
6600 dec Tmap1.Subdivisions ; decrement span count
6601 jnz SpanLoop ; loop back
6603 // save new lighting values
6606 // mov Tmap1.fx_l, eax
6610 // mov Tmap1.fx_dl_dx, eax
6612 HandleLeftoverPixels:
6615 mov esi,Tmap1.pixptr ; load texture pointer
6617 ; edi = dest dib bits
6618 ; esi = current texture dib bits
6619 ; at this point the FPU contains ; st0 st1 st2 st3 st4 st5 st6 st7
6620 ; inv. means invalid numbers ; inv. inv. inv. inv. inv. UL VL
6622 cmp Tmap1.WidthModLength,0 ; are there remaining pixels to draw?
6623 jz FPUReturn ; nope, pop the FPU and bail
6625 ; convert left side coords ; st0 st1 st2 st3 st4 st5 st6 st7
6627 fld st(5) ; UL inv. inv. inv. inv. inv. UL VL
6628 fmul Tmap1.FixedScale ; UL16 inv. inv. inv. inv. inv. UL VL
6629 fistp Tmap1.UFixed ; inv. inv. inv. inv. inv. UL VL
6631 fld st(6) ; VL inv. inv. inv. inv. inv. UL VL
6632 fmul Tmap1.FixedScale // VL16 inv. inv. inv. inv. inv. UL VL
6633 fistp Tmap1.VFixed ; inv. inv. inv. inv. inv. UL VL
6635 dec Tmap1.WidthModLength ; calc how many steps to take
6636 jz OnePixelSpan ; just one, don't do deltas
6638 ; calculate right edge coordinates ; st0 st1 st2 st3 st4 st5 st6 st7
6641 ; @todo rearrange things so we don't need these two pops (they discard two invalid entries so the three loads below fit on the FPU stack)
6642 fstp Tmap1.FloatTemp ; inv. inv. inv. inv. UL VL
6643 fstp Tmap1.FloatTemp ; inv. inv. inv. UL VL
6645 fld Tmap1.RightVOverZ ; V/Zr inv. inv. inv. UL VL
6646 fsub Tmap1.dVOverZdX ; V/ZR inv. inv. inv. UL VL
6647 fld Tmap1.RightUOverZ ; U/Zr V/ZR inv. inv. inv. UL VL
6648 fsub Tmap1.dUOverZdX ; U/ZR V/ZR inv. inv. inv. UL VL
6649 fld Tmap1.RightOneOverZ ; 1/Zr U/ZR V/ZR inv. inv. inv. UL VL
6650 fsub Tmap1.dOneOverZdX ; 1/ZR U/ZR V/ZR inv. inv. inv. UL VL
6652 fdivr Tmap1.One ; ZR U/ZR V/ZR inv. inv. inv. UL VL
6654 fmul st(1),st ; ZR UR V/ZR inv. inv. inv. UL VL
6655 fmulp st(2),st ; UR VR inv. inv. inv. UL VL
6657 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
6659 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
6660 fxch st(1) ; VR UR inv. inv. inv. dU VL
6661 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
6662 fxch st(6) ; dV UR inv. inv. inv. dU VR
6664 fidiv Tmap1.WidthModLength ; dv UR inv. inv. inv. dU VR
6665 fmul Tmap1.FixedScale ; dv16 UR inv. inv. inv. dU VR
6666 fistp Tmap1.DeltaV ; UR inv. inv. inv. dU VR
6668 fxch st(4) ; dU inv. inv. inv. UR VR
6669 fidiv Tmap1.WidthModLength ; du inv. inv. inv. UR VR
6670 fmul Tmap1.FixedScale ; du16 inv. inv. inv. UR VR
6671 fistp Tmap1.DeltaU ; inv. inv. inv. UR VR
6673 ; @todo gross! these two loads only pad the FPU stack back up to seven entries so it lines up with the other loop
6674 fld st(1) ; inv. inv. inv. inv. UR VR
6675 fld st(2) ; inv. inv. inv. inv. inv. UR VR
6680 ; setup delta values
6681 mov eax, Tmap1.DeltaV // get v 16.16 step
6682 mov ebx, eax // copy it
6683 sar eax, 16 // get v int step
6684 shl ebx, 16 // get v frac step
6685 mov Tmap1.DeltaVFrac, ebx // store it
6686 imul eax, Tmap1.src_offset // calc texture step for v int step
6688 mov ebx, Tmap1.DeltaU // get u 16.16 step
6689 mov ecx, ebx // copy it
6690 sar ebx, 16 // get the u int step
6691 shl ecx, 16 // get the u frac step
6692 mov Tmap1.DeltaUFrac, ecx // store it
6693 add eax, ebx // calc uint + vint step
6694 mov Tmap1.UVintVfracStepVNoCarry, eax // save whole step in non-v-carry slot
6695 add eax, Tmap1.src_offset // calc whole step + v carry
6696 mov Tmap1.UVintVfracStepVCarry, eax // save in v-carry slot
6703 ; check coordinate ranges
6704 mov eax, Tmap1.UFixed
6705 cmp eax, Tmap1.MinUFixed
6707 mov eax, Tmap1.MinUFixed
6708 mov Tmap1.UFixed, eax
6711 cmp eax, Tmap1.MaxUFixed
6713 mov eax, Tmap1.MaxUFixed
6714 mov Tmap1.UFixed, eax
6716 mov eax, Tmap1.VFixed
6717 cmp eax, Tmap1.MinVFixed
6719 mov eax, Tmap1.MinVFixed
6720 mov Tmap1.VFixed, eax
6723 cmp eax, Tmap1.MaxVFixed
6725 mov eax, Tmap1.MaxVFixed
6726 mov Tmap1.VFixed, eax
6733 ; setup initial coordinates
6734 mov esi, Tmap1.UFixed // get u 16.16
6735 mov ebx, esi // copy it
6736 sar esi, 16 // get integer part
6737 shl ebx, 16 // get fractional part
6739 mov ecx, Tmap1.VFixed // get v 16.16
6740 mov edx, ecx // copy it
6741 sar edx, 16 // get integer part
6742 shl ecx, 16 // get fractional part
6743 imul edx, Tmap1.src_offset // calc texture scanline address
6744 add esi, edx // calc texture offset
6745 add esi, Tmap1.pixptr // calc address
6747 ; set edi = address of first pixel to modify
6748 ; mov edi, Tmap1.dest_row_data
6757 mov edx, Tmap1.DeltaUFrac
6759 cmp Tmap1.WidthModLength, 1
6764 mov ebx, Tmap1.fx_l_right
6771 // slow but maybe better
6774 mov ebx, Tmap1.WidthModLength
6779 mov eax, Tmap1.fx_dl_dx
6789 inc Tmap1.WidthModLength
6790 mov eax,Tmap1.WidthModLength
6794 mov Tmap1.WidthModLength, eax
6798 mov al,[edi] // preread the destination cache line
6801 movzx eax,byte ptr [esi] // get texture pixel 0
6803 mov eax, gr_fade_table32[eax*4]
6805 add ecx,Tmap1.DeltaVFrac // increment v fraction
6806 sbb ebp,ebp // get -1 if carry
6807 add ebx,edx // increment u fraction
6808 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6809 mov [edi+0],eax // store pixel 0
6811 add ecx,Tmap1.DeltaVFrac // increment v fraction
6812 sbb ebp,ebp // get -1 if carry
6813 add ebx,edx // increment u fraction
6814 movzx eax,byte ptr [esi] // get texture pixel 0
6816 mov eax, gr_fade_table32[eax*4]
6818 adc esi,UVintVfracStep[4*ebp] // add in step ints & carries
6819 mov [edi+4],eax // store pixel 1
6822 dec Tmap1.WidthModLength
6830 movzx eax,byte ptr [esi] // get texture pixel 0
6832 mov eax, gr_fade_table32[eax*4]
6833 mov [edi],eax // store pixel 2
6848 OldWay: // This is 6% slower than above
6850 mov ebx,Tmap1.UFixed ; get starting coordinates
6851 mov ecx,Tmap1.VFixed ; for span
6853 ; leftover pixels loop
6854 ; edi = dest dib bits
6855 ; esi = texture dib bits
6861 mov eax,ecx ; copy v
6863 imul eax,Tmap1.src_offset ; scan offset
6864 mov edx,ebx ; copy u
6866 add eax,edx ; texture offset
6867 mov al,[esi+eax] ; get source pixel
6869 mov [edi],al ; store it
6871 add ebx,Tmap1.DeltaU ; increment u coordinate
6872 add ecx,Tmap1.DeltaV ; increment v coordinate
6874 dec Tmap1.WidthModLength ; decrement loop count
6875 jl FPUReturn ; finish up
6879 mov eax,ecx ; copy v
6881 imul eax,Tmap1.src_offset ; scan offset
6882 mov edx,ebx ; copy u
6884 add eax,edx ; texture offset
6885 mov al,[esi+eax] ; get source pixel
6886 mov [edi],al ; store it
6888 add ebx,Tmap1.DeltaU ; increment u coordinate
6889 add ecx,Tmap1.DeltaV ; increment v coordinate
6891 dec Tmap1.WidthModLength ; decrement loop count
6892 jge LeftoverLoop ; finish up
6897 ; busy FPU registers: ; st0 st1 st2 st3 st4 st5 st6 st7
6898 ; xxx xxx xxx xxx xxx xxx xxx
6909 fldcw Tmap1.OldFPUCW // restore the FPU
6926 add edx,DeltaVFrac ; Add in 0.32 DeltaVFrac to VFrac
6927 sbb ebp,ebp ; ebp will equal -1 if there was a carry
6928 mov BYTE PTR [edi], al ; blit destination pixel
6929 mov al, BYTE PTR [esi] ; get next texel
6930 add ecx,ebx ; add 0.32 DeltaUFrac to UFrac, plus light
6931 adc esi, [UVStepCarry1+(ebp*4)]
6932 mov ah, ch ; move lighting value into place
6933 mov al, ShadeTable[eax] ; Get shaded pixel