adding support for runtime selection of tmap funcs
[btb/d2x.git] / texmap / scanline.c
1 /*
2 THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
3 SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
4 END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
5 ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
6 IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
7 SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
8 FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
9 CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
10 AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.  
11 COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
12 */
13 /*
14  * $Source: /cvs/cvsroot/d2x/texmap/scanline.c,v $
15  * $Revision: 1.3 $
16  * $Author: bradleyb $
17  * $Date: 2001-10-25 02:22:46 $
18  * 
19  * Routines to draw the texture mapped scanlines.
20  * 
21  * $Log: not supported by cvs2svn $
22  * Revision 1.2  2001/01/31 15:18:04  bradleyb
23  * Makefile and conf.h fixes
24  *
25  * Revision 1.1.1.1  2001/01/19 03:30:16  bradleyb
26  * Import of d2x-0.0.8
27  *
28  * Revision 1.4  1999/10/18 00:31:01  donut
29  * more alpha fixes from Falk Hueffner
30  *
31  * Revision 1.3  1999/10/14 04:48:21  donut
32  * alpha fixes, and gl_font args
33  *
34  * Revision 1.2  1999/09/21 07:22:40  sekmu
35  * remove unused var warning
36  *
37  * Revision 1.1.1.1  1999/06/14 22:14:08  donut
38  * Import of d1x 1.37 source.
39  *
40  * Revision 1.2  1995/02/20  18:23:39  john
41  * Added new module for C versions of inner loops.
42  * 
43  * Revision 1.1  1995/02/20  17:42:27  john
44  * Initial revision
45  * 
46  * 
47  */
48
49 #ifdef HAVE_CONFIG_H
50 #include <conf.h>
51 #endif
52
53 #ifdef RCS
54 static char rcsid[] = "$Id: scanline.c,v 1.3 2001-10-25 02:22:46 bradleyb Exp $";
55 #endif
56
57 #include <math.h>
58 #include <limits.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62
63 #include "maths.h"
64 #include "mono.h"
65 #include "gr.h"
66 #include "grdef.h"
67 #include "texmap.h"
68 #include "texmapl.h"
69 #include "scanline.h"
70
71 void c_tmap_scanline_flat()
72 {
73         ubyte *dest;
74 //        int x;
75
76         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y )  );
77
78 /*      for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
79                 *dest++ = tmap_flat_color;
80         }*/
81         memset(dest,tmap_flat_color,fx_xright-fx_xleft+1);
82 }
83
84 void c_tmap_scanline_shaded()
85 {
86         int fade;
87         ubyte *dest, tmp;
88         int x;
89
90         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
91
92         fade = tmap_flat_shade_value<<8;
93         for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
94                 tmp = *dest;
95                 *dest++ = gr_fade_table[ fade |(tmp)];
96         }
97 }
98
99 void c_tmap_scanline_lin_nolight()
100 {
101         ubyte *dest;
102         uint c;
103         int x;
104         fix u,v,dudx, dvdx;
105
106         u = fx_u;
107         v = fx_v*64;
108         dudx = fx_du_dx; 
109         dvdx = fx_dv_dx*64; 
110
111         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
112
113         if (!Transparency_on)   {
114                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
115                         *dest++ = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
116                         u += dudx;
117                         v += dvdx;
118                 }
119         } else {
120                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
121                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
122                         if ( c!=255)
123                                 *dest = c;
124                         dest++;
125                         u += dudx;
126                         v += dvdx;
127                 }
128         }
129 }
130
131
132 #if 1
133 void c_tmap_scanline_lin()
134 {
135         ubyte *dest;
136         uint c;
137         int x, j;
138         fix u,v,l,dudx, dvdx, dldx;
139
140         u = fx_u;
141         v = fx_v*64;
142         dudx = fx_du_dx; 
143         dvdx = fx_dv_dx*64; 
144
145         l = fx_l>>8;
146         dldx = fx_dl_dx>>8;
147         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
148
149         if (!Transparency_on)   {
150                 ubyte*                  pixPtrLocalCopy = pixptr;
151                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
152                 unsigned long   destlong;
153
154                 x = fx_xright-fx_xleft+1;
155
156                 if ((j = (unsigned long) dest & 3) != 0)
157                         {
158                         j = 4 - j;
159
160                         if (j > x)
161                                 j = x;
162
163                         while (j > 0)
164                                 {       
165                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
166                                 *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
167                                 //end edit -MM
168                                 l += dldx;
169                                 u += dudx;
170                                 v += dvdx;
171                                 x--;
172                                 j--;
173                                 }
174                         }
175
176                 j &= ~3;
177                 while (j > 0)
178                         {
179                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
180                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 24;
181                         //end edit -MM
182                         l += dldx;
183                         u += dudx;
184                         v += dvdx;
185                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
186                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 16;
187                         //end edit -MM
188                         l += dldx;
189                         u += dudx;
190                         v += dvdx;
191                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
192                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 8;
193                         //end edit -MM
194                         l += dldx;
195                         u += dudx;
196                         v += dvdx;
197                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
198                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
199                         //end edit -MM
200                         l += dldx;
201                         u += dudx;
202                         v += dvdx;
203                         *((unsigned long *) dest) = destlong;
204                         dest += 4;
205                         x -= 4;
206                         j -= 4;
207                         }
208
209                 while (x-- > 0)
210                         {
211                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
212                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
213                         //end edit -MM
214                         l += dldx;
215                         u += dudx;
216                         v += dvdx;
217                         }
218
219         } else {
220                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
221                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
222                         if ( c!=TRANSPARENCY_COLOR)
223                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
224                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
225                         //end edit -MM
226                         dest++;
227                         l += dldx;
228                         u += dudx;
229                         v += dvdx;
230                 }
231         }
232 }
233
234 #else
235 void c_tmap_scanline_lin()
236 {
237         ubyte *dest;
238         uint c;
239         int x;
240         fix u,v,l,dudx, dvdx, dldx;
241
242         u = fx_u;
243         v = fx_v*64;
244         dudx = fx_du_dx; 
245         dvdx = fx_dv_dx*64; 
246
247         l = fx_l>>8;
248         dldx = fx_dl_dx>>8;
249         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
250
251         if (!Transparency_on)   {
252                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
253                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
254                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
255                         //end edit -MM
256                         l += dldx;
257                         u += dudx;
258                         v += dvdx;
259                 }
260         } else {
261                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
262                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
263                         if ( c!=255)
264                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
265                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
266                         //end edit -MM
267                         dest++;
268                         l += dldx;
269                         u += dudx;
270                         v += dvdx;
271                 }
272         }
273 }
274 #endif
275
276 // Used for energy centers. See comments for c_tmap_scanline_per().
277 void c_fp_tmap_scanline_per_nolight()
278 {
279         ubyte          *dest;
280         uint            c;
281         int             x, j;
282         double          u, v, z, dudx, dvdx, dzdx, rec_z;
283         u_int64_t       destlong;
284
285         u = f2db(fx_u);
286         v = f2db(fx_v) * 64.0;
287         z = f2db(fx_z);
288         dudx = f2db(fx_du_dx);
289         dvdx = f2db(fx_dv_dx) * 64.0;
290         dzdx = f2db(fx_dz_dx);
291
292         rec_z = 1.0 / z;
293
294         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
295
296         x = fx_xright - fx_xleft + 1;
297         if (!Transparency_on) {
298                 if (x >= 8) {
299                         if ((j = (size_t) dest & 7) != 0) {
300                                 j = 8 - j;
301
302                                 while (j > 0) {
303                                         *dest++ =
304                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
305                                                          (((int) (u * rec_z)) & 63)];
306                                         u += dudx;
307                                         v += dvdx;
308                                         z += dzdx;
309                                         rec_z = 1.0 / z;
310                                         x--;
311                                         j--;
312                                 }
313                         }
314
315                         while (j >= 8) {
316                                 destlong =
317                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
318                                                        (((int) (u * rec_z)) & 63)];
319                                 u += dudx;
320                                 v += dvdx;
321                                 z += dzdx;
322                                 rec_z = 1.0 / z;
323                                 destlong |=
324                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
325                                                        (((int) (u * rec_z)) & 63)] << 8;
326                                 u += dudx;
327                                 v += dvdx;
328                                 z += dzdx;
329                                 rec_z = 1.0 / z;
330                                 destlong |=
331                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
332                                                        (((int) (u * rec_z)) & 63)] << 16;
333                                 u += dudx;
334                                 v += dvdx;
335                                 z += dzdx;
336                                 rec_z = 1.0 / z;
337                                 destlong |=
338                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
339                                                        (((int) (u * rec_z)) & 63)] << 24;
340                                 u += dudx;
341                                 v += dvdx;
342                                 z += dzdx;
343                                 rec_z = 1.0 / z;
344                                 destlong |=
345                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
346                                                        (((int) (u * rec_z)) & 63)] << 32;
347                                 u += dudx;
348                                 v += dvdx;
349                                 z += dzdx;
350                                 rec_z = 1.0 / z;
351                                 destlong |=
352                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
353                                                        (((int) (u * rec_z)) & 63)] << 40;
354                                 u += dudx;
355                                 v += dvdx;
356                                 z += dzdx;
357                                 rec_z = 1.0 / z;
358                                 destlong |=
359                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
360                                                        (((int) (u * rec_z)) & 63)] << 48;
361                                 u += dudx;
362                                 v += dvdx;
363                                 z += dzdx;
364                                 rec_z = 1.0 / z;
365                                 destlong |=
366                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
367                                                        (((int) (u * rec_z)) & 63)] << 56;
368                                 u += dudx;
369                                 v += dvdx;
370                                 z += dzdx;
371                                 rec_z = 1.0 / z;
372
373                                 *((u_int64_t *) dest) = destlong;
374                                 dest += 8;
375                                 x -= 8;
376                                 j -= 8;
377                         }
378                 }
379                 while (x-- > 0) {
380                         *dest++ =
381                             (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
382                                                (((int) (u * rec_z)) & 63)];
383                         u += dudx;
384                         v += dvdx;
385                         z += dzdx;
386                         rec_z = 1.0 / z;
387                 }
388         } else {
389                 x = fx_xright - fx_xleft + 1;
390
391                 if (x >= 8) {
392                         if ((j = (size_t) dest & 7) != 0) {
393                                 j = 8 - j;
394
395                                 while (j > 0) {
396                                         c =
397                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
398                                                          (((int) (u * rec_z)) & 63)];
399                                         if (c != 255)
400                                                 *dest = c;
401                                         dest++;
402                                         u += dudx;
403                                         v += dvdx;
404                                         z += dzdx;
405                                         rec_z = 1.0 / z;
406                                         x--;
407                                         j--;
408                                 }
409                         }
410
411                         j = x;
412                         while (j >= 8) {
413                                 destlong = *((u_int64_t *) dest);
414                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
415                                                   (((int) (u * rec_z)) & 63)];
416                                 if (c != 255) {
417                                         destlong &= ~(u_int64_t)0xFF;
418                                         destlong |= (u_int64_t) c;
419                                 }
420                                 u += dudx;
421                                 v += dvdx;
422                                 z += dzdx;
423                                 rec_z = 1.0 / z;
424                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
425                                                   (((int) (u * rec_z)) & 63)];
426                                 if (c != 255) {
427                                         destlong &= ~((u_int64_t)0xFF << 8);
428                                         destlong |= (u_int64_t) c << 8;
429                                 }
430                                 u += dudx;
431                                 v += dvdx;
432                                 z += dzdx;
433                                 rec_z = 1.0 / z;
434                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
435                                                   (((int) (u * rec_z)) & 63)];
436                                 if (c != 255) {
437                                         destlong &= ~((u_int64_t)0xFF << 16);
438                                         destlong |= (u_int64_t) c << 16;
439                                 }
440                                 u += dudx;
441                                 v += dvdx;
442                                 z += dzdx;
443                                 rec_z = 1.0 / z;
444                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
445                                                   (((int) (u * rec_z)) & 63)];
446                                 if (c != 255) {
447                                         destlong &= ~((u_int64_t)0xFF << 24);
448                                         destlong |= (u_int64_t) c << 24;
449                                 }
450                                 u += dudx;
451                                 v += dvdx;
452                                 z += dzdx;
453                                 rec_z = 1.0 / z;
454                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
455                                                   (((int) (u * rec_z)) & 63)];
456                                 if (c != 255) {
457                                         destlong &= ~((u_int64_t)0xFF << 32);
458                                         destlong |= (u_int64_t) c << 32;
459                                 }
460                                 u += dudx;
461                                 v += dvdx;
462                                 z += dzdx;
463                                 rec_z = 1.0 / z;
464                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
465                                                   (((int) (u * rec_z)) & 63)];
466                                 if (c != 255) {
467                                         destlong &= ~((u_int64_t)0xFF << 40);
468                                         destlong |= (u_int64_t) c << 40;
469                                 }
470                                 u += dudx;
471                                 v += dvdx;
472                                 z += dzdx;
473                                 rec_z = 1.0 / z;
474                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
475                                                   (((int) (u * rec_z)) & 63)];
476                                 if (c != 255) {
477                                         destlong &= ~((u_int64_t)0xFF << 48);
478                                         destlong |= (u_int64_t) c << 48;
479                                 }
480                                 u += dudx;
481                                 v += dvdx;
482                                 z += dzdx;
483                                 rec_z = 1.0 / z;
484                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
485                                                   (((int) (u * rec_z)) & 63)];
486                                 if (c != 255) {
487                                         destlong &= ~((u_int64_t)0xFF << 56);
488                                         destlong |= (u_int64_t) c << 56;
489                                 }
490                                 u += dudx;
491                                 v += dvdx;
492                                 z += dzdx;
493                                 rec_z = 1.0 / z;
494
495                                 *((u_int64_t *) dest) = destlong;
496                                 dest += 8;
497                                 x -= 8;
498                                 j -= 8;
499                         }
500                 }
501                 while (x-- > 0) {
502                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
503                                           (((int) (u * rec_z)) & 63)];
504                         if (c != 255)
505                                 *dest = c;
506                         dest++;
507                         u += dudx;
508                         v += dvdx;
509                         z += dzdx;
510                         rec_z = 1.0 / z;
511                 }
512         }
513 }
514
515 void c_tmap_scanline_per_nolight()
516 {
517         ubyte *dest;
518         uint c;
519         int x;
520         fix u,v,z,dudx, dvdx, dzdx;
521
522         u = fx_u;
523         v = fx_v*64;
524         z = fx_z;
525         dudx = fx_du_dx; 
526         dvdx = fx_dv_dx*64; 
527         dzdx = fx_dz_dx;
528
529         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
530
531         if (!Transparency_on)   {
532                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
533                         *dest++ = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
534                         u += dudx;
535                         v += dvdx;
536                         z += dzdx;
537                 }
538         } else {
539                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
540                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
541                         if ( c!=255)
542                                 *dest = c;
543                         dest++;
544                         u += dudx;
545                         v += dvdx;
546                         z += dzdx;
547                 }
548         }
549 }
550
551 // This texture mapper uses floating point extensively and writes 8 pixels at once, so it likely works
552 // best on 64 bit RISC processors.
553 // WARNING: it is not endian clean. For big endian, reverse the shift counts in the unrolled loops. I
554 // have no means to test that, so I didn't try it. Please tell me if you get this to work on a big
555 // endian machine.
556 // If you're using an Alpha, use the Compaq compiler for this file for quite some fps more.
557 // Unfortunately, it won't compile the whole source, so simply compile everything, change the
558 // compiler to ccc, remove scanline.o and compile again.
559 // Please send comments/suggestions to falk.hueffner@student.uni-tuebingen.de.
560 void c_fp_tmap_scanline_per()
561 {
562         ubyte          *dest;
563         uint            c;
564         int             x, j;
565         double          u, v, z, l, dudx, dvdx, dzdx, dldx, rec_z;
566         u_int64_t       destlong;
567
568         u = f2db(fx_u);
569         v = f2db(fx_v) * 64.0;
570         z = f2db(fx_z);
571         l = f2db(fx_l);
572         dudx = f2db(fx_du_dx);
573         dvdx = f2db(fx_dv_dx) * 64.0;
574         dzdx = f2db(fx_dz_dx);
575         dldx = f2db(fx_dl_dx);
576
577         rec_z = 1.0 / z; // gcc 2.95.2 is won't do this optimization itself
578
579         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
580         x = fx_xright - fx_xleft + 1;
581
582         if (!Transparency_on) {
583                 if (x >= 8) {
584                         if ((j = (size_t) dest & 7) != 0) {
585                                 j = 8 - j;
586
587                                 while (j > 0) {
588                                         *dest++ =
589                                             gr_fade_table[((int) fabs(l)) * 256 +
590                                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
591                                                                         (((int) (u * rec_z)) & 63)]];
592                                         l += dldx;
593                                         u += dudx;
594                                         v += dvdx;
595                                         z += dzdx;
596                                         rec_z = 1.0 / z;
597                                         x--;
598                                         j--;
599                                 }
600                         }
601
602                         j = x;
603                         while (j >= 8) {
604                                 destlong =
605                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
606                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
607                                                                             (((int) (u * rec_z)) & 63)]];
608                                 l += dldx;
609                                 u += dudx;
610                                 v += dvdx;
611                                 z += dzdx;
612                                 rec_z = 1.0 / z;
613                                 destlong |=
614                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
615                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
616                                                                             (((int) (u * rec_z)) & 63)]] << 8;
617                                 l += dldx;
618                                 u += dudx;
619                                 v += dvdx;
620                                 z += dzdx;
621                                 rec_z = 1.0 / z;
622                                 destlong |=
623                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
624                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
625                                                                             (((int) (u * rec_z)) & 63)]] << 16;
626                                 l += dldx;
627                                 u += dudx;
628                                 v += dvdx;
629                                 z += dzdx;
630                                 rec_z = 1.0 / z;
631                                 destlong |=
632                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
633                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
634                                                                             (((int) (u * rec_z)) & 63)]] << 24;
635                                 l += dldx;
636                                 u += dudx;
637                                 v += dvdx;
638                                 z += dzdx;
639                                 rec_z = 1.0 / z;
640                                 destlong |=
641                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
642                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
643                                                                             (((int) (u * rec_z)) & 63)]] << 32;
644                                 l += dldx;
645                                 u += dudx;
646                                 v += dvdx;
647                                 z += dzdx;
648                                 rec_z = 1.0 / z;
649                                 destlong |=
650                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
651                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
652                                                                             (((int) (u * rec_z)) & 63)]] << 40;
653                                 l += dldx;
654                                 u += dudx;
655                                 v += dvdx;
656                                 z += dzdx;
657                                 rec_z = 1.0 / z;
658                                 destlong |=
659                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
660                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
661                                                                             (((int) (u * rec_z)) & 63)]] << 48;
662                                 l += dldx;
663                                 u += dudx;
664                                 v += dvdx;
665                                 z += dzdx;
666                                 rec_z = 1.0 / z;
667                                 destlong |=
668                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
669                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
670                                                                             (((int) (u * rec_z)) & 63)]] << 56;
671                                 l += dldx;
672                                 u += dudx;
673                                 v += dvdx;
674                                 z += dzdx;
675                                 rec_z = 1.0 / z;
676
677                                 *((u_int64_t *) dest) = destlong;
678                                 dest += 8;
679                                 x -= 8;
680                                 j -= 8;
681                         }
682                 }
683                 while (x-- > 0) {
684                         *dest++ =
685                             gr_fade_table[((int) fabs(l)) * 256 +
686                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]];
687                         l += dldx;
688                         u += dudx;
689                         v += dvdx;
690                         z += dzdx;
691                         rec_z = 1.0 / z;
692                 }
693         } else {
694                 if (x >= 8) {
695                         if ((j = (size_t) dest & 7) != 0) {
696                                 j = 8 - j;
697
698                                 while (j > 0) {
699                                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
700                                         if (c != 255)
701                                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
702                                         dest++;
703                                         l += dldx;
704                                         u += dudx;
705                                         v += dvdx;
706                                         z += dzdx;
707                                         rec_z = 1.0 / z;
708                                         x--;
709                                         j--;
710                                 }
711                         }
712
713                         j = x;
714                         while (j >= 8) {
715                                 destlong = *((u_int64_t *) dest);
716                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
717                                 if (c != 255) {
718                                         destlong &= ~(u_int64_t)0xFF;
719                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c];
720                                 }
721                                 l += dldx;
722                                 u += dudx;
723                                 v += dvdx;
724                                 z += dzdx;
725                                 rec_z = 1.0 / z;
726                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
727                                 if (c != 255) {
728                                         destlong &= ~((u_int64_t)0xFF << 8);
729                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 8;
730                                 }
731                                 l += dldx;
732                                 u += dudx;
733                                 v += dvdx;
734                                 z += dzdx;
735                                 rec_z = 1.0 / z;
736                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
737                                 if (c != 255) {
738                                         destlong &= ~((u_int64_t)0xFF << 16);
739                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 16;
740                                 }
741                                 l += dldx;
742                                 u += dudx;
743                                 v += dvdx;
744                                 z += dzdx;
745                                 rec_z = 1.0 / z;
746                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
747                                 if (c != 255) {
748                                         destlong &= ~((u_int64_t)0xFF << 24);
749                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 24;
750                                 }
751                                 l += dldx;
752                                 u += dudx;
753                                 v += dvdx;
754                                 z += dzdx;
755                                 rec_z = 1.0 / z;
756                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
757                                 if (c != 255) {
758                                         destlong &= ~((u_int64_t)0xFF << 32);
759                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 32;
760                                 }
761                                 l += dldx;
762                                 u += dudx;
763                                 v += dvdx;
764                                 z += dzdx;
765                                 rec_z = 1.0 / z;
766                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
767                                 if (c != 255) {
768                                         destlong &= ~((u_int64_t)0xFF << 40);
769                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 40;
770                                 }
771                                 l += dldx;
772                                 u += dudx;
773                                 v += dvdx;
774                                 z += dzdx;
775                                 rec_z = 1.0 / z;
776                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
777                                 if (c != 255) {
778                                         destlong &= ~((u_int64_t)0xFF << 48);
779                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 48;
780                                 }
781                                 l += dldx;
782                                 u += dudx;
783                                 v += dvdx;
784                                 z += dzdx;
785                                 rec_z = 1.0 / z;
786                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
787                                 if (c != 255) {
788                                         destlong &= ~((u_int64_t)0xFF << 56);
789                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 56;
790                                 }
791                                 l += dldx;
792                                 u += dudx;
793                                 v += dvdx;
794                                 z += dzdx;
795                                 rec_z = 1.0 / z;
796
797                                 *((u_int64_t *) dest) = destlong;
798                                 dest += 8;
799                                 x -= 8;
800                                 j -= 8;
801                         }
802                 }
803                 while (x-- > 0) {
804                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
805                         if (c != 255)
806                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
807                         dest++;
808                         l += dldx;
809                         u += dudx;
810                         v += dvdx;
811                         z += dzdx;
812                         rec_z = 1.0 / z;
813                 }
814         }
815 }
816
817 #if 1
818 // note the unrolling loop is broken. It is never called, and uses big endian. -- FH
819 void c_tmap_scanline_per()
820 {
821         ubyte *dest;
822         uint c;
823         int x, j;
824         fix l,u,v,z;
825         fix dudx, dvdx, dzdx, dldx;
826
827         u = fx_u;
828         v = fx_v*64;
829         z = fx_z;
830         dudx = fx_du_dx; 
831         dvdx = fx_dv_dx*64; 
832         dzdx = fx_dz_dx;
833
834         l = fx_l>>8;
835         dldx = fx_dl_dx>>8;
836         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
837
838         if (!Transparency_on)   {
839                 ubyte*                  pixPtrLocalCopy = pixptr;
840                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
841                 unsigned long   destlong;
842
843                 x = fx_xright-fx_xleft+1;
844
845                 if ((j = (unsigned long) dest & 3) != 0)
846                         {
847                         j = 4 - j;
848
849                         if (j > x)
850                                 j = x;
851
852                         while (j > 0)
853                                 {       
854                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
855                                 *dest++ = fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
856                                 //end edit -MM
857                                 l += dldx;
858                                 u += dudx;
859                                 v += dvdx;
860                                 z += dzdx;
861                                 x--;
862                                 j--;
863                                 }
864                         }
865
866                 j &= ~3;
867                 while (j > 0)
868                         {
869                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
870                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 24;
871                         //end edit -MM
872                         l += dldx;
873                         u += dudx;
874                         v += dvdx;
875                         z += dzdx;
876                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
877                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 16;
878                         //end edit -MM
879                         l += dldx;
880                         u += dudx;
881                         v += dvdx;
882                         z += dzdx;
883                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
884                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 8;
885                         //end edit -MM
886                         l += dldx;
887                         u += dudx;
888                         v += dvdx;
889                         z += dzdx;
890                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
891                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
892                         //end edit -MM
893                         l += dldx;
894                         u += dudx;
895                         v += dvdx;
896                         z += dzdx;
897                         *((unsigned long *) dest) = destlong;
898                         dest += 4;
899                         x -= 4;
900                         j -= 4;
901                         }
902
903                 while (x-- > 0)
904                         {
905                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
906                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
907                         //end edit -MM
908                         l += dldx;
909                         u += dudx;
910                         v += dvdx;
911                         z += dzdx;
912                         }
913
914         } else {
915                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
916                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
917                         if ( c!=TRANSPARENCY_COLOR)
918                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
919                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
920                         //end edit -MM
921                         dest++;
922                         l += dldx;
923                         u += dudx;
924                         v += dvdx;
925                         z += dzdx;
926                 }
927         }
928 }
929
930 #else
931 void c_tmap_scanline_per()
932 {
933         ubyte *dest;
934         uint c;
935         int x;
936         fix u,v,z,l,dudx, dvdx, dzdx, dldx;
937
938         u = fx_u;
939         v = fx_v*64;
940         z = fx_z;
941         dudx = fx_du_dx; 
942         dvdx = fx_dv_dx*64; 
943         dzdx = fx_dz_dx;
944
945         l = fx_l>>8;
946         dldx = fx_dl_dx>>8;
947         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
948
949         if (!Transparency_on)   {
950                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
951                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
952                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
953                         //end edit -MM
954                         l += dldx;
955                         u += dudx;
956                         v += dvdx;
957                         z += dzdx;
958                 }
959         } else {
960                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
961                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
962                         if ( c!=255)
963                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
964                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
965                         //end edit -MM
966                         dest++;
967                         l += dldx;
968                         u += dudx;
969                         v += dvdx;
970                         z += dzdx;
971                 }
972         }
973 }
974
975 #endif
976
/* Every scanline renderer takes no arguments and returns nothing. */
typedef void (*tmap_scanline_fn)(void);

/*
 * Currently selected scanline renderers.  They start out null and are filled
 * in by select_tmap().
 */
tmap_scanline_fn cur_tmap_scanline_per;          /* perspective */
tmap_scanline_fn cur_tmap_scanline_per_nolight;  /* perspective, no lighting */
tmap_scanline_fn cur_tmap_scanline_lin;          /* linear */
tmap_scanline_fn cur_tmap_scanline_lin_nolight;  /* linear, no lighting */
tmap_scanline_fn cur_tmap_scanline_flat;         /* flat */
tmap_scanline_fn cur_tmap_scanline_shaded;       /* shaded */
983
984 //runtime selection of optimized tmappers.  12/07/99  Matthew Mueller
985 //the reason I did it this way rather than having a *tmap_funcs that then points to a c_tmap or fp_tmap struct thats already filled in, is to avoid a second pointer dereference.
986 void select_tmap(char *type){
987         if (!type){
988 #ifndef NO_ASM
989                 select_tmap("i386");
990 #else
991                 select_tmap("c");
992 #endif
993                 return;
994         }
995 #ifndef NO_ASM
996         if (stricmp(type,"i386")==0){
997                 cur_tmap_scanline_per=asm_tmap_scanline_per;
998                 cur_tmap_scanline_per_nolight=asm_tmap_scanline_per;
999                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
1000                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
1001                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1002                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1003         }
1004         else if (stricmp(type,"pent")==0){
1005                 cur_tmap_scanline_per=asm_pent_tmap_scanline_per;
1006                 cur_tmap_scanline_per_nolight=asm_pent_tmap_scanline_per;
1007                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
1008                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
1009                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1010                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1011         }
1012         else if (stricmp(type,"ppro")==0){
1013                 cur_tmap_scanline_per=asm_ppro_tmap_scanline_per;
1014                 cur_tmap_scanline_per_nolight=asm_ppro_tmap_scanline_per;
1015                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
1016                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
1017                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1018                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1019         }
1020         else
1021 #endif
1022         if (stricmp(type,"fp")==0){
1023                 cur_tmap_scanline_per=c_fp_tmap_scanline_per;
1024                 cur_tmap_scanline_per_nolight=c_fp_tmap_scanline_per_nolight;
1025                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1026                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1027                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1028                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1029         }
1030         else {
1031                 if (stricmp(type,"c")!=0)
1032                         printf("unknown tmap requested, using c tmap\n");
1033                 cur_tmap_scanline_per=c_tmap_scanline_per;
1034                 cur_tmap_scanline_per_nolight=c_tmap_scanline_per_nolight;
1035                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1036                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1037                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1038                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1039         }
1040 }