remove some redundancy by not needing to do SoundSlots[i].playing = 0; in addition...
[btb/d2x.git] / texmap / scanline.c
1 /* $Id: scanline.c,v 1.7 2004-05-20 22:31:03 btb Exp $ */
2 /*
3 THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
4 SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
5 END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
6 ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
7 IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
8 SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
9 FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
10 CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
11 AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
12 COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
13 */
14
15 /*
16  *
17  * Routines to draw the texture mapped scanlines.
18  *
19  * Old Log:
20  * Revision 1.2  1995/02/20  18:23:39  john
21  * Added new module for C versions of inner loops.
22  *
23  * Revision 1.1  1995/02/20  17:42:27  john
24  * Initial revision
25  *
26  *
27  */
28
29 #ifdef HAVE_CONFIG_H
30 #include <conf.h>
31 #endif
32
33 #ifdef RCS
34 static char rcsid[] = "$Id: scanline.c,v 1.7 2004-05-20 22:31:03 btb Exp $";
35 #endif
36
37 #include <math.h>
38 #include <limits.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42
43 #include "maths.h"
44 #include "mono.h"
45 #include "gr.h"
46 #include "grdef.h"
47 #include "texmap.h"
48 #include "texmapl.h"
49 #include "scanline.h"
50 #include "strutil.h"
51
52 void c_tmap_scanline_flat()
53 {
54         ubyte *dest;
55 //        int x;
56
57         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y )  );
58
59 /*      for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
60                 *dest++ = tmap_flat_color;
61         }*/
62         memset(dest,tmap_flat_color,fx_xright-fx_xleft+1);
63 }
64
65 void c_tmap_scanline_shaded()
66 {
67         int fade;
68         ubyte *dest, tmp;
69         int x;
70
71         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
72
73         fade = tmap_flat_shade_value<<8;
74         for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
75                 tmp = *dest;
76                 *dest++ = gr_fade_table[ fade |(tmp)];
77         }
78 }
79
80 void c_tmap_scanline_lin_nolight()
81 {
82         ubyte *dest;
83         uint c;
84         int x;
85         fix u,v,dudx, dvdx;
86
87         u = fx_u;
88         v = fx_v*64;
89         dudx = fx_du_dx; 
90         dvdx = fx_dv_dx*64; 
91
92         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
93
94         if (!Transparency_on)   {
95                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
96                         *dest++ = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
97                         u += dudx;
98                         v += dvdx;
99                 }
100         } else {
101                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
102                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
103                         if ( c!=255)
104                                 *dest = c;
105                         dest++;
106                         u += dudx;
107                         v += dvdx;
108                 }
109         }
110 }
111
112
113 #if 1
114 void c_tmap_scanline_lin()
115 {
116         ubyte *dest;
117         uint c;
118         int x, j;
119         fix u,v,l,dudx, dvdx, dldx;
120
121         u = fx_u;
122         v = fx_v*64;
123         dudx = fx_du_dx; 
124         dvdx = fx_dv_dx*64; 
125
126         l = fx_l>>8;
127         dldx = fx_dl_dx>>8;
128         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
129
130         if (!Transparency_on)   {
131                 ubyte*                  pixPtrLocalCopy = pixptr;
132                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
133                 unsigned long   destlong;
134
135                 x = fx_xright-fx_xleft+1;
136
137                 if ((j = (unsigned long) dest & 3) != 0)
138                         {
139                         j = 4 - j;
140
141                         if (j > x)
142                                 j = x;
143
144                         while (j > 0)
145                                 {       
146                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
147                                 *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
148                                 //end edit -MM
149                                 l += dldx;
150                                 u += dudx;
151                                 v += dvdx;
152                                 x--;
153                                 j--;
154                                 }
155                         }
156
157                 j &= ~3;
158                 while (j > 0)
159                         {
160                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
161                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 24;
162                         //end edit -MM
163                         l += dldx;
164                         u += dudx;
165                         v += dvdx;
166                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
167                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 16;
168                         //end edit -MM
169                         l += dldx;
170                         u += dudx;
171                         v += dvdx;
172                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
173                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 8;
174                         //end edit -MM
175                         l += dldx;
176                         u += dudx;
177                         v += dvdx;
178                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
179                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
180                         //end edit -MM
181                         l += dldx;
182                         u += dudx;
183                         v += dvdx;
184                         *((unsigned long *) dest) = destlong;
185                         dest += 4;
186                         x -= 4;
187                         j -= 4;
188                         }
189
190                 while (x-- > 0)
191                         {
192                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
193                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
194                         //end edit -MM
195                         l += dldx;
196                         u += dudx;
197                         v += dvdx;
198                         }
199
200         } else {
201                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
202                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
203                         if ( c!=TRANSPARENCY_COLOR)
204                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
205                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
206                         //end edit -MM
207                         dest++;
208                         l += dldx;
209                         u += dudx;
210                         v += dvdx;
211                 }
212         }
213 }
214
215 #else
216 void c_tmap_scanline_lin()
217 {
218         ubyte *dest;
219         uint c;
220         int x;
221         fix u,v,l,dudx, dvdx, dldx;
222
223         u = fx_u;
224         v = fx_v*64;
225         dudx = fx_du_dx; 
226         dvdx = fx_dv_dx*64; 
227
228         l = fx_l>>8;
229         dldx = fx_dl_dx>>8;
230         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
231
232         if (!Transparency_on)   {
233                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
234                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
235                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
236                         //end edit -MM
237                         l += dldx;
238                         u += dudx;
239                         v += dvdx;
240                 }
241         } else {
242                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
243                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
244                         if ( c!=255)
245                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
246                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
247                         //end edit -MM
248                         dest++;
249                         l += dldx;
250                         u += dudx;
251                         v += dvdx;
252                 }
253         }
254 }
255 #endif
256
257 // Used for energy centers. See comments for c_tmap_scanline_per().
258 void c_fp_tmap_scanline_per_nolight()
259 {
260         ubyte          *dest;
261         uint            c;
262         int             x, j;
263         double          u, v, z, dudx, dvdx, dzdx, rec_z;
264         u_int64_t       destlong;
265
266         u = f2db(fx_u);
267         v = f2db(fx_v) * 64.0;
268         z = f2db(fx_z);
269         dudx = f2db(fx_du_dx);
270         dvdx = f2db(fx_dv_dx) * 64.0;
271         dzdx = f2db(fx_dz_dx);
272
273         rec_z = 1.0 / z;
274
275         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
276
277         x = fx_xright - fx_xleft + 1;
278         if (!Transparency_on) {
279                 if (x >= 8) {
280                         if ((j = (size_t) dest & 7) != 0) {
281                                 j = 8 - j;
282
283                                 while (j > 0) {
284                                         *dest++ =
285                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
286                                                          (((int) (u * rec_z)) & 63)];
287                                         u += dudx;
288                                         v += dvdx;
289                                         z += dzdx;
290                                         rec_z = 1.0 / z;
291                                         x--;
292                                         j--;
293                                 }
294                         }
295
296                         while (j >= 8) {
297                                 destlong =
298                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
299                                                        (((int) (u * rec_z)) & 63)];
300                                 u += dudx;
301                                 v += dvdx;
302                                 z += dzdx;
303                                 rec_z = 1.0 / z;
304                                 destlong |=
305                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
306                                                        (((int) (u * rec_z)) & 63)] << 8;
307                                 u += dudx;
308                                 v += dvdx;
309                                 z += dzdx;
310                                 rec_z = 1.0 / z;
311                                 destlong |=
312                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
313                                                        (((int) (u * rec_z)) & 63)] << 16;
314                                 u += dudx;
315                                 v += dvdx;
316                                 z += dzdx;
317                                 rec_z = 1.0 / z;
318                                 destlong |=
319                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
320                                                        (((int) (u * rec_z)) & 63)] << 24;
321                                 u += dudx;
322                                 v += dvdx;
323                                 z += dzdx;
324                                 rec_z = 1.0 / z;
325                                 destlong |=
326                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
327                                                        (((int) (u * rec_z)) & 63)] << 32;
328                                 u += dudx;
329                                 v += dvdx;
330                                 z += dzdx;
331                                 rec_z = 1.0 / z;
332                                 destlong |=
333                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
334                                                        (((int) (u * rec_z)) & 63)] << 40;
335                                 u += dudx;
336                                 v += dvdx;
337                                 z += dzdx;
338                                 rec_z = 1.0 / z;
339                                 destlong |=
340                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
341                                                        (((int) (u * rec_z)) & 63)] << 48;
342                                 u += dudx;
343                                 v += dvdx;
344                                 z += dzdx;
345                                 rec_z = 1.0 / z;
346                                 destlong |=
347                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
348                                                        (((int) (u * rec_z)) & 63)] << 56;
349                                 u += dudx;
350                                 v += dvdx;
351                                 z += dzdx;
352                                 rec_z = 1.0 / z;
353
354                                 *((u_int64_t *) dest) = destlong;
355                                 dest += 8;
356                                 x -= 8;
357                                 j -= 8;
358                         }
359                 }
360                 while (x-- > 0) {
361                         *dest++ =
362                             (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
363                                                (((int) (u * rec_z)) & 63)];
364                         u += dudx;
365                         v += dvdx;
366                         z += dzdx;
367                         rec_z = 1.0 / z;
368                 }
369         } else {
370                 x = fx_xright - fx_xleft + 1;
371
372                 if (x >= 8) {
373                         if ((j = (size_t) dest & 7) != 0) {
374                                 j = 8 - j;
375
376                                 while (j > 0) {
377                                         c =
378                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
379                                                          (((int) (u * rec_z)) & 63)];
380                                         if (c != 255)
381                                                 *dest = c;
382                                         dest++;
383                                         u += dudx;
384                                         v += dvdx;
385                                         z += dzdx;
386                                         rec_z = 1.0 / z;
387                                         x--;
388                                         j--;
389                                 }
390                         }
391
392                         j = x;
393                         while (j >= 8) {
394                                 destlong = *((u_int64_t *) dest);
395                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
396                                                   (((int) (u * rec_z)) & 63)];
397                                 if (c != 255) {
398                                         destlong &= ~(u_int64_t)0xFF;
399                                         destlong |= (u_int64_t) c;
400                                 }
401                                 u += dudx;
402                                 v += dvdx;
403                                 z += dzdx;
404                                 rec_z = 1.0 / z;
405                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
406                                                   (((int) (u * rec_z)) & 63)];
407                                 if (c != 255) {
408                                         destlong &= ~((u_int64_t)0xFF << 8);
409                                         destlong |= (u_int64_t) c << 8;
410                                 }
411                                 u += dudx;
412                                 v += dvdx;
413                                 z += dzdx;
414                                 rec_z = 1.0 / z;
415                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
416                                                   (((int) (u * rec_z)) & 63)];
417                                 if (c != 255) {
418                                         destlong &= ~((u_int64_t)0xFF << 16);
419                                         destlong |= (u_int64_t) c << 16;
420                                 }
421                                 u += dudx;
422                                 v += dvdx;
423                                 z += dzdx;
424                                 rec_z = 1.0 / z;
425                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
426                                                   (((int) (u * rec_z)) & 63)];
427                                 if (c != 255) {
428                                         destlong &= ~((u_int64_t)0xFF << 24);
429                                         destlong |= (u_int64_t) c << 24;
430                                 }
431                                 u += dudx;
432                                 v += dvdx;
433                                 z += dzdx;
434                                 rec_z = 1.0 / z;
435                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
436                                                   (((int) (u * rec_z)) & 63)];
437                                 if (c != 255) {
438                                         destlong &= ~((u_int64_t)0xFF << 32);
439                                         destlong |= (u_int64_t) c << 32;
440                                 }
441                                 u += dudx;
442                                 v += dvdx;
443                                 z += dzdx;
444                                 rec_z = 1.0 / z;
445                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
446                                                   (((int) (u * rec_z)) & 63)];
447                                 if (c != 255) {
448                                         destlong &= ~((u_int64_t)0xFF << 40);
449                                         destlong |= (u_int64_t) c << 40;
450                                 }
451                                 u += dudx;
452                                 v += dvdx;
453                                 z += dzdx;
454                                 rec_z = 1.0 / z;
455                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
456                                                   (((int) (u * rec_z)) & 63)];
457                                 if (c != 255) {
458                                         destlong &= ~((u_int64_t)0xFF << 48);
459                                         destlong |= (u_int64_t) c << 48;
460                                 }
461                                 u += dudx;
462                                 v += dvdx;
463                                 z += dzdx;
464                                 rec_z = 1.0 / z;
465                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
466                                                   (((int) (u * rec_z)) & 63)];
467                                 if (c != 255) {
468                                         destlong &= ~((u_int64_t)0xFF << 56);
469                                         destlong |= (u_int64_t) c << 56;
470                                 }
471                                 u += dudx;
472                                 v += dvdx;
473                                 z += dzdx;
474                                 rec_z = 1.0 / z;
475
476                                 *((u_int64_t *) dest) = destlong;
477                                 dest += 8;
478                                 x -= 8;
479                                 j -= 8;
480                         }
481                 }
482                 while (x-- > 0) {
483                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
484                                           (((int) (u * rec_z)) & 63)];
485                         if (c != 255)
486                                 *dest = c;
487                         dest++;
488                         u += dudx;
489                         v += dvdx;
490                         z += dzdx;
491                         rec_z = 1.0 / z;
492                 }
493         }
494 }
495
496 void c_tmap_scanline_per_nolight()
497 {
498         ubyte *dest;
499         uint c;
500         int x;
501         fix u,v,z,dudx, dvdx, dzdx;
502
503         u = fx_u;
504         v = fx_v*64;
505         z = fx_z;
506         dudx = fx_du_dx; 
507         dvdx = fx_dv_dx*64; 
508         dzdx = fx_dz_dx;
509
510         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
511
512         if (!Transparency_on)   {
513                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
514                         *dest++ = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
515                         u += dudx;
516                         v += dvdx;
517                         z += dzdx;
518                 }
519         } else {
520                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
521                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
522                         if ( c!=255)
523                                 *dest = c;
524                         dest++;
525                         u += dudx;
526                         v += dvdx;
527                         z += dzdx;
528                 }
529         }
530 }
531
532 // This texture mapper uses floating point extensively and writes 8 pixels at once, so it likely works
533 // best on 64 bit RISC processors.
534 // WARNING: it is not endian clean. For big endian, reverse the shift counts in the unrolled loops. I
535 // have no means to test that, so I didn't try it. Please tell me if you get this to work on a big
536 // endian machine.
537 // If you're using an Alpha, use the Compaq compiler for this file for quite some fps more.
538 // Unfortunately, it won't compile the whole source, so simply compile everything, change the
539 // compiler to ccc, remove scanline.o and compile again.
540 // Please send comments/suggestions to falk.hueffner@student.uni-tuebingen.de.
541 void c_fp_tmap_scanline_per()
542 {
543         ubyte          *dest;
544         uint            c;
545         int             x, j;
546         double          u, v, z, l, dudx, dvdx, dzdx, dldx, rec_z;
547         u_int64_t       destlong;
548
549         u = f2db(fx_u);
550         v = f2db(fx_v) * 64.0;
551         z = f2db(fx_z);
552         l = f2db(fx_l);
553         dudx = f2db(fx_du_dx);
554         dvdx = f2db(fx_dv_dx) * 64.0;
555         dzdx = f2db(fx_dz_dx);
556         dldx = f2db(fx_dl_dx);
557
558         rec_z = 1.0 / z; // gcc 2.95.2 is won't do this optimization itself
559
560         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
561         x = fx_xright - fx_xleft + 1;
562
563         if (!Transparency_on) {
564                 if (x >= 8) {
565                         if ((j = (size_t) dest & 7) != 0) {
566                                 j = 8 - j;
567
568                                 while (j > 0) {
569                                         *dest++ =
570                                             gr_fade_table[((int) fabs(l)) * 256 +
571                                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
572                                                                         (((int) (u * rec_z)) & 63)]];
573                                         l += dldx;
574                                         u += dudx;
575                                         v += dvdx;
576                                         z += dzdx;
577                                         rec_z = 1.0 / z;
578                                         x--;
579                                         j--;
580                                 }
581                         }
582
583                         j = x;
584                         while (j >= 8) {
585                                 destlong =
586                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
587                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
588                                                                             (((int) (u * rec_z)) & 63)]];
589                                 l += dldx;
590                                 u += dudx;
591                                 v += dvdx;
592                                 z += dzdx;
593                                 rec_z = 1.0 / z;
594                                 destlong |=
595                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
596                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
597                                                                             (((int) (u * rec_z)) & 63)]] << 8;
598                                 l += dldx;
599                                 u += dudx;
600                                 v += dvdx;
601                                 z += dzdx;
602                                 rec_z = 1.0 / z;
603                                 destlong |=
604                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
605                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
606                                                                             (((int) (u * rec_z)) & 63)]] << 16;
607                                 l += dldx;
608                                 u += dudx;
609                                 v += dvdx;
610                                 z += dzdx;
611                                 rec_z = 1.0 / z;
612                                 destlong |=
613                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
614                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
615                                                                             (((int) (u * rec_z)) & 63)]] << 24;
616                                 l += dldx;
617                                 u += dudx;
618                                 v += dvdx;
619                                 z += dzdx;
620                                 rec_z = 1.0 / z;
621                                 destlong |=
622                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
623                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
624                                                                             (((int) (u * rec_z)) & 63)]] << 32;
625                                 l += dldx;
626                                 u += dudx;
627                                 v += dvdx;
628                                 z += dzdx;
629                                 rec_z = 1.0 / z;
630                                 destlong |=
631                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
632                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
633                                                                             (((int) (u * rec_z)) & 63)]] << 40;
634                                 l += dldx;
635                                 u += dudx;
636                                 v += dvdx;
637                                 z += dzdx;
638                                 rec_z = 1.0 / z;
639                                 destlong |=
640                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
641                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
642                                                                             (((int) (u * rec_z)) & 63)]] << 48;
643                                 l += dldx;
644                                 u += dudx;
645                                 v += dvdx;
646                                 z += dzdx;
647                                 rec_z = 1.0 / z;
648                                 destlong |=
649                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
650                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
651                                                                             (((int) (u * rec_z)) & 63)]] << 56;
652                                 l += dldx;
653                                 u += dudx;
654                                 v += dvdx;
655                                 z += dzdx;
656                                 rec_z = 1.0 / z;
657
658                                 *((u_int64_t *) dest) = destlong;
659                                 dest += 8;
660                                 x -= 8;
661                                 j -= 8;
662                         }
663                 }
664                 while (x-- > 0) {
665                         *dest++ =
666                             gr_fade_table[((int) fabs(l)) * 256 +
667                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]];
668                         l += dldx;
669                         u += dudx;
670                         v += dvdx;
671                         z += dzdx;
672                         rec_z = 1.0 / z;
673                 }
674         } else {
675                 if (x >= 8) {
676                         if ((j = (size_t) dest & 7) != 0) {
677                                 j = 8 - j;
678
679                                 while (j > 0) {
680                                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
681                                         if (c != 255)
682                                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
683                                         dest++;
684                                         l += dldx;
685                                         u += dudx;
686                                         v += dvdx;
687                                         z += dzdx;
688                                         rec_z = 1.0 / z;
689                                         x--;
690                                         j--;
691                                 }
692                         }
693
694                         j = x;
695                         while (j >= 8) {
696                                 destlong = *((u_int64_t *) dest);
697                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
698                                 if (c != 255) {
699                                         destlong &= ~(u_int64_t)0xFF;
700                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c];
701                                 }
702                                 l += dldx;
703                                 u += dudx;
704                                 v += dvdx;
705                                 z += dzdx;
706                                 rec_z = 1.0 / z;
707                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
708                                 if (c != 255) {
709                                         destlong &= ~((u_int64_t)0xFF << 8);
710                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 8;
711                                 }
712                                 l += dldx;
713                                 u += dudx;
714                                 v += dvdx;
715                                 z += dzdx;
716                                 rec_z = 1.0 / z;
717                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
718                                 if (c != 255) {
719                                         destlong &= ~((u_int64_t)0xFF << 16);
720                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 16;
721                                 }
722                                 l += dldx;
723                                 u += dudx;
724                                 v += dvdx;
725                                 z += dzdx;
726                                 rec_z = 1.0 / z;
727                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
728                                 if (c != 255) {
729                                         destlong &= ~((u_int64_t)0xFF << 24);
730                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 24;
731                                 }
732                                 l += dldx;
733                                 u += dudx;
734                                 v += dvdx;
735                                 z += dzdx;
736                                 rec_z = 1.0 / z;
737                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
738                                 if (c != 255) {
739                                         destlong &= ~((u_int64_t)0xFF << 32);
740                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 32;
741                                 }
742                                 l += dldx;
743                                 u += dudx;
744                                 v += dvdx;
745                                 z += dzdx;
746                                 rec_z = 1.0 / z;
747                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
748                                 if (c != 255) {
749                                         destlong &= ~((u_int64_t)0xFF << 40);
750                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 40;
751                                 }
752                                 l += dldx;
753                                 u += dudx;
754                                 v += dvdx;
755                                 z += dzdx;
756                                 rec_z = 1.0 / z;
757                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
758                                 if (c != 255) {
759                                         destlong &= ~((u_int64_t)0xFF << 48);
760                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 48;
761                                 }
762                                 l += dldx;
763                                 u += dudx;
764                                 v += dvdx;
765                                 z += dzdx;
766                                 rec_z = 1.0 / z;
767                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
768                                 if (c != 255) {
769                                         destlong &= ~((u_int64_t)0xFF << 56);
770                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 56;
771                                 }
772                                 l += dldx;
773                                 u += dudx;
774                                 v += dvdx;
775                                 z += dzdx;
776                                 rec_z = 1.0 / z;
777
778                                 *((u_int64_t *) dest) = destlong;
779                                 dest += 8;
780                                 x -= 8;
781                                 j -= 8;
782                         }
783                 }
784                 while (x-- > 0) {
785                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
786                         if (c != 255)
787                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
788                         dest++;
789                         l += dldx;
790                         u += dudx;
791                         v += dvdx;
792                         z += dzdx;
793                         rec_z = 1.0 / z;
794                 }
795         }
796 }
797
798 #if 1
799 // note the unrolling loop is broken. It is never called, and uses big endian. -- FH
800 void c_tmap_scanline_per()
801 {
802         ubyte *dest;
803         uint c;
804         int x, j;
805         fix l,u,v,z;
806         fix dudx, dvdx, dzdx, dldx;
807
808         u = fx_u;
809         v = fx_v*64;
810         z = fx_z;
811         dudx = fx_du_dx; 
812         dvdx = fx_dv_dx*64; 
813         dzdx = fx_dz_dx;
814
815         l = fx_l>>8;
816         dldx = fx_dl_dx>>8;
817         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
818
819         if (!Transparency_on)   {
820                 ubyte*                  pixPtrLocalCopy = pixptr;
821                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
822                 unsigned long   destlong;
823
824                 x = fx_xright-fx_xleft+1;
825
826                 if ((j = (unsigned long) dest & 3) != 0)
827                         {
828                         j = 4 - j;
829
830                         if (j > x)
831                                 j = x;
832
833                         while (j > 0)
834                                 {       
835                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
836                                 *dest++ = fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
837                                 //end edit -MM
838                                 l += dldx;
839                                 u += dudx;
840                                 v += dvdx;
841                                 z += dzdx;
842                                 x--;
843                                 j--;
844                                 }
845                         }
846
847                 j &= ~3;
848                 while (j > 0)
849                         {
850                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
851                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 24;
852                         //end edit -MM
853                         l += dldx;
854                         u += dudx;
855                         v += dvdx;
856                         z += dzdx;
857                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
858                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 16;
859                         //end edit -MM
860                         l += dldx;
861                         u += dudx;
862                         v += dvdx;
863                         z += dzdx;
864                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
865                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 8;
866                         //end edit -MM
867                         l += dldx;
868                         u += dudx;
869                         v += dvdx;
870                         z += dzdx;
871                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
872                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
873                         //end edit -MM
874                         l += dldx;
875                         u += dudx;
876                         v += dvdx;
877                         z += dzdx;
878                         *((unsigned long *) dest) = destlong;
879                         dest += 4;
880                         x -= 4;
881                         j -= 4;
882                         }
883
884                 while (x-- > 0)
885                         {
886                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
887                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
888                         //end edit -MM
889                         l += dldx;
890                         u += dudx;
891                         v += dvdx;
892                         z += dzdx;
893                         }
894
895         } else {
896                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
897                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
898                         if ( c!=TRANSPARENCY_COLOR)
899                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
900                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
901                         //end edit -MM
902                         dest++;
903                         l += dldx;
904                         u += dudx;
905                         v += dvdx;
906                         z += dzdx;
907                 }
908         }
909 }
910
911 #else
912 void c_tmap_scanline_per()
913 {
914         ubyte *dest;
915         uint c;
916         int x;
917         fix u,v,z,l,dudx, dvdx, dzdx, dldx;
918
919         u = fx_u;
920         v = fx_v*64;
921         z = fx_z;
922         dudx = fx_du_dx; 
923         dvdx = fx_dv_dx*64; 
924         dzdx = fx_dz_dx;
925
926         l = fx_l>>8;
927         dldx = fx_dl_dx>>8;
928         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
929
930         if (!Transparency_on)   {
931                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
932                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
933                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
934                         //end edit -MM
935                         l += dldx;
936                         u += dudx;
937                         v += dvdx;
938                         z += dzdx;
939                 }
940         } else {
941                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
942                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
943                         if ( c!=255)
944                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
945                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
946                         //end edit -MM
947                         dest++;
948                         l += dldx;
949                         u += dudx;
950                         v += dvdx;
951                         z += dzdx;
952                 }
953         }
954 }
955
956 #endif
957
/*
 * Currently selected scanline drawers.  select_tmap() fills these in;
 * each one takes no arguments and returns nothing.
 */
/* "per" drawers: with lighting / without lighting */
void (*cur_tmap_scanline_per)(void), (*cur_tmap_scanline_per_nolight)(void);
/* "lin" drawers: with lighting / without lighting */
void (*cur_tmap_scanline_lin)(void), (*cur_tmap_scanline_lin_nolight)(void);
/* flat and shaded drawers */
void (*cur_tmap_scanline_flat)(void), (*cur_tmap_scanline_shaded)(void);
964
965 //runtime selection of optimized tmappers.  12/07/99  Matthew Mueller
966 //the reason I did it this way rather than having a *tmap_funcs that then points to a c_tmap or fp_tmap struct thats already filled in, is to avoid a second pointer dereference.
967 void select_tmap(char *type)
968 {
969         if (!type)
970         {
971 #if !defined(NO_ASM) && !defined(OGL)
972 #if defined(__pentiumpro__)
973                 select_tmap("ppro");
974 #elif defined(__pentium__)
975                 select_tmap("pent");
976 #else
977                 select_tmap("i386");
978 #endif
979 #else
980                 select_tmap("c");
981 #endif
982                 return;
983         }
984 #if !defined(NO_ASM) && !defined(OGL)
985         if (stricmp(type, "i386")==0)
986         {
987                 cur_tmap_scanline_per=asm_tmap_scanline_per;
988                 cur_tmap_scanline_per_nolight=asm_tmap_scanline_per;
989                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
990                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
991                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
992                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
993         }
994         else if (stricmp(type,"pent")==0){
995                 cur_tmap_scanline_per=asm_pent_tmap_scanline_per;
996                 cur_tmap_scanline_per_nolight=asm_pent_tmap_scanline_per;
997                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
998                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
999                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1000                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1001         }
1002         else if (stricmp(type,"ppro")==0){
1003                 cur_tmap_scanline_per=asm_ppro_tmap_scanline_per;
1004                 cur_tmap_scanline_per_nolight=asm_ppro_tmap_scanline_per;
1005                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
1006                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
1007                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1008                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1009         }
1010         else
1011 #endif
1012         if (stricmp(type,"fp")==0){
1013                 cur_tmap_scanline_per=c_fp_tmap_scanline_per;
1014                 cur_tmap_scanline_per_nolight=c_fp_tmap_scanline_per_nolight;
1015                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1016                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1017                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1018                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1019         }
1020         else {
1021                 if (stricmp(type,"c")!=0)
1022                         printf("unknown tmap requested, using c tmap\n");
1023                 cur_tmap_scanline_per=c_tmap_scanline_per;
1024                 cur_tmap_scanline_per_nolight=c_tmap_scanline_per_nolight;
1025                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1026                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1027                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1028                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1029         }
1030 }