]> icculus.org git repositories - btb/d2x.git/blob - texmap/scanline.c
remove rcs tags
[btb/d2x.git] / texmap / scanline.c
1 /*
2 THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
3 SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
4 END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
5 ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
6 IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
7 SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
8 FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
9 CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
10 AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
11 COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
12 */
13
14 /*
15  *
16  * Routines to draw the texture mapped scanlines.
17  *
18  */
19
20 #ifdef HAVE_CONFIG_H
21 #include <conf.h>
22 #endif
23
24 #include <math.h>
25 #include <limits.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "maths.h"
31 #include "mono.h"
32 #include "gr.h"
33 #include "grdef.h"
34 #include "texmap.h"
35 #include "texmapl.h"
36 #include "scanline.h"
37 #include "strutil.h"
38
39 void c_tmap_scanline_flat()
40 {
41         ubyte *dest;
42 //        int x;
43
44         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y )  );
45
46 /*      for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
47                 *dest++ = tmap_flat_color;
48         }*/
49         memset(dest,tmap_flat_color,fx_xright-fx_xleft+1);
50 }
51
52 void c_tmap_scanline_shaded()
53 {
54         int fade;
55         ubyte *dest, tmp;
56         int x;
57
58         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
59
60         fade = tmap_flat_shade_value<<8;
61         for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
62                 tmp = *dest;
63                 *dest++ = gr_fade_table[ fade |(tmp)];
64         }
65 }
66
67 void c_tmap_scanline_lin_nolight()
68 {
69         ubyte *dest;
70         uint c;
71         int x;
72         fix u,v,dudx, dvdx;
73
74         u = fx_u;
75         v = fx_v*64;
76         dudx = fx_du_dx; 
77         dvdx = fx_dv_dx*64; 
78
79         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
80
81         if (!Transparency_on)   {
82                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
83                         *dest++ = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
84                         u += dudx;
85                         v += dvdx;
86                 }
87         } else {
88                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
89                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
90                         if ( c!=255)
91                                 *dest = c;
92                         dest++;
93                         u += dudx;
94                         v += dvdx;
95                 }
96         }
97 }
98
99
100 #if 1
101 void c_tmap_scanline_lin()
102 {
103         ubyte *dest;
104         uint c;
105         int x, j;
106         fix u,v,l,dudx, dvdx, dldx;
107
108         u = fx_u;
109         v = fx_v*64;
110         dudx = fx_du_dx; 
111         dvdx = fx_dv_dx*64; 
112
113         l = fx_l>>8;
114         dldx = fx_dl_dx/256; // fx_dl_dx>>8 != fx_dl_dx/256 for negative numbers
115         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
116
117         if (!Transparency_on)   {
118                 ubyte*                  pixPtrLocalCopy = pixptr;
119                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
120                 unsigned long   destlong;
121
122                 x = fx_xright-fx_xleft+1;
123
124                 if ((j = (unsigned long) dest & 3) != 0)
125                         {
126                         j = 4 - j;
127
128                         if (j > x)
129                                 j = x;
130
131                         while (j > 0)
132                                 {       
133                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
134                                 *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
135                                 //end edit -MM
136                                 l += dldx;
137                                 u += dudx;
138                                 v += dvdx;
139                                 x--;
140                                 j--;
141                                 }
142                         }
143
144                 j &= ~3;
145                 while (j > 0)
146                         {
147                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
148                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 24;
149                         //end edit -MM
150                         l += dldx;
151                         u += dudx;
152                         v += dvdx;
153                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
154                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 16;
155                         //end edit -MM
156                         l += dldx;
157                         u += dudx;
158                         v += dvdx;
159                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
160                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 8;
161                         //end edit -MM
162                         l += dldx;
163                         u += dudx;
164                         v += dvdx;
165                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
166                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
167                         //end edit -MM
168                         l += dldx;
169                         u += dudx;
170                         v += dvdx;
171                         *((unsigned long *) dest) = destlong;
172                         dest += 4;
173                         x -= 4;
174                         j -= 4;
175                         }
176
177                 while (x-- > 0)
178                         {
179                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
180                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
181                         //end edit -MM
182                         l += dldx;
183                         u += dudx;
184                         v += dvdx;
185                         }
186
187         } else {
188                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
189                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
190                         if ( c!=TRANSPARENCY_COLOR)
191                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
192                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
193                         //end edit -MM
194                         dest++;
195                         l += dldx;
196                         u += dudx;
197                         v += dvdx;
198                 }
199         }
200 }
201
202 #else
203 void c_tmap_scanline_lin()
204 {
205         ubyte *dest;
206         uint c;
207         int x;
208         fix u,v,l,dudx, dvdx, dldx;
209
210         u = fx_u;
211         v = fx_v*64;
212         dudx = fx_du_dx; 
213         dvdx = fx_dv_dx*64; 
214
215         l = fx_l>>8;
216         dldx = fx_dl_dx/256; // fx_dl_dx>>8 != fx_dl_dx/256 for negative numbers
217         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
218
219         if (!Transparency_on)   {
220                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
221                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
222                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
223                         //end edit -MM
224                         l += dldx;
225                         u += dudx;
226                         v += dvdx;
227                 }
228         } else {
229                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
230                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
231                         if ( c!=255)
232                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
233                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
234                         //end edit -MM
235                         dest++;
236                         l += dldx;
237                         u += dudx;
238                         v += dvdx;
239                 }
240         }
241 }
242 #endif
243
244 // Used for energy centers. See comments for c_tmap_scanline_per().
245 void c_fp_tmap_scanline_per_nolight()
246 {
247         ubyte          *dest;
248         uint            c;
249         int             x, j;
250         double          u, v, z, dudx, dvdx, dzdx, rec_z;
251         uint64_t    destlong;
252
253         u = f2db(fx_u);
254         v = f2db(fx_v) * 64.0;
255         z = f2db(fx_z);
256         dudx = f2db(fx_du_dx);
257         dvdx = f2db(fx_dv_dx) * 64.0;
258         dzdx = f2db(fx_dz_dx);
259
260         rec_z = 1.0 / z;
261
262         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
263
264         x = fx_xright - fx_xleft + 1;
265         if (!Transparency_on) {
266                 if (x >= 8) {
267                         if ((j = (size_t) dest & 7) != 0) {
268                                 j = 8 - j;
269
270                                 while (j > 0) {
271                                         *dest++ =
272                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
273                                                          (((int) (u * rec_z)) & 63)];
274                                         u += dudx;
275                                         v += dvdx;
276                                         z += dzdx;
277                                         rec_z = 1.0 / z;
278                                         x--;
279                                         j--;
280                                 }
281                         }
282
283                         while (j >= 8) {
284                                 destlong =
285                                     (uint64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
286                                                        (((int) (u * rec_z)) & 63)];
287                                 u += dudx;
288                                 v += dvdx;
289                                 z += dzdx;
290                                 rec_z = 1.0 / z;
291                                 destlong |=
292                                     (uint64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
293                                                        (((int) (u * rec_z)) & 63)] << 8;
294                                 u += dudx;
295                                 v += dvdx;
296                                 z += dzdx;
297                                 rec_z = 1.0 / z;
298                                 destlong |=
299                                     (uint64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
300                                                        (((int) (u * rec_z)) & 63)] << 16;
301                                 u += dudx;
302                                 v += dvdx;
303                                 z += dzdx;
304                                 rec_z = 1.0 / z;
305                                 destlong |=
306                                     (uint64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
307                                                        (((int) (u * rec_z)) & 63)] << 24;
308                                 u += dudx;
309                                 v += dvdx;
310                                 z += dzdx;
311                                 rec_z = 1.0 / z;
312                                 destlong |=
313                                     (uint64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
314                                                        (((int) (u * rec_z)) & 63)] << 32;
315                                 u += dudx;
316                                 v += dvdx;
317                                 z += dzdx;
318                                 rec_z = 1.0 / z;
319                                 destlong |=
320                                     (uint64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
321                                                        (((int) (u * rec_z)) & 63)] << 40;
322                                 u += dudx;
323                                 v += dvdx;
324                                 z += dzdx;
325                                 rec_z = 1.0 / z;
326                                 destlong |=
327                                     (uint64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
328                                                        (((int) (u * rec_z)) & 63)] << 48;
329                                 u += dudx;
330                                 v += dvdx;
331                                 z += dzdx;
332                                 rec_z = 1.0 / z;
333                                 destlong |=
334                                     (uint64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
335                                                        (((int) (u * rec_z)) & 63)] << 56;
336                                 u += dudx;
337                                 v += dvdx;
338                                 z += dzdx;
339                                 rec_z = 1.0 / z;
340
341                                 *((uint64_t *) dest) = destlong;
342                                 dest += 8;
343                                 x -= 8;
344                                 j -= 8;
345                         }
346                 }
347                 while (x-- > 0) {
348                         *dest++ =
349                             (uint64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
350                                                (((int) (u * rec_z)) & 63)];
351                         u += dudx;
352                         v += dvdx;
353                         z += dzdx;
354                         rec_z = 1.0 / z;
355                 }
356         } else {
357                 x = fx_xright - fx_xleft + 1;
358
359                 if (x >= 8) {
360                         if ((j = (size_t) dest & 7) != 0) {
361                                 j = 8 - j;
362
363                                 while (j > 0) {
364                                         c =
365                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
366                                                          (((int) (u * rec_z)) & 63)];
367                                         if (c != 255)
368                                                 *dest = c;
369                                         dest++;
370                                         u += dudx;
371                                         v += dvdx;
372                                         z += dzdx;
373                                         rec_z = 1.0 / z;
374                                         x--;
375                                         j--;
376                                 }
377                         }
378
379                         j = x;
380                         while (j >= 8) {
381                                 destlong = *((uint64_t *) dest);
382                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
383                                                   (((int) (u * rec_z)) & 63)];
384                                 if (c != 255) {
385                                         destlong &= ~(uint64_t)0xFF;
386                                         destlong |= (uint64_t) c;
387                                 }
388                                 u += dudx;
389                                 v += dvdx;
390                                 z += dzdx;
391                                 rec_z = 1.0 / z;
392                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
393                                                   (((int) (u * rec_z)) & 63)];
394                                 if (c != 255) {
395                                         destlong &= ~((uint64_t)0xFF << 8);
396                                         destlong |= (uint64_t) c << 8;
397                                 }
398                                 u += dudx;
399                                 v += dvdx;
400                                 z += dzdx;
401                                 rec_z = 1.0 / z;
402                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
403                                                   (((int) (u * rec_z)) & 63)];
404                                 if (c != 255) {
405                                         destlong &= ~((uint64_t)0xFF << 16);
406                                         destlong |= (uint64_t) c << 16;
407                                 }
408                                 u += dudx;
409                                 v += dvdx;
410                                 z += dzdx;
411                                 rec_z = 1.0 / z;
412                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
413                                                   (((int) (u * rec_z)) & 63)];
414                                 if (c != 255) {
415                                         destlong &= ~((uint64_t)0xFF << 24);
416                                         destlong |= (uint64_t) c << 24;
417                                 }
418                                 u += dudx;
419                                 v += dvdx;
420                                 z += dzdx;
421                                 rec_z = 1.0 / z;
422                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
423                                                   (((int) (u * rec_z)) & 63)];
424                                 if (c != 255) {
425                                         destlong &= ~((uint64_t)0xFF << 32);
426                                         destlong |= (uint64_t) c << 32;
427                                 }
428                                 u += dudx;
429                                 v += dvdx;
430                                 z += dzdx;
431                                 rec_z = 1.0 / z;
432                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
433                                                   (((int) (u * rec_z)) & 63)];
434                                 if (c != 255) {
435                                         destlong &= ~((uint64_t)0xFF << 40);
436                                         destlong |= (uint64_t) c << 40;
437                                 }
438                                 u += dudx;
439                                 v += dvdx;
440                                 z += dzdx;
441                                 rec_z = 1.0 / z;
442                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
443                                                   (((int) (u * rec_z)) & 63)];
444                                 if (c != 255) {
445                                         destlong &= ~((uint64_t)0xFF << 48);
446                                         destlong |= (uint64_t) c << 48;
447                                 }
448                                 u += dudx;
449                                 v += dvdx;
450                                 z += dzdx;
451                                 rec_z = 1.0 / z;
452                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
453                                                   (((int) (u * rec_z)) & 63)];
454                                 if (c != 255) {
455                                         destlong &= ~((uint64_t)0xFF << 56);
456                                         destlong |= (uint64_t) c << 56;
457                                 }
458                                 u += dudx;
459                                 v += dvdx;
460                                 z += dzdx;
461                                 rec_z = 1.0 / z;
462
463                                 *((uint64_t *) dest) = destlong;
464                                 dest += 8;
465                                 x -= 8;
466                                 j -= 8;
467                         }
468                 }
469                 while (x-- > 0) {
470                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
471                                           (((int) (u * rec_z)) & 63)];
472                         if (c != 255)
473                                 *dest = c;
474                         dest++;
475                         u += dudx;
476                         v += dvdx;
477                         z += dzdx;
478                         rec_z = 1.0 / z;
479                 }
480         }
481 }
482
483 void c_tmap_scanline_per_nolight()
484 {
485         ubyte *dest;
486         uint c;
487         int x;
488         fix u,v,z,dudx, dvdx, dzdx;
489
490         u = fx_u;
491         v = fx_v*64;
492         z = fx_z;
493         dudx = fx_du_dx; 
494         dvdx = fx_dv_dx*64; 
495         dzdx = fx_dz_dx;
496
497         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
498
499         if (!Transparency_on)   {
500                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
501                         *dest++ = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
502                         u += dudx;
503                         v += dvdx;
504                         z += dzdx;
505                 }
506         } else {
507                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
508                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
509                         if ( c!=255)
510                                 *dest = c;
511                         dest++;
512                         u += dudx;
513                         v += dvdx;
514                         z += dzdx;
515                 }
516         }
517 }
518
519 // This texture mapper uses floating point extensively and writes 8 pixels at once, so it likely works
520 // best on 64 bit RISC processors.
521 // WARNING: it is not endian clean. For big endian, reverse the shift counts in the unrolled loops. I
522 // have no means to test that, so I didn't try it. Please tell me if you get this to work on a big
523 // endian machine.
524 // If you're using an Alpha, use the Compaq compiler for this file for quite some fps more.
525 // Unfortunately, it won't compile the whole source, so simply compile everything, change the
526 // compiler to ccc, remove scanline.o and compile again.
527 // Please send comments/suggestions to falk.hueffner@student.uni-tuebingen.de.
528 void c_fp_tmap_scanline_per()
529 {
530         ubyte          *dest;
531         uint            c;
532         int             x, j;
533         double          u, v, z, l, dudx, dvdx, dzdx, dldx, rec_z;
534         uint64_t       destlong;
535
536         u = f2db(fx_u);
537         v = f2db(fx_v) * 64.0;
538         z = f2db(fx_z);
539         l = f2db(fx_l);
540         dudx = f2db(fx_du_dx);
541         dvdx = f2db(fx_dv_dx) * 64.0;
542         dzdx = f2db(fx_dz_dx);
543         dldx = f2db(fx_dl_dx);
544
545         rec_z = 1.0 / z; // gcc 2.95.2 is won't do this optimization itself
546
547         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
548         x = fx_xright - fx_xleft + 1;
549
550         if (!Transparency_on) {
551                 if (x >= 8) {
552                         if ((j = (size_t) dest & 7) != 0) {
553                                 j = 8 - j;
554
555                                 while (j > 0) {
556                                         *dest++ =
557                                             gr_fade_table[((int) fabs(l)) * 256 +
558                                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
559                                                                         (((int) (u * rec_z)) & 63)]];
560                                         l += dldx;
561                                         u += dudx;
562                                         v += dvdx;
563                                         z += dzdx;
564                                         rec_z = 1.0 / z;
565                                         x--;
566                                         j--;
567                                 }
568                         }
569
570                         j = x;
571                         while (j >= 8) {
572                                 destlong =
573                                     (uint64_t) gr_fade_table[((int) fabs(l)) * 256 +
574                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
575                                                                             (((int) (u * rec_z)) & 63)]];
576                                 l += dldx;
577                                 u += dudx;
578                                 v += dvdx;
579                                 z += dzdx;
580                                 rec_z = 1.0 / z;
581                                 destlong |=
582                                     (uint64_t) gr_fade_table[((int) fabs(l)) * 256 +
583                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
584                                                                             (((int) (u * rec_z)) & 63)]] << 8;
585                                 l += dldx;
586                                 u += dudx;
587                                 v += dvdx;
588                                 z += dzdx;
589                                 rec_z = 1.0 / z;
590                                 destlong |=
591                                     (uint64_t) gr_fade_table[((int) fabs(l)) * 256 +
592                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
593                                                                             (((int) (u * rec_z)) & 63)]] << 16;
594                                 l += dldx;
595                                 u += dudx;
596                                 v += dvdx;
597                                 z += dzdx;
598                                 rec_z = 1.0 / z;
599                                 destlong |=
600                                     (uint64_t) gr_fade_table[((int) fabs(l)) * 256 +
601                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
602                                                                             (((int) (u * rec_z)) & 63)]] << 24;
603                                 l += dldx;
604                                 u += dudx;
605                                 v += dvdx;
606                                 z += dzdx;
607                                 rec_z = 1.0 / z;
608                                 destlong |=
609                                     (uint64_t) gr_fade_table[((int) fabs(l)) * 256 +
610                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
611                                                                             (((int) (u * rec_z)) & 63)]] << 32;
612                                 l += dldx;
613                                 u += dudx;
614                                 v += dvdx;
615                                 z += dzdx;
616                                 rec_z = 1.0 / z;
617                                 destlong |=
618                                     (uint64_t) gr_fade_table[((int) fabs(l)) * 256 +
619                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
620                                                                             (((int) (u * rec_z)) & 63)]] << 40;
621                                 l += dldx;
622                                 u += dudx;
623                                 v += dvdx;
624                                 z += dzdx;
625                                 rec_z = 1.0 / z;
626                                 destlong |=
627                                     (uint64_t) gr_fade_table[((int) fabs(l)) * 256 +
628                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
629                                                                             (((int) (u * rec_z)) & 63)]] << 48;
630                                 l += dldx;
631                                 u += dudx;
632                                 v += dvdx;
633                                 z += dzdx;
634                                 rec_z = 1.0 / z;
635                                 destlong |=
636                                     (uint64_t) gr_fade_table[((int) fabs(l)) * 256 +
637                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
638                                                                             (((int) (u * rec_z)) & 63)]] << 56;
639                                 l += dldx;
640                                 u += dudx;
641                                 v += dvdx;
642                                 z += dzdx;
643                                 rec_z = 1.0 / z;
644
645                                 *((uint64_t *) dest) = destlong;
646                                 dest += 8;
647                                 x -= 8;
648                                 j -= 8;
649                         }
650                 }
651                 while (x-- > 0) {
652                         *dest++ =
653                             gr_fade_table[((int) fabs(l)) * 256 +
654                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]];
655                         l += dldx;
656                         u += dudx;
657                         v += dvdx;
658                         z += dzdx;
659                         rec_z = 1.0 / z;
660                 }
661         } else {
662                 if (x >= 8) {
663                         if ((j = (size_t) dest & 7) != 0) {
664                                 j = 8 - j;
665
666                                 while (j > 0) {
667                                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
668                                         if (c != 255)
669                                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
670                                         dest++;
671                                         l += dldx;
672                                         u += dudx;
673                                         v += dvdx;
674                                         z += dzdx;
675                                         rec_z = 1.0 / z;
676                                         x--;
677                                         j--;
678                                 }
679                         }
680
681                         j = x;
682                         while (j >= 8) {
683                                 destlong = *((uint64_t *) dest);
684                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
685                                 if (c != 255) {
686                                         destlong &= ~(uint64_t)0xFF;
687                                         destlong |= (uint64_t) gr_fade_table[((int) fabs(l)) * 256 + c];
688                                 }
689                                 l += dldx;
690                                 u += dudx;
691                                 v += dvdx;
692                                 z += dzdx;
693                                 rec_z = 1.0 / z;
694                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
695                                 if (c != 255) {
696                                         destlong &= ~((uint64_t)0xFF << 8);
697                                         destlong |= (uint64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 8;
698                                 }
699                                 l += dldx;
700                                 u += dudx;
701                                 v += dvdx;
702                                 z += dzdx;
703                                 rec_z = 1.0 / z;
704                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
705                                 if (c != 255) {
706                                         destlong &= ~((uint64_t)0xFF << 16);
707                                         destlong |= (uint64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 16;
708                                 }
709                                 l += dldx;
710                                 u += dudx;
711                                 v += dvdx;
712                                 z += dzdx;
713                                 rec_z = 1.0 / z;
714                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
715                                 if (c != 255) {
716                                         destlong &= ~((uint64_t)0xFF << 24);
717                                         destlong |= (uint64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 24;
718                                 }
719                                 l += dldx;
720                                 u += dudx;
721                                 v += dvdx;
722                                 z += dzdx;
723                                 rec_z = 1.0 / z;
724                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
725                                 if (c != 255) {
726                                         destlong &= ~((uint64_t)0xFF << 32);
727                                         destlong |= (uint64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 32;
728                                 }
729                                 l += dldx;
730                                 u += dudx;
731                                 v += dvdx;
732                                 z += dzdx;
733                                 rec_z = 1.0 / z;
734                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
735                                 if (c != 255) {
736                                         destlong &= ~((uint64_t)0xFF << 40);
737                                         destlong |= (uint64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 40;
738                                 }
739                                 l += dldx;
740                                 u += dudx;
741                                 v += dvdx;
742                                 z += dzdx;
743                                 rec_z = 1.0 / z;
744                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
745                                 if (c != 255) {
746                                         destlong &= ~((uint64_t)0xFF << 48);
747                                         destlong |= (uint64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 48;
748                                 }
749                                 l += dldx;
750                                 u += dudx;
751                                 v += dvdx;
752                                 z += dzdx;
753                                 rec_z = 1.0 / z;
754                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
755                                 if (c != 255) {
756                                         destlong &= ~((uint64_t)0xFF << 56);
757                                         destlong |= (uint64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 56;
758                                 }
759                                 l += dldx;
760                                 u += dudx;
761                                 v += dvdx;
762                                 z += dzdx;
763                                 rec_z = 1.0 / z;
764
765                                 *((uint64_t *) dest) = destlong;
766                                 dest += 8;
767                                 x -= 8;
768                                 j -= 8;
769                         }
770                 }
771                 while (x-- > 0) {
772                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
773                         if (c != 255)
774                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
775                         dest++;
776                         l += dldx;
777                         u += dudx;
778                         v += dvdx;
779                         z += dzdx;
780                         rec_z = 1.0 / z;
781                 }
782         }
783 }
784
785 #if 1
786 // note the unrolling loop is broken. It is never called, and uses big endian. -- FH
787 void c_tmap_scanline_per()
788 {
789         ubyte *dest;
790         uint c;
791         int x, j;
792         fix l,u,v,z;
793         fix dudx, dvdx, dzdx, dldx;
794
795         u = fx_u;
796         v = fx_v*64;
797         z = fx_z;
798         dudx = fx_du_dx; 
799         dvdx = fx_dv_dx*64; 
800         dzdx = fx_dz_dx;
801
802         l = fx_l>>8;
803         dldx = fx_dl_dx/256; // fx_dl_dx>>8 != fx_dl_dx/256 for negative numbers
804         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
805
806         if (!Transparency_on)   {
807                 ubyte*                  pixPtrLocalCopy = pixptr;
808                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
809                 unsigned long   destlong;
810
811                 x = fx_xright-fx_xleft+1; // x = number of pixels in scanline
812
813                 if ((j = (unsigned long) dest & 3) != 0)
814                         {
815                         j = 4 - j;
816
817                         if (j > x)
818                                 j = x;
819
820                         while (j > 0)
821                                 {       
822                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
823                                 *dest++ = fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
824                                 //end edit -MM
825                                 l += dldx;
826                                 u += dudx;
827                                 v += dvdx;
828                                 z += dzdx;
829                                 x--;
830                                 j--;
831                                 }
832                         }
833
834                 j &= ~3;
835                 while (j > 0)
836                         {
837                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
838                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 24;
839                         //end edit -MM
840                         l += dldx;
841                         u += dudx;
842                         v += dvdx;
843                         z += dzdx;
844                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
845                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 16;
846                         //end edit -MM
847                         l += dldx;
848                         u += dudx;
849                         v += dvdx;
850                         z += dzdx;
851                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
852                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 8;
853                         //end edit -MM
854                         l += dldx;
855                         u += dudx;
856                         v += dvdx;
857                         z += dzdx;
858                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
859                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
860                         //end edit -MM
861                         l += dldx;
862                         u += dudx;
863                         v += dvdx;
864                         z += dzdx;
865                         *((unsigned long *) dest) = destlong;
866                         dest += 4;
867                         x -= 4;
868                         j -= 4;
869                         }
870
871                 while (x-- > 0)
872                         {
873                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
874                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
875                         //end edit -MM
876                         l += dldx;
877                         u += dudx;
878                         v += dvdx;
879                         z += dzdx;
880                         }
881
882         } else {
883                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
884                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
885                         if ( c!=TRANSPARENCY_COLOR)
886                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
887                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
888                         //end edit -MM
889                         dest++;
890                         l += dldx;
891                         u += dudx;
892                         v += dvdx;
893                         z += dzdx;
894                 }
895         }
896 }
897
898 #else
899 void c_tmap_scanline_per()
900 {
901         ubyte *dest;
902         uint c;
903         int x;
904         fix u,v,z,l,dudx, dvdx, dzdx, dldx;
905
906         u = fx_u;
907         v = fx_v*64;
908         z = fx_z;
909         dudx = fx_du_dx; 
910         dvdx = fx_dv_dx*64; 
911         dzdx = fx_dz_dx;
912
913         l = fx_l>>8;
914         dldx = fx_dl_dx/256; // fx_dl_dx>>8 != fx_dl_dx/256 for negative numbers
915         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
916
917         if (!Transparency_on)   {
918                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
919                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
920                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
921                         //end edit -MM
922                         l += dldx;
923                         u += dudx;
924                         v += dvdx;
925                         z += dzdx;
926                 }
927         } else {
928                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
929                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
930                         if ( c!=255)
931                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
932                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
933                         //end edit -MM
934                         dest++;
935                         l += dldx;
936                         u += dudx;
937                         v += dvdx;
938                         z += dzdx;
939                 }
940         }
941 }
942
943 #endif
944
/*
 * Currently selected scanline renderers.  select_tmap() points each of
 * these at the implementation that belongs to the requested texture mapper.
 */
typedef void tmap_scanline_fn(void);

tmap_scanline_fn *cur_tmap_scanline_per;
tmap_scanline_fn *cur_tmap_scanline_per_nolight;
tmap_scanline_fn *cur_tmap_scanline_lin;
tmap_scanline_fn *cur_tmap_scanline_lin_nolight;
tmap_scanline_fn *cur_tmap_scanline_flat;
tmap_scanline_fn *cur_tmap_scanline_shaded;
951
952 //runtime selection of optimized tmappers.  12/07/99  Matthew Mueller
953 //the reason I did it this way rather than having a *tmap_funcs that then points to a c_tmap or fp_tmap struct thats already filled in, is to avoid a second pointer dereference.
954 void select_tmap(char *type)
955 {
956         if (!type)
957         {
958 #if !defined(NO_ASM) && !defined(OGL)
959 #if defined(__pentiumpro__)
960                 select_tmap("ppro");
961 #elif defined(__pentium__)
962                 select_tmap("pent");
963 #else
964                 select_tmap("i386");
965 #endif
966 #else
967                 select_tmap("c");
968 #endif
969                 return;
970         }
971 #if !defined(NO_ASM) && !defined(OGL)
972         if (stricmp(type, "i386")==0)
973         {
974                 cur_tmap_scanline_per=asm_tmap_scanline_per;
975                 cur_tmap_scanline_per_nolight=asm_tmap_scanline_per;
976                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
977                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
978                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
979                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
980         }
981         else if (stricmp(type,"pent")==0){
982                 cur_tmap_scanline_per=asm_pent_tmap_scanline_per;
983                 cur_tmap_scanline_per_nolight=asm_pent_tmap_scanline_per;
984                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
985                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
986                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
987                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
988         }
989         else if (stricmp(type,"ppro")==0){
990                 cur_tmap_scanline_per=asm_ppro_tmap_scanline_per;
991                 cur_tmap_scanline_per_nolight=asm_ppro_tmap_scanline_per;
992                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
993                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
994                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
995                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
996         }
997         else
998 #endif
999         if (stricmp(type,"fp")==0){
1000                 cur_tmap_scanline_per=c_fp_tmap_scanline_per;
1001                 cur_tmap_scanline_per_nolight=c_fp_tmap_scanline_per_nolight;
1002                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1003                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1004                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1005                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1006         }
1007         else {
1008                 if (stricmp(type,"c")!=0)
1009                         printf("unknown tmap requested, using c tmap\n");
1010                 cur_tmap_scanline_per=c_tmap_scanline_per;
1011                 cur_tmap_scanline_per_nolight=c_tmap_scanline_per_nolight;
1012                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1013                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1014                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1015                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1016         }
1017 }