This commit was manufactured by cvs2svn to create tag 'd2x-0_1_2'.
[btb/d2x.git] / texmap / scanline.c
1 /*
2 THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
3 SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
4 END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
5 ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
6 IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
7 SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
8 FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
9 CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
10 AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.  
11 COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
12 */
13 /*
14  * $Source: /cvs/cvsroot/d2x/texmap/scanline.c,v $
15  * $Revision: 1.5 $
16  * $Author: bradleyb $
17  * $Date: 2001-11-14 20:13:19 $
18  * 
19  * Routines to draw the texture mapped scanlines.
20  * 
21  * $Log: not supported by cvs2svn $
22  * Revision 1.4  2001/10/25 08:25:34  bradleyb
23  * Finished moving stuff to arch/blah.  I know, it's ugly, but It'll be easier to sync with d1x.
24  *
25  * Revision 1.3  2001/10/25 02:22:46  bradleyb
26  * adding support for runtime selection of tmap funcs
27  *
28  * Revision 1.2  2001/01/31 15:18:04  bradleyb
29  * Makefile and conf.h fixes
30  *
31  * Revision 1.1.1.1  2001/01/19 03:30:16  bradleyb
32  * Import of d2x-0.0.8
33  *
34  * Revision 1.4  1999/10/18 00:31:01  donut
35  * more alpha fixes from Falk Hueffner
36  *
37  * Revision 1.3  1999/10/14 04:48:21  donut
38  * alpha fixes, and gl_font args
39  *
40  * Revision 1.2  1999/09/21 07:22:40  sekmu
41  * remove unused var warning
42  *
43  * Revision 1.1.1.1  1999/06/14 22:14:08  donut
44  * Import of d1x 1.37 source.
45  *
46  * Revision 1.2  1995/02/20  18:23:39  john
47  * Added new module for C versions of inner loops.
48  * 
49  * Revision 1.1  1995/02/20  17:42:27  john
50  * Initial revision
51  * 
52  * 
53  */
54
55 #ifdef HAVE_CONFIG_H
56 #include <conf.h>
57 #endif
58
59 #ifdef RCS
60 static char rcsid[] = "$Id: scanline.c,v 1.5 2001-11-14 20:13:19 bradleyb Exp $";
61 #endif
62
63 #include <math.h>
64 #include <limits.h>
65 #include <stdio.h>
66 #include <stdlib.h>
67 #include <string.h>
68
69 #include "maths.h"
70 #include "mono.h"
71 #include "gr.h"
72 #include "grdef.h"
73 #include "texmap.h"
74 #include "texmapl.h"
75 #include "scanline.h"
76 #include "strutil.h"
77
78 void c_tmap_scanline_flat()
79 {
80         ubyte *dest;
81 //        int x;
82
83         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y )  );
84
85 /*      for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
86                 *dest++ = tmap_flat_color;
87         }*/
88         memset(dest,tmap_flat_color,fx_xright-fx_xleft+1);
89 }
90
91 void c_tmap_scanline_shaded()
92 {
93         int fade;
94         ubyte *dest, tmp;
95         int x;
96
97         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
98
99         fade = tmap_flat_shade_value<<8;
100         for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
101                 tmp = *dest;
102                 *dest++ = gr_fade_table[ fade |(tmp)];
103         }
104 }
105
106 void c_tmap_scanline_lin_nolight()
107 {
108         ubyte *dest;
109         uint c;
110         int x;
111         fix u,v,dudx, dvdx;
112
113         u = fx_u;
114         v = fx_v*64;
115         dudx = fx_du_dx; 
116         dvdx = fx_dv_dx*64; 
117
118         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
119
120         if (!Transparency_on)   {
121                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
122                         *dest++ = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
123                         u += dudx;
124                         v += dvdx;
125                 }
126         } else {
127                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
128                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
129                         if ( c!=255)
130                                 *dest = c;
131                         dest++;
132                         u += dudx;
133                         v += dvdx;
134                 }
135         }
136 }
137
138
139 #if 1
140 void c_tmap_scanline_lin()
141 {
142         ubyte *dest;
143         uint c;
144         int x, j;
145         fix u,v,l,dudx, dvdx, dldx;
146
147         u = fx_u;
148         v = fx_v*64;
149         dudx = fx_du_dx; 
150         dvdx = fx_dv_dx*64; 
151
152         l = fx_l>>8;
153         dldx = fx_dl_dx>>8;
154         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
155
156         if (!Transparency_on)   {
157                 ubyte*                  pixPtrLocalCopy = pixptr;
158                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
159                 unsigned long   destlong;
160
161                 x = fx_xright-fx_xleft+1;
162
163                 if ((j = (unsigned long) dest & 3) != 0)
164                         {
165                         j = 4 - j;
166
167                         if (j > x)
168                                 j = x;
169
170                         while (j > 0)
171                                 {       
172                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
173                                 *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
174                                 //end edit -MM
175                                 l += dldx;
176                                 u += dudx;
177                                 v += dvdx;
178                                 x--;
179                                 j--;
180                                 }
181                         }
182
183                 j &= ~3;
184                 while (j > 0)
185                         {
186                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
187                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 24;
188                         //end edit -MM
189                         l += dldx;
190                         u += dudx;
191                         v += dvdx;
192                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
193                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 16;
194                         //end edit -MM
195                         l += dldx;
196                         u += dudx;
197                         v += dvdx;
198                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
199                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 8;
200                         //end edit -MM
201                         l += dldx;
202                         u += dudx;
203                         v += dvdx;
204                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
205                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
206                         //end edit -MM
207                         l += dldx;
208                         u += dudx;
209                         v += dvdx;
210                         *((unsigned long *) dest) = destlong;
211                         dest += 4;
212                         x -= 4;
213                         j -= 4;
214                         }
215
216                 while (x-- > 0)
217                         {
218                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
219                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
220                         //end edit -MM
221                         l += dldx;
222                         u += dudx;
223                         v += dvdx;
224                         }
225
226         } else {
227                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
228                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
229                         if ( c!=TRANSPARENCY_COLOR)
230                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
231                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
232                         //end edit -MM
233                         dest++;
234                         l += dldx;
235                         u += dudx;
236                         v += dvdx;
237                 }
238         }
239 }
240
241 #else
242 void c_tmap_scanline_lin()
243 {
244         ubyte *dest;
245         uint c;
246         int x;
247         fix u,v,l,dudx, dvdx, dldx;
248
249         u = fx_u;
250         v = fx_v*64;
251         dudx = fx_du_dx; 
252         dvdx = fx_dv_dx*64; 
253
254         l = fx_l>>8;
255         dldx = fx_dl_dx>>8;
256         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
257
258         if (!Transparency_on)   {
259                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
260                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
261                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
262                         //end edit -MM
263                         l += dldx;
264                         u += dudx;
265                         v += dvdx;
266                 }
267         } else {
268                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
269                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
270                         if ( c!=255)
271                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
272                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
273                         //end edit -MM
274                         dest++;
275                         l += dldx;
276                         u += dudx;
277                         v += dvdx;
278                 }
279         }
280 }
281 #endif
282
283 // Used for energy centers. See comments for c_tmap_scanline_per().
284 void c_fp_tmap_scanline_per_nolight()
285 {
286         ubyte          *dest;
287         uint            c;
288         int             x, j;
289         double          u, v, z, dudx, dvdx, dzdx, rec_z;
290         u_int64_t       destlong;
291
292         u = f2db(fx_u);
293         v = f2db(fx_v) * 64.0;
294         z = f2db(fx_z);
295         dudx = f2db(fx_du_dx);
296         dvdx = f2db(fx_dv_dx) * 64.0;
297         dzdx = f2db(fx_dz_dx);
298
299         rec_z = 1.0 / z;
300
301         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
302
303         x = fx_xright - fx_xleft + 1;
304         if (!Transparency_on) {
305                 if (x >= 8) {
306                         if ((j = (size_t) dest & 7) != 0) {
307                                 j = 8 - j;
308
309                                 while (j > 0) {
310                                         *dest++ =
311                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
312                                                          (((int) (u * rec_z)) & 63)];
313                                         u += dudx;
314                                         v += dvdx;
315                                         z += dzdx;
316                                         rec_z = 1.0 / z;
317                                         x--;
318                                         j--;
319                                 }
320                         }
321
322                         while (j >= 8) {
323                                 destlong =
324                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
325                                                        (((int) (u * rec_z)) & 63)];
326                                 u += dudx;
327                                 v += dvdx;
328                                 z += dzdx;
329                                 rec_z = 1.0 / z;
330                                 destlong |=
331                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
332                                                        (((int) (u * rec_z)) & 63)] << 8;
333                                 u += dudx;
334                                 v += dvdx;
335                                 z += dzdx;
336                                 rec_z = 1.0 / z;
337                                 destlong |=
338                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
339                                                        (((int) (u * rec_z)) & 63)] << 16;
340                                 u += dudx;
341                                 v += dvdx;
342                                 z += dzdx;
343                                 rec_z = 1.0 / z;
344                                 destlong |=
345                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
346                                                        (((int) (u * rec_z)) & 63)] << 24;
347                                 u += dudx;
348                                 v += dvdx;
349                                 z += dzdx;
350                                 rec_z = 1.0 / z;
351                                 destlong |=
352                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
353                                                        (((int) (u * rec_z)) & 63)] << 32;
354                                 u += dudx;
355                                 v += dvdx;
356                                 z += dzdx;
357                                 rec_z = 1.0 / z;
358                                 destlong |=
359                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
360                                                        (((int) (u * rec_z)) & 63)] << 40;
361                                 u += dudx;
362                                 v += dvdx;
363                                 z += dzdx;
364                                 rec_z = 1.0 / z;
365                                 destlong |=
366                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
367                                                        (((int) (u * rec_z)) & 63)] << 48;
368                                 u += dudx;
369                                 v += dvdx;
370                                 z += dzdx;
371                                 rec_z = 1.0 / z;
372                                 destlong |=
373                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
374                                                        (((int) (u * rec_z)) & 63)] << 56;
375                                 u += dudx;
376                                 v += dvdx;
377                                 z += dzdx;
378                                 rec_z = 1.0 / z;
379
380                                 *((u_int64_t *) dest) = destlong;
381                                 dest += 8;
382                                 x -= 8;
383                                 j -= 8;
384                         }
385                 }
386                 while (x-- > 0) {
387                         *dest++ =
388                             (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
389                                                (((int) (u * rec_z)) & 63)];
390                         u += dudx;
391                         v += dvdx;
392                         z += dzdx;
393                         rec_z = 1.0 / z;
394                 }
395         } else {
396                 x = fx_xright - fx_xleft + 1;
397
398                 if (x >= 8) {
399                         if ((j = (size_t) dest & 7) != 0) {
400                                 j = 8 - j;
401
402                                 while (j > 0) {
403                                         c =
404                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
405                                                          (((int) (u * rec_z)) & 63)];
406                                         if (c != 255)
407                                                 *dest = c;
408                                         dest++;
409                                         u += dudx;
410                                         v += dvdx;
411                                         z += dzdx;
412                                         rec_z = 1.0 / z;
413                                         x--;
414                                         j--;
415                                 }
416                         }
417
418                         j = x;
419                         while (j >= 8) {
420                                 destlong = *((u_int64_t *) dest);
421                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
422                                                   (((int) (u * rec_z)) & 63)];
423                                 if (c != 255) {
424                                         destlong &= ~(u_int64_t)0xFF;
425                                         destlong |= (u_int64_t) c;
426                                 }
427                                 u += dudx;
428                                 v += dvdx;
429                                 z += dzdx;
430                                 rec_z = 1.0 / z;
431                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
432                                                   (((int) (u * rec_z)) & 63)];
433                                 if (c != 255) {
434                                         destlong &= ~((u_int64_t)0xFF << 8);
435                                         destlong |= (u_int64_t) c << 8;
436                                 }
437                                 u += dudx;
438                                 v += dvdx;
439                                 z += dzdx;
440                                 rec_z = 1.0 / z;
441                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
442                                                   (((int) (u * rec_z)) & 63)];
443                                 if (c != 255) {
444                                         destlong &= ~((u_int64_t)0xFF << 16);
445                                         destlong |= (u_int64_t) c << 16;
446                                 }
447                                 u += dudx;
448                                 v += dvdx;
449                                 z += dzdx;
450                                 rec_z = 1.0 / z;
451                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
452                                                   (((int) (u * rec_z)) & 63)];
453                                 if (c != 255) {
454                                         destlong &= ~((u_int64_t)0xFF << 24);
455                                         destlong |= (u_int64_t) c << 24;
456                                 }
457                                 u += dudx;
458                                 v += dvdx;
459                                 z += dzdx;
460                                 rec_z = 1.0 / z;
461                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
462                                                   (((int) (u * rec_z)) & 63)];
463                                 if (c != 255) {
464                                         destlong &= ~((u_int64_t)0xFF << 32);
465                                         destlong |= (u_int64_t) c << 32;
466                                 }
467                                 u += dudx;
468                                 v += dvdx;
469                                 z += dzdx;
470                                 rec_z = 1.0 / z;
471                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
472                                                   (((int) (u * rec_z)) & 63)];
473                                 if (c != 255) {
474                                         destlong &= ~((u_int64_t)0xFF << 40);
475                                         destlong |= (u_int64_t) c << 40;
476                                 }
477                                 u += dudx;
478                                 v += dvdx;
479                                 z += dzdx;
480                                 rec_z = 1.0 / z;
481                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
482                                                   (((int) (u * rec_z)) & 63)];
483                                 if (c != 255) {
484                                         destlong &= ~((u_int64_t)0xFF << 48);
485                                         destlong |= (u_int64_t) c << 48;
486                                 }
487                                 u += dudx;
488                                 v += dvdx;
489                                 z += dzdx;
490                                 rec_z = 1.0 / z;
491                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
492                                                   (((int) (u * rec_z)) & 63)];
493                                 if (c != 255) {
494                                         destlong &= ~((u_int64_t)0xFF << 56);
495                                         destlong |= (u_int64_t) c << 56;
496                                 }
497                                 u += dudx;
498                                 v += dvdx;
499                                 z += dzdx;
500                                 rec_z = 1.0 / z;
501
502                                 *((u_int64_t *) dest) = destlong;
503                                 dest += 8;
504                                 x -= 8;
505                                 j -= 8;
506                         }
507                 }
508                 while (x-- > 0) {
509                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
510                                           (((int) (u * rec_z)) & 63)];
511                         if (c != 255)
512                                 *dest = c;
513                         dest++;
514                         u += dudx;
515                         v += dvdx;
516                         z += dzdx;
517                         rec_z = 1.0 / z;
518                 }
519         }
520 }
521
522 void c_tmap_scanline_per_nolight()
523 {
524         ubyte *dest;
525         uint c;
526         int x;
527         fix u,v,z,dudx, dvdx, dzdx;
528
529         u = fx_u;
530         v = fx_v*64;
531         z = fx_z;
532         dudx = fx_du_dx; 
533         dvdx = fx_dv_dx*64; 
534         dzdx = fx_dz_dx;
535
536         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
537
538         if (!Transparency_on)   {
539                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
540                         *dest++ = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
541                         u += dudx;
542                         v += dvdx;
543                         z += dzdx;
544                 }
545         } else {
546                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
547                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
548                         if ( c!=255)
549                                 *dest = c;
550                         dest++;
551                         u += dudx;
552                         v += dvdx;
553                         z += dzdx;
554                 }
555         }
556 }
557
558 // This texture mapper uses floating point extensively and writes 8 pixels at once, so it likely works
559 // best on 64 bit RISC processors.
560 // WARNING: it is not endian clean. For big endian, reverse the shift counts in the unrolled loops. I
561 // have no means to test that, so I didn't try it. Please tell me if you get this to work on a big
562 // endian machine.
563 // If you're using an Alpha, use the Compaq compiler for this file for quite some fps more.
564 // Unfortunately, it won't compile the whole source, so simply compile everything, change the
565 // compiler to ccc, remove scanline.o and compile again.
566 // Please send comments/suggestions to falk.hueffner@student.uni-tuebingen.de.
567 void c_fp_tmap_scanline_per()
568 {
569         ubyte          *dest;
570         uint            c;
571         int             x, j;
572         double          u, v, z, l, dudx, dvdx, dzdx, dldx, rec_z;
573         u_int64_t       destlong;
574
575         u = f2db(fx_u);
576         v = f2db(fx_v) * 64.0;
577         z = f2db(fx_z);
578         l = f2db(fx_l);
579         dudx = f2db(fx_du_dx);
580         dvdx = f2db(fx_dv_dx) * 64.0;
581         dzdx = f2db(fx_dz_dx);
582         dldx = f2db(fx_dl_dx);
583
584         rec_z = 1.0 / z; // gcc 2.95.2 is won't do this optimization itself
585
586         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
587         x = fx_xright - fx_xleft + 1;
588
589         if (!Transparency_on) {
590                 if (x >= 8) {
591                         if ((j = (size_t) dest & 7) != 0) {
592                                 j = 8 - j;
593
594                                 while (j > 0) {
595                                         *dest++ =
596                                             gr_fade_table[((int) fabs(l)) * 256 +
597                                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
598                                                                         (((int) (u * rec_z)) & 63)]];
599                                         l += dldx;
600                                         u += dudx;
601                                         v += dvdx;
602                                         z += dzdx;
603                                         rec_z = 1.0 / z;
604                                         x--;
605                                         j--;
606                                 }
607                         }
608
609                         j = x;
610                         while (j >= 8) {
611                                 destlong =
612                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
613                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
614                                                                             (((int) (u * rec_z)) & 63)]];
615                                 l += dldx;
616                                 u += dudx;
617                                 v += dvdx;
618                                 z += dzdx;
619                                 rec_z = 1.0 / z;
620                                 destlong |=
621                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
622                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
623                                                                             (((int) (u * rec_z)) & 63)]] << 8;
624                                 l += dldx;
625                                 u += dudx;
626                                 v += dvdx;
627                                 z += dzdx;
628                                 rec_z = 1.0 / z;
629                                 destlong |=
630                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
631                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
632                                                                             (((int) (u * rec_z)) & 63)]] << 16;
633                                 l += dldx;
634                                 u += dudx;
635                                 v += dvdx;
636                                 z += dzdx;
637                                 rec_z = 1.0 / z;
638                                 destlong |=
639                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
640                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
641                                                                             (((int) (u * rec_z)) & 63)]] << 24;
642                                 l += dldx;
643                                 u += dudx;
644                                 v += dvdx;
645                                 z += dzdx;
646                                 rec_z = 1.0 / z;
647                                 destlong |=
648                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
649                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
650                                                                             (((int) (u * rec_z)) & 63)]] << 32;
651                                 l += dldx;
652                                 u += dudx;
653                                 v += dvdx;
654                                 z += dzdx;
655                                 rec_z = 1.0 / z;
656                                 destlong |=
657                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
658                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
659                                                                             (((int) (u * rec_z)) & 63)]] << 40;
660                                 l += dldx;
661                                 u += dudx;
662                                 v += dvdx;
663                                 z += dzdx;
664                                 rec_z = 1.0 / z;
665                                 destlong |=
666                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
667                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
668                                                                             (((int) (u * rec_z)) & 63)]] << 48;
669                                 l += dldx;
670                                 u += dudx;
671                                 v += dvdx;
672                                 z += dzdx;
673                                 rec_z = 1.0 / z;
674                                 destlong |=
675                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
676                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
677                                                                             (((int) (u * rec_z)) & 63)]] << 56;
678                                 l += dldx;
679                                 u += dudx;
680                                 v += dvdx;
681                                 z += dzdx;
682                                 rec_z = 1.0 / z;
683
684                                 *((u_int64_t *) dest) = destlong;
685                                 dest += 8;
686                                 x -= 8;
687                                 j -= 8;
688                         }
689                 }
690                 while (x-- > 0) {
691                         *dest++ =
692                             gr_fade_table[((int) fabs(l)) * 256 +
693                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]];
694                         l += dldx;
695                         u += dudx;
696                         v += dvdx;
697                         z += dzdx;
698                         rec_z = 1.0 / z;
699                 }
700         } else {
701                 if (x >= 8) {
702                         if ((j = (size_t) dest & 7) != 0) {
703                                 j = 8 - j;
704
705                                 while (j > 0) {
706                                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
707                                         if (c != 255)
708                                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
709                                         dest++;
710                                         l += dldx;
711                                         u += dudx;
712                                         v += dvdx;
713                                         z += dzdx;
714                                         rec_z = 1.0 / z;
715                                         x--;
716                                         j--;
717                                 }
718                         }
719
720                         j = x;
721                         while (j >= 8) {
722                                 destlong = *((u_int64_t *) dest);
723                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
724                                 if (c != 255) {
725                                         destlong &= ~(u_int64_t)0xFF;
726                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c];
727                                 }
728                                 l += dldx;
729                                 u += dudx;
730                                 v += dvdx;
731                                 z += dzdx;
732                                 rec_z = 1.0 / z;
733                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
734                                 if (c != 255) {
735                                         destlong &= ~((u_int64_t)0xFF << 8);
736                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 8;
737                                 }
738                                 l += dldx;
739                                 u += dudx;
740                                 v += dvdx;
741                                 z += dzdx;
742                                 rec_z = 1.0 / z;
743                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
744                                 if (c != 255) {
745                                         destlong &= ~((u_int64_t)0xFF << 16);
746                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 16;
747                                 }
748                                 l += dldx;
749                                 u += dudx;
750                                 v += dvdx;
751                                 z += dzdx;
752                                 rec_z = 1.0 / z;
753                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
754                                 if (c != 255) {
755                                         destlong &= ~((u_int64_t)0xFF << 24);
756                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 24;
757                                 }
758                                 l += dldx;
759                                 u += dudx;
760                                 v += dvdx;
761                                 z += dzdx;
762                                 rec_z = 1.0 / z;
763                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
764                                 if (c != 255) {
765                                         destlong &= ~((u_int64_t)0xFF << 32);
766                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 32;
767                                 }
768                                 l += dldx;
769                                 u += dudx;
770                                 v += dvdx;
771                                 z += dzdx;
772                                 rec_z = 1.0 / z;
773                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
774                                 if (c != 255) {
775                                         destlong &= ~((u_int64_t)0xFF << 40);
776                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 40;
777                                 }
778                                 l += dldx;
779                                 u += dudx;
780                                 v += dvdx;
781                                 z += dzdx;
782                                 rec_z = 1.0 / z;
783                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
784                                 if (c != 255) {
785                                         destlong &= ~((u_int64_t)0xFF << 48);
786                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 48;
787                                 }
788                                 l += dldx;
789                                 u += dudx;
790                                 v += dvdx;
791                                 z += dzdx;
792                                 rec_z = 1.0 / z;
793                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
794                                 if (c != 255) {
795                                         destlong &= ~((u_int64_t)0xFF << 56);
796                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 56;
797                                 }
798                                 l += dldx;
799                                 u += dudx;
800                                 v += dvdx;
801                                 z += dzdx;
802                                 rec_z = 1.0 / z;
803
804                                 *((u_int64_t *) dest) = destlong;
805                                 dest += 8;
806                                 x -= 8;
807                                 j -= 8;
808                         }
809                 }
810                 while (x-- > 0) {
811                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
812                         if (c != 255)
813                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
814                         dest++;
815                         l += dldx;
816                         u += dudx;
817                         v += dvdx;
818                         z += dzdx;
819                         rec_z = 1.0 / z;
820                 }
821         }
822 }
823
824 #if 1
825 // note the unrolling loop is broken. It is never called, and uses big endian. -- FH
826 void c_tmap_scanline_per()
827 {
828         ubyte *dest;
829         uint c;
830         int x, j;
831         fix l,u,v,z;
832         fix dudx, dvdx, dzdx, dldx;
833
834         u = fx_u;
835         v = fx_v*64;
836         z = fx_z;
837         dudx = fx_du_dx; 
838         dvdx = fx_dv_dx*64; 
839         dzdx = fx_dz_dx;
840
841         l = fx_l>>8;
842         dldx = fx_dl_dx>>8;
843         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
844
845         if (!Transparency_on)   {
846                 ubyte*                  pixPtrLocalCopy = pixptr;
847                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
848                 unsigned long   destlong;
849
850                 x = fx_xright-fx_xleft+1;
851
852                 if ((j = (unsigned long) dest & 3) != 0)
853                         {
854                         j = 4 - j;
855
856                         if (j > x)
857                                 j = x;
858
859                         while (j > 0)
860                                 {       
861                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
862                                 *dest++ = fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
863                                 //end edit -MM
864                                 l += dldx;
865                                 u += dudx;
866                                 v += dvdx;
867                                 z += dzdx;
868                                 x--;
869                                 j--;
870                                 }
871                         }
872
873                 j &= ~3;
874                 while (j > 0)
875                         {
876                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
877                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 24;
878                         //end edit -MM
879                         l += dldx;
880                         u += dudx;
881                         v += dvdx;
882                         z += dzdx;
883                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
884                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 16;
885                         //end edit -MM
886                         l += dldx;
887                         u += dudx;
888                         v += dvdx;
889                         z += dzdx;
890                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
891                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 8;
892                         //end edit -MM
893                         l += dldx;
894                         u += dudx;
895                         v += dvdx;
896                         z += dzdx;
897                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
898                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
899                         //end edit -MM
900                         l += dldx;
901                         u += dudx;
902                         v += dvdx;
903                         z += dzdx;
904                         *((unsigned long *) dest) = destlong;
905                         dest += 4;
906                         x -= 4;
907                         j -= 4;
908                         }
909
910                 while (x-- > 0)
911                         {
912                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
913                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
914                         //end edit -MM
915                         l += dldx;
916                         u += dudx;
917                         v += dvdx;
918                         z += dzdx;
919                         }
920
921         } else {
922                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
923                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
924                         if ( c!=TRANSPARENCY_COLOR)
925                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
926                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
927                         //end edit -MM
928                         dest++;
929                         l += dldx;
930                         u += dudx;
931                         v += dvdx;
932                         z += dzdx;
933                 }
934         }
935 }
936
937 #else
938 void c_tmap_scanline_per()
939 {
940         ubyte *dest;
941         uint c;
942         int x;
943         fix u,v,z,l,dudx, dvdx, dzdx, dldx;
944
945         u = fx_u;
946         v = fx_v*64;
947         z = fx_z;
948         dudx = fx_du_dx; 
949         dvdx = fx_dv_dx*64; 
950         dzdx = fx_dz_dx;
951
952         l = fx_l>>8;
953         dldx = fx_dl_dx>>8;
954         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
955
956         if (!Transparency_on)   {
957                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
958                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
959                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
960                         //end edit -MM
961                         l += dldx;
962                         u += dudx;
963                         v += dvdx;
964                         z += dzdx;
965                 }
966         } else {
967                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
968                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
969                         if ( c!=255)
970                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
971                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
972                         //end edit -MM
973                         dest++;
974                         l += dldx;
975                         u += dudx;
976                         v += dvdx;
977                         z += dzdx;
978                 }
979         }
980 }
981
982 #endif
983
984 void (*cur_tmap_scanline_per)(void);
985 void (*cur_tmap_scanline_per_nolight)(void);
986 void (*cur_tmap_scanline_lin)(void);
987 void (*cur_tmap_scanline_lin_nolight)(void);
988 void (*cur_tmap_scanline_flat)(void);
989 void (*cur_tmap_scanline_shaded)(void);
990
991 //runtime selection of optimized tmappers.  12/07/99  Matthew Mueller
992 //the reason I did it this way rather than having a *tmap_funcs that then points to a c_tmap or fp_tmap struct thats already filled in, is to avoid a second pointer dereference.
993 void select_tmap(char *type){
994         if (!type){
995 #ifndef NO_ASM
996 #if defined(__pentiumpro__)
997                 select_tmap("ppro");
998 #elif defined(__pentium__)
999                 select_tmap("pent");
1000 #else
1001                 select_tmap("i386");
1002 #endif
1003 #else
1004                 select_tmap("c");
1005 #endif
1006                 return;
1007         }
1008 #ifndef NO_ASM
1009         if (stricmp(type,"i386")==0){
1010                 cur_tmap_scanline_per=asm_tmap_scanline_per;
1011                 cur_tmap_scanline_per_nolight=asm_tmap_scanline_per;
1012                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
1013                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
1014                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1015                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1016         }
1017         else if (stricmp(type,"pent")==0){
1018                 cur_tmap_scanline_per=asm_pent_tmap_scanline_per;
1019                 cur_tmap_scanline_per_nolight=asm_pent_tmap_scanline_per;
1020                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
1021                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
1022                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1023                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1024         }
1025         else if (stricmp(type,"ppro")==0){
1026                 cur_tmap_scanline_per=asm_ppro_tmap_scanline_per;
1027                 cur_tmap_scanline_per_nolight=asm_ppro_tmap_scanline_per;
1028                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
1029                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
1030                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1031                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1032         }
1033         else
1034 #endif
1035         if (stricmp(type,"fp")==0){
1036                 cur_tmap_scanline_per=c_fp_tmap_scanline_per;
1037                 cur_tmap_scanline_per_nolight=c_fp_tmap_scanline_per_nolight;
1038                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1039                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1040                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1041                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1042         }
1043         else {
1044                 if (stricmp(type,"c")!=0)
1045                         printf("unknown tmap requested, using c tmap\n");
1046                 cur_tmap_scanline_per=c_tmap_scanline_per;
1047                 cur_tmap_scanline_per_nolight=c_tmap_scanline_per_nolight;
1048                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1049                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1050                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1051                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1052         }
1053 }