Finished moving stuff to arch/blah. I know, it's ugly, but It'll be easier to sync...
[btb/d2x.git] / texmap / scanline.c
1 /*
2 THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
3 SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
4 END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
5 ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
6 IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
7 SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
8 FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
9 CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
10 AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.  
11 COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
12 */
13 /*
14  * $Source: /cvs/cvsroot/d2x/texmap/scanline.c,v $
15  * $Revision: 1.4 $
16  * $Author: bradleyb $
17  * $Date: 2001-10-25 08:25:34 $
18  * 
19  * Routines to draw the texture mapped scanlines.
20  * 
21  * $Log: not supported by cvs2svn $
22  * Revision 1.3  2001/10/25 02:22:46  bradleyb
23  * adding support for runtime selection of tmap funcs
24  *
25  * Revision 1.2  2001/01/31 15:18:04  bradleyb
26  * Makefile and conf.h fixes
27  *
28  * Revision 1.1.1.1  2001/01/19 03:30:16  bradleyb
29  * Import of d2x-0.0.8
30  *
31  * Revision 1.4  1999/10/18 00:31:01  donut
32  * more alpha fixes from Falk Hueffner
33  *
34  * Revision 1.3  1999/10/14 04:48:21  donut
35  * alpha fixes, and gl_font args
36  *
37  * Revision 1.2  1999/09/21 07:22:40  sekmu
38  * remove unused var warning
39  *
40  * Revision 1.1.1.1  1999/06/14 22:14:08  donut
41  * Import of d1x 1.37 source.
42  *
43  * Revision 1.2  1995/02/20  18:23:39  john
44  * Added new module for C versions of inner loops.
45  * 
46  * Revision 1.1  1995/02/20  17:42:27  john
47  * Initial revision
48  * 
49  * 
50  */
51
52 #ifdef HAVE_CONFIG_H
53 #include <conf.h>
54 #endif
55
56 #ifdef RCS
57 static char rcsid[] = "$Id: scanline.c,v 1.4 2001-10-25 08:25:34 bradleyb Exp $";
58 #endif
59
60 #include <math.h>
61 #include <limits.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65
66 #include "maths.h"
67 #include "mono.h"
68 #include "gr.h"
69 #include "grdef.h"
70 #include "texmap.h"
71 #include "texmapl.h"
72 #include "scanline.h"
73 #include "strutil.h"
74
75 void c_tmap_scanline_flat()
76 {
77         ubyte *dest;
78 //        int x;
79
80         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y )  );
81
82 /*      for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
83                 *dest++ = tmap_flat_color;
84         }*/
85         memset(dest,tmap_flat_color,fx_xright-fx_xleft+1);
86 }
87
88 void c_tmap_scanline_shaded()
89 {
90         int fade;
91         ubyte *dest, tmp;
92         int x;
93
94         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
95
96         fade = tmap_flat_shade_value<<8;
97         for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
98                 tmp = *dest;
99                 *dest++ = gr_fade_table[ fade |(tmp)];
100         }
101 }
102
103 void c_tmap_scanline_lin_nolight()
104 {
105         ubyte *dest;
106         uint c;
107         int x;
108         fix u,v,dudx, dvdx;
109
110         u = fx_u;
111         v = fx_v*64;
112         dudx = fx_du_dx; 
113         dvdx = fx_dv_dx*64; 
114
115         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
116
117         if (!Transparency_on)   {
118                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
119                         *dest++ = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
120                         u += dudx;
121                         v += dvdx;
122                 }
123         } else {
124                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
125                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
126                         if ( c!=255)
127                                 *dest = c;
128                         dest++;
129                         u += dudx;
130                         v += dvdx;
131                 }
132         }
133 }
134
135
136 #if 1
137 void c_tmap_scanline_lin()
138 {
139         ubyte *dest;
140         uint c;
141         int x, j;
142         fix u,v,l,dudx, dvdx, dldx;
143
144         u = fx_u;
145         v = fx_v*64;
146         dudx = fx_du_dx; 
147         dvdx = fx_dv_dx*64; 
148
149         l = fx_l>>8;
150         dldx = fx_dl_dx>>8;
151         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
152
153         if (!Transparency_on)   {
154                 ubyte*                  pixPtrLocalCopy = pixptr;
155                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
156                 unsigned long   destlong;
157
158                 x = fx_xright-fx_xleft+1;
159
160                 if ((j = (unsigned long) dest & 3) != 0)
161                         {
162                         j = 4 - j;
163
164                         if (j > x)
165                                 j = x;
166
167                         while (j > 0)
168                                 {       
169                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
170                                 *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
171                                 //end edit -MM
172                                 l += dldx;
173                                 u += dudx;
174                                 v += dvdx;
175                                 x--;
176                                 j--;
177                                 }
178                         }
179
180                 j &= ~3;
181                 while (j > 0)
182                         {
183                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
184                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 24;
185                         //end edit -MM
186                         l += dldx;
187                         u += dudx;
188                         v += dvdx;
189                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
190                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 16;
191                         //end edit -MM
192                         l += dldx;
193                         u += dudx;
194                         v += dvdx;
195                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
196                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 8;
197                         //end edit -MM
198                         l += dldx;
199                         u += dudx;
200                         v += dvdx;
201                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
202                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
203                         //end edit -MM
204                         l += dldx;
205                         u += dudx;
206                         v += dvdx;
207                         *((unsigned long *) dest) = destlong;
208                         dest += 4;
209                         x -= 4;
210                         j -= 4;
211                         }
212
213                 while (x-- > 0)
214                         {
215                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
216                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
217                         //end edit -MM
218                         l += dldx;
219                         u += dudx;
220                         v += dvdx;
221                         }
222
223         } else {
224                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
225                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
226                         if ( c!=TRANSPARENCY_COLOR)
227                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
228                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
229                         //end edit -MM
230                         dest++;
231                         l += dldx;
232                         u += dudx;
233                         v += dvdx;
234                 }
235         }
236 }
237
238 #else
239 void c_tmap_scanline_lin()
240 {
241         ubyte *dest;
242         uint c;
243         int x;
244         fix u,v,l,dudx, dvdx, dldx;
245
246         u = fx_u;
247         v = fx_v*64;
248         dudx = fx_du_dx; 
249         dvdx = fx_dv_dx*64; 
250
251         l = fx_l>>8;
252         dldx = fx_dl_dx>>8;
253         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
254
255         if (!Transparency_on)   {
256                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
257                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
258                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
259                         //end edit -MM
260                         l += dldx;
261                         u += dudx;
262                         v += dvdx;
263                 }
264         } else {
265                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
266                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
267                         if ( c!=255)
268                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
269                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
270                         //end edit -MM
271                         dest++;
272                         l += dldx;
273                         u += dudx;
274                         v += dvdx;
275                 }
276         }
277 }
278 #endif
279
280 // Used for energy centers. See comments for c_tmap_scanline_per().
281 void c_fp_tmap_scanline_per_nolight()
282 {
283         ubyte          *dest;
284         uint            c;
285         int             x, j;
286         double          u, v, z, dudx, dvdx, dzdx, rec_z;
287         u_int64_t       destlong;
288
289         u = f2db(fx_u);
290         v = f2db(fx_v) * 64.0;
291         z = f2db(fx_z);
292         dudx = f2db(fx_du_dx);
293         dvdx = f2db(fx_dv_dx) * 64.0;
294         dzdx = f2db(fx_dz_dx);
295
296         rec_z = 1.0 / z;
297
298         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
299
300         x = fx_xright - fx_xleft + 1;
301         if (!Transparency_on) {
302                 if (x >= 8) {
303                         if ((j = (size_t) dest & 7) != 0) {
304                                 j = 8 - j;
305
306                                 while (j > 0) {
307                                         *dest++ =
308                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
309                                                          (((int) (u * rec_z)) & 63)];
310                                         u += dudx;
311                                         v += dvdx;
312                                         z += dzdx;
313                                         rec_z = 1.0 / z;
314                                         x--;
315                                         j--;
316                                 }
317                         }
318
319                         while (j >= 8) {
320                                 destlong =
321                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
322                                                        (((int) (u * rec_z)) & 63)];
323                                 u += dudx;
324                                 v += dvdx;
325                                 z += dzdx;
326                                 rec_z = 1.0 / z;
327                                 destlong |=
328                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
329                                                        (((int) (u * rec_z)) & 63)] << 8;
330                                 u += dudx;
331                                 v += dvdx;
332                                 z += dzdx;
333                                 rec_z = 1.0 / z;
334                                 destlong |=
335                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
336                                                        (((int) (u * rec_z)) & 63)] << 16;
337                                 u += dudx;
338                                 v += dvdx;
339                                 z += dzdx;
340                                 rec_z = 1.0 / z;
341                                 destlong |=
342                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
343                                                        (((int) (u * rec_z)) & 63)] << 24;
344                                 u += dudx;
345                                 v += dvdx;
346                                 z += dzdx;
347                                 rec_z = 1.0 / z;
348                                 destlong |=
349                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
350                                                        (((int) (u * rec_z)) & 63)] << 32;
351                                 u += dudx;
352                                 v += dvdx;
353                                 z += dzdx;
354                                 rec_z = 1.0 / z;
355                                 destlong |=
356                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
357                                                        (((int) (u * rec_z)) & 63)] << 40;
358                                 u += dudx;
359                                 v += dvdx;
360                                 z += dzdx;
361                                 rec_z = 1.0 / z;
362                                 destlong |=
363                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
364                                                        (((int) (u * rec_z)) & 63)] << 48;
365                                 u += dudx;
366                                 v += dvdx;
367                                 z += dzdx;
368                                 rec_z = 1.0 / z;
369                                 destlong |=
370                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
371                                                        (((int) (u * rec_z)) & 63)] << 56;
372                                 u += dudx;
373                                 v += dvdx;
374                                 z += dzdx;
375                                 rec_z = 1.0 / z;
376
377                                 *((u_int64_t *) dest) = destlong;
378                                 dest += 8;
379                                 x -= 8;
380                                 j -= 8;
381                         }
382                 }
383                 while (x-- > 0) {
384                         *dest++ =
385                             (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
386                                                (((int) (u * rec_z)) & 63)];
387                         u += dudx;
388                         v += dvdx;
389                         z += dzdx;
390                         rec_z = 1.0 / z;
391                 }
392         } else {
393                 x = fx_xright - fx_xleft + 1;
394
395                 if (x >= 8) {
396                         if ((j = (size_t) dest & 7) != 0) {
397                                 j = 8 - j;
398
399                                 while (j > 0) {
400                                         c =
401                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
402                                                          (((int) (u * rec_z)) & 63)];
403                                         if (c != 255)
404                                                 *dest = c;
405                                         dest++;
406                                         u += dudx;
407                                         v += dvdx;
408                                         z += dzdx;
409                                         rec_z = 1.0 / z;
410                                         x--;
411                                         j--;
412                                 }
413                         }
414
415                         j = x;
416                         while (j >= 8) {
417                                 destlong = *((u_int64_t *) dest);
418                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
419                                                   (((int) (u * rec_z)) & 63)];
420                                 if (c != 255) {
421                                         destlong &= ~(u_int64_t)0xFF;
422                                         destlong |= (u_int64_t) c;
423                                 }
424                                 u += dudx;
425                                 v += dvdx;
426                                 z += dzdx;
427                                 rec_z = 1.0 / z;
428                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
429                                                   (((int) (u * rec_z)) & 63)];
430                                 if (c != 255) {
431                                         destlong &= ~((u_int64_t)0xFF << 8);
432                                         destlong |= (u_int64_t) c << 8;
433                                 }
434                                 u += dudx;
435                                 v += dvdx;
436                                 z += dzdx;
437                                 rec_z = 1.0 / z;
438                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
439                                                   (((int) (u * rec_z)) & 63)];
440                                 if (c != 255) {
441                                         destlong &= ~((u_int64_t)0xFF << 16);
442                                         destlong |= (u_int64_t) c << 16;
443                                 }
444                                 u += dudx;
445                                 v += dvdx;
446                                 z += dzdx;
447                                 rec_z = 1.0 / z;
448                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
449                                                   (((int) (u * rec_z)) & 63)];
450                                 if (c != 255) {
451                                         destlong &= ~((u_int64_t)0xFF << 24);
452                                         destlong |= (u_int64_t) c << 24;
453                                 }
454                                 u += dudx;
455                                 v += dvdx;
456                                 z += dzdx;
457                                 rec_z = 1.0 / z;
458                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
459                                                   (((int) (u * rec_z)) & 63)];
460                                 if (c != 255) {
461                                         destlong &= ~((u_int64_t)0xFF << 32);
462                                         destlong |= (u_int64_t) c << 32;
463                                 }
464                                 u += dudx;
465                                 v += dvdx;
466                                 z += dzdx;
467                                 rec_z = 1.0 / z;
468                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
469                                                   (((int) (u * rec_z)) & 63)];
470                                 if (c != 255) {
471                                         destlong &= ~((u_int64_t)0xFF << 40);
472                                         destlong |= (u_int64_t) c << 40;
473                                 }
474                                 u += dudx;
475                                 v += dvdx;
476                                 z += dzdx;
477                                 rec_z = 1.0 / z;
478                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
479                                                   (((int) (u * rec_z)) & 63)];
480                                 if (c != 255) {
481                                         destlong &= ~((u_int64_t)0xFF << 48);
482                                         destlong |= (u_int64_t) c << 48;
483                                 }
484                                 u += dudx;
485                                 v += dvdx;
486                                 z += dzdx;
487                                 rec_z = 1.0 / z;
488                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
489                                                   (((int) (u * rec_z)) & 63)];
490                                 if (c != 255) {
491                                         destlong &= ~((u_int64_t)0xFF << 56);
492                                         destlong |= (u_int64_t) c << 56;
493                                 }
494                                 u += dudx;
495                                 v += dvdx;
496                                 z += dzdx;
497                                 rec_z = 1.0 / z;
498
499                                 *((u_int64_t *) dest) = destlong;
500                                 dest += 8;
501                                 x -= 8;
502                                 j -= 8;
503                         }
504                 }
505                 while (x-- > 0) {
506                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
507                                           (((int) (u * rec_z)) & 63)];
508                         if (c != 255)
509                                 *dest = c;
510                         dest++;
511                         u += dudx;
512                         v += dvdx;
513                         z += dzdx;
514                         rec_z = 1.0 / z;
515                 }
516         }
517 }
518
519 void c_tmap_scanline_per_nolight()
520 {
521         ubyte *dest;
522         uint c;
523         int x;
524         fix u,v,z,dudx, dvdx, dzdx;
525
526         u = fx_u;
527         v = fx_v*64;
528         z = fx_z;
529         dudx = fx_du_dx; 
530         dvdx = fx_dv_dx*64; 
531         dzdx = fx_dz_dx;
532
533         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
534
535         if (!Transparency_on)   {
536                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
537                         *dest++ = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
538                         u += dudx;
539                         v += dvdx;
540                         z += dzdx;
541                 }
542         } else {
543                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
544                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
545                         if ( c!=255)
546                                 *dest = c;
547                         dest++;
548                         u += dudx;
549                         v += dvdx;
550                         z += dzdx;
551                 }
552         }
553 }
554
555 // This texture mapper uses floating point extensively and writes 8 pixels at once, so it likely works
556 // best on 64 bit RISC processors.
557 // WARNING: it is not endian clean. For big endian, reverse the shift counts in the unrolled loops. I
558 // have no means to test that, so I didn't try it. Please tell me if you get this to work on a big
559 // endian machine.
560 // If you're using an Alpha, use the Compaq compiler for this file for quite some fps more.
561 // Unfortunately, it won't compile the whole source, so simply compile everything, change the
562 // compiler to ccc, remove scanline.o and compile again.
563 // Please send comments/suggestions to falk.hueffner@student.uni-tuebingen.de.
564 void c_fp_tmap_scanline_per()
565 {
566         ubyte          *dest;
567         uint            c;
568         int             x, j;
569         double          u, v, z, l, dudx, dvdx, dzdx, dldx, rec_z;
570         u_int64_t       destlong;
571
572         u = f2db(fx_u);
573         v = f2db(fx_v) * 64.0;
574         z = f2db(fx_z);
575         l = f2db(fx_l);
576         dudx = f2db(fx_du_dx);
577         dvdx = f2db(fx_dv_dx) * 64.0;
578         dzdx = f2db(fx_dz_dx);
579         dldx = f2db(fx_dl_dx);
580
581         rec_z = 1.0 / z; // gcc 2.95.2 is won't do this optimization itself
582
583         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
584         x = fx_xright - fx_xleft + 1;
585
586         if (!Transparency_on) {
587                 if (x >= 8) {
588                         if ((j = (size_t) dest & 7) != 0) {
589                                 j = 8 - j;
590
591                                 while (j > 0) {
592                                         *dest++ =
593                                             gr_fade_table[((int) fabs(l)) * 256 +
594                                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
595                                                                         (((int) (u * rec_z)) & 63)]];
596                                         l += dldx;
597                                         u += dudx;
598                                         v += dvdx;
599                                         z += dzdx;
600                                         rec_z = 1.0 / z;
601                                         x--;
602                                         j--;
603                                 }
604                         }
605
606                         j = x;
607                         while (j >= 8) {
608                                 destlong =
609                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
610                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
611                                                                             (((int) (u * rec_z)) & 63)]];
612                                 l += dldx;
613                                 u += dudx;
614                                 v += dvdx;
615                                 z += dzdx;
616                                 rec_z = 1.0 / z;
617                                 destlong |=
618                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
619                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
620                                                                             (((int) (u * rec_z)) & 63)]] << 8;
621                                 l += dldx;
622                                 u += dudx;
623                                 v += dvdx;
624                                 z += dzdx;
625                                 rec_z = 1.0 / z;
626                                 destlong |=
627                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
628                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
629                                                                             (((int) (u * rec_z)) & 63)]] << 16;
630                                 l += dldx;
631                                 u += dudx;
632                                 v += dvdx;
633                                 z += dzdx;
634                                 rec_z = 1.0 / z;
635                                 destlong |=
636                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
637                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
638                                                                             (((int) (u * rec_z)) & 63)]] << 24;
639                                 l += dldx;
640                                 u += dudx;
641                                 v += dvdx;
642                                 z += dzdx;
643                                 rec_z = 1.0 / z;
644                                 destlong |=
645                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
646                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
647                                                                             (((int) (u * rec_z)) & 63)]] << 32;
648                                 l += dldx;
649                                 u += dudx;
650                                 v += dvdx;
651                                 z += dzdx;
652                                 rec_z = 1.0 / z;
653                                 destlong |=
654                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
655                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
656                                                                             (((int) (u * rec_z)) & 63)]] << 40;
657                                 l += dldx;
658                                 u += dudx;
659                                 v += dvdx;
660                                 z += dzdx;
661                                 rec_z = 1.0 / z;
662                                 destlong |=
663                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
664                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
665                                                                             (((int) (u * rec_z)) & 63)]] << 48;
666                                 l += dldx;
667                                 u += dudx;
668                                 v += dvdx;
669                                 z += dzdx;
670                                 rec_z = 1.0 / z;
671                                 destlong |=
672                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
673                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
674                                                                             (((int) (u * rec_z)) & 63)]] << 56;
675                                 l += dldx;
676                                 u += dudx;
677                                 v += dvdx;
678                                 z += dzdx;
679                                 rec_z = 1.0 / z;
680
681                                 *((u_int64_t *) dest) = destlong;
682                                 dest += 8;
683                                 x -= 8;
684                                 j -= 8;
685                         }
686                 }
687                 while (x-- > 0) {
688                         *dest++ =
689                             gr_fade_table[((int) fabs(l)) * 256 +
690                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]];
691                         l += dldx;
692                         u += dudx;
693                         v += dvdx;
694                         z += dzdx;
695                         rec_z = 1.0 / z;
696                 }
697         } else {
698                 if (x >= 8) {
699                         if ((j = (size_t) dest & 7) != 0) {
700                                 j = 8 - j;
701
702                                 while (j > 0) {
703                                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
704                                         if (c != 255)
705                                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
706                                         dest++;
707                                         l += dldx;
708                                         u += dudx;
709                                         v += dvdx;
710                                         z += dzdx;
711                                         rec_z = 1.0 / z;
712                                         x--;
713                                         j--;
714                                 }
715                         }
716
717                         j = x;
718                         while (j >= 8) {
719                                 destlong = *((u_int64_t *) dest);
720                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
721                                 if (c != 255) {
722                                         destlong &= ~(u_int64_t)0xFF;
723                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c];
724                                 }
725                                 l += dldx;
726                                 u += dudx;
727                                 v += dvdx;
728                                 z += dzdx;
729                                 rec_z = 1.0 / z;
730                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
731                                 if (c != 255) {
732                                         destlong &= ~((u_int64_t)0xFF << 8);
733                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 8;
734                                 }
735                                 l += dldx;
736                                 u += dudx;
737                                 v += dvdx;
738                                 z += dzdx;
739                                 rec_z = 1.0 / z;
740                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
741                                 if (c != 255) {
742                                         destlong &= ~((u_int64_t)0xFF << 16);
743                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 16;
744                                 }
745                                 l += dldx;
746                                 u += dudx;
747                                 v += dvdx;
748                                 z += dzdx;
749                                 rec_z = 1.0 / z;
750                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
751                                 if (c != 255) {
752                                         destlong &= ~((u_int64_t)0xFF << 24);
753                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 24;
754                                 }
755                                 l += dldx;
756                                 u += dudx;
757                                 v += dvdx;
758                                 z += dzdx;
759                                 rec_z = 1.0 / z;
760                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
761                                 if (c != 255) {
762                                         destlong &= ~((u_int64_t)0xFF << 32);
763                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 32;
764                                 }
765                                 l += dldx;
766                                 u += dudx;
767                                 v += dvdx;
768                                 z += dzdx;
769                                 rec_z = 1.0 / z;
770                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
771                                 if (c != 255) {
772                                         destlong &= ~((u_int64_t)0xFF << 40);
773                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 40;
774                                 }
775                                 l += dldx;
776                                 u += dudx;
777                                 v += dvdx;
778                                 z += dzdx;
779                                 rec_z = 1.0 / z;
780                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
781                                 if (c != 255) {
782                                         destlong &= ~((u_int64_t)0xFF << 48);
783                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 48;
784                                 }
785                                 l += dldx;
786                                 u += dudx;
787                                 v += dvdx;
788                                 z += dzdx;
789                                 rec_z = 1.0 / z;
790                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
791                                 if (c != 255) {
792                                         destlong &= ~((u_int64_t)0xFF << 56);
793                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 56;
794                                 }
795                                 l += dldx;
796                                 u += dudx;
797                                 v += dvdx;
798                                 z += dzdx;
799                                 rec_z = 1.0 / z;
800
801                                 *((u_int64_t *) dest) = destlong;
802                                 dest += 8;
803                                 x -= 8;
804                                 j -= 8;
805                         }
806                 }
807                 while (x-- > 0) {
808                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
809                         if (c != 255)
810                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
811                         dest++;
812                         l += dldx;
813                         u += dudx;
814                         v += dvdx;
815                         z += dzdx;
816                         rec_z = 1.0 / z;
817                 }
818         }
819 }
820
821 #if 1
822 // note the unrolling loop is broken. It is never called, and uses big endian. -- FH
823 void c_tmap_scanline_per()
824 {
825         ubyte *dest;
826         uint c;
827         int x, j;
828         fix l,u,v,z;
829         fix dudx, dvdx, dzdx, dldx;
830
831         u = fx_u;
832         v = fx_v*64;
833         z = fx_z;
834         dudx = fx_du_dx; 
835         dvdx = fx_dv_dx*64; 
836         dzdx = fx_dz_dx;
837
838         l = fx_l>>8;
839         dldx = fx_dl_dx>>8;
840         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
841
842         if (!Transparency_on)   {
843                 ubyte*                  pixPtrLocalCopy = pixptr;
844                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
845                 unsigned long   destlong;
846
847                 x = fx_xright-fx_xleft+1;
848
849                 if ((j = (unsigned long) dest & 3) != 0)
850                         {
851                         j = 4 - j;
852
853                         if (j > x)
854                                 j = x;
855
856                         while (j > 0)
857                                 {       
858                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
859                                 *dest++ = fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
860                                 //end edit -MM
861                                 l += dldx;
862                                 u += dudx;
863                                 v += dvdx;
864                                 z += dzdx;
865                                 x--;
866                                 j--;
867                                 }
868                         }
869
870                 j &= ~3;
871                 while (j > 0)
872                         {
873                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
874                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 24;
875                         //end edit -MM
876                         l += dldx;
877                         u += dudx;
878                         v += dvdx;
879                         z += dzdx;
880                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
881                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 16;
882                         //end edit -MM
883                         l += dldx;
884                         u += dudx;
885                         v += dvdx;
886                         z += dzdx;
887                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
888                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 8;
889                         //end edit -MM
890                         l += dldx;
891                         u += dudx;
892                         v += dvdx;
893                         z += dzdx;
894                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
895                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
896                         //end edit -MM
897                         l += dldx;
898                         u += dudx;
899                         v += dvdx;
900                         z += dzdx;
901                         *((unsigned long *) dest) = destlong;
902                         dest += 4;
903                         x -= 4;
904                         j -= 4;
905                         }
906
907                 while (x-- > 0)
908                         {
909                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
910                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
911                         //end edit -MM
912                         l += dldx;
913                         u += dudx;
914                         v += dvdx;
915                         z += dzdx;
916                         }
917
918         } else {
919                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
920                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
921                         if ( c!=TRANSPARENCY_COLOR)
922                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
923                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
924                         //end edit -MM
925                         dest++;
926                         l += dldx;
927                         u += dudx;
928                         v += dvdx;
929                         z += dzdx;
930                 }
931         }
932 }
933
934 #else
935 void c_tmap_scanline_per()
936 {
937         ubyte *dest;
938         uint c;
939         int x;
940         fix u,v,z,l,dudx, dvdx, dzdx, dldx;
941
942         u = fx_u;
943         v = fx_v*64;
944         z = fx_z;
945         dudx = fx_du_dx; 
946         dvdx = fx_dv_dx*64; 
947         dzdx = fx_dz_dx;
948
949         l = fx_l>>8;
950         dldx = fx_dl_dx>>8;
951         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
952
953         if (!Transparency_on)   {
954                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
955                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
956                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
957                         //end edit -MM
958                         l += dldx;
959                         u += dudx;
960                         v += dvdx;
961                         z += dzdx;
962                 }
963         } else {
964                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
965                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
966                         if ( c!=255)
967                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
968                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
969                         //end edit -MM
970                         dest++;
971                         l += dldx;
972                         u += dudx;
973                         v += dvdx;
974                         z += dzdx;
975                 }
976         }
977 }
978
979 #endif
980
// Currently selected scanline renderers.  All are null until select_tmap()
// assigns them; they are grouped here as lit/unlit pairs.
void (*cur_tmap_scanline_per)(void),
     (*cur_tmap_scanline_per_nolight)(void);    // perspective
void (*cur_tmap_scanline_lin)(void),
     (*cur_tmap_scanline_lin_nolight)(void);    // linear
void (*cur_tmap_scanline_flat)(void),
     (*cur_tmap_scanline_shaded)(void);         // untextured
987
988 //runtime selection of optimized tmappers.  12/07/99  Matthew Mueller
989 //the reason I did it this way rather than having a *tmap_funcs that then points to a c_tmap or fp_tmap struct thats already filled in, is to avoid a second pointer dereference.
990 void select_tmap(char *type){
991         if (!type){
992 #ifndef NO_ASM
993                 select_tmap("i386");
994 #else
995                 select_tmap("c");
996 #endif
997                 return;
998         }
999 #ifndef NO_ASM
1000         if (stricmp(type,"i386")==0){
1001                 cur_tmap_scanline_per=asm_tmap_scanline_per;
1002                 cur_tmap_scanline_per_nolight=asm_tmap_scanline_per;
1003                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
1004                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
1005                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1006                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1007         }
1008         else if (stricmp(type,"pent")==0){
1009                 cur_tmap_scanline_per=asm_pent_tmap_scanline_per;
1010                 cur_tmap_scanline_per_nolight=asm_pent_tmap_scanline_per;
1011                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
1012                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
1013                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1014                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1015         }
1016         else if (stricmp(type,"ppro")==0){
1017                 cur_tmap_scanline_per=asm_ppro_tmap_scanline_per;
1018                 cur_tmap_scanline_per_nolight=asm_ppro_tmap_scanline_per;
1019                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
1020                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
1021                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1022                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1023         }
1024         else
1025 #endif
1026         if (stricmp(type,"fp")==0){
1027                 cur_tmap_scanline_per=c_fp_tmap_scanline_per;
1028                 cur_tmap_scanline_per_nolight=c_fp_tmap_scanline_per_nolight;
1029                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1030                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1031                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1032                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1033         }
1034         else {
1035                 if (stricmp(type,"c")!=0)
1036                         printf("unknown tmap requested, using c tmap\n");
1037                 cur_tmap_scanline_per=c_tmap_scanline_per;
1038                 cur_tmap_scanline_per_nolight=c_tmap_scanline_per_nolight;
1039                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1040                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1041                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1042                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1043         }
1044 }