divide negative window x-coordinates properly, fixing random crashes
[btb/d2x.git] / texmap / scanline.c
1 /* $Id: scanline.c,v 1.9 2006-12-02 12:52:41 chris Exp $ */
2 /*
3 THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
4 SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
5 END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
6 ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
7 IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
8 SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
9 FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
10 CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
11 AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
12 COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
13 */
14
15 /*
16  *
17  * Routines to draw the texture mapped scanlines.
18  *
19  */
20
21 #ifdef HAVE_CONFIG_H
22 #include <conf.h>
23 #endif
24
25 #ifdef RCS
26 static char rcsid[] = "$Id: scanline.c,v 1.9 2006-12-02 12:52:41 chris Exp $";
27 #endif
28
29 #include <math.h>
30 #include <limits.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34
35 #include "maths.h"
36 #include "mono.h"
37 #include "gr.h"
38 #include "grdef.h"
39 #include "texmap.h"
40 #include "texmapl.h"
41 #include "scanline.h"
42 #include "strutil.h"
43
44 void c_tmap_scanline_flat()
45 {
46         ubyte *dest;
47 //        int x;
48
49         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y )  );
50
51 /*      for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
52                 *dest++ = tmap_flat_color;
53         }*/
54         memset(dest,tmap_flat_color,fx_xright-fx_xleft+1);
55 }
56
57 void c_tmap_scanline_shaded()
58 {
59         int fade;
60         ubyte *dest, tmp;
61         int x;
62
63         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
64
65         fade = tmap_flat_shade_value<<8;
66         for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
67                 tmp = *dest;
68                 *dest++ = gr_fade_table[ fade |(tmp)];
69         }
70 }
71
72 void c_tmap_scanline_lin_nolight()
73 {
74         ubyte *dest;
75         uint c;
76         int x;
77         fix u,v,dudx, dvdx;
78
79         u = fx_u;
80         v = fx_v*64;
81         dudx = fx_du_dx; 
82         dvdx = fx_dv_dx*64; 
83
84         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
85
86         if (!Transparency_on)   {
87                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
88                         *dest++ = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
89                         u += dudx;
90                         v += dvdx;
91                 }
92         } else {
93                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
94                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
95                         if ( c!=255)
96                                 *dest = c;
97                         dest++;
98                         u += dudx;
99                         v += dvdx;
100                 }
101         }
102 }
103
104
105 #if 1
106 void c_tmap_scanline_lin()
107 {
108         ubyte *dest;
109         uint c;
110         int x, j;
111         fix u,v,l,dudx, dvdx, dldx;
112
113         u = fx_u;
114         v = fx_v*64;
115         dudx = fx_du_dx; 
116         dvdx = fx_dv_dx*64; 
117
118         l = fx_l>>8;
119         dldx = fx_dl_dx/256; // fx_dl_dx>>8 != fx_dl_dx/256 for negative numbers
120         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
121
122         if (!Transparency_on)   {
123                 ubyte*                  pixPtrLocalCopy = pixptr;
124                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
125                 unsigned long   destlong;
126
127                 x = fx_xright-fx_xleft+1;
128
129                 if ((j = (unsigned long) dest & 3) != 0)
130                         {
131                         j = 4 - j;
132
133                         if (j > x)
134                                 j = x;
135
136                         while (j > 0)
137                                 {       
138                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
139                                 *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
140                                 //end edit -MM
141                                 l += dldx;
142                                 u += dudx;
143                                 v += dvdx;
144                                 x--;
145                                 j--;
146                                 }
147                         }
148
149                 j &= ~3;
150                 while (j > 0)
151                         {
152                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
153                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 24;
154                         //end edit -MM
155                         l += dldx;
156                         u += dudx;
157                         v += dvdx;
158                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
159                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 16;
160                         //end edit -MM
161                         l += dldx;
162                         u += dudx;
163                         v += dvdx;
164                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
165                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ] << 8;
166                         //end edit -MM
167                         l += dldx;
168                         u += dudx;
169                         v += dvdx;
170                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
171                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
172                         //end edit -MM
173                         l += dldx;
174                         u += dudx;
175                         v += dvdx;
176                         *((unsigned long *) dest) = destlong;
177                         dest += 4;
178                         x -= 4;
179                         j -= 4;
180                         }
181
182                 while (x-- > 0)
183                         {
184                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
185                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint) pixPtrLocalCopy[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
186                         //end edit -MM
187                         l += dldx;
188                         u += dudx;
189                         v += dvdx;
190                         }
191
192         } else {
193                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
194                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
195                         if ( c!=TRANSPARENCY_COLOR)
196                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
197                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
198                         //end edit -MM
199                         dest++;
200                         l += dldx;
201                         u += dudx;
202                         v += dvdx;
203                 }
204         }
205 }
206
207 #else
208 void c_tmap_scanline_lin()
209 {
210         ubyte *dest;
211         uint c;
212         int x;
213         fix u,v,l,dudx, dvdx, dldx;
214
215         u = fx_u;
216         v = fx_v*64;
217         dudx = fx_du_dx; 
218         dvdx = fx_dv_dx*64; 
219
220         l = fx_l>>8;
221         dldx = fx_dl_dx/256; // fx_dl_dx>>8 != fx_dl_dx/256 for negative numbers
222         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
223
224         if (!Transparency_on)   {
225                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
226                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
227                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ] ];
228                         //end edit -MM
229                         l += dldx;
230                         u += dudx;
231                         v += dvdx;
232                 }
233         } else {
234                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
235                         c = (uint)pixptr[ (f2i(v)&(64*63)) + (f2i(u)&63) ];
236                         if ( c!=255)
237                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
238                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
239                         //end edit -MM
240                         dest++;
241                         l += dldx;
242                         u += dudx;
243                         v += dvdx;
244                 }
245         }
246 }
247 #endif
248
249 // Used for energy centers. See comments for c_tmap_scanline_per().
250 void c_fp_tmap_scanline_per_nolight()
251 {
252         ubyte          *dest;
253         uint            c;
254         int             x, j;
255         double          u, v, z, dudx, dvdx, dzdx, rec_z;
256         u_int64_t       destlong;
257
258         u = f2db(fx_u);
259         v = f2db(fx_v) * 64.0;
260         z = f2db(fx_z);
261         dudx = f2db(fx_du_dx);
262         dvdx = f2db(fx_dv_dx) * 64.0;
263         dzdx = f2db(fx_dz_dx);
264
265         rec_z = 1.0 / z;
266
267         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
268
269         x = fx_xright - fx_xleft + 1;
270         if (!Transparency_on) {
271                 if (x >= 8) {
272                         if ((j = (size_t) dest & 7) != 0) {
273                                 j = 8 - j;
274
275                                 while (j > 0) {
276                                         *dest++ =
277                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
278                                                          (((int) (u * rec_z)) & 63)];
279                                         u += dudx;
280                                         v += dvdx;
281                                         z += dzdx;
282                                         rec_z = 1.0 / z;
283                                         x--;
284                                         j--;
285                                 }
286                         }
287
288                         while (j >= 8) {
289                                 destlong =
290                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
291                                                        (((int) (u * rec_z)) & 63)];
292                                 u += dudx;
293                                 v += dvdx;
294                                 z += dzdx;
295                                 rec_z = 1.0 / z;
296                                 destlong |=
297                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
298                                                        (((int) (u * rec_z)) & 63)] << 8;
299                                 u += dudx;
300                                 v += dvdx;
301                                 z += dzdx;
302                                 rec_z = 1.0 / z;
303                                 destlong |=
304                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
305                                                        (((int) (u * rec_z)) & 63)] << 16;
306                                 u += dudx;
307                                 v += dvdx;
308                                 z += dzdx;
309                                 rec_z = 1.0 / z;
310                                 destlong |=
311                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
312                                                        (((int) (u * rec_z)) & 63)] << 24;
313                                 u += dudx;
314                                 v += dvdx;
315                                 z += dzdx;
316                                 rec_z = 1.0 / z;
317                                 destlong |=
318                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
319                                                        (((int) (u * rec_z)) & 63)] << 32;
320                                 u += dudx;
321                                 v += dvdx;
322                                 z += dzdx;
323                                 rec_z = 1.0 / z;
324                                 destlong |=
325                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
326                                                        (((int) (u * rec_z)) & 63)] << 40;
327                                 u += dudx;
328                                 v += dvdx;
329                                 z += dzdx;
330                                 rec_z = 1.0 / z;
331                                 destlong |=
332                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
333                                                        (((int) (u * rec_z)) & 63)] << 48;
334                                 u += dudx;
335                                 v += dvdx;
336                                 z += dzdx;
337                                 rec_z = 1.0 / z;
338                                 destlong |=
339                                     (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
340                                                        (((int) (u * rec_z)) & 63)] << 56;
341                                 u += dudx;
342                                 v += dvdx;
343                                 z += dzdx;
344                                 rec_z = 1.0 / z;
345
346                                 *((u_int64_t *) dest) = destlong;
347                                 dest += 8;
348                                 x -= 8;
349                                 j -= 8;
350                         }
351                 }
352                 while (x-- > 0) {
353                         *dest++ =
354                             (u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
355                                                (((int) (u * rec_z)) & 63)];
356                         u += dudx;
357                         v += dvdx;
358                         z += dzdx;
359                         rec_z = 1.0 / z;
360                 }
361         } else {
362                 x = fx_xright - fx_xleft + 1;
363
364                 if (x >= 8) {
365                         if ((j = (size_t) dest & 7) != 0) {
366                                 j = 8 - j;
367
368                                 while (j > 0) {
369                                         c =
370                                             (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
371                                                          (((int) (u * rec_z)) & 63)];
372                                         if (c != 255)
373                                                 *dest = c;
374                                         dest++;
375                                         u += dudx;
376                                         v += dvdx;
377                                         z += dzdx;
378                                         rec_z = 1.0 / z;
379                                         x--;
380                                         j--;
381                                 }
382                         }
383
384                         j = x;
385                         while (j >= 8) {
386                                 destlong = *((u_int64_t *) dest);
387                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
388                                                   (((int) (u * rec_z)) & 63)];
389                                 if (c != 255) {
390                                         destlong &= ~(u_int64_t)0xFF;
391                                         destlong |= (u_int64_t) c;
392                                 }
393                                 u += dudx;
394                                 v += dvdx;
395                                 z += dzdx;
396                                 rec_z = 1.0 / z;
397                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
398                                                   (((int) (u * rec_z)) & 63)];
399                                 if (c != 255) {
400                                         destlong &= ~((u_int64_t)0xFF << 8);
401                                         destlong |= (u_int64_t) c << 8;
402                                 }
403                                 u += dudx;
404                                 v += dvdx;
405                                 z += dzdx;
406                                 rec_z = 1.0 / z;
407                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
408                                                   (((int) (u * rec_z)) & 63)];
409                                 if (c != 255) {
410                                         destlong &= ~((u_int64_t)0xFF << 16);
411                                         destlong |= (u_int64_t) c << 16;
412                                 }
413                                 u += dudx;
414                                 v += dvdx;
415                                 z += dzdx;
416                                 rec_z = 1.0 / z;
417                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
418                                                   (((int) (u * rec_z)) & 63)];
419                                 if (c != 255) {
420                                         destlong &= ~((u_int64_t)0xFF << 24);
421                                         destlong |= (u_int64_t) c << 24;
422                                 }
423                                 u += dudx;
424                                 v += dvdx;
425                                 z += dzdx;
426                                 rec_z = 1.0 / z;
427                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
428                                                   (((int) (u * rec_z)) & 63)];
429                                 if (c != 255) {
430                                         destlong &= ~((u_int64_t)0xFF << 32);
431                                         destlong |= (u_int64_t) c << 32;
432                                 }
433                                 u += dudx;
434                                 v += dvdx;
435                                 z += dzdx;
436                                 rec_z = 1.0 / z;
437                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
438                                                   (((int) (u * rec_z)) & 63)];
439                                 if (c != 255) {
440                                         destlong &= ~((u_int64_t)0xFF << 40);
441                                         destlong |= (u_int64_t) c << 40;
442                                 }
443                                 u += dudx;
444                                 v += dvdx;
445                                 z += dzdx;
446                                 rec_z = 1.0 / z;
447                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
448                                                   (((int) (u * rec_z)) & 63)];
449                                 if (c != 255) {
450                                         destlong &= ~((u_int64_t)0xFF << 48);
451                                         destlong |= (u_int64_t) c << 48;
452                                 }
453                                 u += dudx;
454                                 v += dvdx;
455                                 z += dzdx;
456                                 rec_z = 1.0 / z;
457                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
458                                                   (((int) (u * rec_z)) & 63)];
459                                 if (c != 255) {
460                                         destlong &= ~((u_int64_t)0xFF << 56);
461                                         destlong |= (u_int64_t) c << 56;
462                                 }
463                                 u += dudx;
464                                 v += dvdx;
465                                 z += dzdx;
466                                 rec_z = 1.0 / z;
467
468                                 *((u_int64_t *) dest) = destlong;
469                                 dest += 8;
470                                 x -= 8;
471                                 j -= 8;
472                         }
473                 }
474                 while (x-- > 0) {
475                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
476                                           (((int) (u * rec_z)) & 63)];
477                         if (c != 255)
478                                 *dest = c;
479                         dest++;
480                         u += dudx;
481                         v += dvdx;
482                         z += dzdx;
483                         rec_z = 1.0 / z;
484                 }
485         }
486 }
487
488 void c_tmap_scanline_per_nolight()
489 {
490         ubyte *dest;
491         uint c;
492         int x;
493         fix u,v,z,dudx, dvdx, dzdx;
494
495         u = fx_u;
496         v = fx_v*64;
497         z = fx_z;
498         dudx = fx_du_dx; 
499         dvdx = fx_dv_dx*64; 
500         dzdx = fx_dz_dx;
501
502         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
503
504         if (!Transparency_on)   {
505                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
506                         *dest++ = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
507                         u += dudx;
508                         v += dvdx;
509                         z += dzdx;
510                 }
511         } else {
512                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
513                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
514                         if ( c!=255)
515                                 *dest = c;
516                         dest++;
517                         u += dudx;
518                         v += dvdx;
519                         z += dzdx;
520                 }
521         }
522 }
523
524 // This texture mapper uses floating point extensively and writes 8 pixels at once, so it likely works
525 // best on 64 bit RISC processors.
526 // WARNING: it is not endian clean. For big endian, reverse the shift counts in the unrolled loops. I
527 // have no means to test that, so I didn't try it. Please tell me if you get this to work on a big
528 // endian machine.
529 // If you're using an Alpha, use the Compaq compiler for this file for quite some fps more.
530 // Unfortunately, it won't compile the whole source, so simply compile everything, change the
531 // compiler to ccc, remove scanline.o and compile again.
532 // Please send comments/suggestions to falk.hueffner@student.uni-tuebingen.de.
533 void c_fp_tmap_scanline_per()
534 {
535         ubyte          *dest;
536         uint            c;
537         int             x, j;
538         double          u, v, z, l, dudx, dvdx, dzdx, dldx, rec_z;
539         u_int64_t       destlong;
540
541         u = f2db(fx_u);
542         v = f2db(fx_v) * 64.0;
543         z = f2db(fx_z);
544         l = f2db(fx_l);
545         dudx = f2db(fx_du_dx);
546         dvdx = f2db(fx_dv_dx) * 64.0;
547         dzdx = f2db(fx_dz_dx);
548         dldx = f2db(fx_dl_dx);
549
550         rec_z = 1.0 / z; // gcc 2.95.2 is won't do this optimization itself
551
552         dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y));
553         x = fx_xright - fx_xleft + 1;
554
555         if (!Transparency_on) {
556                 if (x >= 8) {
557                         if ((j = (size_t) dest & 7) != 0) {
558                                 j = 8 - j;
559
560                                 while (j > 0) {
561                                         *dest++ =
562                                             gr_fade_table[((int) fabs(l)) * 256 +
563                                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
564                                                                         (((int) (u * rec_z)) & 63)]];
565                                         l += dldx;
566                                         u += dudx;
567                                         v += dvdx;
568                                         z += dzdx;
569                                         rec_z = 1.0 / z;
570                                         x--;
571                                         j--;
572                                 }
573                         }
574
575                         j = x;
576                         while (j >= 8) {
577                                 destlong =
578                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
579                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
580                                                                             (((int) (u * rec_z)) & 63)]];
581                                 l += dldx;
582                                 u += dudx;
583                                 v += dvdx;
584                                 z += dzdx;
585                                 rec_z = 1.0 / z;
586                                 destlong |=
587                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
588                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
589                                                                             (((int) (u * rec_z)) & 63)]] << 8;
590                                 l += dldx;
591                                 u += dudx;
592                                 v += dvdx;
593                                 z += dzdx;
594                                 rec_z = 1.0 / z;
595                                 destlong |=
596                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
597                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
598                                                                             (((int) (u * rec_z)) & 63)]] << 16;
599                                 l += dldx;
600                                 u += dudx;
601                                 v += dvdx;
602                                 z += dzdx;
603                                 rec_z = 1.0 / z;
604                                 destlong |=
605                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
606                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
607                                                                             (((int) (u * rec_z)) & 63)]] << 24;
608                                 l += dldx;
609                                 u += dudx;
610                                 v += dvdx;
611                                 z += dzdx;
612                                 rec_z = 1.0 / z;
613                                 destlong |=
614                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
615                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
616                                                                             (((int) (u * rec_z)) & 63)]] << 32;
617                                 l += dldx;
618                                 u += dudx;
619                                 v += dvdx;
620                                 z += dzdx;
621                                 rec_z = 1.0 / z;
622                                 destlong |=
623                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
624                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
625                                                                             (((int) (u * rec_z)) & 63)]] << 40;
626                                 l += dldx;
627                                 u += dudx;
628                                 v += dvdx;
629                                 z += dzdx;
630                                 rec_z = 1.0 / z;
631                                 destlong |=
632                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
633                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
634                                                                             (((int) (u * rec_z)) & 63)]] << 48;
635                                 l += dldx;
636                                 u += dudx;
637                                 v += dvdx;
638                                 z += dzdx;
639                                 rec_z = 1.0 / z;
640                                 destlong |=
641                                     (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 +
642                                                               (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) +
643                                                                             (((int) (u * rec_z)) & 63)]] << 56;
644                                 l += dldx;
645                                 u += dudx;
646                                 v += dvdx;
647                                 z += dzdx;
648                                 rec_z = 1.0 / z;
649
650                                 *((u_int64_t *) dest) = destlong;
651                                 dest += 8;
652                                 x -= 8;
653                                 j -= 8;
654                         }
655                 }
656                 while (x-- > 0) {
657                         *dest++ =
658                             gr_fade_table[((int) fabs(l)) * 256 +
659                                           (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]];
660                         l += dldx;
661                         u += dudx;
662                         v += dvdx;
663                         z += dzdx;
664                         rec_z = 1.0 / z;
665                 }
666         } else {
667                 if (x >= 8) {
668                         if ((j = (size_t) dest & 7) != 0) {
669                                 j = 8 - j;
670
671                                 while (j > 0) {
672                                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
673                                         if (c != 255)
674                                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
675                                         dest++;
676                                         l += dldx;
677                                         u += dudx;
678                                         v += dvdx;
679                                         z += dzdx;
680                                         rec_z = 1.0 / z;
681                                         x--;
682                                         j--;
683                                 }
684                         }
685
686                         j = x;
687                         while (j >= 8) {
688                                 destlong = *((u_int64_t *) dest);
689                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
690                                 if (c != 255) {
691                                         destlong &= ~(u_int64_t)0xFF;
692                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c];
693                                 }
694                                 l += dldx;
695                                 u += dudx;
696                                 v += dvdx;
697                                 z += dzdx;
698                                 rec_z = 1.0 / z;
699                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
700                                 if (c != 255) {
701                                         destlong &= ~((u_int64_t)0xFF << 8);
702                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 8;
703                                 }
704                                 l += dldx;
705                                 u += dudx;
706                                 v += dvdx;
707                                 z += dzdx;
708                                 rec_z = 1.0 / z;
709                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
710                                 if (c != 255) {
711                                         destlong &= ~((u_int64_t)0xFF << 16);
712                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 16;
713                                 }
714                                 l += dldx;
715                                 u += dudx;
716                                 v += dvdx;
717                                 z += dzdx;
718                                 rec_z = 1.0 / z;
719                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
720                                 if (c != 255) {
721                                         destlong &= ~((u_int64_t)0xFF << 24);
722                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 24;
723                                 }
724                                 l += dldx;
725                                 u += dudx;
726                                 v += dvdx;
727                                 z += dzdx;
728                                 rec_z = 1.0 / z;
729                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
730                                 if (c != 255) {
731                                         destlong &= ~((u_int64_t)0xFF << 32);
732                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 32;
733                                 }
734                                 l += dldx;
735                                 u += dudx;
736                                 v += dvdx;
737                                 z += dzdx;
738                                 rec_z = 1.0 / z;
739                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
740                                 if (c != 255) {
741                                         destlong &= ~((u_int64_t)0xFF << 40);
742                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 40;
743                                 }
744                                 l += dldx;
745                                 u += dudx;
746                                 v += dvdx;
747                                 z += dzdx;
748                                 rec_z = 1.0 / z;
749                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
750                                 if (c != 255) {
751                                         destlong &= ~((u_int64_t)0xFF << 48);
752                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 48;
753                                 }
754                                 l += dldx;
755                                 u += dudx;
756                                 v += dvdx;
757                                 z += dzdx;
758                                 rec_z = 1.0 / z;
759                                 c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
760                                 if (c != 255) {
761                                         destlong &= ~((u_int64_t)0xFF << 56);
762                                         destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 56;
763                                 }
764                                 l += dldx;
765                                 u += dudx;
766                                 v += dvdx;
767                                 z += dzdx;
768                                 rec_z = 1.0 / z;
769
770                                 *((u_int64_t *) dest) = destlong;
771                                 dest += 8;
772                                 x -= 8;
773                                 j -= 8;
774                         }
775                 }
776                 while (x-- > 0) {
777                         c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)];
778                         if (c != 255)
779                                 *dest = gr_fade_table[((int) fabs(l)) * 256 + c];
780                         dest++;
781                         l += dldx;
782                         u += dudx;
783                         v += dvdx;
784                         z += dzdx;
785                         rec_z = 1.0 / z;
786                 }
787         }
788 }
789
790 #if 1
791 // note the unrolling loop is broken. It is never called, and uses big endian. -- FH
792 void c_tmap_scanline_per()
793 {
794         ubyte *dest;
795         uint c;
796         int x, j;
797         fix l,u,v,z;
798         fix dudx, dvdx, dzdx, dldx;
799
800         u = fx_u;
801         v = fx_v*64;
802         z = fx_z;
803         dudx = fx_du_dx; 
804         dvdx = fx_dv_dx*64; 
805         dzdx = fx_dz_dx;
806
807         l = fx_l>>8;
808         dldx = fx_dl_dx/256; // fx_dl_dx>>8 != fx_dl_dx/256 for negative numbers
809         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
810
811         if (!Transparency_on)   {
812                 ubyte*                  pixPtrLocalCopy = pixptr;
813                 ubyte*                  fadeTableLocalCopy = gr_fade_table;
814                 unsigned long   destlong;
815
816                 x = fx_xright-fx_xleft+1; // x = number of pixels in scanline
817
818                 if ((j = (unsigned long) dest & 3) != 0)
819                         {
820                         j = 4 - j;
821
822                         if (j > x)
823                                 j = x;
824
825                         while (j > 0)
826                                 {       
827                                 //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
828                                 *dest++ = fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
829                                 //end edit -MM
830                                 l += dldx;
831                                 u += dudx;
832                                 v += dvdx;
833                                 z += dzdx;
834                                 x--;
835                                 j--;
836                                 }
837                         }
838
839                 j &= ~3;
840                 while (j > 0)
841                         {
842                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
843                         destlong = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 24;
844                         //end edit -MM
845                         l += dldx;
846                         u += dudx;
847                         v += dvdx;
848                         z += dzdx;
849                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
850                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 16;
851                         //end edit -MM
852                         l += dldx;
853                         u += dudx;
854                         v += dvdx;
855                         z += dzdx;
856                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
857                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ] << 8;
858                         //end edit -MM
859                         l += dldx;
860                         u += dudx;
861                         v += dvdx;
862                         z += dzdx;
863                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
864                         destlong |= (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
865                         //end edit -MM
866                         l += dldx;
867                         u += dudx;
868                         v += dvdx;
869                         z += dzdx;
870                         *((unsigned long *) dest) = destlong;
871                         dest += 4;
872                         x -= 4;
873                         j -= 4;
874                         }
875
876                 while (x-- > 0)
877                         {
878                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
879                         *dest++ = (unsigned long) fadeTableLocalCopy[ (l&(0x7f00)) + (uint)pixPtrLocalCopy[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
880                         //end edit -MM
881                         l += dldx;
882                         u += dudx;
883                         v += dvdx;
884                         z += dzdx;
885                         }
886
887         } else {
888                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
889                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
890                         if ( c!=TRANSPARENCY_COLOR)
891                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
892                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
893                         //end edit -MM
894                         dest++;
895                         l += dldx;
896                         u += dudx;
897                         v += dvdx;
898                         z += dzdx;
899                 }
900         }
901 }
902
903 #else
904 void c_tmap_scanline_per()
905 {
906         ubyte *dest;
907         uint c;
908         int x;
909         fix u,v,z,l,dudx, dvdx, dzdx, dldx;
910
911         u = fx_u;
912         v = fx_v*64;
913         z = fx_z;
914         dudx = fx_du_dx; 
915         dvdx = fx_dv_dx*64; 
916         dzdx = fx_dz_dx;
917
918         l = fx_l>>8;
919         dldx = fx_dl_dx/256; // fx_dl_dx>>8 != fx_dl_dx/256 for negative numbers
920         dest = (ubyte *)(write_buffer + fx_xleft + (bytes_per_row * fx_y)  );
921
922         if (!Transparency_on)   {
923                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
924                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
925                         *dest++ = gr_fade_table[ (l&(0x7f00)) + (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ] ];
926                         //end edit -MM
927                         l += dldx;
928                         u += dudx;
929                         v += dvdx;
930                         z += dzdx;
931                 }
932         } else {
933                 for (x= fx_xright-fx_xleft+1 ; x > 0; --x ) {
934                         c = (uint)pixptr[ ( (v/z)&(64*63) ) + ((u/z)&63) ];
935                         if ( c!=255)
936                         //edited 05/18/99 Matt Mueller - changed from 0xff00 to 0x7f00 to fix glitches
937                                 *dest = gr_fade_table[ (l&(0x7f00)) + c ];
938                         //end edit -MM
939                         dest++;
940                         l += dldx;
941                         u += dudx;
942                         v += dvdx;
943                         z += dzdx;
944                 }
945         }
946 }
947
948 #endif
949
// Currently selected scanline renderers.  select_tmap() points each of these
// at the assembly, "fp" or plain C implementation.

// Untextured spans.
void (*cur_tmap_scanline_flat)(void);
void (*cur_tmap_scanline_shaded)(void);

// Linearly interpolated texture spans, with and without lighting.
void (*cur_tmap_scanline_lin)(void);
void (*cur_tmap_scanline_lin_nolight)(void);

// Perspective-correct texture spans, with and without lighting.
void (*cur_tmap_scanline_per)(void);
void (*cur_tmap_scanline_per_nolight)(void);
956
957 //runtime selection of optimized tmappers.  12/07/99  Matthew Mueller
958 //the reason I did it this way rather than having a *tmap_funcs that then points to a c_tmap or fp_tmap struct thats already filled in, is to avoid a second pointer dereference.
959 void select_tmap(char *type)
960 {
961         if (!type)
962         {
963 #if !defined(NO_ASM) && !defined(OGL)
964 #if defined(__pentiumpro__)
965                 select_tmap("ppro");
966 #elif defined(__pentium__)
967                 select_tmap("pent");
968 #else
969                 select_tmap("i386");
970 #endif
971 #else
972                 select_tmap("c");
973 #endif
974                 return;
975         }
976 #if !defined(NO_ASM) && !defined(OGL)
977         if (stricmp(type, "i386")==0)
978         {
979                 cur_tmap_scanline_per=asm_tmap_scanline_per;
980                 cur_tmap_scanline_per_nolight=asm_tmap_scanline_per;
981                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
982                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
983                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
984                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
985         }
986         else if (stricmp(type,"pent")==0){
987                 cur_tmap_scanline_per=asm_pent_tmap_scanline_per;
988                 cur_tmap_scanline_per_nolight=asm_pent_tmap_scanline_per;
989                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
990                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
991                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
992                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
993         }
994         else if (stricmp(type,"ppro")==0){
995                 cur_tmap_scanline_per=asm_ppro_tmap_scanline_per;
996                 cur_tmap_scanline_per_nolight=asm_ppro_tmap_scanline_per;
997                 cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted;
998                 cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin;
999                 cur_tmap_scanline_flat=asm_tmap_scanline_flat;
1000                 cur_tmap_scanline_shaded=asm_tmap_scanline_shaded;
1001         }
1002         else
1003 #endif
1004         if (stricmp(type,"fp")==0){
1005                 cur_tmap_scanline_per=c_fp_tmap_scanline_per;
1006                 cur_tmap_scanline_per_nolight=c_fp_tmap_scanline_per_nolight;
1007                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1008                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1009                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1010                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1011         }
1012         else {
1013                 if (stricmp(type,"c")!=0)
1014                         printf("unknown tmap requested, using c tmap\n");
1015                 cur_tmap_scanline_per=c_tmap_scanline_per;
1016                 cur_tmap_scanline_per_nolight=c_tmap_scanline_per_nolight;
1017                 cur_tmap_scanline_lin=c_tmap_scanline_lin;
1018                 cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight;
1019                 cur_tmap_scanline_flat=c_tmap_scanline_flat;
1020                 cur_tmap_scanline_shaded=c_tmap_scanline_shaded;
1021         }
1022 }