icculus.org git repositories - taylor/freespace2.git/blob - src/graphics/scaler.cpp
added copyright header
[taylor/freespace2.git] / src / graphics / scaler.cpp
1 /*
2  * Copyright (C) Volition, Inc. 1999.  All rights reserved.
3  *
4  * All source code herein is the property of Volition, Inc. You may not sell 
5  * or otherwise commercially exploit the source or things you created based on
6  * the source.
7  */
8
9 /*
10  * $Logfile: /Freespace2/code/Graphics/Scaler.cpp $
11  * $Revision$
12  * $Date$
13  * $Author$
14  *
15  * Routines to scale a bitmap.
16  *
17  * $Log$
18  * Revision 1.3  2002/06/09 04:41:18  relnev
19  * added copyright header
20  *
21  * Revision 1.2  2002/06/05 08:05:29  relnev
22  * stub/warning removal.
23  *
24  * reworked the sound code.
25  *
26  * Revision 1.1.1.1  2002/05/03 03:28:09  root
27  * Initial import.
28  *
29  * 
30  * 9     7/20/99 1:49p Dave
31  * Peter Drake build. Fixed some release build warnings.
32  * 
33  * 8     6/22/99 7:03p Dave
34  * New detail options screen.
35  * 
36  * 7     5/12/99 5:33p Johne
37  * Don't use gr8_scaler() in pofview.
38  * 
39  * 6     5/09/99 6:00p Dave
40  * Lots of cool new effects. E3 build tweaks.
41  * 
42  * 5     1/14/99 6:06p Dave
43  * 100% full squad logo support for single player and multiplayer.
44  * 
45  * 4     1/14/99 12:48a Dave
46  * Todo list bug fixes. Made a pass at putting briefing icons back into
47  * FRED. Sort of works :(
48  * 
49  * 3     11/30/98 1:07p Dave
50  * 16 bit conversion, first run.
51  * 
52  * 2     10/07/98 10:53a Dave
53  * Initial checkin.
54  * 
55  * 1     10/07/98 10:49a Dave
56  * 
57  * 40    4/02/98 2:01p Dave
58  * JAS: Increased constant for source of compiled code
59  * 
60  * 39    4/01/98 9:21p John
61  * Made NDEBUG, optimized build with no warnings or errors.
62  * 
63  * 38    4/01/98 7:15p John
64  * fixed bug with previous
65  * 
66  * 37    4/01/98 6:45p John
67  * Reduced memory by combining compiled_code ptrs.
68  * 
69  * 36    3/22/98 3:28p John
70  * Added in stippled alpha for lower details.  Made medium detail use
71  * engine glow.
72  * 
73  * 35    3/10/98 4:18p John
74  * Cleaned up graphics lib.  Took out most unused gr functions.   Made D3D
75  * & Glide have popups and print screen.  Took out all >8bpp software
76  * support.  Made Fred zbuffer.  Made zbuffer allocate dynamically to
77  * support Fred.  Made zbuffering key off of functions rather than one
78  * global variable.
79  * 
80  * 34    2/05/98 9:21p John
81  * Some new Direct3D code.   Added code to monitor a ton of stuff in the
82  * game.
83  * 
84  * 33    1/27/98 10:18a John
85  * fixed warning for optimized build
86  * 
87  * 32    1/26/98 5:12p John
88  * Added in code for Pentium Pro specific optimizations. Speed up
89  * zbuffered correct tmapper about 35%.   Speed up non-zbuffered scalers
90  * by about 25%.
91  * 
92  * 31    1/19/98 6:15p John
93  * Fixed all my Optimized Build compiler warnings
94  * 
95  * 30    12/04/97 12:09p John
96  * Made glows use scaler instead of tmapper so they don't rotate.  Had to
97  * add a zbuffered scaler.
98  * 
99  * 29    12/02/97 4:00p John
100  * Added first rev of thruster glow, along with variable levels of
101  * translucency, which required some restructuring of palman.
102  * 
103  * 28    11/30/97 4:33p John
104  * added 32-bpp aascaler
105  * 
106  * 27    11/30/97 3:57p John
107  * Made fixed 32-bpp translucency.  Made BmpMan always map translucent
108  * color into 255 even if you aren't supposed to remap and make it's
109  * palette black.
110  * 
111  * 26    11/30/97 12:18p John
112  * added more 24 & 32-bpp primitives
113  * 
114  * 25    11/29/97 2:06p John
115  * added more 16-bpp support
116  * 
117  * 24    11/14/97 12:30p John
118  * Fixed some DirectX bugs.  Moved the 8-16 xlat tables into Graphics
119  * libs.  Made 16-bpp DirectX modes know what bitmap format they're in.
120  * 
121  * 23    10/19/97 12:55p John
122  * new code to lock / unlock surfaces for smooth directx integration.
123  * 
124  * 22    10/15/97 4:48p John
125  * added 16-bpp aascaler
126  * 
127  * 21    10/14/97 8:08a John
128  * added a bunch more 16 bit support
129  * 
130  * 20    10/09/97 5:23p John
131  * Added support for more 16-bpp functions
132  * 
133  * 19    8/04/97 4:47p John
134  * added gr_aascaler.
135  * 
136  * 18    7/28/97 11:31a John
137  * made compiled code save all registers that it changes.  When building
138  * optimized, my code was using EBX, and so was the compiler, so weird
139  * errors happened.  Pushing/popping ebx fixed this.
140  * 
141  * 17    7/16/97 5:29p John
142  * added palette table caching and made scaler and liner no light tmapper
143  * do alpha blending in 8 bpp mode.
144  * 
145  * 16    7/10/97 2:06p John
146  * added code to specify alphablending type for bitmaps.
147  * 
148  * 15    6/12/97 2:50a Lawrance
149  * bm_unlock() now passed bitmap number, not pointer
150  * 
151  * 14    5/29/97 3:10p John
152  * Took out debug menu.  
153  * Made software scaler draw larger bitmaps.
154  * Optimized Direct3D some.
155  * 
156  * 13    5/12/97 12:27p John
157  * Restructured Graphics Library to add support for multiple renderers.
158  * 
159  * 12    12/04/96 2:02p John
160  * Added fast compiled code to the scaler in 8,16,32 bpp modes.
161  * 
162  * 11    12/03/96 8:08p John
163  * Added compiled code to 8bpp scaler.  Made bitmaps that are trying to
164  * scale up too big to not draw.
165  * 
166  * 10    12/03/96 11:12a John
167  * added commented out "filtering" code to scaler.
168  * 
169  * 9     11/19/96 2:42p Allender
170  * fix up 32 bit scaler
171  * 
172  * 8     11/15/96 11:27a Allender
173  * 16bpp version of scaler
174  * 
175  * 7     11/07/96 6:19p John
176  * Added a bunch of 16bpp primitives so the game sort of runs in 16bpp
177  * mode.
178  * 
179  * 6     10/26/96 1:40p John
180  * Added some new primitives to the 2d library and
181  * cleaned up some old ones.
182  *
183  * $NoKeywords: $
184  */
185
186 #include <math.h>
187 #include <limits.h>
188 #include <stdio.h>
189 #ifndef PLAT_UNIX
190 #include <conio.h>
191 #endif
192 #include <stdlib.h>
193
194 #include "scaler.h"
195 #include "2d.h"
196 #include "grinternal.h"
197 #include "floating.h"
198 #include "bmpman.h"
199 #include "palman.h"
200 #include "tmapscanline.h"
201 #include "systemvars.h"
202 #include "key.h"
203 #include "colors.h"
204
// Smallest per-screen-pixel texture step gr8_scaler() will draw with; when
// the absolute u or v step is below this the bitmap is considered scaled up
// too far and is not drawn.
205 #define MIN_SCALE_FACTOR 0.0001f
206
// When defined, gr8_scaler() draws each scanline by calling machine code
// generated at run time by the scaler_create_compiled_code8*() routines
// instead of using its plain C pixel loops.
207 #define USE_COMPILED_CODE
208
// Pixel values treated as "transparent".  The 8-bit value is the immediate
// compared against AL by the code scaler_create_compiled_code8() generates.
209 #define TRANSPARENCY_COLOR_8            0xff
210 #define TRANSPARENCY_COLOR_16           0xffff
211 #define TRANSPARENCY_COLOR_32           0xffffffff
212
// Linear interpolation: maps x from the range [x0,x1] onto [y0,y1].
// Used when clipping to find the texture coordinate at a clip edge.
// x1 must differ from x0 (the macro divides by x1-x0).
213 #define FIND_SCALED_NUM(x,x0,x1,y0,y1) (((((x)-(x0))*((y1)-(y0)))/((x1)-(x0)))+(y0))
214
// Size in bytes of the buffer that receives the generated scanline code.
// NOTE(review): the historical note below says "16K seems safe" but the
// value is 32768 (32K) - confirm which was intended.
215 #define MAX_CODE_SIZE 32768             // Was 65536.  JAS: Determined to be 8208 on April 1, 98; 16K seems safe
216
// Single buffer shared by every scaler_create_compiled_code8*() generator;
// each call writes its routine starting at compiled_code[0].
217 ubyte compiled_code[MAX_CODE_SIZE];
218
// Debug aid: when FIND_MAX_SIZE is defined the generators record (and print)
// the largest routine size they have produced so far.
219 #ifdef FIND_MAX_SIZE
220 static int Max_size = 0;
221 #endif
222
223 /*
224 void test_code()
225 {
226         _asm mov ax, [esi+0xabcdef12]
227         _asm cmp ax, 255
228         _asm je  0xabcdef12
229         _asm mov [edi+0xabcdef12], ax
230         _asm mov ax, [esi+0xabcdef12]
231 }
232 */
233
234
235
236 //----------------------------------------------------
237 // scaler_create_compiled_code8
238 //
239 // Creates code that looks like:
240 //
241 // @@: mov al, [esi+????]
242 //     cmp al, TRANSPARENCY_COLOR_8
243 //     je  @f   ; jump to next @@ label
244 //     mov [edi+???], al    ; If the source pixel is scaled up
245 //     mov [edi+???], al    ; there might be a lot of these lines
246 //     ...
247 // @@: mov al, [esi+????]
248 //
249
250 ubyte *scaler_create_compiled_code8( int w, fix u, fix du )
251 {
252         int last_u, x;
253         ubyte * cc;
254         uint * last_jmp_pos;
255
256         cc = compiled_code;
257
258         //if ( abs(du) < F1_0 / 4 ) *cc++ = 0xCC;
259
260 //      *cc++ = 0xCC;   // Int3
261 //      *cc++ = 0xc3;   // RET
262
263         last_u = -1;
264
265         last_jmp_pos=NULL;
266
267         for (x=0; x<w; x++ )                    {
268                 if ( last_u != f2i(u) ) {
269                         if ( last_jmp_pos )     {
270                                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
271                         }
272                         *cc++ = 0x8a;   *cc++ = 0x86; // mov al, [esi+imm]
273                         *(uint *)cc = f2i(u); cc += 4;
274                         last_u = f2i(u);
275
276                         *cc++ = 0x3c; *cc++ = TRANSPARENCY_COLOR_8;     // cmp al, 255
277                         *cc++ = 0x0f; *cc++ = 0x84;   // je rel32
278                         last_jmp_pos = (uint *)cc;
279                         cc += 4;                
280                 }
281                 
282         
283                 *cc++ = 0x88;   *cc++ = 0x87; // mov [edi+imm], al
284                 *(uint *)cc = x; cc += 4;
285
286                 u += du;
287         }
288         if ( last_jmp_pos )     {
289                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
290         }
291         *cc++ = 0xc3;   // RET
292
293         if ( cc >= &compiled_code[MAX_CODE_SIZE] )
294                 Int3();         // GET JOHN NOW!
295
296 #ifdef FIND_MAX_SIZE
297         int size = cc - compiled_code;
298         if ( size > Max_size )  {
299                 Max_size = size;
300                 mprintf(( "Max size = %d\n", size ));
301         }
302 #endif
303
304         return compiled_code;
305 }
306
307 ubyte *scaler_create_compiled_code8_stippled( int w, fix u, fix du )
308 {
309         int last_u, x;
310         ubyte * cc;
311         uint * last_jmp_pos;
312
313         cc = compiled_code;
314
315         //if ( abs(du) < F1_0 / 4 ) *cc++ = 0xCC;
316
317 //      *cc++ = 0xCC;   // Int3
318 //      *cc++ = 0xc3;   // RET
319
320         last_u = -1;
321
322         last_jmp_pos=NULL;
323
324         for (x=0; x<w-1; x+=2 )                 {
325                 if ( last_u != f2i(u) ) {
326                         if ( last_jmp_pos )     {
327                                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
328                         }
329                         *cc++ = 0x8a;   *cc++ = 0x86; // mov al, [esi+imm]
330                         *(uint *)cc = f2i(u); cc += 4;
331                         last_u = f2i(u);
332
333                         *cc++ = 0x3c; *cc++ = TRANSPARENCY_COLOR_8;     // cmp al, 255
334                         *cc++ = 0x0f; *cc++ = 0x84;   // je rel32
335                         last_jmp_pos = (uint *)cc;
336                         cc += 4;                
337                 }
338                 
339         
340                 *cc++ = 0x88;   *cc++ = 0x87; // mov [edi+imm], al
341                 *(uint *)cc = x; cc += 4;
342
343                 u += du*2;
344         }
345         if ( last_jmp_pos )     {
346                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
347         }
348         *cc++ = 0xc3;   // RET
349
350         if ( cc >= &compiled_code[MAX_CODE_SIZE] )
351                 Int3();         // GET JOHN NOW!
352
353 #ifdef FIND_MAX_SIZE
354         int size = cc - compiled_code;
355         if ( size > Max_size )  {
356                 Max_size = size;
357                 mprintf(( "Max size = %d\n", size ));
358         }
359 #endif
360
361         return compiled_code;
362 }
363
// Scratch function holding hand-written inline assembly for the instruction
// sequence used by the alpha-blending scanline code (xor/mov bl/add/mov ebx,
// plus a z-compare, pointer bump and conditional jump).  It is not called
// anywhere in the visible part of this file.
// NOTE(review): it appears to exist only so the compiler's assembly listing
// reveals the opcode bytes for these instructions - confirm before removing.
// On PLAT_UNIX builds the body is replaced by STUB_FUNCTION.
364 void test_code1()
365 {
366 #ifdef PLAT_UNIX
367         STUB_FUNCTION;
368 #else
369         _asm mov ebx, -1
370         _asm xor eax, eax
371         _asm xor ebx, ebx
372         _asm mov        bl, BYTE PTR [edi-1412567278]
373         _asm add ebx, eax
374         _asm mov ebx, [ecx+ebx] ; blend it
375         _asm cmp ebp, [edx]
376         _asm add edx, 4
377         _asm jl [0xABCDEF12]
378         
379 //     xor eax, eax                     ; avoid ppro partial register stall
380 //     mov ah, [esi+????]   ; get the foreground pixel
381 //     ; the following lines might be repeated
382 //     xor ebx, ebx                     ; avoid ppro partial register stall
383 //     mov bl, [edi+????]   ; get the background pixel
384 //     mov ebx, [ecx+ebx]       ; blend it
385 //     mov [edi+????], bl   ; write it
386 #endif
387 }
388
389 /*
390   00130 b8 00 00 00 00  mov     eax, 0
391   00135 8a a6 12 ef cd ab               mov     ah, BYTE PTR [esi-1412567278]
392   0013b 8a 87 12 ef cd ab               mov     al, BYTE PTR [edi-1412567278]
393   00141 8a 1c 01                    mov bl, BYTE PTR [ecx+eax]
394   00141 8b 1c 01                                        mov     ebx, DWORD PTR [ecx+eax]
395   00144 88 9f 12 ef cd ab               mov     BYTE PTR [edi-1412567278], bl
396
397
398   00130 33 c0           xor     eax, eax
399   00132 33 db           xor     ebx, ebx
400   00134 8a 9f 12 ef cd  ab              mov     bl, BYTE PTR [edi-1412567278]
401   0013a 03 d8           add     ebx, eax
402   0013c 8b 1c 19        mov     ebx, DWORD PTR [ecx+ebx]
403
404   0013f 3b 2a           cmp     ebp, DWORD PTR [edx]
405   00141 83 c2 04        add     edx, 4
406
407
408 */
409
410 //----------------------------------------------------
411 // scaler_create_compiled_code8_alpha
412 //
413 // Creates code that looks like:
414
415 //=============== Pentium ======================
416 // mov eax, 0
417 //     mov ah, [esi+????]   ; get the foreground pixel
418 //     ; the following lines might be repeated
419 //     mov al, [edi+????]   ; get the background pixel
420 //     mov bl, [ecx+eax]        ; blend it
421 //     mov [edi+????], bl   ; write it
422 //     ...
423
424 //============= Pentium Pro code =============
425 //     xor eax, eax                     ; avoid ppro partial register stall
426 //     mov ah, [esi+????]   ; get the foreground pixel
427 //     ; the following lines might be repeated
428 //     xor ebx, ebx                     ; avoid ppro partial register stall
429 //     mov bl, [edi+????]   ; get the background pixel
430 //     mov ebx, [ecx+ebx]       ; blend it
431 //     mov [edi+????], bl   ; write it
432
433
434 ubyte *scaler_create_compiled_code8_alpha( int w, fix u, fix du )
435 {
436         int last_u, x;
437         ubyte * cc;
438
439         cc = compiled_code;
440
441         //if ( abs(du) < F1_0 / 4 ) *cc++ = 0xCC;
442
443         //*cc++ = 0xCC; // Int3
444         //*cc++ = 0xc3; // RET
445
446         last_u = -1;
447
448         if ( Gr_cpu     > 5 )   {
449                 // Pentium Pro optimized code.
450
451                 for (x=0; x<w; x++ )                    {
452                         if ( last_u != f2i(u) ) {
453                                 *cc++ = 0x33;   *cc++ = 0xc0; // xor eax, eax
454                                 *cc++ = 0x8a;   *cc++ = 0xa6; // mov ah, [esi+imm]
455                                 //*cc++ = 0x8a; *cc++ = 0x86; // mov al, [esi+imm]
456                                 *(uint *)cc = f2i(u); cc += 4;
457                                 last_u = f2i(u);
458                         }
459                         
460                         *cc++ = 0x33;   *cc++ = 0xdb;           // xor ebx, ebx
461                         
462                         *cc++ = 0x8a;   *cc++ = 0x9f; 
463                         *(uint *)cc = x; cc += 4;               // mov bl, [edi+imm]
464
465                         *cc++ = 0x03;   *cc++ = 0xd8;           // add ebx, eax
466
467                         *cc++ = 0x8b; *cc++ = 0x1c; *cc++ = 0x19;       // mov  ebx, BYTE PTR [ecx+ebx]
468
469                         *cc++ = 0x88;   *cc++ = 0x9f; 
470                         *(uint *)cc = x; cc += 4;               // mov [edi+imm], bl
471
472                         u += du;
473                 }
474         } else {
475                 // Pentium optimized code.
476
477                 *cc++ = 0xb8; *(uint *)cc = 0; cc += 4;         // mov eax, 0
478
479                 for (x=0; x<w; x++ )                    {
480                         if ( last_u != f2i(u) ) {
481                                 *cc++ = 0x8a;   *cc++ = 0xa6; // mov ah, [esi+imm]
482                                 *(uint *)cc = f2i(u); cc += 4;
483                                 last_u = f2i(u);
484                         }
485                         
486                         *cc++ = 0x8a;   *cc++ = 0x87; 
487                         *(uint *)cc = x; cc += 4;               // mov al, [edi+imm]
488
489                         *cc++ = 0x8a; *cc++ = 0x1c; *cc++ = 0x01;       // mov  bl, BYTE PTR [ecx+eax]
490
491                         *cc++ = 0x88;   *cc++ = 0x9f; 
492                         *(uint *)cc = x; cc += 4;               // mov [edi+imm], bl
493
494                         u += du;
495                 }
496         }
497
498         *cc++ = 0xc3;   // RET
499
500         if ( cc >= &compiled_code[MAX_CODE_SIZE] )
501                 Int3();         // GET JOHN NOW!
502
503 #ifdef FIND_MAX_SIZE
504         int size = cc - compiled_code;
505         if ( size > Max_size )  {
506                 Max_size = size;
507                 mprintf(( "Max size = %d\n", size ));
508         }
509 #endif
510
511         return compiled_code;
512 }
513
514 /*
515                                 for (x=0; x<w; x++ )                    {
516                                         if ( fx_w > *zbuf )     {
517                                                 uint c = sbits[ tmp_u >> 16 ]<<8;
518                                                 *dbits = *((ubyte *)(lookup + (*dbits | c)));
519                                         }
520                                         dbits++;
521                                         zbuf++;
522                                         tmp_u += du;
523                                 }
524 */
525
526 //----------------------------------------------------
527 // scaler_create_compiled_code8_alpha_zbuffered
528 //
529 // Creates code that looks like:
530 // mov eax, 0
531 //     mov ah, [esi+????]   ; get the foreground pixel
532 //     ; the following lines might be repeated
533 //     cmp      fx_w, [edx+?????]
534 //     jle  @f
535 //     mov al, [edi+????]   ; get the background pixel
536 //     mov bl, [ecx+eax]        ; blend it
537 //     mov [edi+????], bl   ; write it
538 //  @@:
539 //     ...
540
541
542
543
544 //void test_code1()
545 //{
546 //      _asm cmp 0xFFFFFFFF, [edx+0xabcdef12]
547 //      _asm cmp ebp, [edx+0xabcdef12]
548 //      _asm jle        0xabcdef12
549 //}
550 //; 302  :      _asm cmp ebp, [edx+0xabcdef12]
551 //  00244       3b aa 12 ef cd ab               cmp     ebp, DWORD PTR [edx-1412567278]
552 //; 303  :      _asm jle        0xabcdef12
553 //  0024a       0f 8e 12 ef cd ab               jle     -1412567278             ; abcdef12H
554
555 ubyte *scaler_create_compiled_code8_alpha_zbuffered( int w, fix u, fix du )
556 {
557         int last_u, x;
558         ubyte * cc;
559         uint *last_jmp_pos=NULL;
560
561         cc = compiled_code;
562
563         //     xor eax, eax                     ; avoid ppro partial register stall
564 //     mov ah, [esi+????]   ; get the foreground pixel
565 //     ; the following lines might be repeated
566 //     xor ebx, ebx                     ; avoid ppro partial register stall
567 //     mov bl, [edi+????]   ; get the background pixel
568 //     mov ebx, [ecx+ebx]       ; blend it
569 //     mov [edi+????], bl   ; write it
570
571         //if ( abs(du) < F1_0 / 4 ) *cc++ = 0xCC;
572
573         //*cc++ = 0xCC; // Int3
574         //*cc++ = 0xc3; // RET
575         last_u = -1;
576
577         if ( Gr_cpu     > 5 )   {
578                 // Pentium Pro optimized code.
579
580                 for (x=0; x<w; x++ )                    {
581                         if ( last_u != f2i(u) ) {
582                                 *cc++ = 0x33;   *cc++ = 0xc0; // xor eax, eax
583                                 *cc++ = 0x8a;   *cc++ = 0xa6; // mov ah, [esi+imm]
584                                 *(uint *)cc = f2i(u); cc += 4;
585                                 last_u = f2i(u);
586                         }
587
588                         *cc++ = 0x3b;  *cc++ = 0xaa;    
589                         *(uint *)cc = x*4; cc += 4;             // cmp ebp, [edx+imm]
590
591 //                      *cc++ = 0x3b;  *cc++ = 0x2a;                                            // cmp ebp, [edx]
592 //                      *cc++ = 0x83;  *cc++ = 0xc2;  *cc++ = 0x4;      // add edx, 4
593
594                         *cc++ = 0x0f;  *cc++ = 0x8e;            // jle (8e) imm
595                         last_jmp_pos = (uint *)cc;
596                         *(uint *)cc = 0; cc += 4;
597                 
598                         *cc++ = 0x33;   *cc++ = 0xdb;           // xor ebx, ebx
599                         
600                         *cc++ = 0x8a;   *cc++ = 0x9f; 
601                         *(uint *)cc = x; cc += 4;               // mov bl, [edi+imm]
602
603                         *cc++ = 0x03;   *cc++ = 0xd8;           // add ebx, eax
604
605                         *cc++ = 0x8b; *cc++ = 0x1c; *cc++ = 0x19;       // mov  ebx, BYTE PTR [ecx+ebx]
606
607                         *cc++ = 0x88;   *cc++ = 0x9f; 
608                         *(uint *)cc = x; cc += 4;               // mov [edi+imm], bl
609
610                         if ( last_jmp_pos )     {
611                                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
612                                 last_jmp_pos  = NULL;
613                         }
614
615                         u += du;
616                 }
617
618
619         } else {
620                 // Pentium optimized code.
621
622                 *cc++ = 0xb8; *(uint *)cc = 0; cc += 4;         // mov eax, 0
623
624                 for (x=0; x<w; x++ )                    {
625                         if ( last_u != f2i(u) ) {
626                                 *cc++ = 0x8a;   *cc++ = 0xa6; // mov ah, [esi+imm]
627                                 *(uint *)cc = f2i(u); cc += 4;
628                                 last_u = f2i(u);
629                         }
630
631                         *cc++ = 0x3b;  *cc++ = 0xaa;    
632                         *(uint *)cc = x*4; cc += 4;             // cmp ebp, [edx+imm]
633
634                         *cc++ = 0x0f;  *cc++ = 0x8e;            // jle imm
635                         last_jmp_pos = (uint *)cc;
636                         *(uint *)cc = 0; cc += 4;               
637                         
638                         *cc++ = 0x8a;   *cc++ = 0x87; 
639                         *(uint *)cc = x; cc += 4;               // mov al, [edi+imm]
640
641                         *cc++ = 0x8a; *cc++ = 0x1c; *cc++ = 0x01;       // mov  bl, BYTE PTR [ecx+eax]
642
643                         *cc++ = 0x88;   *cc++ = 0x9f; 
644                         *(uint *)cc = x; cc += 4;               // mov [edi+imm], bl
645
646                         if ( last_jmp_pos )     {
647                                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
648                                 last_jmp_pos = NULL;
649                         }
650
651                         u += du;
652                 }
653         }
654         *cc++ = 0xc3;   // RET
655
656         if ( cc >= &compiled_code[MAX_CODE_SIZE] )
657                 Int3();         // GET JOHN NOW!
658
659 #ifdef FIND_MAX_SIZE
660         int size = cc - compiled_code;
661         if ( size > Max_size )  {
662                 Max_size = size;
663                 mprintf(( "Max sizeZ = %d\n", size ));
664         }
665 #endif
666
667         return compiled_code;
668 }
669
670
671
// Non-zero requests z-buffered scaling; gr8_scaler() takes the z-buffered
// path only when both this flag and gr_zbuffering are set.
672 int Gr_scaler_zbuffering = 0;
// Z value of the bitmap currently being scaled.  gr8_scaler() sets it and
// loads it into EBP before calling the generated scanline code.
673 uint Gr_global_z;
674
// Monitor counter; gr8_scaler() increments it by one on each call that gets
// past the Pofview_running early-out.
675 MONITOR( ScalerNumCalls );      
676
677
678 //----------------------------------------------------
679 // Scales current bitmap, between va and vb
680 void gr8_scaler(vertex *va, vertex *vb )
681 {
682 #if 1
683         if(Pofview_running){
684                 return;
685         }
686
687         float x0, y0, x1, y1;
688         float u0, v0, u1, v1;
689         float clipped_x0, clipped_y0, clipped_x1, clipped_y1;
690         float clipped_u0, clipped_v0, clipped_u1, clipped_v1;
691         float xmin, xmax, ymin, ymax;
692         int dx0, dy0, dx1, dy1;
693
694         MONITOR_INC( ScalerNumCalls, 1 );       
695
696         //============= CLIP IT =====================
697
698         x0 = va->sx; y0 = va->sy;
699         x1 = vb->sx; y1 = vb->sy;
700
701         xmin = i2fl(gr_screen.clip_left); ymin = i2fl(gr_screen.clip_top);
702         xmax = i2fl(gr_screen.clip_right); ymax = i2fl(gr_screen.clip_bottom);
703
704         u0 = va->u; v0 = va->v;
705         u1 = vb->u; v1 = vb->v;
706
707         // Check for obviously offscreen bitmaps...
708         if ( (y1<=y0) || (x1<=x0) ) return;
709         if ( (x1<xmin ) || (x0>xmax) ) return;
710         if ( (y1<ymin ) || (y0>ymax) ) return;
711
712         clipped_u0 = u0; clipped_v0 = v0;
713         clipped_u1 = u1; clipped_v1 = v1;
714
715         clipped_x0 = x0; clipped_y0 = y0;
716         clipped_x1 = x1; clipped_y1 = y1;
717
718         // Clip the left, moving u0 right as necessary
719         if ( x0 < xmin )        {
720                 clipped_u0 = FIND_SCALED_NUM(xmin,x0,x1,u0,u1);
721                 clipped_x0 = xmin;
722         }
723
724         // Clip the right, moving u1 left as necessary
725         if ( x1 > xmax )        {
726                 clipped_u1 = FIND_SCALED_NUM(xmax,x0,x1,u0,u1);
727                 clipped_x1 = xmax;
728         }
729
730         // Clip the top, moving v0 down as necessary
731         if ( y0 < ymin )        {
732                 clipped_v0 = FIND_SCALED_NUM(ymin,y0,y1,v0,v1);
733                 clipped_y0 = ymin;
734         }
735
736         // Clip the bottom, moving v1 up as necessary
737         if ( y1 > ymax )        {
738                 clipped_v1 = FIND_SCALED_NUM(ymax,y0,y1,v0,v1);
739                 clipped_y1 = ymax;
740         }
741         
742         dx0 = fl2i(clipped_x0); dx1 = fl2i(clipped_x1);
743         dy0 = fl2i(clipped_y0); dy1 = fl2i(clipped_y1);
744
745         if (dx1<=dx0) return;
746         if (dy1<=dy0) return;
747
748         //============= DRAW IT =====================
749         int u, v, du, dv;
750         int y, w;
751         ubyte * sbits, * dbits;
752         bitmap * bp;
753         ubyte * spixels;
754         float tmpu, tmpv;
755
756         tmpu = (clipped_u1-clipped_u0) / (dx1-dx0);
757         if ( fl_abs(tmpu) < MIN_SCALE_FACTOR ) {
758                 return;         // scaled up way too far!
759         }
760         tmpv = (clipped_v1-clipped_v0) / (dy1-dy0);
761         if ( fl_abs(tmpv) < MIN_SCALE_FACTOR ) {
762                 return;         // scaled up way too far!
763         }
764
765         int is_stippled = 0;
766
767         /*
768         if ( !Detail.alpha_effects )    {
769                 is_stippled = 1;
770                 Gr_scaler_zbuffering = 0;
771         }
772         */
773         
774         if ( is_stippled )      {
775                 bp = bm_lock( gr_screen.current_bitmap, 8, 0 );
776         } else {
777                 bp = bm_lock( gr_screen.current_bitmap, 8, 0 );
778         }
779
780
781         du = fl2f(tmpu*(bp->w-1));
782         dv = fl2f(tmpv*(bp->h-1));
783
784         v = fl2f(clipped_v0*(bp->h-1));
785         u = fl2f(clipped_u0*(bp->w-1)); 
786         w = dx1 - dx0 + 1;
787         if ( w < 2 ) {
788                 bm_unlock(gr_screen.current_bitmap);
789                 return;
790         }
791
792         uint fx_w = 0;
793         if ( Gr_scaler_zbuffering && gr_zbuffering )    {
794                 fx_w = (uint)fl2i(va->sw * GR_Z_RANGE)+gr_zoffset;
795                 Gr_global_z = fx_w;
796         }
797
798 #ifdef USE_COMPILED_CODE
799         ubyte *cc=NULL;
800
801         if ( Gr_scaler_zbuffering && gr_zbuffering )    {
802                 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER )        {
803                         cc = scaler_create_compiled_code8_alpha_zbuffered( w, u, du );  
804                 }
805         } else {
806                 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER )        {
807                         if ( is_stippled )      {
808                                 cc = scaler_create_compiled_code8_stippled( w, u, du );
809                         } else {
810                                 cc = scaler_create_compiled_code8_alpha( w, u, du );    
811                         }
812                 } else  {
813                         cc = scaler_create_compiled_code8( w, u, du );
814                 }
815         }
816         
817 #endif
818
819         spixels = (ubyte *)bp->data;
820
821         gr_lock();
822         Tmap.pScreenBits = (uint)gr_screen.offscreen_buffer_base;
823
824         uint *zbuf;
825
826         for (y=dy0; y<=dy1; v += dv, y++ )                      {
827                 if ( is_stippled && (y&1) )     {
828                         sbits = &spixels[bp->rowsize*(v>>16)+f2i(du)];
829                         dbits = GR_SCREEN_PTR(ubyte,dx0+1,y);
830                 } else {
831                         sbits = &spixels[bp->rowsize*(v>>16)];
832                         dbits = GR_SCREEN_PTR(ubyte,dx0,y);
833                 }
834                 uint lookup = 0;
835
836                 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER )        {
837                         lookup = (uint)palette_get_blend_table(gr_screen.current_alpha);
838                 }
839
840                 if ( Gr_scaler_zbuffering && gr_zbuffering )    {
841                         zbuf = (uint *)&gr_zbuffer[(uint)dbits-(uint)Tmap.pScreenBits];
842                 }
843         
844 #ifdef USE_COMPILED_CODE
845                 // Call the compiled code to draw one scanline
846                 if ( Gr_scaler_zbuffering &&  gr_zbuffering && (gr_screen.current_alphablend_mode != GR_ALPHABLEND_FILTER))     {                       
847                         Int3();
848
849                         /*
850                         int x, tmp_u;
851                         tmp_u = u;
852
853                         for (x=0; x<w; x++ )                    {
854                                 if ( fx_w > *zbuf )     {
855                                         ubyte c = sbits[ tmp_u >> 16 ];
856                                         if ( c != TRANSPARENCY_COLOR_8 ) *dbits = c;
857                                 }
858                                 zbuf++;
859                                 dbits++;
860                                 tmp_u += du;
861                         }
862                         */
863                 } else {
864 /*                      {
865                                 int x, tmp_u;
866                                 tmp_u = u;
867
868         
869                                 for (x=0; x<w; x++ )                    {
870                                         if ( fx_w > *zbuf )     {
871                                                 uint c = sbits[ tmp_u >> 16 ]<<8;
872                                                 *dbits = *((ubyte *)(lookup + (*dbits | c)));
873                                         }
874                                         dbits++;
875                                         zbuf++;
876                                         tmp_u += du;
877                                 }
878                         } 
879 */
880 #ifdef PLAT_UNIX
881                         STUB_FUNCTION;
882 #else
883                         _asm push esi
884                         _asm push edi
885                         _asm push edx
886                         _asm push ecx
887                         _asm push ebx
888                         _asm push eax
889                         _asm mov ecx, lookup
890                         _asm mov esi, sbits
891                         _asm mov edi, dbits
892                         _asm mov eax, cc
893                         _asm mov edx, zbuf
894                         _asm push ebp
895                         _asm mov ebp, Gr_global_z
896                         _asm call eax
897                         _asm pop ebp
898                         _asm pop eax
899                         _asm pop ebx
900                         _asm pop ecx
901                         _asm pop edx
902                         _asm pop edi
903                         _asm pop esi
904 #endif
905                 }
906 #else   
907                 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER )        {
908                         if ( Gr_scaler_zbuffering && gr_zbuffering )    {
909                                 int x, tmp_u;
910                                 tmp_u = u;
911
912                                 for (x=0; x<w; x++ )                    {
913                                         if ( fx_w > *zbuf )     {
914                                                 uint c = sbits[ tmp_u >> 16 ]<<8;
915                                                 *dbits = *((ubyte *)(lookup + (*dbits | c)));
916                                         }
917                                         dbits++;
918                                         zbuf++;
919                                         tmp_u += du;
920                                 }
921                         } else {
922                                 int x, tmp_u;
923                                 tmp_u = u;
924                                 for (x=0; x<w; x++ )                    {
925                                         uint c = sbits[ tmp_u >> 16 ]<<8;
926                                         *dbits++ = palette_blend[*dbits|c];
927                                         tmp_u += du;
928                                 }
929                         }
930                 } else {
931                         if ( Gr_scaler_zbuffering && gr_zbuffering )    {
932                                 int x, tmp_u;
933                                 tmp_u = u;
934                         
935                                 for (x=0; x<w; x++ )                    {
936                                         if ( fx_w > *zbuf )     {
937                                                 ubyte c = sbits[ tmp_u >> 16 ];
938                                                 if ( c != TRANSPARENCY_COLOR_8 ) *dbits = c;
939                                         }
940                                         zbuf++;
941                                         dbits++;
942                                         tmp_u += du;
943                                 }
944                         } else {
945                                 int x, tmp_u;
946                                 tmp_u = u;
947                                 for (x=0; x<w; x++ )                    {
948                                         ubyte c = sbits[ tmp_u >> 16 ];
949                                         if ( c != TRANSPARENCY_COLOR_8 ) *dbits = c;
950                                         dbits++;
951                                         tmp_u += du;
952                                 }
953                         }
954                 }
955 #endif
956         }
957
958         gr_unlock();
959         bm_unlock(gr_screen.current_bitmap);
960 #endif
961 }
962
963 int aiee = 0;
964 alphacolor_old old_alphac;
965 //----------------------------------------------------
966 // Scales current bitmap, between va and vb
967 void gr8_aascaler(vertex *va, vertex *vb )
968 {
969         float x0, y0, x1, y1;
970         float u0, v0, u1, v1;
971         float clipped_x0, clipped_y0, clipped_x1, clipped_y1;
972         float clipped_u0, clipped_v0, clipped_u1, clipped_v1;
973         float xmin, xmax, ymin, ymax;
974         int dx0, dy0, dx1, dy1;
975
976         //if ( !Current_alphacolor )    return;
977
978         MONITOR_INC( ScalerNumCalls, 1 );       
979
980         Assert(Fred_running);
981         if(!aiee){
982                 old_alphac.used = 1;
983                 old_alphac.r = 93;
984                 old_alphac.g = 93;
985                 old_alphac.b = 128;
986                 old_alphac.alpha = 255;
987                 //ac->type = type;
988                 //ac->clr=clr;
989                 //93, 93, 128, 255
990                 calc_alphacolor_old(&old_alphac);
991                 aiee = 1;
992         }
993
994         //============= CLIP IT =====================
995
996         x0 = va->sx; y0 = va->sy;
997         x1 = vb->sx; y1 = vb->sy;
998
999         xmin = i2fl(gr_screen.clip_left); ymin = i2fl(gr_screen.clip_top);
1000         xmax = i2fl(gr_screen.clip_right); ymax = i2fl(gr_screen.clip_bottom);
1001
1002         u0 = va->u; v0 = va->v;
1003         u1 = vb->u; v1 = vb->v;
1004
1005         // Check for obviously offscreen bitmaps...
1006         if ( (y1<=y0) || (x1<=x0) ) return;
1007         if ( (x1<xmin ) || (x0>xmax) ) return;
1008         if ( (y1<ymin ) || (y0>ymax) ) return;
1009
1010         clipped_u0 = u0; clipped_v0 = v0;
1011         clipped_u1 = u1; clipped_v1 = v1;
1012
1013         clipped_x0 = x0; clipped_y0 = y0;
1014         clipped_x1 = x1; clipped_y1 = y1;
1015
1016         // Clip the left, moving u0 right as necessary
1017         if ( x0 < xmin )        {
1018                 clipped_u0 = FIND_SCALED_NUM(xmin,x0,x1,u0,u1);
1019                 clipped_x0 = xmin;
1020         }
1021
1022         // Clip the right, moving u1 left as necessary
1023         if ( x1 > xmax )        {
1024                 clipped_u1 = FIND_SCALED_NUM(xmax,x0,x1,u0,u1);
1025                 clipped_x1 = xmax;
1026         }
1027
1028         // Clip the top, moving v0 down as necessary
1029         if ( y0 < ymin )        {
1030                 clipped_v0 = FIND_SCALED_NUM(ymin,y0,y1,v0,v1);
1031                 clipped_y0 = ymin;
1032         }
1033
1034         // Clip the bottom, moving v1 up as necessary
1035         if ( y1 > ymax )        {
1036                 clipped_v1 = FIND_SCALED_NUM(ymax,y0,y1,v0,v1);
1037                 clipped_y1 = ymax;
1038         }
1039         
1040         dx0 = fl2i(clipped_x0); dx1 = fl2i(clipped_x1);
1041         dy0 = fl2i(clipped_y0); dy1 = fl2i(clipped_y1);
1042
1043         if (dx1<=dx0) return;
1044         if (dy1<=dy0) return;
1045
1046         //============= DRAW IT =====================
1047         int u, v, du, dv;
1048         int y, w;
1049         ubyte * sbits, * dbits;
1050         bitmap * bp;
1051         ubyte * spixels;
1052         float tmpu, tmpv;
1053
1054         tmpu = (clipped_u1-clipped_u0) / (dx1-dx0);
1055         if ( fl_abs(tmpu) < MIN_SCALE_FACTOR ) {
1056                 return;         // scaled up way too far!
1057         }
1058         tmpv = (clipped_v1-clipped_v0) / (dy1-dy0);
1059         if ( fl_abs(tmpv) < MIN_SCALE_FACTOR ) {
1060                 return;         // scaled up way too far!
1061         }
1062
1063         bp = bm_lock( gr_screen.current_bitmap, 8, BMP_AABITMAP );
1064
1065         du = fl2f(tmpu*(bp->w-1));
1066         dv = fl2f(tmpv*(bp->h-1));
1067
1068         v = fl2f(clipped_v0*(bp->h-1));
1069         u = fl2f(clipped_u0*(bp->w-1)); 
1070         w = dx1 - dx0 + 1;
1071
1072 #ifdef USE_COMPILED_CODE
1073         ubyte *cc;
1074
1075         if ( Gr_scaler_zbuffering && gr_zbuffering )    {
1076                 //cc = scaler_create_compiled_code8_alpha_zbuffered( w, u, du );
1077         } else {
1078                 cc = scaler_create_compiled_code8_alpha( w, u, du );
1079         }
1080
1081 #endif
1082
1083         spixels = (ubyte *)bp->data;
1084
1085         gr_lock();
1086
1087         uint fx_w = 0;
1088         if ( Gr_scaler_zbuffering  && gr_zbuffering )   {
1089                 fx_w = (uint)fl2i(va->sw * GR_Z_RANGE)+gr_zoffset;
1090         }       
1091
1092         for (y=dy0; y<=dy1; y++ )                       {
1093                 sbits = &spixels[bp->rowsize*(v>>16)];
1094                 dbits = GR_SCREEN_PTR(ubyte,dx0,y);
1095
1096 #ifdef USE_COMPILED_CODE                
1097                 // uint lookup = (uint)&Current_alphacolor->table.lookup[0][0];
1098                 uint lookup = (uint)&old_alphac.table.lookup[0][0];
1099                 
1100                 // Call the compiled code to draw one scanline
1101                 if ( Gr_scaler_zbuffering  && gr_zbuffering )   {
1102                         int x, tmp_u;
1103                         tmp_u = u;
1104
1105                         uint *zbuf = (uint *)&gr_zbuffer[(uint)dbits-(uint)Tmap.pScreenBits];
1106         
1107                         for (x=0; x<w; x++ )                    {
1108                                 if ( fx_w > *zbuf )     {
1109                                         // uint c = sbits[ tmp_u >> 16 ];
1110                                         // *dbits = Current_alphacolor->table.lookup[c][*dbits];
1111                                         *dbits = (ubyte)0x00;
1112                                 }
1113                                 zbuf++;
1114                                 dbits++;
1115                                 tmp_u += du;
1116                         }
1117                 } else {
1118 #ifdef PLAT_UNIX
1119                         STUB_FUNCTION;
1120 #else
1121                         _asm push esi
1122                         _asm push edi
1123                         _asm push ecx
1124                         _asm push ebx
1125                         _asm push eax
1126                         _asm mov ecx, lookup
1127                         _asm mov esi, sbits
1128                         _asm mov edi, dbits
1129                         _asm mov eax, cc
1130                         _asm call eax
1131                         _asm pop eax
1132                         _asm pop ebx
1133                         _asm pop ecx
1134                         _asm pop edi
1135                         _asm pop esi
1136 #endif
1137                 }
1138 #else   
1139                 if ( Gr_scaler_zbuffering && gr_zbuffering )    {
1140                         int x, tmp_u;
1141                         tmp_u = u;
1142
1143                         uint *zbuf = (uint *)&gr_zbuffer[(uint)dbits-(uint)Tmap.pScreenBits];
1144         
1145                         for (x=0; x<w; x++ )                    {
1146                                 if ( fx_w > *zbuf )     {
1147                                         uint c = sbits[ tmp_u >> 16 ];
1148                                         *dbits = Current_alphacolor->table.lookup[c][*dbits];
1149                                 }
1150                                 zbuf++;
1151                                 dbits++;
1152                                 tmp_u += du;
1153                         }
1154                 } else {
1155                         int x, tmp_u;
1156                         tmp_u = u;
1157                         for (x=0; x<w; x++ )                    {
1158                                 uint c = sbits[ tmp_u >> 16 ];
1159                                 *dbits = Current_alphacolor->table.lookup[c][*dbits];
1160                                 dbits++;
1161                                 tmp_u += du;
1162                         }
1163                 }
1164 #endif
1165                 v += dv;
1166         }
1167
1168         gr_unlock();
1169
1170         bm_unlock(gr_screen.current_bitmap);
1171 }
1172