]> icculus.org git repositories - taylor/freespace2.git/blob - src/graphics/scaler.cpp
Screenshot function filled, will output into ~/.freespace(2)/Data
[taylor/freespace2.git] / src / graphics / scaler.cpp
1 /*
2  * Copyright (C) Volition, Inc. 1999.  All rights reserved.
3  *
4  * All source code herein is the property of Volition, Inc. You may not sell 
5  * or otherwise commercially exploit the source or things you created based on
6  * the source.
7  */
8
9 /*
10  * $Logfile: /Freespace2/code/Graphics/Scaler.cpp $
11  * $Revision$
12  * $Date$
13  * $Author$
14  *
15  * Routines to scale a bitmap.
16  *
17  * $Log$
18  * Revision 1.4  2002/07/13 06:46:48  theoddone33
19  * Warning cleanups
20  *
21  * Revision 1.3  2002/06/09 04:41:18  relnev
22  * added copyright header
23  *
24  * Revision 1.2  2002/06/05 08:05:29  relnev
25  * stub/warning removal.
26  *
27  * reworked the sound code.
28  *
29  * Revision 1.1.1.1  2002/05/03 03:28:09  root
30  * Initial import.
31  *
32  * 
33  * 9     7/20/99 1:49p Dave
34  * Peter Drake build. Fixed some release build warnings.
35  * 
36  * 8     6/22/99 7:03p Dave
37  * New detail options screen.
38  * 
39  * 7     5/12/99 5:33p Johne
40  * Don't use gr8_scaler() in pofview.
41  * 
42  * 6     5/09/99 6:00p Dave
43  * Lots of cool new effects. E3 build tweaks.
44  * 
45  * 5     1/14/99 6:06p Dave
46  * 100% full squad logo support for single player and multiplayer.
47  * 
48  * 4     1/14/99 12:48a Dave
49  * Todo list bug fixes. Made a pass at putting briefing icons back into
50  * FRED. Sort of works :(
51  * 
52  * 3     11/30/98 1:07p Dave
53  * 16 bit conversion, first run.
54  * 
55  * 2     10/07/98 10:53a Dave
56  * Initial checkin.
57  * 
58  * 1     10/07/98 10:49a Dave
59  * 
60  * 40    4/02/98 2:01p Dave
61  * JAS: Increased constant for source of compiled code
62  * 
63  * 39    4/01/98 9:21p John
64  * Made NDEBUG, optimized build with no warnings or errors.
65  * 
66  * 38    4/01/98 7:15p John
67  * fixed bug with previous
68  * 
69  * 37    4/01/98 6:45p John
70  * Reduced memory by combining compiled_code ptrs.
71  * 
72  * 36    3/22/98 3:28p John
73  * Added in stippled alpha for lower details.  Made medium detail use
74  * engine glow.
75  * 
76  * 35    3/10/98 4:18p John
77  * Cleaned up graphics lib.  Took out most unused gr functions.   Made D3D
78  * & Glide have popups and print screen.  Took out all >8bpp software
79  * support.  Made Fred zbuffer.  Made zbuffer allocate dynamically to
80  * support Fred.  Made zbuffering key off of functions rather than one
81  * global variable.
82  * 
83  * 34    2/05/98 9:21p John
84  * Some new Direct3D code.   Added code to monitor a ton of stuff in the
85  * game.
86  * 
87  * 33    1/27/98 10:18a John
88  * fixed warning for optimized build
89  * 
90  * 32    1/26/98 5:12p John
91  * Added in code for Pentium Pro specific optimizations. Speed up
92  * zbuffered correct tmapper about 35%.   Speed up non-zbuffered scalers
93  * by about 25%.
94  * 
95  * 31    1/19/98 6:15p John
96  * Fixed all my Optimized Build compiler warnings
97  * 
98  * 30    12/04/97 12:09p John
99  * Made glows use scaler instead of tmapper so they don't rotate.  Had to
100  * add a zbuffered scaler.
101  * 
102  * 29    12/02/97 4:00p John
103  * Added first rev of thruster glow, along with variable levels of
104  * translucency, which required some restructuring of palman.
105  * 
106  * 28    11/30/97 4:33p John
107  * added 32-bpp aascaler
108  * 
109  * 27    11/30/97 3:57p John
110  * Made fixed 32-bpp translucency.  Made BmpMan always map translucent
111  * color into 255 even if you aren't supposed to remap and make it's
112  * palette black.
113  * 
114  * 26    11/30/97 12:18p John
115  * added more 24 & 32-bpp primitives
116  * 
117  * 25    11/29/97 2:06p John
118  * added mode 16-bpp support
119  * 
120  * 24    11/14/97 12:30p John
121  * Fixed some DirectX bugs.  Moved the 8-16 xlat tables into Graphics
122  * libs.  Made 16-bpp DirectX modes know what bitmap format they're in.
123  * 
124  * 23    10/19/97 12:55p John
125  * new code to lock / unlock surfaces for smooth directx integration.
126  * 
127  * 22    10/15/97 4:48p John
128  * added 16-bpp aascaler
129  * 
130  * 21    10/14/97 8:08a John
131  * added a bunch more 16 bit support
132  * 
133  * 20    10/09/97 5:23p John
134  * Added support for more 16-bpp functions
135  * 
136  * 19    8/04/97 4:47p John
137  * added gr_aascaler.
138  * 
139  * 18    7/28/97 11:31a John
140  * made compiled code save all registers that it changes.  When building
141  * optimized, my code was using EBX, and so was the compiler, so weird
142  * errors happened.  Pushing/popping ebx fixed this.
143  * 
144  * 17    7/16/97 5:29p John
145  * added palette table caching and made scaler and liner no light tmapper
146  * do alpha blending in 8 bpp mode.
147  * 
148  * 16    7/10/97 2:06p John
149  * added code to specify alphablending type for bitmaps.
150  * 
151  * 15    6/12/97 2:50a Lawrance
152  * bm_unlock() now passed bitmap number, not pointer
153  * 
154  * 14    5/29/97 3:10p John
155  * Took out debug menu.  
156  * Made software scaler draw larger bitmaps.
157  * Optimized Direct3D some.
158  * 
159  * 13    5/12/97 12:27p John
160  * Restructured Graphics Library to add support for multiple renderers.
161  * 
162  * 12    12/04/96 2:02p John
163  * Added fast compiled code to the scaler in 8,16,32 bpp modes.
164  * 
165  * 11    12/03/96 8:08p John
166  * Added compiled code to 8bpp scaler.  Made bitmaps that are trying to
167  * scale up too big to not draw.
168  * 
169  * 10    12/03/96 11:12a John
170  * added commented out "filtering" code to scaler.
171  * 
172  * 9     11/19/96 2:42p Allender
173  * fix up 32 bit scaler
174  * 
175  * 8     11/15/96 11:27a Allender
176  * 16bpp version of scaler
177  * 
178  * 7     11/07/96 6:19p John
179  * Added a bunch of 16bpp primitives so the game sort of runs in 16bpp
180  * mode.
181  * 
182  * 6     10/26/96 1:40p John
183  * Added some new primitives to the 2d library and
184  * cleaned up some old ones.
185  *
186  * $NoKeywords: $
187  */
188
189 #include <math.h>
190 #include <limits.h>
191 #include <stdio.h>
192 #ifndef PLAT_UNIX
193 #include <conio.h>
194 #endif
195 #include <stdlib.h>
196
197 #include "scaler.h"
198 #include "2d.h"
199 #include "grinternal.h"
200 #include "floating.h"
201 #include "bmpman.h"
202 #include "palman.h"
203 #include "tmapscanline.h"
204 #include "systemvars.h"
205 #include "key.h"
206 #include "colors.h"
207
// Smallest per-pixel texture step gr8_scaler will draw; anything smaller
// is treated as "scaled up way too far" and skipped.
#define MIN_SCALE_FACTOR	0.0001f

// When defined, gr8_scaler draws each scanline by calling run-time
// generated x86 code instead of the plain C loops.
#define USE_COMPILED_CODE

// Source pixel values that mean "transparent" at each colour depth.
#define TRANSPARENCY_COLOR_8	0xff
#define TRANSPARENCY_COLOR_16	0xffff
#define TRANSPARENCY_COLOR_32	0xffffffff

// Linear interpolation: maps x from the range [x0,x1] onto [y0,y1].
#define FIND_SCALED_NUM(x,x0,x1,y0,y1) \
	(((((x)-(x0))*((y1)-(y0)))/((x1)-(x0)))+(y0))

// Size in bytes of the compiled_code buffer.
// JAS: largest generated routine was measured at 8208 bytes on April 1, 98
// (the value used to be 65536).
#define MAX_CODE_SIZE		32768
219
220 ubyte compiled_code[MAX_CODE_SIZE];
221
222 #ifdef FIND_MAX_SIZE
223 static int Max_size = 0;
224 #endif
225
226 /*
227 void test_code()
228 {
229         _asm mov ax, [esi+0xabcdef12]
230         _asm cmp ax, 255
231         _asm je  0xabcdef12
232         _asm mov [edi+0xabcdef12], ax
233         _asm mov ax, [esi+0xabcdef12]
234 }
235 */
236
237
238
239 //----------------------------------------------------
240 // scaler_create_compiled_code8
241 //
242 // Creates code that looks like:
243 //
244 // @@: mov al, [esi+????]
245 //     cmp al, TRANSPARENCY_COLOR_8
246 //     je  @f   ; jump to next @@ label
247 //     mov [edi+???], al    ; If the source pixel is scaled up
248 //     mov [edi+???], al    ; there might be a lot of these lines
249 //     ...
250 // @@: mov al, [esi+????]
251 //
252
253 ubyte *scaler_create_compiled_code8( int w, fix u, fix du )
254 {
255         int last_u, x;
256         ubyte * cc;
257         uint * last_jmp_pos;
258
259         cc = compiled_code;
260
261         //if ( abs(du) < F1_0 / 4 ) *cc++ = 0xCC;
262
263 //      *cc++ = 0xCC;   // Int3
264 //      *cc++ = 0xc3;   // RET
265
266         last_u = -1;
267
268         last_jmp_pos=NULL;
269
270         for (x=0; x<w; x++ )                    {
271                 if ( last_u != f2i(u) ) {
272                         if ( last_jmp_pos )     {
273                                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
274                         }
275                         *cc++ = 0x8a;   *cc++ = 0x86; // mov al, [esi+imm]
276                         *(uint *)cc = f2i(u); cc += 4;
277                         last_u = f2i(u);
278
279                         *cc++ = 0x3c; *cc++ = TRANSPARENCY_COLOR_8;     // cmp al, 255
280                         *cc++ = 0x0f; *cc++ = 0x84;   // je rel32
281                         last_jmp_pos = (uint *)cc;
282                         cc += 4;                
283                 }
284                 
285         
286                 *cc++ = 0x88;   *cc++ = 0x87; // mov [edi+imm], al
287                 *(uint *)cc = x; cc += 4;
288
289                 u += du;
290         }
291         if ( last_jmp_pos )     {
292                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
293         }
294         *cc++ = 0xc3;   // RET
295
296         if ( cc >= &compiled_code[MAX_CODE_SIZE] )
297                 Int3();         // GET JOHN NOW!
298
299 #ifdef FIND_MAX_SIZE
300         int size = cc - compiled_code;
301         if ( size > Max_size )  {
302                 Max_size = size;
303                 mprintf(( "Max size = %d\n", size ));
304         }
305 #endif
306
307         return compiled_code;
308 }
309
310 ubyte *scaler_create_compiled_code8_stippled( int w, fix u, fix du )
311 {
312         int last_u, x;
313         ubyte * cc;
314         uint * last_jmp_pos;
315
316         cc = compiled_code;
317
318         //if ( abs(du) < F1_0 / 4 ) *cc++ = 0xCC;
319
320 //      *cc++ = 0xCC;   // Int3
321 //      *cc++ = 0xc3;   // RET
322
323         last_u = -1;
324
325         last_jmp_pos=NULL;
326
327         for (x=0; x<w-1; x+=2 )                 {
328                 if ( last_u != f2i(u) ) {
329                         if ( last_jmp_pos )     {
330                                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
331                         }
332                         *cc++ = 0x8a;   *cc++ = 0x86; // mov al, [esi+imm]
333                         *(uint *)cc = f2i(u); cc += 4;
334                         last_u = f2i(u);
335
336                         *cc++ = 0x3c; *cc++ = TRANSPARENCY_COLOR_8;     // cmp al, 255
337                         *cc++ = 0x0f; *cc++ = 0x84;   // je rel32
338                         last_jmp_pos = (uint *)cc;
339                         cc += 4;                
340                 }
341                 
342         
343                 *cc++ = 0x88;   *cc++ = 0x87; // mov [edi+imm], al
344                 *(uint *)cc = x; cc += 4;
345
346                 u += du*2;
347         }
348         if ( last_jmp_pos )     {
349                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
350         }
351         *cc++ = 0xc3;   // RET
352
353         if ( cc >= &compiled_code[MAX_CODE_SIZE] )
354                 Int3();         // GET JOHN NOW!
355
356 #ifdef FIND_MAX_SIZE
357         int size = cc - compiled_code;
358         if ( size > Max_size )  {
359                 Max_size = size;
360                 mprintf(( "Max size = %d\n", size ));
361         }
362 #endif
363
364         return compiled_code;
365 }
366
367 void test_code1()
368 {
369 #ifdef PLAT_UNIX
370         STUB_FUNCTION;
371 #else
372         _asm mov ebx, -1
373         _asm xor eax, eax
374         _asm xor ebx, ebx
375         _asm mov        bl, BYTE PTR [edi-1412567278]
376         _asm add ebx, eax
377         _asm mov ebx, [ecx+ebx] ; blend it
378         _asm cmp ebp, [edx]
379         _asm add edx, 4
380         _asm jl [0xABCDEF12]
381         
382 //     xor eax, eax                     ; avoid ppro partial register stall
383 //     mov ah, [esi+????]   ; get the foreground pixel
384 //     ; the following lines might be repeated
385 //     xor ebx, ebx                     ; avoid ppro partial register stall
386 //     mov bl, [edi+????]   ; get the background pixel
387 //     mov ebx, [ecx+ebx]       ; blend it
388 //     mov [edi+????], bl   ; write it
389 #endif
390 }
391
392 /*
393   00130 b8 00 00 00 00  mov     eax, 0
394   00135 8a a6 12 ef cd ab               mov     ah, BYTE PTR [esi-1412567278]
395   0013b 8a 87 12 ef cd ab               mov     al, BYTE PTR [edi-1412567278]
396   00141 8a 1c 01                    mov bl, BYTE PTR [ecx+eax]
397   00141 8b 1c 01                                        mov     ebx, DWORD PTR [ecx+eax]
398   00144 88 9f 12 ef cd ab               mov     BYTE PTR [edi-1412567278], bl
399
400
401   00130 33 c0           xor     eax, eax
402   00132 33 db           xor     ebx, ebx
403   00134 8a 9f 12 ef cd  ab              mov     bl, BYTE PTR [edi-1412567278]
404   0013a 03 d8           add     ebx, eax
405   0013c 8b 1c 19        mov     ebx, DWORD PTR [ecx+ebx]
406
407   0013f 3b 2a           cmp     ebp, DWORD PTR [edx]
408   00141 83 c2 04        add     edx, 4
409
410
411 */
412
413 //----------------------------------------------------
414 // scaler_create_compiled_code8_alpha
415 //
416 // Creates code that looks like:
417
418 //=============== Pentium ======================
419 // mov eax, 0
420 //     mov ah, [esi+????]   ; get the foreground pixel
421 //     ; the following lines might be repeated
422 //     mov al, [edi+????]   ; get the background pixel
423 //     mov bl, [ecx+eax]        ; blend it
424 //     mov [edi+????], bl   ; write it
425 //     ...
426
427 //============= Pentium Pro code =============
428 //     xor eax, eax                     ; avoid ppro partial register stall
429 //     mov ah, [esi+????]   ; get the foreground pixel
430 //     ; the following lines might be repeated
431 //     xor ebx, ebx                     ; avoid ppro partial register stall
432 //     mov bl, [edi+????]   ; get the background pixel
433 //     mov ebx, [ecx+ebx]       ; blend it
434 //     mov [edi+????], bl   ; write it
435
436
437 ubyte *scaler_create_compiled_code8_alpha( int w, fix u, fix du )
438 {
439         int last_u, x;
440         ubyte * cc;
441
442         cc = compiled_code;
443
444         //if ( abs(du) < F1_0 / 4 ) *cc++ = 0xCC;
445
446         //*cc++ = 0xCC; // Int3
447         //*cc++ = 0xc3; // RET
448
449         last_u = -1;
450
451         if ( Gr_cpu     > 5 )   {
452                 // Pentium Pro optimized code.
453
454                 for (x=0; x<w; x++ )                    {
455                         if ( last_u != f2i(u) ) {
456                                 *cc++ = 0x33;   *cc++ = 0xc0; // xor eax, eax
457                                 *cc++ = 0x8a;   *cc++ = 0xa6; // mov ah, [esi+imm]
458                                 //*cc++ = 0x8a; *cc++ = 0x86; // mov al, [esi+imm]
459                                 *(uint *)cc = f2i(u); cc += 4;
460                                 last_u = f2i(u);
461                         }
462                         
463                         *cc++ = 0x33;   *cc++ = 0xdb;           // xor ebx, ebx
464                         
465                         *cc++ = 0x8a;   *cc++ = 0x9f; 
466                         *(uint *)cc = x; cc += 4;               // mov bl, [edi+imm]
467
468                         *cc++ = 0x03;   *cc++ = 0xd8;           // add ebx, eax
469
470                         *cc++ = 0x8b; *cc++ = 0x1c; *cc++ = 0x19;       // mov  ebx, BYTE PTR [ecx+ebx]
471
472                         *cc++ = 0x88;   *cc++ = 0x9f; 
473                         *(uint *)cc = x; cc += 4;               // mov [edi+imm], bl
474
475                         u += du;
476                 }
477         } else {
478                 // Pentium optimized code.
479
480                 *cc++ = 0xb8; *(uint *)cc = 0; cc += 4;         // mov eax, 0
481
482                 for (x=0; x<w; x++ )                    {
483                         if ( last_u != f2i(u) ) {
484                                 *cc++ = 0x8a;   *cc++ = 0xa6; // mov ah, [esi+imm]
485                                 *(uint *)cc = f2i(u); cc += 4;
486                                 last_u = f2i(u);
487                         }
488                         
489                         *cc++ = 0x8a;   *cc++ = 0x87; 
490                         *(uint *)cc = x; cc += 4;               // mov al, [edi+imm]
491
492                         *cc++ = 0x8a; *cc++ = 0x1c; *cc++ = 0x01;       // mov  bl, BYTE PTR [ecx+eax]
493
494                         *cc++ = 0x88;   *cc++ = 0x9f; 
495                         *(uint *)cc = x; cc += 4;               // mov [edi+imm], bl
496
497                         u += du;
498                 }
499         }
500
501         *cc++ = 0xc3;   // RET
502
503         if ( cc >= &compiled_code[MAX_CODE_SIZE] )
504                 Int3();         // GET JOHN NOW!
505
506 #ifdef FIND_MAX_SIZE
507         int size = cc - compiled_code;
508         if ( size > Max_size )  {
509                 Max_size = size;
510                 mprintf(( "Max size = %d\n", size ));
511         }
512 #endif
513
514         return compiled_code;
515 }
516
517 /*
518                                 for (x=0; x<w; x++ )                    {
519                                         if ( fx_w > *zbuf )     {
520                                                 uint c = sbits[ tmp_u >> 16 ]<<8;
521                                                 *dbits = *((ubyte *)(lookup + (*dbits | c)));
522                                         }
523                                         dbits++;
524                                         zbuf++;
525                                         tmp_u += du;
526                                 }
527 */
528
529 //----------------------------------------------------
530 // scaler_create_compiled_code8_alpha_zbuffered
531 //
532 // Creates code that looks like:
533 // mov eax, 0
534 //     mov ah, [esi+????]   ; get the foreground pixel
535 //     ; the following lines might be repeated
536 //     cmp      fx_w, [edx+?????]
537 //     jle  @f
538 //     mov al, [edi+????]   ; get the background pixel
539 //     mov bl, [ecx+eax]        ; blend it
540 //     mov [edi+????], bl   ; write it
541 //  @@:
542 //     ...
543
544
545
546
547 //void test_code1()
548 //{
549 //      _asm cmp 0xFFFFFFFF, [edx+0xabcdef12]
550 //      _asm cmp ebp, [edx+0xabcdef12]
551 //      _asm jle        0xabcdef12
552 //}
553 //; 302  :      _asm cmp ebp, [edx+0xabcdef12]
554 //  00244       3b aa 12 ef cd ab               cmp     ebp, DWORD PTR [edx-1412567278]
555 //; 303  :      _asm jle        0xabcdef12
556 //  0024a       0f 8e 12 ef cd ab               jle     -1412567278             ; abcdef12H
557
558 ubyte *scaler_create_compiled_code8_alpha_zbuffered( int w, fix u, fix du )
559 {
560         int last_u, x;
561         ubyte * cc;
562         uint *last_jmp_pos=NULL;
563
564         cc = compiled_code;
565
566         //     xor eax, eax                     ; avoid ppro partial register stall
567 //     mov ah, [esi+????]   ; get the foreground pixel
568 //     ; the following lines might be repeated
569 //     xor ebx, ebx                     ; avoid ppro partial register stall
570 //     mov bl, [edi+????]   ; get the background pixel
571 //     mov ebx, [ecx+ebx]       ; blend it
572 //     mov [edi+????], bl   ; write it
573
574         //if ( abs(du) < F1_0 / 4 ) *cc++ = 0xCC;
575
576         //*cc++ = 0xCC; // Int3
577         //*cc++ = 0xc3; // RET
578         last_u = -1;
579
580         if ( Gr_cpu     > 5 )   {
581                 // Pentium Pro optimized code.
582
583                 for (x=0; x<w; x++ )                    {
584                         if ( last_u != f2i(u) ) {
585                                 *cc++ = 0x33;   *cc++ = 0xc0; // xor eax, eax
586                                 *cc++ = 0x8a;   *cc++ = 0xa6; // mov ah, [esi+imm]
587                                 *(uint *)cc = f2i(u); cc += 4;
588                                 last_u = f2i(u);
589                         }
590
591                         *cc++ = 0x3b;  *cc++ = 0xaa;    
592                         *(uint *)cc = x*4; cc += 4;             // cmp ebp, [edx+imm]
593
594 //                      *cc++ = 0x3b;  *cc++ = 0x2a;                                            // cmp ebp, [edx]
595 //                      *cc++ = 0x83;  *cc++ = 0xc2;  *cc++ = 0x4;      // add edx, 4
596
597                         *cc++ = 0x0f;  *cc++ = 0x8e;            // jle (8e) imm
598                         last_jmp_pos = (uint *)cc;
599                         *(uint *)cc = 0; cc += 4;
600                 
601                         *cc++ = 0x33;   *cc++ = 0xdb;           // xor ebx, ebx
602                         
603                         *cc++ = 0x8a;   *cc++ = 0x9f; 
604                         *(uint *)cc = x; cc += 4;               // mov bl, [edi+imm]
605
606                         *cc++ = 0x03;   *cc++ = 0xd8;           // add ebx, eax
607
608                         *cc++ = 0x8b; *cc++ = 0x1c; *cc++ = 0x19;       // mov  ebx, BYTE PTR [ecx+ebx]
609
610                         *cc++ = 0x88;   *cc++ = 0x9f; 
611                         *(uint *)cc = x; cc += 4;               // mov [edi+imm], bl
612
613                         if ( last_jmp_pos )     {
614                                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
615                                 last_jmp_pos  = NULL;
616                         }
617
618                         u += du;
619                 }
620
621
622         } else {
623                 // Pentium optimized code.
624
625                 *cc++ = 0xb8; *(uint *)cc = 0; cc += 4;         // mov eax, 0
626
627                 for (x=0; x<w; x++ )                    {
628                         if ( last_u != f2i(u) ) {
629                                 *cc++ = 0x8a;   *cc++ = 0xa6; // mov ah, [esi+imm]
630                                 *(uint *)cc = f2i(u); cc += 4;
631                                 last_u = f2i(u);
632                         }
633
634                         *cc++ = 0x3b;  *cc++ = 0xaa;    
635                         *(uint *)cc = x*4; cc += 4;             // cmp ebp, [edx+imm]
636
637                         *cc++ = 0x0f;  *cc++ = 0x8e;            // jle imm
638                         last_jmp_pos = (uint *)cc;
639                         *(uint *)cc = 0; cc += 4;               
640                         
641                         *cc++ = 0x8a;   *cc++ = 0x87; 
642                         *(uint *)cc = x; cc += 4;               // mov al, [edi+imm]
643
644                         *cc++ = 0x8a; *cc++ = 0x1c; *cc++ = 0x01;       // mov  bl, BYTE PTR [ecx+eax]
645
646                         *cc++ = 0x88;   *cc++ = 0x9f; 
647                         *(uint *)cc = x; cc += 4;               // mov [edi+imm], bl
648
649                         if ( last_jmp_pos )     {
650                                 *last_jmp_pos = (uint)cc - (uint)last_jmp_pos - 4;
651                                 last_jmp_pos = NULL;
652                         }
653
654                         u += du;
655                 }
656         }
657         *cc++ = 0xc3;   // RET
658
659         if ( cc >= &compiled_code[MAX_CODE_SIZE] )
660                 Int3();         // GET JOHN NOW!
661
662 #ifdef FIND_MAX_SIZE
663         int size = cc - compiled_code;
664         if ( size > Max_size )  {
665                 Max_size = size;
666                 mprintf(( "Max sizeZ = %d\n", size ));
667         }
668 #endif
669
670         return compiled_code;
671 }
672
673
674
675 int Gr_scaler_zbuffering = 0;
676 uint Gr_global_z;
677
678 MONITOR( ScalerNumCalls );      
679
680
681 //----------------------------------------------------
682 // Scales current bitmap, between va and vb
683 void gr8_scaler(vertex *va, vertex *vb )
684 {
685 #if 1
686         if(Pofview_running){
687                 return;
688         }
689
690         float x0, y0, x1, y1;
691         float u0, v0, u1, v1;
692         float clipped_x0, clipped_y0, clipped_x1, clipped_y1;
693         float clipped_u0, clipped_v0, clipped_u1, clipped_v1;
694         float xmin, xmax, ymin, ymax;
695         int dx0, dy0, dx1, dy1;
696
697         MONITOR_INC( ScalerNumCalls, 1 );       
698
699         //============= CLIP IT =====================
700
701         x0 = va->sx; y0 = va->sy;
702         x1 = vb->sx; y1 = vb->sy;
703
704         xmin = i2fl(gr_screen.clip_left); ymin = i2fl(gr_screen.clip_top);
705         xmax = i2fl(gr_screen.clip_right); ymax = i2fl(gr_screen.clip_bottom);
706
707         u0 = va->u; v0 = va->v;
708         u1 = vb->u; v1 = vb->v;
709
710         // Check for obviously offscreen bitmaps...
711         if ( (y1<=y0) || (x1<=x0) ) return;
712         if ( (x1<xmin ) || (x0>xmax) ) return;
713         if ( (y1<ymin ) || (y0>ymax) ) return;
714
715         clipped_u0 = u0; clipped_v0 = v0;
716         clipped_u1 = u1; clipped_v1 = v1;
717
718         clipped_x0 = x0; clipped_y0 = y0;
719         clipped_x1 = x1; clipped_y1 = y1;
720
721         // Clip the left, moving u0 right as necessary
722         if ( x0 < xmin )        {
723                 clipped_u0 = FIND_SCALED_NUM(xmin,x0,x1,u0,u1);
724                 clipped_x0 = xmin;
725         }
726
727         // Clip the right, moving u1 left as necessary
728         if ( x1 > xmax )        {
729                 clipped_u1 = FIND_SCALED_NUM(xmax,x0,x1,u0,u1);
730                 clipped_x1 = xmax;
731         }
732
733         // Clip the top, moving v0 down as necessary
734         if ( y0 < ymin )        {
735                 clipped_v0 = FIND_SCALED_NUM(ymin,y0,y1,v0,v1);
736                 clipped_y0 = ymin;
737         }
738
739         // Clip the bottom, moving v1 up as necessary
740         if ( y1 > ymax )        {
741                 clipped_v1 = FIND_SCALED_NUM(ymax,y0,y1,v0,v1);
742                 clipped_y1 = ymax;
743         }
744         
745         dx0 = fl2i(clipped_x0); dx1 = fl2i(clipped_x1);
746         dy0 = fl2i(clipped_y0); dy1 = fl2i(clipped_y1);
747
748         if (dx1<=dx0) return;
749         if (dy1<=dy0) return;
750
751         //============= DRAW IT =====================
752         int u, v, du, dv;
753         int y, w;
754         ubyte * sbits, * dbits;
755         bitmap * bp;
756         ubyte * spixels;
757         float tmpu, tmpv;
758
759         tmpu = (clipped_u1-clipped_u0) / (dx1-dx0);
760         if ( fl_abs(tmpu) < MIN_SCALE_FACTOR ) {
761                 return;         // scaled up way too far!
762         }
763         tmpv = (clipped_v1-clipped_v0) / (dy1-dy0);
764         if ( fl_abs(tmpv) < MIN_SCALE_FACTOR ) {
765                 return;         // scaled up way too far!
766         }
767
768         int is_stippled = 0;
769
770         /*
771         if ( !Detail.alpha_effects )    {
772                 is_stippled = 1;
773                 Gr_scaler_zbuffering = 0;
774         }
775         */
776         
777         if ( is_stippled )      {
778                 bp = bm_lock( gr_screen.current_bitmap, 8, 0 );
779         } else {
780                 bp = bm_lock( gr_screen.current_bitmap, 8, 0 );
781         }
782
783
784         du = fl2f(tmpu*(bp->w-1));
785         dv = fl2f(tmpv*(bp->h-1));
786
787         v = fl2f(clipped_v0*(bp->h-1));
788         u = fl2f(clipped_u0*(bp->w-1)); 
789         w = dx1 - dx0 + 1;
790         if ( w < 2 ) {
791                 bm_unlock(gr_screen.current_bitmap);
792                 return;
793         }
794
795         uint fx_w = 0;
796         if ( Gr_scaler_zbuffering && gr_zbuffering )    {
797                 fx_w = (uint)fl2i(va->sw * GR_Z_RANGE)+gr_zoffset;
798                 Gr_global_z = fx_w;
799         }
800
801 #ifdef USE_COMPILED_CODE
802         ubyte *cc=NULL;
803
804         if ( Gr_scaler_zbuffering && gr_zbuffering )    {
805                 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER )        {
806                         cc = scaler_create_compiled_code8_alpha_zbuffered( w, u, du );  
807                 }
808         } else {
809                 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER )        {
810                         if ( is_stippled )      {
811                                 cc = scaler_create_compiled_code8_stippled( w, u, du );
812                         } else {
813                                 cc = scaler_create_compiled_code8_alpha( w, u, du );    
814                         }
815                 } else  {
816                         cc = scaler_create_compiled_code8( w, u, du );
817                 }
818         }
819         
820 #endif
821
822         spixels = (ubyte *)bp->data;
823
824         gr_lock();
825         Tmap.pScreenBits = (uint)gr_screen.offscreen_buffer_base;
826
827         uint *zbuf;
828
829         for (y=dy0; y<=dy1; v += dv, y++ )                      {
830                 if ( is_stippled && (y&1) )     {
831                         sbits = &spixels[bp->rowsize*(v>>16)+f2i(du)];
832                         dbits = GR_SCREEN_PTR(ubyte,dx0+1,y);
833                 } else {
834                         sbits = &spixels[bp->rowsize*(v>>16)];
835                         dbits = GR_SCREEN_PTR(ubyte,dx0,y);
836                 }
837                 uint lookup = 0;
838
839                 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER )        {
840                         lookup = (uint)palette_get_blend_table(gr_screen.current_alpha);
841                 }
842
843                 if ( Gr_scaler_zbuffering && gr_zbuffering )    {
844                         zbuf = (uint *)&gr_zbuffer[(uint)dbits-(uint)Tmap.pScreenBits];
845                 }
846         
847 #ifdef USE_COMPILED_CODE
848                 // Call the compiled code to draw one scanline
849                 if ( Gr_scaler_zbuffering &&  gr_zbuffering && (gr_screen.current_alphablend_mode != GR_ALPHABLEND_FILTER))     {                       
850                         Int3();
851
852                         /*
853                         int x, tmp_u;
854                         tmp_u = u;
855
856                         for (x=0; x<w; x++ )                    {
857                                 if ( fx_w > *zbuf )     {
858                                         ubyte c = sbits[ tmp_u >> 16 ];
859                                         if ( c != TRANSPARENCY_COLOR_8 ) *dbits = c;
860                                 }
861                                 zbuf++;
862                                 dbits++;
863                                 tmp_u += du;
864                         }
865                         */
866                 } else {
867 /*                      {
868                                 int x, tmp_u;
869                                 tmp_u = u;
870
871         
872                                 for (x=0; x<w; x++ )                    {
873                                         if ( fx_w > *zbuf )     {
874                                                 uint c = sbits[ tmp_u >> 16 ]<<8;
875                                                 *dbits = *((ubyte *)(lookup + (*dbits | c)));
876                                         }
877                                         dbits++;
878                                         zbuf++;
879                                         tmp_u += du;
880                                 }
881                         } 
882 */
883 #ifdef PLAT_UNIX
884                         STUB_FUNCTION;
885 #else
886                         _asm push esi
887                         _asm push edi
888                         _asm push edx
889                         _asm push ecx
890                         _asm push ebx
891                         _asm push eax
892                         _asm mov ecx, lookup
893                         _asm mov esi, sbits
894                         _asm mov edi, dbits
895                         _asm mov eax, cc
896                         _asm mov edx, zbuf
897                         _asm push ebp
898                         _asm mov ebp, Gr_global_z
899                         _asm call eax
900                         _asm pop ebp
901                         _asm pop eax
902                         _asm pop ebx
903                         _asm pop ecx
904                         _asm pop edx
905                         _asm pop edi
906                         _asm pop esi
907 #endif
908                 }
909 #else   
910                 if ( gr_screen.current_alphablend_mode == GR_ALPHABLEND_FILTER )        {
911                         if ( Gr_scaler_zbuffering && gr_zbuffering )    {
912                                 int x, tmp_u;
913                                 tmp_u = u;
914
915                                 for (x=0; x<w; x++ )                    {
916                                         if ( fx_w > *zbuf )     {
917                                                 uint c = sbits[ tmp_u >> 16 ]<<8;
918                                                 *dbits = *((ubyte *)(lookup + (*dbits | c)));
919                                         }
920                                         dbits++;
921                                         zbuf++;
922                                         tmp_u += du;
923                                 }
924                         } else {
925                                 int x, tmp_u;
926                                 tmp_u = u;
927                                 for (x=0; x<w; x++ )                    {
928                                         uint c = sbits[ tmp_u >> 16 ]<<8;
929                                         *dbits++ = palette_blend[*dbits|c];
930                                         tmp_u += du;
931                                 }
932                         }
933                 } else {
934                         if ( Gr_scaler_zbuffering && gr_zbuffering )    {
935                                 int x, tmp_u;
936                                 tmp_u = u;
937                         
938                                 for (x=0; x<w; x++ )                    {
939                                         if ( fx_w > *zbuf )     {
940                                                 ubyte c = sbits[ tmp_u >> 16 ];
941                                                 if ( c != TRANSPARENCY_COLOR_8 ) *dbits = c;
942                                         }
943                                         zbuf++;
944                                         dbits++;
945                                         tmp_u += du;
946                                 }
947                         } else {
948                                 int x, tmp_u;
949                                 tmp_u = u;
950                                 for (x=0; x<w; x++ )                    {
951                                         ubyte c = sbits[ tmp_u >> 16 ];
952                                         if ( c != TRANSPARENCY_COLOR_8 ) *dbits = c;
953                                         dbits++;
954                                         tmp_u += du;
955                                 }
956                         }
957                 }
958 #endif
959         }
960
961         gr_unlock();
962         bm_unlock(gr_screen.current_bitmap);
963 #endif
964 }
965
    // One-shot flag: stays 0 until gr8_aascaler() has filled in old_alphac on its
    // first call, after which gr8_aascaler() sets it to 1.
966 int aiee = 0;
    // Alpha color that gr8_aascaler() fills in on its first call (r=93, g=93,
    // b=128, alpha=255) and passes to calc_alphacolor_old().
967 alphacolor_old old_alphac;
968 //----------------------------------------------------
969 // Scales current bitmap, between va and vb
    //
    // Draws gr_screen.current_bitmap, locked as an 8-bit BMP_AABITMAP, into the
    // screen rectangle running from (va->sx, va->sy) to (vb->sx, vb->sy).  The
    // texture coordinates (va->u, va->v)..(vb->u, vb->v) are spread across that
    // rectangle; they are multiplied by (bp->w-1) / (bp->h-1) to get texel
    // positions.  vb must lie strictly to the right of and below va, otherwise
    // nothing is drawn.
    //
    // The rectangle is clipped against gr_screen.clip_left/top/right/bottom and
    // the texture coordinates are adjusted with FIND_SCALED_NUM to match.  The
    // function returns without drawing when the clipped rectangle is empty in
    // integer pixels or when the per-pixel texture step is smaller than
    // MIN_SCALE_FACTOR.  Every early return happens before bm_lock()/gr_lock().
    //
    // When both Gr_scaler_zbuffering and gr_zbuffering are set, a pixel is only
    // written if the depth value derived from va->sw is greater than the value
    // already stored in gr_zbuffer.  The z-buffer itself is not written here.
    //
    // va - top-left corner; sx, sy, u, v and sw are read.
    // vb - bottom-right corner; sx, sy, u and v are read.
970 void gr8_aascaler(vertex *va, vertex *vb )
971 {
972         float x0, y0, x1, y1;
973         float u0, v0, u1, v1;
974         float clipped_x0, clipped_y0, clipped_x1, clipped_y1;
975         float clipped_u0, clipped_v0, clipped_u1, clipped_v1;
976         float xmin, xmax, ymin, ymax;
977         int dx0, dy0, dx1, dy1;
978
            // NOTE(review): this guard is disabled, but the non-compiled drawing
            // path at the bottom still dereferences Current_alphacolor.
979         //if ( !Current_alphacolor )    return;
980
981         MONITOR_INC( ScalerNumCalls, 1 );       
982
            // NOTE(review): asserting Fred_running suggests this routine is only
            // expected to run inside the FRED editor -- confirm with callers.
983         Assert(Fred_running);
            // First call only: fill in old_alphac and hand it to
            // calc_alphacolor_old().  NOTE(review): the only reference to
            // old_alphac in the drawing code below is commented out, so this
            // setup currently has no visible effect on what gets drawn.
984         if(!aiee){
985                 old_alphac.used = 1;
986                 old_alphac.r = 93;
987                 old_alphac.g = 93;
988                 old_alphac.b = 128;
989                 old_alphac.alpha = 255;
990                 //ac->type = type;
991                 //ac->clr=clr;
992                 //93, 93, 128, 255
993                 calc_alphacolor_old(&old_alphac);
994                 aiee = 1;
995         }
996
997         //============= CLIP IT =====================
998
999         x0 = va->sx; y0 = va->sy;
1000         x1 = vb->sx; y1 = vb->sy;
1001
1002         xmin = i2fl(gr_screen.clip_left); ymin = i2fl(gr_screen.clip_top);
1003         xmax = i2fl(gr_screen.clip_right); ymax = i2fl(gr_screen.clip_bottom);
1004
1005         u0 = va->u; v0 = va->v;
1006         u1 = vb->u; v1 = vb->v;
1007
1008         // Check for obviously offscreen bitmaps...
             // (the first test also rejects zero-area or inverted rectangles)
1009         if ( (y1<=y0) || (x1<=x0) ) return;
1010         if ( (x1<xmin ) || (x0>xmax) ) return;
1011         if ( (y1<ymin ) || (y0>ymax) ) return;
1012
             // Start from the unclipped values; each edge below overrides its own
             // coordinate only when that edge actually crosses the clip rectangle.
1013         clipped_u0 = u0; clipped_v0 = v0;
1014         clipped_u1 = u1; clipped_v1 = v1;
1015
1016         clipped_x0 = x0; clipped_y0 = y0;
1017         clipped_x1 = x1; clipped_y1 = y1;
1018
1019         // Clip the left, moving u0 right as necessary
1020         if ( x0 < xmin )        {
1021                 clipped_u0 = FIND_SCALED_NUM(xmin,x0,x1,u0,u1);
1022                 clipped_x0 = xmin;
1023         }
1024
1025         // Clip the right, moving u1 left as necessary
1026         if ( x1 > xmax )        {
1027                 clipped_u1 = FIND_SCALED_NUM(xmax,x0,x1,u0,u1);
1028                 clipped_x1 = xmax;
1029         }
1030
1031         // Clip the top, moving v0 down as necessary
1032         if ( y0 < ymin )        {
1033                 clipped_v0 = FIND_SCALED_NUM(ymin,y0,y1,v0,v1);
1034                 clipped_y0 = ymin;
1035         }
1036
1037         // Clip the bottom, moving v1 up as necessary
1038         if ( y1 > ymax )        {
1039                 clipped_v1 = FIND_SCALED_NUM(ymax,y0,y1,v0,v1);
1040                 clipped_y1 = ymax;
1041         }
1042         
             // Integer pixel bounds of the clipped rectangle.
1043         dx0 = fl2i(clipped_x0); dx1 = fl2i(clipped_x1);
1044         dy0 = fl2i(clipped_y0); dy1 = fl2i(clipped_y1);
1045
             // Nothing to draw if the rectangle collapses in integer pixels.  These
             // checks also keep the divisions below from dividing by zero.
1046         if (dx1<=dx0) return;
1047         if (dy1<=dy0) return;
1048
1049         //============= DRAW IT =====================
1050         int u, v, du, dv;
1051         int y, w;
1052         ubyte * sbits, * dbits;
1053         bitmap * bp;
1054         ubyte * spixels;
1055         float tmpu, tmpv;
1056
             // Texture-coordinate step per destination pixel, horizontally and
             // vertically, before scaling by the bitmap size.
1057         tmpu = (clipped_u1-clipped_u0) / (dx1-dx0);
1058         if ( fl_abs(tmpu) < MIN_SCALE_FACTOR ) {
1059                 return;         // scaled up way too far!
1060         }
1061         tmpv = (clipped_v1-clipped_v0) / (dy1-dy0);
1062         if ( fl_abs(tmpv) < MIN_SCALE_FACTOR ) {
1063                 return;         // scaled up way too far!
1064         }
1065
             // From here on the bitmap is locked; it is unlocked at the end of the
             // function.
1066         bp = bm_lock( gr_screen.current_bitmap, 8, BMP_AABITMAP );
1067
             // Steps and start positions in texel units.  fl2f presumably converts
             // to 16.16 fixed point: the loops below use >> 16 to obtain the
             // integer texel index.
1068         du = fl2f(tmpu*(bp->w-1));
1069         dv = fl2f(tmpv*(bp->h-1));
1070
1071         v = fl2f(clipped_v0*(bp->h-1));
1072         u = fl2f(clipped_u0*(bp->w-1)); 
             // Pixels per scanline; both end columns dx0 and dx1 are drawn.
1073         w = dx1 - dx0 + 1;
1074
1075 #ifdef USE_COMPILED_CODE
             // cc is only assigned in the non-z-buffered case; the z-buffered
             // branch of the scanline loop below does not use it.
1076         ubyte *cc;
1077
1078         if ( Gr_scaler_zbuffering && gr_zbuffering )    {
1079                 //cc = scaler_create_compiled_code8_alpha_zbuffered( w, u, du );
1080         } else {
1081                 cc = scaler_create_compiled_code8_alpha( w, u, du );
1082         }
1083
1084 #endif
1085
1086         spixels = (ubyte *)bp->data;
1087
1088         gr_lock();
1089
             // Depth value compared against the z-buffer for every pixel; it stays
             // 0 when z-buffering is off and is then never read.
1090         uint fx_w = 0;
1091         if ( Gr_scaler_zbuffering  && gr_zbuffering )   {
1092                 fx_w = (uint)fl2i(va->sw * GR_Z_RANGE)+gr_zoffset;
1093         }       
1094
             // One iteration per destination scanline, dy0 through dy1 inclusive.
1095         for (y=dy0; y<=dy1; y++ )                       {
                     // sbits: start of source row (v >> 16).
                     // dbits: destination pixel at (dx0, y).
1096                 sbits = &spixels[bp->rowsize*(v>>16)];
1097                 dbits = GR_SCREEN_PTR(ubyte,dx0,y);
1098
1099 #ifdef USE_COMPILED_CODE                
1100                 // uint lookup = (uint)&Current_alphacolor->table.lookup[0][0];
1101                 //uint lookup = (uint)&old_alphac.table.lookup[0][0];   // Unused - DDOI
1102                 
1103                 // Call the compiled code to draw one scanline
1104                 if ( Gr_scaler_zbuffering  && gr_zbuffering )   {
1105                         int x, tmp_u;
1106                         tmp_u = u;
1107
                             // Index gr_zbuffer by the offset of dbits from
                             // Tmap.pScreenBits.  NOTE(review): the (uint) casts
                             // assume a pointer fits in uint -- confirm for 64-bit
                             // builds.
1108                         uint *zbuf = (uint *)&gr_zbuffer[(uint)dbits-(uint)Tmap.pScreenBits];
1109         
                             // The alpha lookup is commented out here, so every
                             // pixel that passes the depth test is set to 0x00;
                             // tmp_u is advanced but never read.
1110                         for (x=0; x<w; x++ )                    {
1111                                 if ( fx_w > *zbuf )     {
1112                                         // uint c = sbits[ tmp_u >> 16 ];
1113                                         // *dbits = Current_alphacolor->table.lookup[c][*dbits];
1114                                         *dbits = (ubyte)0x00;
1115                                 }
1116                                 zbuf++;
1117                                 dbits++;
1118                                 tmp_u += du;
1119                         }
1120                 } else {
1121 #ifdef PLAT_UNIX
1122                         STUB_FUNCTION;
1123 #else
                             // Saves the registers it uses, loads ecx = lookup,
                             // esi = sbits, edi = dbits, then calls the generated
                             // code whose address is in cc (via eax), and restores
                             // the registers.
                             // NOTE(review): both declarations of `lookup` above are
                             // commented out and no other declaration is visible in
                             // this function -- verify that this branch still builds.
1124                         _asm push esi
1125                         _asm push edi
1126                         _asm push ecx
1127                         _asm push ebx
1128                         _asm push eax
1129                         _asm mov ecx, lookup
1130                         _asm mov esi, sbits
1131                         _asm mov edi, dbits
1132                         _asm mov eax, cc
1133                         _asm call eax
1134                         _asm pop eax
1135                         _asm pop ebx
1136                         _asm pop ecx
1137                         _asm pop edi
1138                         _asm pop esi
1139 #endif
1140                 }
1141 #else   
                     // Plain C path.  Each destination pixel is replaced by
                     // Current_alphacolor->table.lookup[source texel][old pixel].
                     // NOTE(review): Current_alphacolor is dereferenced without a
                     // null check (the guard at the top of the function is
                     // commented out).
1142                 if ( Gr_scaler_zbuffering && gr_zbuffering )    {
1143                         int x, tmp_u;
1144                         tmp_u = u;
1145
                             // Index gr_zbuffer by the offset of dbits from
                             // Tmap.pScreenBits.  NOTE(review): the (uint) casts
                             // assume a pointer fits in uint -- confirm for 64-bit
                             // builds.
1146                         uint *zbuf = (uint *)&gr_zbuffer[(uint)dbits-(uint)Tmap.pScreenBits];
1147         
                             // Only pixels that pass the depth test are blended.
1148                         for (x=0; x<w; x++ )                    {
1149                                 if ( fx_w > *zbuf )     {
1150                                         uint c = sbits[ tmp_u >> 16 ];
1151                                         *dbits = Current_alphacolor->table.lookup[c][*dbits];
1152                                 }
1153                                 zbuf++;
1154                                 dbits++;
1155                                 tmp_u += du;
1156                         }
1157                 } else {
1158                         int x, tmp_u;
1159                         tmp_u = u;
                             // No depth test: blend every pixel of the scanline.
1160                         for (x=0; x<w; x++ )                    {
1161                                 uint c = sbits[ tmp_u >> 16 ];
1162                                 *dbits = Current_alphacolor->table.lookup[c][*dbits];
1163                                 dbits++;
1164                                 tmp_u += du;
1165                         }
1166                 }
1167 #endif
                     // Step the source position down for the next scanline.
1168                 v += dv;
1169         }
1170
1171         gr_unlock();
1172
1173         bm_unlock(gr_screen.current_bitmap);
1174 }
1175