texmap/tmap_lin.asm

   1 ; $Id: tmap_lin.asm,v 1.3 2004-08-28 23:17:46 schaffner Exp $
   2 ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
   3 ;SOFTWARE CORPORATION ("PARALLAX").  PARALLAX, IN DISTRIBUTING THE CODE TO
   4 ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
   5 ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
   6 ;IN USING, DISPLAYING,  AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
   7 ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
   8 ;FREE PURPOSES.  IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
   9 ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES.  THE END-USER UNDERSTANDS
  10 ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
  11 ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION.  ALL RIGHTS RESERVED.
  12 ;
  13 ;
  14 ; Linearly interpolating texture mapper inner loop
  15 ;
  16 ;
  17
  ; Assemble everything in this file as 32-bit code.
[BITS 32]

; The scanline routine is exported under both the plain and the
; underscore-prefixed name; the two labels are defined back to back on the
; same address in the code section.
global  asm_tmap_scanline_lin
global  _asm_tmap_scanline_lin

[SECTION .data]

; Pulled in while .data is the current section, exactly as before.
; Presumably this is where num_iters and the _fx_* / _window_* symbols used
; by the routine come from -- confirm against tmap_inc.asm.
%include        "tmap_inc.asm"

; Length of the span being drawn.  Written by the setup code of the scanline
; routine and counted down in place by its transparent loop.
_loop_count:    dd      0

[SECTION .text]
  30
  ; --------------------------------------------------------------------------------------------------
; asm_tmap_scanline_lin / _asm_tmap_scanline_lin
;
; Draws one horizontal span of a linearly interpolated texture map.
;
; Takes no register or stack arguments; every input is read from memory:
;       _fx_xleft       left x coordinate of the span (used here as a pixel column)
;       _fx_xright      right x coordinate of the span (used here as a pixel column)
;       _fx_y           y coordinate (used here as a row index)
;       _pixptr         address of source pixel map
;       _fx_u           fixed point initial u coordinate
;       _fx_v           fixed point initial v coordinate
;       _fx_du_dx       fixed point du/dx
;       _fx_dv_dx       fixed point dv/dx
;       _write_buffer   base address of the destination surface
;       _bytes_per_row  distance in bytes between destination rows
;       _window_left, _window_right, _window_bottom, _window_width
;                       clip limits applied below
;       _Transparency_on        non-zero selects the transparent loop
;
; All general registers are saved with pusha on entry; both exit paths
; (_none_to_do and _t_none_to_do) restore them with popa before ret.
;
; Reference loop:
;   for (x = xleft; x <= xright; x++) {
;      pixel(x,y) = tmap[v6*64 + u6];   /* u6, v6: six bits taken from u and v, see below */
;      u += du_dx;
;      v += dv_dx;
;   }

        align   4
_asm_tmap_scanline_lin:
asm_tmap_scanline_lin:
        pusha

; esi = pointer to start of texture map data
        mov     esi,[_pixptr]

; Reject rows below the window.  The compare is unsigned, so a negative
; _fx_y is rejected by the same branch.
        mov     edi,[_fx_y]
        cmp     edi,[_window_bottom]
        ja      near _none_to_do

; ebx = left column of the span, clipped to 0.
; The clipped value is used for BOTH the write address and the pixel count.
; Previously the count was taken from the unclipped _fx_xleft, so a span with
; a negative left edge started at column 0 but still ran (xright - xleft)
; pixels and overshot its right edge by -xleft pixels.
        mov     ebx,[_fx_xleft]
        test    ebx,ebx
        jns     xleft_ok
        xor     ebx,ebx
xleft_ok:

; edi = address of first pixel to modify = _write_buffer + y*_bytes_per_row + left
        imul    edi,[_bytes_per_row]
        add     edi,ebx
        add     edi,[_write_buffer]

; eax = right column, limited to the range _window_left.._window_right.
; NOTE(review): these compares are unsigned, so a negative _fx_xright is
; treated as a very large value and replaced by _window_right -- confirm that
; callers never pass a span that lies entirely left of the window.
        mov     eax,[_fx_xright]
        cmp     eax,[_window_right]
        jb      eax_ok1
        mov     eax,[_window_right]
eax_ok1:
        cmp     eax,[_window_left]
        ja      eax_ok2
        mov     eax,[_window_left]
eax_ok2:

; _loop_count = right - left (left already clipped), at most _window_width.
; A negative difference means there is nothing to draw.
        sub     eax,ebx
        js      near _none_to_do
        cmp     eax,[_window_width]
        jbe     _ok_to_do
        mov     eax,[_window_width]
_ok_to_do:
        mov     [_loop_count],eax

; Load the interpolants.
; NOTE(review): u and v are not advanced for columns clipped off the left
; edge (same as before this change), so a left-clipped span samples the
; texture as if it started at the unclipped _fx_xleft.
        mov     ebx,[_fx_u]
        mov     ecx,[_fx_du_dx]
        mov     edx,[_fx_dv_dx]
        mov     ebp,[_fx_v]

; Shift everything up by 10 so that bits 16..21 of each value land in the top
; six bits of the register, which is where the inner loops pick them up
; (for a 16.16 value these are the low six integer bits -- the exact fixed
; point format is not stated in this file).
        shl     ebx,10
        shl     ebp,10
        shl     edx,10
        shl     ecx,10

; Register usage from here on:
;       eax     work
;       ebx     u
;       ecx     du_dx
;       edx     dv_dx
;       ebp     v
;       esi     read address
;       edi     write address

        test    dword [_Transparency_on],-1
        jne     near transparent_texture

; Opaque case: jump into the unrolled loop between _start1 and _end1.
; That loop holds num_iters identical steps of _size bytes each; skipping the
; first (num_iters-1 - _loop_count) of them leaves _loop_count+1 steps to run.
%define _size   (_end1 - _start1)/num_iters
        mov     eax,num_iters-1
        sub     eax,[_loop_count]
        jns     j_eax_ok1
        inc     eax                     ; sort of a hack, but we can get -1 here and want to be graceful
        jns     j_eax_ok1               ; it was -1: start at _start1 and run all num_iters steps
        int     3                       ; _loop_count > num_iters: the target would lie before _start1
        sub     eax,eax                 ; if execution continues, fall back to starting at _start1
j_eax_ok1:
        imul    eax,eax,_size
        add     eax,_start1
        jmp     eax
 131
 132         align   4
 133 _start1:
 134
 135 ; "OPTIMIZATIONS" maybe not worth making
 136 ;    Getting rid of the esi from the mov al,[esi+eax] instruction.
 137 ;       This would require moving into eax at the top of the loop, rather than doing the sub eax,eax.
 138 ;       You would have to align your bitmaps so that the two shlds would create the proper base address.
 139 ;       In other words, your bitmap data would have to begin at 4096x (for 64x64 bitmaps).
 140 ;       I did timings without converting the sub to a mov eax,esi and setting esi to the proper value.
 141 ;       There was a speedup of about 1% to 1.5% without converting the sub to a mov.
 142 ;    Getting rid of the edi by doing a mov nnnn[edi],al instead of mov [edi],al.
 143 ;       The problem with this is you would have a dword offset for nnnn.  My timings indicate it is slower.  (I think.)
 144 ;    Combining u,v and du,dv into single longwords.
 145 ;       The problem with this is you then must do a 16 bit operation to extract them, and you don't have enough
 146 ;       instructions to separate a destination operand from being used by the next instruction.  It shaves out one
 147 ;       register instruction (an add reg,reg), but adds a 16 bit operation, and the setup is more complicated.
 148 ; usage:
 149 ;       eax     work
 150 ;       ebx     u coordinate
 151 ;       ecx     delta u
 152 ;       edx     delta v
 153 ;       ebp     v coordinate
 154 ;       esi     pointer to source bitmap
 155 ;       edi     write address
 156 %rep num_iters
 157         mov     eax,ebp ; clear for
 158         add     ebp,edx ; update v coordinate
 159         shr     eax,26  ; shift in v coordinate
 160         shld    eax,ebx,6       ; shift in u coordinate while shifting up v coordinate
 161         add     ebx,ecx ; update u coordinate
 162         mov     al,[esi+eax]    ; get pixel from source bitmap
 163         mov     [edi],al
 164         inc     edi             ; XPARENT ADDED BY JOHN
 165
 166 ; inner loop if bitmaps are 256x256
 167 ; your register usage is bogus, and you must clear ecx
 168 ; fix your setup
 169 ; this is only about 10% faster in the inner loop
 170 ; this method would adapt to writing two pixels at a time better than
 171 ; the 64x64 method because you wouldn't run out of registers
 172 ; Note that this method assumes that both dv_dx and du_dx are in edx.
 173 ; edx = vi|vf|ui|uf
 174 ; where each field is 8 bits, vi = integer v coordinate, vf = fractional v coordinate, etc.
 175 ;** add ebx,edx
 176 ;** mov cl,bh
 177 ;** shld cx,bx,8
 178 ;** mov al,[esi+ecx]
 179 ;** mov [edi],al
 180 ;** inc edi
 181 %endrep
 182
 183 _end1:
 184
 185 _none_to_do:    popa
 186
 187         ret
 188
 189 ; ----------------------------------------------------------------------------------------
 190 ; if texture map has transparency, use this code.
 191 transparent_texture:
 192         test    dword [_loop_count],-1
 193         je      _t_none_to_do
 194 loop_transparent:
 195         mov     eax,ebp ; clear for
 196         add     ebp,edx ; update v coordinate
 197         shr     eax,26  ; shift in v coordinate
 198         shld    eax,ebx,6       ; shift in u coordinate while shifting up v coordinate
 199         add     ebx,ecx ; update u coordinate
 200         mov     al,[esi+eax]    ; get pixel from source bitmap
 201         cmp     al,255
 202         je      transp
 203         mov     [edi],al
 204 transp: inc     edi             ; XPARENT ADDED BY JOHN
 205
 206         dec     dword [_loop_count]
 207         jne     loop_transparent
 208
 209 _t_none_to_do:  popa
 210         ret
 211
 212
 213 ; This is the inner loop to write two pixels at a time
 214 ; This is about 2.5% faster overall (on Mike's 66 MHz 80486 DX2, VLB)
 215 ; You must write code to even align edi and do half as many iterations, and write
 216 ; the beginning and ending extra pixels, if necessary.
 217 ;       sub     eax,eax ; clear for
 218 ;       shld    eax,ebp,6       ; shift in v coordinate
 219 ;       add     ebp,_fx_dv_dx   ; update v coordinate
 220 ;       shld    eax,ebx,6       ; shift in u coordinate while shifting up v coordinate
 221 ;       add     ebx,ecx ; update u coordinate
 222 ;       mov     dl,[esi+eax]    ; get pixel from source bitmap
 223 ;
 224 ;       sub     eax,eax ; clear for
 225 ;       shld    eax,ebp,6       ; shift in v coordinate
 226 ;       add     ebp,_fx_dv_dx   ; update v coordinate
 227 ;       shld    eax,ebx,6       ; shift in u coordinate while shifting up v coordinate
 228 ;       add     ebx,ecx ; update u coordinate
 229 ;       mov     dh,[esi+eax]    ; get pixel from source bitmap
 230 ;
 231 ;       mov     [edi],dx
 232 ;       add     edi,2
 233