e14743d1 |
1 | /* |
2 | SDL - Simple DirectMedia Layer |
3 | Copyright (C) 1997-2009 Sam Lantinga |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | This library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with this library; if not, write to the Free Software |
17 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
18 | |
19 | Sam Lantinga |
20 | slouken@libsdl.org |
21 | */ |
22 | #include "SDL_config.h" |
23 | |
24 | /* This is a stretch blit implementation based on ideas given to me by |
25 | Tomasz Cejner - thanks! :) |
26 | |
27 | April 27, 2000 - Sam Lantinga |
28 | */ |
29 | |
30 | #include "SDL_video.h" |
31 | #include "SDL_blit.h" |
32 | |
33 | /* This isn't ready for general consumption yet - it should be folded |
34 | into the general blitting mechanism. |
35 | */ |
36 | |
37 | #if ((defined(_MFC_VER) && defined(_M_IX86)/* && !defined(_WIN32_WCE) still needed? */) || \ |
38 | defined(__WATCOMC__) || \ |
39 | (defined(__GNUC__) && defined(__i386__))) && SDL_ASSEMBLY_ROUTINES |
40 | /* There's a bug with gcc 4.4.1 and -O2 where srcp doesn't get the correct |
41 | * value after the first scanline. FIXME? */ |
42 | /*#define USE_ASM_STRETCH*/ |
43 | #endif |
44 | |
#ifdef USE_ASM_STRETCH

#ifdef HAVE_MPROTECT
#include <sys/types.h>
#include <sys/mman.h>
#endif
#ifdef __GNUC__
#define PAGE_ALIGNED __attribute__((__aligned__(4096)))
#else
#define PAGE_ALIGNED
#endif

/* Raw x86 opcodes that generate_rowbytes() writes into copy_row.
   The string instructions read from [ESI] and write to [EDI], advancing
   the pointer by the operand size each time they execute.
   The guard accepts __i386__ as well as i386, because the condition that
   enables USE_ASM_STRETCH tests __i386__ and GCC does not define the
   plain i386 spelling in strict ISO modes.
 */
#if defined(_M_IX86) || defined(i386) || defined(__i386__)
#define PREFIX16	0x66	/* operand-size prefix: next op is 16-bit */
#define STORE_BYTE	0xAA	/* stosb */
#define STORE_WORD	0xAB	/* stosd (stosw when preceded by PREFIX16) */
#define LOAD_BYTE	0xAC	/* lodsb */
#define LOAD_WORD	0xAD	/* lodsd (lodsw when preceded by PREFIX16) */
#define RETURN		0xC3	/* ret */
#else
#error Need assembly opcodes for this architecture
#endif

/* Holds the generated row-copy routine.  With GCC the buffer is aligned
   to 4096 bytes so that mprotect() can be applied to it. */
static unsigned char copy_row[4096] PAGE_ALIGNED;

/* Generate, into copy_row, a straight-line routine that stretches one row
   of src_w pixels to dst_w pixels of bpp bytes each (bpp 1, 2 or 4).

   The routine is a sequence of "load pixel" / "store pixel" opcodes
   followed by a return; one store is emitted per destination pixel and
   a load is emitted each time the 16.16 fixed-point source position
   crosses a whole pixel.

   The parameters and result of the last generation are cached in a
   static, so a repeated call with the same arguments returns the cached
   status without regenerating.  Because of that static state (and the
   shared buffer) this function is not reentrant.

   Returns 0 when copy_row holds a usable routine, -1 otherwise.  On most
   failures an SDL error is set; invalid widths just return -1.
 */
static int generate_rowbytes(int src_w, int dst_w, int bpp)
{
	static struct {
		int bpp;
		int src_w;
		int dst_w;
		int status;
	} last;

	int i;
	int op_len;
	Uint32 pos, inc;
	unsigned char *eip;
	unsigned char *limit;
	unsigned char load, store;

	/* Reject widths the fixed-point stepping cannot handle: a zero
	   dst_w would divide by zero, and src_w must fit in 16 bits so that
	   (src_w << 16) fits in 32 bits.  This is checked before the cache
	   lookup so the zero-initialized cache can never match. */
	if ( (src_w <= 0) || (src_w > 0xFFFF) || (dst_w <= 0) ) {
		return(-1);
	}

	/* See if we need to regenerate the copy buffer */
	if ( (src_w == last.src_w) &&
	     (dst_w == last.dst_w) && (bpp == last.bpp) ) {
		return(last.status);
	}
	last.bpp = bpp;
	last.src_w = src_w;
	last.dst_w = dst_w;
	last.status = -1;	/* assume failure until generation completes */

	switch (bpp) {
	    case 1:
		load = LOAD_BYTE;
		store = STORE_BYTE;
		break;
	    case 2:
	    case 4:
		load = LOAD_WORD;
		store = STORE_WORD;
		break;
	    default:
		SDL_SetError("ASM stretch of %d bytes isn't supported\n", bpp);
		return(-1);
	}
#ifdef HAVE_MPROTECT
	/* Make the code writeable */
	if ( mprotect(copy_row, sizeof(copy_row), PROT_READ|PROT_WRITE) < 0 ) {
		SDL_SetError("Couldn't make copy buffer writeable");
		return(-1);
	}
#endif
	/* Each emitted operation is one opcode byte, plus a prefix byte for
	   16-bit pixels.  The last byte of the buffer is reserved for the
	   final RETURN, so operations may only be written below 'limit'. */
	op_len = (bpp == 2) ? 2 : 1;
	limit = copy_row + sizeof(copy_row) - 1;

	pos = 0x10000;
	inc = ((Uint32)src_w << 16) / (Uint32)dst_w;
	eip = copy_row;
	for ( i=0; i<dst_w; ++i ) {
		while ( pos >= 0x10000 ) {
			/* Check for room BEFORE writing the load */
			if ( (limit - eip) < op_len ) {
				SDL_SetError("Copy buffer overflow");
				return(-1);
			}
			if ( bpp == 2 ) {
				*eip++ = PREFIX16;
			}
			*eip++ = load;
			pos -= 0x10000;
		}
		/* Check for room BEFORE writing the store */
		if ( (limit - eip) < op_len ) {
			SDL_SetError("Copy buffer overflow");
			return(-1);
		}
		if ( bpp == 2 ) {
			*eip++ = PREFIX16;
		}
		*eip++ = store;
		pos += inc;
	}
	*eip++ = RETURN;	/* always fits: eip <= limit here */

#ifdef HAVE_MPROTECT
	/* Make the code executable but not writeable */
	if ( mprotect(copy_row, sizeof(copy_row), PROT_READ|PROT_EXEC) < 0 ) {
		SDL_SetError("Couldn't make copy buffer executable");
		return(-1);
	}
#endif
	last.status = 0;
	return(0);
}

#endif /* USE_ASM_STRETCH */
151 | |
152 | #define DEFINE_COPY_ROW(name, type) \ |
153 | void name(type *src, int src_w, type *dst, int dst_w) \ |
154 | { \ |
155 | int i; \ |
156 | int pos, inc; \ |
157 | type pixel = 0; \ |
158 | \ |
159 | pos = 0x10000; \ |
160 | inc = (src_w << 16) / dst_w; \ |
161 | for ( i=dst_w; i>0; --i ) { \ |
162 | while ( pos >= 0x10000L ) { \ |
163 | pixel = *src++; \ |
164 | pos -= 0x10000L; \ |
165 | } \ |
166 | *dst++ = pixel; \ |
167 | pos += inc; \ |
168 | } \ |
169 | } |
170 | DEFINE_COPY_ROW(copy_row1, Uint8) |
171 | DEFINE_COPY_ROW(copy_row2, Uint16) |
172 | DEFINE_COPY_ROW(copy_row4, Uint32) |
173 | |
174 | /* The ASM code doesn't handle 24-bpp stretch blits */ |
175 | void copy_row3(Uint8 *src, int src_w, Uint8 *dst, int dst_w) |
176 | { |
177 | int i; |
178 | int pos, inc; |
179 | Uint8 pixel[3] = { 0, 0, 0 }; |
180 | |
181 | pos = 0x10000; |
182 | inc = (src_w << 16) / dst_w; |
183 | for ( i=dst_w; i>0; --i ) { |
184 | while ( pos >= 0x10000L ) { |
185 | pixel[0] = *src++; |
186 | pixel[1] = *src++; |
187 | pixel[2] = *src++; |
188 | pos -= 0x10000L; |
189 | } |
190 | *dst++ = pixel[0]; |
191 | *dst++ = pixel[1]; |
192 | *dst++ = pixel[2]; |
193 | pos += inc; |
194 | } |
195 | } |
196 | |
197 | /* Perform a stretch blit between two surfaces of the same format. |
198 | NOTE: This function is not safe to call from multiple threads! |
199 | */ |
200 | int SDL_SoftStretch(SDL_Surface *src, SDL_Rect *srcrect, |
201 | SDL_Surface *dst, SDL_Rect *dstrect) |
202 | { |
203 | int src_locked; |
204 | int dst_locked; |
205 | int pos, inc; |
206 | int dst_width; |
207 | int dst_maxrow; |
208 | int src_row, dst_row; |
209 | Uint8 *srcp = NULL; |
210 | Uint8 *dstp; |
211 | SDL_Rect full_src; |
212 | SDL_Rect full_dst; |
213 | #ifdef USE_ASM_STRETCH |
214 | SDL_bool use_asm = SDL_TRUE; |
215 | #ifdef __GNUC__ |
216 | int u1, u2; |
217 | #endif |
218 | #endif /* USE_ASM_STRETCH */ |
219 | const int bpp = dst->format->BytesPerPixel; |
220 | |
221 | if ( src->format->BitsPerPixel != dst->format->BitsPerPixel ) { |
222 | SDL_SetError("Only works with same format surfaces"); |
223 | return(-1); |
224 | } |
225 | |
226 | /* Verify the blit rectangles */ |
227 | if ( srcrect ) { |
228 | if ( (srcrect->x < 0) || (srcrect->y < 0) || |
229 | ((srcrect->x+srcrect->w) > src->w) || |
230 | ((srcrect->y+srcrect->h) > src->h) ) { |
231 | SDL_SetError("Invalid source blit rectangle"); |
232 | return(-1); |
233 | } |
234 | } else { |
235 | full_src.x = 0; |
236 | full_src.y = 0; |
237 | full_src.w = src->w; |
238 | full_src.h = src->h; |
239 | srcrect = &full_src; |
240 | } |
241 | if ( dstrect ) { |
242 | if ( (dstrect->x < 0) || (dstrect->y < 0) || |
243 | ((dstrect->x+dstrect->w) > dst->w) || |
244 | ((dstrect->y+dstrect->h) > dst->h) ) { |
245 | SDL_SetError("Invalid destination blit rectangle"); |
246 | return(-1); |
247 | } |
248 | } else { |
249 | full_dst.x = 0; |
250 | full_dst.y = 0; |
251 | full_dst.w = dst->w; |
252 | full_dst.h = dst->h; |
253 | dstrect = &full_dst; |
254 | } |
255 | |
256 | /* Lock the destination if it's in hardware */ |
257 | dst_locked = 0; |
258 | if ( SDL_MUSTLOCK(dst) ) { |
259 | if ( SDL_LockSurface(dst) < 0 ) { |
260 | SDL_SetError("Unable to lock destination surface"); |
261 | return(-1); |
262 | } |
263 | dst_locked = 1; |
264 | } |
265 | /* Lock the source if it's in hardware */ |
266 | src_locked = 0; |
267 | if ( SDL_MUSTLOCK(src) ) { |
268 | if ( SDL_LockSurface(src) < 0 ) { |
269 | if ( dst_locked ) { |
270 | SDL_UnlockSurface(dst); |
271 | } |
272 | SDL_SetError("Unable to lock source surface"); |
273 | return(-1); |
274 | } |
275 | src_locked = 1; |
276 | } |
277 | |
278 | /* Set up the data... */ |
279 | pos = 0x10000; |
280 | inc = (srcrect->h << 16) / dstrect->h; |
281 | src_row = srcrect->y; |
282 | dst_row = dstrect->y; |
283 | dst_width = dstrect->w*bpp; |
284 | |
285 | #ifdef USE_ASM_STRETCH |
286 | /* Write the opcodes for this stretch */ |
287 | if ( (bpp == 3) || |
288 | (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0) ) { |
289 | use_asm = SDL_FALSE; |
290 | } |
291 | #endif |
292 | |
293 | /* Perform the stretch blit */ |
294 | for ( dst_maxrow = dst_row+dstrect->h; dst_row<dst_maxrow; ++dst_row ) { |
295 | dstp = (Uint8 *)dst->pixels + (dst_row*dst->pitch) |
296 | + (dstrect->x*bpp); |
297 | while ( pos >= 0x10000L ) { |
298 | srcp = (Uint8 *)src->pixels + (src_row*src->pitch) |
299 | + (srcrect->x*bpp); |
300 | ++src_row; |
301 | pos -= 0x10000L; |
302 | } |
303 | #ifdef USE_ASM_STRETCH |
304 | if (use_asm) { |
305 | #ifdef __GNUC__ |
306 | __asm__ __volatile__ ( |
307 | "call *%4" |
308 | : "=&D" (u1), "=&S" (u2) |
309 | : "0" (dstp), "1" (srcp), "r" (copy_row) |
310 | : "memory" ); |
311 | #elif defined(_MSC_VER) || defined(__WATCOMC__) |
312 | { void *code = copy_row; |
313 | __asm { |
314 | push edi |
315 | push esi |
316 | |
317 | mov edi, dstp |
318 | mov esi, srcp |
319 | call dword ptr code |
320 | |
321 | pop esi |
322 | pop edi |
323 | } |
324 | } |
325 | #else |
326 | #error Need inline assembly for this compiler |
327 | #endif |
328 | } else |
329 | #endif |
330 | switch (bpp) { |
331 | case 1: |
332 | copy_row1(srcp, srcrect->w, dstp, dstrect->w); |
333 | break; |
334 | case 2: |
335 | copy_row2((Uint16 *)srcp, srcrect->w, |
336 | (Uint16 *)dstp, dstrect->w); |
337 | break; |
338 | case 3: |
339 | copy_row3(srcp, srcrect->w, dstp, dstrect->w); |
340 | break; |
341 | case 4: |
342 | copy_row4((Uint32 *)srcp, srcrect->w, |
343 | (Uint32 *)dstp, dstrect->w); |
344 | break; |
345 | } |
346 | pos += inc; |
347 | } |
348 | |
349 | /* We need to unlock the surfaces if they're locked */ |
350 | if ( dst_locked ) { |
351 | SDL_UnlockSurface(dst); |
352 | } |
353 | if ( src_locked ) { |
354 | SDL_UnlockSurface(src); |
355 | } |
356 | return(0); |
357 | } |
358 | |