tune the preloads a bit
[sdl_omap.git] / src / video / SDL_blit.h
CommitLineData
e14743d1 1/*
2 SDL - Simple DirectMedia Layer
3 Copyright (C) 1997-2009 Sam Lantinga
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
19 Sam Lantinga
20 slouken@libsdl.org
21*/
22#include "SDL_config.h"
23
24#ifndef _SDL_blit_h
25#define _SDL_blit_h
26
27#include "SDL_endian.h"
28
29/* The structure passed to the low level blit functions (the SDL_loblit
   function pointer type takes a pointer to it).
   NOTE(review): the per-field notes below are inferred from the field
   names only -- confirm against the code that fills this structure. */
30typedef struct {
31 Uint8 *s_pixels; /* presumably the source pixel data */
32 int s_width; /* presumably the source width */
33 int s_height; /* presumably the source height */
34 int s_skip; /* NOTE(review): meaning/units of the source "skip" are not visible here */
35 Uint8 *d_pixels; /* presumably the destination pixel data */
36 int d_width; /* presumably the destination width */
37 int d_height; /* presumably the destination height */
38 int d_skip; /* NOTE(review): meaning/units of the destination "skip" are not visible here */
39 void *aux_data; /* opaque pointer; struct private_swaccel has a field of the same name */
40 SDL_PixelFormat *src; /* presumably the source pixel format */
41 Uint8 *table; /* NOTE(review): table of unspecified layout; SDL_BlitMap has a field of the same name */
42 SDL_PixelFormat *dst; /* presumably the destination pixel format */
43} SDL_BlitInfo;
44
45/* The type definition for the low level blit functions: a pointer to a
   function that takes an SDL_BlitInfo pointer and returns nothing */
46typedef void (*SDL_loblit)(SDL_BlitInfo *info);
47
48/* This is the private info structure for software accelerated blits.
   SDL_BlitMap points at one of these through its sw_data field. */
49struct private_swaccel {
50 SDL_loblit blit; /* low level blit function pointer */
51 void *aux_data; /* opaque pointer; NOTE(review): presumably forwarded through SDL_BlitInfo.aux_data -- confirm */
52};
53
54/* Blit mapping definition.
   NOTE(review): field notes marked "presumably" are inferred from names
   only -- confirm against SDL_blit.c / SDL_pixels.c. */
55typedef struct SDL_BlitMap {
56 SDL_Surface *dst; /* destination surface (see format_version below) */
57 int identity; /* NOTE(review): presumably nonzero when no pixel conversion is needed */
58 Uint8 *table; /* NOTE(review): table of unspecified layout; SDL_BlitInfo has a field of the same name */
59 SDL_blit hw_blit; /* presumably the hardware blit entry point */
60 SDL_blit sw_blit; /* presumably the software blit entry point */
61 struct private_hwaccel *hw_data; /* private data for hw_blit; type not defined in this header */
62 struct private_swaccel *sw_data; /* private data for software blits (low level blitter + aux pointer) */
63
64 /* the version count matches the destination; mismatch indicates
65 an invalid mapping */
66 unsigned int format_version;
67} SDL_BlitMap;
68
69
70/* Functions found in SDL_blit.c.
   NOTE(review): the meaning of the int result is not visible in this
   header -- confirm in SDL_blit.c. */
71extern int SDL_CalculateBlit(SDL_Surface *surface);
72
73/* Functions found in SDL_blit_{0,1,N,A}.c.
   Each one returns an SDL_loblit (low level blit function pointer) for
   the given surface. NOTE(review): the meaning of `complex` and of a
   null result is not visible in this header -- confirm in the .c files. */
74extern SDL_loblit SDL_CalculateBlit0(SDL_Surface *surface, int complex);
75extern SDL_loblit SDL_CalculateBlit1(SDL_Surface *surface, int complex);
76extern SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int complex);
77extern SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int complex);
78
79/*
80 * Useful macros for blitting routines
81 */
82
/* Evaluates to 1 when the formats pointed to by A and B agree on
 * BitsPerPixel, Rmask and Amask, and to 0 otherwise. Only those three
 * fields are compared; they are tested in that order and the comparison
 * stops at the first difference.
 */
#define FORMAT_EQUAL(A, B) \
    (!((A)->BitsPerPixel != (B)->BitsPerPixel \
       || (A)->Rmask != (B)->Rmask \
       || (A)->Amask != (B)->Amask))
86
/* Split an already-loaded pixel value into R, G and B components using the
 * mask, shift and loss fields of pixel format `fmt`.
 *   Pixel   - pixel value to decode (read only)
 *   fmt     - pointer to a format providing {R,G,B}mask, {R,G,B}shift and
 *             {R,G,B}loss
 *   r, g, b - lvalues that receive the components
 * Each channel is masked, shifted down by its shift and then shifted up by
 * its loss; no further rescaling is done (see the FIXME below).
 * Every argument is parenthesized so expression arguments such as `a | b`
 * or `&format` expand correctly. The body stays a plain brace block, as
 * before, so existing call sites compile unchanged.
 */
/* FIXME: rescale values to 0..255 here? */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b) \
{ \
    (r) = ((((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss); \
    (g) = ((((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss); \
    (b) = ((((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss); \
}
/* Fixed-format decoders: split `Pixel` into R, G and B (stored into the
 * lvalues r, g, b) for three hard-coded layouts. Each channel is masked,
 * shifted down to bit 0 and then shifted up so it sits in the top bits of
 * an 8-bit value (5-bit channels << 3, the 6-bit green of 565 << 2).
 * `Pixel` is parenthesized so expression arguments expand correctly.
 */

/* 16-bit 5-6-5: red in bits 11-15, green in bits 5-10, blue in bits 0-4 */
#define RGB_FROM_RGB565(Pixel, r, g, b) \
{ \
    (r) = ((((Pixel) & 0xF800) >> 11) << 3); \
    (g) = ((((Pixel) & 0x07E0) >> 5) << 2); \
    (b) = (((Pixel) & 0x001F) << 3); \
}
/* 15-bit 5-5-5: red in bits 10-14, green in bits 5-9, blue in bits 0-4 */
#define RGB_FROM_RGB555(Pixel, r, g, b) \
{ \
    (r) = ((((Pixel) & 0x7C00) >> 10) << 3); \
    (g) = ((((Pixel) & 0x03E0) >> 5) << 3); \
    (b) = (((Pixel) & 0x001F) << 3); \
}
/* 24-bit 8-8-8: red in bits 16-23, green in bits 8-15, blue in bits 0-7 */
#define RGB_FROM_RGB888(Pixel, r, g, b) \
{ \
    (r) = (((Pixel) & 0xFF0000) >> 16); \
    (g) = (((Pixel) & 0xFF00) >> 8); \
    (b) = ((Pixel) & 0xFF); \
}
/* Load one raw pixel of `bpp` bytes from `buf` into the lvalue `Pixel`.
 *   bpp == 2 : reads a Uint16 through `buf`
 *   bpp == 3 : combines three bytes; which byte is most significant
 *              depends on SDL_BYTEORDER
 *   bpp == 4 : reads a Uint32 through `buf`
 *   other    : Pixel is set to 0 (keeps gcc from warning about an
 *              uninitialized value)
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel) \
do { \
    switch (bpp) { \
    case 4: \
        Pixel = *((Uint32 *)(buf)); \
        break; \
    case 3: { \
        Uint8 *B = (Uint8 *)(buf); \
        Pixel = (SDL_BYTEORDER == SDL_LIL_ENDIAN) \
            ? B[0] + (B[1] << 8) + (B[2] << 16) \
            : (B[0] << 16) + (B[1] << 8) + B[2]; \
    } \
        break; \
    case 2: \
        Pixel = *((Uint16 *)(buf)); \
        break; \
    default: \
        Pixel = 0; /* appease gcc */ \
        break; \
    } \
} while(0)
139
/* Load one pixel of `bpp` bytes from `buf` into the lvalue `Pixel`, then
 * split it into r, g, b with RGB_FROM_PIXEL using format `fmt`.
 *   bpp == 2 / 4 : reads a Uint16 / Uint32 through `buf`
 *   bpp == 3     : combines three bytes; byte significance depends on
 *                  SDL_BYTEORDER
 *   other        : Pixel is set to 0 before decoding
 * `buf` is parenthesized in every cast so that a pointer expression such
 * as `row + 1` is converted as a whole rather than only its first operand.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b) \
do { \
    switch (bpp) { \
    case 2: \
        Pixel = *((Uint16 *)(buf)); \
        break; \
    case 3: { \
        Uint8 *B = (Uint8 *)(buf); \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
            Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
        } else { \
            Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
        } \
    } \
        break; \
    case 4: \
        Pixel = *((Uint32 *)(buf)); \
        break; \
    default: \
        Pixel = 0; /* prevent gcc from complaining */ \
        break; \
    } \
    RGB_FROM_PIXEL(Pixel, fmt, r, g, b); \
} while(0)
167
/* Assemble R-G-B values into a specified pixel format and store them */

/* PIXEL_FROM_RGB packs r, g, b into the lvalue `Pixel` for format `fmt`:
 * each component is shifted down by the channel's loss and up by the
 * channel's shift, and the three results are OR-ed together.
 * The __NDS__ variant additionally sets bit 15 of the result.
 * Components and `fmt` are parenthesized so that arguments such as
 * `c & 0xFF` or `&format` expand correctly. Kept as a plain brace block so
 * existing call sites compile unchanged.
 */
#ifdef __NDS__ /* FIXME */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
{ \
    Pixel = (((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
            (((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
            (((b) >> (fmt)->Bloss) << (fmt)->Bshift) | (1<<15); \
}
#else
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
{ \
    Pixel = (((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
            (((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
            (((b) >> (fmt)->Bloss) << (fmt)->Bshift); \
}
#endif /* __NDS__ FIXME */
/* Fixed-format encoders: pack r, g, b into the lvalue `Pixel` for three
 * hard-coded layouts. Components are reduced to the channel width by a
 * right shift (>> 3 for 5-bit channels, >> 2 for the 6-bit green of 565)
 * and moved to their bit position. Nothing is masked, so components are
 * expected to fit in 8 bits.
 * Components are parenthesized so arguments such as `c & 0xFF` expand
 * correctly (shifts bind tighter than `&`).
 */

/* 16-bit 5-6-5 */
#define RGB565_FROM_RGB(Pixel, r, g, b) \
{ \
    Pixel = (((r) >> 3) << 11) | (((g) >> 2) << 5) | ((b) >> 3); \
}
/* 15-bit 5-5-5 */
#define RGB555_FROM_RGB(Pixel, r, g, b) \
{ \
    Pixel = (((r) >> 3) << 10) | (((g) >> 3) << 5) | ((b) >> 3); \
}
/* 24-bit 8-8-8 */
#define RGB888_FROM_RGB(Pixel, r, g, b) \
{ \
    Pixel = ((r) << 16) | ((g) << 8) | (b); \
}
/* Pack r, g, b for format `fmt` and store the pixel at `buf`.
 *   bpp == 2 / 4 : builds the value with PIXEL_FROM_RGB and stores it
 *                  through a Uint16 / Uint32 pointer
 *   bpp == 3     : stores r, g, b as single bytes; the byte offset of each
 *                  channel is its shift / 8 on little-endian builds and
 *                  2 - shift / 8 otherwise, so `buf` must be usable as a
 *                  byte pointer in this case
 *   other        : nothing is stored
 * `fmt` is parenthesized so that an argument such as `&format` expands
 * correctly. Kept as a plain brace block so existing call sites compile
 * unchanged.
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) \
{ \
    switch (bpp) { \
    case 2: { \
        Uint16 Pixel; \
        PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
        *((Uint16 *)(buf)) = Pixel; \
    } \
        break; \
    case 3: { \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
            *((buf) + (fmt)->Rshift/8) = (r); \
            *((buf) + (fmt)->Gshift/8) = (g); \
            *((buf) + (fmt)->Bshift/8) = (b); \
        } else { \
            *((buf) + 2 - (fmt)->Rshift/8) = (r); \
            *((buf) + 2 - (fmt)->Gshift/8) = (g); \
            *((buf) + 2 - (fmt)->Bshift/8) = (b); \
        } \
    } \
        break; \
    case 4: { \
        Uint32 Pixel; \
        PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
        *((Uint32 *)(buf)) = Pixel; \
    } \
        break; \
    } \
}
/* Like ASSEMBLE_RGB, but for 2- and 4-byte pixels the bits of the existing
 * destination pixel selected by `Amask` are kept and OR-ed into the newly
 * packed value.
 *   bpp == 2 / 4 : *buf = PIXEL_FROM_RGB(...) | (*buf & Amask)
 *   bpp == 3     : stores r, g, b as single bytes exactly as ASSEMBLE_RGB
 *                  does (Amask is not used)
 *   other        : nothing is stored
 * `buf`, `fmt` and `Amask` are parenthesized so that expression arguments
 * (`row + x`, `&format`, `a | b`) expand correctly. Kept as a plain brace
 * block so existing call sites compile unchanged.
 */
#define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask) \
{ \
    switch (bpp) { \
    case 2: { \
        Uint16 *bufp; \
        Uint16 Pixel; \
        bufp = (Uint16 *)(buf); \
        PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
        *bufp = Pixel | (*bufp & (Amask)); \
    } \
        break; \
    case 3: { \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
            *((buf) + (fmt)->Rshift/8) = (r); \
            *((buf) + (fmt)->Gshift/8) = (g); \
            *((buf) + (fmt)->Bshift/8) = (b); \
        } else { \
            *((buf) + 2 - (fmt)->Rshift/8) = (r); \
            *((buf) + 2 - (fmt)->Gshift/8) = (g); \
            *((buf) + 2 - (fmt)->Bshift/8) = (b); \
        } \
    } \
        break; \
    case 4: { \
        Uint32 *bufp; \
        Uint32 Pixel; \
        bufp = (Uint32 *)(buf); \
        PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
        *bufp = Pixel | (*bufp & (Amask)); \
    } \
        break; \
    } \
}
266
/* Split an already-loaded pixel value into R, G, B and A components using
 * the mask, shift and loss fields of pixel format `fmt`.
 *   Pixel      - pixel value to decode (read only)
 *   fmt        - pointer to a format providing {R,G,B,A}mask,
 *                {R,G,B,A}shift and {R,G,B,A}loss
 *   r, g, b, a - lvalues that receive the components
 * Each channel is masked, shifted down by its shift and then shifted up by
 * its loss; nothing is rescaled beyond that (see the FIXME below).
 * Every argument is parenthesized so expression arguments expand
 * correctly. Kept as a plain brace block so existing call sites compile
 * unchanged.
 */
/* FIXME: Should we rescale alpha into 0..255 here? */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a) \
{ \
    (r) = ((((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift) << (fmt)->Rloss); \
    (g) = ((((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift) << (fmt)->Gloss); \
    (b) = ((((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift) << (fmt)->Bloss); \
    (a) = ((((Pixel) & (fmt)->Amask) >> (fmt)->Ashift) << (fmt)->Aloss); \
}
/* Variant of RGBA_FROM_PIXEL that skips the loss shift: each channel is
 * only masked with the format's mask and shifted down by the format's
 * shift. The name suggests it is meant for formats with 8 bits per
 * channel, where the loss would be 0.
 *   Pixel      - pixel value to decode (read only)
 *   fmt        - pointer to a format providing {R,G,B,A}mask and
 *                {R,G,B,A}shift
 *   r, g, b, a - lvalues that receive the components
 * Every argument is parenthesized so expression arguments expand
 * correctly. Kept as a plain brace block so existing call sites compile
 * unchanged.
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a) \
{ \
    (r) = (((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift); \
    (g) = (((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift); \
    (b) = (((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift); \
    (a) = (((Pixel) & (fmt)->Amask) >> (fmt)->Ashift); \
}
/* Fixed-layout 32-bit decoders: split `Pixel` into four 8-bit components
 * stored into the lvalues r, g, b, a. The macro name gives the channel
 * order from the most significant byte to the least significant byte.
 * The top byte is taken with a plain `>> 24` (no mask), so `Pixel` is
 * expected to be a 32-bit unsigned value.
 * `Pixel` is parenthesized so expression arguments such as `hi | lo`
 * expand correctly.
 */

/* R in bits 24-31, G in bits 16-23, B in bits 8-15, A in bits 0-7 */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a) \
{ \
    (r) = ((Pixel) >> 24); \
    (g) = (((Pixel) >> 16) & 0xFF); \
    (b) = (((Pixel) >> 8) & 0xFF); \
    (a) = ((Pixel) & 0xFF); \
}
/* A in bits 24-31, R in bits 16-23, G in bits 8-15, B in bits 0-7 */
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a) \
{ \
    (r) = (((Pixel) >> 16) & 0xFF); \
    (g) = (((Pixel) >> 8) & 0xFF); \
    (b) = ((Pixel) & 0xFF); \
    (a) = ((Pixel) >> 24); \
}
/* A in bits 24-31, B in bits 16-23, G in bits 8-15, R in bits 0-7 */
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a) \
{ \
    (r) = ((Pixel) & 0xFF); \
    (g) = (((Pixel) >> 8) & 0xFF); \
    (b) = (((Pixel) >> 16) & 0xFF); \
    (a) = ((Pixel) >> 24); \
}
/* Load one pixel of `bpp` bytes from `buf` into the lvalue `Pixel`, split
 * it into r, g, b, a with RGBA_FROM_PIXEL using format `fmt`, and finally
 * clear the format's alpha-mask bits from `Pixel`.
 *   bpp == 2 / 4 : reads a Uint16 / Uint32 through `buf`
 *   bpp == 3     : combines three bytes; byte significance depends on
 *                  SDL_BYTEORDER (FIXME below: no alpha in this case)
 *   other        : Pixel is set to 0 before decoding
 *
 * The byte pointer used for the 3-byte case is named `B`, as in
 * DISEMBLE_RGB. It used to be spelled `b`, which is also the name of a
 * macro parameter: the preprocessor replaced it with the caller's blue
 * argument, so the macro only compiled when that argument was a plain
 * identifier (and then silently shadowed it).
 * `buf` and `fmt` are parenthesized so expression arguments expand
 * correctly.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a) \
do { \
    switch (bpp) { \
    case 2: \
        Pixel = *((Uint16 *)(buf)); \
        break; \
    case 3: { /* FIXME: broken code (no alpha) */ \
        Uint8 *B = (Uint8 *)(buf); \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
            Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
        } else { \
            Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
        } \
    } \
        break; \
    case 4: \
        Pixel = *((Uint32 *)(buf)); \
        break; \
    default: \
        Pixel = 0; /* stop gcc complaints */ \
        break; \
    } \
    RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a); \
    Pixel &= ~(fmt)->Amask; \
} while(0)
331
/* PIXEL_FROM_RGBA packs r, g, b, a into the lvalue `Pixel` for format
 * `fmt`: each component is shifted down by the channel's loss and up by
 * the channel's shift, and the four results are OR-ed together.
 * The __NDS__ variant additionally sets bit 15 of the result.
 * Components and `fmt` are parenthesized so that arguments such as
 * `c & 0xFF` or `&format` expand correctly. Kept as a plain brace block so
 * existing call sites compile unchanged.
 */
/* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
#ifdef __NDS__ /* FIXME */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
{ \
    Pixel = (((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
            (((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
            (((b) >> (fmt)->Bloss) << (fmt)->Bshift) | \
            (((a) >> (fmt)->Aloss) << (fmt)->Ashift) | (1<<15); \
}
#else
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
{ \
    Pixel = (((r) >> (fmt)->Rloss) << (fmt)->Rshift) | \
            (((g) >> (fmt)->Gloss) << (fmt)->Gshift) | \
            (((b) >> (fmt)->Bloss) << (fmt)->Bshift) | \
            (((a) >> (fmt)->Aloss) << (fmt)->Ashift); \
}
#endif /* __NDS__ FIXME */
/* Pack r, g, b, a for format `fmt` and store the pixel at `buf`.
 *   bpp == 2 / 4 : builds the value with PIXEL_FROM_RGBA and stores it
 *                  through a Uint16 / Uint32 pointer
 *   bpp == 3     : stores only r, g, b as single bytes (alpha is dropped,
 *                  see the FIXME); the byte offset of each channel is its
 *                  shift / 8 on little-endian builds and 2 - shift / 8
 *                  otherwise, so `buf` must be usable as a byte pointer
 *   other        : nothing is stored
 * `fmt` is parenthesized so that an argument such as `&format` expands
 * correctly. Kept as a plain brace block so existing call sites compile
 * unchanged.
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a) \
{ \
    switch (bpp) { \
    case 2: { \
        Uint16 Pixel; \
        PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a); \
        *((Uint16 *)(buf)) = Pixel; \
    } \
        break; \
    case 3: { /* FIXME: broken code (no alpha) */ \
        if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
            *((buf) + (fmt)->Rshift/8) = (r); \
            *((buf) + (fmt)->Gshift/8) = (g); \
            *((buf) + (fmt)->Bshift/8) = (b); \
        } else { \
            *((buf) + 2 - (fmt)->Rshift/8) = (r); \
            *((buf) + 2 - (fmt)->Gshift/8) = (g); \
            *((buf) + 2 - (fmt)->Bshift/8) = (b); \
        } \
    } \
        break; \
    case 4: { \
        Uint32 Pixel; \
        PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a); \
        *((Uint32 *)(buf)) = Pixel; \
    } \
        break; \
    } \
}
383
/* Blend the RGB values of two Pixels based on a source alpha value.
 *   sR, sG, sB - source components (read only)
 *   A          - source alpha; the arithmetic treats it as a 0..255 weight
 *   dR, dG, dB - destination components; lvalues, updated in place
 * Per channel: d = (((s - d) * A + 255) >> 8) + d, so A == 0 leaves d
 * unchanged whenever s >= d. When s < d the intermediate is negative for
 * signed operands and the result of `>>` is then implementation-defined.
 * All operands are parenthesized so expression arguments such as `hi | lo`
 * expand correctly.
 */
#define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB) \
do { \
    (dR) = ((((sR) - (dR)) * (A) + 255) >> 8) + (dR); \
    (dG) = ((((sG) - (dG)) * (A) + 255) >> 8) + (dG); \
    (dB) = ((((sB) - (dB)) * (A) + 255) >> 8) + (dB); \
} while(0)
391
392
393/* This is a very useful loop for optimizing blitters.
   USE_DUFFS_LOOP is defined below unless the compiler reports
   _MSC_VER == 1300; the #ifdef USE_DUFFS_LOOP that follows tests it to
   choose between the unrolled (Duff's device) loop macros and the
   plain-loop fallbacks. */
394#if defined(_MSC_VER) && (_MSC_VER == 1300)
395/* There's a bug in the Visual C++ 7 optimizer when compiling this code */
396#else
397#define USE_DUFFS_LOOP
398#endif
399#ifdef USE_DUFFS_LOOP
400
/* 8-times unrolled loop.
 * Executes the statement `pixel_copy_increment` exactly `width` times.
 *   n           - number of do/while passes, (width + 7) / 8
 *   (width) & 7 - selects the entry label so that the first pass runs only
 *                 the remainder (a remainder of 0 runs a full pass of 8)
 * A width that is zero or negative now runs nothing, matching the
 * plain-loop fallback; previously `case 0` executed eight copies for
 * width == 0.
 * `width` is evaluated twice and is parenthesized so that expression
 * arguments such as `w | 1` expand correctly. The macro declares a local
 * `int n`, so `pixel_copy_increment` must not rely on an outer `n`.
 */
#define DUFFS_LOOP8(pixel_copy_increment, width) \
{ int n = ((width) + 7) / 8; \
    if (n > 0) { \
        switch ((width) & 7) { \
        case 0: do { pixel_copy_increment; \
        case 7:      pixel_copy_increment; \
        case 6:      pixel_copy_increment; \
        case 5:      pixel_copy_increment; \
        case 4:      pixel_copy_increment; \
        case 3:      pixel_copy_increment; \
        case 2:      pixel_copy_increment; \
        case 1:      pixel_copy_increment; \
                } while ( --n > 0 ); \
        } \
    } \
}
416
/* 4-times unrolled loop.
 * Executes the statement `pixel_copy_increment` exactly `width` times.
 *   n           - number of do/while passes, (width + 3) / 4
 *   (width) & 3 - selects the entry label so that the first pass runs only
 *                 the remainder (a remainder of 0 runs a full pass of 4)
 * A width that is zero or negative now runs nothing, matching the
 * plain-loop fallback; previously `case 0` executed four copies for
 * width == 0.
 * `width` is evaluated twice and is parenthesized so that expression
 * arguments such as `w | 1` expand correctly. The macro declares a local
 * `int n`, so `pixel_copy_increment` must not rely on an outer `n`.
 */
#define DUFFS_LOOP4(pixel_copy_increment, width) \
{ int n = ((width) + 3) / 4; \
    if (n > 0) { \
        switch ((width) & 3) { \
        case 0: do { pixel_copy_increment; \
        case 3:      pixel_copy_increment; \
        case 2:      pixel_copy_increment; \
        case 1:      pixel_copy_increment; \
                } while ( --n > 0 ); \
        } \
    } \
}
428
429/* 2 - times unrolled loop.
   Runs pixel_copy_increment once when width is odd, then covers the
   remaining even count w with double_pixel_copy_increment (one execution
   per two units of w). n = (w + 2) / 4 is the number of do/while passes,
   two double copies per full pass; the switch on (w & 2) enters at
   `case 2` so the first pass performs a single double copy when the
   number of pairs is odd. Nothing more runs when the remaining count is
   not positive. width is evaluated once, into the local w. */
430#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
431 double_pixel_copy_increment, width) \
432{ int n, w = width; \
433 if( w & 1 ) { \
434 pixel_copy_increment; \
435 w--; \
436 } \
437 if ( w > 0 ) { \
438 n = ( w + 2) / 4; \
439 switch( w & 2 ) { \
440 case 0: do { double_pixel_copy_increment; \
441 case 2: double_pixel_copy_increment; \
442 } while ( --n > 0 ); \
443 } \
444 } \
445}
446
447/* 2 - times unrolled loop 4 pixels.
   Peels one pixel_copy_increment when bit 0 of width is set and one
   double_pixel_copy_increment when bit 1 is set, which leaves a multiple
   of four in w. The rest is covered by quatro_pixel_copy_increment (one
   execution per four units of w), two per full do/while pass;
   n = (w + 7) / 8 is the number of passes and the switch on (w & 4)
   enters at `case 4` so the first pass runs only one quatro copy when the
   number of remaining groups of four is odd. Nothing more runs when the
   remaining count is not positive. width is evaluated once, into the
   local w. */
448#define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
449 double_pixel_copy_increment, \
450 quatro_pixel_copy_increment, width) \
451{ int n, w = width; \
452 if(w & 1) { \
453 pixel_copy_increment; \
454 w--; \
455 } \
456 if(w & 2) { \
457 double_pixel_copy_increment; \
458 w -= 2; \
459 } \
460 if ( w > 0 ) { \
461 n = ( w + 7 ) / 8; \
462 switch( w & 4 ) { \
463 case 0: do { quatro_pixel_copy_increment; \
464 case 4: quatro_pixel_copy_increment; \
465 } while ( --n > 0 ); \
466 } \
467 } \
468}
469
470/* Use the 8-times version of the loop by default: DUFFS_LOOP forwards
   both of its arguments unchanged to DUFFS_LOOP8 */
471#define DUFFS_LOOP(pixel_copy_increment, width) \
472 DUFFS_LOOP8(pixel_copy_increment, width)
473
474#else
475
/* Plain-loop version (no Duff's device): runs pixel_copy_increment once
 * when width is odd, then double_pixel_copy_increment once for every
 * remaining pair. width is evaluated once, into the local n.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
                           double_pixel_copy_increment, width) \
{ int n = width; \
    if ((n & 1) != 0) { \
        pixel_copy_increment; \
        n -= 1; \
    } \
    for (n >>= 1; n > 0; --n) { \
        double_pixel_copy_increment; \
    } \
}
489
/* Plain-loop version (no Duff's device): runs pixel_copy_increment once
 * when bit 0 of width is set, double_pixel_copy_increment once when bit 1
 * is set, then quatro_pixel_copy_increment once for every remaining group
 * of four. width is evaluated once, into the local n.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
                           double_pixel_copy_increment, \
                           quatro_pixel_copy_increment, width) \
{ int n = width; \
    if ((n & 1) != 0) { \
        pixel_copy_increment; \
        n -= 1; \
    } \
    if ((n & 2) != 0) { \
        double_pixel_copy_increment; \
        n -= 2; \
    } \
    for (n >>= 2; n > 0; --n) { \
        quatro_pixel_copy_increment; \
    } \
}
508
/* Plain-loop version (no Duff's device): runs pixel_copy_increment once
 * per unit of width, and not at all when width is zero or negative.
 * DUFFS_LOOP8 and DUFFS_LOOP4 are aliases for the same loop here.
 */
#define DUFFS_LOOP(pixel_copy_increment, width) \
{ int n; \
    for (n = width; n > 0; n--) { \
        pixel_copy_increment; \
    } \
}
#define DUFFS_LOOP8(pixel_copy_increment, width) \
    DUFFS_LOOP(pixel_copy_increment, width)
#define DUFFS_LOOP4(pixel_copy_increment, width) \
    DUFFS_LOOP(pixel_copy_increment, width)
520
521#endif /* USE_DUFFS_LOOP */
522
523/* Prevent Visual C++ 6.0 from printing out stupid warnings */
524#if defined(_MSC_VER) && (_MSC_VER >= 600)
525#pragma warning(disable: 4550)
526#endif
527
528#endif /* _SDL_blit_h */