[sdl_omap.git] / src / video / SDL_blit.h

/*
    SDL - Simple DirectMedia Layer
    Copyright (C) 1997-2009 Sam Lantinga

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    Sam Lantinga
    slouken@libsdl.org
*/
#include "SDL_config.h"

#ifndef _SDL_blit_h
#define _SDL_blit_h

#include "SDL_endian.h"

/*
 * The structure passed to the low level blit functions (see SDL_loblit).
 *
 * NOTE(review): the s_ and d_ prefixes presumably stand for "source" and
 * "destination".  The per-field notes below are inferred from the field
 * names only; the code that fills this structure in is not part of this
 * header, so confirm them against SDL_blit.c before relying on them.
 */
typedef struct {
	Uint8 *s_pixels;	/* presumably the source pixel data */
	int s_width;		/* presumably the source width -- units TODO confirm */
	int s_height;		/* presumably the source height */
	int s_skip;		/* presumably padding skipped after each source row -- TODO confirm */
	Uint8 *d_pixels;	/* presumably the destination pixel data */
	int d_width;		/* presumably the destination width -- units TODO confirm */
	int d_height;		/* presumably the destination height */
	int d_skip;		/* presumably padding skipped after each destination row -- TODO confirm */
	void *aux_data;		/* opaque pointer; never interpreted in this header */
	SDL_PixelFormat *src;	/* presumably the source pixel format */
	Uint8 *table;		/* presumably a pixel mapping table -- TODO confirm */
	SDL_PixelFormat *dst;	/* presumably the destination pixel format */
} SDL_BlitInfo;

/*
 * The type definition for the low level blit functions: a pointer to a
 * function that takes the SDL_BlitInfo describing the operation and
 * returns nothing.
 */
typedef void (*SDL_loblit)(SDL_BlitInfo *info);

/*
 * This is the private info structure for software accelerated blits.
 * It pairs a low level blit routine with an opaque data pointer.
 */
struct private_swaccel {
	SDL_loblit blit;	/* the low level blit routine */
	void *aux_data;		/* opaque pointer; presumably forwarded as SDL_BlitInfo.aux_data -- TODO confirm */
};

/*
 * Blit mapping definition.
 *
 * NOTE(review): the field notes are inferred from names unless the code in
 * this header shows otherwise; SDL_blit and struct private_hwaccel are
 * declared elsewhere.
 */
typedef struct SDL_BlitMap {
	SDL_Surface *dst;	/* presumably the destination surface this mapping was built for */
	int identity;		/* presumably non-zero when no pixel translation is needed -- TODO confirm */
	Uint8 *table;		/* presumably the pixel translation table -- TODO confirm */
	SDL_blit hw_blit;	/* presumably the hardware blit entry point */
	SDL_blit sw_blit;	/* presumably the software blit entry point */
	struct private_hwaccel *hw_data;	/* private data for hardware blits (type declared elsewhere) */
	struct private_swaccel *sw_data;	/* private data for software blits (see struct private_swaccel) */

	/* the version count matches the destination; mismatch indicates
	   an invalid mapping */
        unsigned int format_version;
} SDL_BlitMap;


/* Functions found in SDL_blit.c */
/* Takes a surface and returns an int; presumably a status code -- the
   meaning of the value is not visible in this header, TODO confirm. */
extern int SDL_CalculateBlit(SDL_Surface *surface);

/* Functions found in SDL_blit_{0,1,N,A}.c */
/* Each takes a surface plus an integer `complex` argument and returns a low
   level blit routine (SDL_loblit).  How `complex` is interpreted, and
   whether the result can be NULL, is not visible here -- TODO confirm. */
extern SDL_loblit SDL_CalculateBlit0(SDL_Surface *surface, int complex);
extern SDL_loblit SDL_CalculateBlit1(SDL_Surface *surface, int complex);
extern SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int complex);
extern SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int complex);

/*
 * Useful macros for blitting routines
 */

/*
 * Evaluates to 1 when the pixel formats pointed to by A and B agree on
 * BitsPerPixel, Rmask and Amask, and to 0 otherwise.  Only those three
 * fields are compared, in that order, stopping at the first mismatch.
 */
#define FORMAT_EQUAL(A, B)						\
    (!((A)->BitsPerPixel != (B)->BitsPerPixel				\
       || (A)->Rmask != (B)->Rmask					\
       || (A)->Amask != (B)->Amask))

/*
 * Split an already-loaded packed pixel into R, G and B using pixel format
 * `fmt` (a pointer).  Each channel is masked out with its mask, shifted down
 * by its shift count, then shifted left by its loss count.
 *
 * Every argument is parenthesized so that expression arguments such as
 * `lo | hi` or `&format` expand with the intended precedence.
 * The expansion is deliberately left as a bare { } block so that existing
 * uses, with or without a trailing semicolon, keep compiling.
 */
/* FIXME: rescale values to 0..255 here? */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)				\
{									\
	(r) = ((((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift)<<(fmt)->Rloss);	\
	(g) = ((((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift)<<(fmt)->Gloss);	\
	(b) = ((((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift)<<(fmt)->Bloss);	\
}
/*
 * Split a 16-bit 5-6-5 pixel: red is bits 11-15, green bits 5-10, blue bits
 * 0-4.  Components are shifted left (by 3, 2, 3) so they sit in the top bits
 * of an 8-bit value; the low bits stay zero (maximum 248/252/248).
 * `Pixel` is parenthesized so an expression argument keeps its precedence.
 */
#define RGB_FROM_RGB565(Pixel, r, g, b)					\
{									\
	(r) = ((((Pixel)&0xF800)>>11)<<3);				\
	(g) = ((((Pixel)&0x07E0)>>5)<<2);				\
	(b) = (((Pixel)&0x001F)<<3);					\
}
/*
 * Split a 15-bit 5-5-5 pixel: red is bits 10-14, green bits 5-9, blue bits
 * 0-4.  Each component is shifted left by 3, so the maximum value is 248.
 * `Pixel` is parenthesized so an expression argument keeps its precedence.
 */
#define RGB_FROM_RGB555(Pixel, r, g, b)					\
{									\
	(r) = ((((Pixel)&0x7C00)>>10)<<3);				\
	(g) = ((((Pixel)&0x03E0)>>5)<<3);				\
	(b) = (((Pixel)&0x001F)<<3);					\
}
/*
 * Split a 24-bit 8-8-8 pixel: red is bits 16-23, green bits 8-15, blue bits
 * 0-7.  Bits above 23 are ignored.
 * `Pixel` is parenthesized so an expression argument keeps its precedence.
 */
#define RGB_FROM_RGB888(Pixel, r, g, b)					\
{									\
	(r) = (((Pixel)&0xFF0000)>>16);					\
	(g) = (((Pixel)&0xFF00)>>8);					\
	(b) = ((Pixel)&0xFF);						\
}
/*
 * Load one packed pixel of `bpp` bytes from `buf` into `Pixel`.
 *   4 bytes: read as one Uint32.
 *   3 bytes: the three bytes are combined in host byte order, as selected by
 *            SDL_BYTEORDER (first byte is least significant on little-endian,
 *            most significant otherwise).
 *   2 bytes: read as one Uint16.
 *   other:   Pixel is set to 0 (keeps the compiler from warning about an
 *            uninitialized value).
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)				   \
do {									   \
	switch (bpp) {							   \
		case 4:							   \
			Pixel = *((Uint32 *)(buf));			   \
		break;							   \
									   \
		case 3: {						   \
			Uint8 *B = (Uint8 *)(buf);			   \
			Pixel = (SDL_BYTEORDER == SDL_LIL_ENDIAN)	   \
				? (B[0] + (B[1] << 8) + (B[2] << 16))	   \
				: ((B[0] << 16) + (B[1] << 8) + B[2]);	   \
		}							   \
		break;							   \
									   \
		case 2:							   \
			Pixel = *((Uint16 *)(buf));			   \
		break;							   \
									   \
		default:						   \
			Pixel = 0; /* appease gcc */			   \
		break;							   \
	}								   \
} while(0)

/*
 * Load one pixel of `bpp` bytes from `buf` into `Pixel`, then split it into
 * R, G and B according to pixel format `fmt`.
 *
 * The load step used to be a verbatim copy of RETRIEVE_RGB_PIXEL, except
 * that it cast `buf` without parentheses, so pointer arithmetic in a `buf`
 * argument was applied after the cast instead of before it.  The macro now
 * delegates to RETRIEVE_RGB_PIXEL (same results for 2, 3, 4 and other bpp
 * values) and passes its arguments on parenthesized.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)			   \
do {									   \
	RETRIEVE_RGB_PIXEL((buf), (bpp), Pixel);			   \
	RGB_FROM_PIXEL((Pixel), (fmt), r, g, b);			   \
} while(0)

/*
 * Assemble R-G-B values into a packed pixel of format `fmt` (a pointer) and
 * store the result in `Pixel`.  Each component is shifted right by the
 * channel's loss count and then left by the channel's shift count.
 * The __NDS__ variant additionally sets bit 15 of the result.
 *
 * All arguments are parenthesized so expression arguments (`a | b`,
 * `&format`) expand with the intended precedence.  The bare { } expansion is
 * kept so existing uses keep compiling.
 */
#ifdef __NDS__ /* FIXME */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b)				\
{									\
	(Pixel) = (((r)>>(fmt)->Rloss)<<(fmt)->Rshift)|		\
		(((g)>>(fmt)->Gloss)<<(fmt)->Gshift)|			\
		(((b)>>(fmt)->Bloss)<<(fmt)->Bshift) | (1<<15);	\
}
#else
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b)				\
{									\
	(Pixel) = (((r)>>(fmt)->Rloss)<<(fmt)->Rshift)|		\
		(((g)>>(fmt)->Gloss)<<(fmt)->Gshift)|			\
		(((b)>>(fmt)->Bloss)<<(fmt)->Bshift);			\
}
#endif /* __NDS__ FIXME */
/*
 * Pack 8-bit R, G, B into a 5-6-5 pixel: the top 5 bits of r go to bits
 * 11-15, the top 6 bits of g to bits 5-10, the top 5 bits of b to bits 0-4.
 * Arguments are parenthesized so expression arguments keep their precedence.
 */
#define RGB565_FROM_RGB(Pixel, r, g, b)					\
{									\
	(Pixel) = (((r)>>3)<<11)|(((g)>>2)<<5)|((b)>>3);		\
}
/*
 * Pack 8-bit R, G, B into a 5-5-5 pixel: the top 5 bits of r go to bits
 * 10-14, of g to bits 5-9, of b to bits 0-4.
 * Arguments are parenthesized so expression arguments keep their precedence.
 */
#define RGB555_FROM_RGB(Pixel, r, g, b)					\
{									\
	(Pixel) = (((r)>>3)<<10)|(((g)>>3)<<5)|((b)>>3);		\
}
/*
 * Pack 8-bit R, G, B into an 8-8-8 pixel: r in bits 16-23, g in bits 8-15,
 * b in bits 0-7.  The components are not masked, so each is expected to fit
 * in 8 bits already.
 * Arguments are parenthesized so expression arguments keep their precedence.
 */
#define RGB888_FROM_RGB(Pixel, r, g, b)					\
{									\
	(Pixel) = ((r)<<16)|((g)<<8)|(b);				\
}
/*
 * Pack R, G, B according to pixel format `fmt` and store the result at
 * `buf` as a pixel of `bpp` bytes.
 *   2 or 4 bytes: the pixel is built with PIXEL_FROM_RGB and written as one
 *                 Uint16 / Uint32.
 *   3 bytes:      each component is written to its own byte; the byte index
 *                 is shift/8 on little-endian hosts and 2 - shift/8 otherwise
 *                 (selected by SDL_BYTEORDER).
 *   other:        nothing is written.
 *
 * `fmt` and the component arguments are parenthesized (and passed on
 * parenthesized) so expression arguments such as `&format` expand correctly.
 * The bare { } expansion is kept so existing uses keep compiling.
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) 				\
{									\
	switch (bpp) {							\
		case 2: {						\
			Uint16 Pixel;					\
									\
			PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));	\
			*((Uint16 *)(buf)) = Pixel;			\
		}							\
		break;							\
									\
		case 3: {						\
			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
				*((buf)+(fmt)->Rshift/8) = (r);		\
				*((buf)+(fmt)->Gshift/8) = (g);		\
				*((buf)+(fmt)->Bshift/8) = (b);		\
			} else {					\
				*((buf)+2-(fmt)->Rshift/8) = (r);	\
				*((buf)+2-(fmt)->Gshift/8) = (g);	\
				*((buf)+2-(fmt)->Bshift/8) = (b);	\
			}						\
		}							\
		break;							\
									\
		case 4: {						\
			Uint32 Pixel;					\
									\
			PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));	\
			*((Uint32 *)(buf)) = Pixel;			\
		}							\
		break;							\
	}								\
}
/*
 * Like ASSEMBLE_RGB, but for 2- and 4-byte pixels the bits selected by
 * `Amask` in the pixel already stored at `buf` are kept and OR-ed into the
 * newly packed R-G-B value.  The 3-byte path writes the three component
 * bytes only (byte index chosen from SDL_BYTEORDER); other sizes write
 * nothing.
 *
 * `buf`, `fmt`, `Amask` and the component arguments are parenthesized: the
 * old expansion `*bufp & Amask` mis-parsed a mask written as `a | b`, and
 * `(Uint16 *)buf` applied the cast before any arithmetic inside `buf`.
 * The bare { } expansion is kept so existing uses keep compiling.
 */
#define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask)		\
{									\
	switch (bpp) {							\
		case 2: {						\
			Uint16 *bufp;					\
			Uint16 Pixel;					\
									\
			bufp = (Uint16 *)(buf);				\
			PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));	\
			*bufp = Pixel | (*bufp & (Amask));		\
		}							\
		break;							\
									\
		case 3: {						\
			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
				*((buf)+(fmt)->Rshift/8) = (r);		\
				*((buf)+(fmt)->Gshift/8) = (g);		\
				*((buf)+(fmt)->Bshift/8) = (b);		\
			} else {					\
				*((buf)+2-(fmt)->Rshift/8) = (r);	\
				*((buf)+2-(fmt)->Gshift/8) = (g);	\
				*((buf)+2-(fmt)->Bshift/8) = (b);	\
			}						\
		}							\
		break;							\
									\
		case 4: {						\
			Uint32 *bufp;					\
			Uint32 Pixel;					\
									\
			bufp = (Uint32 *)(buf);				\
			PIXEL_FROM_RGB(Pixel, (fmt), (r), (g), (b));	\
			*bufp = Pixel | (*bufp & (Amask));		\
		}							\
		break;							\
	}								\
}

/*
 * Split an already-loaded packed pixel into R, G, B and A using pixel format
 * `fmt` (a pointer): each channel is masked, shifted down by its shift count
 * and shifted left by its loss count.
 *
 * All arguments are parenthesized so expression arguments (`lo | hi`,
 * `&format`) expand with the intended precedence.  The bare { } expansion is
 * kept so existing uses keep compiling.
 */
/* FIXME: Should we rescale alpha into 0..255 here? */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a)				\
{									\
	(r) = (((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift)<<(fmt)->Rloss;	\
	(g) = (((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift)<<(fmt)->Gloss;	\
	(b) = (((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift)<<(fmt)->Bloss;	\
	(a) = (((Pixel)&(fmt)->Amask)>>(fmt)->Ashift)<<(fmt)->Aloss;	\
}
/*
 * Split a packed pixel into R, G, B and A using only the masks and shift
 * counts of `fmt`; unlike RGBA_FROM_PIXEL no loss shift is applied.
 * All arguments are parenthesized so expression arguments expand with the
 * intended precedence.
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a)	\
{						\
	(r) = ((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift;	\
	(g) = ((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift;	\
	(b) = ((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift;	\
	(a) = ((Pixel)&(fmt)->Amask)>>(fmt)->Ashift;	\
}
/*
 * Split a 32-bit pixel laid out as R (bits 24-31), G (16-23), B (8-15),
 * A (0-7).  `Pixel` is parenthesized so an expression argument keeps its
 * precedence.
 */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a)				\
{									\
	(r) = ((Pixel)>>24);						\
	(g) = (((Pixel)>>16)&0xFF);					\
	(b) = (((Pixel)>>8)&0xFF);					\
	(a) = ((Pixel)&0xFF);						\
}
/*
 * Split a 32-bit pixel laid out as A (bits 24-31), R (16-23), G (8-15),
 * B (0-7).  `Pixel` is parenthesized so an expression argument keeps its
 * precedence.
 */
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a)				\
{									\
	(r) = (((Pixel)>>16)&0xFF);					\
	(g) = (((Pixel)>>8)&0xFF);					\
	(b) = ((Pixel)&0xFF);						\
	(a) = ((Pixel)>>24);						\
}
/*
 * Split a 32-bit pixel laid out as A (bits 24-31), B (16-23), G (8-15),
 * R (0-7).  `Pixel` is parenthesized so an expression argument keeps its
 * precedence.
 */
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a)				\
{									\
	(r) = ((Pixel)&0xFF);						\
	(g) = (((Pixel)>>8)&0xFF);					\
	(b) = (((Pixel)>>16)&0xFF);					\
	(a) = ((Pixel)>>24);						\
}
/*
 * Load one pixel of `bpp` bytes from `buf` into `Pixel`, split it into
 * R, G, B and A according to pixel format `fmt`, then clear the alpha bits
 * (fmt->Amask) in `Pixel`.
 *
 * The load step used to be written out here with a local pointer named `b`,
 * the same spelling as the macro's `b` parameter.  The preprocessor
 * therefore rewrote that declaration with the caller's argument: it shadowed
 * the caller's variable and failed to compile for an argument such as
 * `px.b`.  The load now delegates to RETRIEVE_RGB_PIXEL, which produces the
 * same value for every bpp, and all arguments are passed on parenthesized.
 *
 * FIXME (carried over from the original): a 3-byte pixel has no room for
 * alpha, so the alpha result for bpp == 3 is whatever fmt's alpha mask
 * extracts from the 24-bit value.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)			   \
do {									   \
	RETRIEVE_RGB_PIXEL((buf), (bpp), Pixel);			   \
	RGBA_FROM_PIXEL((Pixel), (fmt), r, g, b, a);			   \
	(Pixel) &= ~(fmt)->Amask;					   \
} while(0)

/*
 * Assemble R-G-B-A values into a packed pixel of format `fmt` (a pointer)
 * and store the result in `Pixel`.  Each component is shifted right by the
 * channel's loss count and then left by the channel's shift count.
 * The __NDS__ variant additionally sets bit 15 of the result.
 *
 * All arguments are parenthesized so expression arguments (`a | b`,
 * `&format`) expand with the intended precedence.  The bare { } expansion is
 * kept so existing uses keep compiling.
 */
/* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
#ifdef __NDS__ /* FIXME */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)				\
{									\
	(Pixel) = (((r)>>(fmt)->Rloss)<<(fmt)->Rshift)|		\
		(((g)>>(fmt)->Gloss)<<(fmt)->Gshift)|			\
		(((b)>>(fmt)->Bloss)<<(fmt)->Bshift)|			\
		(((a)>>(fmt)->Aloss)<<(fmt)->Ashift) | (1<<15);	\
}
#else
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)				\
{									\
	(Pixel) = (((r)>>(fmt)->Rloss)<<(fmt)->Rshift)|		\
		(((g)>>(fmt)->Gloss)<<(fmt)->Gshift)|			\
		(((b)>>(fmt)->Bloss)<<(fmt)->Bshift)|			\
		(((a)>>(fmt)->Aloss)<<(fmt)->Ashift);			\
}
#endif /* __NDS__ FIXME */
/*
 * Pack R, G, B, A according to pixel format `fmt` and store the result at
 * `buf` as a pixel of `bpp` bytes.
 *   2 or 4 bytes: the pixel is built with PIXEL_FROM_RGBA and written as one
 *                 Uint16 / Uint32.
 *   3 bytes:      only R, G and B are written, one byte each; the byte index
 *                 is shift/8 on little-endian hosts and 2 - shift/8 otherwise
 *                 (selected by SDL_BYTEORDER).  Alpha is dropped.
 *   other:        nothing is written.
 *
 * `fmt` and the component arguments are parenthesized (and passed on
 * parenthesized) so expression arguments such as `&format` expand correctly.
 * The bare { } expansion is kept so existing uses keep compiling.
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)			\
{									\
	switch (bpp) {							\
		case 2: {						\
			Uint16 Pixel;					\
									\
			PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a)); \
			*((Uint16 *)(buf)) = Pixel;			\
		}							\
		break;							\
									\
		case 3: { /* FIXME: broken code (no alpha) */		\
			if(SDL_BYTEORDER == SDL_LIL_ENDIAN) {		\
				*((buf)+(fmt)->Rshift/8) = (r);		\
				*((buf)+(fmt)->Gshift/8) = (g);		\
				*((buf)+(fmt)->Bshift/8) = (b);		\
			} else {					\
				*((buf)+2-(fmt)->Rshift/8) = (r);	\
				*((buf)+2-(fmt)->Gshift/8) = (g);	\
				*((buf)+2-(fmt)->Bshift/8) = (b);	\
			}						\
		}							\
		break;							\
									\
		case 4: {						\
			Uint32 Pixel;					\
									\
			PIXEL_FROM_RGBA(Pixel, (fmt), (r), (g), (b), (a)); \
			*((Uint32 *)(buf)) = Pixel;			\
		}							\
		break;							\
	}								\
}

/*
 * Blend the RGB values of two pixels based on a source alpha value.
 * For each channel the destination becomes
 *     d + (((s - d) * A + 255) >> 8)
 * so A == 0 leaves the destination unchanged and, when s >= d, A == 255
 * yields the source value.  The destination arguments are both read and
 * written.
 *
 * All operands are parenthesized: the old expansion `sR-dR` mis-parsed a
 * source argument such as `pixel & 0xFF` as `pixel & (0xFF - dR)`.
 */
#define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB)	\
do {						\
	(dR) = ((((sR)-(dR))*(A)+255)>>8)+(dR);	\
	(dG) = ((((sG)-(dG))*(A)+255)>>8)+(dG);	\
	(dB) = ((((sB)-(dB))*(A)+255)>>8)+(dB);	\
} while(0)


/*
 * Duff's-device loop unrolling is a very useful optimization for blitters.
 * It is enabled everywhere except Visual C++ 7 (_MSC_VER == 1300), whose
 * optimizer has a bug when compiling this code.
 */
#if !defined(_MSC_VER) || (_MSC_VER != 1300)
#define USE_DUFFS_LOOP
#endif
#ifdef USE_DUFFS_LOOP

/*
 * 8-times unrolled loop (Duff's device): executes pixel_copy_increment
 * exactly `width` times.  The switch jumps into the middle of the unrolled
 * body to run the (width & 7) leftover copies first; every later pass runs
 * all eight.
 *
 * A width of zero or less now executes nothing, matching the plain-loop
 * fallback used when USE_DUFFS_LOOP is off.  Previously width == 0 entered
 * at `case 0` and ran the statement eight times.  `width` is parenthesized
 * so expression arguments keep their precedence.
 *
 * The local counter is named `n`; pixel_copy_increment must not rely on a
 * variable of that name from the enclosing scope.
 */
#define DUFFS_LOOP8(pixel_copy_increment, width)			\
{ int n = ((width)+7)/8;						\
	if ( n > 0 ) switch ((width) & 7) {				\
	case 0: do {	pixel_copy_increment;				\
	case 7:		pixel_copy_increment;				\
	case 6:		pixel_copy_increment;				\
	case 5:		pixel_copy_increment;				\
	case 4:		pixel_copy_increment;				\
	case 3:		pixel_copy_increment;				\
	case 2:		pixel_copy_increment;				\
	case 1:		pixel_copy_increment;				\
		} while ( --n > 0 );					\
	}								\
}

/*
 * 4-times unrolled loop (Duff's device): executes pixel_copy_increment
 * exactly `width` times.  The switch jumps into the middle of the unrolled
 * body to run the (width & 3) leftover copies first; every later pass runs
 * all four.
 *
 * A width of zero or less now executes nothing, matching the plain-loop
 * fallback used when USE_DUFFS_LOOP is off.  Previously width == 0 entered
 * at `case 0` and ran the statement four times.  `width` is parenthesized
 * so expression arguments keep their precedence.
 *
 * The local counter is named `n`; pixel_copy_increment must not rely on a
 * variable of that name from the enclosing scope.
 */
#define DUFFS_LOOP4(pixel_copy_increment, width)			\
{ int n = ((width)+3)/4;						\
	if ( n > 0 ) switch ((width) & 3) {				\
	case 0: do {	pixel_copy_increment;				\
	case 3:		pixel_copy_increment;				\
	case 2:		pixel_copy_increment;				\
	case 1:		pixel_copy_increment;				\
		} while ( --n > 0 );					\
	}								\
}

/*
 * 2-times unrolled loop over pixel pairs.
 * If `width` is odd, pixel_copy_increment runs once first.  The remaining
 * even count is then covered by double_pixel_copy_increment, executed
 * (remaining / 2) times: the switch on (w & 2) enters the two-statement
 * do-body at its second statement when the number of pairs is odd.
 * Nothing runs when width is 0.
 * The locals are named `n` and `w`; the pasted statements must not rely on
 * variables of those names from the enclosing scope.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,			\
				double_pixel_copy_increment, width)	\
{ int n, w = width;							\
	if( w & 1 ) {							\
	    pixel_copy_increment;					\
	    w--;							\
	}								\
	if ( w > 0 )	{						\
	    n = ( w + 2) / 4;						\
	    switch( w & 2 ) {						\
	    case 0: do {	double_pixel_copy_increment;		\
	    case 2:		double_pixel_copy_increment;		\
		    } while ( --n > 0 );					\
	    }								\
	}								\
}

/*
 * 2-times unrolled loop over groups of four pixels.
 * pixel_copy_increment runs once if bit 0 of `width` is set, then
 * double_pixel_copy_increment runs once if bit 1 is set.  The remaining
 * count (a multiple of 4) is covered by quatro_pixel_copy_increment,
 * executed (remaining / 4) times: the switch on (w & 4) enters the
 * two-statement do-body at its second statement when the number of groups
 * is odd.  Nothing runs when width is 0.
 * The locals are named `n` and `w`; the pasted statements must not rely on
 * variables of those names from the enclosing scope.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment,			\
				double_pixel_copy_increment,		\
				quatro_pixel_copy_increment, width)	\
{ int n, w = width;								\
        if(w & 1) {							\
	  pixel_copy_increment;						\
	  w--;								\
	}								\
	if(w & 2) {							\
	  double_pixel_copy_increment;					\
	  w -= 2;							\
	}								\
	if ( w > 0 ) {							\
	    n = ( w + 7 ) / 8;						\
	    switch( w & 4 ) {						\
	    case 0: do {	quatro_pixel_copy_increment;		\
	    case 4:		quatro_pixel_copy_increment;		\
		    } while ( --n > 0 );					\
	    }								\
	}								\
}

/* Use the 8-times version of the loop by default: DUFFS_LOOP simply
   forwards both arguments to DUFFS_LOOP8. */
#define DUFFS_LOOP(pixel_copy_increment, width)				\
	DUFFS_LOOP8(pixel_copy_increment, width)

#else

/*
 * Don't use Duff's device to unroll loops.
 * Plain-loop version of DUFFS_LOOP_DOUBLE2: runs pixel_copy_increment once
 * when `width` is odd, then double_pixel_copy_increment (width / 2) times.
 * The local counter is named `n`; the pasted statements must not rely on a
 * variable of that name from the enclosing scope.
 */
#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment,			\
			 double_pixel_copy_increment, width)		\
{ int n = width;								\
    if( n & 1 ) {							\
	pixel_copy_increment;						\
	n--;								\
    }									\
    n=n>>1;								\
    for(; n > 0; --n) {   						\
	double_pixel_copy_increment;					\
    }									\
}

/*
 * Don't use Duff's device to unroll loops.
 * Plain-loop version of DUFFS_LOOP_QUATRO2: runs pixel_copy_increment once
 * when bit 0 of `width` is set, double_pixel_copy_increment once when bit 1
 * is set, then quatro_pixel_copy_increment (remaining / 4) times.
 * The local counter is named `n`; the pasted statements must not rely on a
 * variable of that name from the enclosing scope.
 */
#define DUFFS_LOOP_QUATRO2(pixel_copy_increment,			\
				double_pixel_copy_increment,		\
				quatro_pixel_copy_increment, width)	\
{ int n = width;								\
        if(n & 1) {							\
	  pixel_copy_increment;						\
	  n--;								\
	}								\
	if(n & 2) {							\
	  double_pixel_copy_increment;					\
	  n -= 2;							\
	}								\
	n=n>>2;								\
	for(; n > 0; --n) {   						\
	  quatro_pixel_copy_increment;					\
        }								\
}

/*
 * Don't use Duff's device to unroll loops.
 * Plain-loop version of DUFFS_LOOP: runs pixel_copy_increment `width` times,
 * and not at all when width is zero or negative.
 * The local counter is named `n`; the pasted statement must not rely on a
 * variable of that name from the enclosing scope.
 */
#define DUFFS_LOOP(pixel_copy_increment, width)				\
{ int n;								\
	for ( n=width; n > 0; --n ) {					\
		pixel_copy_increment;					\
	}								\
}
/* Without Duff's device the 8- and 4-times variants are just the plain
   loop: both forward their arguments to DUFFS_LOOP. */
#define DUFFS_LOOP8(pixel_copy_increment, width)			\
	DUFFS_LOOP(pixel_copy_increment, width)
#define DUFFS_LOOP4(pixel_copy_increment, width)			\
	DUFFS_LOOP(pixel_copy_increment, width)

#endif /* USE_DUFFS_LOOP */

/* Prevent Visual C++ from printing warning 4550.  The guard applies to any
   Microsoft compiler reporting _MSC_VER >= 600, not only Visual C++ 6.0.
   NOTE(review): which construct in the blitters triggers C4550 is not
   visible from this header -- confirm before removing the pragma. */
#if defined(_MSC_VER) && (_MSC_VER >= 600)
#pragma warning(disable: 4550)
#endif

#endif /* _SDL_blit_h */
Commit	Line	Data
e14743d1	1	/*
	2	SDL - Simple DirectMedia Layer
	3	Copyright (C) 1997-2009 Sam Lantinga
	4
	5	This library is free software; you can redistribute it and/or
	6	modify it under the terms of the GNU Lesser General Public
	7	License as published by the Free Software Foundation; either
	8	version 2.1 of the License, or (at your option) any later version.
	9
	10	This library is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	Lesser General Public License for more details.
	14
	15	You should have received a copy of the GNU Lesser General Public
	16	License along with this library; if not, write to the Free Software
	17	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
	18
	19	Sam Lantinga
	20	slouken@libsdl.org
	21	*/
	22	#include "SDL_config.h"
	23
	24	#ifndef _SDL_blit_h
	25	#define _SDL_blit_h
	26
	27	#include "SDL_endian.h"
	28
	29	/* The structure passed to the low level blit functions */
	30	typedef struct {
	31	Uint8 *s_pixels;
	32	int s_width;
	33	int s_height;
	34	int s_skip;
	35	Uint8 *d_pixels;
	36	int d_width;
	37	int d_height;
	38	int d_skip;
	39	void *aux_data;
	40	SDL_PixelFormat *src;
	41	Uint8 *table;
	42	SDL_PixelFormat *dst;
	43	} SDL_BlitInfo;
	44
	45	/* The type definition for the low level blit functions */
	46	typedef void (SDL_loblit)(SDL_BlitInfo info);
	47
	48	/* This is the private info structure for software accelerated blits */
	49	struct private_swaccel {
	50	SDL_loblit blit;
	51	void *aux_data;
	52	};
	53
	54	/* Blit mapping definition */
	55	typedef struct SDL_BlitMap {
	56	SDL_Surface *dst;
	57	int identity;
	58	Uint8 *table;
	59	SDL_blit hw_blit;
	60	SDL_blit sw_blit;
	61	struct private_hwaccel *hw_data;
	62	struct private_swaccel *sw_data;
	63
	64	/* the version count matches the destination; mismatch indicates
65	an invalid mapping */
66	unsigned int format_version;
67	} SDL_BlitMap;
68
69
70	/* Functions found in SDL_blit.c */
71	extern int SDL_CalculateBlit(SDL_Surface *surface);
72
73	/* Functions found in SDL_blit_{0,1,N,A}.c */
74	extern SDL_loblit SDL_CalculateBlit0(SDL_Surface *surface, int complex);
75	extern SDL_loblit SDL_CalculateBlit1(SDL_Surface *surface, int complex);
76	extern SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int complex);
77	extern SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int complex);
78
79	/*
80	* Useful macros for blitting routines
81	*/
82
83	#define FORMAT_EQUAL(A, B) \
84	((A)->BitsPerPixel == (B)->BitsPerPixel \
85	&& ((A)->Rmask == (B)->Rmask) && ((A)->Amask == (B)->Amask))
86
87	/* Load pixel of the specified format from a buffer and get its R-G-B values */
88	/* FIXME: rescale values to 0..255 here? */
89	#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b) \
90	{ \
91	r = (((Pixel&fmt->Rmask)>>fmt->Rshift)<<fmt->Rloss); \
92	g = (((Pixel&fmt->Gmask)>>fmt->Gshift)<<fmt->Gloss); \
93	b = (((Pixel&fmt->Bmask)>>fmt->Bshift)<<fmt->Bloss); \
94	}
95	#define RGB_FROM_RGB565(Pixel, r, g, b) \
96	{ \
97	r = (((Pixel&0xF800)>>11)<<3); \
98	g = (((Pixel&0x07E0)>>5)<<2); \
99	b = ((Pixel&0x001F)<<3); \
100	}
101	#define RGB_FROM_RGB555(Pixel, r, g, b) \
102	{ \
103	r = (((Pixel&0x7C00)>>10)<<3); \
104	g = (((Pixel&0x03E0)>>5)<<3); \
105	b = ((Pixel&0x001F)<<3); \
106	}
107	#define RGB_FROM_RGB888(Pixel, r, g, b) \
108	{ \
109	r = ((Pixel&0xFF0000)>>16); \
110	g = ((Pixel&0xFF00)>>8); \
111	b = (Pixel&0xFF); \
112	}
113	#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel) \
114	do { \
115	switch (bpp) { \
116	case 2: \
117	Pixel = ((Uint16 )(buf)); \
118	break; \
119	\
120	case 3: { \
121	Uint8 B = (Uint8 )(buf); \
122	if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
123	Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
124	} else { \
125	Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
126	} \
127	} \
128	break; \
129	\
130	case 4: \
131	Pixel = ((Uint32 )(buf)); \
132	break; \
133	\
134	default: \
135	Pixel = 0; /* appease gcc */ \
136	break; \
137	} \
138	} while(0)
139
140	#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b) \
141	do { \
142	switch (bpp) { \
143	case 2: \
144	Pixel = ((Uint16 )(buf)); \
145	break; \
146	\
147	case 3: { \
148	Uint8 B = (Uint8 )buf; \
149	if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
150	Pixel = B[0] + (B[1] << 8) + (B[2] << 16); \
151	} else { \
152	Pixel = (B[0] << 16) + (B[1] << 8) + B[2]; \
153	} \
154	} \
155	break; \
156	\
157	case 4: \
158	Pixel = ((Uint32 )(buf)); \
159	break; \
160	\
161	default: \
162	Pixel = 0; /* prevent gcc from complaining */ \
163	break; \
164	} \
165	RGB_FROM_PIXEL(Pixel, fmt, r, g, b); \
166	} while(0)
167
168	/* Assemble R-G-B values into a specified pixel format and store them */
169	#ifdef __NDS__ /* FIXME */
170	#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
171	{ \
172	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)\| \
173	((g>>fmt->Gloss)<<fmt->Gshift)\| \
174	((b>>fmt->Bloss)<<fmt->Bshift) \| (1<<15); \
175	}
176	#else
177	#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
178	{ \
179	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)\| \
180	((g>>fmt->Gloss)<<fmt->Gshift)\| \
181	((b>>fmt->Bloss)<<fmt->Bshift); \
182	}
183	#endif /* __NDS__ FIXME */
184	#define RGB565_FROM_RGB(Pixel, r, g, b) \
185	{ \
186	Pixel = ((r>>3)<<11)\|((g>>2)<<5)\|(b>>3); \
187	}
188	#define RGB555_FROM_RGB(Pixel, r, g, b) \
189	{ \
190	Pixel = ((r>>3)<<10)\|((g>>3)<<5)\|(b>>3); \
191	}
192	#define RGB888_FROM_RGB(Pixel, r, g, b) \
193	{ \
194	Pixel = (r<<16)\|(g<<8)\|b; \
195	}
196	#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) \
197	{ \
198	switch (bpp) { \
199	case 2: { \
200	Uint16 Pixel; \
201	\
202	PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
203	((Uint16 )(buf)) = Pixel; \
204	} \
205	break; \
206	\
207	case 3: { \
208	if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
209	*((buf)+fmt->Rshift/8) = r; \
210	*((buf)+fmt->Gshift/8) = g; \
211	*((buf)+fmt->Bshift/8) = b; \
212	} else { \
213	*((buf)+2-fmt->Rshift/8) = r; \
214	*((buf)+2-fmt->Gshift/8) = g; \
215	*((buf)+2-fmt->Bshift/8) = b; \
216	} \
217	} \
218	break; \
219	\
220	case 4: { \
221	Uint32 Pixel; \
222	\
223	PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
224	((Uint32 )(buf)) = Pixel; \
225	} \
226	break; \
227	} \
228	}
229	#define ASSEMBLE_RGB_AMASK(buf, bpp, fmt, r, g, b, Amask) \
230	{ \
231	switch (bpp) { \
232	case 2: { \
233	Uint16 *bufp; \
234	Uint16 Pixel; \
235	\
236	bufp = (Uint16 *)buf; \
237	PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
238	bufp = Pixel \| (bufp & Amask); \
239	} \
240	break; \
241	\
242	case 3: { \
243	if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
244	*((buf)+fmt->Rshift/8) = r; \
245	*((buf)+fmt->Gshift/8) = g; \
246	*((buf)+fmt->Bshift/8) = b; \
247	} else { \
248	*((buf)+2-fmt->Rshift/8) = r; \
249	*((buf)+2-fmt->Gshift/8) = g; \
250	*((buf)+2-fmt->Bshift/8) = b; \
251	} \
252	} \
253	break; \
254	\
255	case 4: { \
256	Uint32 *bufp; \
257	Uint32 Pixel; \
258	\
259	bufp = (Uint32 *)buf; \
260	PIXEL_FROM_RGB(Pixel, fmt, r, g, b); \
261	bufp = Pixel \| (bufp & Amask); \
262	} \
263	break; \
264	} \
265	}
266
267	/* FIXME: Should we rescale alpha into 0..255 here? */
268	#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a) \
269	{ \
270	r = ((Pixel&fmt->Rmask)>>fmt->Rshift)<<fmt->Rloss; \
271	g = ((Pixel&fmt->Gmask)>>fmt->Gshift)<<fmt->Gloss; \
272	b = ((Pixel&fmt->Bmask)>>fmt->Bshift)<<fmt->Bloss; \
273	a = ((Pixel&fmt->Amask)>>fmt->Ashift)<<fmt->Aloss; \
274	}
275	#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a) \
276	{ \
277	r = (Pixel&fmt->Rmask)>>fmt->Rshift; \
278	g = (Pixel&fmt->Gmask)>>fmt->Gshift; \
279	b = (Pixel&fmt->Bmask)>>fmt->Bshift; \
280	a = (Pixel&fmt->Amask)>>fmt->Ashift; \
281	}
282	#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a) \
283	{ \
284	r = (Pixel>>24); \
285	g = ((Pixel>>16)&0xFF); \
286	b = ((Pixel>>8)&0xFF); \
287	a = (Pixel&0xFF); \
288	}
289	#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a) \
290	{ \
291	r = ((Pixel>>16)&0xFF); \
292	g = ((Pixel>>8)&0xFF); \
293	b = (Pixel&0xFF); \
294	a = (Pixel>>24); \
295	}
296	#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a) \
297	{ \
298	r = (Pixel&0xFF); \
299	g = ((Pixel>>8)&0xFF); \
300	b = ((Pixel>>16)&0xFF); \
301	a = (Pixel>>24); \
302	}
303	#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a) \
304	do { \
305	switch (bpp) { \
306	case 2: \
307	Pixel = ((Uint16 )(buf)); \
308	break; \
309	\
310	case 3: {/* FIXME: broken code (no alpha) */ \
311	Uint8 b = (Uint8 )buf; \
312	if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
313	Pixel = b[0] + (b[1] << 8) + (b[2] << 16); \
314	} else { \
315	Pixel = (b[0] << 16) + (b[1] << 8) + b[2]; \
316	} \
317	} \
318	break; \
319	\
320	case 4: \
321	Pixel = ((Uint32 )(buf)); \
322	break; \
323	\
324	default: \
325	Pixel = 0; /* stop gcc complaints */ \
326	break; \
327	} \
328	RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a); \
329	Pixel &= ~fmt->Amask; \
330	} while(0)
331
332	/* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
333	#ifdef __NDS__ /* FIXME */
334	#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
335	{ \
336	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)\| \
337	((g>>fmt->Gloss)<<fmt->Gshift)\| \
338	((b>>fmt->Bloss)<<fmt->Bshift)\| \
339	((a>>fmt->Aloss)<<fmt->Ashift) \| (1<<15); \
340	}
341	#else
342	#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
343	{ \
344	Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)\| \
345	((g>>fmt->Gloss)<<fmt->Gshift)\| \
346	((b>>fmt->Bloss)<<fmt->Bshift)\| \
347	((a>>fmt->Aloss)<<fmt->Ashift); \
348	}
349	#endif /* __NDS__ FIXME */
350	#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a) \
351	{ \
352	switch (bpp) { \
353	case 2: { \
354	Uint16 Pixel; \
355	\
356	PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a); \
357	((Uint16 )(buf)) = Pixel; \
358	} \
359	break; \
360	\
361	case 3: { /* FIXME: broken code (no alpha) */ \
362	if(SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
363	*((buf)+fmt->Rshift/8) = r; \
364	*((buf)+fmt->Gshift/8) = g; \
365	*((buf)+fmt->Bshift/8) = b; \
366	} else { \
367	*((buf)+2-fmt->Rshift/8) = r; \
368	*((buf)+2-fmt->Gshift/8) = g; \
369	*((buf)+2-fmt->Bshift/8) = b; \
370	} \
371	} \
372	break; \
373	\
374	case 4: { \
375	Uint32 Pixel; \
376	\
377	PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a); \
378	((Uint32 )(buf)) = Pixel; \
379	} \
380	break; \
381	} \
382	}
383
384	/* Blend the RGB values of two Pixels based on a source alpha value */
385	#define ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB) \
386	do { \
387	dR = (((sR-dR)*(A)+255)>>8)+dR; \
388	dG = (((sG-dG)*(A)+255)>>8)+dG; \
389	dB = (((sB-dB)*(A)+255)>>8)+dB; \
390	} while(0)
391
392
393	/* This is a very useful loop for optimizing blitters */
394	#if defined(_MSC_VER) && (_MSC_VER == 1300)
395	/* There's a bug in the Visual C++ 7 optimizer when compiling this code */
396	#else
397	#define USE_DUFFS_LOOP
398	#endif
399	#ifdef USE_DUFFS_LOOP
400
401	/* 8-times unrolled loop */
402	#define DUFFS_LOOP8(pixel_copy_increment, width) \
403	{ int n = (width+7)/8; \
404	switch (width & 7) { \
405	case 0: do { pixel_copy_increment; \
406	case 7: pixel_copy_increment; \
407	case 6: pixel_copy_increment; \
408	case 5: pixel_copy_increment; \
409	case 4: pixel_copy_increment; \
410	case 3: pixel_copy_increment; \
411	case 2: pixel_copy_increment; \
412	case 1: pixel_copy_increment; \
413	} while ( --n > 0 ); \
414	} \
415	}
416
417	/* 4-times unrolled loop */
418	#define DUFFS_LOOP4(pixel_copy_increment, width) \
419	{ int n = (width+3)/4; \
420	switch (width & 3) { \
421	case 0: do { pixel_copy_increment; \
422	case 3: pixel_copy_increment; \
423	case 2: pixel_copy_increment; \
424	case 1: pixel_copy_increment; \
425	} while ( --n > 0 ); \
426	} \
427	}
428
429	/* 2 - times unrolled loop */
430	#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
431	double_pixel_copy_increment, width) \
432	{ int n, w = width; \
433	if( w & 1 ) { \
434	pixel_copy_increment; \
435	w--; \
436	} \
437	if ( w > 0 ) { \
438	n = ( w + 2) / 4; \
439	switch( w & 2 ) { \
440	case 0: do { double_pixel_copy_increment; \
441	case 2: double_pixel_copy_increment; \
442	} while ( --n > 0 ); \
443	} \
444	} \
445	}
446
447	/* 2 - times unrolled loop 4 pixels */
448	#define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
449	double_pixel_copy_increment, \
450	quatro_pixel_copy_increment, width) \
451	{ int n, w = width; \
452	if(w & 1) { \
453	pixel_copy_increment; \
454	w--; \
455	} \
456	if(w & 2) { \
457	double_pixel_copy_increment; \
458	w -= 2; \
459	} \
460	if ( w > 0 ) { \
461	n = ( w + 7 ) / 8; \
462	switch( w & 4 ) { \
463	case 0: do { quatro_pixel_copy_increment; \
464	case 4: quatro_pixel_copy_increment; \
465	} while ( --n > 0 ); \
466	} \
467	} \
468	}
469
470	/* Use the 8-times version of the loop by default */
471	#define DUFFS_LOOP(pixel_copy_increment, width) \
472	DUFFS_LOOP8(pixel_copy_increment, width)
473
474	#else
475
476	/* Don't use Duff's device to unroll loops */
477	#define DUFFS_LOOP_DOUBLE2(pixel_copy_increment, \
478	double_pixel_copy_increment, width) \
479	{ int n = width; \
480	if( n & 1 ) { \
481	pixel_copy_increment; \
482	n--; \
483	} \
484	n=n>>1; \
485	for(; n > 0; --n) { \
486	double_pixel_copy_increment; \
487	} \
488	}
489
490	/* Don't use Duff's device to unroll loops */
491	#define DUFFS_LOOP_QUATRO2(pixel_copy_increment, \
492	double_pixel_copy_increment, \
493	quatro_pixel_copy_increment, width) \
494	{ int n = width; \
495	if(n & 1) { \
496	pixel_copy_increment; \
497	n--; \
498	} \
499	if(n & 2) { \
500	double_pixel_copy_increment; \
501	n -= 2; \
502	} \
503	n=n>>2; \
504	for(; n > 0; --n) { \
505	quatro_pixel_copy_increment; \
506	} \
507	}
508
509	/* Don't use Duff's device to unroll loops */
510	#define DUFFS_LOOP(pixel_copy_increment, width) \
511	{ int n; \
512	for ( n=width; n > 0; --n ) { \
513	pixel_copy_increment; \
514	} \
515	}
516	#define DUFFS_LOOP8(pixel_copy_increment, width) \
517	DUFFS_LOOP(pixel_copy_increment, width)
518	#define DUFFS_LOOP4(pixel_copy_increment, width) \
519	DUFFS_LOOP(pixel_copy_increment, width)
520
521	#endif /* USE_DUFFS_LOOP */
522
523	/* Prevent Visual C++ 6.0 from printing out stupid warnings */
524	#if defined(_MSC_VER) && (_MSC_VER >= 600)
525	#pragma warning(disable: 4550)
526	#endif
527
528	#endif /* _SDL_blit_h */