add some NEON 32bpp blitters
[sdl_omap.git] / src / video / SDL_blit_N.c
1 /*
2     SDL - Simple DirectMedia Layer
3     Copyright (C) 1997-2009 Sam Lantinga
4
5     This library is free software; you can redistribute it and/or
6     modify it under the terms of the GNU Lesser General Public
7     License as published by the Free Software Foundation; either
8     version 2.1 of the License, or (at your option) any later version.
9
10     This library is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13     Lesser General Public License for more details.
14
15     You should have received a copy of the GNU Lesser General Public
16     License along with this library; if not, write to the Free Software
17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
19     Sam Lantinga
20     slouken@libsdl.org
21 */
22 #include "SDL_config.h"
23
24 #include "SDL_video.h"
25 #include "SDL_endian.h"
26 #include "SDL_cpuinfo.h"
27 #include "SDL_blit.h"
28
29 /* Functions to blit from N-bit surfaces to other surfaces */
30
31 #if SDL_ALTIVEC_BLITTERS
32 #if __MWERKS__
33 #pragma altivec_model on
34 #endif
35 #ifdef HAVE_ALTIVEC_H
36 #include <altivec.h>
37 #endif
38 #define assert(X)
39 #ifdef __MACOSX__
40 #include <sys/sysctl.h>
/*
 * Ask the kernel for the L3 cache size through the "hw.l3cachesize" sysctl.
 * Returns the value the sysctl reports, or 0 when the lookup fails.
 */
static size_t GetL3CacheSize( void )
{
    u_int64_t cache_bytes = 0;
    size_t value_len = sizeof( cache_bytes );

    if( sysctlbyname( "hw.l3cachesize", &cache_bytes, &value_len, NULL, 0 ) != 0 ) {
        return 0;
    }

    return cache_bytes;
}
53 #else
/* The cache size is not queried on this platform: report a fixed 2 MiB. */
static size_t GetL3CacheSize( void )
{
    /* XXX: Just guess G4 */
    return (size_t)2 * 1024 * 1024;
}
59 #endif /* __MACOSX__ */
60
61 #if (defined(__MACOSX__) && (__GNUC__ < 4))
62     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
63         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
64     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
65         (vector unsigned short) ( a,b,c,d,e,f,g,h )
66 #else
67     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
68         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
69     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
70         (vector unsigned short) { a,b,c,d,e,f,g,h }
71 #endif
72
/* Low four address bits of x: non-zero when x is not on a 16-byte boundary.
 * The argument is parenthesized so expressions such as (p + n) are cast whole. */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)
74 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
75                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
76                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
77                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
78                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
79
/*
 * Pack four channel values into one pixel using dstfmt's per-channel
 * shift and mask fields.  Every argument is parenthesized so that
 * expression arguments expand correctly, and each channel is widened to
 * Uint32 before shifting so a value >= 0x80 shifted by 24 does not
 * overflow a signed int.
 */
#define MAKE8888(dstfmt, r, g, b, a)  \
    ( (((Uint32)(r) << (dstfmt)->Rshift) & (dstfmt)->Rmask) | \
      (((Uint32)(g) << (dstfmt)->Gshift) & (dstfmt)->Gmask) | \
      (((Uint32)(b) << (dstfmt)->Bshift) & (dstfmt)->Bmask) | \
      (((Uint32)(a) << (dstfmt)->Ashift) & (dstfmt)->Amask) )
85
86 /*
87  * Data Stream Touch...Altivec cache prefetching.
88  *
89  *  Don't use this on a G5...however, the speed boost is very significant
90  *   on a G4.
91  */
92 #define DST_CHAN_SRC 1
93 #define DST_CHAN_DEST 2
94
95 /* macro to set DST control word value... */
96 #define DST_CTRL(size, count, stride) \
97     (((size) << 24) | ((count) << 16) | (stride))
98
99 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
100     ? vec_lvsl(0, src) \
101     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
102
103 /* Calculate the permute vector used for 32->32 swizzling */
104 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
105                                   const SDL_PixelFormat *dstfmt)
106 {
107     /*
108     * We have to assume that the bits that aren't used by other
109      *  colors is alpha, and it's one complete byte, since some formats
110      *  leave alpha with a zero mask, but we should still swizzle the bits.
111      */
112     /* ARGB */
113     const static struct SDL_PixelFormat default_pixel_format = {
114         NULL, 0, 0,
115         0, 0, 0, 0,
116         16, 8, 0, 24,
117         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
118         0, 0};
119     if (!srcfmt) {
120         srcfmt = &default_pixel_format;
121     }
122     if (!dstfmt) {
123         dstfmt = &default_pixel_format;
124     }
125     const vector unsigned char plus = VECUINT8_LITERAL(
126                                       0x00, 0x00, 0x00, 0x00,
127                                       0x04, 0x04, 0x04, 0x04,
128                                       0x08, 0x08, 0x08, 0x08,
129                                       0x0C, 0x0C, 0x0C, 0x0C );
130     vector unsigned char vswiz;
131     vector unsigned int srcvec;
132 #define RESHIFT(X) (3 - ((X) >> 3))
133     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
134     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
135     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
136     Uint32 amask;
137     /* Use zero for alpha if either surface doesn't have alpha */
138     if (dstfmt->Amask) {
139         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
140     } else {    
141         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
142     }           
143 #undef RESHIFT  
144     ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
145     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
146     return(vswiz);
147 }
148
149 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
150 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
151     int height = info->d_height;
152     Uint8 *src = (Uint8 *) info->s_pixels;
153     int srcskip = info->s_skip;
154     Uint8 *dst = (Uint8 *) info->d_pixels;
155     int dstskip = info->d_skip;
156     SDL_PixelFormat *srcfmt = info->src;
157     vector unsigned char valpha = vec_splat_u8(0);
158     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
159     vector unsigned char vgmerge = VECUINT8_LITERAL(
160         0x00, 0x02, 0x00, 0x06,
161         0x00, 0x0a, 0x00, 0x0e,
162         0x00, 0x12, 0x00, 0x16,
163         0x00, 0x1a, 0x00, 0x1e);
164     vector unsigned short v1 = vec_splat_u16(1);
165     vector unsigned short v3 = vec_splat_u16(3);
166     vector unsigned short v3f = VECUINT16_LITERAL(
167         0x003f, 0x003f, 0x003f, 0x003f,
168         0x003f, 0x003f, 0x003f, 0x003f);
169     vector unsigned short vfc = VECUINT16_LITERAL(
170         0x00fc, 0x00fc, 0x00fc, 0x00fc,
171         0x00fc, 0x00fc, 0x00fc, 0x00fc);
172     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
173     vf800 = vec_sl(vf800, vec_splat_u16(8));
174
175     while (height--) {
176         vector unsigned char valigner;
177         vector unsigned char voverflow;
178         vector unsigned char vsrc;
179
180         int width = info->d_width;
181         int extrawidth;
182
183         /* do scalar until we can align... */
184 #define ONE_PIXEL_BLEND(condition, widthvar) \
185         while (condition) { \
186             Uint32 Pixel; \
187             unsigned sR, sG, sB, sA; \
188             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
189                           sR, sG, sB, sA); \
190             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
191                                 ((sG << 3) & 0x000007E0) | \
192                                 ((sB >> 3) & 0x0000001F)); \
193             dst += 2; \
194             src += 4; \
195             widthvar--; \
196         }
197
198         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
199
200         /* After all that work, here's the vector part! */
201         extrawidth = (width % 8);  /* trailing unaligned stores */
202         width -= extrawidth;
203         vsrc = vec_ld(0, src);
204         valigner = VEC_ALIGNER(src);
205
206         while (width) {
207             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
208             vector unsigned int vsrc1, vsrc2;
209             vector unsigned char vdst;
210
211             voverflow = vec_ld(15, src);
212             vsrc = vec_perm(vsrc, voverflow, valigner);
213             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
214             src += 16;
215             vsrc = voverflow;
216             voverflow = vec_ld(15, src);
217             vsrc = vec_perm(vsrc, voverflow, valigner);
218             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
219             /* 1555 */
220             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
221             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
222             vgpixel = vec_and(vgpixel, vfc);
223             vgpixel = vec_sl(vgpixel, v3);
224             vrpixel = vec_sl(vpixel, v1);
225             vrpixel = vec_and(vrpixel, vf800);
226             vbpixel = vec_and(vpixel, v3f);
227             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
228             /* 565 */
229             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
230             vec_st(vdst, 0, dst);
231
232             width -= 8;
233             src += 16;
234             dst += 16;
235             vsrc = voverflow;
236         }
237
238         assert(width == 0);
239
240         /* do scalar until we can align... */
241         ONE_PIXEL_BLEND((extrawidth), extrawidth);
242 #undef ONE_PIXEL_BLEND
243
244         src += srcskip;  /* move to next row, accounting for pitch. */
245         dst += dstskip;
246     }
247
248
249 }
250
251 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
252     int height = info->d_height;
253     Uint8 *src = (Uint8 *) info->s_pixels;
254     int srcskip = info->s_skip;
255     Uint8 *dst = (Uint8 *) info->d_pixels;
256     int dstskip = info->d_skip;
257     SDL_PixelFormat *srcfmt = info->src;
258     SDL_PixelFormat *dstfmt = info->dst;
259     unsigned alpha;
260     vector unsigned char valpha;
261     vector unsigned char vpermute;
262     vector unsigned short vf800;
263     vector unsigned int v8 = vec_splat_u32(8);
264     vector unsigned int v16 = vec_add(v8, v8);
265     vector unsigned short v2 = vec_splat_u16(2);
266     vector unsigned short v3 = vec_splat_u16(3);
267     /* 
268         0x10 - 0x1f is the alpha
269         0x00 - 0x0e evens are the red
270         0x01 - 0x0f odds are zero
271     */
272     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
273         0x10, 0x00, 0x01, 0x01,
274         0x10, 0x02, 0x01, 0x01,
275         0x10, 0x04, 0x01, 0x01,
276         0x10, 0x06, 0x01, 0x01
277     );
278     vector unsigned char vredalpha2 = (vector unsigned char) (
279         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
280     );
281     /*
282         0x00 - 0x0f is ARxx ARxx ARxx ARxx
283         0x11 - 0x0f odds are blue
284     */
285     vector unsigned char vblue1 = VECUINT8_LITERAL(
286         0x00, 0x01, 0x02, 0x11,
287         0x04, 0x05, 0x06, 0x13,
288         0x08, 0x09, 0x0a, 0x15,
289         0x0c, 0x0d, 0x0e, 0x17
290     );
291     vector unsigned char vblue2 = (vector unsigned char)(
292         vec_add((vector unsigned int)vblue1, v8)
293     );
294     /*
295         0x00 - 0x0f is ARxB ARxB ARxB ARxB
296         0x10 - 0x0e evens are green
297     */
298     vector unsigned char vgreen1 = VECUINT8_LITERAL(
299         0x00, 0x01, 0x10, 0x03,
300         0x04, 0x05, 0x12, 0x07,
301         0x08, 0x09, 0x14, 0x0b,
302         0x0c, 0x0d, 0x16, 0x0f
303     );
304     vector unsigned char vgreen2 = (vector unsigned char)(
305         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
306     );
307     
308
309     assert(srcfmt->BytesPerPixel == 2);
310     assert(dstfmt->BytesPerPixel == 4);
311
312     vf800 = (vector unsigned short)vec_splat_u8(-7);
313     vf800 = vec_sl(vf800, vec_splat_u16(8));
314
315     if (dstfmt->Amask && srcfmt->alpha) {
316         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
317         valpha = vec_splat(valpha, 0);
318     } else {
319         alpha = 0;
320         valpha = vec_splat_u8(0);
321     }
322
323     vpermute = calc_swizzle32(NULL, dstfmt);
324     while (height--) {
325         vector unsigned char valigner;
326         vector unsigned char voverflow;
327         vector unsigned char vsrc;
328
329         int width = info->d_width;
330         int extrawidth;
331
332         /* do scalar until we can align... */
333 #define ONE_PIXEL_BLEND(condition, widthvar) \
334         while (condition) { \
335             unsigned sR, sG, sB; \
336             unsigned short Pixel = *((unsigned short *)src); \
337             sR = (Pixel >> 8) & 0xf8; \
338             sG = (Pixel >> 3) & 0xfc; \
339             sB = (Pixel << 3) & 0xf8; \
340             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
341             src += 2; \
342             dst += 4; \
343             widthvar--; \
344         }
345         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
346
347         /* After all that work, here's the vector part! */
348         extrawidth = (width % 8);  /* trailing unaligned stores */
349         width -= extrawidth;
350         vsrc = vec_ld(0, src);
351         valigner = VEC_ALIGNER(src);
352
353         while (width) {
354             vector unsigned short vR, vG, vB;
355             vector unsigned char vdst1, vdst2;
356
357             voverflow = vec_ld(15, src);
358             vsrc = vec_perm(vsrc, voverflow, valigner);
359
360             vR = vec_and((vector unsigned short)vsrc, vf800);
361             vB = vec_sl((vector unsigned short)vsrc, v3);
362             vG = vec_sl(vB, v2);
363
364             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
365             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
366             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
367             vdst1 = vec_perm(vdst1, valpha, vpermute);
368             vec_st(vdst1, 0, dst);
369
370             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
371             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
372             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
373             vdst2 = vec_perm(vdst2, valpha, vpermute);
374             vec_st(vdst2, 16, dst);
375             
376             width -= 8;
377             dst += 32;
378             src += 16;
379             vsrc = voverflow;
380         }
381
382         assert(width == 0);
383
384
385         /* do scalar until we can align... */
386         ONE_PIXEL_BLEND((extrawidth), extrawidth);
387 #undef ONE_PIXEL_BLEND
388
389         src += srcskip;  /* move to next row, accounting for pitch. */
390         dst += dstskip;
391     }
392
393 }
394
395
396 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
397     int height = info->d_height;
398     Uint8 *src = (Uint8 *) info->s_pixels;
399     int srcskip = info->s_skip;
400     Uint8 *dst = (Uint8 *) info->d_pixels;
401     int dstskip = info->d_skip;
402     SDL_PixelFormat *srcfmt = info->src;
403     SDL_PixelFormat *dstfmt = info->dst;
404     unsigned alpha;
405     vector unsigned char valpha;
406     vector unsigned char vpermute;
407     vector unsigned short vf800;
408     vector unsigned int v8 = vec_splat_u32(8);
409     vector unsigned int v16 = vec_add(v8, v8);
410     vector unsigned short v1 = vec_splat_u16(1);
411     vector unsigned short v3 = vec_splat_u16(3);
412     /* 
413         0x10 - 0x1f is the alpha
414         0x00 - 0x0e evens are the red
415         0x01 - 0x0f odds are zero
416     */
417     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
418         0x10, 0x00, 0x01, 0x01,
419         0x10, 0x02, 0x01, 0x01,
420         0x10, 0x04, 0x01, 0x01,
421         0x10, 0x06, 0x01, 0x01
422     );
423     vector unsigned char vredalpha2 = (vector unsigned char)(
424         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
425     );
426     /*
427         0x00 - 0x0f is ARxx ARxx ARxx ARxx
428         0x11 - 0x0f odds are blue
429     */
430     vector unsigned char vblue1 = VECUINT8_LITERAL(
431         0x00, 0x01, 0x02, 0x11,
432         0x04, 0x05, 0x06, 0x13,
433         0x08, 0x09, 0x0a, 0x15,
434         0x0c, 0x0d, 0x0e, 0x17
435     );
436     vector unsigned char vblue2 = (vector unsigned char)(
437         vec_add((vector unsigned int)vblue1, v8)
438     );
439     /*
440         0x00 - 0x0f is ARxB ARxB ARxB ARxB
441         0x10 - 0x0e evens are green
442     */
443     vector unsigned char vgreen1 = VECUINT8_LITERAL(
444         0x00, 0x01, 0x10, 0x03,
445         0x04, 0x05, 0x12, 0x07,
446         0x08, 0x09, 0x14, 0x0b,
447         0x0c, 0x0d, 0x16, 0x0f
448     );
449     vector unsigned char vgreen2 = (vector unsigned char)(
450         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
451     );
452     
453
454     assert(srcfmt->BytesPerPixel == 2);
455     assert(dstfmt->BytesPerPixel == 4);
456
457     vf800 = (vector unsigned short)vec_splat_u8(-7);
458     vf800 = vec_sl(vf800, vec_splat_u16(8));
459
460     if (dstfmt->Amask && srcfmt->alpha) {
461         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
462         valpha = vec_splat(valpha, 0);
463     } else {
464         alpha = 0;
465         valpha = vec_splat_u8(0);
466     }
467
468     vpermute = calc_swizzle32(NULL, dstfmt);
469     while (height--) {
470         vector unsigned char valigner;
471         vector unsigned char voverflow;
472         vector unsigned char vsrc;
473
474         int width = info->d_width;
475         int extrawidth;
476
477         /* do scalar until we can align... */
478 #define ONE_PIXEL_BLEND(condition, widthvar) \
479         while (condition) { \
480             unsigned sR, sG, sB; \
481             unsigned short Pixel = *((unsigned short *)src); \
482             sR = (Pixel >> 7) & 0xf8; \
483             sG = (Pixel >> 2) & 0xf8; \
484             sB = (Pixel << 3) & 0xf8; \
485             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
486             src += 2; \
487             dst += 4; \
488             widthvar--; \
489         }
490         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
491
492         /* After all that work, here's the vector part! */
493         extrawidth = (width % 8);  /* trailing unaligned stores */
494         width -= extrawidth;
495         vsrc = vec_ld(0, src);
496         valigner = VEC_ALIGNER(src);
497
498         while (width) {
499             vector unsigned short vR, vG, vB;
500             vector unsigned char vdst1, vdst2;
501
502             voverflow = vec_ld(15, src);
503             vsrc = vec_perm(vsrc, voverflow, valigner);
504
505             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
506             vB = vec_sl((vector unsigned short)vsrc, v3);
507             vG = vec_sl(vB, v3);
508
509             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
510             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
511             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
512             vdst1 = vec_perm(vdst1, valpha, vpermute);
513             vec_st(vdst1, 0, dst);
514
515             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
516             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
517             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
518             vdst2 = vec_perm(vdst2, valpha, vpermute);
519             vec_st(vdst2, 16, dst);
520             
521             width -= 8;
522             dst += 32;
523             src += 16;
524             vsrc = voverflow;
525         }
526
527         assert(width == 0);
528
529
530         /* do scalar until we can align... */
531         ONE_PIXEL_BLEND((extrawidth), extrawidth);
532 #undef ONE_PIXEL_BLEND
533
534         src += srcskip;  /* move to next row, accounting for pitch. */
535         dst += dstskip;
536     }
537
538 }
539
540 static void BlitNtoNKey(SDL_BlitInfo *info);
541 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
542 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
543 {
544     int height = info->d_height;
545     Uint32 *srcp = (Uint32 *) info->s_pixels;
546     int srcskip = info->s_skip;
547     Uint32 *dstp = (Uint32 *) info->d_pixels;
548     int dstskip = info->d_skip;
549     SDL_PixelFormat *srcfmt = info->src;
550     int srcbpp = srcfmt->BytesPerPixel;
551     SDL_PixelFormat *dstfmt = info->dst;
552     int dstbpp = dstfmt->BytesPerPixel;
553     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
554         unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
555     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
556         Uint32 ckey = info->src->colorkey;
557     vector unsigned int valpha;
558     vector unsigned char vpermute;
559     vector unsigned char vzero;
560     vector unsigned int vckey;
561     vector unsigned int vrgbmask;
562     vpermute = calc_swizzle32(srcfmt, dstfmt);
563     if (info->d_width < 16) {
564         if(copy_alpha) {
565             BlitNtoNKeyCopyAlpha(info);
566         } else {
567             BlitNtoNKey(info);
568         }
569         return;
570     }
571     vzero = vec_splat_u8(0);
572     if (alpha) {
573         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
574         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
575     } else {
576         valpha = (vector unsigned int)vzero;
577     }
578     ckey &= rgbmask;
579     ((unsigned int *)(char*)&vckey)[0] = ckey;
580     vckey = vec_splat(vckey, 0);
581     ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
582     vrgbmask = vec_splat(vrgbmask, 0);
583
584     while (height--) {
585 #define ONE_PIXEL_BLEND(condition, widthvar) \
586         if (copy_alpha) { \
587             while (condition) { \
588                 Uint32 Pixel; \
589                 unsigned sR, sG, sB, sA; \
590                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
591                           sR, sG, sB, sA); \
592                 if ( (Pixel & rgbmask) != ckey ) { \
593                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
594                             sR, sG, sB, sA); \
595                 } \
596                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
597                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
598                 widthvar--; \
599             } \
600         } else { \
601             while (condition) { \
602                 Uint32 Pixel; \
603                 unsigned sR, sG, sB; \
604                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
605                 if ( Pixel != ckey ) { \
606                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
607                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
608                               sR, sG, sB, alpha); \
609                 } \
610                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
611                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
612                 widthvar--; \
613             } \
614         }
615         int width = info->d_width;
616         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
617         assert(width > 0);
618         if (width > 0) {
619             int extrawidth = (width % 4);
620             vector unsigned char valigner = VEC_ALIGNER(srcp);
621             vector unsigned int vs = vec_ld(0, srcp);
622             width -= extrawidth;
623             assert(width >= 4);
624             while (width) {
625                 vector unsigned char vsel;
626                 vector unsigned int vd;
627                 vector unsigned int voverflow = vec_ld(15, srcp);
628                 /* load the source vec */
629                 vs = vec_perm(vs, voverflow, valigner);
630                 /* vsel is set for items that match the key */
631                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
632                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
633                 /* permute the src vec to the dest format */
634                 vs = vec_perm(vs, valpha, vpermute);
635                 /* load the destination vec */
636                 vd = vec_ld(0, dstp);
637                 /* select the source and dest into vs */
638                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
639                 
640                 vec_st(vd, 0, dstp);
641                 srcp += 4;
642                 width -= 4;
643                 dstp += 4;
644                 vs = voverflow;
645             }
646             ONE_PIXEL_BLEND((extrawidth), extrawidth);
647 #undef ONE_PIXEL_BLEND
648             srcp += srcskip >> 2;
649             dstp += dstskip >> 2;
650         }
651     }
652 }
653
654 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
655 /* Use this on a G5 */
656 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
657 {
658     int height = info->d_height;
659     Uint32 *src = (Uint32 *) info->s_pixels;
660     int srcskip = info->s_skip;
661     Uint32 *dst = (Uint32 *) info->d_pixels;
662     int dstskip = info->d_skip;
663     SDL_PixelFormat *srcfmt = info->src;
664     SDL_PixelFormat *dstfmt = info->dst;
665     vector unsigned int vzero = vec_splat_u32(0);
666     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
667     if (dstfmt->Amask && !srcfmt->Amask) {
668         if (srcfmt->alpha) {
669             vector unsigned char valpha;
670             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
671             vzero = (vector unsigned int)vec_splat(valpha, 0);
672         }
673     }
674
675     assert(srcfmt->BytesPerPixel == 4);
676     assert(dstfmt->BytesPerPixel == 4);
677
678     while (height--) {
679         vector unsigned char valigner;
680         vector unsigned int vbits;
681         vector unsigned int voverflow;
682         Uint32 bits;
683         Uint8 r, g, b, a;
684
685         int width = info->d_width;
686         int extrawidth;
687
688         /* do scalar until we can align... */
689         while ((UNALIGNED_PTR(dst)) && (width)) {
690             bits = *(src++);
691             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
692             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
693             width--;
694         }
695
696         /* After all that work, here's the vector part! */
697         extrawidth = (width % 4);
698         width -= extrawidth;
699         valigner = VEC_ALIGNER(src);
700         vbits = vec_ld(0, src);
701
702        while (width) {
703             voverflow = vec_ld(15, src);
704             src += 4;
705             width -= 4;
706             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
707             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
708             vec_st(vbits, 0, dst);  /* store it back out. */
709             dst += 4;
710             vbits = voverflow;
711         }
712
713         assert(width == 0);
714
715         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
716         while (extrawidth) {
717             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
718             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
719             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
720             extrawidth--;
721         }
722
723         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
724         dst += dstskip >> 2;
725     }
726
727 }
728
729 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
730 /* Use this on a G4 */
731 static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
732 {
733     const int scalar_dst_lead = sizeof (Uint32) * 4;
734     const int vector_dst_lead = sizeof (Uint32) * 16;
735
736     int height = info->d_height;
737     Uint32 *src = (Uint32 *) info->s_pixels;
738     int srcskip = info->s_skip;
739     Uint32 *dst = (Uint32 *) info->d_pixels;
740     int dstskip = info->d_skip;
741     SDL_PixelFormat *srcfmt = info->src;
742     SDL_PixelFormat *dstfmt = info->dst;
743     vector unsigned int vzero = vec_splat_u32(0);
744     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
745     if (dstfmt->Amask && !srcfmt->Amask) {
746         if (srcfmt->alpha) {
747             vector unsigned char valpha;
748             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
749             vzero = (vector unsigned int)vec_splat(valpha, 0);
750         }
751     }
752
753     assert(srcfmt->BytesPerPixel == 4);
754     assert(dstfmt->BytesPerPixel == 4);
755
756     while (height--) {
757         vector unsigned char valigner;
758         vector unsigned int vbits;
759         vector unsigned int voverflow;
760         Uint32 bits;
761         Uint8 r, g, b, a;
762
763         int width = info->d_width;
764         int extrawidth;
765
766         /* do scalar until we can align... */
767         while ((UNALIGNED_PTR(dst)) && (width)) {
768             vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
769             vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
770             bits = *(src++);
771             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
772             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
773             width--;
774         }
775
776         /* After all that work, here's the vector part! */
777         extrawidth = (width % 4);
778         width -= extrawidth;
779         valigner = VEC_ALIGNER(src);
780         vbits = vec_ld(0, src);
781
782         while (width) {
783             vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
784             vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
785             voverflow = vec_ld(15, src);
786             src += 4;
787             width -= 4;
788             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
789             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
790             vec_st(vbits, 0, dst);  /* store it back out. */
791             dst += 4;
792             vbits = voverflow;
793         }
794         
795         assert(width == 0);
796
797         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
798         while (extrawidth) {
799             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
800             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
801             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
802             extrawidth--;
803         }
804
805         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
806         dst += dstskip >> 2;
807     }
808
809     vec_dss(DST_CHAN_SRC);
810     vec_dss(DST_CHAN_DEST);
811 }
812
813 static Uint32 GetBlitFeatures( void )
814 {
815     static Uint32 features = 0xffffffff;
816     if (features == 0xffffffff) {
817         /* Provide an override for testing .. */
818         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
819         if (override) {
820             features = 0;
821             SDL_sscanf(override, "%u", &features);
822         } else {
823             features = ( 0
824                 /* Feature 1 is has-MMX */
825                 | ((SDL_HasMMX()) ? 1 : 0)
826                 /* Feature 2 is has-AltiVec */
827                 | ((SDL_HasAltiVec()) ? 2 : 0)
828                 /* Feature 4 is dont-use-prefetch */
829                 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
830                 | ((GetL3CacheSize() == 0) ? 4 : 0)
831             );
832         }
833     }
834     return features;
835 }
836 #if __MWERKS__
837 #pragma altivec_model off
838 #endif
839 #else
840 /* Feature 1 is has-MMX */
/* Fallback used when SDL_ALTIVEC_BLITTERS is not enabled: the mask is
 * recomputed from SDL_HasMMX() on every use and can only ever be 0 or 1. */
841 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
842 #endif
843
844 /* This is now endian dependent */
/*
 * HI and LO are array indices whose values swap with the host byte order.
 * Users in this file:
 *  - the RGB888_*_TWO macros read two source pixels as src[HI] and src[LO];
 *    the src[HI] result is shifted into the upper 16 bits of the 32-bit word
 *    that is stored, the src[LO] result fills the lower 16 bits.
 *  - RGB565_32 indexes the two bytes of one 16-bit pixel as src[LO]/src[HI].
 */
845 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
846 #define HI      1
847 #define LO      0
848 #else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
849 #define HI      0
850 #define LO      1
851 #endif
852
853 #if SDL_HERMES_BLITTERS
854
855 /* Heheheh, we coerce Hermes into using SDL blit information */
856 #define X86_ASSEMBLER
857 #define HermesConverterInterface        SDL_BlitInfo
858 #define HermesClearInterface            void
859 #define STACKCALL
860
861 #include "../hermes/HeadMMX.h"
862 #include "../hermes/HeadX86.h"
863
864 #else
865
866 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
867 #define RGB888_RGB332(dst, src) { \
868         dst = (Uint8)((((src)&0x00E00000)>>16)| \
869                       (((src)&0x0000E000)>>11)| \
870                       (((src)&0x000000C0)>>6)); \
871 }
872 static void Blit_RGB888_index8(SDL_BlitInfo *info)
873 {
874 #ifndef USE_DUFFS_LOOP
875         int c;
876 #endif
877         int width, height;
878         Uint32 *src;
879         const Uint8 *map;
880         Uint8 *dst;
881         int srcskip, dstskip;
882
883         /* Set up some basic variables */
884         width = info->d_width;
885         height = info->d_height;
886         src = (Uint32 *)info->s_pixels;
887         srcskip = info->s_skip/4;
888         dst = info->d_pixels;
889         dstskip = info->d_skip;
890         map = info->table;
891
892         if ( map == NULL ) {
893                 while ( height-- ) {
894 #ifdef USE_DUFFS_LOOP
895                         DUFFS_LOOP(
896                                 RGB888_RGB332(*dst++, *src);
897                         , width);
898 #else
899                         for ( c=width/4; c; --c ) {
900                                 /* Pack RGB into 8bit pixel */
901                                 ++src;
902                                 RGB888_RGB332(*dst++, *src);
903                                 ++src;
904                                 RGB888_RGB332(*dst++, *src);
905                                 ++src;
906                                 RGB888_RGB332(*dst++, *src);
907                                 ++src;
908                         }
909                         switch ( width & 3 ) {
910                                 case 3:
911                                         RGB888_RGB332(*dst++, *src);
912                                         ++src;
913                                 case 2:
914                                         RGB888_RGB332(*dst++, *src);
915                                         ++src;
916                                 case 1:
917                                         RGB888_RGB332(*dst++, *src);
918                                         ++src;
919                         }
920 #endif /* USE_DUFFS_LOOP */
921                         src += srcskip;
922                         dst += dstskip;
923                 }
924         } else {
925                 int Pixel;
926
927                 while ( height-- ) {
928 #ifdef USE_DUFFS_LOOP
929                         DUFFS_LOOP(
930                                 RGB888_RGB332(Pixel, *src);
931                                 *dst++ = map[Pixel];
932                                 ++src;
933                         , width);
934 #else
935                         for ( c=width/4; c; --c ) {
936                                 /* Pack RGB into 8bit pixel */
937                                 RGB888_RGB332(Pixel, *src);
938                                 *dst++ = map[Pixel];
939                                 ++src;
940                                 RGB888_RGB332(Pixel, *src);
941                                 *dst++ = map[Pixel];
942                                 ++src;
943                                 RGB888_RGB332(Pixel, *src);
944                                 *dst++ = map[Pixel];
945                                 ++src;
946                                 RGB888_RGB332(Pixel, *src);
947                                 *dst++ = map[Pixel];
948                                 ++src;
949                         }
950                         switch ( width & 3 ) {
951                                 case 3:
952                                         RGB888_RGB332(Pixel, *src);
953                                         *dst++ = map[Pixel];
954                                         ++src;
955                                 case 2:
956                                         RGB888_RGB332(Pixel, *src);
957                                         *dst++ = map[Pixel];
958                                         ++src;
959                                 case 1:
960                                         RGB888_RGB332(Pixel, *src);
961                                         *dst++ = map[Pixel];
962                                         ++src;
963                         }
964 #endif /* USE_DUFFS_LOOP */
965                         src += srcskip;
966                         dst += dstskip;
967                 }
968         }
969 }
970 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
971 #define RGB888_RGB555(dst, src) { \
972         *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
973                                     (((*src)&0x0000F800)>>6)| \
974                                     (((*src)&0x000000F8)>>3)); \
975 }
976 #define RGB888_RGB555_TWO(dst, src) { \
977         *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
978                              (((src[HI])&0x0000F800)>>6)| \
979                              (((src[HI])&0x000000F8)>>3))<<16)| \
980                              (((src[LO])&0x00F80000)>>9)| \
981                              (((src[LO])&0x0000F800)>>6)| \
982                              (((src[LO])&0x000000F8)>>3); \
983 }
984 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
985 {
986 #ifndef USE_DUFFS_LOOP
987         int c;
988 #endif
989         int width, height;
990         Uint32 *src;
991         Uint16 *dst;
992         int srcskip, dstskip;
993
994         /* Set up some basic variables */
995         width = info->d_width;
996         height = info->d_height;
997         src = (Uint32 *)info->s_pixels;
998         srcskip = info->s_skip/4;
999         dst = (Uint16 *)info->d_pixels;
1000         dstskip = info->d_skip/2;
1001
1002 #ifdef USE_DUFFS_LOOP
1003         while ( height-- ) {
1004                 DUFFS_LOOP(
1005                         RGB888_RGB555(dst, src);
1006                         ++src;
1007                         ++dst;
1008                 , width);
1009                 src += srcskip;
1010                 dst += dstskip;
1011         }
1012 #else
1013         /* Memory align at 4-byte boundary, if necessary */
1014         if ( (long)dst & 0x03 ) {
1015                 /* Don't do anything if width is 0 */
1016                 if ( width == 0 ) {
1017                         return;
1018                 }
1019                 --width;
1020
1021                 while ( height-- ) {
1022                         /* Perform copy alignment */
1023                         RGB888_RGB555(dst, src);
1024                         ++src;
1025                         ++dst;
1026
1027                         /* Copy in 4 pixel chunks */
1028                         for ( c=width/4; c; --c ) {
1029                                 RGB888_RGB555_TWO(dst, src);
1030                                 src += 2;
1031                                 dst += 2;
1032                                 RGB888_RGB555_TWO(dst, src);
1033                                 src += 2;
1034                                 dst += 2;
1035                         }
1036                         /* Get any leftovers */
1037                         switch (width & 3) {
1038                                 case 3:
1039                                         RGB888_RGB555(dst, src);
1040                                         ++src;
1041                                         ++dst;
1042                                 case 2:
1043                                         RGB888_RGB555_TWO(dst, src);
1044                                         src += 2;
1045                                         dst += 2;
1046                                         break;
1047                                 case 1:
1048                                         RGB888_RGB555(dst, src);
1049                                         ++src;
1050                                         ++dst;
1051                                         break;
1052                         }
1053                         src += srcskip;
1054                         dst += dstskip;
1055                 }
1056         } else { 
1057                 while ( height-- ) {
1058                         /* Copy in 4 pixel chunks */
1059                         for ( c=width/4; c; --c ) {
1060                                 RGB888_RGB555_TWO(dst, src);
1061                                 src += 2;
1062                                 dst += 2;
1063                                 RGB888_RGB555_TWO(dst, src);
1064                                 src += 2;
1065                                 dst += 2;
1066                         }
1067                         /* Get any leftovers */
1068                         switch (width & 3) {
1069                                 case 3:
1070                                         RGB888_RGB555(dst, src);
1071                                         ++src;
1072                                         ++dst;
1073                                 case 2:
1074                                         RGB888_RGB555_TWO(dst, src);
1075                                         src += 2;
1076                                         dst += 2;
1077                                         break;
1078                                 case 1:
1079                                         RGB888_RGB555(dst, src);
1080                                         ++src;
1081                                         ++dst;
1082                                         break;
1083                         }
1084                         src += srcskip;
1085                         dst += dstskip;
1086                 }
1087         }
1088 #endif /* USE_DUFFS_LOOP */
1089 }
1090 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
1091 #define RGB888_RGB565(dst, src) { \
1092         *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
1093                                     (((*src)&0x0000FC00)>>5)| \
1094                                     (((*src)&0x000000F8)>>3)); \
1095 }
1096 #define RGB888_RGB565_TWO(dst, src) { \
1097         *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
1098                              (((src[HI])&0x0000FC00)>>5)| \
1099                              (((src[HI])&0x000000F8)>>3))<<16)| \
1100                              (((src[LO])&0x00F80000)>>8)| \
1101                              (((src[LO])&0x0000FC00)>>5)| \
1102                              (((src[LO])&0x000000F8)>>3); \
1103 }
1104 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
1105 {
1106 #ifndef USE_DUFFS_LOOP
1107         int c;
1108 #endif
1109         int width, height;
1110         Uint32 *src;
1111         Uint16 *dst;
1112         int srcskip, dstskip;
1113
1114         /* Set up some basic variables */
1115         width = info->d_width;
1116         height = info->d_height;
1117         src = (Uint32 *)info->s_pixels;
1118         srcskip = info->s_skip/4;
1119         dst = (Uint16 *)info->d_pixels;
1120         dstskip = info->d_skip/2;
1121
1122 #ifdef USE_DUFFS_LOOP
1123         while ( height-- ) {
1124                 DUFFS_LOOP(
1125                         RGB888_RGB565(dst, src);
1126                         ++src;
1127                         ++dst;
1128                 , width);
1129                 src += srcskip;
1130                 dst += dstskip;
1131         }
1132 #else
1133         /* Memory align at 4-byte boundary, if necessary */
1134         if ( (long)dst & 0x03 ) {
1135                 /* Don't do anything if width is 0 */
1136                 if ( width == 0 ) {
1137                         return;
1138                 }
1139                 --width;
1140
1141                 while ( height-- ) {
1142                         /* Perform copy alignment */
1143                         RGB888_RGB565(dst, src);
1144                         ++src;
1145                         ++dst;
1146
1147                         /* Copy in 4 pixel chunks */
1148                         for ( c=width/4; c; --c ) {
1149                                 RGB888_RGB565_TWO(dst, src);
1150                                 src += 2;
1151                                 dst += 2;
1152                                 RGB888_RGB565_TWO(dst, src);
1153                                 src += 2;
1154                                 dst += 2;
1155                         }
1156                         /* Get any leftovers */
1157                         switch (width & 3) {
1158                                 case 3:
1159                                         RGB888_RGB565(dst, src);
1160                                         ++src;
1161                                         ++dst;
1162                                 case 2:
1163                                         RGB888_RGB565_TWO(dst, src);
1164                                         src += 2;
1165                                         dst += 2;
1166                                         break;
1167                                 case 1:
1168                                         RGB888_RGB565(dst, src);
1169                                         ++src;
1170                                         ++dst;
1171                                         break;
1172                         }
1173                         src += srcskip;
1174                         dst += dstskip;
1175                 }
1176         } else { 
1177                 while ( height-- ) {
1178                         /* Copy in 4 pixel chunks */
1179                         for ( c=width/4; c; --c ) {
1180                                 RGB888_RGB565_TWO(dst, src);
1181                                 src += 2;
1182                                 dst += 2;
1183                                 RGB888_RGB565_TWO(dst, src);
1184                                 src += 2;
1185                                 dst += 2;
1186                         }
1187                         /* Get any leftovers */
1188                         switch (width & 3) {
1189                                 case 3:
1190                                         RGB888_RGB565(dst, src);
1191                                         ++src;
1192                                         ++dst;
1193                                 case 2:
1194                                         RGB888_RGB565_TWO(dst, src);
1195                                         src += 2;
1196                                         dst += 2;
1197                                         break;
1198                                 case 1:
1199                                         RGB888_RGB565(dst, src);
1200                                         ++src;
1201                                         ++dst;
1202                                         break;
1203                         }
1204                         src += srcskip;
1205                         dst += dstskip;
1206                 }
1207         }
1208 #endif /* USE_DUFFS_LOOP */
1209 }
1210
1211 #endif /* SDL_HERMES_BLITTERS */
1212
1213 #ifdef __ARM_NEON__
1214
1215 /* NEON optimized blitter callers */
1216 #define make_neon_caller(name, neon_name) \
1217 extern void neon_name(void *dst, const void *src, int count, unsigned int abits); \
1218 static void name(SDL_BlitInfo *info) \
1219 { \
1220         int width = info->d_width; \
1221         int height = info->d_height; \
1222         Uint8 *src = info->s_pixels; \
1223         Uint8 *dst = info->d_pixels; \
1224         int srcskip = info->s_skip; \
1225         int dstskip = info->d_skip; \
1226         unsigned int abits = info->dst->Amask ? 0xff : 0; \
1227 \
1228         while ( height-- ) { \
1229             neon_name(dst, src, width, abits); \
1230             src += width * 4 + srcskip; \
1231             dst += width * 4 + dstskip; \
1232         } \
1233 }
1234
1235 make_neon_caller(BlitABGRtoXRGB_neon, neon_ABGRtoXRGB)
1236 make_neon_caller(BlitARGBtoXRGB_neon, neon_ARGBtoXRGB)
1237
1238 #endif /* __ARM_NEON__ */
1239
1240
1241 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
1242 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
1243 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
1244 {
1245 #ifndef USE_DUFFS_LOOP
1246         int c;
1247 #endif
1248         int width, height;
1249         Uint8 *src;
1250         Uint32 *dst;
1251         int srcskip, dstskip;
1252
1253         /* Set up some basic variables */
1254         width = info->d_width;
1255         height = info->d_height;
1256         src = (Uint8 *)info->s_pixels;
1257         srcskip = info->s_skip;
1258         dst = (Uint32 *)info->d_pixels;
1259         dstskip = info->d_skip/4;
1260
1261 #ifdef USE_DUFFS_LOOP
1262         while ( height-- ) {
1263                 DUFFS_LOOP(
1264                 {
1265                         *dst++ = RGB565_32(dst, src, map);
1266                         src += 2;
1267                 },
1268                 width);
1269                 src += srcskip;
1270                 dst += dstskip;
1271         }
1272 #else
1273         while ( height-- ) {
1274                 /* Copy in 4 pixel chunks */
1275                 for ( c=width/4; c; --c ) {
1276                         *dst++ = RGB565_32(dst, src, map);
1277                         src += 2;
1278                         *dst++ = RGB565_32(dst, src, map);
1279                         src += 2;
1280                         *dst++ = RGB565_32(dst, src, map);
1281                         src += 2;
1282                         *dst++ = RGB565_32(dst, src, map);
1283                         src += 2;
1284                 }
1285                 /* Get any leftovers */
1286                 switch (width & 3) {
1287                         case 3:
1288                                 *dst++ = RGB565_32(dst, src, map);
1289                                 src += 2;
1290                         case 2:
1291                                 *dst++ = RGB565_32(dst, src, map);
1292                                 src += 2;
1293                         case 1:
1294                                 *dst++ = RGB565_32(dst, src, map);
1295                                 src += 2;
1296                                 break;
1297                 }
1298                 src += srcskip;
1299                 dst += dstskip;
1300         }
1301 #endif /* USE_DUFFS_LOOP */
1302 }
1303
1304 /* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
/*
 * Lookup table passed to Blit_RGB565_32().  Each possible byte value b owns
 * two adjacent slots: RGB565_32 adds slot 2*b for the pixel's LO byte and
 * slot 2*b+1 for its HI byte.  Every odd slot has 0xff in its top byte and
 * every even slot has 0x00 there, so the sum always carries 0xff in the top
 * (alpha, going by the ARGB name) byte.
 */
1305 static const Uint32 RGB565_ARGB8888_LUT[512] = {
1306                 0x00000000, 0xff000000, 0x00000008, 0xff002000,
1307                 0x00000010, 0xff004000, 0x00000018, 0xff006100,
1308                 0x00000020, 0xff008100, 0x00000029, 0xff00a100,
1309                 0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
1310                 0x00000041, 0xff080000, 0x0000004a, 0xff082000,
1311                 0x00000052, 0xff084000, 0x0000005a, 0xff086100,
1312                 0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
1313                 0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
1314                 0x00000083, 0xff100000, 0x0000008b, 0xff102000,
1315                 0x00000094, 0xff104000, 0x0000009c, 0xff106100,
1316                 0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
1317                 0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
1318                 0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
1319                 0x000000d5, 0xff184000, 0x000000de, 0xff186100,
1320                 0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
1321                 0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
1322                 0x00000400, 0xff200000, 0x00000408, 0xff202000,
1323                 0x00000410, 0xff204000, 0x00000418, 0xff206100,
1324                 0x00000420, 0xff208100, 0x00000429, 0xff20a100,
1325                 0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
1326                 0x00000441, 0xff290000, 0x0000044a, 0xff292000,
1327                 0x00000452, 0xff294000, 0x0000045a, 0xff296100,
1328                 0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
1329                 0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
1330                 0x00000483, 0xff310000, 0x0000048b, 0xff312000,
1331                 0x00000494, 0xff314000, 0x0000049c, 0xff316100,
1332                 0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
1333                 0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
1334                 0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
1335                 0x000004d5, 0xff394000, 0x000004de, 0xff396100,
1336                 0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
1337                 0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
1338                 0x00000800, 0xff410000, 0x00000808, 0xff412000,
1339                 0x00000810, 0xff414000, 0x00000818, 0xff416100,
1340                 0x00000820, 0xff418100, 0x00000829, 0xff41a100,
1341                 0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
1342                 0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
1343                 0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
1344                 0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
1345                 0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
1346                 0x00000883, 0xff520000, 0x0000088b, 0xff522000,
1347                 0x00000894, 0xff524000, 0x0000089c, 0xff526100,
1348                 0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
1349                 0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
1350                 0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
1351                 0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
1352                 0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
1353                 0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
1354                 0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
1355                 0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
1356                 0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
1357                 0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
1358                 0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
1359                 0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
1360                 0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
1361                 0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
1362                 0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
1363                 0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
1364                 0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
1365                 0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
1366                 0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
1367                 0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
1368                 0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
1369                 0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
1370                 0x00001000, 0xff830000, 0x00001008, 0xff832000,
1371                 0x00001010, 0xff834000, 0x00001018, 0xff836100,
1372                 0x00001020, 0xff838100, 0x00001029, 0xff83a100,
1373                 0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
1374                 0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
1375                 0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
1376                 0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
1377                 0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
1378                 0x00001083, 0xff940000, 0x0000108b, 0xff942000,
1379                 0x00001094, 0xff944000, 0x0000109c, 0xff946100,
1380                 0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
1381                 0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
1382                 0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
1383                 0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
1384                 0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
1385                 0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
1386                 0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
1387                 0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
1388                 0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
1389                 0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
1390                 0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
1391                 0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
1392                 0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
1393                 0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
1394                 0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
1395                 0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
1396                 0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
1397                 0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
1398                 0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
1399                 0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
1400                 0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
1401                 0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
1402                 0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
1403                 0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
1404                 0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
1405                 0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
1406                 0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
1407                 0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
1408                 0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
1409                 0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
1410                 0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
1411                 0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
1412                 0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
1413                 0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
1414                 0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
1415                 0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
1416                 0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
1417                 0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
1418                 0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
1419                 0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
1420                 0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
1421                 0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
1422                 0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
1423                 0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
1424                 0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
1425                 0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
1426                 0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
1427                 0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
1428                 0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
1429                 0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
1430                 0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
1431                 0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
1432                 0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
1433                 0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
1434 };
/* Blit entry point: runs the shared table-driven Blit_RGB565_32() with the
 * ARGB8888 table defined directly above. */
1435 static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
1436 {
1437     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
1438 }
1439
1440 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
1441 static const Uint32 RGB565_ABGR8888_LUT[512] = {
1442                 0xff000000, 0x00000000, 0xff080000, 0x00002000,
1443                 0xff100000, 0x00004000, 0xff180000, 0x00006100,
1444                 0xff200000, 0x00008100, 0xff290000, 0x0000a100,
1445                 0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
1446                 0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
1447                 0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
1448                 0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
1449                 0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
1450                 0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
1451                 0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
1452                 0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
1453                 0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
1454                 0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
1455                 0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
1456                 0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
1457                 0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
1458                 0xff000400, 0x00000020, 0xff080400, 0x00002020,
1459                 0xff100400, 0x00004020, 0xff180400, 0x00006120,
1460                 0xff200400, 0x00008120, 0xff290400, 0x0000a120,
1461                 0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
1462                 0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
1463                 0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
1464                 0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
1465                 0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
1466                 0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
1467                 0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
1468                 0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
1469                 0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
1470                 0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
1471                 0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
1472                 0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
1473                 0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
1474                 0xff000800, 0x00000041, 0xff080800, 0x00002041,
1475                 0xff100800, 0x00004041, 0xff180800, 0x00006141,
1476                 0xff200800, 0x00008141, 0xff290800, 0x0000a141,
1477                 0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
1478                 0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
1479                 0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
1480                 0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
1481                 0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
1482                 0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
1483                 0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
1484                 0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
1485                 0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
1486                 0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
1487                 0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
1488                 0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
1489                 0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
1490                 0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
1491                 0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
1492                 0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
1493                 0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
1494                 0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
1495                 0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
1496                 0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
1497                 0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
1498                 0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
1499                 0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
1500                 0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
1501                 0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
1502                 0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
1503                 0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
1504                 0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
1505                 0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
1506                 0xff001000, 0x00000083, 0xff081000, 0x00002083,
1507                 0xff101000, 0x00004083, 0xff181000, 0x00006183,
1508                 0xff201000, 0x00008183, 0xff291000, 0x0000a183,
1509                 0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
1510                 0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
1511                 0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
1512                 0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
1513                 0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
1514                 0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
1515                 0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
1516                 0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
1517                 0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
1518                 0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
1519                 0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
1520                 0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
1521                 0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
1522                 0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
1523                 0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
1524                 0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
1525                 0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
1526                 0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
1527                 0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
1528                 0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
1529                 0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
1530                 0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
1531                 0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
1532                 0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
1533                 0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
1534                 0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
1535                 0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
1536                 0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
1537                 0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
1538                 0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
1539                 0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
1540                 0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
1541                 0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
1542                 0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
1543                 0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
1544                 0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
1545                 0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
1546                 0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
1547                 0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
1548                 0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
1549                 0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
1550                 0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
1551                 0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
1552                 0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
1553                 0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
1554                 0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
1555                 0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
1556                 0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
1557                 0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
1558                 0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
1559                 0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
1560                 0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
1561                 0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
1562                 0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
1563                 0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
1564                 0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
1565                 0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
1566                 0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
1567                 0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
1568                 0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
1569                 0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
1570 };
1571 static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
1572 {
1573     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
1574 }
1575
1576 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8
      *
      * RGB565_RGBA8888_LUT holds 256 pairs of 32-bit words and is handed to
      * Blit_RGB565_32 by Blit_RGB565_RGBA8888.  Within each pair:
      *   - the first word has 0xff in bits 0-7, one of 32 levels
      *     (0x00..0xff) in bits 8-15 and 0x00..0x1c (steps of 4) in
      *     bits 16-23;
      *   - the second word has one of eight levels (0x00..0xe2) in
      *     bits 16-23 and one of 32 levels (0x00..0xff) in bits 24-31.
      * NOTE(review): presumably the first word of a pair is selected by the
      * low byte of a 565 pixel and the second by its high byte, and the two
      * are combined inside Blit_RGB565_32 (not visible here) -- confirm there.
      */
1577 static const Uint32 RGB565_RGBA8888_LUT[512] = {
1578                 0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
1579                 0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
1580                 0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
1581                 0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
1582                 0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
1583                 0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
1584                 0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
1585                 0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
1586                 0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
1587                 0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
1588                 0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
1589                 0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
1590                 0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
1591                 0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
1592                 0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
1593                 0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
1594                 0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
1595                 0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
1596                 0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
1597                 0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
1598                 0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
1599                 0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
1600                 0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
1601                 0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
1602                 0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
1603                 0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
1604                 0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
1605                 0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
1606                 0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
1607                 0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
1608                 0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
1609                 0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
1610                 0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
1611                 0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
1612                 0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
1613                 0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
1614                 0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
1615                 0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
1616                 0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
1617                 0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
1618                 0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
1619                 0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
1620                 0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
1621                 0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
1622                 0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
1623                 0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
1624                 0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
1625                 0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
1626                 0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
1627                 0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
1628                 0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
1629                 0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
1630                 0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
1631                 0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
1632                 0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
1633                 0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
1634                 0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
1635                 0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
1636                 0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
1637                 0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
1638                 0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
1639                 0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
1640                 0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
1641                 0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
1642                 0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
1643                 0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
1644                 0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
1645                 0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
1646                 0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
1647                 0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
1648                 0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
1649                 0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
1650                 0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
1651                 0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
1652                 0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
1653                 0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
1654                 0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
1655                 0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
1656                 0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
1657                 0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
1658                 0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
1659                 0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
1660                 0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
1661                 0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
1662                 0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
1663                 0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
1664                 0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
1665                 0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
1666                 0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
1667                 0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
1668                 0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
1669                 0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
1670                 0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
1671                 0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
1672                 0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
1673                 0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
1674                 0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
1675                 0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
1676                 0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
1677                 0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
1678                 0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
1679                 0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
1680                 0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
1681                 0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
1682                 0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
1683                 0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
1684                 0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
1685                 0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
1686                 0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
1687                 0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
1688                 0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
1689                 0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
1690                 0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
1691                 0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
1692                 0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
1693                 0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
1694                 0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
1695                 0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
1696                 0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
1697                 0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
1698                 0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
1699                 0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
1700                 0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
1701                 0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
1702                 0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
1703                 0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
1704                 0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
1705                 0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
1706 };
1707 static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
1708 {
1709     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
1710 }
1711
1712 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8
      *
      * RGB565_BGRA8888_LUT holds 256 pairs of 32-bit words and is handed to
      * Blit_RGB565_32 by Blit_RGB565_BGRA8888.  Within each pair:
      *   - the first word has one of 32 levels (0x00..0xff) in bits 24-31,
      *     0x00..0x1c (steps of 4) in bits 16-23 and zero in bits 0-15;
      *   - the second word has 0xff in bits 0-7, one of 32 levels
      *     (0x00..0xff) in bits 8-15 and one of eight levels (0x00..0xe2)
      *     in bits 16-23.
      * NOTE(review): presumably the first word of a pair is selected by the
      * low byte of a 565 pixel and the second by its high byte, and the two
      * are combined inside Blit_RGB565_32 (not visible here) -- confirm there.
      */
1713 static const Uint32 RGB565_BGRA8888_LUT[512] = {
1714                 0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
1715                 0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
1716                 0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
1717                 0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
1718                 0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
1719                 0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
1720                 0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
1721                 0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
1722                 0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
1723                 0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
1724                 0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
1725                 0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
1726                 0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
1727                 0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
1728                 0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
1729                 0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
1730                 0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
1731                 0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
1732                 0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
1733                 0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
1734                 0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
1735                 0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
1736                 0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
1737                 0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
1738                 0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
1739                 0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
1740                 0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
1741                 0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
1742                 0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
1743                 0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
1744                 0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
1745                 0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
1746                 0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
1747                 0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
1748                 0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
1749                 0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
1750                 0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
1751                 0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
1752                 0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
1753                 0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
1754                 0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
1755                 0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
1756                 0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
1757                 0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
1758                 0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
1759                 0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
1760                 0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
1761                 0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
1762                 0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
1763                 0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
1764                 0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
1765                 0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
1766                 0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
1767                 0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
1768                 0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
1769                 0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
1770                 0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
1771                 0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
1772                 0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
1773                 0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
1774                 0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
1775                 0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
1776                 0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
1777                 0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
1778                 0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
1779                 0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
1780                 0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
1781                 0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
1782                 0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
1783                 0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
1784                 0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
1785                 0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
1786                 0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
1787                 0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
1788                 0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
1789                 0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
1790                 0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
1791                 0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
1792                 0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
1793                 0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
1794                 0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
1795                 0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
1796                 0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
1797                 0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
1798                 0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
1799                 0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
1800                 0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
1801                 0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
1802                 0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
1803                 0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
1804                 0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
1805                 0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
1806                 0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
1807                 0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
1808                 0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
1809                 0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
1810                 0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
1811                 0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
1812                 0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
1813                 0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
1814                 0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
1815                 0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
1816                 0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
1817                 0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
1818                 0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
1819                 0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
1820                 0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
1821                 0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
1822                 0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
1823                 0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
1824                 0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
1825                 0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
1826                 0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
1827                 0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
1828                 0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
1829                 0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
1830                 0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
1831                 0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
1832                 0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
1833                 0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
1834                 0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
1835                 0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
1836                 0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
1837                 0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
1838                 0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
1839                 0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
1840                 0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
1841                 0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
1842 };
1843 static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
1844 {
1845     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
1846 }
1847
/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
#ifndef RGB888_RGB332
/*
 * Reduce the 32-bit pixel value `src` to a 3-3-2 value and assign it to the
 * lvalue `dst`:
 *   bits 23..21 of src -> bits 7..5 of dst
 *   bits 15..13 of src -> bits 4..2 of dst
 *   bits  7..6  of src -> bits 1..0 of dst
 * `src` is evaluated three times, so it must not have side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by a
 * semicolon is exactly one statement (a bare { } block followed by ';'
 * breaks an unbraced if/else), and `dst` is parenthesized so expressions
 * such as *p++ can be passed safely.
 */
#define RGB888_RGB332(dst, src) do { \
        (dst) = (((src)&0x00E00000)>>16)| \
                (((src)&0x0000E000)>>11)| \
                (((src)&0x000000C0)>>6); \
} while (0)
#endif
1856 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
1857 {
1858 #ifndef USE_DUFFS_LOOP
1859         int c;
1860 #endif
1861         int Pixel;
1862         int width, height;
1863         Uint32 *src;
1864         const Uint8 *map;
1865         Uint8 *dst;
1866         int srcskip, dstskip;
1867
1868         /* Set up some basic variables */
1869         width = info->d_width;
1870         height = info->d_height;
1871         src = (Uint32 *)info->s_pixels;
1872         srcskip = info->s_skip/4;
1873         dst = info->d_pixels;
1874         dstskip = info->d_skip;
1875         map = info->table;
1876
1877 #ifdef USE_DUFFS_LOOP
1878         while ( height-- ) {
1879                 DUFFS_LOOP(
1880                         RGB888_RGB332(Pixel, *src);
1881                         *dst++ = map[Pixel];
1882                         ++src;
1883                 , width);
1884                 src += srcskip;
1885                 dst += dstskip;
1886         }
1887 #else
1888         while ( height-- ) {
1889                 for ( c=width/4; c; --c ) {
1890                         /* Pack RGB into 8bit pixel */
1891                         RGB888_RGB332(Pixel, *src);
1892                         *dst++ = map[Pixel];
1893                         ++src;
1894                         RGB888_RGB332(Pixel, *src);
1895                         *dst++ = map[Pixel];
1896                         ++src;
1897                         RGB888_RGB332(Pixel, *src);
1898                         *dst++ = map[Pixel];
1899                         ++src;
1900                         RGB888_RGB332(Pixel, *src);
1901                         *dst++ = map[Pixel];
1902                         ++src;
1903                 }
1904                 switch ( width & 3 ) {
1905                         case 3:
1906                                 RGB888_RGB332(Pixel, *src);
1907                                 *dst++ = map[Pixel];
1908                                 ++src;
1909                         case 2:
1910                                 RGB888_RGB332(Pixel, *src);
1911                                 *dst++ = map[Pixel];
1912                                 ++src;
1913                         case 1:
1914                                 RGB888_RGB332(Pixel, *src);
1915                                 *dst++ = map[Pixel];
1916                                 ++src;
1917                 }
1918                 src += srcskip;
1919                 dst += dstskip;
1920         }
1921 #endif /* USE_DUFFS_LOOP */
1922 }
1923 static void BlitNto1(SDL_BlitInfo *info)
1924 {
1925 #ifndef USE_DUFFS_LOOP
1926         int c;
1927 #endif
1928         int width, height;
1929         Uint8 *src;
1930         const Uint8 *map;
1931         Uint8 *dst;
1932         int srcskip, dstskip;
1933         int srcbpp;
1934         Uint32 Pixel;
1935         int  sR, sG, sB;
1936         SDL_PixelFormat *srcfmt;
1937
1938         /* Set up some basic variables */
1939         width = info->d_width;
1940         height = info->d_height;
1941         src = info->s_pixels;
1942         srcskip = info->s_skip;
1943         dst = info->d_pixels;
1944         dstskip = info->d_skip;
1945         map = info->table;
1946         srcfmt = info->src;
1947         srcbpp = srcfmt->BytesPerPixel;
1948
1949         if ( map == NULL ) {
1950                 while ( height-- ) {
1951 #ifdef USE_DUFFS_LOOP
1952                         DUFFS_LOOP(
1953                                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
1954                                                                 sR, sG, sB);
1955                                 if ( 1 ) {
1956                                         /* Pack RGB into 8bit pixel */
1957                                         *dst = ((sR>>5)<<(3+2))|
1958                                                 ((sG>>5)<<(2)) |
1959                                                 ((sB>>6)<<(0)) ;
1960                                 }
1961                                 dst++;
1962                                 src += srcbpp;
1963                         , width);
1964 #else
1965                         for ( c=width; c; --c ) {
1966                                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
1967                                                                 sR, sG, sB);
1968                                 if ( 1 ) {
1969                                         /* Pack RGB into 8bit pixel */
1970                                         *dst = ((sR>>5)<<(3+2))|
1971                                                 ((sG>>5)<<(2)) |
1972                                                 ((sB>>6)<<(0)) ;
1973                                 }
1974                                 dst++;
1975                                 src += srcbpp;
1976                         }
1977 #endif
1978                         src += srcskip;
1979                         dst += dstskip;
1980                 }
1981         } else {
1982                 while ( height-- ) {
1983 #ifdef USE_DUFFS_LOOP
1984                         DUFFS_LOOP(
1985                                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
1986                                                                 sR, sG, sB);
1987                                 if ( 1 ) {
1988                                         /* Pack RGB into 8bit pixel */
1989                                         *dst = map[((sR>>5)<<(3+2))|
1990                                                    ((sG>>5)<<(2))  |
1991                                                    ((sB>>6)<<(0))  ];
1992                                 }
1993                                 dst++;
1994                                 src += srcbpp;
1995                         , width);
1996 #else
1997                         for ( c=width; c; --c ) {
1998                                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
1999                                                                 sR, sG, sB);
2000                                 if ( 1 ) {
2001                                         /* Pack RGB into 8bit pixel */
2002                                         *dst = map[((sR>>5)<<(3+2))|
2003                                                    ((sG>>5)<<(2))  |
2004                                                    ((sB>>6)<<(0))  ];
2005                                 }
2006                                 dst++;
2007                                 src += srcbpp;
2008                         }
2009 #endif /* USE_DUFFS_LOOP */
2010                         src += srcskip;
2011                         dst += dstskip;
2012                 }
2013         }
2014 }
2015
2016 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
2017 static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
2018 {
2019         int width = info->d_width;
2020         int height = info->d_height;
2021         Uint32 *src = (Uint32 *)info->s_pixels;
2022         int srcskip = info->s_skip;
2023         Uint32 *dst = (Uint32 *)info->d_pixels;
2024         int dstskip = info->d_skip;
2025         SDL_PixelFormat *srcfmt = info->src;
2026         SDL_PixelFormat *dstfmt = info->dst;
2027
2028         if (dstfmt->Amask) {
2029                 /* RGB->RGBA, SET_ALPHA */
2030                 Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
2031
2032                 while ( height-- ) {
2033                         DUFFS_LOOP(
2034                         {
2035                                 *dst = *src | mask;
2036                                 ++dst;
2037                                 ++src;
2038                         },
2039                         width);
2040                         src = (Uint32*)((Uint8*)src + srcskip);
2041                         dst = (Uint32*)((Uint8*)dst + dstskip);
2042                 }
2043         } else {
2044                 /* RGBA->RGB, NO_ALPHA */
2045                 Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
2046
2047                 while ( height-- ) {
2048                         DUFFS_LOOP(
2049                         {
2050                                 *dst = *src & mask;
2051                                 ++dst;
2052                                 ++src;
2053                         },
2054                         width);
2055                         src = (Uint32*)((Uint8*)src + srcskip);
2056                         dst = (Uint32*)((Uint8*)dst + dstskip);
2057                 }
2058         }
2059 }
2060
2061 static void BlitNtoN(SDL_BlitInfo *info)
2062 {
2063         int width = info->d_width;
2064         int height = info->d_height;
2065         Uint8 *src = info->s_pixels;
2066         int srcskip = info->s_skip;
2067         Uint8 *dst = info->d_pixels;
2068         int dstskip = info->d_skip;
2069         SDL_PixelFormat *srcfmt = info->src;
2070         int srcbpp = srcfmt->BytesPerPixel;
2071         SDL_PixelFormat *dstfmt = info->dst;
2072         int dstbpp = dstfmt->BytesPerPixel;
2073         unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
2074
2075         while ( height-- ) {
2076                 DUFFS_LOOP(
2077                 {
2078                         Uint32 Pixel;
2079                         unsigned sR;
2080                         unsigned sG;
2081                         unsigned sB;
2082                         DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
2083                         ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
2084                         dst += dstbpp;
2085                         src += srcbpp;
2086                 },
2087                 width);
2088                 src += srcskip;
2089                 dst += dstskip;
2090         }
2091 }
2092
2093 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
2094 {
2095         int width = info->d_width;
2096         int height = info->d_height;
2097         Uint8 *src = info->s_pixels;
2098         int srcskip = info->s_skip;
2099         Uint8 *dst = info->d_pixels;
2100         int dstskip = info->d_skip;
2101         SDL_PixelFormat *srcfmt = info->src;
2102         int srcbpp = srcfmt->BytesPerPixel;
2103         SDL_PixelFormat *dstfmt = info->dst;
2104         int dstbpp = dstfmt->BytesPerPixel;
2105         int c;
2106
2107         /* FIXME: should map alpha to [0..255] correctly! */
2108         while ( height-- ) {
2109                 for ( c=width; c; --c ) {
2110                         Uint32 Pixel;
2111                         unsigned sR, sG, sB, sA;
2112                         DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
2113                                       sR, sG, sB, sA);
2114                         ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
2115                                       sR, sG, sB, sA);
2116                         dst += dstbpp;
2117                         src += srcbpp;
2118                 }
2119                 src += srcskip;
2120                 dst += dstskip;
2121         }
2122 }
2123
2124 static void BlitNto1Key(SDL_BlitInfo *info)
2125 {
2126         int width = info->d_width;
2127         int height = info->d_height;
2128         Uint8 *src = info->s_pixels;
2129         int srcskip = info->s_skip;
2130         Uint8 *dst = info->d_pixels;
2131         int dstskip = info->d_skip;
2132         SDL_PixelFormat *srcfmt = info->src;
2133         const Uint8 *palmap = info->table;
2134         Uint32 ckey = srcfmt->colorkey;
2135         Uint32 rgbmask = ~srcfmt->Amask;
2136         int srcbpp;
2137         Uint32 Pixel;
2138         unsigned sR, sG, sB;
2139
2140         /* Set up some basic variables */
2141         srcbpp = srcfmt->BytesPerPixel;
2142         ckey &= rgbmask;
2143
2144         if ( palmap == NULL ) {
2145                 while ( height-- ) {
2146                         DUFFS_LOOP(
2147                         {
2148                                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
2149                                                                 sR, sG, sB);
2150                                 if ( (Pixel & rgbmask) != ckey ) {
2151                                         /* Pack RGB into 8bit pixel */
2152                                         *dst = (Uint8)(((sR>>5)<<(3+2))|
2153                                                            ((sG>>5)<<(2)) |
2154                                                            ((sB>>6)<<(0)));
2155                                 }
2156                                 dst++;
2157                                 src += srcbpp;
2158                         },
2159                         width);
2160                         src += srcskip;
2161                         dst += dstskip;
2162                 }
2163         } else {
2164                 while ( height-- ) {
2165                         DUFFS_LOOP(
2166                         {
2167                                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
2168                                                                 sR, sG, sB);
2169                                 if ( (Pixel & rgbmask) != ckey ) {
2170                                         /* Pack RGB into 8bit pixel */
2171                                         *dst = (Uint8)palmap[((sR>>5)<<(3+2))|
2172                                                                      ((sG>>5)<<(2))  |
2173                                                                      ((sB>>6)<<(0))  ];
2174                                 }
2175                                 dst++;
2176                                 src += srcbpp;
2177                         },
2178                         width);
2179                         src += srcskip;
2180                         dst += dstskip;
2181                 }
2182         }
2183 }
2184
2185 static void Blit2to2Key(SDL_BlitInfo *info)
2186 {
2187         int width = info->d_width;
2188         int height = info->d_height;
2189         Uint16 *srcp = (Uint16 *)info->s_pixels;
2190         int srcskip = info->s_skip;
2191         Uint16 *dstp = (Uint16 *)info->d_pixels;
2192         int dstskip = info->d_skip;
2193         Uint32 ckey = info->src->colorkey;
2194         Uint32 rgbmask = ~info->src->Amask;
2195
2196         /* Set up some basic variables */
2197         srcskip /= 2;
2198         dstskip /= 2;
2199         ckey &= rgbmask;
2200
2201         while ( height-- ) {
2202                 DUFFS_LOOP(
2203                 {
2204                         if ( (*srcp & rgbmask) != ckey ) {
2205                                 *dstp = *srcp;
2206                         }
2207                         dstp++;
2208                         srcp++;
2209                 },
2210                 width);
2211                 srcp += srcskip;
2212                 dstp += dstskip;
2213         }
2214 }
2215
2216 static void BlitNtoNKey(SDL_BlitInfo *info)
2217 {
2218         int width = info->d_width;
2219         int height = info->d_height;
2220         Uint8 *src = info->s_pixels;
2221         int srcskip = info->s_skip;
2222         Uint8 *dst = info->d_pixels;
2223         int dstskip = info->d_skip;
2224         Uint32 ckey = info->src->colorkey;
2225         SDL_PixelFormat *srcfmt = info->src;
2226         SDL_PixelFormat *dstfmt = info->dst;
2227         int srcbpp = srcfmt->BytesPerPixel;
2228         int dstbpp = dstfmt->BytesPerPixel;
2229         unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
2230         Uint32 rgbmask = ~srcfmt->Amask;
2231
2232         /* Set up some basic variables */
2233         ckey &= rgbmask;
2234
2235         while ( height-- ) {
2236                 DUFFS_LOOP(
2237                 {
2238                         Uint32 Pixel;
2239                         unsigned sR;
2240                         unsigned sG;
2241                         unsigned sB;
2242                         RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
2243                         if ( (Pixel & rgbmask) != ckey ) {
2244                                 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
2245                                 ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
2246                                               sR, sG, sB, alpha);
2247                         }
2248                         dst += dstbpp;
2249                         src += srcbpp;
2250                 },
2251                 width);
2252                 src += srcskip;
2253                 dst += dstskip;
2254         }
2255 }
2256
2257 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
2258 {
2259         int width = info->d_width;
2260         int height = info->d_height;
2261         Uint8 *src = info->s_pixels;
2262         int srcskip = info->s_skip;
2263         Uint8 *dst = info->d_pixels;
2264         int dstskip = info->d_skip;
2265         Uint32 ckey = info->src->colorkey;
2266         SDL_PixelFormat *srcfmt = info->src;
2267         SDL_PixelFormat *dstfmt = info->dst;
2268         Uint32 rgbmask = ~srcfmt->Amask;
2269
2270         Uint8 srcbpp;
2271         Uint8 dstbpp;
2272         Uint32 Pixel;
2273         unsigned sR, sG, sB, sA;
2274
2275         /* Set up some basic variables */
2276         srcbpp = srcfmt->BytesPerPixel;
2277         dstbpp = dstfmt->BytesPerPixel;
2278         ckey &= rgbmask;
2279
2280         /* FIXME: should map alpha to [0..255] correctly! */
2281         while ( height-- ) {
2282                 DUFFS_LOOP(
2283                 {
2284                         DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
2285                                       sR, sG, sB, sA);
2286                         if ( (Pixel & rgbmask) != ckey ) {
2287                                   ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
2288                                                 sR, sG, sB, sA);
2289                         }
2290                         dst += dstbpp;
2291                         src += srcbpp;
2292                 },
2293                 width);
2294                 src += srcskip;
2295                 dst += dstskip;
2296         }
2297 }
2298
2299 /* Normal N to N optimized blitters */
2300 struct blit_table {
2301         Uint32 srcR, srcG, srcB;
2302         int dstbpp;
2303         Uint32 dstR, dstG, dstB;
2304         Uint32 blit_features;
2305         void *aux_data;
2306         SDL_loblit blitfunc;
2307         enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
2308 };
2309 static const struct blit_table normal_blit_1[] = {
2310         /* Default for 8-bit RGB source, an invalid combination */
2311         { 0,0,0, 0, 0,0,0, 0, NULL, NULL },
2312 };
2313 static const struct blit_table normal_blit_2[] = {
2314 #if SDL_HERMES_BLITTERS
2315     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
2316       0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
2317     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
2318       0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
2319     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
2320       0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
2321 #elif SDL_ALTIVEC_BLITTERS
2322     /* has-altivec */
2323     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
2324       2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
2325     { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
2326       2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
2327 #endif
2328     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
2329       0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
2330     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
2331       0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
2332     { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
2333       0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
2334     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
2335       0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },
2336
2337     /* Default for 16-bit RGB source, used if no other blitter matches */
2338     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
2339 };
2340 static const struct blit_table normal_blit_3[] = {
2341         /* Default for 24-bit RGB source, never optimized */
2342     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
2343 };
2344 static const struct blit_table normal_blit_4[] = {
2345 #if SDL_HERMES_BLITTERS
2346     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
2347       1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
2348     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
2349       0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
2350     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
2351       1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
2352     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
2353       0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
2354     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
2355       1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
2356     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
2357       0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
2358     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
2359       1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
2360     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
2361       0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
2362     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
2363       1, ConvertMMXpII32_24RGB888, ConvertMMX, NO_ALPHA },
2364     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
2365       0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
2366     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
2367       0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
2368     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
2369       0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
2370     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
2371       0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
2372     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
2373       0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
2374 #else
2375 #if SDL_ALTIVEC_BLITTERS
2376     /* has-altivec | dont-use-prefetch */
2377     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
2378       6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
2379     /* has-altivec */
2380     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
2381       2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
2382     /* has-altivec */
2383     { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
2384       2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
2385 #endif
2386     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
2387       0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
2388     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
2389       0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
2390 #endif
2391 #ifdef __ARM_NEON__
2392     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x00FF0000,0x0000FF00,0x000000FF,
2393       0, NULL, BlitARGBtoXRGB_neon, NO_ALPHA | SET_ALPHA },
2394     { 0x000000FF,0x0000FF00,0x00FF0000, 4, 0x00FF0000,0x0000FF00,0x000000FF,
2395       0, NULL, BlitABGRtoXRGB_neon, NO_ALPHA | SET_ALPHA },
2396     /* RGB->BGR is same as BGR->RGB */
2397     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
2398       0, NULL, BlitABGRtoXRGB_neon, NO_ALPHA | SET_ALPHA },
2399 #endif
2400         /* Default for 32-bit RGB source, used if no other blitter matches */
2401         { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
2402 };
2403 static const struct blit_table *normal_blit[] = {
2404         normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
2405 };
2406
/* True when format mask x equals table mask y, or when y is 0 (wildcard) */
#define MASKOK(x, y) (((x) == (y)) || ((y) == 0))
2409
2410 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
2411 {
2412         struct private_swaccel *sdata;
2413         SDL_PixelFormat *srcfmt;
2414         SDL_PixelFormat *dstfmt;
2415         const struct blit_table *table;
2416         int which;
2417         SDL_loblit blitfun;
2418
2419         /* Set up data for choosing the blit */
2420         sdata = surface->map->sw_data;
2421         srcfmt = surface->format;
2422         dstfmt = surface->map->dst->format;
2423
2424         if ( blit_index & 2 ) {
2425                 /* alpha or alpha+colorkey */
2426                 return SDL_CalculateAlphaBlit(surface, blit_index);
2427         }
2428
2429         /* We don't support destinations less than 8-bits */
2430         if ( dstfmt->BitsPerPixel < 8 ) {
2431                 return(NULL);
2432         }
2433         
2434         if(blit_index == 1) {
2435             /* colorkey blit: Here we don't have too many options, mostly
2436                because RLE is the preferred fast way to deal with this.
2437                If a particular case turns out to be useful we'll add it. */
2438
2439             if(srcfmt->BytesPerPixel == 2
2440                && surface->map->identity)
2441                 return Blit2to2Key;
2442             else if(dstfmt->BytesPerPixel == 1)
2443                 return BlitNto1Key;
2444             else {
2445 #if SDL_ALTIVEC_BLITTERS
2446         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
2447             return Blit32to32KeyAltivec;
2448         } else
2449 #endif
2450
2451                 if(srcfmt->Amask && dstfmt->Amask)
2452                     return BlitNtoNKeyCopyAlpha;
2453                 else
2454                     return BlitNtoNKey;
2455             }
2456         }
2457
2458         blitfun = NULL;
2459         if ( dstfmt->BitsPerPixel == 8 ) {
2460                 /* We assume 8-bit destinations are palettized */
2461                 if ( (srcfmt->BytesPerPixel == 4) &&
2462                      (srcfmt->Rmask == 0x00FF0000) &&
2463                      (srcfmt->Gmask == 0x0000FF00) &&
2464                      (srcfmt->Bmask == 0x000000FF) ) {
2465                         if ( surface->map->table ) {
2466                                 blitfun = Blit_RGB888_index8_map;
2467                         } else {
2468 #if SDL_HERMES_BLITTERS
2469                                 sdata->aux_data = ConvertX86p32_8RGB332;
2470                                 blitfun = ConvertX86;
2471 #else
2472                                 blitfun = Blit_RGB888_index8;
2473 #endif
2474                         }
2475                 } else {
2476                         blitfun = BlitNto1;
2477                 }
2478         } else {
2479                 /* Now the meat, choose the blitter we want */
2480                 int a_need = NO_ALPHA;
2481                 if(dstfmt->Amask)
2482                     a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
2483                 table = normal_blit[srcfmt->BytesPerPixel-1];
2484                 for ( which=0; table[which].dstbpp; ++which ) {
2485                         if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
2486                             MASKOK(srcfmt->Gmask, table[which].srcG) &&
2487                             MASKOK(srcfmt->Bmask, table[which].srcB) &&
2488                             MASKOK(dstfmt->Rmask, table[which].dstR) &&
2489                             MASKOK(dstfmt->Gmask, table[which].dstG) &&
2490                             MASKOK(dstfmt->Bmask, table[which].dstB) &&
2491                             dstfmt->BytesPerPixel == table[which].dstbpp &&
2492                             (a_need & table[which].alpha) == a_need &&
2493                             ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
2494                                 break;
2495                 }
2496                 sdata->aux_data = table[which].aux_data;
2497                 blitfun = table[which].blitfunc;
2498
2499                 if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
2500                         /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
2501                         if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
2502                              srcfmt->Rmask == dstfmt->Rmask &&
2503                              srcfmt->Gmask == dstfmt->Gmask &&
2504                              srcfmt->Bmask == dstfmt->Bmask ) {
2505                                 blitfun = Blit4to4MaskAlpha;
2506                         } else if ( a_need == COPY_ALPHA ) {
2507                             blitfun = BlitNtoNCopyAlpha;
2508                         }
2509                 }
2510         }
2511
2512 #ifdef DEBUG_ASM
2513 #if SDL_HERMES_BLITTERS
2514         if ( blitfun == ConvertMMX )
2515                 fprintf(stderr, "Using mmx blit\n");
2516         else
2517         if ( blitfun == ConvertX86 )
2518                 fprintf(stderr, "Using asm blit\n");
2519         else
2520 #endif
2521         if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
2522                 fprintf(stderr, "Using C blit\n");
2523         else
2524                 fprintf(stderr, "Using optimized C blit\n");
2525 #endif /* DEBUG_ASM */
2526
2527         return(blitfun);
2528 }