unify NEON code, use preload
[sdl_omap.git] / src / video / SDL_blit_neon.S
CommitLineData
a1f34081 1/*
a1eff5db 2 * (C) Gražvydas "notaz" Ignotas, 2011,2012
a1f34081 3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11.text
12.align 2
13
@ do_argb - body of a pixel copy routine that overrides one byte per pixel.
@
@ C view of a routine built from this macro:
@   void f(void *dst, const void *src, int count, uint abits)
@ in:  r0 = dst, r1 = src, r2 = count (4-byte pixels), r3 = abits
@ \bgr2rgb: non-zero -> exchange byte 0 and byte 2 of every pixel
@
@ Pixels are handled 8 at a time.  The 4th byte of each output pixel is
@ replaced by the low byte of abits.  A trailing group of fewer than 8
@ pixels is handed to do_argb_finish; note that a full 32 bytes are still
@ loaded from src for that group.
@ Uses d0, d4-d7 and the flags; returns with bx lr.
.macro do_argb bgr2rgb
        vdup.i8         d0, r3                  @ abits in all 8 byte lanes
1:
        vld4.8          {d4-d7}, [r1]!          @ 8 pixels, one register per byte position
        vmov.i8         d7, d0                  @ 4th byte <- abits
.ifne \bgr2rgb
        vswp            d4, d6                  @ BGR->RGB
.endif
        subs            r2, r2, #8
        blt             do_argb_finish          @ fewer than 8 pixels were left
        vst4.8          {d4-d7}, [r0]!          @ NEON store leaves the flags from subs intact
        bxeq            lr                      @ that was exactly the last group
        nop
        b               1b
.endm
30
@ do_argb_alpha - body of an alpha blending routine (src blended onto dst).
@
@ C view of a routine built from this macro:
@   void f(void *dst, const void *src, int count, uint global_alpha)
@ in:  r0 = dst, r1 = src, r2 = count (4-byte pixels), r3 = global_alpha
@ \bgr2rgb:      non-zero -> exchange byte 0 and byte 2 of every src pixel
@ \global_alpha: non-zero -> blend factor is r3 for every pixel,
@                zero     -> blend factor is the 4th byte of each src pixel
@
@ For the first three bytes of each pixel:  d = (((s-d)*a+255)>>8)+d
@ The 4th byte of dst is written back unchanged.
@ Pixels are handled 8 at a time; a trailing group of fewer than 8 pixels is
@ handed to do_argb_finish (32 bytes are still loaded from both src and dst
@ for that group).
@ Uses r12, d0-d7, q8-q12 and the flags; returns with bx lr.
.macro do_argb_alpha bgr2rgb global_alpha
.ifne \global_alpha
        vdup.16         q11, r3                 @ constant blend factor in 16-bit lanes
.endif
        mov             r12, #0xff
        vdup.i16        q12, r12                @ rounding term (+255)
1:
        pld             [r1, #64*2]
        pld             [r0, #64*2]
        vld4.8          {d0-d3}, [r0]           @ 8 dst pixels (r0 is advanced by the store)
        vld4.8          {d4-d7}, [r1]!          @ 8 src pixels
.ifeq \global_alpha
        vmovl.u8        q11, d7                 @ per-pixel blend factor, widened to 16 bit
.endif
.ifne \bgr2rgb
        vswp            d4, d6                  @ BGR->RGB
.endif
        @ s-d, widened to 16 bit
        vsubl.u8        q8,  d4, d0
        vsubl.u8        q9,  d5, d1
        vsubl.u8        q10, d6, d2
        @ (s-d)*a
        vmul.s16        q8,  q8,  q11
        vmul.s16        q9,  q9,  q11
        vmul.s16        q10, q10, q11
        @ (... + 255) >> 8, narrowed back to bytes
        vaddhn.i16      d4, q8,  q12
        vaddhn.i16      d5, q9,  q12
        vaddhn.i16      d6, q10, q12
        @ ... + d
        vadd.i8         q2, q2, q0              @ d4 += d0, d5 += d1
        vadd.i8         d6, d6, d2
        vmov.i8         d7, d3                  @ keep the 4th byte of dst
        subs            r2, r2, #8
        blt             do_argb_finish          @ fewer than 8 pixels were left
        vst4.8          {d4-d7}, [r0]!          @ NEON store leaves the flags from subs intact
        bxeq            lr                      @ that was exactly the last group
        nop
        b               1b
.endm
69
70
@ do_argb_finish - store the final, partial group of pixels.
@
@ Branched to (not called) from the do_argb / do_argb_alpha loops.
@ in:  r0    = dst position of the partial group
@      r2    = (pixels left) - 8, i.e. negative
@      d4-d7 = 8 processed pixels, one register per byte position
@ Writes the first r2+8 pixels (at most 7) to dst and returns to the
@ original caller with bx lr.  Nothing is written when r2+8 <= 0, which
@ happens when the routine was entered with count <= 0.
do_argb_finish:
        adds            r2, r2, #8              @ r2 = pixels still to be stored
        bxle            lr                      @ empty blit: must not touch dst

        @ re-interleave the per-byte registers into whole pixels:
        @ afterwards d4 = pixels 0,1  d5 = 2,3  d6 = 4,5  d7 = 6,7
        vzip.8          d4, d5                  @ RRR..|GGG.. -> RGRG..
        vzip.8          d6, d7                  @ BBB..|XXX.. -> BXBX..
        vzip.16         q2, q3

        vst1.32         {d4[0]}, [r0]!
        cmp             r2, #1
        bxle            lr
        vst1.32         {d4[1]}, [r0]!
        cmp             r2, #2
        bxle            lr
        vst1.32         {d5[0]}, [r0]!
        cmp             r2, #3
        bxle            lr
        vst1.32         {d5[1]}, [r0]!
        cmp             r2, #4
        bxle            lr
        vst1.32         {d6[0]}, [r0]!
        cmp             r2, #5
        bxle            lr
        vst1.32         {d6[1]}, [r0]!
        cmp             r2, #6
        bxle            lr
        vst1.32         {d7[0]}, [r0]!          @ 7th pixel; a full group never gets here
        bx              lr
97
98
@ Exported routines.  All of them take
@   r0 = dst, r1 = src, r2 = count (pixels), r3 = abits or global_alpha
@ and return through bx lr, either inside the macro body or in
@ do_argb_finish; every macro body ends in an unconditional branch, so no
@ routine falls through into the next one.
@
@ Each symbol is marked as %function so that it is emitted as STT_FUNC;
@ the linker needs that to fix up calls from Thumb code, and tools need it
@ to treat the symbols as code.

@ plain copy, 4th byte of every pixel replaced by abits (r3)
        .global neon_ARGBtoXRGB
        .type   neon_ARGBtoXRGB, %function
neon_ARGBtoXRGB:
        do_argb 0
        .size   neon_ARGBtoXRGB, . - neon_ARGBtoXRGB

@ as above, with byte 0 and byte 2 of every pixel exchanged
        .global neon_ABGRtoXRGB
        .type   neon_ABGRtoXRGB, %function
neon_ABGRtoXRGB:
        do_argb 1
        .size   neon_ABGRtoXRGB, . - neon_ABGRtoXRGB

@ blend onto dst, blend factor taken from the 4th byte of each src pixel
        .global neon_ARGBtoXRGBalpha
        .type   neon_ARGBtoXRGBalpha, %function
neon_ARGBtoXRGBalpha:
        do_argb_alpha 0, 0
        .size   neon_ARGBtoXRGBalpha, . - neon_ARGBtoXRGBalpha

@ as above, with byte 0 and byte 2 of every src pixel exchanged
        .global neon_ABGRtoXRGBalpha
        .type   neon_ABGRtoXRGBalpha, %function
neon_ABGRtoXRGBalpha:
        do_argb_alpha 1, 0
        .size   neon_ABGRtoXRGBalpha, . - neon_ABGRtoXRGBalpha

@ blend onto dst, one blend factor (r3) for all pixels
        .global neon_ARGBtoXRGBalphaS
        .type   neon_ARGBtoXRGBalphaS, %function
neon_ARGBtoXRGBalphaS:
        do_argb_alpha 0, 1
        .size   neon_ARGBtoXRGBalphaS, . - neon_ARGBtoXRGBalphaS

@ as above, with byte 0 and byte 2 of every src pixel exchanged
        .global neon_ABGRtoXRGBalphaS
        .type   neon_ABGRtoXRGBalphaS, %function
neon_ABGRtoXRGBalphaS:
        do_argb_alpha 1, 1
        .size   neon_ABGRtoXRGBalphaS, . - neon_ABGRtoXRGBalphaS
bdfa6989 122
a1f34081 123@ vim:filetype=armasm