Added missing launcher
[mupen64plus-pandora.git] / source / gles2glide64 / src / GlideHQ / TextureFilters_hq4x.cpp
CommitLineData
98e75f2d 1/*
2 * Texture Filtering
3 * Version: 1.0
4 *
5 * Copyright (C) 2007 Hiroshi Morii All Rights Reserved.
6 * Email koolsmoky(at)users.sourceforge.net
7 * Web http://www.3dfxzone.it/koolsmoky
8 *
9 * this is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * this is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with GNU Make; see the file COPYING. If not, write to
21 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23
24/* Based on Maxim Stepin and Rice1964 hq4x code */
25
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "TextureFilters.h"
29
30#if !_16BPP_HACK
/* Lookup table from a 12-bit RGB444 colour to a packed (Y << 16) | (u << 8) | v
 * value; it is populated by hq4x_init(). */
static uint32 RGB444toYUV[4096];
/* Table lookup for an ARGB4444 pixel: the top (alpha) nibble is masked off so
 * the index always stays inside the 4096-entry table. The argument and the
 * whole expansion are parenthesized so that compound expressions such as
 * RGB444toYUV(a | b) are masked as a whole. */
#define RGB444toYUV(val) (RGB444toYUV[(val) & 0x0FFF]) /* val = ARGB4444 */
33
34/*inline static uint32 RGB444toYUV(uint32 val)
35{
36 uint32 r, g, b, Y, u, v;
37
38 r = (val & 0x0F00) >> 4;
39 g = (val & 0x00F0);
40 b = val & 0x000F;
41 r |= r >> 4;
42 g |= g >> 4;
43 b |= b << 4;
44
45 Y = (r + g + b) >> 2;
46 u = 128 + ((r - b) >> 2);
47 v = 128 + ((2*g - r - b)>>3);
48
49 return ((Y << 16) | (u << 8) | v);
50}*/
51
/*
 * Convert an RGB555 pixel to the packed value (Y << 16) | (u << 8) | v.
 *
 * val: pixel with red in bits 14..10, green in bits 9..5 and blue in
 *      bits 4..0; bit 15 is ignored.
 *
 * Returns a value in which only the low 24 bits are ever set.
 */
static uint32 RGB555toYUV(uint32 val)
{
  uint32 r, g, b, Y, u, v;

  /* Move each 5-bit channel to the top of a byte ... */
  r = (val & 0x7C00) >> 7;
  g = (val & 0x03E0) >> 2;
  b = (val & 0x001F) << 3;
  /* ... and replicate its top bits into the vacated low bits. */
  r |= r >> 5;
  g |= g >> 5;
  b |= b >> 5;

  Y = (r + g + b) >> 2;
  /* The 128 offset is added (pre-scaled) before shifting so the unsigned
   * intermediate can never wrap; shifting a wrapped difference would leave
   * stray high bits in the returned value. */
  u = (0x00000200 + r - b) >> 2;
  v = (0x00000400 + (g << 1) - r - b) >> 3;

  return ((Y << 16) | (u << 8) | v);
}
69
/*
 * Convert an RGB565 pixel to the packed value (Y << 16) | (u << 8) | v.
 *
 * val: pixel with red in bits 15..11, green in bits 10..5 and blue in
 *      bits 4..0.
 *
 * Returns a value in which only the low 24 bits are ever set.
 */
static uint32 RGB565toYUV(uint32 val)
{
  uint32 r, g, b, Y, u, v;

  /* Move each channel to the top of a byte ... */
  r = (val & 0xF800) >> 8;
  g = (val & 0x07E0) >> 3;
  b = (val & 0x001F) << 3;
  /* ... and replicate its top bits into the vacated low bits
   * (green is 6 bits wide, so it only has 2 bits to fill). */
  r |= r >> 5;
  g |= g >> 6;
  b |= b >> 5;

  Y = (r + g + b) >> 2;
  /* The 128 offset is added (pre-scaled) before shifting so the unsigned
   * intermediate can never wrap; shifting a wrapped difference would leave
   * stray high bits in the returned value. */
  u = (0x00000200 + r - b) >> 2;
  v = (0x00000400 + (g << 1) - r - b) >> 3;

  return ((Y << 16) | (u << 8) | v);
}
87#endif /* !_16BPP_HACK */
88
/*
 * Convert a 32-bit pixel to the packed value (Y << 16) | (u << 8) | v.
 *
 * val: pixel with red in bits 23..16, green in bits 15..8 and blue in
 *      bits 7..0; the top byte is ignored.
 *
 * (A disabled MSVC inline-assembly variant of this routine used to sit here
 * behind "#if 0"; only the portable C path is kept.)
 */
static uint32 RGB888toYUV(uint32 val)
{
  const uint32 red   = (val >> 16) & 0xff;
  const uint32 green = (val >> 8) & 0xff;
  const uint32 blue  = val & 0xff;

  /* The 128 offset of u and v is folded in before the shift, so the unsigned
   * intermediates stay non-negative. */
  const uint32 luma = (red + green + blue) >> 2;
  const uint32 cu   = (0x00000200 + red - blue) >> 2;
  const uint32 cv   = (0x00000400 + 2 * green - red - blue) >> 3;

  return (luma << 16) | (cu << 8) | cv;
}
142
/* Bit fields of the packed value produced by the RGB*toYUV helpers:
 * Y in bits 23..16, U in bits 15..8, V in bits 7..0. */
#define Ymask 0x00FF0000
#define Umask 0x0000FF00
#define Vmask 0x000000FF
/* Per-channel difference thresholds, expressed in the same bit positions as
 * the masks above so they can be compared against masked differences
 * without shifting. */
#define trY 0x00300000 /* 0x30 in the Y field */
#define trU 0x00000700 /* 0x07 in the U field */
#define trV 0x00000006 /* 0x06 in the V field */
149
/*
 * Weighted pixel blends.
 *
 * Every pixel format <n> supplies three lane macros:
 *   INTERP_<n>_MASK_1_3(v)            keeps one set of alternating channels,
 *   INTERP_<n>_MASK_SHIFT_2_4(v)      extracts the other set, shifted down,
 *   INTERP_<n>_MASK_SHIFTBACK_2_4(v)  masks a result and shifts it back up.
 * Spreading the channels over two words leaves empty bits between them, so
 * a weighted sum can be formed for several channels at once before the
 * result is divided and masked back into place.
 *
 * HQ4X_BLEND2 / HQ4X_BLEND3 generate hq4x_Interp<id>_<n>(), which stores
 * (p1*wa + p2*wb [+ p3*wc]) / den, computed per channel, as a uint<b> at pc.
 */
#define HQ4X_BLEND2(id, n, b, wa, wb, den) \
static void hq4x_Interp##id##_##n (uint8 * pc, uint##b p1, uint##b p2) \
{ \
  *((uint##b*)pc) = \
      INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*(wa) \
                           + INTERP_##n##_MASK_1_3(p2)*(wb)) / (den)) \
    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*(wa) \
                                     + INTERP_##n##_MASK_SHIFT_2_4(p2)*(wb)) / (den)); \
}

#define HQ4X_BLEND3(id, n, b, wa, wb, wc, den) \
static void hq4x_Interp##id##_##n (uint8 * pc, uint##b p1, uint##b p2, uint##b p3) \
{ \
  *((uint##b*)pc) = \
      INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*(wa) \
                           + INTERP_##n##_MASK_1_3(p2)*(wb) \
                           + INTERP_##n##_MASK_1_3(p3)*(wc)) / (den)) \
    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*(wa) \
                                     + INTERP_##n##_MASK_SHIFT_2_4(p2)*(wb) \
                                     + INTERP_##n##_MASK_SHIFT_2_4(p3)*(wc)) / (den)); \
}

/* (p1*3 + p2) / 4 */
#define HQ4X_INTERP1(n, b) HQ4X_BLEND2(1, n, b, 3, 1, 4)
/* (p1*2 + p2 + p3) / 4 */
#define HQ4X_INTERP2(n, b) HQ4X_BLEND3(2, n, b, 2, 1, 1, 4)
/* (p1*7 + p2) / 8 */
#define HQ4X_INTERP3(n, b) HQ4X_BLEND2(3, n, b, 7, 1, 8)
/* (p1 + p2) / 2 */
#define HQ4X_INTERP5(n, b) HQ4X_BLEND2(5, n, b, 1, 1, 2)
/* (p1*5 + p2*2 + p3) / 8 */
#define HQ4X_INTERP6(n, b) HQ4X_BLEND3(6, n, b, 5, 2, 1, 8)
/* (p1*6 + p2 + p3) / 8 */
#define HQ4X_INTERP7(n, b) HQ4X_BLEND3(7, n, b, 6, 1, 1, 8)
/* (p1*5 + p2*3) / 8 */
#define HQ4X_INTERP8(n, b) HQ4X_BLEND2(8, n, b, 5, 3, 8)

#if !_16BPP_HACK
/* 4444: nibbles 0 and 2 in one lane set, nibbles 1 and 3 in the other. */
#define INTERP_4444_MASK_1_3(v) ((v) & 0x0F0F)
#define INTERP_4444_MASK_SHIFT_2_4(v) (((v) & 0xF0F0) >> 4)
#define INTERP_4444_MASK_SHIFTBACK_2_4(v) (INTERP_4444_MASK_1_3(v) << 4)
HQ4X_INTERP1(4444, 16)
HQ4X_INTERP2(4444, 16)
HQ4X_INTERP3(4444, 16)
HQ4X_INTERP5(4444, 16)
HQ4X_INTERP6(4444, 16)
HQ4X_INTERP7(4444, 16)
HQ4X_INTERP8(4444, 16)

/* 1555: bits 14..10 and 4..0 in one lane set, bit 15 and bits 9..5 in the other. */
#define INTERP_1555_MASK_1_3(v) ((v) & 0x7C1F)
#define INTERP_1555_MASK_SHIFT_2_4(v) (((v) & 0x83E0) >> 5)
#define INTERP_1555_MASK_SHIFTBACK_2_4(v) (INTERP_1555_MASK_1_3(v) << 5)
HQ4X_INTERP1(1555, 16)
HQ4X_INTERP2(1555, 16)
HQ4X_INTERP3(1555, 16)
HQ4X_INTERP5(1555, 16)
HQ4X_INTERP6(1555, 16)
HQ4X_INTERP7(1555, 16)
HQ4X_INTERP8(1555, 16)

/* 565: bits 15..11 and 4..0 in one lane set, bits 10..5 in the other. */
#define INTERP_565_MASK_1_3(v) ((v) & 0xF81F)
#define INTERP_565_MASK_SHIFT_2_4(v) (((v) & 0x7E0) >> 5)
#define INTERP_565_MASK_SHIFTBACK_2_4(v) (INTERP_565_MASK_1_3(v) << 5)
HQ4X_INTERP1(565, 16)
HQ4X_INTERP2(565, 16)
HQ4X_INTERP3(565, 16)
HQ4X_INTERP5(565, 16)
HQ4X_INTERP6(565, 16)
HQ4X_INTERP7(565, 16)
HQ4X_INTERP8(565, 16)
#endif /* !_16BPP_HACK */

/* 8888: bytes 0 and 2 in one lane set, bytes 1 and 3 in the other. */
#define INTERP_8888_MASK_1_3(v) ((v) & 0x00FF00FF)
#define INTERP_8888_MASK_SHIFT_2_4(v) (((v) & 0xFF00FF00) >> 8)
#define INTERP_8888_MASK_SHIFTBACK_2_4(v) (INTERP_8888_MASK_1_3(v) << 8)
HQ4X_INTERP1(8888, 32)
HQ4X_INTERP2(8888, 32)
HQ4X_INTERP3(8888, 32)
HQ4X_INTERP5(8888, 32)
HQ4X_INTERP6(8888, 32)
HQ4X_INTERP7(8888, 32)
HQ4X_INTERP8(8888, 32)
251
/*
 * Output-pixel writers for the 4x4 block produced from one source pixel.
 *
 * PIXELrc_kk writes the output pixel at row r, column c of the block:
 * rows are BpL bytes apart, columns are BPP bytes apart (BPP2 and BPP3 are
 * the byte offsets of columns 2 and 3).  The numeric suffix selects the
 * operation: _0 copies the centre pixel c[5] unchanged, every other suffix
 * calls one of the hq4x_Interp<N> blends on c[5] and its neighbours.
 * pOut, BpL, BPP*, c[] and the hq4x_Interp<N> aliases are provided by the
 * function that expands these macros.
 *
 * The plain copy is sized from c[5] itself, so exactly one pixel is written
 * whether pixels are 2 or 4 bytes wide; storing through an int pointer would
 * write 4 bytes even for 2-byte pixels and overwrite the neighbouring output
 * pixel (or run past the end of the buffer).
 */
#define HQ4X_COPY_CENTER(dst) memcpy((dst), &c[5], sizeof(c[5]));

#define PIXEL00_0 HQ4X_COPY_CENTER(pOut)
#define PIXEL00_11 hq4x_Interp1(pOut, c[5], c[4]);
#define PIXEL00_12 hq4x_Interp1(pOut, c[5], c[2]);
#define PIXEL00_20 hq4x_Interp2(pOut, c[5], c[2], c[4]);
#define PIXEL00_50 hq4x_Interp5(pOut, c[2], c[4]);
#define PIXEL00_80 hq4x_Interp8(pOut, c[5], c[1]);
#define PIXEL00_81 hq4x_Interp8(pOut, c[5], c[4]);
#define PIXEL00_82 hq4x_Interp8(pOut, c[5], c[2]);
#define PIXEL01_0 HQ4X_COPY_CENTER(pOut+BPP)
#define PIXEL01_10 hq4x_Interp1(pOut+BPP, c[5], c[1]);
#define PIXEL01_12 hq4x_Interp1(pOut+BPP, c[5], c[2]);
#define PIXEL01_14 hq4x_Interp1(pOut+BPP, c[2], c[5]);
#define PIXEL01_21 hq4x_Interp2(pOut+BPP, c[2], c[5], c[4]);
#define PIXEL01_31 hq4x_Interp3(pOut+BPP, c[5], c[4]);
#define PIXEL01_50 hq4x_Interp5(pOut+BPP, c[2], c[5]);
#define PIXEL01_60 hq4x_Interp6(pOut+BPP, c[5], c[2], c[4]);
#define PIXEL01_61 hq4x_Interp6(pOut+BPP, c[5], c[2], c[1]);
#define PIXEL01_82 hq4x_Interp8(pOut+BPP, c[5], c[2]);
#define PIXEL01_83 hq4x_Interp8(pOut+BPP, c[2], c[4]);
#define PIXEL02_0 HQ4X_COPY_CENTER(pOut+BPP2)
#define PIXEL02_10 hq4x_Interp1(pOut+BPP2, c[5], c[3]);
#define PIXEL02_11 hq4x_Interp1(pOut+BPP2, c[5], c[2]);
#define PIXEL02_13 hq4x_Interp1(pOut+BPP2, c[2], c[5]);
#define PIXEL02_21 hq4x_Interp2(pOut+BPP2, c[2], c[5], c[6]);
#define PIXEL02_32 hq4x_Interp3(pOut+BPP2, c[5], c[6]);
#define PIXEL02_50 hq4x_Interp5(pOut+BPP2, c[2], c[5]);
#define PIXEL02_60 hq4x_Interp6(pOut+BPP2, c[5], c[2], c[6]);
#define PIXEL02_61 hq4x_Interp6(pOut+BPP2, c[5], c[2], c[3]);
#define PIXEL02_81 hq4x_Interp8(pOut+BPP2, c[5], c[2]);
#define PIXEL02_83 hq4x_Interp8(pOut+BPP2, c[2], c[6]);
#define PIXEL03_0 HQ4X_COPY_CENTER(pOut+BPP3)
#define PIXEL03_11 hq4x_Interp1(pOut+BPP3, c[5], c[2]);
#define PIXEL03_12 hq4x_Interp1(pOut+BPP3, c[5], c[6]);
#define PIXEL03_20 hq4x_Interp2(pOut+BPP3, c[5], c[2], c[6]);
#define PIXEL03_50 hq4x_Interp5(pOut+BPP3, c[2], c[6]);
#define PIXEL03_80 hq4x_Interp8(pOut+BPP3, c[5], c[3]);
#define PIXEL03_81 hq4x_Interp8(pOut+BPP3, c[5], c[2]);
#define PIXEL03_82 hq4x_Interp8(pOut+BPP3, c[5], c[6]);
#define PIXEL10_0 HQ4X_COPY_CENTER(pOut+BpL)
#define PIXEL10_10 hq4x_Interp1(pOut+BpL, c[5], c[1]);
#define PIXEL10_11 hq4x_Interp1(pOut+BpL, c[5], c[4]);
#define PIXEL10_13 hq4x_Interp1(pOut+BpL, c[4], c[5]);
#define PIXEL10_21 hq4x_Interp2(pOut+BpL, c[4], c[5], c[2]);
#define PIXEL10_32 hq4x_Interp3(pOut+BpL, c[5], c[2]);
#define PIXEL10_50 hq4x_Interp5(pOut+BpL, c[4], c[5]);
#define PIXEL10_60 hq4x_Interp6(pOut+BpL, c[5], c[4], c[2]);
#define PIXEL10_61 hq4x_Interp6(pOut+BpL, c[5], c[4], c[1]);
#define PIXEL10_81 hq4x_Interp8(pOut+BpL, c[5], c[4]);
#define PIXEL10_83 hq4x_Interp8(pOut+BpL, c[4], c[2]);
#define PIXEL11_0 HQ4X_COPY_CENTER(pOut+BpL+BPP)
#define PIXEL11_30 hq4x_Interp3(pOut+BpL+BPP, c[5], c[1]);
#define PIXEL11_31 hq4x_Interp3(pOut+BpL+BPP, c[5], c[4]);
#define PIXEL11_32 hq4x_Interp3(pOut+BpL+BPP, c[5], c[2]);
#define PIXEL11_70 hq4x_Interp7(pOut+BpL+BPP, c[5], c[4], c[2]);
#define PIXEL12_0 HQ4X_COPY_CENTER(pOut+BpL+BPP2)
#define PIXEL12_30 hq4x_Interp3(pOut+BpL+BPP2, c[5], c[3]);
#define PIXEL12_31 hq4x_Interp3(pOut+BpL+BPP2, c[5], c[2]);
#define PIXEL12_32 hq4x_Interp3(pOut+BpL+BPP2, c[5], c[6]);
#define PIXEL12_70 hq4x_Interp7(pOut+BpL+BPP2, c[5], c[6], c[2]);
#define PIXEL13_0 HQ4X_COPY_CENTER(pOut+BpL+BPP3)
#define PIXEL13_10 hq4x_Interp1(pOut+BpL+BPP3, c[5], c[3]);
#define PIXEL13_12 hq4x_Interp1(pOut+BpL+BPP3, c[5], c[6]);
#define PIXEL13_14 hq4x_Interp1(pOut+BpL+BPP3, c[6], c[5]);
#define PIXEL13_21 hq4x_Interp2(pOut+BpL+BPP3, c[6], c[5], c[2]);
#define PIXEL13_31 hq4x_Interp3(pOut+BpL+BPP3, c[5], c[2]);
#define PIXEL13_50 hq4x_Interp5(pOut+BpL+BPP3, c[6], c[5]);
#define PIXEL13_60 hq4x_Interp6(pOut+BpL+BPP3, c[5], c[6], c[2]);
#define PIXEL13_61 hq4x_Interp6(pOut+BpL+BPP3, c[5], c[6], c[3]);
#define PIXEL13_82 hq4x_Interp8(pOut+BpL+BPP3, c[5], c[6]);
#define PIXEL13_83 hq4x_Interp8(pOut+BpL+BPP3, c[6], c[2]);
#define PIXEL20_0 HQ4X_COPY_CENTER(pOut+BpL+BpL)
#define PIXEL20_10 hq4x_Interp1(pOut+BpL+BpL, c[5], c[7]);
#define PIXEL20_12 hq4x_Interp1(pOut+BpL+BpL, c[5], c[4]);
#define PIXEL20_14 hq4x_Interp1(pOut+BpL+BpL, c[4], c[5]);
#define PIXEL20_21 hq4x_Interp2(pOut+BpL+BpL, c[4], c[5], c[8]);
#define PIXEL20_31 hq4x_Interp3(pOut+BpL+BpL, c[5], c[8]);
#define PIXEL20_50 hq4x_Interp5(pOut+BpL+BpL, c[4], c[5]);
#define PIXEL20_60 hq4x_Interp6(pOut+BpL+BpL, c[5], c[4], c[8]);
#define PIXEL20_61 hq4x_Interp6(pOut+BpL+BpL, c[5], c[4], c[7]);
#define PIXEL20_82 hq4x_Interp8(pOut+BpL+BpL, c[5], c[4]);
#define PIXEL20_83 hq4x_Interp8(pOut+BpL+BpL, c[4], c[8]);
#define PIXEL21_0 HQ4X_COPY_CENTER(pOut+BpL+BpL+BPP)
#define PIXEL21_30 hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[7]);
#define PIXEL21_31 hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[8]);
#define PIXEL21_32 hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[4]);
#define PIXEL21_70 hq4x_Interp7(pOut+BpL+BpL+BPP, c[5], c[4], c[8]);
#define PIXEL22_0 HQ4X_COPY_CENTER(pOut+BpL+BpL+BPP2)
#define PIXEL22_30 hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[9]);
#define PIXEL22_31 hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[6]);
#define PIXEL22_32 hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[8]);
#define PIXEL22_70 hq4x_Interp7(pOut+BpL+BpL+BPP2, c[5], c[6], c[8]);
#define PIXEL23_0 HQ4X_COPY_CENTER(pOut+BpL+BpL+BPP3)
#define PIXEL23_10 hq4x_Interp1(pOut+BpL+BpL+BPP3, c[5], c[9]);
#define PIXEL23_11 hq4x_Interp1(pOut+BpL+BpL+BPP3, c[5], c[6]);
#define PIXEL23_13 hq4x_Interp1(pOut+BpL+BpL+BPP3, c[6], c[5]);
#define PIXEL23_21 hq4x_Interp2(pOut+BpL+BpL+BPP3, c[6], c[5], c[8]);
#define PIXEL23_32 hq4x_Interp3(pOut+BpL+BpL+BPP3, c[5], c[8]);
#define PIXEL23_50 hq4x_Interp5(pOut+BpL+BpL+BPP3, c[6], c[5]);
#define PIXEL23_60 hq4x_Interp6(pOut+BpL+BpL+BPP3, c[5], c[6], c[8]);
#define PIXEL23_61 hq4x_Interp6(pOut+BpL+BpL+BPP3, c[5], c[6], c[9]);
#define PIXEL23_81 hq4x_Interp8(pOut+BpL+BpL+BPP3, c[5], c[6]);
#define PIXEL23_83 hq4x_Interp8(pOut+BpL+BpL+BPP3, c[6], c[8]);
#define PIXEL30_0 HQ4X_COPY_CENTER(pOut+BpL+BpL+BpL)
#define PIXEL30_11 hq4x_Interp1(pOut+BpL+BpL+BpL, c[5], c[8]);
#define PIXEL30_12 hq4x_Interp1(pOut+BpL+BpL+BpL, c[5], c[4]);
#define PIXEL30_20 hq4x_Interp2(pOut+BpL+BpL+BpL, c[5], c[8], c[4]);
#define PIXEL30_50 hq4x_Interp5(pOut+BpL+BpL+BpL, c[8], c[4]);
#define PIXEL30_80 hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[7]);
#define PIXEL30_81 hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[8]);
#define PIXEL30_82 hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[4]);
#define PIXEL31_0 HQ4X_COPY_CENTER(pOut+BpL+BpL+BpL+BPP)
#define PIXEL31_10 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[5], c[7]);
#define PIXEL31_11 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[5], c[8]);
#define PIXEL31_13 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[8], c[5]);
#define PIXEL31_21 hq4x_Interp2(pOut+BpL+BpL+BpL+BPP, c[8], c[5], c[4]);
#define PIXEL31_32 hq4x_Interp3(pOut+BpL+BpL+BpL+BPP, c[5], c[4]);
#define PIXEL31_50 hq4x_Interp5(pOut+BpL+BpL+BpL+BPP, c[8], c[5]);
#define PIXEL31_60 hq4x_Interp6(pOut+BpL+BpL+BpL+BPP, c[5], c[8], c[4]);
#define PIXEL31_61 hq4x_Interp6(pOut+BpL+BpL+BpL+BPP, c[5], c[8], c[7]);
#define PIXEL31_81 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP, c[5], c[8]);
#define PIXEL31_83 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP, c[8], c[4]);
#define PIXEL32_0 HQ4X_COPY_CENTER(pOut+BpL+BpL+BpL+BPP2)
#define PIXEL32_10 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[5], c[9]);
#define PIXEL32_12 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[5], c[8]);
#define PIXEL32_14 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[8], c[5]);
#define PIXEL32_21 hq4x_Interp2(pOut+BpL+BpL+BpL+BPP2, c[8], c[5], c[6]);
#define PIXEL32_31 hq4x_Interp3(pOut+BpL+BpL+BpL+BPP2, c[5], c[6]);
#define PIXEL32_50 hq4x_Interp5(pOut+BpL+BpL+BpL+BPP2, c[8], c[5]);
#define PIXEL32_60 hq4x_Interp6(pOut+BpL+BpL+BpL+BPP2, c[5], c[8], c[6]);
#define PIXEL32_61 hq4x_Interp6(pOut+BpL+BpL+BpL+BPP2, c[5], c[8], c[9]);
#define PIXEL32_82 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP2, c[5], c[8]);
#define PIXEL32_83 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP2, c[8], c[6]);
#define PIXEL33_0 HQ4X_COPY_CENTER(pOut+BpL+BpL+BpL+BPP3)
#define PIXEL33_11 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP3, c[5], c[6]);
#define PIXEL33_12 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP3, c[5], c[8]);
#define PIXEL33_20 hq4x_Interp2(pOut+BpL+BpL+BpL+BPP3, c[5], c[8], c[6]);
#define PIXEL33_50 hq4x_Interp5(pOut+BpL+BpL+BpL+BPP3, c[8], c[6]);
#define PIXEL33_80 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[9]);
#define PIXEL33_81 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[6]);
#define PIXEL33_82 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[8]);
392
393#define HQ4X_DIFF(n, b) \
394static int Diff_##n (uint##b w1, uint##b w2) \
395{ \
396 int YUV1, YUV2; \
397 YUV1 = RGB##n##toYUV(w1); \
398 YUV2 = RGB##n##toYUV(w2); \
399 return ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) || \
400 ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) || \
401 ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) ); \
402}
403
404HQ4X_DIFF(888, 32)
405
406#if !_16BPP_HACK
407HQ4X_DIFF(444, 16)
408HQ4X_DIFF(555, 16)
409HQ4X_DIFF(565, 16)
410
411void hq4x_4444(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
412{
413#define hq4x_Interp1 hq4x_Interp1_4444
414#define hq4x_Interp2 hq4x_Interp2_4444
415#define hq4x_Interp3 hq4x_Interp3_4444
416#define hq4x_Interp4 hq4x_Interp4_4444
417#define hq4x_Interp5 hq4x_Interp5_4444
418#define hq4x_Interp6 hq4x_Interp6_4444
419#define hq4x_Interp7 hq4x_Interp7_4444
420#define hq4x_Interp8 hq4x_Interp8_4444
421#define Diff Diff_444
422#define BPP 2
423#define BPP2 4
424#define BPP3 6
425
426 int i, j, k;
427 int prevline, nextline;
428 uint16 w[10];
429 uint16 c[10];
430
431 int pattern;
432 int flag;
433
434 int YUV1, YUV2;
435
436 // +----+----+----+
437 // | | | |
438 // | w1 | w2 | w3 |
439 // +----+----+----+
440 // | | | |
441 // | w4 | w5 | w6 |
442 // +----+----+----+
443 // | | | |
444 // | w7 | w8 | w9 |
445 // +----+----+----+
446
447 for (j = 0; j < Yres; j++) {
448 if (j>0) prevline = -SrcPPL*2; else prevline = 0;
449 if (j<Yres-1) nextline = SrcPPL*2; else nextline = 0;
450
451 for (i=0; i<Xres; i++) {
452 w[2] = *((uint16*)(pIn + prevline));
453 w[5] = *((uint16*)pIn);
454 w[8] = *((uint16*)(pIn + nextline));
455
456 if (i>0) {
457 w[1] = *((uint16*)(pIn + prevline - 2));
458 w[4] = *((uint16*)(pIn - 2));
459 w[7] = *((uint16*)(pIn + nextline - 2));
460 } else {
461 w[1] = w[2];
462 w[4] = w[5];
463 w[7] = w[8];
464 }
465
466 if (i<Xres-1) {
467 w[3] = *((uint16*)(pIn + prevline + 2));
468 w[6] = *((uint16*)(pIn + 2));
469 w[9] = *((uint16*)(pIn + nextline + 2));
470 } else {
471 w[3] = w[2];
472 w[6] = w[5];
473 w[9] = w[8];
474 }
475
476 pattern = 0;
477 flag = 1;
478
479 YUV1 = RGB444toYUV(w[5]);
480
481 for (k=1; k<=9; k++) {
482 if (k==5) continue;
483
484 if ( w[k] != w[5] ) {
485 YUV2 = RGB444toYUV(w[k]);
486 if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
487 ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
488 ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
489 pattern |= flag;
490 }
491 flag <<= 1;
492 }
493
494 for (k=1; k<=9; k++)
495 c[k] = w[k];
496
497#include "TextureFilters_hq4x.h"
498
499 pIn+=2;
500 pOut+=8;
501 }
502 pIn += 2*(SrcPPL-Xres);
503 pOut+= 8*(SrcPPL-Xres);
504 pOut+=BpL;
505 pOut+=BpL;
506 pOut+=BpL;
507 }
508
509#undef BPP
510#undef BPP2
511#undef BPP3
512#undef Diff
513#undef hq4x_Interp1
514#undef hq4x_Interp2
515#undef hq4x_Interp3
516#undef hq4x_Interp4
517#undef hq4x_Interp5
518#undef hq4x_Interp6
519#undef hq4x_Interp7
520#undef hq4x_Interp8
521}
522
523void hq4x_1555(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
524{
525#define hq4x_Interp1 hq4x_Interp1_1555
526#define hq4x_Interp2 hq4x_Interp2_1555
527#define hq4x_Interp3 hq4x_Interp3_1555
528#define hq4x_Interp4 hq4x_Interp4_1555
529#define hq4x_Interp5 hq4x_Interp5_1555
530#define hq4x_Interp6 hq4x_Interp6_1555
531#define hq4x_Interp7 hq4x_Interp7_1555
532#define hq4x_Interp8 hq4x_Interp8_1555
533#define Diff Diff_555
534#define BPP 2
535#define BPP2 4
536#define BPP3 6
537
538 int i, j, k;
539 int prevline, nextline;
540 uint16 w[10];
541 uint16 c[10];
542
543 int pattern;
544 int flag;
545
546 int YUV1, YUV2;
547
548 // +----+----+----+
549 // | | | |
550 // | w1 | w2 | w3 |
551 // +----+----+----+
552 // | | | |
553 // | w4 | w5 | w6 |
554 // +----+----+----+
555 // | | | |
556 // | w7 | w8 | w9 |
557 // +----+----+----+
558
559 for (j = 0; j < Yres; j++) {
560 if (j>0) prevline = -SrcPPL*2; else prevline = 0;
561 if (j<Yres-1) nextline = SrcPPL*2; else nextline = 0;
562
563 for (i=0; i<Xres; i++) {
564 w[2] = *((uint16*)(pIn + prevline));
565 w[5] = *((uint16*)pIn);
566 w[8] = *((uint16*)(pIn + nextline));
567
568 if (i>0) {
569 w[1] = *((uint16*)(pIn + prevline - 2));
570 w[4] = *((uint16*)(pIn - 2));
571 w[7] = *((uint16*)(pIn + nextline - 2));
572 } else {
573 w[1] = w[2];
574 w[4] = w[5];
575 w[7] = w[8];
576 }
577
578 if (i<Xres-1) {
579 w[3] = *((uint16*)(pIn + prevline + 2));
580 w[6] = *((uint16*)(pIn + 2));
581 w[9] = *((uint16*)(pIn + nextline + 2));
582 } else {
583 w[3] = w[2];
584 w[6] = w[5];
585 w[9] = w[8];
586 }
587
588 pattern = 0;
589 flag = 1;
590
591 YUV1 = RGB555toYUV(w[5]);
592
593 for (k=1; k<=9; k++) {
594 if (k==5) continue;
595
596 if ( w[k] != w[5] ) {
597 YUV2 = RGB555toYUV(w[k]);
598 if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
599 ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
600 ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
601 pattern |= flag;
602 }
603 flag <<= 1;
604 }
605
606 for (k=1; k<=9; k++)
607 c[k] = w[k];
608
609#include "TextureFilters_hq4x.h"
610
611 pIn+=2;
612 pOut+=8;
613 }
614 pIn += 2*(SrcPPL-Xres);
615 pOut+= 8*(SrcPPL-Xres);
616 pOut+=BpL;
617 pOut+=BpL;
618 pOut+=BpL;
619 }
620
621#undef BPP
622#undef BPP2
623#undef BPP3
624#undef Diff
625#undef hq4x_Interp1
626#undef hq4x_Interp2
627#undef hq4x_Interp3
628#undef hq4x_Interp4
629#undef hq4x_Interp5
630#undef hq4x_Interp6
631#undef hq4x_Interp7
632#undef hq4x_Interp8
633}
634
635void hq4x_565(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
636{
637#define hq4x_Interp1 hq4x_Interp1_565
638#define hq4x_Interp2 hq4x_Interp2_565
639#define hq4x_Interp3 hq4x_Interp3_565
640#define hq4x_Interp4 hq4x_Interp4_565
641#define hq4x_Interp5 hq4x_Interp5_565
642#define hq4x_Interp6 hq4x_Interp6_565
643#define hq4x_Interp7 hq4x_Interp7_565
644#define hq4x_Interp8 hq4x_Interp8_565
645#define Diff Diff_565
646#define BPP 2
647#define BPP2 4
648#define BPP3 6
649
650 int i, j, k;
651 int prevline, nextline;
652 uint16 w[10];
653 uint16 c[10];
654
655 int pattern;
656 int flag;
657
658 int YUV1, YUV2;
659
660 // +----+----+----+
661 // | | | |
662 // | w1 | w2 | w3 |
663 // +----+----+----+
664 // | | | |
665 // | w4 | w5 | w6 |
666 // +----+----+----+
667 // | | | |
668 // | w7 | w8 | w9 |
669 // +----+----+----+
670
671 for (j = 0; j < Yres; j++) {
672 if (j>0) prevline = -SrcPPL*2; else prevline = 0;
673 if (j<Yres-1) nextline = SrcPPL*2; else nextline = 0;
674
675 for (i=0; i<Xres; i++) {
676 w[2] = *((uint16*)(pIn + prevline));
677 w[5] = *((uint16*)pIn);
678 w[8] = *((uint16*)(pIn + nextline));
679
680 if (i>0) {
681 w[1] = *((uint16*)(pIn + prevline - 2));
682 w[4] = *((uint16*)(pIn - 2));
683 w[7] = *((uint16*)(pIn + nextline - 2));
684 } else {
685 w[1] = w[2];
686 w[4] = w[5];
687 w[7] = w[8];
688 }
689
690 if (i<Xres-1) {
691 w[3] = *((uint16*)(pIn + prevline + 2));
692 w[6] = *((uint16*)(pIn + 2));
693 w[9] = *((uint16*)(pIn + nextline + 2));
694 } else {
695 w[3] = w[2];
696 w[6] = w[5];
697 w[9] = w[8];
698 }
699
700 pattern = 0;
701 flag = 1;
702
703 YUV1 = RGB565toYUV(w[5]);
704
705 for (k=1; k<=9; k++) {
706 if (k==5) continue;
707
708 if ( w[k] != w[5] ) {
709 YUV2 = RGB565toYUV(w[k]);
710 if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
711 ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
712 ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
713 pattern |= flag;
714 }
715 flag <<= 1;
716 }
717
718 for (k=1; k<=9; k++)
719 c[k] = w[k];
720
721#include "TextureFilters_hq4x.h"
722
723 pIn+=2;
724 pOut+=8;
725 }
726 pIn += 2*(SrcPPL-Xres);
727 pOut+= 8*(SrcPPL-Xres);
728 pOut+=BpL;
729 pOut+=BpL;
730 pOut+=BpL;
731 }
732
733#undef BPP
734#undef BPP2
735#undef BPP3
736#undef Diff
737#undef hq4x_Interp1
738#undef hq4x_Interp2
739#undef hq4x_Interp3
740#undef hq4x_Interp4
741#undef hq4x_Interp5
742#undef hq4x_Interp6
743#undef hq4x_Interp7
744#undef hq4x_Interp8
745}
746#endif /* !_16BPP_HACK */
747
748void hq4x_8888(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
749{
750#define hq4x_Interp1 hq4x_Interp1_8888
751#define hq4x_Interp2 hq4x_Interp2_8888
752#define hq4x_Interp3 hq4x_Interp3_8888
753#define hq4x_Interp4 hq4x_Interp4_8888
754#define hq4x_Interp5 hq4x_Interp5_8888
755#define hq4x_Interp6 hq4x_Interp6_8888
756#define hq4x_Interp7 hq4x_Interp7_8888
757#define hq4x_Interp8 hq4x_Interp8_8888
758#define Diff Diff_888
759#define BPP 4
760#define BPP2 8
761#define BPP3 12
762
763 int i, j, k;
764 int prevline, nextline;
765 uint32 w[10];
766 uint32 c[10];
767
768 int pattern;
769 int flag;
770
771 int YUV1, YUV2;
772
773 // +----+----+----+
774 // | | | |
775 // | w1 | w2 | w3 |
776 // +----+----+----+
777 // | | | |
778 // | w4 | w5 | w6 |
779 // +----+----+----+
780 // | | | |
781 // | w7 | w8 | w9 |
782 // +----+----+----+
783
784 for (j = 0; j < Yres; j++) {
785 if (j>0) prevline = -SrcPPL*4; else prevline = 0;
786 if (j<Yres-1) nextline = SrcPPL*4; else nextline = 0;
787
788 for (i=0; i<Xres; i++) {
789 w[2] = *((uint32*)(pIn + prevline));
790 w[5] = *((uint32*)pIn);
791 w[8] = *((uint32*)(pIn + nextline));
792
793 if (i>0) {
794 w[1] = *((uint32*)(pIn + prevline - 4));
795 w[4] = *((uint32*)(pIn - 4));
796 w[7] = *((uint32*)(pIn + nextline - 4));
797 } else {
798 w[1] = w[2];
799 w[4] = w[5];
800 w[7] = w[8];
801 }
802
803 if (i<Xres-1) {
804 w[3] = *((uint32*)(pIn + prevline + 4));
805 w[6] = *((uint32*)(pIn + 4));
806 w[9] = *((uint32*)(pIn + nextline + 4));
807 } else {
808 w[3] = w[2];
809 w[6] = w[5];
810 w[9] = w[8];
811 }
812
813 pattern = 0;
814 flag = 1;
815
816 YUV1 = RGB888toYUV(w[5]);
817
818 for (k=1; k<=9; k++) {
819 if (k==5) continue;
820
821 if ( w[k] != w[5] ) {
822 YUV2 = RGB888toYUV(w[k]);
823 if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
824 ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
825 ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
826 pattern |= flag;
827 }
828 flag <<= 1;
829 }
830
831 for (k=1; k<=9; k++)
832 c[k] = w[k];
833
834#include "TextureFilters_hq4x.h"
835
836 pIn+=4;
837 pOut+=16;
838 }
839
840 pIn += 4*(SrcPPL-Xres);
841 pOut+= 16*(SrcPPL-Xres);
842 pOut+=BpL;
843 pOut+=BpL;
844 pOut+=BpL;
845 }
846
847#undef BPP
848#undef BPP2
849#undef BPP3
850#undef Diff
851#undef hq4x_Interp1
852#undef hq4x_Interp2
853#undef hq4x_Interp3
854#undef hq4x_Interp4
855#undef hq4x_Interp5
856#undef hq4x_Interp6
857#undef hq4x_Interp7
858#undef hq4x_Interp8
859}
860
861#if !_16BPP_HACK
862void hq4x_init(void)
863{
864 static int done = 0;
865 int r, g, b, Y, u, v, i, j, k;
866
867 if (done ) return;
868
869 for (i = 0; i < 16; i++) {
870 for (j = 0; j < 16; j++) {
871 for (k = 0; k < 16; k++) {
872 r = (i << 4) | i;
873 g = (j << 4) | j;
874 b = (k << 4) | k;
875
876 /* Microsoft's RGB888->YUV conversion */
877 /*Y = ((( 66 * r + 129 * g + 25 * b + 128) >> 8) + 16) & 0xFF;
878 u = ((( -38 * r - 74 * g + 112 * b + 128) >> 8) + 128) & 0xFF;
879 v = ((( 112 * r - 94 * g - 18 * b + 128) >> 8) + 128) & 0xFF;*/
880
881 Y = (r + g + b) >> 2;
882 u = 128 + ((r - b) >> 2);
883 v = 128 + ((-r + 2*g -b)>>3);
884
885 RGB444toYUV[(i << 8) | (j << 4) | k] = (Y << 16) | (u << 8) | v;
886 }
887 }
888 }
889
890 done = 1;
891}
892#endif /* !_16BPP_HACK */