22726e4d |
1 | /****************************************************************************** |
2 | * Arachnoid Graphics Plugin for Mupen64Plus |
3 | * http://bitbucket.org/wahrhaft/mupen64plus-video-arachnoid/ |
4 | * |
5 | * Copyright (C) 2009 Jon Ring |
6 | * Copyright (C) 2007 Kristofer Karlsson, Rickard Niklasson |
7 | * |
8 | * This program is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU General Public License |
10 | * as published by the Free Software Foundation; either version 2 |
11 | * of the License, or (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License |
19 | * along with this program; if not, write to the Free Software |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | *****************************************************************************/ |
22 | |
23 | #ifndef ASSEMBLER_H_ |
24 | #define ASSEMBLER_H_ |
25 | |
26 | #ifndef WIN32 |
27 | #include <cstring> |
28 | #endif |
// Reverses the byte order of every 32-bit word of a buffer, in place.
// Example: the bytes 80 37 12 40 become 40 12 37 80.
//
//   v     - buffer to convert; it is read and written as an array of
//           32-bit words (this code assumes unsigned int is 32 bits wide).
//   dwLen - buffer length in bytes. Only dwLen / 4 whole words are
//           converted, so it should be a multiple of 4; any remaining
//           bytes are left untouched.
inline void swapRomHeaderBytes(void *v, unsigned int dwLen)
{
    // Work on unsigned words: with a signed word, moving the low byte up by
    // 24 bits shifts a set bit into the sign position.
    unsigned int *words = (unsigned int*)v;
    const unsigned int wordCount = dwLen / 4;

    for (unsigned int i = 0; i < wordCount; ++i)
    {
        const unsigned int w = words[i];
        words[i] = (w >> 24)
                 | ((w >> 8) & 0x0000ff00u)
                 | ((w << 8) & 0x00ff0000u)
                 | (w << 24);
    }
}
43 | |
44 | |
// Returns the 16-bit value with its two bytes exchanged.
inline unsigned short swapword( unsigned short value )
{
    const unsigned int lowByte  = value & 0xffu;
    const unsigned int highByte = value >> 8;
    return (unsigned short)((lowByte << 8) | highByte);
}
49 | |
// Copies numBytes bytes from src to dest while undoing a 32-bit byte swap.
//
// The source is treated as a run of 4-byte-aligned words whose bytes are
// stored in reverse order. Each output byte is taken from the mirrored
// position (3 - offset) inside the aligned word it belongs to, so the bytes
// come out in un-swapped order.
//
//   src      - first (logical) byte to copy; does not have to be aligned.
//   dest     - receives numBytes bytes, written sequentially.
//   numBytes - number of bytes to produce.
//
// Because of the mirroring, the bytes that are read are not the range
// [src, src + numBytes): reads stay inside the aligned words covering that
// range, which can reach up to 3 bytes before src and past src + numBytes.
inline void UnswapCopy( void *src, void *dest, unsigned int numBytes )
{
#ifdef WIN32_ASM
    __asm
    {
        mov     ecx, 0
        mov     esi, dword ptr [src]
        mov     edi, dword ptr [dest]

        mov     ebx, esi
        and     ebx, 3              // ebx = offset of src inside its word

        cmp     ebx, 0
        jz      StartDWordLoop      // aligned source: no leading bytes
        neg     ebx
        add     ebx, 4              // ebx = 4 - offset = leading byte count

        cmp     ebx, [numBytes]
        jle     NotGreater
        mov     ebx, [numBytes]     // never copy more than was requested
NotGreater:
        mov     ecx, ebx
        xor     esi, 3              // mirror the address inside its word
        // NOTE(review): with numBytes == 0 and an unaligned src, ecx is 0
        // here; `loop` decrements before testing, so this looks like it would
        // run far more iterations than intended - confirm callers never do it.
LeadingLoop:                        // leading bytes, read backwards (un-swaps)
        mov     al, byte ptr [esi]
        mov     byte ptr [edi], al
        sub     esi, 1
        add     edi, 1
        loop    LeadingLoop
        add     esi, 5              // after a full leading run: next word start

StartDWordLoop:
        mov     ecx, dword ptr [numBytes]
        sub     ecx, ebx            // skip what the leading loop already copied

        mov     ebx, ecx
        and     ebx, 3              // ebx = trailing byte count
        shr     ecx, 2              // ecx = number of whole dwords

        cmp     ecx, 0              // nothing to do for whole dwords?
        jle     StartTrailingLoop

        // Whole words: load, reverse the byte order, store
DWordLoop:
        mov     eax, dword ptr [esi]
        bswap   eax
        mov     dword ptr [edi], eax
        add     esi, 4
        add     edi, 4
        loop    DWordLoop
StartTrailingLoop:
        cmp     ebx, 0
        jz      Done
        mov     ecx, ebx
        xor     esi, 3              // start from the mirrored end of the word

TrailingLoop:                       // trailing bytes, read backwards
        mov     al, byte ptr [esi]
        mov     byte ptr [edi], al
        sub     esi, 1
        add     edi, 1
        loop    TrailingLoop
Done:
    }
#else

    // Portable path: walk the output one byte at a time, tracking which
    // aligned source word we are in and the position inside it.
    long long lead = (long long)src & 3;        // offset of src in its word
    char *word = (char*)src - lead;             // start of the current word
    char *out = (char*)dest;
    int pos = lead;                             // position within the word

    for (unsigned int remaining = numBytes; remaining != 0; --remaining)
    {
        *out++ = word[3 - pos];                 // mirrored byte of the word
        if (++pos > 3)
        {
            // Word exhausted: continue with the next aligned word.
            pos = 0;
            word += 4;
        }
    }

#endif
}
136 | |
// Swaps adjacent 32-bit values in place: word 0 with word 1, word 2 with
// word 3, and so on.
//
//   mem       - buffer to modify, accessed as an array of 32-bit values.
//   numDWords - number of swaps to perform; note that 2 * numDWords 32-bit
//               values are touched in total.
inline void DWordInterleave( void *mem, unsigned int numDWords )
{
#ifdef WIN32_ASM
    // NOTE(review): `loop` decrements ecx before testing it, so entering with
    // numDWords == 0 looks like it would not terminate promptly - confirm
    // callers never pass 0 on this path.
    __asm {
        mov     esi, dword ptr [mem]
        mov     edi, dword ptr [mem]
        add     edi, 4              // edi = second word of the pair
        mov     ecx, dword ptr [numDWords]
DWordInterleaveLoop:
        mov     eax, dword ptr [esi]
        mov     ebx, dword ptr [edi]
        mov     dword ptr [esi], ebx
        mov     dword ptr [edi], eax
        add     esi, 8              // advance both pointers to the next pair
        add     edi, 8
        loop    DWordInterleaveLoop
    }
#else
    int *words = (int*)mem;
    unsigned int pair = 0;
    while (pair < numDWords)
    {
        const unsigned int lo = 2 * pair;
        const unsigned int hi = 2 * pair + 1;

        const int held = words[hi];
        words[hi] = words[lo];
        words[lo] = held;

        ++pair;
    }
#endif
}
164 | |
// Swaps adjacent 64-bit values in place: qword 0 with qword 1, qword 2 with
// qword 3, and so on.
//
//   mem       - buffer to modify, accessed as an array of 64-bit values.
//   numDWords - numDWords / 2 swaps are performed (integer division), each
//               covering 16 bytes of the buffer.
inline void QWordInterleave( void *mem, unsigned int numDWords )
{
#ifdef WIN32_ASM
    // NOTE(review): ecx is numDWords >> 1 and `loop` decrements before
    // testing, so numDWords < 2 looks like it would not terminate promptly -
    // confirm callers never pass such a value on this path.
    __asm
    {
        // Interleave the line on the qword
        mov     esi, dword ptr [mem]
        mov     edi, dword ptr [mem]
        add     edi, 8              // edi = second qword of the pair
        mov     ecx, dword ptr [numDWords]
        shr     ecx, 1
QWordInterleaveLoop:
        // Low halves of the two qwords
        mov     eax, dword ptr [esi]
        mov     ebx, dword ptr [edi]
        mov     dword ptr [esi], ebx
        mov     dword ptr [edi], eax
        add     esi, 4
        add     edi, 4
        // High halves of the two qwords
        mov     eax, dword ptr [esi]
        mov     ebx, dword ptr [edi]
        mov     dword ptr [esi], ebx
        mov     dword ptr [edi], eax
        add     esi, 12             // 4 + 12 = 16 bytes: on to the next pair
        add     edi, 12
        loop    QWordInterleaveLoop
    }
#else
    long long *qwords = (long long*)mem;
    const unsigned int swapCount = numDWords / 2;
    unsigned int pair = 0;
    while (pair < swapCount)
    {
        const unsigned int lo = 2 * pair;
        const unsigned int hi = 2 * pair + 1;

        const long long held = qwords[hi];
        qwords[hi] = qwords[lo];
        qwords[lo] = held;

        ++pair;
    }
#endif
}
201 | |
// Expands a 5-bit value (index 0..31) to 8 bits.
// Entry i equals i * 255 / 31 rounded to the nearest integer, so 0 maps to
// 0x00 and 31 maps to 0xFF.
const unsigned char Five2Eight[32] =
{
    0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3A,   // 00000 .. 00111
    0x42, 0x4A, 0x52, 0x5A, 0x63, 0x6B, 0x73, 0x7B,   // 01000 .. 01111
    0x84, 0x8C, 0x94, 0x9C, 0xA5, 0xAD, 0xB5, 0xBD,   // 10000 .. 10111
    0xC5, 0xCE, 0xD6, 0xDE, 0xE6, 0xEF, 0xF7, 0xFF    // 11000 .. 11111
};
237 | |
// Expands a 4-bit value (index 0..15) to 8 bits by repeating the nibble in
// both halves of the byte (entry i equals i * 17).
const unsigned char Four2Eight[16] =
{
    0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,   // 0000 .. 0111
    0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF    // 1000 .. 1111
};
257 | |
// Expands a 3-bit value (index 0..7) to 4 bits.
// Entry i equals i * 15 / 7 rounded to the nearest integer.
const unsigned char Three2Four[8] =
{
    0x0, 0x2, 0x4, 0x6,   // 000 .. 011
    0x9, 0xB, 0xD, 0xF,   // 100 .. 111
};
269 | |
// Expands a 3-bit value (index 0..7) to 8 bits.
// Entry i equals i * 255 / 7 rounded to the nearest integer.
const unsigned char Three2Eight[8] =
{
    0x00, 0x24, 0x49, 0x6D,   // 000 .. 011
    0x92, 0xB6, 0xDB, 0xFF,   // 100 .. 111
};
// Expands a 2-bit value (index 0..3) to 8 bits by repeating the bit pair
// four times (entry i equals i * 85).
const unsigned char Two2Eight[4] =
{
    0x00, 0x55, 0xAA, 0xFF   // 00, 01, 10, 11
};
288 | |
// Expands a single bit (index 0..1) to 4 bits: 0 -> 0000, 1 -> 1111.
const unsigned char One2Four[2] =
{
    0x0, 0xF,
};
294 | |
// Expands a single bit (index 0..1) to 8 bits: 0 -> 00000000, 1 -> 11111111.
const unsigned char One2Eight[2] =
{
    0x00, 0xFF,
};
300 | |
// Packs a 32-bit colour (four 8-bit components) into 16 bits by keeping the
// upper nibble of each byte and reversing the component order:
//   bits 31..28 -> bits  3..0
//   bits 23..20 -> bits  7..4
//   bits 15..12 -> bits 11..8
//   bits  7..4  -> bits 15..12
inline unsigned short RGBA8888_RGBA4444( unsigned int color )
{
    const unsigned int fromByte3 = (color >> 28) & 0xFu;
    const unsigned int fromByte2 = (color >> 20) & 0xFu;
    const unsigned int fromByte1 = (color >> 12) & 0xFu;
    const unsigned int fromByte0 = (color >> 4) & 0xFu;

    return (unsigned short)(fromByte3
                          | (fromByte2 << 4)
                          | (fromByte1 << 8)
                          | (fromByte0 << 12));
}
306 | |
307 | inline unsigned int RGBA5551_RGBA8888( unsigned short color ) |
308 | { |
309 | int rgba; |
310 | char *p = (char*)&rgba; |
311 | color = (color >> 8) | (color << 8); |
312 | p[3] = One2Eight[color & 1]; |
313 | p[2] = Five2Eight[color >> 1 & 0x1f]; |
314 | p[1] = Five2Eight[color >> 6 & 0x1f]; |
315 | p[0] = Five2Eight[color >> 11 & 0x1f]; |
316 | return rgba; |
317 | } |
318 | |
// No format conversion is needed here: the pixel only has its two bytes
// exchanged.
inline unsigned short RGBA5551_RGBA5551( unsigned short color )
{
    const unsigned int lowByte  = color & 0xffu;
    const unsigned int highByte = color >> 8;
    return (unsigned short)((lowByte << 8) | highByte);
}
324 | |
// Expands a 16-bit two-component pixel to 32 bits.
//
// The low byte of `color` is placed in bits 7..0 and 15..8, and the whole
// 16-bit value is placed in bits 31..16, so the low byte ends up in the three
// low bytes of the result and the high byte in the top byte (presumably
// intensity and alpha respectively, going by the function name).
inline unsigned int IA88_RGBA8888( unsigned short color )
{
    // Widen to unsigned first: `color << 16` on the promoted (signed) int
    // would shift set bits into or past the sign bit for color >= 0x8000.
    const unsigned int wide = color;
    const unsigned int low  = wide & 0xFFu;
    return low | (low << 8) | (wide << 16);
}
329 | |
// Reduces a 16-bit two-component pixel to four 4-bit components.
//
// The upper nibble of the low byte is replicated into bits 15..12, 11..8 and
// 7..4 of the result; the upper nibble of the high byte goes to bits 3..0.
inline unsigned short IA88_RGBA4444( unsigned short color )
{
    const unsigned int lowByteTop  = color & 0xf0u;   // stays at bits 7..4
    const unsigned int highByteTop = color >> 12;     // lands at bits 3..0

    // 0x111 * x == x | (x << 4) | (x << 8) because the copies do not overlap.
    return (unsigned short)(highByteTop | (lowByteTop * 0x111u));
}
335 | |
// Expands an 8-bit pixel made of two nibbles to four 4-bit components.
//
// The input byte is kept as the low byte of the result (upper nibble at bits
// 7..4, lower nibble at bits 3..0), and the upper nibble is additionally
// copied to bits 11..8 and 15..12.
inline unsigned short IA44_RGBA4444( unsigned char color )
{
    const unsigned int upper = color >> 4;

    // 0x1100 * x == (x << 8) | (x << 12) because the copies do not overlap.
    return (unsigned short)(color | (upper * 0x1100u));
}
341 | |
// Expands an 8-bit pixel made of two nibbles to four 8-bit components.
//
// The upper nibble of `color` is repeated through bits 23..0 (three bytes,
// each holding the nibble twice); the lower nibble is repeated twice in bits
// 31..24. Presumably upper = intensity and lower = alpha, going by the name.
inline unsigned int IA44_RGBA8888( unsigned char color )
{
    // Unsigned arithmetic throughout: the original shifted a promoted
    // (signed) int left by 28, which overflows for lower nibbles >= 8.
    const unsigned int upper = color >> 4;
    const unsigned int lower = color & 0x0Fu;

    // The multipliers place non-overlapping copies of each nibble:
    //   0x00111111 -> nibble at bits 0, 4, 8, 12, 16, 20
    //   0x11000000 -> nibble at bits 24, 28
    return (upper * 0x00111111u) | (lower * 0x11000000u);
}
348 | |
349 | inline unsigned short IA31_RGBA4444( unsigned char color ) |
350 | { |
351 | unsigned char t = Three2Four[color >> 1]; |
352 | return One2Four[color & 1] | (t << 4) | (t << 8) | (t << 12); |
353 | } |
354 | |
355 | inline unsigned int IA31_RGBA8888( unsigned char color ) |
356 | { |
357 | unsigned char t = Three2Eight[color >> 1]; |
358 | return t | (t << 8) | (t << 16) | (One2Eight[color & 1] << 24); |
359 | } |
360 | |
// Reduces an 8-bit value to 4 bits (its upper nibble) and repeats that nibble
// in all four 4-bit components of the result.
inline unsigned short I8_RGBA4444( unsigned char color )
{
    const unsigned int nibble = (color & 0xf0u) >> 4;

    // 0x1111 * n places n at bits 0, 4, 8 and 12.
    return (unsigned short)(nibble * 0x1111u);
}
366 | |
// Repeats an 8-bit value in all four bytes of the 32-bit result.
inline unsigned int I8_RGBA8888( unsigned char color )
{
    // Widen to unsigned first: `color << 24` on the promoted (signed) int
    // would shift into the sign bit for color >= 0x80.
    const unsigned int c = color;
    return c | (c << 8) | (c << 16) | (c << 24);
}
371 | |
// Repeats the lower nibble of `color` in all four 4-bit components of the
// result; the upper nibble of the input is ignored.
inline unsigned short I4_RGBA4444( unsigned char color )
{
    const unsigned int nibble = color & 0x0fu;

    // 0x1111 * n places n at bits 0, 4, 8 and 12.
    return (unsigned short)(nibble * 0x1111u);
}
377 | |
// Widens the lower nibble of `color` to 8 bits and repeats that byte in all
// four bytes of the 32-bit result; the upper nibble of the input is ignored.
inline unsigned int I4_RGBA8888( unsigned char color )
{
    // Mask to 4 bits, matching I4_RGBA4444: the original indexed the 16-entry
    // Four2Eight table with the raw byte, reading out of bounds for
    // color > 15. nibble * 0x11 is exactly the value Four2Eight[nibble] holds.
    const unsigned int nibble = color & 0x0Fu;
    const unsigned int byte   = nibble * 0x11u;

    // Unsigned arithmetic avoids shifting a signed int into its sign bit.
    return byte | (byte << 8) | (byte << 16) | (byte << 24);
}
383 | |
384 | #endif |