| 1 | /****************************************************************************** |
| 2 | * Arachnoid Graphics Plugin for Mupen64Plus |
| 3 | * http://bitbucket.org/wahrhaft/mupen64plus-video-arachnoid/ |
| 4 | * |
| 5 | * Copyright (C) 2009 Jon Ring |
| 6 | * Copyright (C) 2007 Kristofer Karlsson, Rickard Niklasson |
| 7 | * |
| 8 | * This program is free software; you can redistribute it and/or |
| 9 | * modify it under the terms of the GNU General Public License |
| 10 | * as published by the Free Software Foundation; either version 2 |
| 11 | * of the License, or (at your option) any later version. |
| 12 | * |
| 13 | * This program is distributed in the hope that it will be useful, |
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | * GNU General Public License for more details. |
| 17 | * |
| 18 | * You should have received a copy of the GNU General Public License |
| 19 | * along with this program; if not, write to the Free Software |
| 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 21 | *****************************************************************************/ |
| 22 | |
| 23 | #ifndef ASSEMBLER_H_ |
| 24 | #define ASSEMBLER_H_ |
| 25 | |
| 26 | #ifndef WIN32 |
| 27 | #include <cstring> |
| 28 | #endif |
// Reverses the byte order of every 4-byte group in a buffer, in place:
//   80 37 12 40  ->  40 12 37 80
//
// v     - start of the buffer to convert
// dwLen - length of the buffer in bytes; expected to be a multiple of 4.
//         Only complete 4-byte groups are processed, so a trailing
//         remainder of 1-3 bytes is left untouched.
//
// The swap is done one byte at a time, so the buffer does not need to be
// 4-byte aligned and no signed value is ever shifted into the sign bit.
inline void swapRomHeaderBytes(void *v, unsigned int dwLen)
{
    unsigned char *group = (unsigned char*)v;
    const unsigned int numGroups = dwLen / 4;

    for (unsigned int i = 0; i < numGroups; ++i, group += 4)
    {
        // Exchange the outer pair of bytes...
        const unsigned char outer = group[0];
        group[0] = group[3];
        group[3] = outer;

        // ...and then the inner pair.
        const unsigned char inner = group[1];
        group[1] = group[2];
        group[2] = inner;
    }
}
| 43 | |
| 44 | |
// Returns the 16-bit value with its two bytes exchanged (0x1234 -> 0x3412).
inline unsigned short swapword( unsigned short value )
{
    const unsigned int upperByte = value >> 8;
    const unsigned int lowerByte = value & 0xff;
    return (unsigned short)((lowerByte << 8) | upperByte);
}
| 49 | |
// Copies numBytes bytes from src to dest while undoing a byte swap that was
// applied to each 4-byte-aligned word of the source: the output byte for
// source address A is read from address (A ^ 3).
//
// src      - first source address to copy; it does not have to be aligned,
//            but reads are taken from the aligned 4-byte words that cover
//            the requested range (so bytes just outside [src, src+numBytes)
//            within those words may be read)
// dest     - destination buffer, receives exactly numBytes bytes
// numBytes - number of bytes to produce
inline void UnswapCopy( void *src, void *dest, unsigned int numBytes )
{
#ifdef WIN32_ASM
    __asm
    {
        mov     ecx, 0
        mov     esi, dword ptr [src]
        mov     edi, dword ptr [dest]

        mov     ebx, esi
        and     ebx, 3          // ebx = number of leading bytes

        cmp     ebx, 0
        jz      StartDWordLoop
        neg     ebx
        add     ebx, 4

        cmp     ebx, [numBytes]
        jle     NotGreater
        mov     ebx, [numBytes]
NotGreater:
        mov     ecx, ebx
        xor     esi, 3
LeadingLoop:                    // Copies leading bytes, in reverse order (un-swaps)
        mov     al, byte ptr [esi]
        mov     byte ptr [edi], al
        sub     esi, 1
        add     edi, 1
        loop    LeadingLoop
        add     esi, 5

StartDWordLoop:
        mov     ecx, dword ptr [numBytes]
        sub     ecx, ebx        // Don't copy what's already been copied

        mov     ebx, ecx
        and     ebx, 3
//      add     ecx, 3          // Round up to nearest dword
        shr     ecx, 2

        cmp     ecx, 0          // If there's nothing to do, don't do it
        jle     StartTrailingLoop

        // Copies from source to destination, bswap-ing first
DWordLoop:
        mov     eax, dword ptr [esi]
        bswap   eax
        mov     dword ptr [edi], eax
        add     esi, 4
        add     edi, 4
        loop    DWordLoop
StartTrailingLoop:
        cmp     ebx, 0
        jz      Done
        mov     ecx, ebx
        xor     esi, 3

TrailingLoop:
        mov     al, byte ptr [esi]
        mov     byte ptr [edi], al
        sub     esi, 1
        add     edi, 1
        loop    TrailingLoop
Done:
    }
#else

    // Portable path: step back to the aligned word that contains src, then
    // address every byte relative to that base. Flipping the two low bits of
    // the running offset (offset ^ 3) picks the mirrored byte inside the
    // current 4-byte word.
    const long long misalignment = (long long)src & 3;
    const char *alignedBase = (const char*)src - misalignment;
    char *out = (char*)dest;

    for (unsigned int copied = 0; copied < numBytes; ++copied)
    {
        const unsigned long long offset = (unsigned long long)misalignment + copied;
        out[copied] = alignedBase[offset ^ 3];
    }

#endif
}
| 136 | |
// Exchanges adjacent 32-bit words in place: word 0 <-> word 1,
// word 2 <-> word 3, and so on.
//
// mem       - buffer to modify; accessed as an array of int
// numDWords - number of swaps to perform; each swap touches two consecutive
//             words, so 2 * numDWords words of the buffer are rewritten
inline void DWordInterleave( void *mem, unsigned int numDWords )
{
#ifdef WIN32_ASM
    __asm {
        mov     esi, dword ptr [mem]
        mov     edi, dword ptr [mem]
        add     edi, 4
        mov     ecx, dword ptr [numDWords]
DWordInterleaveLoop:
        mov     eax, dword ptr [esi]
        mov     ebx, dword ptr [edi]
        mov     dword ptr [esi], ebx
        mov     dword ptr [edi], eax
        add     esi, 8
        add     edi, 8
        loop    DWordInterleaveLoop
    }
#else
    int *pair = (int*)mem;
    for (unsigned int remaining = numDWords; remaining != 0; --remaining)
    {
        const int first = pair[0];
        pair[0] = pair[1];
        pair[1] = first;
        pair += 2;      // advance to the next pair of words
    }
#endif
}
| 164 | |
// Exchanges adjacent 64-bit words in place: qword 0 <-> qword 1,
// qword 2 <-> qword 3, and so on.
//
// mem       - buffer to modify; accessed as an array of long long
// numDWords - the number of swaps performed is numDWords / 2 (integer
//             division); each swap touches two consecutive 64-bit words
inline void QWordInterleave( void *mem, unsigned int numDWords )
{
#ifdef WIN32_ASM
    __asm
    {
        // Interleave the line on the qword
        mov     esi, dword ptr [mem]
        mov     edi, dword ptr [mem]
        add     edi, 8
        mov     ecx, dword ptr [numDWords]
        shr     ecx, 1
QWordInterleaveLoop:
        mov     eax, dword ptr [esi]
        mov     ebx, dword ptr [edi]
        mov     dword ptr [esi], ebx
        mov     dword ptr [edi], eax
        add     esi, 4
        add     edi, 4
        mov     eax, dword ptr [esi]
        mov     ebx, dword ptr [edi]
        mov     dword ptr [esi], ebx
        mov     dword ptr [edi], eax
        add     esi, 12
        add     edi, 12
        loop    QWordInterleaveLoop
    }
#else
    long long *pair = (long long*)mem;
    for (unsigned int remaining = numDWords / 2; remaining != 0; --remaining)
    {
        const long long first = pair[0];
        pair[0] = pair[1];
        pair[1] = first;
        pair += 2;      // advance to the next pair of qwords
    }
#endif
}
| 201 | |
// Lookup table widening a 5-bit value (index 0..31) to 8 bits.
// Entry i equals i * 255 / 31 rounded to the nearest integer, so 0 maps to 0
// and 31 maps to 255.
const unsigned char Five2Eight[32] =
{
      0,   8,  16,  25,  33,  41,  49,  58,     // 00000 .. 00111
     66,  74,  82,  90,  99, 107, 115, 123,     // 01000 .. 01111
    132, 140, 148, 156, 165, 173, 181, 189,     // 10000 .. 10111
    197, 206, 214, 222, 230, 239, 247, 255      // 11000 .. 11111
};
| 237 | |
// Lookup table widening a 4-bit value (index 0..15) to 8 bits by repeating
// the nibble in both halves of the byte (entry i == i * 17).
const unsigned char Four2Eight[16] =
{
    0x00, 0x11, 0x22, 0x33,     // 0000 .. 0011
    0x44, 0x55, 0x66, 0x77,     // 0100 .. 0111
    0x88, 0x99, 0xAA, 0xBB,     // 1000 .. 1011
    0xCC, 0xDD, 0xEE, 0xFF      // 1100 .. 1111
};
| 257 | |
// Lookup table widening a 3-bit value (index 0..7) to 4 bits.
// Entry i equals i * 15 / 7 rounded to the nearest integer.
const unsigned char Three2Four[8] =
{
    0x0, 0x2, 0x4, 0x6,     // 000 .. 011  ->  0000 0010 0100 0110
    0x9, 0xB, 0xD, 0xF      // 100 .. 111  ->  1001 1011 1101 1111
};
| 269 | |
// Lookup table widening a 3-bit value (index 0..7) to 8 bits.
// Entry i equals i * 255 / 7 rounded to the nearest integer, which is the
// 3-bit pattern repeated across the byte (e.g. 101 -> 10110110).
const unsigned char Three2Eight[8] =
{
      0,  36,  73, 109,     // 000 .. 011
    146, 182, 219, 255      // 100 .. 111
};
// Lookup table widening a 2-bit value (index 0..3) to 8 bits by repeating
// the bit pair four times (entry i == i * 85).
const unsigned char Two2Eight[4] =
{
    0x00, 0x55, 0xAA, 0xFF      // 00, 01, 10, 11
};
| 288 | |
// Lookup table widening a single bit to 4 bits: 0 -> 0000, 1 -> 1111.
const unsigned char One2Four[2] = { 0x0, 0xF };
| 294 | |
// Lookup table widening a single bit to 8 bits: 0 -> 00000000, 1 -> 11111111.
const unsigned char One2Eight[2] = { 0x00, 0xFF };
| 300 | |
// Packs a 32-bit colour into 16 bits by keeping the upper nibble of each
// byte and reversing the order of the four nibbles:
//   input bits 28-31 -> output bits  0-3
//   input bits 20-23 -> output bits  4-7
//   input bits 12-15 -> output bits  8-11
//   input bits  4-7  -> output bits 12-15
inline unsigned short RGBA8888_RGBA4444( unsigned int color )
{
    const unsigned int fromByte3 = (color >> 28) & 0xF;
    const unsigned int fromByte2 = (color >> 20) & 0xF;
    const unsigned int fromByte1 = (color >> 12) & 0xF;
    const unsigned int fromByte0 = (color >>  4) & 0xF;

    return (unsigned short)(fromByte3 | (fromByte2 << 4) | (fromByte1 << 8) | (fromByte0 << 12));
}
| 306 | |
| 307 | inline unsigned int RGBA5551_RGBA8888( unsigned short color ) |
| 308 | { |
| 309 | int rgba; |
| 310 | char *p = (char*)&rgba; |
| 311 | color = (color >> 8) | (color << 8); |
| 312 | p[3] = One2Eight[color & 1]; |
| 313 | p[2] = Five2Eight[color >> 1 & 0x1f]; |
| 314 | p[1] = Five2Eight[color >> 6 & 0x1f]; |
| 315 | p[0] = Five2Eight[color >> 11 & 0x1f]; |
| 316 | return rgba; |
| 317 | } |
| 318 | |
// No format conversion is needed here: the only work is exchanging the two
// bytes of the 16-bit value.
inline unsigned short RGBA5551_RGBA5551( unsigned short color )
{
    const unsigned int movedUp   = (color & 0x00ff) << 8;
    const unsigned int movedDown = (color & 0xff00) >> 8;
    return (unsigned short)(movedUp | movedDown);
}
| 324 | |
// Expands a 16-bit two-channel value to 32 bits.
//
// The low byte of the input is replicated into bits 0-7, 8-15 and 16-23 of
// the result, and the high byte of the input ends up in bits 24-31.
// (Presumably the low byte is intensity and the high byte alpha, going by
// the function name.)
//
// The input is widened to unsigned int before shifting: shifting the
// promoted (signed) int left by 16 would overflow whenever bit 15 is set.
inline unsigned int IA88_RGBA8888( unsigned short color )
{
    const unsigned int wide = color;
    const unsigned int lowByte = wide & 0xFFu;
    return lowByte | (lowByte << 8) | (wide << 16);
}
| 329 | |
// Reduces a 16-bit two-channel value to four 4-bit channels.
//
// Bits 12-15 of the input become bits 0-3 of the result; bits 4-7 of the
// input are copied into bits 4-7, 8-11 and 12-15 of the result.
inline unsigned short IA88_RGBA4444( unsigned short color )
{
    const unsigned int topNibble = color >> 12;
    const unsigned int replicated = color & 0xf0;   // stays in bits 4-7

    unsigned int packed = topNibble;
    packed |= replicated;
    packed |= replicated << 4;
    packed |= replicated << 8;
    return (unsigned short)packed;
}
| 335 | |
// Expands an 8-bit value holding two nibbles to four 4-bit channels.
//
// The input byte is kept unchanged in bits 0-7 of the result, and its upper
// nibble is additionally copied into bits 8-11 and 12-15.
inline unsigned short IA44_RGBA4444( unsigned char color )
{
    const unsigned int upperNibble = color >> 4;

    unsigned int packed = color;
    packed |= upperNibble << 8;
    packed |= upperNibble << 12;
    return (unsigned short)packed;
}
| 341 | |
// Expands an 8-bit value holding two nibbles to four 8-bit channels.
//
// The upper nibble of the input is doubled into a byte (n -> nn) and that
// byte fills bits 0-7, 8-15 and 16-23 of the result; the lower nibble is
// doubled the same way and placed in bits 24-31.
//
// All arithmetic is done in unsigned int: with plain int promotion, shifting
// a lower nibble of 8 or more left by 28 would overflow the signed type.
inline unsigned int IA44_RGBA8888( unsigned char color )
{
    const unsigned int upperNibble = color >> 4;
    const unsigned int lowerNibble = color & 0x0f;

    const unsigned int upperByte = upperNibble | (upperNibble << 4);
    const unsigned int lowerByte = lowerNibble | (lowerNibble << 4);

    return upperByte | (upperByte << 8) | (upperByte << 16) | (lowerByte << 24);
}
| 348 | |
| 349 | inline unsigned short IA31_RGBA4444( unsigned char color ) |
| 350 | { |
| 351 | unsigned char t = Three2Four[color >> 1]; |
| 352 | return One2Four[color & 1] | (t << 4) | (t << 8) | (t << 12); |
| 353 | } |
| 354 | |
| 355 | inline unsigned int IA31_RGBA8888( unsigned char color ) |
| 356 | { |
| 357 | unsigned char t = Three2Eight[color >> 1]; |
| 358 | return t | (t << 8) | (t << 16) | (One2Eight[color & 1] << 24); |
| 359 | } |
| 360 | |
// Reduces an 8-bit value to 4 bits (its upper nibble) and replicates that
// nibble into all four 4-bit channels of the result. The lower nibble of the
// input is discarded.
inline unsigned short I8_RGBA4444( unsigned char color )
{
    const unsigned int nibble = color >> 4;
    const unsigned int twoChannels = nibble | (nibble << 4);
    return (unsigned short)(twoChannels | (twoChannels << 8));
}
| 366 | |
// Replicates an 8-bit value into all four bytes of a 32-bit result.
//
// The value is widened to unsigned int before shifting: with plain int
// promotion, shifting a value of 0x80 or more left by 24 would overflow the
// signed type.
inline unsigned int I8_RGBA8888( unsigned char color )
{
    const unsigned int value = color;
    return value | (value << 8) | (value << 16) | (value << 24);
}
| 371 | |
// Replicates the low nibble of the input into all four 4-bit channels of the
// result. The upper nibble of the input is ignored.
inline unsigned short I4_RGBA4444( unsigned char color )
{
    const unsigned int nibble = color & 0x0f;
    const unsigned int twoChannels = nibble | (nibble << 4);
    return (unsigned short)(twoChannels | (twoChannels << 8));
}
| 377 | |
| 378 | inline unsigned int I4_RGBA8888( unsigned char color ) |
| 379 | { |
| 380 | unsigned char b = Four2Eight[color]; |
| 381 | return b | (b << 8) | (b << 16) | (b << 24); |
| 382 | } |
| 383 | |
| 384 | #endif |