Glide Plugin GLES2 port from mupen64plus-ae, but with special FrameSkip code
[mupen64plus-pandora.git] / source / gles2glide64 / src / GlideHQ / TxQuantize.cpp
1/*
2 * Texture Filtering
3 * Version: 1.0
4 *
5 * Copyright (C) 2007 Hiroshi Morii All Rights Reserved.
6 * Email koolsmoky(at)users.sourceforge.net
7 * Web http://www.3dfxzone.it/koolsmoky
8 *
9 * this is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * this is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; see the file COPYING. If not, write to
21 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23
24#ifdef __MSC__
25#pragma warning(disable: 4786)
26#endif
27
28#include <functional>
29#include <thread>
30
31/* NOTE: This code is not optimized. It can be made faster. */
32
33#include "TxQuantize.h"
34
35TxQuantize::TxQuantize()
36{
37 _txUtil = new TxUtil();
38
39 /* get number of CPU cores. */
40 _numcore = _txUtil->getNumberofProcessors();
41
42 /* get dxtn extensions */
43 _tx_compress_fxt1 = TxLoadLib::getInstance()->getfxtCompressTexFuncExt();
44 _tx_compress_dxtn = TxLoadLib::getInstance()->getdxtCompressTexFuncExt();
45}
46
47
48TxQuantize::~TxQuantize()
49{
50 delete _txUtil;
51}
52
53void
54TxQuantize::ARGB1555_ARGB8888(uint32* src, uint32* dest, int width, int height)
55{
56#if 1
57 int siz = (width * height) >> 1;
58 int i;
59 for (i = 0; i < siz; i++) {
60 *dest = (((*src & 0x00008000) ? 0xff000000 : 0x00000000) |
61 ((*src & 0x00007c00) << 9) | ((*src & 0x00007000) << 4) |
62 ((*src & 0x000003e0) << 6) | ((*src & 0x00000380) << 1) |
63 ((*src & 0x0000001f) << 3) | ((*src & 0x0000001c) >> 2));
64 dest++;
65 *dest = (((*src & 0x80000000) ? 0xff000000 : 0x00000000) |
66 ((*src & 0x7c000000) >> 7) | ((*src & 0x70000000) >> 12) |
67 ((*src & 0x03e00000) >> 10) | ((*src & 0x03800000) >> 15) |
68 ((*src & 0x001f0000) >> 13) | ((*src & 0x001c0000) >> 18));
69 dest++;
70 src++;
71 }
72#else
73 int siz = (width * height) >> 1;
74
75 __asm {
76 push ebx;
77 push esi;
78 push edi;
79
80 mov esi, dword ptr [src];
81 mov edi, dword ptr [dest];
82 mov ecx, dword ptr [siz];
83
84 tc1_loop:
85 mov eax, dword ptr [esi];
86 add esi, 4;
87
88 // arrr rrgg gggb bbbb
89 // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
90 mov edx, eax; // edx = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
91 mov ebx, 0x00000000;
92 and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000
93 jz transparent1;
94 mov ebx, 0xff000000; // ebx = aaaaaaaa 00000000 00000000 00000000
95
96 transparent1:
97 mov eax, edx; // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
98 and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000
99 shl edx, 4; // edx = 00000000 00000rrr rr000000 00000000
100 or ebx, edx; // ebx = aaaaaaaa 00000rrr rr000000 00000000
101 shl edx, 5; // edx = 00000000 rrrrr000 00000000 00000000
102 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr rr000000 00000000
103 and ebx, 0xffff0000; // ebx = aaaaaaaa rrrrrrrr 00000000 00000000
104 mov edx, eax;
105 and edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000
106 shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000
107 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr 00000ggg gg000000
108 shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000
109 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg gg000000
110 and ebx, 0xffffff00; // ebx = aaaaaaaa rrrrrrrr gggggggg 00000000
111 mov edx, eax;
112 and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb
113 shl edx, 3; // edx = 00000000 00000000 00000000 bbbbb000
114 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbb000
115 shr edx, 5; // edx = 00000000 00000000 00000000 00000bbb
116 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
117
118 mov dword ptr [edi], ebx;
119 add edi, 4;
120
121 shr eax, 16; // eax = 00000000 00000000 arrrrrgg gggbbbbb
122 mov edx, eax; // edx = 00000000 00000000 arrrrrgg gggbbbbb
123 mov ebx, 0x00000000;
124 and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000
125 jz transparent2;
126 mov ebx, 0xff000000; // ebx = aaaaaaaa 00000000 00000000 00000000
127
128 transparent2:
129 mov eax, edx; // eax = 00000000 00000000 arrrrrgg gggbbbbb
130 and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000
131 shl edx, 4; // edx = 00000000 00000rrr rr000000 00000000
132 or ebx, edx; // ebx = aaaaaaaa 00000rrr rr000000 00000000
133 shl edx, 5; // edx = 00000000 rrrrr000 00000000 00000000
134 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr rr000000 00000000
135 and ebx, 0xffff0000; // ebx = aaaaaaaa rrrrrrrr 00000000 00000000
136 mov edx, eax;
137 and edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000
138 shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000
139 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr 00000ggg gg000000
140 shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000
141 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg gg000000
142 and ebx, 0xffffff00; // ebx = aaaaaaaa rrrrrrrr gggggggg 00000000
143 mov edx, eax;
144 and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb
145 shl edx, 3; // edx = 00000000 00000000 00000000 bbbbb000
146 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbb000
147 shr edx, 5; // edx = 00000000 00000000 00000000 00000bbb
148 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
149
150 mov dword ptr [edi], ebx;
151 add edi, 4;
152
153 dec ecx;
154 jnz tc1_loop;
155
156 pop edi;
157 pop esi;
158 pop ebx;
159 }
160#endif
161}
162
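/* Editor's note: the masked shifts above expand each 5-bit channel to 8 bits
 * by bit replication and turn the 1-bit alpha into 0x00 or 0xff. A minimal
 * single-texel sketch of the same arithmetic (illustrative only, not part of
 * the build; it assumes the uint16/uint32 typedefs from the GlideHQ headers):
 */
#if 0
static uint32 expandARGB1555(uint16 c)
{
  uint32 a = (c & 0x8000) ? 0xff : 0x00;
  uint32 r = (c >> 10) & 0x1f;
  uint32 g = (c >>  5) & 0x1f;
  uint32 b =  c        & 0x1f;
  r = (r << 3) | (r >> 2); /* replicate the top bits: 5 bits -> 8 bits */
  g = (g << 3) | (g >> 2);
  b = (b << 3) | (b >> 2);
  return (a << 24) | (r << 16) | (g << 8) | b;
}
#endif
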
163void
164TxQuantize::ARGB4444_ARGB8888(uint32* src, uint32* dest, int width, int height)
165{
166#if 1
167 int siz = (width * height) >> 1;
168 int i;
169 for (i = 0; i < siz; i++) {
170 *dest = ((*src & 0x0000f000) << 12) |
171 ((*src & 0x00000f00) << 8) |
172 ((*src & 0x000000f0) << 4) |
173 (*src & 0x0000000f);
174 *dest |= (*dest << 4);
175 dest++;
176 *dest = ((*src & 0xf0000000) |
177 ((*src & 0x0f000000) >> 4) |
178 ((*src & 0x00f00000) >> 8) |
179 ((*src & 0x000f0000) >> 12));
180 *dest |= (*dest >> 4);
181 dest++;
182 src++;
183 }
184#else
185 int siz = (width * height) >> 1;
186
187 __asm {
188 push ebx;
189 push esi;
190 push edi;
191
192 mov esi, dword ptr [src];
193 mov edi, dword ptr [dest];
194 mov ecx, dword ptr [siz];
195
196 tc1_loop:
197 mov eax, dword ptr [esi];
198 add esi, 4;
199
200 // aaaa rrrr gggg bbbb
201 // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
202 mov edx, eax;
203 and eax, 0x0000ffff;
204 mov ebx, eax; // 00000000 00000000 aaaarrrr ggggbbbb
205 and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
206 shl ebx, 12; // 0000aaaa 00000000 00000000 00000000
207 or eax, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb
208 mov ebx, eax;
209 and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
210 shl ebx, 8; // 00000000 0000rrrr 00000000 00000000
211 or eax, ebx; // 0000aaaa 0000rrrr aaaarrrr ggggbbbb
212 mov ebx, eax;
213 and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
214 shl ebx, 4; // 00000000 00000000 0000gggg 00000000
215 and eax, 0x0f0f000f; // 0000aaaa 0000rrrr 00000000 0000bbbb
216 or eax, ebx; // 0000aaaa 0000rrrr 0000gggg 0000bbbb
217 mov ebx, eax;
218 shl ebx, 4; // aaaa0000 rrrr0000 gggg0000 bbbb0000
219 or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
220
221 mov dword ptr [edi], eax;
222 add edi, 4;
223
224 shr edx, 16;
225 mov ebx, edx; // 00000000 00000000 aaaarrrr ggggbbbb
226 and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
227 shl ebx, 12; // 0000aaaa 00000000 00000000 00000000
228 or edx, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb
229 mov ebx, edx;
230 and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
231 shl ebx, 8; // 00000000 0000rrrr 00000000 00000000
232 or edx, ebx; // 0000aaaa 0000rrrr aaaarrrr ggggbbbb
233 mov ebx, edx;
234 and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
235 shl ebx, 4; // 00000000 00000000 0000gggg 00000000
236 and edx, 0x0f0f000f; // 0000aaaa 0000rrrr 00000000 0000bbbb
237 or edx, ebx; // 0000aaaa 0000rrrr 0000gggg 0000bbbb
238 mov ebx, edx;
239 shl ebx, 4; // aaaa0000 rrrr0000 gggg0000 bbbb0000
240 or edx, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
241
242 mov dword ptr [edi], edx;
243 add edi, 4;
244
245 dec ecx;
246 jnz tc1_loop;
247
248 pop edi;
249 pop esi;
250 pop ebx;
251 }
252#endif
253}
254
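/* Editor's note: in the conversion above each 4-bit channel n is widened to
 * 8 bits by nibble replication, (n << 4) | n, i.e. n * 17; for example
 * 0x8 becomes 0x88 and 0xF becomes 0xFF, so full intensity stays full. */
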
255void
256TxQuantize::RGB565_ARGB8888(uint32* src, uint32* dest, int width, int height)
257{
258#if 1
259 int siz = (width * height) >> 1;
260 int i;
261 for (i = 0; i < siz; i++) {
262 *dest = (0xff000000 |
263 ((*src & 0x0000f800) << 8) | ((*src & 0x0000e000) << 3) |
264 ((*src & 0x000007e0) << 5) | ((*src & 0x00000600) >> 1) |
265 ((*src & 0x0000001f) << 3) | ((*src & 0x0000001c) >> 2));
266 dest++;
267 *dest = (0xff000000 |
268 ((*src & 0xf8000000) >> 8) | ((*src & 0xe0000000) >> 13) |
269 ((*src & 0x07e00000) >> 11) | ((*src & 0x06000000) >> 17) |
270 ((*src & 0x001f0000) >> 13) | ((*src & 0x001c0000) >> 18));
271 dest++;
272 src++;
273 }
274#else
275 int siz = (width * height) >> 1;
276
277 __asm {
278 push ebx;
279 push esi;
280 push edi;
281
282 mov esi, dword ptr [src];
283 mov edi, dword ptr [dest];
284 mov ecx, dword ptr [siz];
285
286 tc1_loop:
287 mov eax, dword ptr [esi];
288 add esi, 4;
289
290 // rrrr rggg gggb bbbb
291 // 11111111 rrrrrrrr gggggggg bbbbbbbb
292 mov edx, eax;
293 and eax, 0x0000ffff;
294 mov ebx, eax; // 00000000 00000000 rrrrrggg gggbbbbb
295 and ebx, 0x0000f800; // 00000000 00000000 rrrrr000 00000000
296 shl ebx, 5; // 00000000 000rrrrr 00000000 00000000
297 or eax, ebx; // 00000000 000rrrrr rrrrrggg gggbbbbb
298 mov ebx, eax;
299 and ebx, 0x000007e0; // 00000000 00000000 00000ggg ggg00000
300 shl ebx, 5; // 00000000 00000000 gggggg00 00000000
301 and eax, 0x001F001F; // 00000000 000rrrrr 00000000 000bbbbb
302 shl eax, 3; // 00000000 rrrrr000 00000000 bbbbb000
303 or eax, ebx; // 00000000 rrrrr000 gggggg00 bbbbb000
304 mov ebx, eax;
305 shr ebx, 5; // 00000000 00000rrr rr000ggg ggg00bbb
306 and ebx, 0x00070007; // 00000000 00000rrr 00000000 00000bbb
307 or eax, ebx; // 00000000 rrrrrrrr gggggg00 bbbbbbbb
308 mov ebx, eax;
309 shr ebx, 6;
310 and ebx, 0x00000300; // 00000000 00000000 000000gg 00000000
311 or eax, ebx // 00000000 rrrrrrrr gggggggg bbbbbbbb
312 or eax, 0xff000000; // 11111111 rrrrrrrr gggggggg bbbbbbbb
313
314 mov dword ptr [edi], eax;
315 add edi, 4;
316
317 shr edx, 16;
318 mov eax, edx; // 00000000 00000000 rrrrrggg gggbbbbb
319 and eax, 0x0000ffff;
320 mov ebx, eax; // 00000000 00000000 rrrrrggg gggbbbbb
321 and ebx, 0x0000f800; // 00000000 00000000 rrrrr000 00000000
322 shl ebx, 5; // 00000000 000rrrrr 00000000 00000000
323 or eax, ebx; // 00000000 000rrrrr rrrrrggg gggbbbbb
324 mov ebx, eax;
325 and ebx, 0x000007e0; // 00000000 00000000 00000ggg ggg00000
326 shl ebx, 5; // 00000000 00000000 gggggg00 00000000
327 and eax, 0x001F001F; // 00000000 000rrrrr 00000000 000bbbbb
328 shl eax, 3; // 00000000 rrrrr000 00000000 bbbbb000
329 or eax, ebx; // 00000000 rrrrr000 gggggg00 bbbbb000
330 mov ebx, eax;
331 shr ebx, 5; // 00000000 00000rrr rr000ggg ggg00bbb
332 and ebx, 0x00070007; // 00000000 00000rrr 00000000 00000bbb
333 or eax, ebx; // 00000000 rrrrrrrr gggggg00 bbbbbbbb
334 mov ebx, eax;
335 shr ebx, 6;
336 and ebx, 0x00000300; // 00000000 00000000 000000gg 00000000
337 or eax, ebx // 00000000 rrrrrrrr gggggggg bbbbbbbb
338 or eax, 0xff000000; // 11111111 rrrrrrrr gggggggg bbbbbbbb
339
340 mov dword ptr [edi], eax;
341 add edi, 4;
342
343 dec ecx;
344 jnz tc1_loop;
345
346 pop edi;
347 pop esi;
348 pop ebx;
349 }
350#endif
351}
352
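/* Editor's note: the 565 expansion above replicates 5-bit red/blue as
 * (c5 << 3) | (c5 >> 2) and 6-bit green as (c6 << 2) | (c6 >> 4), and forces
 * alpha to 0xff since RGB565 carries no alpha. */
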
353void
354TxQuantize::A8_ARGB8888(uint32* src, uint32* dest, int width, int height)
355{
356#if 1
357 int siz = (width * height) >> 2;
358 int i;
359 for (i = 0; i < siz; i++) {
360 *dest = (*src & 0x000000ff);
361 *dest |= (*dest << 8);
362 *dest |= (*dest << 16);
363 dest++;
364 *dest = (*src & 0x0000ff00);
365 *dest |= (*dest >> 8);
366 *dest |= (*dest << 16);
367 dest++;
368 *dest = (*src & 0x00ff0000);
369 *dest |= (*dest << 8);
370 *dest |= (*dest >> 16);
371 dest++;
372 *dest = (*src & 0xff000000);
373 *dest |= (*dest >> 8);
374 *dest |= (*dest >> 16);
375 dest++;
376 src++;
377 }
378#else
379 int siz = (width * height) >> 2;
380
381 __asm {
382 push ebx;
383 push esi;
384 push edi;
385
386 mov esi, dword ptr [src];
387 mov edi, dword ptr [dest];
388 mov ecx, dword ptr [siz];
389
390 tc1_loop:
391 mov eax, dword ptr [esi];
392 add esi, 4;
393
394 // aaaaaaaa
395 // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
396 mov edx, eax;
397 and eax, 0x000000ff;
398 mov ebx, eax; // 00000000 00000000 00000000 aaaaaaaa
399 shl ebx, 8; // 00000000 00000000 aaaaaaaa 00000000
400 or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa
401 mov ebx, eax;
402 shl ebx, 16; // aaaaaaaa aaaaaaaa 00000000 00000000
403 or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
404
405 mov dword ptr [edi], eax;
406 add edi, 4;
407
408 mov eax, edx;
409 and eax, 0x0000ff00;
410 mov ebx, eax; // 00000000 00000000 aaaaaaaa 00000000
411 shr ebx, 8; // 00000000 00000000 00000000 aaaaaaaa
412 or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa
413 mov ebx, eax;
414 shl ebx, 16; // aaaaaaaa aaaaaaaa 00000000 00000000
415 or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
416
417 mov dword ptr [edi], eax;
418 add edi, 4;
419
420 mov eax, edx;
421 and eax, 0x00ff0000;
422 mov ebx, eax; // 00000000 aaaaaaaa 00000000 00000000
423 shl ebx, 8; // aaaaaaaa 00000000 00000000 00000000
424 or eax, ebx; // aaaaaaaa aaaaaaaa 00000000 00000000
425 mov ebx, eax;
426 shr ebx, 16; // 00000000 00000000 aaaaaaaa aaaaaaaa
427 or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
428
429 mov dword ptr [edi], eax;
430 add edi, 4;
431
432 mov eax, edx;
433 and eax, 0xff000000;
434 mov ebx, eax; // aaaaaaaa 00000000 00000000 00000000
435 shr ebx, 8; // 00000000 aaaaaaaa 00000000 00000000
436 or eax, ebx; // aaaaaaaa aaaaaaaa 00000000 00000000
437 mov ebx, eax;
438 shr ebx, 16; // 00000000 00000000 aaaaaaaa aaaaaaaa
439 or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
440
441 mov dword ptr [edi], eax;
442 add edi, 4;
443
444 dec ecx;
445 jnz tc1_loop;
446
447 pop edi;
448 pop esi;
449 pop ebx;
450 }
451#endif
452}
453
454void
455TxQuantize::AI44_ARGB8888(uint32* src, uint32* dest, int width, int height)
456{
457#if 1
458 int siz = (width * height) >> 2;
459 int i;
460 for (i = 0; i < siz; i++) {
461 *dest = (*src & 0x0000000f);
462 *dest |= ((*dest << 8) | (*dest << 16));
463 *dest |= ((*src & 0x000000f0) << 20);
464 *dest |= (*dest << 4);
465 dest++;
466 *dest = (*src & 0x00000f00);
467 *dest |= ((*dest << 8) | (*dest >> 8));
468 *dest |= ((*src & 0x0000f000) << 12);
469 *dest |= (*dest << 4);
470 dest++;
471 *dest = (*src & 0x000f0000);
472 *dest |= ((*dest >> 8) | (*dest >> 16));
473 *dest |= ((*src & 0x00f00000) << 4);
474 *dest |= (*dest << 4);
475 dest++;
476 *dest = ((*src & 0x0f000000) >> 4);
477 *dest |= ((*dest >> 8) | (*dest >> 16));
478 *dest |= (*src & 0xf0000000);
479 *dest |= (*dest >> 4);
480 dest++;
481 src++;
482 }
483#else
484 int siz = (width * height) >> 2;
485
486 __asm {
487 push ebx;
488 push esi;
489 push edi;
490
491 mov esi, dword ptr [src];
492 mov edi, dword ptr [dest];
493 mov ecx, dword ptr [siz];
494
495 tc1_loop:
496 mov eax, dword ptr [esi];
497 add esi, 4;
498
499 // aaaaiiii
500 // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
501 mov edx, eax;
502 and eax, 0x000000f0; // 00000000 00000000 00000000 aaaa0000
503 mov ebx, edx;
504 shl eax, 20; // 0000aaaa 00000000 00000000 00000000
505 and ebx, 0x0000000f; // 00000000 00000000 00000000 0000iiii
506 or eax, ebx; // 0000aaaa 00000000 00000000 0000iiii
507 shl ebx, 8; // 00000000 00000000 0000iiii 00000000
508 or eax, ebx; // 0000aaaa 00000000 0000iiii 0000iiii
509 shl ebx, 8; // 00000000 0000iiii 00000000 00000000
510 or eax, ebx; // 0000aaaa 0000iiii 0000iiii 0000iiii
511 mov ebx, eax;
512 shl ebx, 4; // aaaa0000 iiii0000 iiii0000 iiii0000
513 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
514
515 mov dword ptr [edi], eax;
516 add edi, 4;
517
518 mov eax, edx;
519 and eax, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
520 mov ebx, edx;
521 shl eax, 12; // 0000aaaa 00000000 00000000 00000000
522 and ebx, 0x00000f00; // 00000000 00000000 0000iiii 00000000
523 or eax, ebx; // 0000aaaa 00000000 0000iiii 00000000
524 shr ebx, 8; // 00000000 00000000 00000000 0000iiii
525 or eax, ebx; // 0000aaaa 00000000 0000iiii 0000iiii
526 shl ebx, 16; // 00000000 0000iiii 00000000 00000000
527 or eax, ebx; // 0000aaaa 0000iiii 0000iiii 0000iiii
528 mov ebx, eax;
529 shl ebx, 4; // aaaa0000 iiii0000 iiii0000 iiii0000
530 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
531
532 mov dword ptr [edi], eax;
533 add edi, 4;
534
535 mov eax, edx;
536 and eax, 0x00f00000; // 00000000 aaaa0000 00000000 00000000
537 mov ebx, edx;
538 shl eax, 4; // 0000aaaa 00000000 00000000 00000000
539 and ebx, 0x000f0000; // 00000000 0000iiii 00000000 00000000
540 or eax, ebx; // 0000aaaa 0000iiii 00000000 00000000
541 shr ebx, 8; // 00000000 00000000 0000iiii 00000000
542 or eax, ebx; // 0000aaaa 0000iiii 0000iiii 00000000
543 shr ebx, 8; // 00000000 00000000 00000000 0000iiii
544 or eax, ebx; // 0000aaaa 0000iiii 0000iiii 0000iiii
545 mov ebx, eax;
546 shl ebx, 4; // aaaa0000 iiii0000 iiii0000 iiii0000
547 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
548
549 mov dword ptr [edi], eax;
550 add edi, 4;
551
552 mov eax, edx;
553 and eax, 0xf0000000; // aaaa0000 00000000 00000000 00000000
554 mov ebx, edx;
555 and ebx, 0x0f000000; // 0000iiii 00000000 00000000 00000000
556 shr ebx, 4; // 00000000 iiii0000 00000000 00000000
557 or eax, ebx; // aaaa0000 iiii0000 00000000 00000000
558 shr ebx, 8; // 00000000 00000000 iiii0000 00000000
559 or eax, ebx; // aaaa0000 iiii0000 iiii0000 00000000
560 shr ebx, 8; // 00000000 00000000 00000000 iiii0000
561 or eax, ebx; // aaaa0000 iiii0000 iiii0000 iiii0000
562 mov ebx, eax;
563 shr ebx, 4; // 0000aaaa 0000iiii 0000iiii 0000iiii
564 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
565
566 mov dword ptr [edi], eax;
567 add edi, 4;
568
569 dec ecx;
570 jnz tc1_loop;
571
572 pop edi;
573 pop esi;
574 pop ebx;
575 }
576#endif
577}
578
579void
580TxQuantize::AI88_ARGB8888(uint32* src, uint32* dest, int width, int height)
581{
582#if 1
583 int siz = (width * height) >> 1;
584 int i;
585 for (i = 0; i < siz; i++) {
586 *dest = (*src & 0x000000ff);
587 *dest |= ((*dest << 8) | (*dest << 16));
588 *dest |= ((*src & 0x0000ff00) << 16);
589 dest++;
590 *dest = (*src & 0x00ff0000);
591 *dest |= ((*dest >> 8) | (*dest >> 16));
592 *dest |= (*src & 0xff000000);
593 dest++;
594 src++;
595 }
596#else
597 int siz = (width * height) >> 1;
598
599 __asm {
600 push ebx;
601 push esi;
602 push edi;
603
604 mov esi, dword ptr [src];
605 mov edi, dword ptr [dest];
606 mov ecx, dword ptr [siz];
607
608 tc1_loop:
609 mov eax, dword ptr [esi];
610 add esi, 4;
611
612 // aaaaaaaa iiiiiiii
613 // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
614 mov edx, eax;
615 and eax, 0x0000ffff; // 00000000 00000000 aaaaaaaa iiiiiiii
616 mov ebx, eax; // 00000000 00000000 aaaaaaaa iiiiiiii
617 shl eax, 16; // aaaaaaaa iiiiiiii 00000000 00000000
618 and ebx, 0x000000ff; // 00000000 00000000 00000000 iiiiiiii
619 or eax, ebx; // aaaaaaaa iiiiiiii 00000000 iiiiiiii
620 shl ebx, 8; // 00000000 00000000 iiiiiiii 00000000
621 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
622
623 mov dword ptr [edi], eax;
624 add edi, 4;
625
626 mov eax, edx;
627 and eax, 0xffff0000; // aaaaaaaa iiiiiiii 00000000 00000000
628 mov ebx, eax; // aaaaaaaa iiiiiiii 00000000 00000000
629 and ebx, 0x00ff0000; // 00000000 iiiiiiii 00000000 00000000
630 shr ebx, 8; // 00000000 00000000 iiiiiiii 00000000
631 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii 00000000
632 shr ebx, 8; // 00000000 00000000 00000000 iiiiiiii
633 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
634
635 mov dword ptr [edi], eax;
636 add edi, 4;
637
638 dec ecx;
639 jnz tc1_loop;
640
641 pop edi;
642 pop esi;
643 pop ebx;
644 }
645#endif
646}
647
648void
649TxQuantize::ARGB8888_ARGB1555(uint32* src, uint32* dest, int width, int height)
650{
651#if 1
652 int siz = (width * height) >> 1;
653 int i;
654 for (i = 0; i < siz; i++) {
655 *dest = ((*src & 0xff000000) ? 0x00008000 : 0x00000000);
656 *dest |= (((*src & 0x00f80000) >> 9) |
657 ((*src & 0x0000f800) >> 6) |
658 ((*src & 0x000000f8) >> 3));
659 src++;
660 *dest |= ((*src & 0xff000000) ? 0x80000000 : 0x00000000);
661 *dest |= (((*src & 0x00f80000) << 7) |
662 ((*src & 0x0000f800) << 10) |
663 ((*src & 0x000000f8) << 13));
664 src++;
665 dest++;
666 }
667#else
668 int siz = (width * height) >> 1;
669
670 __asm {
671 push ebx;
672 push esi;
673 push edi;
674
675 mov esi, dword ptr [src];
676 mov edi, dword ptr [dest];
677 mov ecx, dword ptr [siz];
678
679 tc1_loop:
680 mov eax, dword ptr [esi];
681 add esi, 4;
682
683#if 1
684 mov edx, eax;
685 and eax, 0xff000000; // aaaa0000 00000000 00000000 00000000
686 jz transparent1;
687 mov eax, 0x00008000; // 00000000 00000000 a0000000 00000000
688
689 transparent1:
690 mov ebx, edx;
691 and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000
692 shr ebx, 9; // 00000000 00000000 0rrrrr00 00000000
693 or eax, ebx; // 00000000 00000000 arrrrr00 00000000
694 mov ebx, edx;
695 and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000
696 shr ebx, 6; // 00000000 00000000 000000gg ggg00000
697 or eax, ebx; // 00000000 00000000 arrrrrgg ggg00000
698 and edx, 0x000000f8; // 00000000 00000000 00000000 bbbbb000
699 shr edx, 3; // 00000000 00000000 00000000 000bbbbb
700 or edx, eax; // 00000000 00000000 arrrrrgg gggbbbbb
701
702 mov eax, dword ptr [esi];
703 add esi, 4;
704
705 mov ebx, eax;
706 and eax, 0xff000000; // aaaa0000 00000000 00000000 00000000
707 jz transparent2;
708 or edx, 0x80000000; // a0000000 00000000 arrrrrgg gggbbbbb
709
710 transparent2:
711 mov eax, ebx;
712 and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000
713 shl ebx, 7; // 0rrrrr00 00000000 00000000 00000000
714 or edx, ebx; // arrrrr00 00000000 arrrrrgg gggbbbbb
715 mov ebx, eax;
716 and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000
717 shl ebx, 10; // 000000gg ggg00000 00000000 00000000
718 or edx, ebx; // arrrrrgg ggg00000 arrrrrgg gggbbbbb
719 and eax, 0x000000f8; // 00000000 00000000 00000000 bbbbb000
720 shl eax, 13; // 00000000 000bbbbb 00000000 00000000
721 or edx, eax; // arrrrrgg gggbbbbb arrrrrgg gggbbbbb
722
723 mov dword ptr [edi], edx;
724 add edi, 4;
725#else
726 mov edx, eax;
727 and edx, 0x01000000; // 0000000a 00000000 00000000 00000000
728 shr edx, 9; // 00000000 00000000 a0000000 00000000
729 mov ebx, eax;
730 and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000
731 shr ebx, 9; // 00000000 00000000 0rrrrr00 00000000
732 or edx, ebx; // 00000000 00000000 arrrrr00 00000000
733 mov ebx, eax;
734 and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000
735 shr ebx, 6; // 00000000 00000000 000000gg ggg00000
736 or edx, ebx; // 00000000 00000000 arrrrrgg ggg00000
737 and eax, 0x000000f8; // 00000000 00000000 00000000 bbbbb000
738 shr eax, 3; // 00000000 00000000 00000000 000bbbbb
739 or edx, eax; // 00000000 00000000 arrrrrgg gggbbbbb
740
741 mov eax, dword ptr [esi];
742 add esi, 4;
743
744 mov ebx, eax;
745 and ebx, 0x80000000; // a0000000 00000000 00000000 00000000
746 or edx, ebx; // a0000000 00000000 arrrrrgg gggbbbbb
747 mov ebx, eax;
748 and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000
749 shl ebx, 7; // 0rrrrr00 00000000 00000000 00000000
750 or edx, ebx; // arrrrr00 00000000 arrrrrgg gggbbbbb
751 mov ebx, eax;
752 and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000
753 shl ebx, 10; // 000000gg ggg00000 00000000 00000000
754 or edx, ebx; // arrrrrgg ggg00000 arrrrrgg gggbbbbb
755 and eax, 0x000000f8; // 00000000 00000000 00000000 bbbbb000
756 shl eax, 13; // 00000000 000bbbbb 00000000 00000000
757 or edx, eax; // arrrrrgg gggbbbbb arrrrrgg gggbbbbb
758
759 mov dword ptr [edi], edx;
760 add edi, 4;
761#endif
762 dec ecx;
763 jnz tc1_loop;
764
765 pop edi;
766 pop esi;
767 pop ebx;
768 }
769#endif
770}
771
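/* Editor's note: the fast path above sets the single destination alpha bit
 * whenever the 8-bit source alpha is nonzero, so alpha values 1..255 all
 * become opaque; ARGB8888_ARGB1555_ErrD below applies the same rule. The
 * color channels are simply truncated to their top 5 bits. */
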
772void
773TxQuantize::ARGB8888_ARGB4444(uint32* src, uint32* dest, int width, int height)
774{
775#if 1
776 int siz = (width * height) >> 1;
777 int i;
778 for (i = 0; i < siz; i++) {
779 *dest = (((*src & 0xf0000000) >> 16) |
780 ((*src & 0x00f00000) >> 12) |
781 ((*src & 0x0000f000) >> 8) |
782 ((*src & 0x000000f0) >> 4));
783 src++;
784 *dest |= ((*src & 0xf0000000) |
785 ((*src & 0x00f00000) << 4) |
786 ((*src & 0x0000f000) << 8) |
787 ((*src & 0x000000f0) << 12));
788 src++;
789 dest++;
790 }
791#else
792 int siz = (width * height) >> 1;
793
794 __asm {
795 push ebx;
796 push esi;
797 push edi;
798
799 mov esi, dword ptr [src];
800 mov edi, dword ptr [dest];
801 mov ecx, dword ptr [siz];
802
803 tc1_loop:
804 mov eax, dword ptr [esi];
805 add esi, 4;
806
807 mov edx, eax;
808 and edx, 0xf0000000; // aaaa0000 00000000 00000000 00000000
809 shr edx, 16; // 00000000 00000000 aaaa0000 00000000
810 mov ebx, eax;
811 and ebx, 0x00f00000; // 00000000 rrrr0000 00000000 00000000
812 shr ebx, 12; // 00000000 00000000 0000rrrr 00000000
813 or edx, ebx; // 00000000 00000000 aaaarrrr 00000000
814 mov ebx, eax;
815 and ebx, 0x0000f000; // 00000000 00000000 gggg0000 00000000
816 shr ebx, 8; // 00000000 00000000 00000000 gggg0000
817 or edx, ebx; // 00000000 00000000 aaaarrrr gggg0000
818 and eax, 0x000000f0; // 00000000 00000000 00000000 bbbb0000
819 shr eax, 4; // 00000000 00000000 00000000 0000bbbb
820 or edx, eax; // 00000000 00000000 aaaarrrr ggggbbbb
821
822 mov eax, dword ptr [esi];
823 add esi, 4;
824
825 mov ebx, eax;
826 and ebx, 0xf0000000; // aaaa0000 00000000 00000000 00000000
827 or edx, ebx; // aaaa0000 00000000 aaaarrrr ggggbbbb
828 mov ebx, eax;
829 and ebx, 0x00f00000; // 00000000 rrrr0000 00000000 00000000
830 shl ebx, 4; // 0000rrrr 00000000 00000000 00000000
831 or edx, ebx; // aaaarrrr 00000000 aaaarrrr ggggbbbb
832 mov ebx, eax;
833 and ebx, 0x0000f000; // 00000000 00000000 gggg0000 00000000
834 shl ebx, 8; // 00000000 gggg0000 00000000 00000000
835 or edx, ebx; // aaaarrrr gggg0000 aaaarrrr ggggbbbb
836 and eax, 0x000000f0; // 00000000 00000000 00000000 bbbb0000
837 shl eax, 12; // 00000000 0000bbbb 00000000 00000000
838      or edx, eax;         // aaaarrrr ggggbbbb aaaarrrr ggggbbbb
839
840 mov dword ptr [edi], edx;
841 add edi, 4;
842
843 dec ecx;
844 jnz tc1_loop;
845
846 pop edi;
847 pop esi;
848 pop ebx;
849 }
850#endif
851}
852
853void
854TxQuantize::ARGB8888_RGB565(uint32* src, uint32* dest, int width, int height)
855{
856#if 1
857 int siz = (width * height) >> 1;
858 int i;
859 for (i = 0; i < siz; i++) {
860 *dest = (((*src & 0x000000f8) >> 3) |
861 ((*src & 0x0000fc00) >> 5) |
862 ((*src & 0x00f80000) >> 8));
863 src++;
864 *dest |= (((*src & 0x000000f8) << 13) |
865 ((*src & 0x0000fc00) << 11) |
866 ((*src & 0x00f80000) << 8));
867 src++;
868 dest++;
869 }
870#else
871 int siz = (width * height) >> 1;
872
873 __asm {
874 push ebx;
875 push esi;
876 push edi;
877
878 mov esi, dword ptr [src];
879 mov edi, dword ptr [dest];
880 mov ecx, dword ptr [siz];
881
882 tc1_loop:
883 mov eax, dword ptr [esi];
884 add esi, 4;
885
886 mov edx, eax;
887 and edx, 0x000000F8; // 00000000 00000000 00000000 bbbbb000
888 shr edx, 3; // 00000000 00000000 00000000 000bbbbb
889 mov ebx, eax;
890 and ebx, 0x0000FC00; // 00000000 00000000 gggggg00 00000000
891 shr ebx, 5; // 00000000 00000000 00000ggg ggg00000
892 or edx, ebx; // 00000000 00000000 00000ggg gggbbbbb
893 mov ebx, eax;
894 and ebx, 0x00F80000; // 00000000 rrrrr000 00000000 00000000
895 shr ebx, 8; // 00000000 00000000 rrrrr000 00000000
896 or edx, ebx; // 00000000 00000000 rrrrrggg gggbbbbb
897
898 mov eax, dword ptr [esi];
899 add esi, 4;
900
901 mov ebx, eax;
902 and ebx, 0x000000F8; // 00000000 00000000 00000000 bbbbb000
903 shl ebx, 13; // 00000000 000bbbbb 00000000 00000000
904 or edx, ebx; // 00000000 000bbbbb rrrrrggg gggbbbbb
905 mov ebx, eax;
906 and ebx, 0x0000FC00; // 00000000 00000000 gggggg00 00000000
907 shl ebx, 11; // 00000ggg ggg00000 00000000 00000000
908 or edx, ebx; // 00000ggg gggbbbbb rrrrrggg gggbbbbb
909 mov ebx, eax;
910 and ebx, 0x00F80000; // 00000000 rrrrr000 00000000 00000000
911 shl ebx, 8; // rrrrr000 00000000 00000000 00000000
912 or edx, ebx; // rrrrrggg gggbbbbb rrrrrggg gggbbbbb
913
914 mov dword ptr [edi], edx;
915 add edi, 4;
916
917 dec ecx;
918 jnz tc1_loop;
919
920 pop edi;
921 pop esi;
922 pop ebx;
923 }
924#endif
925}
926
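/* Editor's note: the reduction above truncates each channel to its top
 * 5/6/5 bits and drops alpha; ARGB8888_RGB565_ErrD below is the dithered
 * (error-diffusion) alternative. */
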
927void
928TxQuantize::ARGB8888_A8(uint32* src, uint32* dest, int width, int height)
929{
930#if 1
931 int siz = (width * height) >> 2;
932 int i;
933 for (i = 0; i < siz; i++) {
934 *dest = (*src & 0x0000ff00) >> 8;
935 src++;
936 *dest |= (*src & 0x0000ff00);
937 src++;
938 *dest |= ((*src & 0x0000ff00) << 8);
939 src++;
940 *dest |= ((*src & 0x0000ff00) << 16);
941 src++;
942 dest++;
943 }
944#else
945 int siz = (width * height) >> 2;
946
947 __asm {
948 push ebx;
949 push esi;
950 push edi;
951
952 mov esi, dword ptr [src];
953 mov edi, dword ptr [dest];
954 mov ecx, dword ptr [siz];
955
956 tc1_loop:
957 mov eax, dword ptr [esi];
958 add esi, 4;
959
960#if 0
961 mov edx, eax; // we'll use A comp for every pixel
962 and edx, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
963 shr edx, 24; // 00000000 00000000 00000000 aaaaaaaa
964
965 mov eax, dword ptr [esi];
966 add esi, 4;
967
968 and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
969 shr eax, 16; // 00000000 00000000 aaaaaaaa 00000000
970 or edx, eax; // 00000000 00000000 aaaaaaaa aaaaaaaa
971
972 mov eax, dword ptr [esi];
973 add esi, 4;
974
975 and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
976 shr eax, 8; // 00000000 aaaaaaaa 00000000 00000000
977 or edx, eax; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
978
979 mov eax, dword ptr [esi];
980 add esi, 4;
981
982 and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
983 or edx, eax; // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
984#endif
985
986#if 1
987 mov edx, eax; // we'll use G comp for every pixel
988 and edx, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000
989 shr edx, 8; // 00000000 00000000 00000000 aaaaaaaa
990
991 mov eax, dword ptr [esi];
992 add esi, 4;
993
994 and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000
995 or edx, eax; // 00000000 00000000 aaaaaaaa aaaaaaaa
996
997 mov eax, dword ptr [esi];
998 add esi, 4;
999
1000 and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000
1001 shl eax, 8; // 00000000 aaaaaaaa 00000000 00000000
1002 or edx, eax; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
1003
1004 mov eax, dword ptr [esi];
1005 add esi, 4;
1006
1007 and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000
1008 shl eax, 16; // aaaaaaaa 00000000 00000000 00000000
1009 or edx, eax; // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
1010#endif
1011
1012#if 0
1013 mov edx, eax;
1014 and edx, 0x000000FF; // 00000000 00000000 00000000 aaaaaaaa
1015
1016 mov eax, dword ptr [esi];
1017 add esi, 4;
1018
1019 and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000
1020 or edx, eax; // 00000000 00000000 aaaaaaaa aaaaaaaa
1021
1022 mov eax, dword ptr [esi];
1023 add esi, 4;
1024
1025 and eax, 0x00FF0000; // 00000000 aaaaaaaa 00000000 00000000
1026 or edx, eax; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
1027
1028 mov eax, dword ptr [esi];
1029 add esi, 4;
1030
1031 and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
1032 or edx, eax; // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
1033#endif
1034 mov dword ptr [edi], edx;
1035 add edi, 4;
1036
1037 dec ecx;
1038 jnz tc1_loop;
1039
1040 pop edi;
1041 pop esi;
1042 pop ebx;
1043 }
1044#endif
1045}
1046
1047void
1048TxQuantize::ARGB8888_AI44(uint32* src, uint32* dest, int width, int height)
1049{
1050#if 1
1051 int siz = (width * height) >> 2;
1052 int i;
1053 for (i = 0; i < siz; i++) {
1054 *dest = (((*src & 0xf0000000) >> 24) | ((*src & 0x0000f000) >> 12));
1055 src++;
1056 *dest |= (((*src & 0xf0000000) >> 16) | ((*src & 0x0000f000) >> 4));
1057 src++;
1058 *dest |= (((*src & 0xf0000000) >> 8) | ((*src & 0x0000f000) << 4));
1059 src++;
1060 *dest |= ((*src & 0xf0000000) | ((*src & 0x0000f000) << 12));
1061 src++;
1062 dest++;
1063 }
1064#else
1065 int siz = (width * height) >> 2;
1066
1067 __asm {
1068 push ebx;
1069 push esi;
1070 push edi;
1071
1072 mov esi, dword ptr [src];
1073 mov edi, dword ptr [dest];
1074 mov ecx, dword ptr [siz];
1075
1076 tc1_loop:
1077 mov eax, dword ptr [esi];
1078 add esi, 4;
1079
1080 mov edx, eax; // use A and G comps MSB
1081 and edx, 0xF0000000; // aaaa0000 00000000 00000000 00000000
1082 mov ebx, eax;
1083 shr edx, 24; // 00000000 00000000 00000000 aaaa0000
1084 and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000
1085 shr ebx, 12; // 00000000 00000000 00000000 0000iiii
1086 or edx, ebx; // 00000000 00000000 00000000 aaaaiiii
1087
1088 mov eax, dword ptr [esi];
1089 add esi, 4;
1090
1091 mov ebx, eax;
1092 and eax, 0xF0000000; // aaaa0000 00000000 00000000 00000000
1093 shr eax, 16; // 00000000 00000000 aaaa0000 00000000
1094 and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000
1095 shr ebx, 4; // 00000000 00000000 0000iiii 00000000
1096 or eax, ebx; // 00000000 00000000 aaaaiiii 00000000
1097 or edx, eax; // 00000000 00000000 aaaaiiii aaaaiiii
1098
1099 mov eax, dword ptr [esi];
1100 add esi, 4;
1101
1102 mov ebx, eax;
1103 and eax, 0xF0000000; // aaaa0000 00000000 00000000 00000000
1104 shr eax, 8; // 00000000 aaaa0000 00000000 00000000
1105 and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000
1106 shl ebx, 4; // 00000000 0000iiii 00000000 00000000
1107 or eax, ebx; // 00000000 aaaaiiii 00000000 00000000
1108 or edx, eax; // 00000000 aaaaiiii aaaaiiii aaaaiiii
1109
1110 mov eax, dword ptr [esi];
1111 add esi, 4;
1112
1113 mov ebx, eax;
1114 and eax, 0xF0000000; // aaaa0000 00000000 00000000 00000000
1115 and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000
1116 shl ebx, 12; // 0000iiii 00000000 00000000 00000000
1117 or eax, ebx; // aaaaiiii 00000000 00000000 00000000
1118 or edx, eax; // aaaaiiii aaaaiiii aaaaiiii aaaaiiii
1119
1120 mov dword ptr [edi], edx;
1121 add edi, 4;
1122
1123 dec ecx;
1124 jnz tc1_loop;
1125
1126 pop edi;
1127 pop esi;
1128 pop ebx;
1129 }
1130#endif
1131}
1132
1133void
1134TxQuantize::ARGB8888_AI88(uint32* src, uint32* dest, int width, int height)
1135{
1136#if 1
1137 int siz = (width * height) >> 1;
1138 int i;
1139 for (i = 0; i < siz; i++) {
1140 *dest = (((*src & 0xff000000) >> 16) | ((*src & 0x0000ff00) >> 8));
1141 src++;
1142 *dest |= ((*src & 0xff000000) | ((*src & 0x0000ff00) << 8));
1143 src++;
1144 dest++;
1145 }
1146#else
1147 int siz = (width * height) >> 1;
1148
1149 __asm {
1150 push ebx;
1151 push esi;
1152 push edi;
1153
1154 mov esi, dword ptr [src];
1155 mov edi, dword ptr [dest];
1156 mov ecx, dword ptr [siz];
1157
1158 tc1_loop:
1159 mov eax, dword ptr [esi];
1160 add esi, 4;
1161
1162 mov edx, eax;
1163 and edx, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
1164 mov ebx, eax;
1165 shr edx, 16; // 00000000 00000000 aaaaaaaa 00000000
1166 and ebx, 0x0000FF00; // 00000000 00000000 iiiiiiii 00000000
1167 shr ebx, 8; // 00000000 00000000 00000000 iiiiiiii
1168 or edx, ebx; // 00000000 00000000 aaaaaaaa iiiiiiii
1169
1170 mov eax, dword ptr [esi];
1171 add esi, 4;
1172
1173 mov ebx, eax;
1174 and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
1175 and ebx, 0x0000FF00; // 00000000 00000000 iiiiiiii 00000000
1176 shl ebx, 8; // 00000000 iiiiiiii 00000000 00000000
1177 or eax, ebx; // aaaaaaaa iiiiiiii 00000000 00000000
1178 or edx, eax; // aaaaaaaa iiiiiiii aaaaaaaa iiiiiiii
1179
1180 mov dword ptr [edi], edx;
1181 add edi, 4;
1182
1183 dec ecx;
1184 jnz tc1_loop;
1185
1186 pop edi;
1187 pop esi;
1188 pop ebx;
1189 }
1190#endif
1191}
1192
1193/* R.W. Floyd and L. Steinberg, An adaptive algorithm
1194 * for spatial grey scale, Proceedings of the Society
1195 * of Information Display 17, pp. 75-77, 1976
1196 */
1197void
1198TxQuantize::ARGB8888_RGB565_ErrD(uint32* src, uint32* dst, int width, int height)
1199{
1200 /* Floyd-Steinberg error-diffusion halftoning */
1201
1202 int i, x, y;
1203 int qr, qg, qb; /* quantized incoming values */
1204 int ir, ig, ib; /* incoming values */
1205 int t;
1206 int *errR = new int[width];
1207 int *errG = new int[width];
1208 int *errB = new int[width];
1209
1210 uint16 *dest = (uint16 *)dst;
1211
1212 for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = 0;
1213
1214 for (y = 0; y < height; y++) {
1215 qr = qg = qb = 0;
1216 for (x = 0; x < width; x++) {
1217 /* incoming pixel values */
1218 ir = ((*src >> 16) & 0xFF) * 10000;
1219 ig = ((*src >> 8) & 0xFF) * 10000;
1220 ib = ((*src ) & 0xFF) * 10000;
1221
1222 /* quantize pixel values.
1223 * qr * 0.4375 is the error from the pixel to the left,
1224 * errR is the error from the pixel to the top, top left, and top right */
1225 /* qr * 0.4375 is the error distribution to the EAST in
1226 * the previous loop */
1227 ir += errR[x] + qr * 4375 / 10000;
1228 ig += errG[x] + qg * 4375 / 10000;
1229 ib += errB[x] + qb * 4375 / 10000;
1230
1231      /* SOUTH-EAST share of the previous pixel's error, destined for the
1232       * next row; it cannot be stored during that iteration because errR[x]
1233       * still holds the previous row's error, consumed by the quantization above */
1234 errR[x] = qr * 625 / 10000;
1235 errG[x] = qg * 625 / 10000;
1236 errB[x] = qb * 625 / 10000;
1237
1238 qr = ir;
1239 qg = ig;
1240 qb = ib;
1241
1242 /* clamp */
1243 if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
1244 if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
1245 if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
1246
1247 /* convert to RGB565 */
1248 qr = qr * 0x1F / 2550000;
1249 qg = qg * 0x3F / 2550000;
1250 qb = qb * 0x1F / 2550000;
1251
1252 /* this is the dithered pixel */
1253 t = (qr << 11) | (qg << 5) | qb;
1254
1255 /* compute the errors */
1256 qr = ((qr << 3) | (qr >> 2)) * 10000;
1257 qg = ((qg << 2) | (qg >> 4)) * 10000;
1258 qb = ((qb << 3) | (qb >> 2)) * 10000;
1259 qr = ir - qr;
1260 qg = ig - qg;
1261 qb = ib - qb;
1262
1263 /* compute the error distributions */
1264 /* Floyd-Steinberg filter
1265 * 7/16 (=0.4375) to the EAST
1266 * 5/16 (=0.3125) to the SOUTH
1267 * 1/16 (=0.0625) to the SOUTH-EAST
1268 * 3/16 (=0.1875) to the SOUTH-WEST
1269 *
1270 * x 7/16
1271 * 3/16 5/16 1/16
1272 */
1273 /* SOUTH-WEST */
1274 if (x > 1) {
1275 errR[x - 1] += qr * 1875 / 10000;
1276 errG[x - 1] += qg * 1875 / 10000;
1277 errB[x - 1] += qb * 1875 / 10000;
1278 }
1279
1280 /* SOUTH */
1281 errR[x] += qr * 3125 / 10000;
1282 errG[x] += qg * 3125 / 10000;
1283 errB[x] += qb * 3125 / 10000;
1284
1285 *dest = (t & 0xFFFF);
1286
1287 dest++;
1288 src++;
1289 }
1290 }
1291
1292 delete [] errR;
1293 delete [] errG;
1294 delete [] errB;
1295}
1296
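/* Editor's note: a minimal sketch of the same Floyd-Steinberg weights applied
 * to one 8-bit channel, using an explicit next-row error buffer instead of the
 * in-place single-row scheme above. fsDitherChannel and its parameters are
 * illustrative only and are not part of the build.
 */
#if 0
static void fsDitherChannel(const uint8* in, uint8* out, int width, int height, int bits)
{
  int levels = (1 << bits) - 1; /* e.g. 31 for a 5-bit channel */
  int *curErr  = new int[width + 2];
  int *nextErr = new int[width + 2];
  int i, x, y;
  for (i = 0; i < width + 2; i++) curErr[i] = 0;
  for (y = 0; y < height; y++) {
    for (i = 0; i < width + 2; i++) nextErr[i] = 0;
    for (x = 0; x < width; x++) {
      int v = in[y * width + x] + curErr[x + 1];
      if (v < 0) v = 0; else if (v > 255) v = 255;
      int q = v * levels / 255;                /* quantized level      */
      int r = (q * 255 + levels / 2) / levels; /* reconstructed 8 bits */
      int e = v - r;                           /* quantization error   */
      out[y * width + x] = (uint8)q;
      curErr[x + 2]  += e * 7 / 16;            /* EAST       */
      nextErr[x]     += e * 3 / 16;            /* SOUTH-WEST */
      nextErr[x + 1] += e * 5 / 16;            /* SOUTH      */
      nextErr[x + 2] += e * 1 / 16;            /* SOUTH-EAST */
    }
    int *tmp = curErr; curErr = nextErr; nextErr = tmp;
  }
  delete [] curErr;
  delete [] nextErr;
}
#endif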
1297
1298void
1299TxQuantize::ARGB8888_ARGB1555_ErrD(uint32* src, uint32* dst, int width, int height)
1300{
1301 /* Floyd-Steinberg error-diffusion halftoning */
1302
1303 int i, x, y;
1304 int qr, qg, qb; /* quantized incoming values */
1305 int ir, ig, ib; /* incoming values */
1306 int t;
1307 int *errR = new int[width];
1308 int *errG = new int[width];
1309 int *errB = new int[width];
1310
1311 uint16 *dest = (uint16 *)dst;
1312
1313 for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = 0;
1314
1315 for (y = 0; y < height; y++) {
1316 qr = qg = qb = 0;
1317 for (x = 0; x < width; x++) {
1318 /* incoming pixel values */
1319 ir = ((*src >> 16) & 0xFF) * 10000;
1320 ig = ((*src >> 8) & 0xFF) * 10000;
1321 ib = ((*src ) & 0xFF) * 10000;
1322
1323 /* quantize pixel values.
1324 * qr * 0.4375 is the error from the pixel to the left,
1325 * errR is the error from the pixel to the top, top left, and top right */
1326 /* qr * 0.4375 is the error distribution to the EAST in
1327 * the previous loop */
1328 ir += errR[x] + qr * 4375 / 10000;
1329 ig += errG[x] + qg * 4375 / 10000;
1330 ib += errB[x] + qb * 4375 / 10000;
1331
1332      /* SOUTH-EAST share of the previous pixel's error, destined for the
1333       * next row; it cannot be stored during that iteration because errR[x]
1334       * still holds the previous row's error, consumed by the quantization above */
1335 errR[x] = qr * 625 / 10000;
1336 errG[x] = qg * 625 / 10000;
1337 errB[x] = qb * 625 / 10000;
1338
1339 qr = ir;
1340 qg = ig;
1341 qb = ib;
1342
1343 /* clamp */
1344 if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
1345 if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
1346 if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
1347
1348 /* convert to RGB555 */
1349 qr = qr * 0x1F / 2550000;
1350 qg = qg * 0x1F / 2550000;
1351 qb = qb * 0x1F / 2550000;
1352
1353 /* this is the dithered pixel */
1354 t = (qr << 10) | (qg << 5) | qb;
1355 t |= ((*src >> 24) ? 0x8000 : 0);
1356
1357 /* compute the errors */
1358 qr = ((qr << 3) | (qr >> 2)) * 10000;
1359 qg = ((qg << 3) | (qg >> 2)) * 10000;
1360 qb = ((qb << 3) | (qb >> 2)) * 10000;
1361 qr = ir - qr;
1362 qg = ig - qg;
1363 qb = ib - qb;
1364
1365 /* compute the error distributions */
1366 /* Floyd-Steinberg filter
1367 * 7/16 (=0.4375) to the EAST
1368 * 5/16 (=0.3125) to the SOUTH
1369 * 1/16 (=0.0625) to the SOUTH-EAST
1370 * 3/16 (=0.1875) to the SOUTH-WEST
1371 *
1372 * x 7/16
1373 * 3/16 5/16 1/16
1374 */
1375 /* SOUTH-WEST */
1376 if (x > 1) {
1377 errR[x - 1] += qr * 1875 / 10000;
1378 errG[x - 1] += qg * 1875 / 10000;
1379 errB[x - 1] += qb * 1875 / 10000;
1380 }
1381
1382 /* SOUTH */
1383 errR[x] += qr * 3125 / 10000;
1384 errG[x] += qg * 3125 / 10000;
1385 errB[x] += qb * 3125 / 10000;
1386
1387 *dest = (t & 0xFFFF);
1388
1389 dest++;
1390 src++;
1391 }
1392 }
1393
1394 delete [] errR;
1395 delete [] errG;
1396 delete [] errB;
1397}
1398
1399void
1400TxQuantize::ARGB8888_ARGB4444_ErrD(uint32* src, uint32* dst, int width, int height)
1401{
1402 /* Floyd-Steinberg error-diffusion halftoning */
1403
1404  /* NOTE: alpha dithering looks better for alpha gradients, but is prone
1405 * to producing noisy speckles for constant or step level alpha. Output
1406 * results should always be checked.
1407 */
1408 boolean ditherAlpha = 0;
1409
1410 int i, x, y;
1411 int qr, qg, qb, qa; /* quantized incoming values */
1412 int ir, ig, ib, ia; /* incoming values */
1413 int t;
1414 int *errR = new int[width];
1415 int *errG = new int[width];
1416 int *errB = new int[width];
1417 int *errA = new int[width];
1418
1419 uint16 *dest = (uint16 *)dst;
1420
1421 for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = errA[i] = 0;
1422
1423 for (y = 0; y < height; y++) {
1424 qr = qg = qb = qa = 0;
1425 for (x = 0; x < width; x++) {
1426 /* incoming pixel values */
1427 ir = ((*src >> 16) & 0xFF) * 10000;
1428 ig = ((*src >> 8) & 0xFF) * 10000;
1429 ib = ((*src ) & 0xFF) * 10000;
1430 ia = ((*src >> 24) & 0xFF) * 10000;
1431
1432 /* quantize pixel values.
1433 * qr * 0.4375 is the error from the pixel to the left,
1434 * errR is the error from the pixel to the top, top left, and top right */
1435 /* qr * 0.4375 is the error distribution to the EAST in
1436 * the previous loop */
1437 ir += errR[x] + qr * 4375 / 10000;
1438 ig += errG[x] + qg * 4375 / 10000;
1439 ib += errB[x] + qb * 4375 / 10000;
1440 ia += errA[x] + qa * 4375 / 10000;
1441
1442      /* SOUTH-EAST share of the previous pixel's error, destined for the
1443       * next row; it cannot be stored during that iteration because errR[x]
1444       * still holds the previous row's error, consumed by the quantization above */
1445 errR[x] = qr * 625 / 10000;
1446 errG[x] = qg * 625 / 10000;
1447 errB[x] = qb * 625 / 10000;
1448 errA[x] = qa * 625 / 10000;
1449
1450 qr = ir;
1451 qg = ig;
1452 qb = ib;
1453 qa = ia;
1454
1455 /* clamp */
1456 if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
1457 if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
1458 if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
1459 if (qa < 0) qa = 0; else if (qa > 2550000) qa = 2550000;
1460
1461      /* convert to ARGB4444 */
1462 qr = qr * 0xF / 2550000;
1463 qg = qg * 0xF / 2550000;
1464 qb = qb * 0xF / 2550000;
1465 qa = qa * 0xF / 2550000;
1466
1467 /* this is the value to be returned */
1468 if (ditherAlpha) {
1469 t = (qa << 12) | (qr << 8) | (qg << 4) | qb;
1470 } else {
1471 t = (qr << 8) | (qg << 4) | qb;
1472 t |= (*src >> 16) & 0xF000;
1473 }
1474
1475 /* compute the errors */
1476 qr = ((qr << 4) | qr) * 10000;
1477 qg = ((qg << 4) | qg) * 10000;
1478 qb = ((qb << 4) | qb) * 10000;
1479 qa = ((qa << 4) | qa) * 10000;
1480 qr = ir - qr;
1481 qg = ig - qg;
1482 qb = ib - qb;
1483 qa = ia - qa;
1484
1485 /* compute the error distributions */
1486 /* Floyd-Steinberg filter
1487 * 7/16 (=0.4375) to the EAST
1488 * 5/16 (=0.3125) to the SOUTH
1489 * 1/16 (=0.0625) to the SOUTH-EAST
1490 * 3/16 (=0.1875) to the SOUTH-WEST
1491 *
1492 * x 7/16
1493 * 3/16 5/16 1/16
1494 */
1495 /* SOUTH-WEST */
1496 if (x > 1) {
1497 errR[x - 1] += qr * 1875 / 10000;
1498 errG[x - 1] += qg * 1875 / 10000;
1499 errB[x - 1] += qb * 1875 / 10000;
1500 errA[x - 1] += qa * 1875 / 10000;
1501 }
1502
1503 /* SOUTH */
1504 errR[x] += qr * 3125 / 10000;
1505 errG[x] += qg * 3125 / 10000;
1506 errB[x] += qb * 3125 / 10000;
1507 errA[x] += qa * 3125 / 10000;
1508
1509 *dest = (t & 0xFFFF);
1510
1511 dest++;
1512 src++;
1513 }
1514 }
1515
1516 delete [] errR;
1517 delete [] errG;
1518 delete [] errB;
1519 delete [] errA;
1520}
1521
1522void
1523TxQuantize::ARGB8888_AI44_ErrD(uint32* src, uint32* dst, int width, int height)
1524{
1525 /* Floyd-Steinberg error-diffusion halftoning */
1526
1527  /* NOTE: alpha dithering looks better for alpha gradients, but is prone
1528 * to producing noisy speckles for constant or step level alpha. Output
1529 * results should always be checked.
1530 */
1531 boolean ditherAlpha = 0;
1532
1533 int i, x, y;
1534 int qi, qa; /* quantized incoming values */
1535 int ii, ia; /* incoming values */
1536 int t;
1537 int *errI = new int[width];
1538 int *errA = new int[width];
1539
1540 uint8 *dest = (uint8 *)dst;
1541
1542 for (i = 0; i < width; i++) errI[i] = errA[i] = 0;
1543
1544 for (y = 0; y < height; y++) {
1545 qi = qa = 0;
1546 for (x = 0; x < width; x++) {
1547 /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114 */
1548 ii = ((*src >> 16) & 0xFF) * 2990 +
1549 ((*src >> 8) & 0xFF) * 5870 +
1550 ((*src ) & 0xFF) * 1140;
1551 ia = ((*src >> 24) & 0xFF) * 10000;
1552
1553 /* quantize pixel values.
1554 * qi * 0.4375 is the error from the pixel to the left,
1555 * errI is the error from the pixel to the top, top left, and top right */
1556       * qi * 0.4375 is the error distribution to the EAST in
1557 * the previous loop */
1558 ii += errI[x] + qi * 4375 / 10000;
1559 ia += errA[x] + qa * 4375 / 10000;
1560
1561      /* SOUTH-EAST share of the previous pixel's error, destined for the
1562       * next row; it cannot be stored during that iteration because errI[x]
1563       * still holds the previous row's error, consumed by the quantization above */
1564 errI[x] = qi * 625 / 10000;
1565 errA[x] = qa * 625 / 10000;
1566
1567 qi = ii;
1568 qa = ia;
1569
1570 /* clamp */
1571 if (qi < 0) qi = 0; else if (qi > 2550000) qi = 2550000;
1572 if (qa < 0) qa = 0; else if (qa > 2550000) qa = 2550000;
1573
1574      /* convert to AI44 */
1575 qi = qi * 0xF / 2550000;
1576 qa = qa * 0xF / 2550000;
1577
1578 /* this is the value to be returned */
1579 if (ditherAlpha) {
1580 t = (qa << 4) | qi;
1581 } else {
1582 t = qi;
1583 t |= ((*src >> 24) & 0xF0);
1584 }
1585
1586 /* compute the errors */
1587 qi = ((qi << 4) | qi) * 10000;
1588 qa = ((qa << 4) | qa) * 10000;
1589 qi = ii - qi;
1590 qa = ia - qa;
1591
1592 /* compute the error distributions */
1593 /* Floyd-Steinberg filter
1594 * 7/16 (=0.4375) to the EAST
1595 * 5/16 (=0.3125) to the SOUTH
1596 * 1/16 (=0.0625) to the SOUTH-EAST
1597 * 3/16 (=0.1875) to the SOUTH-WEST
1598 *
1599 * x 7/16
1600 * 3/16 5/16 1/16
1601 */
1602 /* SOUTH-WEST */
1603 if (x > 1) {
1604 errI[x - 1] += qi * 1875 / 10000;
1605 errA[x - 1] += qa * 1875 / 10000;
1606 }
1607
1608 /* SOUTH */
1609 errI[x] += qi * 3125 / 10000;
1610 errA[x] += qa * 3125 / 10000;
1611
1612 *dest = t & 0xFF;
1613
1614 dest++;
1615 src++;
1616 }
1617 }
1618
1619 delete [] errI;
1620 delete [] errA;
1621}
1622
1623void
1624TxQuantize::ARGB8888_AI88_Slow(uint32* src, uint32* dst, int width, int height)
1625{
1626 int x, y;
1627 uint16 *dest = (uint16 *)dst;
1628 for (y = 0; y < height; y++) {
1629 for (x = 0; x < width; x++) {
1630#if 1
1631 /* libpng style grayscale conversion.
1632 * Reduce RGB files to grayscale with or without alpha
1633 * using the equation given in Poynton's ColorFAQ at
1634 * <http://www.inforamp.net/~poynton/>
1635 * Copyright (c) 1998-01-04 Charles Poynton poynton at inforamp.net
1636 *
1637 * Y = 0.212671 * R + 0.715160 * G + 0.072169 * B
1638 *
1639 * We approximate this with
1640 *
1641 * Y = 0.21268 * R + 0.7151 * G + 0.07217 * B
1642 *
1643 * which can be expressed with integers as
1644 *
1645 * Y = (6969 * R + 23434 * G + 2365 * B)/32768
1646 *
1647 * The calculation is to be done in a linear colorspace.
1648 */
1649 *dest = (((int)((((*src >> 16) & 0xFF) * 6969 +
1650 ((*src >> 8) & 0xFF) * 23434 +
1651 ((*src ) & 0xFF) * 2365) / 32768) & 0xFF) |
1652 (uint16)((*src >> 16) & 0xFF00));
1653#else
1654 /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114
1655       * this is the same as the standard NTSC grayscale conversion. */
1656 *dest = (((int)((((*src >> 16) & 0xFF) * 299 +
1657 ((*src >> 8) & 0xFF) * 587 +
1658 ((*src ) & 0xFF) * 114) / 1000) & 0xFF) |
1659 (uint16)((*src >> 16) & 0xFF00));
1660#endif
1661 dest++;
1662 src++;
1663 }
1664 }
1665}
1666
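/* Editor's note: the libpng integer weights above sum exactly to
 * 6969 + 23434 + 2365 = 32768, so pure white (255,255,255) maps to
 * 32768 * 255 / 32768 = 255 and pure green (0,255,0) maps to
 * 23434 * 255 / 32768 = 182, i.e. roughly 0.715 * 255. */
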
1667void
1668TxQuantize::ARGB8888_I8_Slow(uint32* src, uint32* dst, int width, int height)
1669{
1670 int x, y;
1671 uint8 *dest = (uint8 *)dst;
1672 for (y = 0; y < height; y++) {
1673 for (x = 0; x < width; x++) {
1674#if 1
1675 /* libpng style Intensity = (6969 * R + 23434 * G + 2365 * B)/32768 */
1676 *dest = (int)((((*src >> 16) & 0xFF) * 6969 +
1677 ((*src >> 8) & 0xFF) * 23434 +
1678 ((*src ) & 0xFF) * 2365) / 32768) & 0xFF;
1679#else
1680 /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114
1681       * this is the same as the standard NTSC grayscale conversion. */
1682 *dest = (int)((((*src >>16) & 0xFF) * 299 +
1683 ((*src >> 8) & 0xFF) * 587 +
1684 ((*src ) & 0xFF) * 114) / 1000) & 0xFF;
1685#endif
1686 dest++;
1687 src++;
1688 }
1689 }
1690}
1691
1692void
1693TxQuantize::P8_16BPP(uint32* src, uint32* dest, int width, int height, uint32* palette)
1694{
1695 /* passed in palette is RGBA5551 format */
1696#if 1
1697 int i;
1698 int size = width * height;
1699 for (i = 0; i < size; i++) {
1700 ((uint16*)dest)[i] = ((uint16*)palette)[(int)(((uint8*)src)[i])];
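    /* rotate the looked-up texel right by one bit: the palette entry is
     * RGBA5551 (alpha in the LSB) while the destination expects ARGB1555
     * (alpha in the MSB). */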
1701 ((uint16*)dest)[i] = ((((uint16*)dest)[i] << 15) | (((uint16*)dest)[i] >> 1));
1702 }
1703#else
1704
1705 /* not finished yet... */
1706
1707 int siz = (width * height) >> 2;
1708
1709 __asm {
1710 push ebx;
1711 push esi;
1712 push edi;
1713
1714 mov esi, dword ptr [src];
1715 mov edi, dword ptr [dest];
1716 mov ecx, dword ptr [siz];
1717 mov edx, dword ptr [palette];
1718
1719 tc1_loop:
1720 mov eax, dword ptr [esi];
1721 add esi, 4;
1722
1723 dec ecx;
1724 jnz tc1_loop;
1725
1726 pop edi;
1727 pop esi;
1728 pop ebx;
1729 }
1730#endif
1731}
1732
1733boolean
1734TxQuantize::quantize(uint8* src, uint8* dest, int width, int height, uint16 srcformat, uint16 destformat, boolean fastQuantizer)
1735{
1736 typedef void (TxQuantize::*quantizerFunc)(uint32* src, uint32* dest, int width, int height);
1737 quantizerFunc quantizer;
1738 int bpp_shift = 0;
1739
1740 if (destformat == GR_TEXFMT_ARGB_8888) {
1741 switch (srcformat) {
1742 case GR_TEXFMT_ARGB_1555:
1743 quantizer = &TxQuantize::ARGB1555_ARGB8888;
1744 bpp_shift = 1;
1745 break;
1746 case GR_TEXFMT_ARGB_4444:
1747 quantizer = &TxQuantize::ARGB4444_ARGB8888;
1748 bpp_shift = 1;
1749 break;
1750 case GR_TEXFMT_RGB_565:
1751 quantizer = &TxQuantize::RGB565_ARGB8888;
1752 bpp_shift = 1;
1753 break;
1754 case GR_TEXFMT_ALPHA_8:
1755 quantizer = &TxQuantize::A8_ARGB8888;
1756 bpp_shift = 2;
1757 break;
1758 case GR_TEXFMT_ALPHA_INTENSITY_44:
1759 quantizer = &TxQuantize::AI44_ARGB8888;
1760 bpp_shift = 2;
1761 break;
1762 case GR_TEXFMT_ALPHA_INTENSITY_88:
1763 quantizer = &TxQuantize::AI88_ARGB8888;
1764 bpp_shift = 1;
1765 break;
1766 default:
1767 return 0;
1768 }
1769
1770#if !defined(NO_FILTER_THREAD)
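    /* Split the image into horizontal bands of blkheight = blkrow * 4 rows,
     * one band per worker thread; srcStride and destStride are the band sizes
     * in bytes for the source and destination formats (bpp_shift accounts for
     * the 16bpp or 8bpp source vs. the 32bpp destination), and the last thread
     * takes whatever rows remain. */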
1771 unsigned int numcore = _numcore;
1772 unsigned int blkrow = 0;
1773 while (numcore > 1 && blkrow == 0) {
1774 blkrow = (height >> 2) / numcore;
1775 numcore--;
1776 }
1777 if (blkrow > 0 && numcore > 1) {
1778 std::thread *thrd[MAX_NUMCORE];
1779 unsigned int i;
1780 int blkheight = blkrow << 2;
1781 unsigned int srcStride = (width * blkheight) << (2 - bpp_shift);
1782 unsigned int destStride = srcStride << bpp_shift;
1783 for (i = 0; i < numcore - 1; i++) {
1784 thrd[i] = new std::thread(std::bind(quantizer,
1785 this,
1786 (uint32*)src,
1787 (uint32*)dest,
1788 width,
1789 blkheight));
1790 src += srcStride;
1791 dest += destStride;
1792 }
1793 thrd[i] = new std::thread(std::bind(quantizer,
1794 this,
1795 (uint32*)src,
1796 (uint32*)dest,
1797 width,
1798 height - blkheight * i));
1799 for (i = 0; i < numcore; i++) {
1800 thrd[i]->join();
1801 delete thrd[i];
1802 }
1803 } else {
1804 (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height);
1805 }
1806#else
1807 (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height);
1808#endif
1809
1810 } else if (srcformat == GR_TEXFMT_ARGB_8888) {
1811 switch (destformat) {
1812 case GR_TEXFMT_ARGB_1555:
1813 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_ARGB1555 : &TxQuantize::ARGB8888_ARGB1555_ErrD;
1814 bpp_shift = 1;
1815 break;
1816 case GR_TEXFMT_ARGB_4444:
1817 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_ARGB4444 : &TxQuantize::ARGB8888_ARGB4444_ErrD;
1818 bpp_shift = 1;
1819 break;
1820 case GR_TEXFMT_RGB_565:
1821 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_RGB565 : &TxQuantize::ARGB8888_RGB565_ErrD;
1822 bpp_shift = 1;
1823 break;
1824 case GR_TEXFMT_ALPHA_8:
1825 case GR_TEXFMT_INTENSITY_8:
1826 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_A8 : &TxQuantize::ARGB8888_I8_Slow;
1827 bpp_shift = 2;
1828 break;
1829 case GR_TEXFMT_ALPHA_INTENSITY_44:
1830 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_AI44 : &TxQuantize::ARGB8888_AI44_ErrD;
1831 bpp_shift = 2;
1832 break;
1833 case GR_TEXFMT_ALPHA_INTENSITY_88:
1834 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_AI88 : &TxQuantize::ARGB8888_AI88_Slow;
1835 bpp_shift = 1;
1836 break;
1837 default:
1838 return 0;
1839 }
1840
1841#if !defined(NO_FILTER_THREAD)
1842 unsigned int numcore = _numcore;
1843 unsigned int blkrow = 0;
1844 while (numcore > 1 && blkrow == 0) {
1845 blkrow = (height >> 2) / numcore;
1846 numcore--;
1847 }
1848 if (blkrow > 0 && numcore > 1) {
1849 std::thread *thrd[MAX_NUMCORE];
1850 unsigned int i;
1851 int blkheight = blkrow << 2;
1852 unsigned int srcStride = (width * blkheight) << 2;
1853 unsigned int destStride = srcStride >> bpp_shift;
1854 for (i = 0; i < numcore - 1; i++) {
1855 thrd[i] = new std::thread(std::bind(quantizer,
1856 this,
1857 (uint32*)src,
1858 (uint32*)dest,
1859 width,
1860 blkheight));
1861 src += srcStride;
1862 dest += destStride;
1863 }
1864 thrd[i] = new std::thread(std::bind(quantizer,
1865 this,
1866 (uint32*)src,
1867 (uint32*)dest,
1868 width,
1869 height - blkheight * i));
1870 for (i = 0; i < numcore; i++) {
1871 thrd[i]->join();
1872 delete thrd[i];
1873 }
1874 } else {
1875 (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height);
1876 }
1877#else
1878 (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height);
1879#endif
1880
1881 } else {
1882 return 0;
1883 }
1884
1885 return 1;
1886}
1887
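/* Editor's note: a minimal usage sketch of quantize(). The buffer, dimensions
 * and format choice are illustrative only; the destination must be large
 * enough for the requested format (4 bytes per texel for ARGB8888).
 */
#if 0
void exampleDecompress(TxQuantize* q, uint8* argb1555Texels, int w, int h)
{
  uint8* argb8888 = new uint8[w * h * 4];
  q->quantize(argb1555Texels, argb8888, w, h,
              GR_TEXFMT_ARGB_1555, GR_TEXFMT_ARGB_8888, 1 /* fast path */);
  /* ... use the ARGB8888 texels ... */
  delete [] argb8888;
}
#endif
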
1888boolean
1889TxQuantize::FXT1(uint8 *src, uint8 *dest,
1890 int srcwidth, int srcheight, uint16 srcformat,
1891 int *destwidth, int *destheight, uint16 *destformat)
1892{
1893 /*
1894 * NOTE: src must be in ARGB8888 format, srcformat describes
1895   * the closest 16bpp representation of src.
1896 *
1897 * NOTE: I have modified the dxtn library to use ARGB format
1898   * which was originally ABGR format.
1899 */
1900
1901 boolean bRet = 0;
1902
1903 if (_tx_compress_fxt1 &&
1904 srcwidth >= 8 && srcheight >= 4) {
1905 /* compress to fxt1
1906     * width and height must be at least 8 and 4, respectively
1907 */
1908 int dstRowStride = ((srcwidth + 7) & ~7) << 1;
1909 int srcRowStride = (srcwidth << 2);
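    /* dstRowStride works out to paddedWidth * 2 bytes: FXT1 packs each 8x4
     * texel block into 16 bytes, so a 4-row band of blocks needs
     * (paddedWidth / 8) * 16 bytes. srcRowStride is width * 4 bytes of
     * ARGB8888 input. */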
1910
1911#if !defined(NO_FILTER_THREAD)
1912 unsigned int numcore = _numcore;
1913 unsigned int blkrow = 0;
1914 while (numcore > 1 && blkrow == 0) {
1915 blkrow = (srcheight >> 2) / numcore;
1916 numcore--;
1917 }
1918 if (blkrow > 0 && numcore > 1) {
1919 std::thread *thrd[MAX_NUMCORE];
1920 unsigned int i;
1921 int blkheight = blkrow << 2;
1922 unsigned int srcStride = (srcwidth * blkheight) << 2;
1923 unsigned int destStride = dstRowStride * blkrow;
1924 for (i = 0; i < numcore - 1; i++) {
1925 thrd[i] = new std::thread(std::bind(_tx_compress_fxt1,
1926 srcwidth,
1927 blkheight,
1928 4,
1929 src,
1930 srcRowStride,
1931 dest,
1932 dstRowStride));
1933 src += srcStride;
1934 dest += destStride;
1935 }
1936 thrd[i] = new std::thread(std::bind(_tx_compress_fxt1,
1937 srcwidth,
1938 srcheight - blkheight * i,
1939 4,
1940 src,
1941 srcRowStride,
1942 dest,
1943 dstRowStride));
1944 for (i = 0; i < numcore; i++) {
1945 thrd[i]->join();
1946 delete thrd[i];
1947 }
1948 } else {
1949 (*_tx_compress_fxt1)(srcwidth, /* width */
1950 srcheight, /* height */
1951 4, /* comps: ARGB8888=4, RGB888=3 */
1952 src, /* source */
1953 srcRowStride, /* width*comps */
1954 dest, /* destination */
 1955 dstRowStride); /* 16 bytes per 8x4 texel block */
1956 }
1957#else
1958 (*_tx_compress_fxt1)(srcwidth, /* width */
1959 srcheight, /* height */
1960 4, /* comps: ARGB8888=4, RGB888=3 */
1961 src, /* source */
1962 srcRowStride, /* width*comps */
1963 dest, /* destination */
 1964 dstRowStride); /* 16 bytes per 8x4 texel block */
1965#endif
1966
 1967 /* dxtn pads width and height up to multiples of 8 and 4 respectively by replication */
1968 *destwidth = (srcwidth + 7) & ~7;
1969 *destheight = (srcheight + 3) & ~3;
1970 *destformat = GR_TEXFMT_ARGB_CMP_FXT1;
1971
1972 bRet = 1;
1973 }
1974
1975 return bRet;
1976}
1977
1978boolean
1979TxQuantize::DXTn(uint8 *src, uint8 *dest,
1980 int srcwidth, int srcheight, uint16 srcformat,
1981 int *destwidth, int *destheight, uint16 *destformat)
1982{
1983 /*
1984 * NOTE: src must be in ARGB8888 format, srcformat describes
 1985 * the closest 16bpp representation of src.
1986 *
1987 * NOTE: I have modified the dxtn library to use ARGB format
 1988 * which originally was ABGR format.
1989 */
1990
1991 boolean bRet = 0;
1992
1993 if (_tx_compress_dxtn &&
1994 srcwidth >= 4 && srcheight >= 4) {
1995 /* compress to dxtn
 1996 * width and height must be at least 4
1997 */
1998
 1999 /* skip formats where DXTn would not reduce the size. */
2000 if (srcformat == GR_TEXFMT_ALPHA_8 ||
2001 srcformat == GR_TEXFMT_ALPHA_INTENSITY_44) {
 2002 ; /* nothing to do; keep the compiler quiet */
2003 } else {
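    /* DXT5 stores 16 bytes per 4x4 texel block, so a row of blocks takes
     * (width rounded up to a multiple of 4) * 4 bytes; the DXT1 paths
     * below halve this because DXT1 blocks are 8 bytes. */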
2004 int dstRowStride = ((srcwidth + 3) & ~3) << 2;
2005 int compression = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
2006
2007 *destformat = GR_TEXFMT_ARGB_CMP_DXT5;
2008
2009#if !GLIDE64_DXTN
 2010 /* DXT1 with 1-bit alpha is disabled for Glide64 builds (GLIDE64_DXTN)
 2011 * because some textures have all-zero alpha values.
 2012 * see "N64 Kobe Bryant in NBA Courtside"
 2013 */
2014 if (srcformat == GR_TEXFMT_ARGB_1555) {
2015 dstRowStride >>= 1;
2016 compression = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
2017 *destformat = GR_TEXFMT_ARGB_CMP_DXT1;
2018 } else
2019#endif
2020 if (srcformat == GR_TEXFMT_RGB_565 ||
2021 srcformat == GR_TEXFMT_INTENSITY_8) {
2022 dstRowStride >>= 1;
2023 compression = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
2024 *destformat = GR_TEXFMT_ARGB_CMP_DXT1;
2025 }
2026
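    /* Compress per 4-scanline block row across the worker threads,
     * exactly as in the FXT1 path above. */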
2027#if !defined(NO_FILTER_THREAD)
2028 unsigned int numcore = _numcore;
2029 unsigned int blkrow = 0;
2030 while (numcore > 1 && blkrow == 0) {
2031 blkrow = (srcheight >> 2) / numcore;
2032 numcore--;
2033 }
2034 if (blkrow > 0 && numcore > 1) {
2035 std::thread *thrd[MAX_NUMCORE];
2036 unsigned int i;
2037 int blkheight = blkrow << 2;
2038 unsigned int srcStride = (srcwidth * blkheight) << 2;
2039 unsigned int destStride = dstRowStride * blkrow;
2040 for (i = 0; i < numcore - 1; i++) {
2041 thrd[i] = new std::thread(std::bind(_tx_compress_dxtn,
2042 4,
2043 srcwidth,
2044 blkheight,
2045 src,
2046 compression,
2047 dest,
2048 dstRowStride));
2049 src += srcStride;
2050 dest += destStride;
2051 }
2052 thrd[i] = new std::thread(std::bind(_tx_compress_dxtn,
2053 4,
2054 srcwidth,
2055 srcheight - blkheight * i,
2056 src,
2057 compression,
2058 dest,
2059 dstRowStride));
2060 for (i = 0; i < numcore; i++) {
2061 thrd[i]->join();
2062 delete thrd[i];
2063 }
2064 } else {
2065 (*_tx_compress_dxtn)(4, /* comps: ARGB8888=4, RGB888=3 */
2066 srcwidth, /* width */
2067 srcheight, /* height */
2068 src, /* source */
2069 compression, /* format */
2070 dest, /* destination */
 2071 dstRowStride); /* DXT1 = 8 bytes per 4x4 texel block,
 2072 * others = 16 bytes per 4x4 texel block */
2073 }
2074#else
2075 (*_tx_compress_dxtn)(4, /* comps: ARGB8888=4, RGB888=3 */
2076 srcwidth, /* width */
2077 srcheight, /* height */
2078 src, /* source */
2079 compression, /* format */
2080 dest, /* destination */
 2081 dstRowStride); /* DXT1 = 8 bytes per 4x4 texel block,
 2082 * others = 16 bytes per 4x4 texel block */
2083#endif
2084
 2085 /* dxtn pads width and height up to multiples of 4 by replication */
2086 *destwidth = (srcwidth + 3) & ~3;
2087 *destheight = (srcheight + 3) & ~3;
2088
2089 bRet = 1;
2090 }
2091 }
2092
2093 return bRet;
2094}
2095
2096boolean
2097TxQuantize::compress(uint8 *src, uint8 *dest,
2098 int srcwidth, int srcheight, uint16 srcformat,
2099 int *destwidth, int *destheight, uint16 *destformat,
2100 int compressionType)
2101{
2102 boolean bRet = 0;
2103
2104 switch (compressionType) {
2105 case FXT1_COMPRESSION:
2106 bRet = FXT1(src, dest,
2107 srcwidth, srcheight, srcformat,
2108 destwidth, destheight, destformat);
2109 break;
2110 case S3TC_COMPRESSION:
2111 bRet = DXTn(src, dest,
2112 srcwidth, srcheight, srcformat,
2113 destwidth, destheight, destformat);
2114 break;
2115 case NCC_COMPRESSION:
2116 /* TODO: narrow channel compression */
2117 ;
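    /* bRet stays 0 here, so the caller presumably keeps the texture uncompressed. */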
2118 }
2119
2120 return bRet;
2121}
2122
2123#if 0 /* unused */
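/* The routines below are 32-bit x86 inline-assembly variants of the
 * conversions above; they are compiled out and presumably kept only
 * for reference. */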
2124void
2125TxQuantize::I8_ARGB8888(uint32* src, uint32* dest, int width, int height)
2126{
2127 int siz = (width * height) >> 2;
2128
2129 __asm {
2130 push ebx;
2131 push esi;
2132 push edi;
2133
2134 mov esi, dword ptr [src];
2135 mov edi, dword ptr [dest];
2136 mov ecx, dword ptr [siz];
2137
2138 tc1_loop:
2139 mov eax, dword ptr [esi];
2140 add esi, 4;
2141
2142 // aaaaaaaa
2143 // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
2144 mov edx, eax;
2145 and eax, 0x000000ff;
2146 mov ebx, eax; // 00000000 00000000 00000000 aaaaaaaa
2147 shl ebx, 8; // 00000000 00000000 aaaaaaaa 00000000
2148 or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa
2149 shl ebx, 8; // 00000000 aaaaaaaa 00000000 00000000
2150 or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
2151 or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
2152
2153 mov dword ptr [edi], eax;
2154 add edi, 4;
2155
2156 mov eax, edx;
2157 and eax, 0x0000ff00;
2158 mov ebx, eax; // 00000000 00000000 aaaaaaaa 00000000
2159 shr ebx, 8; // 00000000 00000000 00000000 aaaaaaaa
2160 or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa
2161 shl ebx, 16; // 00000000 aaaaaaaa 00000000 00000000
2162 or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
2163 or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
2164
2165 mov dword ptr [edi], eax;
2166 add edi, 4;
2167
2168 mov eax, edx;
2169 and eax, 0x00ff0000;
2170 mov ebx, eax; // 00000000 aaaaaaaa 00000000 00000000
2171 shr ebx, 8; // 00000000 00000000 aaaaaaaa 00000000
2172 or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa 00000000
2173 shr ebx, 8; // 00000000 00000000 00000000 aaaaaaaa
2174 or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
2175 or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
2176
2177 mov dword ptr [edi], eax;
2178 add edi, 4;
2179
2180 mov eax, edx;
2181 and eax, 0xff000000;
2182 mov ebx, eax; // aaaaaaaa 00000000 00000000 00000000
2183 shr ebx, 8; // 00000000 aaaaaaaa 00000000 00000000
2184 or eax, ebx; // aaaaaaaa aaaaaaaa 00000000 00000000
2185 shr ebx, 8; // 00000000 00000000 aaaaaaaa 00000000
2186 or eax, ebx; // aaaaaaaa aaaaaaaa aaaaaaaa 00000000
2187 shr eax, 8; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
2188 or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
2189
2190 mov dword ptr [edi], eax;
2191 add edi, 4;
2192
2193 dec ecx;
2194 jnz tc1_loop;
2195
2196 pop edi;
2197 pop esi;
2198 pop ebx;
2199 }
2200}
2201
2202void
2203TxQuantize::ARGB8888_I8(uint32* src, uint32* dest, int width, int height)
2204{
2205 ARGB8888_A8(src, dest, width, height);
2206}
2207
2208void
2209TxQuantize::ARGB1555_ABGR8888(uint32* src, uint32* dest, int width, int height)
2210{
2211 int siz = (width * height) >> 1;
2212
2213 __asm {
2214 push ebx;
2215 push esi;
2216 push edi;
2217
2218 mov esi, dword ptr [src];
2219 mov edi, dword ptr [dest];
2220 mov ecx, dword ptr [siz];
2221
2222 tc1_loop:
2223 mov eax, dword ptr [esi];
2224 add esi, 4;
2225
2226 // arrr rrgg gggb bbbb
2227 // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2228 mov edx, eax; // edx = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
2229 and ebx, 0x00000000;
2230 and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000
2231 jz transparent1;
2232 or ebx, 0xff000000; // ebx = aaaaaaaa 00000000 00000000 00000000
2233
2234 transparent1:
2235 mov eax, edx; // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
2236 and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb
2237 shl edx, 14; // edx = 00000000 00000bbb bb000000 00000000
2238 or ebx, edx; // ebx = aaaaaaaa 00000bbb bb000000 00000000
2239 shl edx, 5; // edx = 00000000 bbbbb000 00000000 00000000
2240 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb bb000000 00000000
2241 and ebx, 0xffff0000; // ebx = aaaaaaaa bbbbbbbb 00000000 00000000
2242 mov edx, eax;
2243 and edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000
2244 shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000
2245 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb 00000ggg gg000000
2246 shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000
2247 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg gg000000
2248 and ebx, 0xffffff00; // ebx = aaaaaaaa bbbbbbbb gggggggg 00000000
2249 mov edx, eax;
2250 and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000
2251 shr edx, 7; // edx = 00000000 00000000 00000000 rrrrr000
2252 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrr000
2253 shr edx, 5; // edx = 00000000 00000000 00000000 00000rrr
2254 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2255
2256 mov dword ptr [edi], ebx;
2257 add edi, 4;
2258
2259 shr eax, 16; // eax = 00000000 00000000 arrrrrgg gggbbbbb
2260 mov edx, eax; // edx = 00000000 00000000 arrrrrgg gggbbbbb
2261 and ebx, 0x00000000;
2262 and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000
2263 jz transparent2;
2264 or ebx, 0xff000000; // ebx = aaaaaaaa 00000000 00000000 00000000
2265
2266 transparent2:
2267 mov eax, edx; // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
2268 and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb
2269 shl edx, 14; // edx = 00000000 00000bbb bb000000 00000000
2270 or ebx, edx; // ebx = aaaaaaaa 00000bbb bb000000 00000000
2271 shl edx, 5; // edx = 00000000 bbbbb000 00000000 00000000
2272 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb bb000000 00000000
2273 and ebx, 0xffff0000; // ebx = aaaaaaaa bbbbbbbb 00000000 00000000
2274 mov edx, eax;
2275 and edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000
2276 shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000
2277 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb 00000ggg gg000000
2278 shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000
2279 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg gg000000
2280 and ebx, 0xffffff00; // ebx = aaaaaaaa bbbbbbbb gggggggg 00000000
2281 mov edx, eax;
2282 and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000
2283 shr edx, 7; // edx = 00000000 00000000 00000000 rrrrr000
2284 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrr000
2285 shr edx, 5; // edx = 00000000 00000000 00000000 00000rrr
2286 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2287
2288 mov dword ptr [edi], ebx;
2289 add edi, 4;
2290
2291 dec ecx;
2292 jnz tc1_loop;
2293
2294 pop edi;
2295 pop esi;
2296 pop ebx;
2297 }
2298}
2299
2300void
2301TxQuantize::ARGB4444_ABGR8888(uint32* src, uint32* dest, int width, int height)
2302{
2303 int siz = (width * height) >> 1;
2304
2305 __asm {
2306 push ebx;
2307 push esi;
2308 push edi;
2309
2310 mov esi, dword ptr [src];
2311 mov edi, dword ptr [dest];
2312 mov ecx, dword ptr [siz];
2313
2314 tc1_loop:
2315 mov eax, dword ptr [esi];
2316 add esi, 4;
2317
2318 // aaaa rrrr gggg bbbb
2319 // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2320 mov edx, eax;
2321 and eax, 0x0000ffff;
2322 mov ebx, eax; // 00000000 00000000 aaaarrrr ggggbbbb
2323 and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
2324 shl ebx, 12; // 0000aaaa 00000000 00000000 00000000
2325 or eax, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb
2326 mov ebx, eax;
2327 and ebx, 0x0000000f; // 00000000 00000000 00000000 0000bbbb
2328 shl ebx, 16; // 00000000 0000bbbb 00000000 00000000
2329 or eax, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggbbbb
2330 mov ebx, eax;
2331 and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
2332 shr ebx, 8; // 00000000 00000000 00000000 0000rrrr
2333 and eax, 0xfffffff0;
2334 or eax, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggrrrr
2335 mov ebx, eax;
2336 and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
2337 shl ebx, 4; // 00000000 00000000 0000gggg 00000000
2338 and eax, 0x0f0f000f; // 0000aaaa 0000bbbb 00000000 0000rrrr
2339 or eax, ebx; // 0000aaaa 0000bbbb 0000gggg 0000rrrr
2340 mov ebx, eax;
2341 shl ebx, 4; // aaaa0000 bbbb0000 gggg0000 rrrr0000
2342 or eax, ebx; // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2343
2344 mov dword ptr [edi], eax;
2345
2346 add edi, 4;
2347
2348 shr edx, 16;
2349 mov ebx, edx; // 00000000 00000000 aaaarrrr ggggbbbb
2350 and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
2351 shl ebx, 12; // 0000aaaa 00000000 00000000 00000000
2352 or edx, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb
2353 mov ebx, edx;
2354 and ebx, 0x0000000f; // 00000000 00000000 00000000 0000bbbb
2355 shl ebx, 16; // 00000000 0000bbbb 00000000 00000000
2356 or edx, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggbbbb
2357 mov ebx, edx;
2358 and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
2359 shr ebx, 8; // 00000000 00000000 00000000 0000rrrr
2360 and edx, 0xfffffff0;
2361 or edx, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggrrrr
2362 mov ebx, edx;
2363 and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
2364 shl ebx, 4; // 00000000 00000000 0000gggg 00000000
2365 and edx, 0x0f0f000f; // 0000aaaa 0000bbbb 00000000 0000rrrr
2366 or edx, ebx; // 0000aaaa 0000bbbb 0000gggg 0000rrrr
2367 mov ebx, edx;
2368 shl ebx, 4; // aaaa0000 bbbb0000 gggg0000 rrrr0000
2369 or edx, ebx; // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2370
2371 mov dword ptr [edi], edx;
2372 add edi, 4;
2373
2374 dec ecx;
2375 jnz tc1_loop;
2376
2377 pop edi;
2378 pop esi;
2379 pop ebx;
2380 }
2381}
2382
2383void
2384TxQuantize::ARGB8888_ABGR8888(uint32* src, uint32* dest, int width, int height)
2385{
2386 int siz = width * height;
2387
2388 __asm {
2389 push ebx;
2390 push esi;
2391 push edi;
2392
2393 mov esi, dword ptr [src];
2394 mov edi, dword ptr [dest];
2395 mov ecx, dword ptr [siz];
2396
2397 tc1_loop:
2398 mov eax, dword ptr [esi];
2399 add esi, 4;
2400
2401 // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2402 mov edx, eax;
2403 bswap edx;
2404 shr edx, 8;
2405 and eax, 0xff000000;
2406
2407 or eax, edx;
2408
2409 mov dword ptr [edi], eax;
2410 add edi, 4;
2411
2412 dec ecx;
2413 jnz tc1_loop;
2414
2415 pop edi;
2416 pop esi;
2417 pop ebx;
2418 }
2419}
2420#endif