ALL: Huge upstream synch + PerRom DelaySI & CountPerOp parameters
[mupen64plus-pandora.git] / source / gles2glide64 / src / GlideHQ / TxQuantize.cpp
CommitLineData
98e75f2d 1/*
2 * Texture Filtering
3 * Version: 1.0
4 *
5 * Copyright (C) 2007 Hiroshi Morii All Rights Reserved.
6 * Email koolsmoky(at)users.sourceforge.net
7 * Web http://www.3dfxzone.it/koolsmoky
8 *
9 * this is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2, or (at your option)
12 * any later version.
13 *
14 * this is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with GNU Make; see the file COPYING. If not, write to
21 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23
24#ifdef __MSC__
25#pragma warning(disable: 4786)
26#endif
27
2d262872 28#ifndef NO_FILTER_THREAD
98e75f2d 29#include <functional>
30#include <thread>
2d262872 31#endif
98e75f2d 32
/* NOTE: This code is not optimized; it could be made faster. */
34
35#include "TxQuantize.h"
36
37TxQuantize::TxQuantize()
38{
39 _txUtil = new TxUtil();
40
41 /* get number of CPU cores. */
42 _numcore = _txUtil->getNumberofProcessors();
43
44 /* get dxtn extensions */
45 _tx_compress_fxt1 = TxLoadLib::getInstance()->getfxtCompressTexFuncExt();
2d262872 46 _tx_compress_dxtn_rgba = TxLoadLib::getInstance()->getdxtCompressTexFuncExt();
98e75f2d 47}
48
49
50TxQuantize::~TxQuantize()
51{
52 delete _txUtil;
53}
54
55void
56TxQuantize::ARGB1555_ARGB8888(uint32* src, uint32* dest, int width, int height)
57{
58#if 1
59 int siz = (width * height) >> 1;
60 int i;
61 for (i = 0; i < siz; i++) {
62 *dest = (((*src & 0x00008000) ? 0xff000000 : 0x00000000) |
63 ((*src & 0x00007c00) << 9) | ((*src & 0x00007000) << 4) |
64 ((*src & 0x000003e0) << 6) | ((*src & 0x00000380) << 1) |
65 ((*src & 0x0000001f) << 3) | ((*src & 0x0000001c) >> 2));
66 dest++;
67 *dest = (((*src & 0x80000000) ? 0xff000000 : 0x00000000) |
68 ((*src & 0x7c000000) >> 7) | ((*src & 0x70000000) >> 12) |
69 ((*src & 0x03e00000) >> 10) | ((*src & 0x03800000) >> 15) |
70 ((*src & 0x001f0000) >> 13) | ((*src & 0x001c0000) >> 18));
71 dest++;
72 src++;
73 }
74#else
75 int siz = (width * height) >> 1;
76
77 __asm {
78 push ebx;
79 push esi;
80 push edi;
81
82 mov esi, dword ptr [src];
83 mov edi, dword ptr [dest];
84 mov ecx, dword ptr [siz];
85
86 tc1_loop:
87 mov eax, dword ptr [esi];
88 add esi, 4;
89
90 // arrr rrgg gggb bbbb
91 // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
92 mov edx, eax; // edx = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
93 mov ebx, 0x00000000;
94 and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000
95 jz transparent1;
96 mov ebx, 0xff000000; // ebx = aaaaaaaa 00000000 00000000 00000000
97
98 transparent1:
99 mov eax, edx; // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
100 and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000
101 shl edx, 4; // edx = 00000000 00000rrr rr000000 00000000
102 or ebx, edx; // ebx = aaaaaaaa 00000rrr rr000000 00000000
103 shl edx, 5; // edx = 00000000 rrrrr000 00000000 00000000
104 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr rr000000 00000000
105 and ebx, 0xffff0000; // ebx = aaaaaaaa rrrrrrrr 00000000 00000000
106 mov edx, eax;
107 and edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000
108 shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000
109 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr 00000ggg gg000000
110 shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000
111 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg gg000000
112 and ebx, 0xffffff00; // ebx = aaaaaaaa rrrrrrrr gggggggg 00000000
113 mov edx, eax;
114 and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb
115 shl edx, 3; // edx = 00000000 00000000 00000000 bbbbb000
116 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbb000
117 shr edx, 5; // edx = 00000000 00000000 00000000 00000bbb
118 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
119
120 mov dword ptr [edi], ebx;
121 add edi, 4;
122
123 shr eax, 16; // eax = 00000000 00000000 arrrrrgg gggbbbbb
124 mov edx, eax; // edx = 00000000 00000000 arrrrrgg gggbbbbb
125 mov ebx, 0x00000000;
126 and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000
127 jz transparent2;
128 mov ebx, 0xff000000; // ebx = aaaaaaaa 00000000 00000000 00000000
129
130 transparent2:
131 mov eax, edx; // eax = 00000000 00000000 arrrrrgg gggbbbbb
132 and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000
133 shl edx, 4; // edx = 00000000 00000rrr rr000000 00000000
134 or ebx, edx; // ebx = aaaaaaaa 00000rrr rr000000 00000000
135 shl edx, 5; // edx = 00000000 rrrrr000 00000000 00000000
136 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr rr000000 00000000
137 and ebx, 0xffff0000; // ebx = aaaaaaaa rrrrrrrr 00000000 00000000
138 mov edx, eax;
139 and edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000
140 shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000
141 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr 00000ggg gg000000
142 shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000
143 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg gg000000
144 and ebx, 0xffffff00; // ebx = aaaaaaaa rrrrrrrr gggggggg 00000000
145 mov edx, eax;
146 and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb
147 shl edx, 3; // edx = 00000000 00000000 00000000 bbbbb000
148 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbb000
149 shr edx, 5; // edx = 00000000 00000000 00000000 00000bbb
150 or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
151
152 mov dword ptr [edi], ebx;
153 add edi, 4;
154
155 dec ecx;
156 jnz tc1_loop;
157
158 pop edi;
159 pop esi;
160 pop ebx;
161 }
162#endif
163}
164
165void
166TxQuantize::ARGB4444_ARGB8888(uint32* src, uint32* dest, int width, int height)
167{
168#if 1
169 int siz = (width * height) >> 1;
170 int i;
171 for (i = 0; i < siz; i++) {
172 *dest = ((*src & 0x0000f000) << 12) |
173 ((*src & 0x00000f00) << 8) |
174 ((*src & 0x000000f0) << 4) |
175 (*src & 0x0000000f);
176 *dest |= (*dest << 4);
177 dest++;
178 *dest = ((*src & 0xf0000000) |
179 ((*src & 0x0f000000) >> 4) |
180 ((*src & 0x00f00000) >> 8) |
181 ((*src & 0x000f0000) >> 12));
182 *dest |= (*dest >> 4);
183 dest++;
184 src++;
185 }
186#else
187 int siz = (width * height) >> 1;
188
189 __asm {
190 push ebx;
191 push esi;
192 push edi;
193
194 mov esi, dword ptr [src];
195 mov edi, dword ptr [dest];
196 mov ecx, dword ptr [siz];
197
198 tc1_loop:
199 mov eax, dword ptr [esi];
200 add esi, 4;
201
202 // aaaa rrrr gggg bbbb
203 // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
204 mov edx, eax;
205 and eax, 0x0000ffff;
206 mov ebx, eax; // 00000000 00000000 aaaarrrr ggggbbbb
207 and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
208 shl ebx, 12; // 0000aaaa 00000000 00000000 00000000
209 or eax, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb
210 mov ebx, eax;
211 and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
212 shl ebx, 8; // 00000000 0000rrrr 00000000 00000000
213 or eax, ebx; // 0000aaaa 0000rrrr aaaarrrr ggggbbbb
214 mov ebx, eax;
215 and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
216 shl ebx, 4; // 00000000 00000000 0000gggg 00000000
217 and eax, 0x0f0f000f; // 0000aaaa 0000rrrr 00000000 0000bbbb
218 or eax, ebx; // 0000aaaa 0000rrrr 0000gggg 0000bbbb
219 mov ebx, eax;
220 shl ebx, 4; // aaaa0000 rrrr0000 gggg0000 bbbb0000
221 or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
222
223 mov dword ptr [edi], eax;
224 add edi, 4;
225
226 shr edx, 16;
227 mov ebx, edx; // 00000000 00000000 aaaarrrr ggggbbbb
228 and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
229 shl ebx, 12; // 0000aaaa 00000000 00000000 00000000
230 or edx, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb
231 mov ebx, edx;
232 and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
233 shl ebx, 8; // 00000000 0000rrrr 00000000 00000000
234 or edx, ebx; // 0000aaaa 0000rrrr aaaarrrr ggggbbbb
235 mov ebx, edx;
236 and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
237 shl ebx, 4; // 00000000 00000000 0000gggg 00000000
238 and edx, 0x0f0f000f; // 0000aaaa 0000rrrr 00000000 0000bbbb
239 or edx, ebx; // 0000aaaa 0000rrrr 0000gggg 0000bbbb
240 mov ebx, edx;
241 shl ebx, 4; // aaaa0000 rrrr0000 gggg0000 bbbb0000
242 or edx, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
243
244 mov dword ptr [edi], edx;
245 add edi, 4;
246
247 dec ecx;
248 jnz tc1_loop;
249
250 pop edi;
251 pop esi;
252 pop ebx;
253 }
254#endif
255}
256
257void
258TxQuantize::RGB565_ARGB8888(uint32* src, uint32* dest, int width, int height)
259{
260#if 1
261 int siz = (width * height) >> 1;
262 int i;
263 for (i = 0; i < siz; i++) {
264 *dest = (0xff000000 |
265 ((*src & 0x0000f800) << 8) | ((*src & 0x0000e000) << 3) |
266 ((*src & 0x000007e0) << 5) | ((*src & 0x00000600) >> 1) |
267 ((*src & 0x0000001f) << 3) | ((*src & 0x0000001c) >> 2));
268 dest++;
269 *dest = (0xff000000 |
270 ((*src & 0xf8000000) >> 8) | ((*src & 0xe0000000) >> 13) |
271 ((*src & 0x07e00000) >> 11) | ((*src & 0x06000000) >> 17) |
272 ((*src & 0x001f0000) >> 13) | ((*src & 0x001c0000) >> 18));
273 dest++;
274 src++;
275 }
276#else
277 int siz = (width * height) >> 1;
278
279 __asm {
280 push ebx;
281 push esi;
282 push edi;
283
284 mov esi, dword ptr [src];
285 mov edi, dword ptr [dest];
286 mov ecx, dword ptr [siz];
287
288 tc1_loop:
289 mov eax, dword ptr [esi];
290 add esi, 4;
291
292 // rrrr rggg gggb bbbb
293 // 11111111 rrrrrrrr gggggggg bbbbbbbb
294 mov edx, eax;
295 and eax, 0x0000ffff;
296 mov ebx, eax; // 00000000 00000000 rrrrrggg gggbbbbb
297 and ebx, 0x0000f800; // 00000000 00000000 rrrrr000 00000000
298 shl ebx, 5; // 00000000 000rrrrr 00000000 00000000
299 or eax, ebx; // 00000000 000rrrrr rrrrrggg gggbbbbb
300 mov ebx, eax;
301 and ebx, 0x000007e0; // 00000000 00000000 00000ggg ggg00000
302 shl ebx, 5; // 00000000 00000000 gggggg00 00000000
303 and eax, 0x001F001F; // 00000000 000rrrrr 00000000 000bbbbb
304 shl eax, 3; // 00000000 rrrrr000 00000000 bbbbb000
305 or eax, ebx; // 00000000 rrrrr000 gggggg00 bbbbb000
306 mov ebx, eax;
307 shr ebx, 5; // 00000000 00000rrr rr000ggg ggg00bbb
308 and ebx, 0x00070007; // 00000000 00000rrr 00000000 00000bbb
309 or eax, ebx; // 00000000 rrrrrrrr gggggg00 bbbbbbbb
310 mov ebx, eax;
311 shr ebx, 6;
312 and ebx, 0x00000300; // 00000000 00000000 000000gg 00000000
313 or eax, ebx // 00000000 rrrrrrrr gggggggg bbbbbbbb
314 or eax, 0xff000000; // 11111111 rrrrrrrr gggggggg bbbbbbbb
315
316 mov dword ptr [edi], eax;
317 add edi, 4;
318
319 shr edx, 16;
320 mov eax, edx; // 00000000 00000000 rrrrrggg gggbbbbb
321 and eax, 0x0000ffff;
322 mov ebx, eax; // 00000000 00000000 rrrrrggg gggbbbbb
323 and ebx, 0x0000f800; // 00000000 00000000 rrrrr000 00000000
324 shl ebx, 5; // 00000000 000rrrrr 00000000 00000000
325 or eax, ebx; // 00000000 000rrrrr rrrrrggg gggbbbbb
326 mov ebx, eax;
327 and ebx, 0x000007e0; // 00000000 00000000 00000ggg ggg00000
328 shl ebx, 5; // 00000000 00000000 gggggg00 00000000
329 and eax, 0x001F001F; // 00000000 000rrrrr 00000000 000bbbbb
330 shl eax, 3; // 00000000 rrrrr000 00000000 bbbbb000
331 or eax, ebx; // 00000000 rrrrr000 gggggg00 bbbbb000
332 mov ebx, eax;
333 shr ebx, 5; // 00000000 00000rrr rr000ggg ggg00bbb
334 and ebx, 0x00070007; // 00000000 00000rrr 00000000 00000bbb
335 or eax, ebx; // 00000000 rrrrrrrr gggggg00 bbbbbbbb
336 mov ebx, eax;
337 shr ebx, 6;
338 and ebx, 0x00000300; // 00000000 00000000 000000gg 00000000
339 or eax, ebx // 00000000 rrrrrrrr gggggggg bbbbbbbb
340 or eax, 0xff000000; // 11111111 rrrrrrrr gggggggg bbbbbbbb
341
342 mov dword ptr [edi], eax;
343 add edi, 4;
344
345 dec ecx;
346 jnz tc1_loop;
347
348 pop edi;
349 pop esi;
350 pop ebx;
351 }
352#endif
353}
354
355void
356TxQuantize::A8_ARGB8888(uint32* src, uint32* dest, int width, int height)
357{
358#if 1
359 int siz = (width * height) >> 2;
360 int i;
361 for (i = 0; i < siz; i++) {
362 *dest = (*src & 0x000000ff);
363 *dest |= (*dest << 8);
364 *dest |= (*dest << 16);
365 dest++;
366 *dest = (*src & 0x0000ff00);
367 *dest |= (*dest >> 8);
368 *dest |= (*dest << 16);
369 dest++;
370 *dest = (*src & 0x00ff0000);
371 *dest |= (*dest << 8);
372 *dest |= (*dest >> 16);
373 dest++;
374 *dest = (*src & 0xff000000);
375 *dest |= (*dest >> 8);
376 *dest |= (*dest >> 16);
377 dest++;
378 src++;
379 }
380#else
381 int siz = (width * height) >> 2;
382
383 __asm {
384 push ebx;
385 push esi;
386 push edi;
387
388 mov esi, dword ptr [src];
389 mov edi, dword ptr [dest];
390 mov ecx, dword ptr [siz];
391
392 tc1_loop:
393 mov eax, dword ptr [esi];
394 add esi, 4;
395
396 // aaaaaaaa
397 // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
398 mov edx, eax;
399 and eax, 0x000000ff;
400 mov ebx, eax; // 00000000 00000000 00000000 aaaaaaaa
401 shl ebx, 8; // 00000000 00000000 aaaaaaaa 00000000
402 or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa
403 mov ebx, eax;
404 shl ebx, 16; // aaaaaaaa aaaaaaaa 00000000 00000000
405 or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
406
407 mov dword ptr [edi], eax;
408 add edi, 4;
409
410 mov eax, edx;
411 and eax, 0x0000ff00;
412 mov ebx, eax; // 00000000 00000000 aaaaaaaa 00000000
413 shr ebx, 8; // 00000000 00000000 00000000 aaaaaaaa
414 or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa
415 mov ebx, eax;
416 shl ebx, 16; // aaaaaaaa aaaaaaaa 00000000 00000000
417 or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
418
419 mov dword ptr [edi], eax;
420 add edi, 4;
421
422 mov eax, edx;
423 and eax, 0x00ff0000;
424 mov ebx, eax; // 00000000 aaaaaaaa 00000000 00000000
425 shl ebx, 8; // aaaaaaaa 00000000 00000000 00000000
426 or eax, ebx; // aaaaaaaa aaaaaaaa 00000000 00000000
427 mov ebx, eax;
428 shr ebx, 16; // 00000000 00000000 aaaaaaaa aaaaaaaa
429 or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
430
431 mov dword ptr [edi], eax;
432 add edi, 4;
433
434 mov eax, edx;
435 and eax, 0xff000000;
436 mov ebx, eax; // aaaaaaaa 00000000 00000000 00000000
437 shr ebx, 8; // 00000000 aaaaaaaa 00000000 00000000
438 or eax, ebx; // aaaaaaaa aaaaaaaa 00000000 00000000
439 mov ebx, eax;
440 shr ebx, 16; // 00000000 00000000 aaaaaaaa aaaaaaaa
441 or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
442
443 mov dword ptr [edi], eax;
444 add edi, 4;
445
446 dec ecx;
447 jnz tc1_loop;
448
449 pop edi;
450 pop esi;
451 pop ebx;
452 }
453#endif
454}
455
456void
457TxQuantize::AI44_ARGB8888(uint32* src, uint32* dest, int width, int height)
458{
459#if 1
460 int siz = (width * height) >> 2;
461 int i;
462 for (i = 0; i < siz; i++) {
463 *dest = (*src & 0x0000000f);
464 *dest |= ((*dest << 8) | (*dest << 16));
465 *dest |= ((*src & 0x000000f0) << 20);
466 *dest |= (*dest << 4);
467 dest++;
468 *dest = (*src & 0x00000f00);
469 *dest |= ((*dest << 8) | (*dest >> 8));
470 *dest |= ((*src & 0x0000f000) << 12);
471 *dest |= (*dest << 4);
472 dest++;
473 *dest = (*src & 0x000f0000);
474 *dest |= ((*dest >> 8) | (*dest >> 16));
475 *dest |= ((*src & 0x00f00000) << 4);
476 *dest |= (*dest << 4);
477 dest++;
478 *dest = ((*src & 0x0f000000) >> 4);
479 *dest |= ((*dest >> 8) | (*dest >> 16));
480 *dest |= (*src & 0xf0000000);
481 *dest |= (*dest >> 4);
482 dest++;
483 src++;
484 }
485#else
486 int siz = (width * height) >> 2;
487
488 __asm {
489 push ebx;
490 push esi;
491 push edi;
492
493 mov esi, dword ptr [src];
494 mov edi, dword ptr [dest];
495 mov ecx, dword ptr [siz];
496
497 tc1_loop:
498 mov eax, dword ptr [esi];
499 add esi, 4;
500
501 // aaaaiiii
502 // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
503 mov edx, eax;
504 and eax, 0x000000f0; // 00000000 00000000 00000000 aaaa0000
505 mov ebx, edx;
506 shl eax, 20; // 0000aaaa 00000000 00000000 00000000
507 and ebx, 0x0000000f; // 00000000 00000000 00000000 0000iiii
508 or eax, ebx; // 0000aaaa 00000000 00000000 0000iiii
509 shl ebx, 8; // 00000000 00000000 0000iiii 00000000
510 or eax, ebx; // 0000aaaa 00000000 0000iiii 0000iiii
511 shl ebx, 8; // 00000000 0000iiii 00000000 00000000
512 or eax, ebx; // 0000aaaa 0000iiii 0000iiii 0000iiii
513 mov ebx, eax;
514 shl ebx, 4; // aaaa0000 iiii0000 iiii0000 iiii0000
515 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
516
517 mov dword ptr [edi], eax;
518 add edi, 4;
519
520 mov eax, edx;
521 and eax, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
522 mov ebx, edx;
523 shl eax, 12; // 0000aaaa 00000000 00000000 00000000
524 and ebx, 0x00000f00; // 00000000 00000000 0000iiii 00000000
525 or eax, ebx; // 0000aaaa 00000000 0000iiii 00000000
526 shr ebx, 8; // 00000000 00000000 00000000 0000iiii
527 or eax, ebx; // 0000aaaa 00000000 0000iiii 0000iiii
528 shl ebx, 16; // 00000000 0000iiii 00000000 00000000
529 or eax, ebx; // 0000aaaa 0000iiii 0000iiii 0000iiii
530 mov ebx, eax;
531 shl ebx, 4; // aaaa0000 iiii0000 iiii0000 iiii0000
532 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
533
534 mov dword ptr [edi], eax;
535 add edi, 4;
536
537 mov eax, edx;
538 and eax, 0x00f00000; // 00000000 aaaa0000 00000000 00000000
539 mov ebx, edx;
540 shl eax, 4; // 0000aaaa 00000000 00000000 00000000
541 and ebx, 0x000f0000; // 00000000 0000iiii 00000000 00000000
542 or eax, ebx; // 0000aaaa 0000iiii 00000000 00000000
543 shr ebx, 8; // 00000000 00000000 0000iiii 00000000
544 or eax, ebx; // 0000aaaa 0000iiii 0000iiii 00000000
545 shr ebx, 8; // 00000000 00000000 00000000 0000iiii
546 or eax, ebx; // 0000aaaa 0000iiii 0000iiii 0000iiii
547 mov ebx, eax;
548 shl ebx, 4; // aaaa0000 iiii0000 iiii0000 iiii0000
549 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
550
551 mov dword ptr [edi], eax;
552 add edi, 4;
553
554 mov eax, edx;
555 and eax, 0xf0000000; // aaaa0000 00000000 00000000 00000000
556 mov ebx, edx;
557 and ebx, 0x0f000000; // 0000iiii 00000000 00000000 00000000
558 shr ebx, 4; // 00000000 iiii0000 00000000 00000000
559 or eax, ebx; // aaaa0000 iiii0000 00000000 00000000
560 shr ebx, 8; // 00000000 00000000 iiii0000 00000000
561 or eax, ebx; // aaaa0000 iiii0000 iiii0000 00000000
562 shr ebx, 8; // 00000000 00000000 00000000 iiii0000
563 or eax, ebx; // aaaa0000 iiii0000 iiii0000 iiii0000
564 mov ebx, eax;
565 shr ebx, 4; // 0000aaaa 0000iiii 0000iiii 0000iiii
566 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
567
568 mov dword ptr [edi], eax;
569 add edi, 4;
570
571 dec ecx;
572 jnz tc1_loop;
573
574 pop edi;
575 pop esi;
576 pop ebx;
577 }
578#endif
579}
580
581void
582TxQuantize::AI88_ARGB8888(uint32* src, uint32* dest, int width, int height)
583{
584#if 1
585 int siz = (width * height) >> 1;
586 int i;
587 for (i = 0; i < siz; i++) {
588 *dest = (*src & 0x000000ff);
589 *dest |= ((*dest << 8) | (*dest << 16));
590 *dest |= ((*src & 0x0000ff00) << 16);
591 dest++;
592 *dest = (*src & 0x00ff0000);
593 *dest |= ((*dest >> 8) | (*dest >> 16));
594 *dest |= (*src & 0xff000000);
595 dest++;
596 src++;
597 }
598#else
599 int siz = (width * height) >> 1;
600
601 __asm {
602 push ebx;
603 push esi;
604 push edi;
605
606 mov esi, dword ptr [src];
607 mov edi, dword ptr [dest];
608 mov ecx, dword ptr [siz];
609
610 tc1_loop:
611 mov eax, dword ptr [esi];
612 add esi, 4;
613
614 // aaaaaaaa iiiiiiii
615 // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
616 mov edx, eax;
617 and eax, 0x0000ffff; // 00000000 00000000 aaaaaaaa iiiiiiii
618 mov ebx, eax; // 00000000 00000000 aaaaaaaa iiiiiiii
619 shl eax, 16; // aaaaaaaa iiiiiiii 00000000 00000000
620 and ebx, 0x000000ff; // 00000000 00000000 00000000 iiiiiiii
621 or eax, ebx; // aaaaaaaa iiiiiiii 00000000 iiiiiiii
622 shl ebx, 8; // 00000000 00000000 iiiiiiii 00000000
623 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
624
625 mov dword ptr [edi], eax;
626 add edi, 4;
627
628 mov eax, edx;
629 and eax, 0xffff0000; // aaaaaaaa iiiiiiii 00000000 00000000
630 mov ebx, eax; // aaaaaaaa iiiiiiii 00000000 00000000
631 and ebx, 0x00ff0000; // 00000000 iiiiiiii 00000000 00000000
632 shr ebx, 8; // 00000000 00000000 iiiiiiii 00000000
633 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii 00000000
634 shr ebx, 8; // 00000000 00000000 00000000 iiiiiiii
635 or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
636
637 mov dword ptr [edi], eax;
638 add edi, 4;
639
640 dec ecx;
641 jnz tc1_loop;
642
643 pop edi;
644 pop esi;
645 pop ebx;
646 }
647#endif
648}
649
650void
651TxQuantize::ARGB8888_ARGB1555(uint32* src, uint32* dest, int width, int height)
652{
653#if 1
654 int siz = (width * height) >> 1;
655 int i;
656 for (i = 0; i < siz; i++) {
657 *dest = ((*src & 0xff000000) ? 0x00008000 : 0x00000000);
658 *dest |= (((*src & 0x00f80000) >> 9) |
659 ((*src & 0x0000f800) >> 6) |
660 ((*src & 0x000000f8) >> 3));
661 src++;
662 *dest |= ((*src & 0xff000000) ? 0x80000000 : 0x00000000);
663 *dest |= (((*src & 0x00f80000) << 7) |
664 ((*src & 0x0000f800) << 10) |
665 ((*src & 0x000000f8) << 13));
666 src++;
667 dest++;
668 }
669#else
670 int siz = (width * height) >> 1;
671
672 __asm {
673 push ebx;
674 push esi;
675 push edi;
676
677 mov esi, dword ptr [src];
678 mov edi, dword ptr [dest];
679 mov ecx, dword ptr [siz];
680
681 tc1_loop:
682 mov eax, dword ptr [esi];
683 add esi, 4;
684
685#if 1
686 mov edx, eax;
687 and eax, 0xff000000; // aaaa0000 00000000 00000000 00000000
688 jz transparent1;
689 mov eax, 0x00008000; // 00000000 00000000 a0000000 00000000
690
691 transparent1:
692 mov ebx, edx;
693 and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000
694 shr ebx, 9; // 00000000 00000000 0rrrrr00 00000000
695 or eax, ebx; // 00000000 00000000 arrrrr00 00000000
696 mov ebx, edx;
697 and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000
698 shr ebx, 6; // 00000000 00000000 000000gg ggg00000
699 or eax, ebx; // 00000000 00000000 arrrrrgg ggg00000
700 and edx, 0x000000f8; // 00000000 00000000 00000000 bbbbb000
701 shr edx, 3; // 00000000 00000000 00000000 000bbbbb
702 or edx, eax; // 00000000 00000000 arrrrrgg gggbbbbb
703
704 mov eax, dword ptr [esi];
705 add esi, 4;
706
707 mov ebx, eax;
708 and eax, 0xff000000; // aaaa0000 00000000 00000000 00000000
709 jz transparent2;
710 or edx, 0x80000000; // a0000000 00000000 arrrrrgg gggbbbbb
711
712 transparent2:
713 mov eax, ebx;
714 and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000
715 shl ebx, 7; // 0rrrrr00 00000000 00000000 00000000
716 or edx, ebx; // arrrrr00 00000000 arrrrrgg gggbbbbb
717 mov ebx, eax;
718 and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000
719 shl ebx, 10; // 000000gg ggg00000 00000000 00000000
720 or edx, ebx; // arrrrrgg ggg00000 arrrrrgg gggbbbbb
721 and eax, 0x000000f8; // 00000000 00000000 00000000 bbbbb000
722 shl eax, 13; // 00000000 000bbbbb 00000000 00000000
723 or edx, eax; // arrrrrgg gggbbbbb arrrrrgg gggbbbbb
724
725 mov dword ptr [edi], edx;
726 add edi, 4;
727#else
728 mov edx, eax;
729 and edx, 0x01000000; // 0000000a 00000000 00000000 00000000
730 shr edx, 9; // 00000000 00000000 a0000000 00000000
731 mov ebx, eax;
732 and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000
733 shr ebx, 9; // 00000000 00000000 0rrrrr00 00000000
734 or edx, ebx; // 00000000 00000000 arrrrr00 00000000
735 mov ebx, eax;
736 and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000
737 shr ebx, 6; // 00000000 00000000 000000gg ggg00000
738 or edx, ebx; // 00000000 00000000 arrrrrgg ggg00000
739 and eax, 0x000000f8; // 00000000 00000000 00000000 bbbbb000
740 shr eax, 3; // 00000000 00000000 00000000 000bbbbb
741 or edx, eax; // 00000000 00000000 arrrrrgg gggbbbbb
742
743 mov eax, dword ptr [esi];
744 add esi, 4;
745
746 mov ebx, eax;
747 and ebx, 0x80000000; // a0000000 00000000 00000000 00000000
748 or edx, ebx; // a0000000 00000000 arrrrrgg gggbbbbb
749 mov ebx, eax;
750 and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000
751 shl ebx, 7; // 0rrrrr00 00000000 00000000 00000000
752 or edx, ebx; // arrrrr00 00000000 arrrrrgg gggbbbbb
753 mov ebx, eax;
754 and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000
755 shl ebx, 10; // 000000gg ggg00000 00000000 00000000
756 or edx, ebx; // arrrrrgg ggg00000 arrrrrgg gggbbbbb
757 and eax, 0x000000f8; // 00000000 00000000 00000000 bbbbb000
758 shl eax, 13; // 00000000 000bbbbb 00000000 00000000
759 or edx, eax; // arrrrrgg gggbbbbb arrrrrgg gggbbbbb
760
761 mov dword ptr [edi], edx;
762 add edi, 4;
763#endif
764 dec ecx;
765 jnz tc1_loop;
766
767 pop edi;
768 pop esi;
769 pop ebx;
770 }
771#endif
772}
773
774void
775TxQuantize::ARGB8888_ARGB4444(uint32* src, uint32* dest, int width, int height)
776{
777#if 1
778 int siz = (width * height) >> 1;
779 int i;
780 for (i = 0; i < siz; i++) {
781 *dest = (((*src & 0xf0000000) >> 16) |
782 ((*src & 0x00f00000) >> 12) |
783 ((*src & 0x0000f000) >> 8) |
784 ((*src & 0x000000f0) >> 4));
785 src++;
786 *dest |= ((*src & 0xf0000000) |
787 ((*src & 0x00f00000) << 4) |
788 ((*src & 0x0000f000) << 8) |
789 ((*src & 0x000000f0) << 12));
790 src++;
791 dest++;
792 }
793#else
794 int siz = (width * height) >> 1;
795
796 __asm {
797 push ebx;
798 push esi;
799 push edi;
800
801 mov esi, dword ptr [src];
802 mov edi, dword ptr [dest];
803 mov ecx, dword ptr [siz];
804
805 tc1_loop:
806 mov eax, dword ptr [esi];
807 add esi, 4;
808
809 mov edx, eax;
810 and edx, 0xf0000000; // aaaa0000 00000000 00000000 00000000
811 shr edx, 16; // 00000000 00000000 aaaa0000 00000000
812 mov ebx, eax;
813 and ebx, 0x00f00000; // 00000000 rrrr0000 00000000 00000000
814 shr ebx, 12; // 00000000 00000000 0000rrrr 00000000
815 or edx, ebx; // 00000000 00000000 aaaarrrr 00000000
816 mov ebx, eax;
817 and ebx, 0x0000f000; // 00000000 00000000 gggg0000 00000000
818 shr ebx, 8; // 00000000 00000000 00000000 gggg0000
819 or edx, ebx; // 00000000 00000000 aaaarrrr gggg0000
820 and eax, 0x000000f0; // 00000000 00000000 00000000 bbbb0000
821 shr eax, 4; // 00000000 00000000 00000000 0000bbbb
822 or edx, eax; // 00000000 00000000 aaaarrrr ggggbbbb
823
824 mov eax, dword ptr [esi];
825 add esi, 4;
826
827 mov ebx, eax;
828 and ebx, 0xf0000000; // aaaa0000 00000000 00000000 00000000
829 or edx, ebx; // aaaa0000 00000000 aaaarrrr ggggbbbb
830 mov ebx, eax;
831 and ebx, 0x00f00000; // 00000000 rrrr0000 00000000 00000000
832 shl ebx, 4; // 0000rrrr 00000000 00000000 00000000
833 or edx, ebx; // aaaarrrr 00000000 aaaarrrr ggggbbbb
834 mov ebx, eax;
835 and ebx, 0x0000f000; // 00000000 00000000 gggg0000 00000000
836 shl ebx, 8; // 00000000 gggg0000 00000000 00000000
837 or edx, ebx; // aaaarrrr gggg0000 aaaarrrr ggggbbbb
838 and eax, 0x000000f0; // 00000000 00000000 00000000 bbbb0000
839 shl eax, 12; // 00000000 0000bbbb 00000000 00000000
840 or edx, eax; // arrrrrgg ggggbbbb aaaarrrr ggggbbbb
841
842 mov dword ptr [edi], edx;
843 add edi, 4;
844
845 dec ecx;
846 jnz tc1_loop;
847
848 pop edi;
849 pop esi;
850 pop ebx;
851 }
852#endif
853}
854
855void
856TxQuantize::ARGB8888_RGB565(uint32* src, uint32* dest, int width, int height)
857{
858#if 1
859 int siz = (width * height) >> 1;
860 int i;
861 for (i = 0; i < siz; i++) {
862 *dest = (((*src & 0x000000f8) >> 3) |
863 ((*src & 0x0000fc00) >> 5) |
864 ((*src & 0x00f80000) >> 8));
865 src++;
866 *dest |= (((*src & 0x000000f8) << 13) |
867 ((*src & 0x0000fc00) << 11) |
868 ((*src & 0x00f80000) << 8));
869 src++;
870 dest++;
871 }
872#else
873 int siz = (width * height) >> 1;
874
875 __asm {
876 push ebx;
877 push esi;
878 push edi;
879
880 mov esi, dword ptr [src];
881 mov edi, dword ptr [dest];
882 mov ecx, dword ptr [siz];
883
884 tc1_loop:
885 mov eax, dword ptr [esi];
886 add esi, 4;
887
888 mov edx, eax;
889 and edx, 0x000000F8; // 00000000 00000000 00000000 bbbbb000
890 shr edx, 3; // 00000000 00000000 00000000 000bbbbb
891 mov ebx, eax;
892 and ebx, 0x0000FC00; // 00000000 00000000 gggggg00 00000000
893 shr ebx, 5; // 00000000 00000000 00000ggg ggg00000
894 or edx, ebx; // 00000000 00000000 00000ggg gggbbbbb
895 mov ebx, eax;
896 and ebx, 0x00F80000; // 00000000 rrrrr000 00000000 00000000
897 shr ebx, 8; // 00000000 00000000 rrrrr000 00000000
898 or edx, ebx; // 00000000 00000000 rrrrrggg gggbbbbb
899
900 mov eax, dword ptr [esi];
901 add esi, 4;
902
903 mov ebx, eax;
904 and ebx, 0x000000F8; // 00000000 00000000 00000000 bbbbb000
905 shl ebx, 13; // 00000000 000bbbbb 00000000 00000000
906 or edx, ebx; // 00000000 000bbbbb rrrrrggg gggbbbbb
907 mov ebx, eax;
908 and ebx, 0x0000FC00; // 00000000 00000000 gggggg00 00000000
909 shl ebx, 11; // 00000ggg ggg00000 00000000 00000000
910 or edx, ebx; // 00000ggg gggbbbbb rrrrrggg gggbbbbb
911 mov ebx, eax;
912 and ebx, 0x00F80000; // 00000000 rrrrr000 00000000 00000000
913 shl ebx, 8; // rrrrr000 00000000 00000000 00000000
914 or edx, ebx; // rrrrrggg gggbbbbb rrrrrggg gggbbbbb
915
916 mov dword ptr [edi], edx;
917 add edi, 4;
918
919 dec ecx;
920 jnz tc1_loop;
921
922 pop edi;
923 pop esi;
924 pop ebx;
925 }
926#endif
927}
928
929void
930TxQuantize::ARGB8888_A8(uint32* src, uint32* dest, int width, int height)
931{
932#if 1
933 int siz = (width * height) >> 2;
934 int i;
935 for (i = 0; i < siz; i++) {
936 *dest = (*src & 0x0000ff00) >> 8;
937 src++;
938 *dest |= (*src & 0x0000ff00);
939 src++;
940 *dest |= ((*src & 0x0000ff00) << 8);
941 src++;
942 *dest |= ((*src & 0x0000ff00) << 16);
943 src++;
944 dest++;
945 }
946#else
947 int siz = (width * height) >> 2;
948
949 __asm {
950 push ebx;
951 push esi;
952 push edi;
953
954 mov esi, dword ptr [src];
955 mov edi, dword ptr [dest];
956 mov ecx, dword ptr [siz];
957
958 tc1_loop:
959 mov eax, dword ptr [esi];
960 add esi, 4;
961
962#if 0
963 mov edx, eax; // we'll use A comp for every pixel
964 and edx, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
965 shr edx, 24; // 00000000 00000000 00000000 aaaaaaaa
966
967 mov eax, dword ptr [esi];
968 add esi, 4;
969
970 and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
971 shr eax, 16; // 00000000 00000000 aaaaaaaa 00000000
972 or edx, eax; // 00000000 00000000 aaaaaaaa aaaaaaaa
973
974 mov eax, dword ptr [esi];
975 add esi, 4;
976
977 and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
978 shr eax, 8; // 00000000 aaaaaaaa 00000000 00000000
979 or edx, eax; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
980
981 mov eax, dword ptr [esi];
982 add esi, 4;
983
984 and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
985 or edx, eax; // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
986#endif
987
988#if 1
989 mov edx, eax; // we'll use G comp for every pixel
990 and edx, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000
991 shr edx, 8; // 00000000 00000000 00000000 aaaaaaaa
992
993 mov eax, dword ptr [esi];
994 add esi, 4;
995
996 and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000
997 or edx, eax; // 00000000 00000000 aaaaaaaa aaaaaaaa
998
999 mov eax, dword ptr [esi];
1000 add esi, 4;
1001
1002 and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000
1003 shl eax, 8; // 00000000 aaaaaaaa 00000000 00000000
1004 or edx, eax; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
1005
1006 mov eax, dword ptr [esi];
1007 add esi, 4;
1008
1009 and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000
1010 shl eax, 16; // aaaaaaaa 00000000 00000000 00000000
1011 or edx, eax; // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
1012#endif
1013
1014#if 0
1015 mov edx, eax;
1016 and edx, 0x000000FF; // 00000000 00000000 00000000 aaaaaaaa
1017
1018 mov eax, dword ptr [esi];
1019 add esi, 4;
1020
1021 and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000
1022 or edx, eax; // 00000000 00000000 aaaaaaaa aaaaaaaa
1023
1024 mov eax, dword ptr [esi];
1025 add esi, 4;
1026
1027 and eax, 0x00FF0000; // 00000000 aaaaaaaa 00000000 00000000
1028 or edx, eax; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
1029
1030 mov eax, dword ptr [esi];
1031 add esi, 4;
1032
1033 and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
1034 or edx, eax; // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
1035#endif
1036 mov dword ptr [edi], edx;
1037 add edi, 4;
1038
1039 dec ecx;
1040 jnz tc1_loop;
1041
1042 pop edi;
1043 pop esi;
1044 pop ebx;
1045 }
1046#endif
1047}
1048
1049void
1050TxQuantize::ARGB8888_AI44(uint32* src, uint32* dest, int width, int height)
1051{
1052#if 1
1053 int siz = (width * height) >> 2;
1054 int i;
1055 for (i = 0; i < siz; i++) {
1056 *dest = (((*src & 0xf0000000) >> 24) | ((*src & 0x0000f000) >> 12));
1057 src++;
1058 *dest |= (((*src & 0xf0000000) >> 16) | ((*src & 0x0000f000) >> 4));
1059 src++;
1060 *dest |= (((*src & 0xf0000000) >> 8) | ((*src & 0x0000f000) << 4));
1061 src++;
1062 *dest |= ((*src & 0xf0000000) | ((*src & 0x0000f000) << 12));
1063 src++;
1064 dest++;
1065 }
1066#else
1067 int siz = (width * height) >> 2;
1068
1069 __asm {
1070 push ebx;
1071 push esi;
1072 push edi;
1073
1074 mov esi, dword ptr [src];
1075 mov edi, dword ptr [dest];
1076 mov ecx, dword ptr [siz];
1077
1078 tc1_loop:
1079 mov eax, dword ptr [esi];
1080 add esi, 4;
1081
1082 mov edx, eax; // use A and G comps MSB
1083 and edx, 0xF0000000; // aaaa0000 00000000 00000000 00000000
1084 mov ebx, eax;
1085 shr edx, 24; // 00000000 00000000 00000000 aaaa0000
1086 and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000
1087 shr ebx, 12; // 00000000 00000000 00000000 0000iiii
1088 or edx, ebx; // 00000000 00000000 00000000 aaaaiiii
1089
1090 mov eax, dword ptr [esi];
1091 add esi, 4;
1092
1093 mov ebx, eax;
1094 and eax, 0xF0000000; // aaaa0000 00000000 00000000 00000000
1095 shr eax, 16; // 00000000 00000000 aaaa0000 00000000
1096 and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000
1097 shr ebx, 4; // 00000000 00000000 0000iiii 00000000
1098 or eax, ebx; // 00000000 00000000 aaaaiiii 00000000
1099 or edx, eax; // 00000000 00000000 aaaaiiii aaaaiiii
1100
1101 mov eax, dword ptr [esi];
1102 add esi, 4;
1103
1104 mov ebx, eax;
1105 and eax, 0xF0000000; // aaaa0000 00000000 00000000 00000000
1106 shr eax, 8; // 00000000 aaaa0000 00000000 00000000
1107 and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000
1108 shl ebx, 4; // 00000000 0000iiii 00000000 00000000
1109 or eax, ebx; // 00000000 aaaaiiii 00000000 00000000
1110 or edx, eax; // 00000000 aaaaiiii aaaaiiii aaaaiiii
1111
1112 mov eax, dword ptr [esi];
1113 add esi, 4;
1114
1115 mov ebx, eax;
1116 and eax, 0xF0000000; // aaaa0000 00000000 00000000 00000000
1117 and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000
1118 shl ebx, 12; // 0000iiii 00000000 00000000 00000000
1119 or eax, ebx; // aaaaiiii 00000000 00000000 00000000
1120 or edx, eax; // aaaaiiii aaaaiiii aaaaiiii aaaaiiii
1121
1122 mov dword ptr [edi], edx;
1123 add edi, 4;
1124
1125 dec ecx;
1126 jnz tc1_loop;
1127
1128 pop edi;
1129 pop esi;
1130 pop ebx;
1131 }
1132#endif
1133}
1134
1135void
1136TxQuantize::ARGB8888_AI88(uint32* src, uint32* dest, int width, int height)
1137{
1138#if 1
1139 int siz = (width * height) >> 1;
1140 int i;
1141 for (i = 0; i < siz; i++) {
1142 *dest = (((*src & 0xff000000) >> 16) | ((*src & 0x0000ff00) >> 8));
1143 src++;
1144 *dest |= ((*src & 0xff000000) | ((*src & 0x0000ff00) << 8));
1145 src++;
1146 dest++;
1147 }
1148#else
1149 int siz = (width * height) >> 1;
1150
1151 __asm {
1152 push ebx;
1153 push esi;
1154 push edi;
1155
1156 mov esi, dword ptr [src];
1157 mov edi, dword ptr [dest];
1158 mov ecx, dword ptr [siz];
1159
1160 tc1_loop:
1161 mov eax, dword ptr [esi];
1162 add esi, 4;
1163
1164 mov edx, eax;
1165 and edx, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
1166 mov ebx, eax;
1167 shr edx, 16; // 00000000 00000000 aaaaaaaa 00000000
1168 and ebx, 0x0000FF00; // 00000000 00000000 iiiiiiii 00000000
1169 shr ebx, 8; // 00000000 00000000 00000000 iiiiiiii
1170 or edx, ebx; // 00000000 00000000 aaaaaaaa iiiiiiii
1171
1172 mov eax, dword ptr [esi];
1173 add esi, 4;
1174
1175 mov ebx, eax;
1176 and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000
1177 and ebx, 0x0000FF00; // 00000000 00000000 iiiiiiii 00000000
1178 shl ebx, 8; // 00000000 iiiiiiii 00000000 00000000
1179 or eax, ebx; // aaaaaaaa iiiiiiii 00000000 00000000
1180 or edx, eax; // aaaaaaaa iiiiiiii aaaaaaaa iiiiiiii
1181
1182 mov dword ptr [edi], edx;
1183 add edi, 4;
1184
1185 dec ecx;
1186 jnz tc1_loop;
1187
1188 pop edi;
1189 pop esi;
1190 pop ebx;
1191 }
1192#endif
1193}
1194
1195/* R.W. Floyd and L. Steinberg, An adaptive algorithm
1196 * for spatial grey scale, Proceedings of the Society
1197 * of Information Display 17, pp75-77, 1976
1198 */
1199void
1200TxQuantize::ARGB8888_RGB565_ErrD(uint32* src, uint32* dst, int width, int height)
1201{
1202 /* Floyd-Steinberg error-diffusion halftoning */
1203
1204 int i, x, y;
1205 int qr, qg, qb; /* quantized incoming values */
1206 int ir, ig, ib; /* incoming values */
1207 int t;
1208 int *errR = new int[width];
1209 int *errG = new int[width];
1210 int *errB = new int[width];
1211
1212 uint16 *dest = (uint16 *)dst;
1213
1214 for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = 0;
1215
1216 for (y = 0; y < height; y++) {
1217 qr = qg = qb = 0;
1218 for (x = 0; x < width; x++) {
1219 /* incoming pixel values */
1220 ir = ((*src >> 16) & 0xFF) * 10000;
1221 ig = ((*src >> 8) & 0xFF) * 10000;
1222 ib = ((*src ) & 0xFF) * 10000;
1223
1224 /* quantize pixel values.
1225 * qr * 0.4375 is the error from the pixel to the left,
1226 * errR is the error from the pixel to the top, top left, and top right */
1227 /* qr * 0.4375 is the error distribution to the EAST in
1228 * the previous loop */
1229 ir += errR[x] + qr * 4375 / 10000;
1230 ig += errG[x] + qg * 4375 / 10000;
1231 ib += errB[x] + qb * 4375 / 10000;
1232
1233 /* error distribution to the SOUTH-EAST in the previous loop
1234 * can't calculate in the previous loop because it steps on
1235 * the above quantization */
1236 errR[x] = qr * 625 / 10000;
1237 errG[x] = qg * 625 / 10000;
1238 errB[x] = qb * 625 / 10000;
1239
1240 qr = ir;
1241 qg = ig;
1242 qb = ib;
1243
1244 /* clamp */
1245 if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
1246 if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
1247 if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
1248
1249 /* convert to RGB565 */
1250 qr = qr * 0x1F / 2550000;
1251 qg = qg * 0x3F / 2550000;
1252 qb = qb * 0x1F / 2550000;
1253
1254 /* this is the dithered pixel */
1255 t = (qr << 11) | (qg << 5) | qb;
1256
1257 /* compute the errors */
1258 qr = ((qr << 3) | (qr >> 2)) * 10000;
1259 qg = ((qg << 2) | (qg >> 4)) * 10000;
1260 qb = ((qb << 3) | (qb >> 2)) * 10000;
1261 qr = ir - qr;
1262 qg = ig - qg;
1263 qb = ib - qb;
1264
1265 /* compute the error distributions */
1266 /* Floyd-Steinberg filter
1267 * 7/16 (=0.4375) to the EAST
1268 * 5/16 (=0.3125) to the SOUTH
1269 * 1/16 (=0.0625) to the SOUTH-EAST
1270 * 3/16 (=0.1875) to the SOUTH-WEST
1271 *
1272 * x 7/16
1273 * 3/16 5/16 1/16
1274 */
1275 /* SOUTH-WEST */
1276 if (x > 1) {
1277 errR[x - 1] += qr * 1875 / 10000;
1278 errG[x - 1] += qg * 1875 / 10000;
1279 errB[x - 1] += qb * 1875 / 10000;
1280 }
1281
1282 /* SOUTH */
1283 errR[x] += qr * 3125 / 10000;
1284 errG[x] += qg * 3125 / 10000;
1285 errB[x] += qb * 3125 / 10000;
1286
1287 *dest = (t & 0xFFFF);
1288
1289 dest++;
1290 src++;
1291 }
1292 }
1293
1294 delete [] errR;
1295 delete [] errG;
1296 delete [] errB;
1297}
1298
1299
1300void
1301TxQuantize::ARGB8888_ARGB1555_ErrD(uint32* src, uint32* dst, int width, int height)
1302{
1303 /* Floyd-Steinberg error-diffusion halftoning */
1304
1305 int i, x, y;
1306 int qr, qg, qb; /* quantized incoming values */
1307 int ir, ig, ib; /* incoming values */
1308 int t;
1309 int *errR = new int[width];
1310 int *errG = new int[width];
1311 int *errB = new int[width];
1312
1313 uint16 *dest = (uint16 *)dst;
1314
1315 for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = 0;
1316
1317 for (y = 0; y < height; y++) {
1318 qr = qg = qb = 0;
1319 for (x = 0; x < width; x++) {
1320 /* incoming pixel values */
1321 ir = ((*src >> 16) & 0xFF) * 10000;
1322 ig = ((*src >> 8) & 0xFF) * 10000;
1323 ib = ((*src ) & 0xFF) * 10000;
1324
1325 /* quantize pixel values.
1326 * qr * 0.4375 is the error from the pixel to the left,
1327 * errR is the error from the pixel to the top, top left, and top right */
1328 /* qr * 0.4375 is the error distribution to the EAST in
1329 * the previous loop */
1330 ir += errR[x] + qr * 4375 / 10000;
1331 ig += errG[x] + qg * 4375 / 10000;
1332 ib += errB[x] + qb * 4375 / 10000;
1333
1334 /* error distribution to the SOUTH-EAST of the previous loop.
1335 * cannot calculate in the previous loop because it steps on
1336 * the above quantization */
1337 errR[x] = qr * 625 / 10000;
1338 errG[x] = qg * 625 / 10000;
1339 errB[x] = qb * 625 / 10000;
1340
1341 qr = ir;
1342 qg = ig;
1343 qb = ib;
1344
1345 /* clamp */
1346 if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
1347 if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
1348 if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
1349
1350 /* convert to RGB555 */
1351 qr = qr * 0x1F / 2550000;
1352 qg = qg * 0x1F / 2550000;
1353 qb = qb * 0x1F / 2550000;
1354
1355 /* this is the dithered pixel */
1356 t = (qr << 10) | (qg << 5) | qb;
1357 t |= ((*src >> 24) ? 0x8000 : 0);
1358
1359 /* compute the errors */
1360 qr = ((qr << 3) | (qr >> 2)) * 10000;
1361 qg = ((qg << 3) | (qg >> 2)) * 10000;
1362 qb = ((qb << 3) | (qb >> 2)) * 10000;
1363 qr = ir - qr;
1364 qg = ig - qg;
1365 qb = ib - qb;
1366
1367 /* compute the error distributions */
1368 /* Floyd-Steinberg filter
1369 * 7/16 (=0.4375) to the EAST
1370 * 5/16 (=0.3125) to the SOUTH
1371 * 1/16 (=0.0625) to the SOUTH-EAST
1372 * 3/16 (=0.1875) to the SOUTH-WEST
1373 *
1374 * x 7/16
1375 * 3/16 5/16 1/16
1376 */
1377 /* SOUTH-WEST */
1378 if (x > 1) {
1379 errR[x - 1] += qr * 1875 / 10000;
1380 errG[x - 1] += qg * 1875 / 10000;
1381 errB[x - 1] += qb * 1875 / 10000;
1382 }
1383
1384 /* SOUTH */
1385 errR[x] += qr * 3125 / 10000;
1386 errG[x] += qg * 3125 / 10000;
1387 errB[x] += qb * 3125 / 10000;
1388
1389 *dest = (t & 0xFFFF);
1390
1391 dest++;
1392 src++;
1393 }
1394 }
1395
1396 delete [] errR;
1397 delete [] errG;
1398 delete [] errB;
1399}
1400
1401void
1402TxQuantize::ARGB8888_ARGB4444_ErrD(uint32* src, uint32* dst, int width, int height)
1403{
1404 /* Floyd-Steinberg error-diffusion halftoning */
1405
1406 /* NOTE: alpha dithering looks better for alpha gradients, but are prone
1407 * to producing noisy speckles for constant or step level alpha. Output
1408 * results should always be checked.
1409 */
1410 boolean ditherAlpha = 0;
1411
1412 int i, x, y;
1413 int qr, qg, qb, qa; /* quantized incoming values */
1414 int ir, ig, ib, ia; /* incoming values */
1415 int t;
1416 int *errR = new int[width];
1417 int *errG = new int[width];
1418 int *errB = new int[width];
1419 int *errA = new int[width];
1420
1421 uint16 *dest = (uint16 *)dst;
1422
1423 for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = errA[i] = 0;
1424
1425 for (y = 0; y < height; y++) {
1426 qr = qg = qb = qa = 0;
1427 for (x = 0; x < width; x++) {
1428 /* incoming pixel values */
1429 ir = ((*src >> 16) & 0xFF) * 10000;
1430 ig = ((*src >> 8) & 0xFF) * 10000;
1431 ib = ((*src ) & 0xFF) * 10000;
1432 ia = ((*src >> 24) & 0xFF) * 10000;
1433
1434 /* quantize pixel values.
1435 * qr * 0.4375 is the error from the pixel to the left,
1436 * errR is the error from the pixel to the top, top left, and top right */
1437 /* qr * 0.4375 is the error distribution to the EAST in
1438 * the previous loop */
1439 ir += errR[x] + qr * 4375 / 10000;
1440 ig += errG[x] + qg * 4375 / 10000;
1441 ib += errB[x] + qb * 4375 / 10000;
1442 ia += errA[x] + qa * 4375 / 10000;
1443
1444 /* error distribution to the SOUTH-EAST of the previous loop.
1445 * cannot calculate in the previous loop because it steps on
1446 * the above quantization */
1447 errR[x] = qr * 625 / 10000;
1448 errG[x] = qg * 625 / 10000;
1449 errB[x] = qb * 625 / 10000;
1450 errA[x] = qa * 625 / 10000;
1451
1452 qr = ir;
1453 qg = ig;
1454 qb = ib;
1455 qa = ia;
1456
1457 /* clamp */
1458 if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
1459 if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
1460 if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
1461 if (qa < 0) qa = 0; else if (qa > 2550000) qa = 2550000;
1462
1463 /* convert to RGB444 */
1464 qr = qr * 0xF / 2550000;
1465 qg = qg * 0xF / 2550000;
1466 qb = qb * 0xF / 2550000;
1467 qa = qa * 0xF / 2550000;
1468
1469 /* this is the value to be returned */
1470 if (ditherAlpha) {
1471 t = (qa << 12) | (qr << 8) | (qg << 4) | qb;
1472 } else {
1473 t = (qr << 8) | (qg << 4) | qb;
1474 t |= (*src >> 16) & 0xF000;
1475 }
1476
1477 /* compute the errors */
1478 qr = ((qr << 4) | qr) * 10000;
1479 qg = ((qg << 4) | qg) * 10000;
1480 qb = ((qb << 4) | qb) * 10000;
1481 qa = ((qa << 4) | qa) * 10000;
1482 qr = ir - qr;
1483 qg = ig - qg;
1484 qb = ib - qb;
1485 qa = ia - qa;
1486
1487 /* compute the error distributions */
1488 /* Floyd-Steinberg filter
1489 * 7/16 (=0.4375) to the EAST
1490 * 5/16 (=0.3125) to the SOUTH
1491 * 1/16 (=0.0625) to the SOUTH-EAST
1492 * 3/16 (=0.1875) to the SOUTH-WEST
1493 *
1494 * x 7/16
1495 * 3/16 5/16 1/16
1496 */
1497 /* SOUTH-WEST */
1498 if (x > 1) {
1499 errR[x - 1] += qr * 1875 / 10000;
1500 errG[x - 1] += qg * 1875 / 10000;
1501 errB[x - 1] += qb * 1875 / 10000;
1502 errA[x - 1] += qa * 1875 / 10000;
1503 }
1504
1505 /* SOUTH */
1506 errR[x] += qr * 3125 / 10000;
1507 errG[x] += qg * 3125 / 10000;
1508 errB[x] += qb * 3125 / 10000;
1509 errA[x] += qa * 3125 / 10000;
1510
1511 *dest = (t & 0xFFFF);
1512
1513 dest++;
1514 src++;
1515 }
1516 }
1517
1518 delete [] errR;
1519 delete [] errG;
1520 delete [] errB;
1521 delete [] errA;
1522}
1523
1524void
1525TxQuantize::ARGB8888_AI44_ErrD(uint32* src, uint32* dst, int width, int height)
1526{
1527 /* Floyd-Steinberg error-diffusion halftoning */
1528
1529 /* NOTE: alpha dithering looks better for alpha gradients, but are prone
1530 * to producing noisy speckles for constant or step level alpha. Output
1531 * results should always be checked.
1532 */
1533 boolean ditherAlpha = 0;
1534
1535 int i, x, y;
1536 int qi, qa; /* quantized incoming values */
1537 int ii, ia; /* incoming values */
1538 int t;
1539 int *errI = new int[width];
1540 int *errA = new int[width];
1541
1542 uint8 *dest = (uint8 *)dst;
1543
1544 for (i = 0; i < width; i++) errI[i] = errA[i] = 0;
1545
1546 for (y = 0; y < height; y++) {
1547 qi = qa = 0;
1548 for (x = 0; x < width; x++) {
1549 /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114 */
1550 ii = ((*src >> 16) & 0xFF) * 2990 +
1551 ((*src >> 8) & 0xFF) * 5870 +
1552 ((*src ) & 0xFF) * 1140;
1553 ia = ((*src >> 24) & 0xFF) * 10000;
1554
1555 /* quantize pixel values.
1556 * qi * 0.4375 is the error from the pixel to the left,
1557 * errI is the error from the pixel to the top, top left, and top right */
1558 /* qi * 0.4375 is the error distrtibution to the EAST in
1559 * the previous loop */
1560 ii += errI[x] + qi * 4375 / 10000;
1561 ia += errA[x] + qa * 4375 / 10000;
1562
1563 /* error distribution to the SOUTH-EAST in the previous loop.
1564 * cannot calculate in the previous loop because it steps on
1565 * the above quantization */
1566 errI[x] = qi * 625 / 10000;
1567 errA[x] = qa * 625 / 10000;
1568
1569 qi = ii;
1570 qa = ia;
1571
1572 /* clamp */
1573 if (qi < 0) qi = 0; else if (qi > 2550000) qi = 2550000;
1574 if (qa < 0) qa = 0; else if (qa > 2550000) qa = 2550000;
1575
1576 /* convert to I4 */
1577 qi = qi * 0xF / 2550000;
1578 qa = qa * 0xF / 2550000;
1579
1580 /* this is the value to be returned */
1581 if (ditherAlpha) {
1582 t = (qa << 4) | qi;
1583 } else {
1584 t = qi;
1585 t |= ((*src >> 24) & 0xF0);
1586 }
1587
1588 /* compute the errors */
1589 qi = ((qi << 4) | qi) * 10000;
1590 qa = ((qa << 4) | qa) * 10000;
1591 qi = ii - qi;
1592 qa = ia - qa;
1593
1594 /* compute the error distributions */
1595 /* Floyd-Steinberg filter
1596 * 7/16 (=0.4375) to the EAST
1597 * 5/16 (=0.3125) to the SOUTH
1598 * 1/16 (=0.0625) to the SOUTH-EAST
1599 * 3/16 (=0.1875) to the SOUTH-WEST
1600 *
1601 * x 7/16
1602 * 3/16 5/16 1/16
1603 */
1604 /* SOUTH-WEST */
1605 if (x > 1) {
1606 errI[x - 1] += qi * 1875 / 10000;
1607 errA[x - 1] += qa * 1875 / 10000;
1608 }
1609
1610 /* SOUTH */
1611 errI[x] += qi * 3125 / 10000;
1612 errA[x] += qa * 3125 / 10000;
1613
1614 *dest = t & 0xFF;
1615
1616 dest++;
1617 src++;
1618 }
1619 }
1620
1621 delete [] errI;
1622 delete [] errA;
1623}
1624
1625void
1626TxQuantize::ARGB8888_AI88_Slow(uint32* src, uint32* dst, int width, int height)
1627{
1628 int x, y;
1629 uint16 *dest = (uint16 *)dst;
1630 for (y = 0; y < height; y++) {
1631 for (x = 0; x < width; x++) {
1632#if 1
1633 /* libpng style grayscale conversion.
1634 * Reduce RGB files to grayscale with or without alpha
1635 * using the equation given in Poynton's ColorFAQ at
1636 * <http://www.inforamp.net/~poynton/>
1637 * Copyright (c) 1998-01-04 Charles Poynton poynton at inforamp.net
1638 *
1639 * Y = 0.212671 * R + 0.715160 * G + 0.072169 * B
1640 *
1641 * We approximate this with
1642 *
1643 * Y = 0.21268 * R + 0.7151 * G + 0.07217 * B
1644 *
1645 * which can be expressed with integers as
1646 *
1647 * Y = (6969 * R + 23434 * G + 2365 * B)/32768
1648 *
1649 * The calculation is to be done in a linear colorspace.
1650 */
1651 *dest = (((int)((((*src >> 16) & 0xFF) * 6969 +
1652 ((*src >> 8) & 0xFF) * 23434 +
1653 ((*src ) & 0xFF) * 2365) / 32768) & 0xFF) |
1654 (uint16)((*src >> 16) & 0xFF00));
1655#else
1656 /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114
1657 * this is same as the standard NTSC gray scale conversion. */
1658 *dest = (((int)((((*src >> 16) & 0xFF) * 299 +
1659 ((*src >> 8) & 0xFF) * 587 +
1660 ((*src ) & 0xFF) * 114) / 1000) & 0xFF) |
1661 (uint16)((*src >> 16) & 0xFF00));
1662#endif
1663 dest++;
1664 src++;
1665 }
1666 }
1667}
1668
1669void
1670TxQuantize::ARGB8888_I8_Slow(uint32* src, uint32* dst, int width, int height)
1671{
1672 int x, y;
1673 uint8 *dest = (uint8 *)dst;
1674 for (y = 0; y < height; y++) {
1675 for (x = 0; x < width; x++) {
1676#if 1
1677 /* libpng style Intensity = (6969 * R + 23434 * G + 2365 * B)/32768 */
1678 *dest = (int)((((*src >> 16) & 0xFF) * 6969 +
1679 ((*src >> 8) & 0xFF) * 23434 +
1680 ((*src ) & 0xFF) * 2365) / 32768) & 0xFF;
1681#else
1682 /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114
1683 * this is same as the standard NTSC gray scale conversion. */
1684 *dest = (int)((((*src >>16) & 0xFF) * 299 +
1685 ((*src >> 8) & 0xFF) * 587 +
1686 ((*src ) & 0xFF) * 114) / 1000) & 0xFF;
1687#endif
1688 dest++;
1689 src++;
1690 }
1691 }
1692}
1693
1694void
1695TxQuantize::P8_16BPP(uint32* src, uint32* dest, int width, int height, uint32* palette)
1696{
1697 /* passed in palette is RGBA5551 format */
1698#if 1
1699 int i;
1700 int size = width * height;
1701 for (i = 0; i < size; i++) {
1702 ((uint16*)dest)[i] = ((uint16*)palette)[(int)(((uint8*)src)[i])];
1703 ((uint16*)dest)[i] = ((((uint16*)dest)[i] << 15) | (((uint16*)dest)[i] >> 1));
1704 }
1705#else
1706
1707 /* not finished yet... */
1708
1709 int siz = (width * height) >> 2;
1710
1711 __asm {
1712 push ebx;
1713 push esi;
1714 push edi;
1715
1716 mov esi, dword ptr [src];
1717 mov edi, dword ptr [dest];
1718 mov ecx, dword ptr [siz];
1719 mov edx, dword ptr [palette];
1720
1721 tc1_loop:
1722 mov eax, dword ptr [esi];
1723 add esi, 4;
1724
1725 dec ecx;
1726 jnz tc1_loop;
1727
1728 pop edi;
1729 pop esi;
1730 pop ebx;
1731 }
1732#endif
1733}
1734
1735boolean
1736TxQuantize::quantize(uint8* src, uint8* dest, int width, int height, uint16 srcformat, uint16 destformat, boolean fastQuantizer)
1737{
1738 typedef void (TxQuantize::*quantizerFunc)(uint32* src, uint32* dest, int width, int height);
1739 quantizerFunc quantizer;
1740 int bpp_shift = 0;
1741
1742 if (destformat == GR_TEXFMT_ARGB_8888) {
1743 switch (srcformat) {
1744 case GR_TEXFMT_ARGB_1555:
1745 quantizer = &TxQuantize::ARGB1555_ARGB8888;
1746 bpp_shift = 1;
1747 break;
1748 case GR_TEXFMT_ARGB_4444:
1749 quantizer = &TxQuantize::ARGB4444_ARGB8888;
1750 bpp_shift = 1;
1751 break;
1752 case GR_TEXFMT_RGB_565:
1753 quantizer = &TxQuantize::RGB565_ARGB8888;
1754 bpp_shift = 1;
1755 break;
1756 case GR_TEXFMT_ALPHA_8:
1757 quantizer = &TxQuantize::A8_ARGB8888;
1758 bpp_shift = 2;
1759 break;
1760 case GR_TEXFMT_ALPHA_INTENSITY_44:
1761 quantizer = &TxQuantize::AI44_ARGB8888;
1762 bpp_shift = 2;
1763 break;
1764 case GR_TEXFMT_ALPHA_INTENSITY_88:
1765 quantizer = &TxQuantize::AI88_ARGB8888;
1766 bpp_shift = 1;
1767 break;
1768 default:
1769 return 0;
1770 }
1771
1772#if !defined(NO_FILTER_THREAD)
1773 unsigned int numcore = _numcore;
1774 unsigned int blkrow = 0;
1775 while (numcore > 1 && blkrow == 0) {
1776 blkrow = (height >> 2) / numcore;
1777 numcore--;
1778 }
1779 if (blkrow > 0 && numcore > 1) {
1780 std::thread *thrd[MAX_NUMCORE];
1781 unsigned int i;
1782 int blkheight = blkrow << 2;
1783 unsigned int srcStride = (width * blkheight) << (2 - bpp_shift);
1784 unsigned int destStride = srcStride << bpp_shift;
1785 for (i = 0; i < numcore - 1; i++) {
1786 thrd[i] = new std::thread(std::bind(quantizer,
1787 this,
1788 (uint32*)src,
1789 (uint32*)dest,
1790 width,
1791 blkheight));
1792 src += srcStride;
1793 dest += destStride;
1794 }
1795 thrd[i] = new std::thread(std::bind(quantizer,
1796 this,
1797 (uint32*)src,
1798 (uint32*)dest,
1799 width,
1800 height - blkheight * i));
1801 for (i = 0; i < numcore; i++) {
1802 thrd[i]->join();
1803 delete thrd[i];
1804 }
1805 } else {
1806 (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height);
1807 }
1808#else
1809 (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height);
1810#endif
1811
1812 } else if (srcformat == GR_TEXFMT_ARGB_8888) {
1813 switch (destformat) {
1814 case GR_TEXFMT_ARGB_1555:
1815 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_ARGB1555 : &TxQuantize::ARGB8888_ARGB1555_ErrD;
1816 bpp_shift = 1;
1817 break;
1818 case GR_TEXFMT_ARGB_4444:
1819 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_ARGB4444 : &TxQuantize::ARGB8888_ARGB4444_ErrD;
1820 bpp_shift = 1;
1821 break;
1822 case GR_TEXFMT_RGB_565:
1823 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_RGB565 : &TxQuantize::ARGB8888_RGB565_ErrD;
1824 bpp_shift = 1;
1825 break;
1826 case GR_TEXFMT_ALPHA_8:
1827 case GR_TEXFMT_INTENSITY_8:
1828 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_A8 : &TxQuantize::ARGB8888_I8_Slow;
1829 bpp_shift = 2;
1830 break;
1831 case GR_TEXFMT_ALPHA_INTENSITY_44:
1832 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_AI44 : &TxQuantize::ARGB8888_AI44_ErrD;
1833 bpp_shift = 2;
1834 break;
1835 case GR_TEXFMT_ALPHA_INTENSITY_88:
1836 quantizer = fastQuantizer ? &TxQuantize::ARGB8888_AI88 : &TxQuantize::ARGB8888_AI88_Slow;
1837 bpp_shift = 1;
1838 break;
1839 default:
1840 return 0;
1841 }
1842
1843#if !defined(NO_FILTER_THREAD)
1844 unsigned int numcore = _numcore;
1845 unsigned int blkrow = 0;
1846 while (numcore > 1 && blkrow == 0) {
1847 blkrow = (height >> 2) / numcore;
1848 numcore--;
1849 }
1850 if (blkrow > 0 && numcore > 1) {
1851 std::thread *thrd[MAX_NUMCORE];
1852 unsigned int i;
1853 int blkheight = blkrow << 2;
1854 unsigned int srcStride = (width * blkheight) << 2;
1855 unsigned int destStride = srcStride >> bpp_shift;
1856 for (i = 0; i < numcore - 1; i++) {
1857 thrd[i] = new std::thread(std::bind(quantizer,
1858 this,
1859 (uint32*)src,
1860 (uint32*)dest,
1861 width,
1862 blkheight));
1863 src += srcStride;
1864 dest += destStride;
1865 }
1866 thrd[i] = new std::thread(std::bind(quantizer,
1867 this,
1868 (uint32*)src,
1869 (uint32*)dest,
1870 width,
1871 height - blkheight * i));
1872 for (i = 0; i < numcore; i++) {
1873 thrd[i]->join();
1874 delete thrd[i];
1875 }
1876 } else {
1877 (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height);
1878 }
1879#else
1880 (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height);
1881#endif
1882
1883 } else {
1884 return 0;
1885 }
1886
1887 return 1;
1888}
1889
1890boolean
1891TxQuantize::FXT1(uint8 *src, uint8 *dest,
1892 int srcwidth, int srcheight, uint16 srcformat,
1893 int *destwidth, int *destheight, uint16 *destformat)
1894{
1895 /*
1896 * NOTE: src must be in ARGB8888 format, srcformat describes
1897 * the closest 16bbp representation of src.
1898 *
1899 * NOTE: I have modified the dxtn library to use ARGB format
1900 * which originaly was ABGR format.
1901 */
1902
1903 boolean bRet = 0;
1904
1905 if (_tx_compress_fxt1 &&
1906 srcwidth >= 8 && srcheight >= 4) {
1907 /* compress to fxt1
1908 * width and height must be larger than 8 and 4 respectively
1909 */
1910 int dstRowStride = ((srcwidth + 7) & ~7) << 1;
1911 int srcRowStride = (srcwidth << 2);
1912
1913#if !defined(NO_FILTER_THREAD)
1914 unsigned int numcore = _numcore;
1915 unsigned int blkrow = 0;
1916 while (numcore > 1 && blkrow == 0) {
1917 blkrow = (srcheight >> 2) / numcore;
1918 numcore--;
1919 }
1920 if (blkrow > 0 && numcore > 1) {
1921 std::thread *thrd[MAX_NUMCORE];
1922 unsigned int i;
1923 int blkheight = blkrow << 2;
1924 unsigned int srcStride = (srcwidth * blkheight) << 2;
1925 unsigned int destStride = dstRowStride * blkrow;
1926 for (i = 0; i < numcore - 1; i++) {
1927 thrd[i] = new std::thread(std::bind(_tx_compress_fxt1,
1928 srcwidth,
1929 blkheight,
1930 4,
1931 src,
1932 srcRowStride,
1933 dest,
1934 dstRowStride));
1935 src += srcStride;
1936 dest += destStride;
1937 }
1938 thrd[i] = new std::thread(std::bind(_tx_compress_fxt1,
1939 srcwidth,
1940 srcheight - blkheight * i,
1941 4,
1942 src,
1943 srcRowStride,
1944 dest,
1945 dstRowStride));
1946 for (i = 0; i < numcore; i++) {
1947 thrd[i]->join();
1948 delete thrd[i];
1949 }
1950 } else {
1951 (*_tx_compress_fxt1)(srcwidth, /* width */
1952 srcheight, /* height */
1953 4, /* comps: ARGB8888=4, RGB888=3 */
1954 src, /* source */
1955 srcRowStride, /* width*comps */
1956 dest, /* destination */
1957 dstRowStride); /* 16 bytes per 8x4 texel */
1958 }
1959#else
1960 (*_tx_compress_fxt1)(srcwidth, /* width */
1961 srcheight, /* height */
1962 4, /* comps: ARGB8888=4, RGB888=3 */
1963 src, /* source */
1964 srcRowStride, /* width*comps */
1965 dest, /* destination */
1966 dstRowStride); /* 16 bytes per 8x4 texel */
1967#endif
1968
1969 /* dxtn adjusts width and height to M8 and M4 respectively by replication */
1970 *destwidth = (srcwidth + 7) & ~7;
1971 *destheight = (srcheight + 3) & ~3;
1972 *destformat = GR_TEXFMT_ARGB_CMP_FXT1;
1973
1974 bRet = 1;
1975 }
1976
1977 return bRet;
1978}
1979
1980boolean
1981TxQuantize::DXTn(uint8 *src, uint8 *dest,
1982 int srcwidth, int srcheight, uint16 srcformat,
1983 int *destwidth, int *destheight, uint16 *destformat)
1984{
1985 /*
1986 * NOTE: src must be in ARGB8888 format, srcformat describes
1987 * the closest 16bbp representation of src.
1988 *
1989 * NOTE: I have modified the dxtn library to use ARGB format
1990 * which originaly was ABGR format.
1991 */
1992
1993 boolean bRet = 0;
1994
2d262872 1995 if (_tx_compress_dxtn_rgba &&
98e75f2d 1996 srcwidth >= 4 && srcheight >= 4) {
1997 /* compress to dxtn
1998 * width and height must be larger than 4
1999 */
2000
2001 /* skip formats that DXTn won't help in size. */
2002 if (srcformat == GR_TEXFMT_ALPHA_8 ||
2003 srcformat == GR_TEXFMT_ALPHA_INTENSITY_44) {
2004 ; /* shutup compiler */
2005 } else {
2006 int dstRowStride = ((srcwidth + 3) & ~3) << 2;
2007 int compression = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
2008
2009 *destformat = GR_TEXFMT_ARGB_CMP_DXT5;
2010
2011#if !GLIDE64_DXTN
2012 /* okay... we are going to disable DXT1 with 1bit alpha
2013 * for Glide64. some textures have all 0 alpha values.
2014 * see "N64 Kobe Bryant in NBA Courtside"
2015 */
2016 if (srcformat == GR_TEXFMT_ARGB_1555) {
2017 dstRowStride >>= 1;
2018 compression = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
2019 *destformat = GR_TEXFMT_ARGB_CMP_DXT1;
2020 } else
2021#endif
2022 if (srcformat == GR_TEXFMT_RGB_565 ||
2023 srcformat == GR_TEXFMT_INTENSITY_8) {
2024 dstRowStride >>= 1;
2025 compression = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
2026 *destformat = GR_TEXFMT_ARGB_CMP_DXT1;
2027 }
2028
2029#if !defined(NO_FILTER_THREAD)
2030 unsigned int numcore = _numcore;
2031 unsigned int blkrow = 0;
2032 while (numcore > 1 && blkrow == 0) {
2033 blkrow = (srcheight >> 2) / numcore;
2034 numcore--;
2035 }
2036 if (blkrow > 0 && numcore > 1) {
2037 std::thread *thrd[MAX_NUMCORE];
2038 unsigned int i;
2039 int blkheight = blkrow << 2;
2040 unsigned int srcStride = (srcwidth * blkheight) << 2;
2041 unsigned int destStride = dstRowStride * blkrow;
2042 for (i = 0; i < numcore - 1; i++) {
2d262872 2043 thrd[i] = new std::thread(std::bind(_tx_compress_dxtn_rgba,
98e75f2d 2044 4,
2045 srcwidth,
2046 blkheight,
2047 src,
2048 compression,
2049 dest,
2050 dstRowStride));
2051 src += srcStride;
2052 dest += destStride;
2053 }
2d262872 2054 thrd[i] = new std::thread(std::bind(_tx_compress_dxtn_rgba,
98e75f2d 2055 4,
2056 srcwidth,
2057 srcheight - blkheight * i,
2058 src,
2059 compression,
2060 dest,
2061 dstRowStride));
2062 for (i = 0; i < numcore; i++) {
2063 thrd[i]->join();
2064 delete thrd[i];
2065 }
2066 } else {
2d262872 2067 (*_tx_compress_dxtn_rgba)(4, /* comps: ARGB8888=4, RGB888=3 */
98e75f2d 2068 srcwidth, /* width */
2069 srcheight, /* height */
2070 src, /* source */
2071 compression, /* format */
2072 dest, /* destination */
2073 dstRowStride); /* DXT1 = 8 bytes per 4x4 texel
2074 * others = 16 bytes per 4x4 texel */
2075 }
2076#else
2d262872 2077 (*_tx_compress_dxtn_rgba)(4, /* comps: ARGB8888=4, RGB888=3 */
98e75f2d 2078 srcwidth, /* width */
2079 srcheight, /* height */
2080 src, /* source */
2081 compression, /* format */
2082 dest, /* destination */
2083 dstRowStride); /* DXT1 = 8 bytes per 4x4 texel
2084 * others = 16 bytes per 4x4 texel */
2085#endif
2086
2087 /* dxtn adjusts width and height to M4 by replication */
2088 *destwidth = (srcwidth + 3) & ~3;
2089 *destheight = (srcheight + 3) & ~3;
2090
2091 bRet = 1;
2092 }
2093 }
2094
2095 return bRet;
2096}
2097
2098boolean
2099TxQuantize::compress(uint8 *src, uint8 *dest,
2100 int srcwidth, int srcheight, uint16 srcformat,
2101 int *destwidth, int *destheight, uint16 *destformat,
2102 int compressionType)
2103{
2104 boolean bRet = 0;
2105
2106 switch (compressionType) {
2107 case FXT1_COMPRESSION:
2108 bRet = FXT1(src, dest,
2109 srcwidth, srcheight, srcformat,
2110 destwidth, destheight, destformat);
2111 break;
2112 case S3TC_COMPRESSION:
2113 bRet = DXTn(src, dest,
2114 srcwidth, srcheight, srcformat,
2115 destwidth, destheight, destformat);
2116 break;
2117 case NCC_COMPRESSION:
2118 /* TODO: narrow channel compression */
2119 ;
2120 }
2121
2122 return bRet;
2123}
2124
2125#if 0 /* unused */
2126void
2127TxQuantize::I8_ARGB8888(uint32* src, uint32* dest, int width, int height)
2128{
2129 int siz = (width * height) >> 2;
2130
2131 __asm {
2132 push ebx;
2133 push esi;
2134 push edi;
2135
2136 mov esi, dword ptr [src];
2137 mov edi, dword ptr [dest];
2138 mov ecx, dword ptr [siz];
2139
2140 tc1_loop:
2141 mov eax, dword ptr [esi];
2142 add esi, 4;
2143
2144 // aaaaaaaa
2145 // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
2146 mov edx, eax;
2147 and eax, 0x000000ff;
2148 mov ebx, eax; // 00000000 00000000 00000000 aaaaaaaa
2149 shl ebx, 8; // 00000000 00000000 aaaaaaaa 00000000
2150 or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa
2151 shl ebx, 8; // 00000000 aaaaaaaa 00000000 00000000
2152 or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
2153 or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
2154
2155 mov dword ptr [edi], eax;
2156 add edi, 4;
2157
2158 mov eax, edx;
2159 and eax, 0x0000ff00;
2160 mov ebx, eax; // 00000000 00000000 aaaaaaaa 00000000
2161 shr ebx, 8; // 00000000 00000000 00000000 aaaaaaaa
2162 or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa
2163 shl ebx, 16; // 00000000 aaaaaaaa 00000000 00000000
2164 or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
2165 or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
2166
2167 mov dword ptr [edi], eax;
2168 add edi, 4;
2169
2170 mov eax, edx;
2171 and eax, 0x00ff0000;
2172 mov ebx, eax; // 00000000 aaaaaaaa 00000000 00000000
2173 shr ebx, 8; // 00000000 00000000 aaaaaaaa 00000000
2174 or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa 00000000
2175 shr ebx, 8; // 00000000 00000000 00000000 aaaaaaaa
2176 or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
2177 or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
2178
2179 mov dword ptr [edi], eax;
2180 add edi, 4;
2181
2182 mov eax, edx;
2183 and eax, 0xff000000;
2184 mov ebx, eax; // aaaaaaaa 00000000 00000000 00000000
2185 shr ebx, 8; // 00000000 aaaaaaaa 00000000 00000000
2186 or eax, ebx; // aaaaaaaa aaaaaaaa 00000000 00000000
2187 shr ebx, 8; // 00000000 00000000 aaaaaaaa 00000000
2188 or eax, ebx; // aaaaaaaa aaaaaaaa aaaaaaaa 00000000
2189 shr eax, 8; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
2190 or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
2191
2192 mov dword ptr [edi], eax;
2193 add edi, 4;
2194
2195 dec ecx;
2196 jnz tc1_loop;
2197
2198 pop edi;
2199 pop esi;
2200 pop ebx;
2201 }
2202}
2203
2204void
2205TxQuantize::ARGB8888_I8(uint32* src, uint32* dest, int width, int height)
2206{
2207 ARGB8888_A8(src, dest, width, height);
2208}
2209
2210void
2211TxQuantize::ARGB1555_ABGR8888(uint32* src, uint32* dest, int width, int height)
2212{
2213 int siz = (width * height) >> 1;
2214
2215 __asm {
2216 push ebx;
2217 push esi;
2218 push edi;
2219
2220 mov esi, dword ptr [src];
2221 mov edi, dword ptr [dest];
2222 mov ecx, dword ptr [siz];
2223
2224 tc1_loop:
2225 mov eax, dword ptr [esi];
2226 add esi, 4;
2227
2228 // arrr rrgg gggb bbbb
2229 // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2230 mov edx, eax; // edx = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
2231 and ebx, 0x00000000;
2232 and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000
2233 jz transparent1;
2234 or ebx, 0xff000000; // ebx = aaaaaaaa 00000000 00000000 00000000
2235
2236 transparent1:
2237 mov eax, edx; // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
2238 and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb
2239 shl edx, 14; // edx = 00000000 00000bbb bb000000 00000000
2240 or ebx, edx; // ebx = aaaaaaaa 00000bbb bb000000 00000000
2241 shl edx, 5; // edx = 00000000 bbbbb000 00000000 00000000
2242 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb bb000000 00000000
2243 and ebx, 0xffff0000; // ebx = aaaaaaaa bbbbbbbb 00000000 00000000
2244 mov edx, eax;
2245 and edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000
2246 shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000
2247 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb 00000ggg gg000000
2248 shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000
2249 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg gg000000
2250 and ebx, 0xffffff00; // ebx = aaaaaaaa bbbbbbbb gggggggg 00000000
2251 mov edx, eax;
2252 and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000
2253 shr edx, 7; // edx = 00000000 00000000 00000000 rrrrr000
2254 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrr000
2255 shr edx, 5; // edx = 00000000 00000000 00000000 00000rrr
2256 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2257
2258 mov dword ptr [edi], ebx;
2259 add edi, 4;
2260
2261 shr eax, 16; // eax = 00000000 00000000 arrrrrgg gggbbbbb
2262 mov edx, eax; // edx = 00000000 00000000 arrrrrgg gggbbbbb
2263 and ebx, 0x00000000;
2264 and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000
2265 jz transparent2;
2266 or ebx, 0xff000000; // ebx = aaaaaaaa 00000000 00000000 00000000
2267
2268 transparent2:
2269 mov eax, edx; // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
2270 and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb
2271 shl edx, 14; // edx = 00000000 00000bbb bb000000 00000000
2272 or ebx, edx; // ebx = aaaaaaaa 00000bbb bb000000 00000000
2273 shl edx, 5; // edx = 00000000 bbbbb000 00000000 00000000
2274 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb bb000000 00000000
2275 and ebx, 0xffff0000; // ebx = aaaaaaaa bbbbbbbb 00000000 00000000
2276 mov edx, eax;
2277 and edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000
2278 shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000
2279 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb 00000ggg gg000000
2280 shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000
2281 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg gg000000
2282 and ebx, 0xffffff00; // ebx = aaaaaaaa bbbbbbbb gggggggg 00000000
2283 mov edx, eax;
2284 and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000
2285 shr edx, 7; // edx = 00000000 00000000 00000000 rrrrr000
2286 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrr000
2287 shr edx, 5; // edx = 00000000 00000000 00000000 00000rrr
2288 or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2289
2290 mov dword ptr [edi], ebx;
2291 add edi, 4;
2292
2293 dec ecx;
2294 jnz tc1_loop;
2295
2296 pop edi;
2297 pop esi;
2298 pop ebx;
2299 }
2300}
2301
2302void
2303TxQuantize::ARGB4444_ABGR8888(uint32* src, uint32* dest, int width, int height)
2304{
2305 int siz = (width * height) >> 1;
2306
2307 __asm {
2308 push ebx;
2309 push esi;
2310 push edi;
2311
2312 mov esi, dword ptr [src];
2313 mov edi, dword ptr [dest];
2314 mov ecx, dword ptr [siz];
2315
2316 tc1_loop:
2317 mov eax, dword ptr [esi];
2318 add esi, 4;
2319
2320 // aaaa rrrr gggg bbbb
2321 // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2322 mov edx, eax;
2323 and eax, 0x0000ffff;
2324 mov ebx, eax; // 00000000 00000000 aaaarrrr ggggbbbb
2325 and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
2326 shl ebx, 12; // 0000aaaa 00000000 00000000 00000000
2327 or eax, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb
2328 mov ebx, eax;
2329 and ebx, 0x0000000f; // 00000000 00000000 00000000 0000bbbb
2330 shl ebx, 16; // 00000000 0000bbbb 00000000 00000000
2331 or eax, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggbbbb
2332 mov ebx, eax;
2333 and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
2334 shr ebx, 8; // 00000000 00000000 00000000 0000rrrr
2335 and eax, 0xfffffff0;
2336 or eax, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggrrrr
2337 mov ebx, eax;
2338 and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
2339 shl ebx, 4; // 00000000 00000000 0000gggg 00000000
2340 and eax, 0x0f0f000f; // 0000aaaa 0000bbbb 00000000 0000rrrr
2341 or eax, ebx; // 0000aaaa 0000bbbb 0000gggg 0000rrrr
2342 mov ebx, eax;
2343 shl ebx, 4; // aaaa0000 bbbb0000 gggg0000 rrrr0000
2344 or eax, ebx; // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2345
2346 mov dword ptr [edi], eax;
2347
2348 add edi, 4;
2349
2350 shr edx, 16;
2351 mov ebx, edx; // 00000000 00000000 aaaarrrr ggggbbbb
2352 and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
2353 shl ebx, 12; // 0000aaaa 00000000 00000000 00000000
2354 or edx, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb
2355 mov ebx, edx;
2356 and ebx, 0x0000000f; // 00000000 00000000 00000000 0000bbbb
2357 shl ebx, 16; // 00000000 0000bbbb 00000000 00000000
2358 or edx, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggbbbb
2359 mov ebx, edx;
2360 and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
2361 shr ebx, 8; // 00000000 00000000 00000000 0000rrrr
2362 and edx, 0xfffffff0;
2363 or edx, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggrrrr
2364 mov ebx, edx;
2365 and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
2366 shl ebx, 4; // 00000000 00000000 0000gggg 00000000
2367 and edx, 0x0f0f000f; // 0000aaaa 0000bbbb 00000000 0000rrrr
2368 or edx, ebx; // 0000aaaa 0000bbbb 0000gggg 0000rrrr
2369 mov ebx, edx;
2370 shl ebx, 4; // aaaa0000 bbbb0000 gggg0000 rrrr0000
2371 or edx, ebx; // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2372
2373 mov dword ptr [edi], edx;
2374 add edi, 4;
2375
2376 dec ecx;
2377 jnz tc1_loop;
2378
2379 pop edi;
2380 pop esi;
2381 pop ebx;
2382 }
2383}
2384
2385void
2386TxQuantize::ARGB8888_ABGR8888(uint32* src, uint32* dest, int width, int height)
2387{
2388 int siz = width * height;
2389
2390 __asm {
2391 push ebx;
2392 push esi;
2393 push edi;
2394
2395 mov esi, dword ptr [src];
2396 mov edi, dword ptr [dest];
2397 mov ecx, dword ptr [siz];
2398
2399 tc1_loop:
2400 mov eax, dword ptr [esi];
2401 add esi, 4;
2402
2403 // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
2404 mov edx, eax;
2405 bswap edx;
2406 shr edx, 8;
2407 and eax, 0xff000000;
2408
2409 or eax, edx;
2410
2411 mov dword ptr [edi], eax;
2412 add edi, 4;
2413
2414 dec ecx;
2415 jnz tc1_loop;
2416
2417 pop edi;
2418 pop esi;
2419 pop ebx;
2420 }
2421}
2422#endif