RICE: OOT Fix from mupen64plus-ae team
[mupen64plus-pandora.git] / source / gles2glide64 / src / GlideHQ / tc-1.1+ / dxtn.c
CommitLineData
98e75f2d 1/*
2 * DXTn codec
3 * Version: 1.1
4 *
5 * Copyright (C) 2004 Daniel Borca All Rights Reserved.
6 *
7 * this is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
10 * any later version.
11 *
12 * this is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Make; see the file COPYING. If not, write to
19 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
23 * Added support for ARGB inputs, DXT3,5 workaround for ATI Radeons, and
24 * YUV conversions to determine representative colors.
25 */
26
27
28#include <stdlib.h>
29#include <string.h>
30#include <assert.h>
31
32#include <stdio.h>
33
34#include "types.h"
35#include "internal.h"
36#include "dxtn.h"
37
38
/***************************************************************************\
 * DXTn encoder
 *
 * The encoder was built by reversing the decoder,
 * and is vaguely based on the FXT1 codec. Note that this code
 * is merely a proof of concept, since it is highly unoptimized!
\***************************************************************************/
46
47
#define MAX_COMP 4  /* maximum number of components per texel ever needed */
#define MAX_VECT 4  /* maximum number of base vectors ever searched for */
#define N_TEXELS 16 /* number of texels in a block (always 16) */

/* Pack the R, G and B components of texel `v` (a byte array indexed by
 * RCOMP/GCOMP/BCOMP) into one 16-bit 5:6:5 color by dropping the low bits
 * of every component.
 */
#define COLOR565(v) \
    ((word)((((v)[RCOMP] & 0xf8) << 8) | (((v)[GCOMP] & 0xfc) << 3) | ((v)[BCOMP] >> 3)))
52
53
/* Translate a position on the color0 -> color1 interpolation line into the
 * 2-bit DXTn color code.
 *   row 0: four-color mode  (position 0 = color0, 3 = color1)
 *   row 1: three-color mode (position 0 = color0, 2 = color1, 3 = code 3)
 */
static const int dxtn_color_tlat[2][4] = {
    { 0, 2, 3, 1 },
    { 0, 2, 1, 3 }
};

/* Translate a position on the alpha0 -> alpha1 interpolation line into the
 * 3-bit DXT5 alpha code.
 *   row 0: eight-value mode (position 0 = alpha0, 7 = alpha1)
 *   row 1: six-value mode   (position 0 = alpha0, 5 = alpha1; 6 and 7 are
 *          passed through unchanged)
 */
static const int dxtn_alpha_tlat[2][8] = {
    { 0, 2, 3, 4, 5, 6, 7, 1 },
    { 0, 2, 3, 4, 5, 1, 6, 7 }
};
63
64
65static void
66dxt1_rgb_quantize (dword *cc, const byte *lines[], int comps)
67{
68 float b, iv[MAX_COMP]; /* interpolation vector */
69
70 dword hi; /* high doubleword */
71 int color0, color1;
72 int n_vect;
73 const int n_comp = 3;
74 int black = 0;
75
76#ifndef YUV
77 int minSum = 2000; /* big enough */
78#else
79 int minSum = 2000000;
80#endif
81 int maxSum = -1; /* small enough */
82 int minCol = 0; /* phoudoin: silent compiler! */
83 int maxCol = 0; /* phoudoin: silent compiler! */
84
85 byte input[N_TEXELS][MAX_COMP];
86 int i, k, l;
87
88 /* make the whole block opaque */
89 /* we will NEVER reference ACOMP of any pixel */
90
91 /* 4 texels each line */
92#ifndef ARGB
93 for (l = 0; l < 4; l++) {
94 for (k = 0; k < 4; k++) {
95 for (i = 0; i < comps; i++) {
96 input[k + l * 4][i] = *lines[l]++;
97 }
98 }
99 }
100#else
101 /* H.Morii - support for ARGB inputs */
102 for (l = 0; l < 4; l++) {
103 for (k = 0; k < 4; k++) {
104 input[k + l * 4][2] = *lines[l]++;
105 input[k + l * 4][1] = *lines[l]++;
106 input[k + l * 4][0] = *lines[l]++;
107 if (comps == 4) input[k + l * 4][3] = *lines[l]++;
108 }
109 }
110#endif
111
112 /* Our solution here is to find the darkest and brightest colors in
113 * the 4x4 tile and use those as the two representative colors.
114 * There are probably better algorithms to use (histogram-based).
115 */
116 for (k = 0; k < N_TEXELS; k++) {
117 int sum = 0;
118#ifndef YUV
119 for (i = 0; i < n_comp; i++) {
120 sum += input[k][i];
121 }
122#else
123 /* RGB to YUV conversion according to CCIR 601 specs
124 * Y = 0.299R+0.587G+0.114B
125 * U = 0.713(R - Y) = 0.500R-0.419G-0.081B
126 * V = 0.564(B - Y) = -0.169R-0.331G+0.500B
127 */
128 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
129#endif
130 if (minSum > sum) {
131 minSum = sum;
132 minCol = k;
133 }
134 if (maxSum < sum) {
135 maxSum = sum;
136 maxCol = k;
137 }
138 if (sum == 0) {
139 black = 1;
140 }
141 }
142
143 color0 = COLOR565(input[minCol]);
144 color1 = COLOR565(input[maxCol]);
145
146 if (color0 == color1) {
147 /* we'll use 3-vector */
148 cc[0] = color0 | (color1 << 16);
149 hi = black ? -1 : 0;
150 } else {
151 if (black && ((color0 == 0) || (color1 == 0))) {
152 /* we still can use 4-vector */
153 black = 0;
154 }
155
156 if (black ^ (color0 <= color1)) {
157 int aux;
158 aux = color0;
159 color0 = color1;
160 color1 = aux;
161 aux = minCol;
162 minCol = maxCol;
163 maxCol = aux;
164 }
165 n_vect = (color0 <= color1) ? 2 : 3;
166
167 MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
168
169 /* add in texels */
170 cc[0] = color0 | (color1 << 16);
171 hi = 0;
172 for (k = N_TEXELS - 1; k >= 0; k--) {
173 int texel = 3;
174 int sum = 0;
175 if (black) {
176 for (i = 0; i < n_comp; i++) {
177 sum += input[k][i];
178 }
179 }
180 if (!black || sum) {
181 /* interpolate color */
182 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
183 texel = dxtn_color_tlat[black][texel];
184 }
185 /* add in texel */
186 hi <<= 2;
187 hi |= texel;
188 }
189 }
190 cc[1] = hi;
191}
192
193
194static void
195dxt1_rgba_quantize (dword *cc, const byte *lines[], int comps)
196{
197 float b, iv[MAX_COMP]; /* interpolation vector */
198
199 dword hi; /* high doubleword */
200 int color0, color1;
201 int n_vect;
202 const int n_comp = 3;
203 int transparent = 0;
204
205#ifndef YUV
206 int minSum = 2000; /* big enough */
207#else
208 int minSum = 2000000;
209#endif
210 int maxSum = -1; /* small enough */
211 int minCol = 0; /* phoudoin: silent compiler! */
212 int maxCol = 0; /* phoudoin: silent compiler! */
213
214 byte input[N_TEXELS][MAX_COMP];
215 int i, k, l;
216
217 if (comps == 3) {
218 /* make the whole block opaque */
219 memset(input, -1, sizeof(input));
220 }
221
222 /* 4 texels each line */
223#ifndef ARGB
224 for (l = 0; l < 4; l++) {
225 for (k = 0; k < 4; k++) {
226 for (i = 0; i < comps; i++) {
227 input[k + l * 4][i] = *lines[l]++;
228 }
229 }
230 }
231#else
232 /* H.Morii - support for ARGB inputs */
233 for (l = 0; l < 4; l++) {
234 for (k = 0; k < 4; k++) {
235 input[k + l * 4][2] = *lines[l]++;
236 input[k + l * 4][1] = *lines[l]++;
237 input[k + l * 4][0] = *lines[l]++;
238 if (comps == 4) input[k + l * 4][3] = *lines[l]++;
239 }
240 }
241#endif
242
243 /* Our solution here is to find the darkest and brightest colors in
244 * the 4x4 tile and use those as the two representative colors.
245 * There are probably better algorithms to use (histogram-based).
246 */
247 for (k = 0; k < N_TEXELS; k++) {
248 int sum = 0;
249#ifndef YUV
250 for (i = 0; i < n_comp; i++) {
251 sum += input[k][i];
252 }
253#else
254 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
255#endif
256 if (minSum > sum) {
257 minSum = sum;
258 minCol = k;
259 }
260 if (maxSum < sum) {
261 maxSum = sum;
262 maxCol = k;
263 }
264 if (input[k][ACOMP] < 128) {
265 transparent = 1;
266 }
267 }
268
269 color0 = COLOR565(input[minCol]);
270 color1 = COLOR565(input[maxCol]);
271
272 if (color0 == color1) {
273 /* we'll use 3-vector */
274 cc[0] = color0 | (color1 << 16);
275 hi = transparent ? -1 : 0;
276 } else {
277 if (transparent ^ (color0 <= color1)) {
278 int aux;
279 aux = color0;
280 color0 = color1;
281 color1 = aux;
282 aux = minCol;
283 minCol = maxCol;
284 maxCol = aux;
285 }
286 n_vect = (color0 <= color1) ? 2 : 3;
287
288 MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
289
290 /* add in texels */
291 cc[0] = color0 | (color1 << 16);
292 hi = 0;
293 for (k = N_TEXELS - 1; k >= 0; k--) {
294 int texel = 3;
295 if (input[k][ACOMP] >= 128) {
296 /* interpolate color */
297 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
298 texel = dxtn_color_tlat[transparent][texel];
299 }
300 /* add in texel */
301 hi <<= 2;
302 hi |= texel;
303 }
304 }
305 cc[1] = hi;
306}
307
308
309static void
310dxt3_rgba_quantize (dword *cc, const byte *lines[], int comps)
311{
312 float b, iv[MAX_COMP]; /* interpolation vector */
313
314 dword lolo, lohi; /* low quadword: lo dword, hi dword */
315 dword hihi; /* high quadword: high dword */
316 int color0, color1;
317 const int n_vect = 3;
318 const int n_comp = 3;
319
320#ifndef YUV
321 int minSum = 2000; /* big enough */
322#else
323 int minSum = 2000000;
324#endif
325 int maxSum = -1; /* small enough */
326 int minCol = 0; /* phoudoin: silent compiler! */
327 int maxCol = 0; /* phoudoin: silent compiler! */
328
329 byte input[N_TEXELS][MAX_COMP];
330 int i, k, l;
331
332 if (comps == 3) {
333 /* make the whole block opaque */
334 memset(input, -1, sizeof(input));
335 }
336
337 /* 4 texels each line */
338#ifndef ARGB
339 for (l = 0; l < 4; l++) {
340 for (k = 0; k < 4; k++) {
341 for (i = 0; i < comps; i++) {
342 input[k + l * 4][i] = *lines[l]++;
343 }
344 }
345 }
346#else
347 /* H.Morii - support for ARGB inputs */
348 for (l = 0; l < 4; l++) {
349 for (k = 0; k < 4; k++) {
350 input[k + l * 4][2] = *lines[l]++;
351 input[k + l * 4][1] = *lines[l]++;
352 input[k + l * 4][0] = *lines[l]++;
353 if (comps == 4) input[k + l * 4][3] = *lines[l]++;
354 }
355 }
356#endif
357
358 /* Our solution here is to find the darkest and brightest colors in
359 * the 4x4 tile and use those as the two representative colors.
360 * There are probably better algorithms to use (histogram-based).
361 */
362 for (k = 0; k < N_TEXELS; k++) {
363 int sum = 0;
364#ifndef YUV
365 for (i = 0; i < n_comp; i++) {
366 sum += input[k][i];
367 }
368#else
369 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
370#endif
371 if (minSum > sum) {
372 minSum = sum;
373 minCol = k;
374 }
375 if (maxSum < sum) {
376 maxSum = sum;
377 maxCol = k;
378 }
379 }
380
381 /* add in alphas */
382 lolo = lohi = 0;
383 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
384 /* add in alpha */
385 lohi <<= 4;
386 lohi |= input[k][ACOMP] >> 4;
387 }
388 cc[1] = lohi;
389 for (; k >= 0; k--) {
390 /* add in alpha */
391 lolo <<= 4;
392 lolo |= input[k][ACOMP] >> 4;
393 }
394 cc[0] = lolo;
395
396 color0 = COLOR565(input[minCol]);
397 color1 = COLOR565(input[maxCol]);
398
399#ifdef RADEON
400 /* H.Morii - Workaround for ATI Radeon
401 * According to the OpenGL EXT_texture_compression_s3tc specs,
402 * the encoding of the RGB components for DXT3 and DXT5 formats
403 * use the non-transparent encodings of DXT1 but treated as
404 * though color0 > color1, regardless of the actual values of
405 * color0 and color1. ATI Radeons however require the values to
406 * be color0 > color1.
407 */
408 if (color0 < color1) {
409 int aux;
410 aux = color0;
411 color0 = color1;
412 color1 = aux;
413 aux = minCol;
414 minCol = maxCol;
415 maxCol = aux;
416 }
417#endif
418
419 cc[2] = color0 | (color1 << 16);
420
421 hihi = 0;
422 if (color0 != color1) {
423 MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
424
425 /* add in texels */
426 for (k = N_TEXELS - 1; k >= 0; k--) {
427 int texel;
428 /* interpolate color */
429 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
430 texel = dxtn_color_tlat[0][texel];
431 /* add in texel */
432 hihi <<= 2;
433 hihi |= texel;
434 }
435 }
436 cc[3] = hihi;
437}
438
439
440static void
441dxt5_rgba_quantize (dword *cc, const byte *lines[], int comps)
442{
443 float b, iv[MAX_COMP]; /* interpolation vector */
444
445 qword lo; /* low quadword */
446 dword hihi; /* high quadword: high dword */
447 int color0, color1;
448 const int n_vect = 3;
449 const int n_comp = 3;
450
451#ifndef YUV
452 int minSum = 2000; /* big enough */
453#else
454 int minSum = 2000000;
455#endif
456 int maxSum = -1; /* small enough */
457 int minCol = 0; /* phoudoin: silent compiler! */
458 int maxCol = 0; /* phoudoin: silent compiler! */
459 int alpha0 = 2000; /* big enough */
460 int alpha1 = -1; /* small enough */
461 int anyZero = 0, anyOne = 0;
462 int a_vect;
463
464 byte input[N_TEXELS][MAX_COMP];
465 int i, k, l;
466
467 if (comps == 3) {
468 /* make the whole block opaque */
469 memset(input, -1, sizeof(input));
470 }
471
472 /* 4 texels each line */
473#ifndef ARGB
474 for (l = 0; l < 4; l++) {
475 for (k = 0; k < 4; k++) {
476 for (i = 0; i < comps; i++) {
477 input[k + l * 4][i] = *lines[l]++;
478 }
479 }
480 }
481#else
482 /* H.Morii - support for ARGB inputs */
483 for (l = 0; l < 4; l++) {
484 for (k = 0; k < 4; k++) {
485 input[k + l * 4][2] = *lines[l]++;
486 input[k + l * 4][1] = *lines[l]++;
487 input[k + l * 4][0] = *lines[l]++;
488 if (comps == 4) input[k + l * 4][3] = *lines[l]++;
489 }
490 }
491#endif
492
493 /* Our solution here is to find the darkest and brightest colors in
494 * the 4x4 tile and use those as the two representative colors.
495 * There are probably better algorithms to use (histogram-based).
496 */
497 for (k = 0; k < N_TEXELS; k++) {
498 int sum = 0;
499#ifndef YUV
500 for (i = 0; i < n_comp; i++) {
501 sum += input[k][i];
502 }
503#else
504 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
505#endif
506 if (minSum > sum) {
507 minSum = sum;
508 minCol = k;
509 }
510 if (maxSum < sum) {
511 maxSum = sum;
512 maxCol = k;
513 }
514 if (alpha0 > input[k][ACOMP]) {
515 alpha0 = input[k][ACOMP];
516 }
517 if (alpha1 < input[k][ACOMP]) {
518 alpha1 = input[k][ACOMP];
519 }
520 if (input[k][ACOMP] == 0) {
521 anyZero = 1;
522 }
523 if (input[k][ACOMP] == 255) {
524 anyOne = 1;
525 }
526 }
527
528 /* add in alphas */
529 if (alpha0 == alpha1) {
530 /* we'll use 6-vector */
531 cc[0] = alpha0 | (alpha1 << 8);
532 cc[1] = 0;
533 } else {
534 if (anyZero && ((alpha0 == 0) || (alpha1 == 0))) {
535 /* we still might use 8-vector */
536 anyZero = 0;
537 }
538 if (anyOne && ((alpha0 == 255) || (alpha1 == 255))) {
539 /* we still might use 8-vector */
540 anyOne = 0;
541 }
542 if ((anyZero | anyOne) ^ (alpha0 <= alpha1)) {
543 int aux;
544 aux = alpha0;
545 alpha0 = alpha1;
546 alpha1 = aux;
547 }
548 a_vect = (alpha0 <= alpha1) ? 5 : 7;
549
550 /* compute interpolation vector */
551 iv[ACOMP] = (float)a_vect / (alpha1 - alpha0);
552 b = -iv[ACOMP] * alpha0 + 0.5F;
553
554 /* add in alphas */
555 Q_MOV32(lo, 0);
556 for (k = N_TEXELS - 1; k >= 0; k--) {
557 int texel = -1;
558 if (anyZero | anyOne) {
559 if (input[k][ACOMP] == 0) {
560 texel = 6;
561 } else if (input[k][ACOMP] == 255) {
562 texel = 7;
563 }
564 }
565 /* interpolate alpha */
566 if (texel == -1) {
567 float dot = input[k][ACOMP] * iv[ACOMP];
568 texel = (int)(dot + b);
569#if SAFECDOT
570 if (texel < 0) {
571 texel = 0;
572 } else if (texel > a_vect) {
573 texel = a_vect;
574 }
575#endif
576 texel = dxtn_alpha_tlat[anyZero | anyOne][texel];
577 }
578 /* add in texel */
579 Q_SHL(lo, 3);
580 Q_OR32(lo, texel);
581 }
582 Q_SHL(lo, 16);
583 Q_OR32(lo, alpha0 | (alpha1 << 8));
584 ((qword *)cc)[0] = lo;
585 }
586
587 color0 = COLOR565(input[minCol]);
588 color1 = COLOR565(input[maxCol]);
589
590#ifdef RADEON /* H.Morii - Workaround for ATI Radeon */
591 if (color0 < color1) {
592 int aux;
593 aux = color0;
594 color0 = color1;
595 color1 = aux;
596 aux = minCol;
597 minCol = maxCol;
598 maxCol = aux;
599 }
600#endif
601
602 cc[2] = color0 | (color1 << 16);
603
604 hihi = 0;
605 if (color0 != color1) {
606 MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
607
608 /* add in texels */
609 for (k = N_TEXELS - 1; k >= 0; k--) {
610 int texel;
611 /* interpolate color */
612 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
613 texel = dxtn_color_tlat[0][texel];
614 /* add in texel */
615 hihi <<= 2;
616 hihi |= texel;
617 }
618 }
619 cc[3] = hihi;
620}
621
622
623#define ENCODER(dxtn, n) \
624int TAPIENTRY \
625dxtn##_encode (int width, int height, int comps, \
626 const void *source, int srcRowStride, \
627 void *dest, int destRowStride) \
628{ \
629 int x, y; \
630 const byte *data; \
631 dword *encoded = (dword *)dest; \
632 void *newSource = NULL; \
633 \
634 /* Replicate image if width is not M4 or height is not M4 */ \
635 if ((width & 3) | (height & 3)) { \
636 int newWidth = (width + 3) & ~3; \
637 int newHeight = (height + 3) & ~3; \
638 newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));\
639 _mesa_upscale_teximage2d(width, height, newWidth, newHeight, \
640 comps, (const byte *)source, \
641 srcRowStride, (byte *)newSource); \
642 source = newSource; \
643 width = newWidth; \
644 height = newHeight; \
645 srcRowStride = comps * newWidth; \
646 } \
647 \
648 data = (const byte *)source; \
649 destRowStride = (destRowStride - width * n) / 4; \
650 for (y = 0; y < height; y += 4) { \
651 unsigned int offs = 0 + (y + 0) * srcRowStride; \
652 for (x = 0; x < width; x += 4) { \
653 const byte *lines[4]; \
654 lines[0] = &data[offs]; \
655 lines[1] = lines[0] + srcRowStride; \
656 lines[2] = lines[1] + srcRowStride; \
657 lines[3] = lines[2] + srcRowStride; \
658 offs += 4 * comps; \
659 dxtn##_quantize(encoded, lines, comps); \
660 /* 4x4 block */ \
661 encoded += n; \
662 } \
663 encoded += destRowStride; \
664 } \
665 \
666 if (newSource != NULL) { \
667 free(newSource); \
668 } \
669 \
670 return 0; \
671}
672
673ENCODER(dxt1_rgb, 2)
674ENCODER(dxt1_rgba, 2)
675ENCODER(dxt3_rgba, 4)
676ENCODER(dxt5_rgba, 4)
677
678
/***************************************************************************\
 * DXTn decoder
 *
 * The decoder is based on the GL_EXT_texture_compression_s3tc
 * specification and serves as a concept for the encoder.
\***************************************************************************/
685
686
687/* lookup table for scaling 4 bit colors up to 8 bits */
688static const byte _rgb_scale_4[] = {
689 0, 17, 34, 51, 68, 85, 102, 119,
690 136, 153, 170, 187, 204, 221, 238, 255
691};
692
693/* lookup table for scaling 5 bit colors up to 8 bits */
694static const byte _rgb_scale_5[] = {
695 0, 8, 16, 25, 33, 41, 49, 58,
696 66, 74, 82, 90, 99, 107, 115, 123,
697 132, 140, 148, 156, 165, 173, 181, 189,
698 197, 206, 214, 222, 230, 239, 247, 255
699};
700
701/* lookup table for scaling 6 bit colors up to 8 bits */
702static const byte _rgb_scale_6[] = {
703 0, 4, 8, 12, 16, 20, 24, 28,
704 32, 36, 40, 45, 49, 53, 57, 61,
705 65, 69, 73, 77, 81, 85, 89, 93,
706 97, 101, 105, 109, 113, 117, 121, 125,
707 130, 134, 138, 142, 146, 150, 154, 158,
708 162, 166, 170, 174, 178, 182, 186, 190,
709 194, 198, 202, 206, 210, 215, 219, 223,
710 227, 231, 235, 239, 243, 247, 251, 255
711};
712
713
/* Read the dword that contains bit `which` of `cc` and shift that bit down
 * to position 0. The pointer is reinterpreted as dword *, so `cc` has to be
 * suitably aligned and the result follows the host byte order.
 */
#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))

/* Expand the low 4, 5 or 6 bits of `c` to an 8-bit component. */
#define UP4(c) (_rgb_scale_4[(c) & 15])
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c) (_rgb_scale_6[(c) & 63])

/* Clear one dword at `v` with a single store; `v` has to be suitably
 * aligned for a dword access.
 */
#define ZERO_4UBV(v) (*((dword *)(v)) = 0)
719
720
721void TAPIENTRY
722dxt1_rgb_decode_1 (const void *texture, int stride,
723 int i, int j, byte *rgba)
724{
725 const byte *src = (const byte *)texture
726 + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8;
727 const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
728 if (code == 0) {
729 rgba[RCOMP] = UP5(CC_SEL(src, 11));
730 rgba[GCOMP] = UP6(CC_SEL(src, 5));
731 rgba[BCOMP] = UP5(CC_SEL(src, 0));
732 } else if (code == 1) {
733 rgba[RCOMP] = UP5(CC_SEL(src, 27));
734 rgba[GCOMP] = UP6(CC_SEL(src, 21));
735 rgba[BCOMP] = UP5(CC_SEL(src, 16));
736 } else {
737 const word col0 = src[0] | (src[1] << 8);
738 const word col1 = src[2] | (src[3] << 8);
739 if (col0 > col1) {
740 if (code == 2) {
741 rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3;
742 rgba[GCOMP] = (UP6(col0 >> 5) * 2 + UP6(col1 >> 5)) / 3;
743 rgba[BCOMP] = (UP5(col0 ) * 2 + UP5(col1 )) / 3;
744 } else {
745 rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3;
746 rgba[GCOMP] = (UP6(col0 >> 5) + 2 * UP6(col1 >> 5)) / 3;
747 rgba[BCOMP] = (UP5(col0 ) + 2 * UP5(col1 )) / 3;
748 }
749 } else {
750 if (code == 2) {
751 rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2;
752 rgba[GCOMP] = (UP6(col0 >> 5) + UP6(col1 >> 5)) / 2;
753 rgba[BCOMP] = (UP5(col0 ) + UP5(col1 )) / 2;
754 } else {
755 ZERO_4UBV(rgba);
756 }
757 }
758 }
759 rgba[ACOMP] = 255;
760}
761
762
763void TAPIENTRY
764dxt1_rgba_decode_1 (const void *texture, int stride,
765 int i, int j, byte *rgba)
766{
767 /* Same as rgb_dxt1 above, except alpha=0 if col0<=col1 and code=3. */
768 const byte *src = (const byte *)texture
769 + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8;
770 const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
771 if (code == 0) {
772 rgba[RCOMP] = UP5(CC_SEL(src, 11));
773 rgba[GCOMP] = UP6(CC_SEL(src, 5));
774 rgba[BCOMP] = UP5(CC_SEL(src, 0));
775 rgba[ACOMP] = 255;
776 } else if (code == 1) {
777 rgba[RCOMP] = UP5(CC_SEL(src, 27));
778 rgba[GCOMP] = UP6(CC_SEL(src, 21));
779 rgba[BCOMP] = UP5(CC_SEL(src, 16));
780 rgba[ACOMP] = 255;
781 } else {
782 const word col0 = src[0] | (src[1] << 8);
783 const word col1 = src[2] | (src[3] << 8);
784 if (col0 > col1) {
785 if (code == 2) {
786 rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3;
787 rgba[GCOMP] = (UP6(col0 >> 5) * 2 + UP6(col1 >> 5)) / 3;
788 rgba[BCOMP] = (UP5(col0 ) * 2 + UP5(col1 )) / 3;
789 } else {
790 rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3;
791 rgba[GCOMP] = (UP6(col0 >> 5) + 2 * UP6(col1 >> 5)) / 3;
792 rgba[BCOMP] = (UP5(col0 ) + 2 * UP5(col1 )) / 3;
793 }
794 rgba[ACOMP] = 255;
795 } else {
796 if (code == 2) {
797 rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2;
798 rgba[GCOMP] = (UP6(col0 >> 5) + UP6(col1 >> 5)) / 2;
799 rgba[BCOMP] = (UP5(col0 ) + UP5(col1 )) / 2;
800 rgba[ACOMP] = 255;
801 } else {
802 ZERO_4UBV(rgba);
803 }
804 }
805 }
806}
807
808
809void TAPIENTRY
810dxt3_rgba_decode_1 (const void *texture, int stride,
811 int i, int j, byte *rgba)
812{
813 const byte *src = (const byte *)texture
814 + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16;
815 const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
816 const dword *cc = (const dword *)(src + 8);
817 if (code == 0) {
818 rgba[RCOMP] = UP5(CC_SEL(cc, 11));
819 rgba[GCOMP] = UP6(CC_SEL(cc, 5));
820 rgba[BCOMP] = UP5(CC_SEL(cc, 0));
821 } else if (code == 1) {
822 rgba[RCOMP] = UP5(CC_SEL(cc, 27));
823 rgba[GCOMP] = UP6(CC_SEL(cc, 21));
824 rgba[BCOMP] = UP5(CC_SEL(cc, 16));
825 } else if (code == 2) {
826 /* (col0 * (4 - code) + col1 * (code - 1)) / 3 */
827 rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3;
828 rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) * 2 + UP6(CC_SEL(cc, 21))) / 3;
829 rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) * 2 + UP5(CC_SEL(cc, 16))) / 3;
830 } else {
831 rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3;
832 rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) + 2 * UP6(CC_SEL(cc, 21))) / 3;
833 rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) + 2 * UP5(CC_SEL(cc, 16))) / 3;
834 }
835 rgba[ACOMP] = UP4(src[((j & 3) * 4 + (i & 3)) / 2] >> ((i & 1) * 4));
836}
837
838
839void TAPIENTRY
840dxt5_rgba_decode_1 (const void *texture, int stride,
841 int i, int j, byte *rgba)
842{
843 const byte *src = (const byte *)texture
844 + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16;
845 const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
846 const dword *cc = (const dword *)(src + 8);
847 const byte alpha0 = src[0];
848 const byte alpha1 = src[1];
849 const int alphaShift = (((j & 3) * 4) + (i & 3)) * 3 + 16;
850 const int acode = ((alphaShift == 31)
851 ? CC_SEL(src + 2, alphaShift - 16)
852 : CC_SEL(src, alphaShift)) & 0x7;
853 if (code == 0) {
854 rgba[RCOMP] = UP5(CC_SEL(cc, 11));
855 rgba[GCOMP] = UP6(CC_SEL(cc, 5));
856 rgba[BCOMP] = UP5(CC_SEL(cc, 0));
857 } else if (code == 1) {
858 rgba[RCOMP] = UP5(CC_SEL(cc, 27));
859 rgba[GCOMP] = UP6(CC_SEL(cc, 21));
860 rgba[BCOMP] = UP5(CC_SEL(cc, 16));
861 } else if (code == 2) {
862 /* (col0 * (4 - code) + col1 * (code - 1)) / 3 */
863 rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3;
864 rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) * 2 + UP6(CC_SEL(cc, 21))) / 3;
865 rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) * 2 + UP5(CC_SEL(cc, 16))) / 3;
866 } else {
867 rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3;
868 rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) + 2 * UP6(CC_SEL(cc, 21))) / 3;
869 rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) + 2 * UP5(CC_SEL(cc, 16))) / 3;
870 }
871 if (acode == 0) {
872 rgba[ACOMP] = alpha0;
873 } else if (acode == 1) {
874 rgba[ACOMP] = alpha1;
875 } else if (alpha0 > alpha1) {
876 rgba[ACOMP] = ((8 - acode) * alpha0 + (acode - 1) * alpha1) / 7;
877 } else if (acode == 6) {
878 rgba[ACOMP] = 0;
879 } else if (acode == 7) {
880 rgba[ACOMP] = 255;
881 } else {
882 rgba[ACOMP] = ((6 - acode) * alpha0 + (acode - 1) * alpha1) / 5;
883 }
884}