Glide Plugin GLES2 port from mupen64plus-ae, but with special FrameSkip code
[mupen64plus-pandora.git] / source / gles2glide64 / src / GlideHQ / tc-1.1+ / fxt1.c
CommitLineData
98e75f2d 1/*
2 * FXT1 codec
3 * Version: 1.1
4 *
5 * Copyright (C) 2004 Daniel Borca All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
26 * Added support for ARGB inputs.
27 */
28
29
30#include <stdlib.h>
31#include <string.h>
32
33#include "types.h"
34#include "internal.h"
35#include "fxt1.h"
36
37
38/***************************************************************************\
39 * FXT1 encoder
40 *
41 * The encoder was built by reversing the decoder,
42 * and is vaguely based on Texus2 by 3dfx. Note that this code
43 * is merely a proof of concept, since it is highly UNoptimized;
44 * moreover, it is sub-optimal due to initial conditions passed
45 * to Lloyd's algorithm (the interpolation modes are even worse).
46\***************************************************************************/
47
48
/* Encoder tuning constants and texel helpers. */
#define MAX_COMP 4   /* maximum number of components ever needed in a texel */
#define MAX_VECT 4   /* maximum number of base vectors ever searched for */
#define N_TEXELS 32  /* number of texels in a block (always 32) */
#define LL_N_REP 50  /* number of iterations in lloyd's vq */
#define LL_RMS_D 10  /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2   /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/* Non-zero when the four bytes at v, read as one dword, are all zero. */
#define ISTBLACK(v) (*((dword *)(v)) == 0)

/* Copy four bytes from SRC to DST as one dword.  The expansion is wrapped
 * in parentheses so the macro behaves as a single expression wherever it
 * is used.
 */
#define COPY_4UBV(DST, SRC) (*((dword *)(DST)) = *((dword *)(SRC)))
58
59
60static int
61fxt1_bestcol (float vec[][MAX_COMP], int nv,
62 byte input[MAX_COMP], int nc)
63{
64 int i, j, best = -1;
65 float err = 1e9; /* big enough */
66
67 for (j = 0; j < nv; j++) {
68 float e = 0.0F;
69 for (i = 0; i < nc; i++) {
70 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
71 }
72 if (e < err) {
73 err = e;
74 best = j;
75 }
76 }
77
78 return best;
79}
80
81
82static int
83fxt1_worst (float vec[MAX_COMP],
84 byte input[N_TEXELS][MAX_COMP], int nc, int n)
85{
86 int i, k, worst = -1;
87 float err = -1.0F; /* small enough */
88
89 for (k = 0; k < n; k++) {
90 float e = 0.0F;
91 for (i = 0; i < nc; i++) {
92 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
93 }
94 if (e > err) {
95 err = e;
96 worst = k;
97 }
98 }
99
100 return worst;
101}
102
103
104static int
105fxt1_variance (double variance[MAX_COMP],
106 byte input[N_TEXELS][MAX_COMP], int nc, int n)
107{
108 int i, k, best = 0;
109 dword sx, sx2;
110 double var, maxvar = -1; /* small enough */
111 double teenth = 1.0 / n;
112
113 for (i = 0; i < nc; i++) {
114 sx = sx2 = 0;
115 for (k = 0; k < n; k++) {
116 int t = input[k][i];
117 sx += t;
118 sx2 += t * t;
119 }
120 var = sx2 * teenth - sx * sx * teenth * teenth;
121 if (maxvar < var) {
122 maxvar = var;
123 best = i;
124 }
125 if (variance) {
126 variance[i] = var;
127 }
128 }
129
130 return best;
131}
132
133
134static int
135fxt1_choose (float vec[][MAX_COMP], int nv,
136 byte input[N_TEXELS][MAX_COMP], int nc, int n)
137{
138#if 0
139 /* Choose colors from a grid.
140 */
141 int i, j;
142
143 for (j = 0; j < nv; j++) {
144 int m = j * (n - 1) / (nv - 1);
145 for (i = 0; i < nc; i++) {
146 vec[j][i] = input[m][i];
147 }
148 }
149#else
150 /* Our solution here is to find the darkest and brightest colors in
151 * the 8x4 tile and use those as the two representative colors.
152 * There are probably better algorithms to use (histogram-based).
153 */
154 int i, j, k;
155#ifndef YUV
156 int minSum = 2000; /* big enough */
157#else
158 int minSum = 2000000;
159#endif
160 int maxSum = -1; /* small enough */
161 int minCol = 0; /* phoudoin: silent compiler! */
162 int maxCol = 0; /* phoudoin: silent compiler! */
163
164 struct {
165 int flag;
166 dword key;
167 int freq;
168 int idx;
169 } hist[N_TEXELS];
170 int lenh = 0;
171
172 memset(hist, 0, sizeof(hist));
173
174 for (k = 0; k < n; k++) {
175 int l;
176 dword key = 0;
177 int sum = 0;
178 for (i = 0; i < nc; i++) {
179 key <<= 8;
180 key |= input[k][i];
181#ifndef YUV
182 sum += input[k][i];
183#else
184 /* RGB to YUV conversion according to CCIR 601 specs
185 * Y = 0.299R+0.587G+0.114B
186 * U = 0.713(R - Y) = 0.500R-0.419G-0.081B
187 * V = 0.564(B - Y) = -0.169R-0.331G+0.500B
188 */
189 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
190#endif
191 }
192 for (l = 0; l < n; l++) {
193 if (!hist[l].flag) {
194 /* alloc new slot */
195 hist[l].flag = !0;
196 hist[l].key = key;
197 hist[l].freq = 1;
198 hist[l].idx = k;
199 lenh = l + 1;
200 break;
201 } else if (hist[l].key == key) {
202 hist[l].freq++;
203 break;
204 }
205 }
206 if (minSum > sum) {
207 minSum = sum;
208 minCol = k;
209 }
210 if (maxSum < sum) {
211 maxSum = sum;
212 maxCol = k;
213 }
214 }
215
216 if (lenh <= nv) {
217 for (j = 0; j < lenh; j++) {
218 for (i = 0; i < nc; i++) {
219 vec[j][i] = (float)input[hist[j].idx][i];
220 }
221 }
222 for (; j < nv; j++) {
223 for (i = 0; i < nc; i++) {
224 vec[j][i] = vec[0][i];
225 }
226 }
227 return 0;
228 }
229
230 for (j = 0; j < nv; j++) {
231 for (i = 0; i < nc; i++) {
232 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
233 }
234 }
235#endif
236
237 return !0;
238}
239
240
241static int
242fxt1_lloyd (float vec[][MAX_COMP], int nv,
243 byte input[N_TEXELS][MAX_COMP], int nc, int n)
244{
245 /* Use the generalized lloyd's algorithm for VQ:
246 * find 4 color vectors.
247 *
248 * for each sample color
249 * sort to nearest vector.
250 *
251 * replace each vector with the centroid of it's matching colors.
252 *
253 * repeat until RMS doesn't improve.
254 *
255 * if a color vector has no samples, or becomes the same as another
256 * vector, replace it with the color which is farthest from a sample.
257 *
258 * vec[][MAX_COMP] initial vectors and resulting colors
259 * nv number of resulting colors required
260 * input[N_TEXELS][MAX_COMP] input texels
261 * nc number of components in input / vec
262 * n number of input samples
263 */
264
265 int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
266 int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
267 float error, lasterror = 1e9;
268
269 int i, j, k, rep;
270
271 /* the quantizer */
272 for (rep = 0; rep < LL_N_REP; rep++) {
273 /* reset sums & counters */
274 for (j = 0; j < nv; j++) {
275 for (i = 0; i < nc; i++) {
276 sum[j][i] = 0;
277 }
278 cnt[j] = 0;
279 }
280 error = 0;
281
282 /* scan whole block */
283 for (k = 0; k < n; k++) {
284#if 1
285 int best = -1;
286 float err = 1e9; /* big enough */
287 /* determine best vector */
288 for (j = 0; j < nv; j++) {
289 float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
290 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
291 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
292 if (nc == 4) {
293 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
294 }
295 if (e < err) {
296 err = e;
297 best = j;
298 }
299 }
300#else
301 int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
302#endif
303 /* add in closest color */
304 for (i = 0; i < nc; i++) {
305 sum[best][i] += input[k][i];
306 }
307 /* mark this vector as used */
308 cnt[best]++;
309 /* accumulate error */
310 error += err;
311 }
312
313 /* check RMS */
314 if ((error < LL_RMS_E) ||
315 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
316 return !0; /* good match */
317 }
318 lasterror = error;
319
320 /* move each vector to the barycenter of its closest colors */
321 for (j = 0; j < nv; j++) {
322 if (cnt[j]) {
323 float div = 1.0F / cnt[j];
324 for (i = 0; i < nc; i++) {
325 vec[j][i] = div * sum[j][i];
326 }
327 } else {
328 /* this vec has no samples or is identical with a previous vec */
329 int worst = fxt1_worst(vec[j], input, nc, n);
330 for (i = 0; i < nc; i++) {
331 vec[j][i] = input[worst][i];
332 }
333 }
334 }
335 }
336
337 return 0; /* could not converge fast enough */
338}
339
340
341static void
342fxt1_quantize_CHROMA (dword *cc,
343 byte input[N_TEXELS][MAX_COMP])
344{
345 const int n_vect = 4; /* 4 base vectors to find */
346 const int n_comp = 3; /* 3 components: R, G, B */
347 float vec[MAX_VECT][MAX_COMP];
348 int i, j, k;
349 qword hi; /* high quadword */
350 dword lohi, lolo; /* low quadword: hi dword, lo dword */
351
352 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
353 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
354 }
355
356 Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
357 for (j = n_vect - 1; j >= 0; j--) {
358 for (i = 0; i < n_comp; i++) {
359 /* add in colors */
360 Q_SHL(hi, 5);
361 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
362 }
363 }
364 ((qword *)cc)[1] = hi;
365
366 lohi = lolo = 0;
367 /* right microtile */
368 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
369 lohi <<= 2;
370 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
371 }
372 /* left microtile */
373 for (; k >= 0; k--) {
374 lolo <<= 2;
375 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
376 }
377 cc[1] = lohi;
378 cc[0] = lolo;
379}
380
381
382static void
383fxt1_quantize_ALPHA0 (dword *cc,
384 byte input[N_TEXELS][MAX_COMP],
385 byte reord[N_TEXELS][MAX_COMP], int n)
386{
387 const int n_vect = 3; /* 3 base vectors to find */
388 const int n_comp = 4; /* 4 components: R, G, B, A */
389 float vec[MAX_VECT][MAX_COMP];
390 int i, j, k;
391 qword hi; /* high quadword */
392 dword lohi, lolo; /* low quadword: hi dword, lo dword */
393
394 /* the last vector indicates zero */
395 for (i = 0; i < n_comp; i++) {
396 vec[n_vect][i] = 0;
397 }
398
399 /* the first n texels in reord are guaranteed to be non-zero */
400 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
401 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
402 }
403
404 Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
405 for (j = n_vect - 1; j >= 0; j--) {
406 /* add in alphas */
407 Q_SHL(hi, 5);
408 Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
409 }
410 for (j = n_vect - 1; j >= 0; j--) {
411 for (i = 0; i < n_comp - 1; i++) {
412 /* add in colors */
413 Q_SHL(hi, 5);
414 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
415 }
416 }
417 ((qword *)cc)[1] = hi;
418
419 lohi = lolo = 0;
420 /* right microtile */
421 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
422 lohi <<= 2;
423 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
424 }
425 /* left microtile */
426 for (; k >= 0; k--) {
427 lolo <<= 2;
428 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
429 }
430 cc[1] = lohi;
431 cc[0] = lolo;
432}
433
434
435static void
436fxt1_quantize_ALPHA1 (dword *cc,
437 byte input[N_TEXELS][MAX_COMP])
438{
439 const int n_vect = 3; /* highest vector number in each microtile */
440 const int n_comp = 4; /* 4 components: R, G, B, A */
441 float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
442 float b, iv[MAX_COMP]; /* interpolation vector */
443 int i, j, k;
444 qword hi; /* high quadword */
445 dword lohi, lolo; /* low quadword: hi dword, lo dword */
446
447 int minSum;
448 int maxSum;
449 int minColL = 0, maxColL = 0;
450 int minColR = 0, maxColR = 0;
451 int sumL = 0, sumR = 0;
452
453 /* Our solution here is to find the darkest and brightest colors in
454 * the 4x4 tile and use those as the two representative colors.
455 * There are probably better algorithms to use (histogram-based).
456 */
457#ifndef YUV
458 minSum = 2000; /* big enough */
459#else
460 minSum = 2000000;
461#endif
462 maxSum = -1; /* small enough */
463 for (k = 0; k < N_TEXELS / 2; k++) {
464 int sum = 0;
465#ifndef YUV
466 for (i = 0; i < n_comp; i++) {
467 sum += input[k][i];
468 }
469#else
470 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
471#endif
472 if (minSum > sum) {
473 minSum = sum;
474 minColL = k;
475 }
476 if (maxSum < sum) {
477 maxSum = sum;
478 maxColL = k;
479 }
480 sumL += sum;
481 }
482#ifndef YUV
483 minSum = 2000; /* big enough */
484#else
485 minSum = 2000000;
486#endif
487 maxSum = -1; /* small enough */
488 for (; k < N_TEXELS; k++) {
489 int sum = 0;
490#ifndef YUV
491 for (i = 0; i < n_comp; i++) {
492 sum += input[k][i];
493 }
494#else
495 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
496#endif
497 if (minSum > sum) {
498 minSum = sum;
499 minColR = k;
500 }
501 if (maxSum < sum) {
502 maxSum = sum;
503 maxColR = k;
504 }
505 sumR += sum;
506 }
507
508 /* choose the common vector (yuck!) */
509 {
510 int j1, j2;
511 int v1 = 0, v2 = 0;
512 float err = 1e9; /* big enough */
513 float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
514 for (i = 0; i < n_comp; i++) {
515 tv[0][i] = input[minColL][i];
516 tv[1][i] = input[maxColL][i];
517 tv[2][i] = input[minColR][i];
518 tv[3][i] = input[maxColR][i];
519 }
520 for (j1 = 0; j1 < 2; j1++) {
521 for (j2 = 2; j2 < 4; j2++) {
522 float e = 0.0F;
523 for (i = 0; i < n_comp; i++) {
524 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
525 }
526 if (e < err) {
527 err = e;
528 v1 = j1;
529 v2 = j2;
530 }
531 }
532 }
533 for (i = 0; i < n_comp; i++) {
534 vec[0][i] = tv[1 - v1][i];
535 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
536 vec[2][i] = tv[5 - v2][i];
537 }
538 }
539
540 /* left microtile */
541 cc[0] = 0;
542 if (minColL != maxColL) {
543 /* compute interpolation vector */
544 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
545
546 /* add in texels */
547 lolo = 0;
548 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
549 int texel;
550 /* interpolate color */
551 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
552 /* add in texel */
553 lolo <<= 2;
554 lolo |= texel;
555 }
556
557 cc[0] = lolo;
558 }
559
560 /* right microtile */
561 cc[1] = 0;
562 if (minColR != maxColR) {
563 /* compute interpolation vector */
564 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
565
566 /* add in texels */
567 lohi = 0;
568 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
569 int texel;
570 /* interpolate color */
571 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
572 /* add in texel */
573 lohi <<= 2;
574 lohi |= texel;
575 }
576
577 cc[1] = lohi;
578 }
579
580 Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
581 for (j = n_vect - 1; j >= 0; j--) {
582 /* add in alphas */
583 Q_SHL(hi, 5);
584 Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
585 }
586 for (j = n_vect - 1; j >= 0; j--) {
587 for (i = 0; i < n_comp - 1; i++) {
588 /* add in colors */
589 Q_SHL(hi, 5);
590 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
591 }
592 }
593 ((qword *)cc)[1] = hi;
594}
595
596
597static void
598fxt1_quantize_HI (dword *cc,
599 byte input[N_TEXELS][MAX_COMP],
600 byte reord[N_TEXELS][MAX_COMP], int n)
601{
602 const int n_vect = 6; /* highest vector number */
603 const int n_comp = 3; /* 3 components: R, G, B */
604 float b = 0.0F; /* phoudoin: silent compiler! */
605 float iv[MAX_COMP]; /* interpolation vector */
606 int i, k;
607 dword hihi; /* high quadword: hi dword */
608
609#ifndef YUV
610 int minSum = 2000; /* big enough */
611#else
612 int minSum = 2000000;
613#endif
614 int maxSum = -1; /* small enough */
615 int minCol = 0; /* phoudoin: silent compiler! */
616 int maxCol = 0; /* phoudoin: silent compiler! */
617
618 /* Our solution here is to find the darkest and brightest colors in
619 * the 8x4 tile and use those as the two representative colors.
620 * There are probably better algorithms to use (histogram-based).
621 */
622 for (k = 0; k < n; k++) {
623 int sum = 0;
624#ifndef YUV
625 for (i = 0; i < n_comp; i++) {
626 sum += reord[k][i];
627 }
628#else
629 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
630#endif
631 if (minSum > sum) {
632 minSum = sum;
633 minCol = k;
634 }
635 if (maxSum < sum) {
636 maxSum = sum;
637 maxCol = k;
638 }
639 }
640
641 hihi = 0; /* cc-hi = "00" */
642 for (i = 0; i < n_comp; i++) {
643 /* add in colors */
644 hihi <<= 5;
645 hihi |= reord[maxCol][i] >> 3;
646 }
647 for (i = 0; i < n_comp; i++) {
648 /* add in colors */
649 hihi <<= 5;
650 hihi |= reord[minCol][i] >> 3;
651 }
652 cc[3] = hihi;
653 cc[0] = cc[1] = cc[2] = 0;
654
655 /* compute interpolation vector */
656 if (minCol != maxCol) {
657 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
658 }
659
660 /* add in texels */
661 for (k = N_TEXELS - 1; k >= 0; k--) {
662 int t = k * 3;
663 dword *kk = (dword *)((byte *)cc + t / 8);
664 int texel = n_vect + 1; /* transparent black */
665
666 if (!ISTBLACK(input[k])) {
667 if (minCol != maxCol) {
668 /* interpolate color */
669 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
670 /* add in texel */
671 kk[0] |= texel << (t & 7);
672 }
673 } else {
674 /* add in texel */
675 kk[0] |= texel << (t & 7);
676 }
677 }
678}
679
680
681static void
682fxt1_quantize_MIXED1 (dword *cc,
683 byte input[N_TEXELS][MAX_COMP])
684{
685 const int n_vect = 2; /* highest vector number in each microtile */
686 const int n_comp = 3; /* 3 components: R, G, B */
687 byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
688 float b, iv[MAX_COMP]; /* interpolation vector */
689 int i, j, k;
690 qword hi; /* high quadword */
691 dword lohi, lolo; /* low quadword: hi dword, lo dword */
692
693 int minSum;
694 int maxSum;
695 int minColL = 0, maxColL = -1;
696 int minColR = 0, maxColR = -1;
697
698 /* Our solution here is to find the darkest and brightest colors in
699 * the 4x4 tile and use those as the two representative colors.
700 * There are probably better algorithms to use (histogram-based).
701 */
702#ifndef YUV
703 minSum = 2000; /* big enough */
704#else
705 minSum = 2000000;
706#endif
707 maxSum = -1; /* small enough */
708 for (k = 0; k < N_TEXELS / 2; k++) {
709 if (!ISTBLACK(input[k])) {
710 int sum = 0;
711#ifndef YUV
712 for (i = 0; i < n_comp; i++) {
713 sum += input[k][i];
714 }
715#else
716 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
717#endif
718 if (minSum > sum) {
719 minSum = sum;
720 minColL = k;
721 }
722 if (maxSum < sum) {
723 maxSum = sum;
724 maxColL = k;
725 }
726 }
727 }
728#ifndef YUV
729 minSum = 2000; /* big enough */
730#else
731 minSum = 2000000;
732#endif
733 maxSum = -1; /* small enough */
734 for (; k < N_TEXELS; k++) {
735 if (!ISTBLACK(input[k])) {
736 int sum = 0;
737#ifndef YUV
738 for (i = 0; i < n_comp; i++) {
739 sum += input[k][i];
740 }
741#else
742 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
743#endif
744 if (minSum > sum) {
745 minSum = sum;
746 minColR = k;
747 }
748 if (maxSum < sum) {
749 maxSum = sum;
750 maxColR = k;
751 }
752 }
753 }
754
755 /* left microtile */
756 if (maxColL == -1) {
757 /* all transparent black */
758 cc[0] = 0xFFFFFFFF;
759 for (i = 0; i < n_comp; i++) {
760 vec[0][i] = 0;
761 vec[1][i] = 0;
762 }
763 } else {
764 cc[0] = 0;
765 for (i = 0; i < n_comp; i++) {
766 vec[0][i] = input[minColL][i];
767 vec[1][i] = input[maxColL][i];
768 }
769 if (minColL != maxColL) {
770 /* compute interpolation vector */
771 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
772
773 /* add in texels */
774 lolo = 0;
775 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
776 int texel = n_vect + 1; /* transparent black */
777 if (!ISTBLACK(input[k])) {
778 /* interpolate color */
779 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
780 }
781 /* add in texel */
782 lolo <<= 2;
783 lolo |= texel;
784 }
785 cc[0] = lolo;
786 }
787 }
788
789 /* right microtile */
790 if (maxColR == -1) {
791 /* all transparent black */
792 cc[1] = 0xFFFFFFFF;
793 for (i = 0; i < n_comp; i++) {
794 vec[2][i] = 0;
795 vec[3][i] = 0;
796 }
797 } else {
798 cc[1] = 0;
799 for (i = 0; i < n_comp; i++) {
800 vec[2][i] = input[minColR][i];
801 vec[3][i] = input[maxColR][i];
802 }
803 if (minColR != maxColR) {
804 /* compute interpolation vector */
805 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
806
807 /* add in texels */
808 lohi = 0;
809 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
810 int texel = n_vect + 1; /* transparent black */
811 if (!ISTBLACK(input[k])) {
812 /* interpolate color */
813 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
814 }
815 /* add in texel */
816 lohi <<= 2;
817 lohi |= texel;
818 }
819 cc[1] = lohi;
820 }
821 }
822
823 Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
824 for (j = 2 * 2 - 1; j >= 0; j--) {
825 for (i = 0; i < n_comp; i++) {
826 /* add in colors */
827 Q_SHL(hi, 5);
828 Q_OR32(hi, vec[j][i] >> 3);
829 }
830 }
831 ((qword *)cc)[1] = hi;
832}
833
834
835static void
836fxt1_quantize_MIXED0 (dword *cc,
837 byte input[N_TEXELS][MAX_COMP])
838{
839 const int n_vect = 3; /* highest vector number in each microtile */
840 const int n_comp = 3; /* 3 components: R, G, B */
841 byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
842 float b, iv[MAX_COMP]; /* interpolation vector */
843 int i, j, k;
844 qword hi; /* high quadword */
845 dword lohi, lolo; /* low quadword: hi dword, lo dword */
846
847 int minColL = 0, maxColL = 0;
848 int minColR = 0, maxColR = 0;
849#if 0
850 int minSum;
851 int maxSum;
852
853 /* Our solution here is to find the darkest and brightest colors in
854 * the 4x4 tile and use those as the two representative colors.
855 * There are probably better algorithms to use (histogram-based).
856 */
857#ifndef YUV
858 minSum = 2000; /* big enough */
859#else
860 minSum = 2000000;
861#endif
862 maxSum = -1; /* small enough */
863 for (k = 0; k < N_TEXELS / 2; k++) {
864 int sum = 0;
865#ifndef YUV
866 for (i = 0; i < n_comp; i++) {
867 sum += input[k][i];
868 }
869#else
870 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
871#endif
872 if (minSum > sum) {
873 minSum = sum;
874 minColL = k;
875 }
876 if (maxSum < sum) {
877 maxSum = sum;
878 maxColL = k;
879 }
880 }
881 minSum = 2000; /* big enough */
882 maxSum = -1; /* small enough */
883 for (; k < N_TEXELS; k++) {
884 int sum = 0;
885#ifndef YUV
886 for (i = 0; i < n_comp; i++) {
887 sum += input[k][i];
888 }
889#else
890 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
891#endif
892 if (minSum > sum) {
893 minSum = sum;
894 minColR = k;
895 }
896 if (maxSum < sum) {
897 maxSum = sum;
898 maxColR = k;
899 }
900 }
901#else
902 int minVal;
903 int maxVal;
904 int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
905 int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
906
907 /* Scan the channel with max variance for lo & hi
908 * and use those as the two representative colors.
909 */
910 minVal = 2000; /* big enough */
911 maxVal = -1; /* small enough */
912 for (k = 0; k < N_TEXELS / 2; k++) {
913 int t = input[k][maxVarL];
914 if (minVal > t) {
915 minVal = t;
916 minColL = k;
917 }
918 if (maxVal < t) {
919 maxVal = t;
920 maxColL = k;
921 }
922 }
923 minVal = 2000; /* big enough */
924 maxVal = -1; /* small enough */
925 for (; k < N_TEXELS; k++) {
926 int t = input[k][maxVarR];
927 if (minVal > t) {
928 minVal = t;
929 minColR = k;
930 }
931 if (maxVal < t) {
932 maxVal = t;
933 maxColR = k;
934 }
935 }
936#endif
937
938 /* left microtile */
939 cc[0] = 0;
940 for (i = 0; i < n_comp; i++) {
941 vec[0][i] = input[minColL][i];
942 vec[1][i] = input[maxColL][i];
943 }
944 if (minColL != maxColL) {
945 /* compute interpolation vector */
946 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
947
948 /* add in texels */
949 lolo = 0;
950 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
951 int texel;
952 /* interpolate color */
953 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
954 /* add in texel */
955 lolo <<= 2;
956 lolo |= texel;
957 }
958
959 /* funky encoding for LSB of green */
960 if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
961 for (i = 0; i < n_comp; i++) {
962 vec[1][i] = input[minColL][i];
963 vec[0][i] = input[maxColL][i];
964 }
965 lolo = ~lolo;
966 }
967
968 cc[0] = lolo;
969 }
970
971 /* right microtile */
972 cc[1] = 0;
973 for (i = 0; i < n_comp; i++) {
974 vec[2][i] = input[minColR][i];
975 vec[3][i] = input[maxColR][i];
976 }
977 if (minColR != maxColR) {
978 /* compute interpolation vector */
979 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
980
981 /* add in texels */
982 lohi = 0;
983 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
984 int texel;
985 /* interpolate color */
986 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
987 /* add in texel */
988 lohi <<= 2;
989 lohi |= texel;
990 }
991
992 /* funky encoding for LSB of green */
993 if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
994 for (i = 0; i < n_comp; i++) {
995 vec[3][i] = input[minColR][i];
996 vec[2][i] = input[maxColR][i];
997 }
998 lohi = ~lohi;
999 }
1000
1001 cc[1] = lohi;
1002 }
1003
1004 Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1005 for (j = 2 * 2 - 1; j >= 0; j--) {
1006 for (i = 0; i < n_comp; i++) {
1007 /* add in colors */
1008 Q_SHL(hi, 5);
1009 Q_OR32(hi, vec[j][i] >> 3);
1010 }
1011 }
1012 ((qword *)cc)[1] = hi;
1013}
1014
1015
1016static void
1017fxt1_quantize (dword *cc, const byte *lines[], int comps)
1018{
1019 int trualpha;
1020 byte reord[N_TEXELS][MAX_COMP];
1021
1022 byte input[N_TEXELS][MAX_COMP];
1023#ifndef ARGB
1024 int i;
1025#endif
1026 int k, l;
1027
1028 if (comps == 3) {
1029 /* make the whole block opaque */
1030 memset(input, -1, sizeof(input));
1031 }
1032
1033 /* 8 texels each line */
1034#ifndef ARGB
1035 for (l = 0; l < 4; l++) {
1036 for (k = 0; k < 4; k++) {
1037 for (i = 0; i < comps; i++) {
1038 input[k + l * 4][i] = *lines[l]++;
1039 }
1040 }
1041 for (; k < 8; k++) {
1042 for (i = 0; i < comps; i++) {
1043 input[k + l * 4 + 12][i] = *lines[l]++;
1044 }
1045 }
1046 }
1047#else
1048 /* H.Morii - support for ARGB inputs */
1049 for (l = 0; l < 4; l++) {
1050 for (k = 0; k < 4; k++) {
1051 input[k + l * 4][2] = *lines[l]++;
1052 input[k + l * 4][1] = *lines[l]++;
1053 input[k + l * 4][0] = *lines[l]++;
1054 if (comps == 4) input[k + l * 4][3] = *lines[l]++;
1055 }
1056 for (; k < 8; k++) {
1057 input[k + l * 4 + 12][2] = *lines[l]++;
1058 input[k + l * 4 + 12][1] = *lines[l]++;
1059 input[k + l * 4 + 12][0] = *lines[l]++;
1060 if (comps == 4) input[k + l * 4 + 12][3] = *lines[l]++;
1061 }
1062 }
1063#endif
1064
1065 /* block layout:
1066 * 00, 01, 02, 03, 08, 09, 0a, 0b
1067 * 10, 11, 12, 13, 18, 19, 1a, 1b
1068 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1069 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1070 */
1071
1072 /* [dBorca]
1073 * stupidity flows forth from this
1074 */
1075 l = N_TEXELS;
1076 trualpha = 0;
1077 if (comps == 4) {
1078 /* skip all transparent black texels */
1079 l = 0;
1080 for (k = 0; k < N_TEXELS; k++) {
1081 /* test all components against 0 */
1082 if (!ISTBLACK(input[k])) {
1083 /* texel is not transparent black */
1084 COPY_4UBV(reord[l], input[k]);
1085 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1086 /* non-opaque texel */
1087 trualpha = !0;
1088 }
1089 l++;
1090 }
1091 }
1092 }
1093
1094#if 0
1095 if (trualpha) {
1096 fxt1_quantize_ALPHA0(cc, input, reord, l);
1097 } else if (l == 0) {
1098 cc[0] = cc[1] = cc[2] = -1;
1099 cc[3] = 0;
1100 } else if (l < N_TEXELS) {
1101 fxt1_quantize_HI(cc, input, reord, l);
1102 } else {
1103 fxt1_quantize_CHROMA(cc, input);
1104 }
1105 (void)fxt1_quantize_ALPHA1;
1106 (void)fxt1_quantize_MIXED1;
1107 (void)fxt1_quantize_MIXED0;
1108#else
1109 if (trualpha) {
1110 fxt1_quantize_ALPHA1(cc, input);
1111 } else if (l == 0) {
1112 cc[0] = cc[1] = cc[2] = 0xFFFFFFFF;
1113 cc[3] = 0;
1114 } else if (l < N_TEXELS) {
1115 fxt1_quantize_MIXED1(cc, input);
1116 } else {
1117 fxt1_quantize_MIXED0(cc, input);
1118 }
1119 (void)fxt1_quantize_ALPHA0;
1120 (void)fxt1_quantize_HI;
1121 (void)fxt1_quantize_CHROMA;
1122#endif
1123}
1124
1125
1126TAPI int TAPIENTRY
1127fxt1_encode (int width, int height, int comps,
1128 const void *source, int srcRowStride,
1129 void *dest, int destRowStride)
1130{
1131 int x, y;
1132 const byte *data;
1133 dword *encoded = (dword *)dest;
1134 void *newSource = NULL;
1135
1136 /* Replicate image if width is not M8 or height is not M4 */
1137 if ((width & 7) | (height & 3)) {
1138 int newWidth = (width + 7) & ~7;
1139 int newHeight = (height + 3) & ~3;
1140 newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));
1141 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1142 comps, (const byte *)source,
1143 srcRowStride, (byte *)newSource);
1144 source = newSource;
1145 width = newWidth;
1146 height = newHeight;
1147 srcRowStride = comps * newWidth;
1148 }
1149
1150 data = (const byte *)source;
1151 destRowStride = (destRowStride - width * 2) / 4;
1152 for (y = 0; y < height; y += 4) {
1153 unsigned int offs = 0 + (y + 0) * srcRowStride;
1154 for (x = 0; x < width; x += 8) {
1155 const byte *lines[4];
1156 lines[0] = &data[offs];
1157 lines[1] = lines[0] + srcRowStride;
1158 lines[2] = lines[1] + srcRowStride;
1159 lines[3] = lines[2] + srcRowStride;
1160 offs += 8 * comps;
1161 fxt1_quantize(encoded, lines, comps);
1162 /* 128 bits per 8x4 block */
1163 encoded += 4;
1164 }
1165 encoded += destRowStride;
1166 }
1167
1168 if (newSource != NULL) {
1169 free(newSource);
1170 }
1171
1172 return 0;
1173}
1174
1175
1176/***************************************************************************\
1177 * FXT1 decoder
1178 *
1179 * The decoder is based on GL_3DFX_texture_compression_FXT1
1180 * specification and serves as a concept for the encoder.
1181\***************************************************************************/
1182
1183
1184/* lookup table for scaling 5 bit colors up to 8 bits */
1185static const byte _rgb_scale_5[] = {
1186 0, 8, 16, 25, 33, 41, 49, 58,
1187 66, 74, 82, 90, 99, 107, 115, 123,
1188 132, 140, 148, 156, 165, 173, 181, 189,
1189 197, 206, 214, 222, 230, 239, 247, 255
1190};
1191
1192/* lookup table for scaling 6 bit colors up to 8 bits */
1193static const byte _rgb_scale_6[] = {
1194 0, 4, 8, 12, 16, 20, 24, 28,
1195 32, 36, 40, 45, 49, 53, 57, 61,
1196 65, 69, 73, 77, 81, 85, 89, 93,
1197 97, 101, 105, 109, 113, 117, 121, 125,
1198 130, 134, 138, 142, 146, 150, 154, 158,
1199 162, 166, 170, 174, 178, 182, 186, 190,
1200 194, 198, 202, 206, 210, 215, 219, 223,
1201 227, 231, 235, 239, 243, 247, 251, 255
1202};
1203
1204
/* Shift the block so that bit `which` becomes bit 0 (higher bits of the
 * same dword remain above it; callers mask as needed).
 */
#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))
/* Expand the low 5 bits of c to 8 bits. */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* Expand a 6 bit value built from the low 5 bits of c plus bit 0 of b. */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* Rounded integer interpolation: step t of n from c0 to c1.  The whole
 * expansion is parenthesized so it can sit inside a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
/* Clear the four bytes at v with a single dword store. */
#define ZERO_4UBV(v) (*((dword *)(v)) = 0)
1210
1211
1212static void
1213fxt1_decode_1HI (const byte *code, int t, byte *rgba)
1214{
1215 const dword *cc;
1216
1217 t *= 3;
1218 cc = (const dword *)(code + t / 8);
1219 t = (cc[0] >> (t & 7)) & 7;
1220
1221 if (t == 7) {
1222 ZERO_4UBV(rgba);
1223 } else {
1224 cc = (const dword *)(code + 12);
1225 if (t == 0) {
1226 rgba[BCOMP] = UP5(CC_SEL(cc, 0));
1227 rgba[GCOMP] = UP5(CC_SEL(cc, 5));
1228 rgba[RCOMP] = UP5(CC_SEL(cc, 10));
1229 } else if (t == 6) {
1230 rgba[BCOMP] = UP5(CC_SEL(cc, 15));
1231 rgba[GCOMP] = UP5(CC_SEL(cc, 20));
1232 rgba[RCOMP] = UP5(CC_SEL(cc, 25));
1233 } else {
1234 rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1235 rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1236 rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1237 }
1238 rgba[ACOMP] = 255;
1239 }
1240}
1241
1242
1243static void
1244fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba)
1245{
1246 const dword *cc;
1247 dword kk;
1248
1249 cc = (const dword *)code;
1250 if (t & 16) {
1251 cc++;
1252 t &= 15;
1253 }
1254 t = (cc[0] >> (t * 2)) & 3;
1255
1256 t *= 15;
1257 cc = (const dword *)(code + 8 + t / 8);
1258 kk = cc[0] >> (t & 7);
1259 rgba[BCOMP] = UP5(kk);
1260 rgba[GCOMP] = UP5(kk >> 5);
1261 rgba[RCOMP] = UP5(kk >> 10);
1262 rgba[ACOMP] = 255;
1263}
1264
1265
1266static void
1267fxt1_decode_1MIXED (const byte *code, int t, byte *rgba)
1268{
1269 const dword *cc;
1270 int col[2][3];
1271 int glsb, selb;
1272
1273 cc = (const dword *)code;
1274 if (t & 16) {
1275 t &= 15;
1276 t = (cc[1] >> (t * 2)) & 3;
1277 /* col 2 */
1278 col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6;
1279 col[0][GCOMP] = CC_SEL(cc, 99);
1280 col[0][RCOMP] = CC_SEL(cc, 104);
1281 /* col 3 */
1282 col[1][BCOMP] = CC_SEL(cc, 109);
1283 col[1][GCOMP] = CC_SEL(cc, 114);
1284 col[1][RCOMP] = CC_SEL(cc, 119);
1285 glsb = CC_SEL(cc, 126);
1286 selb = CC_SEL(cc, 33);
1287 } else {
1288 t = (cc[0] >> (t * 2)) & 3;
1289 /* col 0 */
1290 col[0][BCOMP] = CC_SEL(cc, 64);
1291 col[0][GCOMP] = CC_SEL(cc, 69);
1292 col[0][RCOMP] = CC_SEL(cc, 74);
1293 /* col 1 */
1294 col[1][BCOMP] = CC_SEL(cc, 79);
1295 col[1][GCOMP] = CC_SEL(cc, 84);
1296 col[1][RCOMP] = CC_SEL(cc, 89);
1297 glsb = CC_SEL(cc, 125);
1298 selb = CC_SEL(cc, 1);
1299 }
1300
1301 if (CC_SEL(cc, 124) & 1) {
1302 /* alpha[0] == 1 */
1303
1304 if (t == 3) {
1305 ZERO_4UBV(rgba);
1306 } else {
1307 if (t == 0) {
1308 rgba[BCOMP] = UP5(col[0][BCOMP]);
1309 rgba[GCOMP] = UP5(col[0][GCOMP]);
1310 rgba[RCOMP] = UP5(col[0][RCOMP]);
1311 } else if (t == 2) {
1312 rgba[BCOMP] = UP5(col[1][BCOMP]);
1313 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1314 rgba[RCOMP] = UP5(col[1][RCOMP]);
1315 } else {
1316 rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1317 rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1318 rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1319 }
1320 rgba[ACOMP] = 255;
1321 }
1322 } else {
1323 /* alpha[0] == 0 */
1324
1325 if (t == 0) {
1326 rgba[BCOMP] = UP5(col[0][BCOMP]);
1327 rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
1328 rgba[RCOMP] = UP5(col[0][RCOMP]);
1329 } else if (t == 3) {
1330 rgba[BCOMP] = UP5(col[1][BCOMP]);
1331 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1332 rgba[RCOMP] = UP5(col[1][RCOMP]);
1333 } else {
1334 rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1335 rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1336 UP6(col[1][GCOMP], glsb));
1337 rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1338 }
1339 rgba[ACOMP] = 255;
1340 }
1341}
1342
1343
1344static void
1345fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba)
1346{
1347 const dword *cc;
1348
1349 cc = (const dword *)code;
1350 if (CC_SEL(cc, 124) & 1) {
1351 /* lerp == 1 */
1352 int col0[4];
1353
1354 if (t & 16) {
1355 t &= 15;
1356 t = (cc[1] >> (t * 2)) & 3;
1357 /* col 2 */
1358 col0[BCOMP] = (*(const dword *)(code + 11)) >> 6;
1359 col0[GCOMP] = CC_SEL(cc, 99);
1360 col0[RCOMP] = CC_SEL(cc, 104);
1361 col0[ACOMP] = CC_SEL(cc, 119);
1362 } else {
1363 t = (cc[0] >> (t * 2)) & 3;
1364 /* col 0 */
1365 col0[BCOMP] = CC_SEL(cc, 64);
1366 col0[GCOMP] = CC_SEL(cc, 69);
1367 col0[RCOMP] = CC_SEL(cc, 74);
1368 col0[ACOMP] = CC_SEL(cc, 109);
1369 }
1370
1371 if (t == 0) {
1372 rgba[BCOMP] = UP5(col0[BCOMP]);
1373 rgba[GCOMP] = UP5(col0[GCOMP]);
1374 rgba[RCOMP] = UP5(col0[RCOMP]);
1375 rgba[ACOMP] = UP5(col0[ACOMP]);
1376 } else if (t == 3) {
1377 rgba[BCOMP] = UP5(CC_SEL(cc, 79));
1378 rgba[GCOMP] = UP5(CC_SEL(cc, 84));
1379 rgba[RCOMP] = UP5(CC_SEL(cc, 89));
1380 rgba[ACOMP] = UP5(CC_SEL(cc, 114));
1381 } else {
1382 rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1383 rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1384 rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1385 rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1386 }
1387 } else {
1388 /* lerp == 0 */
1389
1390 if (t & 16) {
1391 cc++;
1392 t &= 15;
1393 }
1394 t = (cc[0] >> (t * 2)) & 3;
1395
1396 if (t == 3) {
1397 ZERO_4UBV(rgba);
1398 } else {
1399 dword kk;
1400 cc = (const dword *)code;
1401 rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
1402 t *= 15;
1403 cc = (const dword *)(code + 8 + t / 8);
1404 kk = cc[0] >> (t & 7);
1405 rgba[BCOMP] = UP5(kk);
1406 rgba[GCOMP] = UP5(kk >> 5);
1407 rgba[RCOMP] = UP5(kk >> 10);
1408 }
1409 }
1410}
1411
1412
1413TAPI void TAPIENTRY
1414fxt1_decode_1 (const void *texture, int stride,
1415 int i, int j, byte *rgba)
1416{
1417 static void (*decode_1[]) (const byte *, int, byte *) = {
1418 fxt1_decode_1HI, /* cc-high = "00?" */
1419 fxt1_decode_1HI, /* cc-high = "00?" */
1420 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1421 fxt1_decode_1ALPHA, /* alpha = "011" */
1422 fxt1_decode_1MIXED, /* mixed = "1??" */
1423 fxt1_decode_1MIXED, /* mixed = "1??" */
1424 fxt1_decode_1MIXED, /* mixed = "1??" */
1425 fxt1_decode_1MIXED /* mixed = "1??" */
1426 };
1427
1428 const byte *code = (const byte *)texture +
1429 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1430 int mode = CC_SEL(code, 125);
1431 int t = i & 7;
1432
1433 if (t & 4) {
1434 t += 12;
1435 }
1436 t += (j & 3) * 4;
1437
1438 decode_1[mode](code, t, rgba);
1439
1440#if VERBOSE
1441 {
1442 extern int cc_chroma;
1443 extern int cc_alpha;
1444 extern int cc_high;
1445 extern int cc_mixed;
1446 static int *cctype[] = {
1447 &cc_high,
1448 &cc_high,
1449 &cc_chroma,
1450 &cc_alpha,
1451 &cc_mixed,
1452 &cc_mixed,
1453 &cc_mixed,
1454 &cc_mixed
1455 };
1456 (*cctype[mode])++;
1457 }
1458#endif
1459}