ALL: Huge upstream synch + PerRom DelaySI & CountPerOp parameters
[mupen64plus-pandora.git] / source / gles2glide64 / src / GlideHQ / tc-1.1+ / fxt1.c
CommitLineData
98e75f2d 1/*
2d262872 2 * Mesa 3-D graphics library
98e75f2d 3 *
2d262872 4 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
98e75f2d 5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
2d262872 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
98e75f2d 23 */
24
2d262872 25/**
26 * \file texcompress_fxt1.c
27 * GL_3DFX_texture_compression_FXT1 support.
98e75f2d 28 */
29
30
31#include <stdlib.h>
32#include <string.h>
2d262872 33#include <assert.h>
98e75f2d 34
35#include "types.h"
36#include "internal.h"
37#include "fxt1.h"
38
39
40/***************************************************************************\
41 * FXT1 encoder
42 *
43 * The encoder was built by reversing the decoder,
44 * and is vaguely based on Texus2 by 3dfx. Note that this code
45 * is merely a proof of concept, since it is highly UNoptimized;
46 * moreover, it is sub-optimal due to initial conditions passed
47 * to Lloyd's algorithm (the interpolation modes are even worse).
48\***************************************************************************/
49
50
#define MAX_COMP 4   /* largest number of components a texel ever has */
#define MAX_VECT 4   /* largest number of base vectors ever searched for */
#define N_TEXELS 32  /* number of texels in a block (always 32) */
#define LL_N_REP 50  /* iteration cap for Lloyd's vector quantizer */
#define LL_RMS_D 10  /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2   /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/* True when all four components of texel `v` are zero ("transparent black").
 * The bytes are tested individually instead of loading the texel through a
 * 32-bit pointer, which would break the aliasing rules and could be a
 * misaligned access on a byte array.  `v` is evaluated four times, so it
 * must not have side effects.
 */
#define ISTBLACK(v) (((v)[0] | (v)[1] | (v)[2] | (v)[3]) == 0)
98e75f2d 59
60
61static int
62fxt1_bestcol (float vec[][MAX_COMP], int nv,
2d262872 63 byte input[MAX_COMP], int nc)
98e75f2d 64{
2d262872 65 int i, j, best = -1;
66 float err = 1e9; /* big enough */
67
68 for (j = 0; j < nv; j++) {
69 float e = 0.0F;
70 for (i = 0; i < nc; i++) {
71 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
72 }
73 if (e < err) {
74 err = e;
75 best = j;
76 }
77 }
78
79 return best;
98e75f2d 80}
81
82
83static int
84fxt1_worst (float vec[MAX_COMP],
2d262872 85 byte input[N_TEXELS][MAX_COMP], int nc, int n)
98e75f2d 86{
2d262872 87 int i, k, worst = -1;
88 float err = -1.0F; /* small enough */
89
90 for (k = 0; k < n; k++) {
91 float e = 0.0F;
92 for (i = 0; i < nc; i++) {
93 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
94 }
95 if (e > err) {
96 err = e;
97 worst = k;
98 }
99 }
100
101 return worst;
98e75f2d 102}
103
104
105static int
106fxt1_variance (double variance[MAX_COMP],
2d262872 107 byte input[N_TEXELS][MAX_COMP], int nc, int n)
98e75f2d 108{
2d262872 109 int i, k, best = 0;
110 int sx, sx2;
111 double var, maxvar = -1; /* small enough */
112 double teenth = 1.0 / n;
113
114 for (i = 0; i < nc; i++) {
115 sx = sx2 = 0;
116 for (k = 0; k < n; k++) {
117 int t = input[k][i];
118 sx += t;
119 sx2 += t * t;
120 }
121 var = sx2 * teenth - sx * sx * teenth * teenth;
122 if (maxvar < var) {
123 maxvar = var;
124 best = i;
125 }
126 if (variance) {
127 variance[i] = var;
128 }
129 }
130
131 return best;
98e75f2d 132}
133
134
135static int
136fxt1_choose (float vec[][MAX_COMP], int nv,
2d262872 137 byte input[N_TEXELS][MAX_COMP], int nc, int n)
98e75f2d 138{
139#if 0
2d262872 140 /* Choose colors from a grid.
141 */
142 int i, j;
143
144 for (j = 0; j < nv; j++) {
145 int m = j * (n - 1) / (nv - 1);
146 for (i = 0; i < nc; i++) {
147 vec[j][i] = input[m][i];
148 }
149 }
98e75f2d 150#else
2d262872 151 /* Our solution here is to find the darkest and brightest colors in
152 * the 8x4 tile and use those as the two representative colors.
153 * There are probably better algorithms to use (histogram-based).
154 */
155 int i, j, k;
156 int minSum = 2000; /* big enough */
157 int maxSum = -1; /* small enough */
158 int minCol = 0; /* phoudoin: silent compiler! */
159 int maxCol = 0; /* phoudoin: silent compiler! */
160
161 struct {
162 int flag;
163 int key;
164 int freq;
165 int idx;
166 } hist[N_TEXELS];
167 int lenh = 0;
168
169 memset(hist, 0, sizeof(hist));
170
171 for (k = 0; k < n; k++) {
172 int l;
173 int key = 0;
174 int sum = 0;
175 for (i = 0; i < nc; i++) {
176 key <<= 8;
177 key |= input[k][i];
178 sum += input[k][i];
179 }
180 for (l = 0; l < n; l++) {
181 if (!hist[l].flag) {
182 /* alloc new slot */
183 hist[l].flag = !0;
184 hist[l].key = key;
185 hist[l].freq = 1;
186 hist[l].idx = k;
187 lenh = l + 1;
188 break;
189 } else if (hist[l].key == key) {
190 hist[l].freq++;
191 break;
192 }
193 }
194 if (minSum > sum) {
195 minSum = sum;
196 minCol = k;
197 }
198 if (maxSum < sum) {
199 maxSum = sum;
200 maxCol = k;
201 }
202 }
203
204 if (lenh <= nv) {
205 for (j = 0; j < lenh; j++) {
206 for (i = 0; i < nc; i++) {
207 vec[j][i] = (float)input[hist[j].idx][i];
208 }
209 }
210 for (; j < nv; j++) {
211 for (i = 0; i < nc; i++) {
212 vec[j][i] = vec[0][i];
213 }
214 }
215 return 0;
216 }
217
218 for (j = 0; j < nv; j++) {
219 for (i = 0; i < nc; i++) {
220 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
221 }
222 }
98e75f2d 223#endif
224
2d262872 225 return !0;
98e75f2d 226}
227
228
229static int
230fxt1_lloyd (float vec[][MAX_COMP], int nv,
2d262872 231 byte input[N_TEXELS][MAX_COMP], int nc, int n)
98e75f2d 232{
2d262872 233 /* Use the generalized lloyd's algorithm for VQ:
234 * find 4 color vectors.
235 *
236 * for each sample color
237 * sort to nearest vector.
238 *
239 * replace each vector with the centroid of its matching colors.
240 *
241 * repeat until RMS doesn't improve.
242 *
243 * if a color vector has no samples, or becomes the same as another
244 * vector, replace it with the color which is farthest from a sample.
245 *
246 * vec[][MAX_COMP] initial vectors and resulting colors
247 * nv number of resulting colors required
248 * input[N_TEXELS][MAX_COMP] input texels
249 * nc number of components in input / vec
250 * n number of input samples
251 */
252
253 int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
254 int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
255 float error, lasterror = 1e9;
256
257 int i, j, k, rep;
258
259 /* the quantizer */
260 for (rep = 0; rep < LL_N_REP; rep++) {
261 /* reset sums & counters */
262 for (j = 0; j < nv; j++) {
263 for (i = 0; i < nc; i++) {
264 sum[j][i] = 0;
265 }
266 cnt[j] = 0;
267 }
268 error = 0;
269
270 /* scan whole block */
271 for (k = 0; k < n; k++) {
98e75f2d 272#if 1
2d262872 273 int best = -1;
274 float err = 1e9; /* big enough */
275 /* determine best vector */
276 for (j = 0; j < nv; j++) {
277 float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
278 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
279 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
280 if (nc == 4) {
281 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
282 }
283 if (e < err) {
284 err = e;
285 best = j;
286 }
287 }
98e75f2d 288#else
2d262872 289 int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
98e75f2d 290#endif
2d262872 291 assert(best >= 0);
292 /* add in closest color */
293 for (i = 0; i < nc; i++) {
294 sum[best][i] += input[k][i];
295 }
296 /* mark this vector as used */
297 cnt[best]++;
298 /* accumulate error */
299 error += err;
300 }
301
302 /* check RMS */
303 if ((error < LL_RMS_E) ||
304 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
305 return !0; /* good match */
306 }
307 lasterror = error;
308
309 /* move each vector to the barycenter of its closest colors */
310 for (j = 0; j < nv; j++) {
311 if (cnt[j]) {
312 float div = 1.0F / cnt[j];
313 for (i = 0; i < nc; i++) {
314 vec[j][i] = div * sum[j][i];
315 }
316 } else {
317 /* this vec has no samples or is identical with a previous vec */
318 int worst = fxt1_worst(vec[j], input, nc, n);
319 for (i = 0; i < nc; i++) {
320 vec[j][i] = input[worst][i];
321 }
322 }
323 }
324 }
325
326 return 0; /* could not converge fast enough */
98e75f2d 327}
328
329
330static void
331fxt1_quantize_CHROMA (dword *cc,
2d262872 332 byte input[N_TEXELS][MAX_COMP])
98e75f2d 333{
2d262872 334 const int n_vect = 4; /* 4 base vectors to find */
335 const int n_comp = 3; /* 3 components: R, G, B */
336 float vec[MAX_VECT][MAX_COMP];
337 int i, j, k;
338 qword hi; /* high quadword */
339 dword lohi, lolo; /* low quadword: hi dword, lo dword */
340
341 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
342 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
343 }
344
345 Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
346 for (j = n_vect - 1; j >= 0; j--) {
347 for (i = 0; i < n_comp; i++) {
348 /* add in colors */
349 Q_SHL(hi, 5);
350 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
351 }
352 }
353 ((qword *)cc)[1] = hi;
354
355 lohi = lolo = 0;
356 /* right microtile */
357 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
358 lohi <<= 2;
359 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
360 }
361 /* left microtile */
362 for (; k >= 0; k--) {
363 lolo <<= 2;
364 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
365 }
366 cc[1] = lohi;
367 cc[0] = lolo;
98e75f2d 368}
369
370
371static void
372fxt1_quantize_ALPHA0 (dword *cc,
2d262872 373 byte input[N_TEXELS][MAX_COMP],
374 byte reord[N_TEXELS][MAX_COMP], int n)
98e75f2d 375{
2d262872 376 const int n_vect = 3; /* 3 base vectors to find */
377 const int n_comp = 4; /* 4 components: R, G, B, A */
378 float vec[MAX_VECT][MAX_COMP];
379 int i, j, k;
380 qword hi; /* high quadword */
381 dword lohi, lolo; /* low quadword: hi dword, lo dword */
382
383 /* the last vector indicates zero */
384 for (i = 0; i < n_comp; i++) {
385 vec[n_vect][i] = 0;
386 }
387
388 /* the first n texels in reord are guaranteed to be non-zero */
389 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
390 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
391 }
392
393 Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
394 for (j = n_vect - 1; j >= 0; j--) {
395 /* add in alphas */
396 Q_SHL(hi, 5);
397 Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
398 }
399 for (j = n_vect - 1; j >= 0; j--) {
400 for (i = 0; i < n_comp - 1; i++) {
401 /* add in colors */
402 Q_SHL(hi, 5);
403 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
404 }
405 }
406 ((qword *)cc)[1] = hi;
407
408 lohi = lolo = 0;
409 /* right microtile */
410 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
411 lohi <<= 2;
412 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
413 }
414 /* left microtile */
415 for (; k >= 0; k--) {
416 lolo <<= 2;
417 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
418 }
419 cc[1] = lohi;
420 cc[0] = lolo;
98e75f2d 421}
422
423
424static void
425fxt1_quantize_ALPHA1 (dword *cc,
2d262872 426 byte input[N_TEXELS][MAX_COMP])
98e75f2d 427{
2d262872 428 const int n_vect = 3; /* highest vector number in each microtile */
429 const int n_comp = 4; /* 4 components: R, G, B, A */
430 float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
431 float b, iv[MAX_COMP]; /* interpolation vector */
432 int i, j, k;
433 qword hi; /* high quadword */
434 dword lohi, lolo; /* low quadword: hi dword, lo dword */
435
436 int minSum;
437 int maxSum;
438 int minColL = 0, maxColL = 0;
439 int minColR = 0, maxColR = 0;
440 int sumL = 0, sumR = 0;
441 int nn_comp;
442 /* Our solution here is to find the darkest and brightest colors in
443 * the 4x4 tile and use those as the two representative colors.
444 * There are probably better algorithms to use (histogram-based).
445 */
446 nn_comp = n_comp;
447 while ((minColL == maxColL) && nn_comp) {
448 minSum = 2000; /* big enough */
449 maxSum = -1; /* small enough */
450 for (k = 0; k < N_TEXELS / 2; k++) {
451 int sum = 0;
452 for (i = 0; i < nn_comp; i++) {
453 sum += input[k][i];
454 }
455 if (minSum > sum) {
456 minSum = sum;
457 minColL = k;
458 }
459 if (maxSum < sum) {
460 maxSum = sum;
461 maxColL = k;
462 }
463 sumL += sum;
464 }
465
466 nn_comp--;
467 }
468
469 nn_comp = n_comp;
470 while ((minColR == maxColR) && nn_comp) {
471 minSum = 2000; /* big enough */
472 maxSum = -1; /* small enough */
473 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
474 int sum = 0;
475 for (i = 0; i < nn_comp; i++) {
476 sum += input[k][i];
477 }
478 if (minSum > sum) {
479 minSum = sum;
480 minColR = k;
481 }
482 if (maxSum < sum) {
483 maxSum = sum;
484 maxColR = k;
485 }
486 sumR += sum;
487 }
488
489 nn_comp--;
490 }
491
492 /* choose the common vector (yuck!) */
493 {
494 int j1, j2;
495 int v1 = 0, v2 = 0;
496 float err = 1e9; /* big enough */
497 float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
498 for (i = 0; i < n_comp; i++) {
499 tv[0][i] = input[minColL][i];
500 tv[1][i] = input[maxColL][i];
501 tv[2][i] = input[minColR][i];
502 tv[3][i] = input[maxColR][i];
503 }
504 for (j1 = 0; j1 < 2; j1++) {
505 for (j2 = 2; j2 < 4; j2++) {
506 float e = 0.0F;
507 for (i = 0; i < n_comp; i++) {
508 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
509 }
510 if (e < err) {
511 err = e;
512 v1 = j1;
513 v2 = j2;
514 }
515 }
516 }
517 for (i = 0; i < n_comp; i++) {
518 vec[0][i] = tv[1 - v1][i];
519 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
520 vec[2][i] = tv[5 - v2][i];
521 }
522 }
523
524 /* left microtile */
525 cc[0] = 0;
526 if (minColL != maxColL) {
527 /* compute interpolation vector */
528 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
529
530 /* add in texels */
531 lolo = 0;
532 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
533 int texel;
534 /* interpolate color */
535 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
536 /* add in texel */
537 lolo <<= 2;
538 lolo |= texel;
539 }
540
541 cc[0] = lolo;
542 }
543
544 /* right microtile */
545 cc[1] = 0;
546 if (minColR != maxColR) {
547 /* compute interpolation vector */
548 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
549
550 /* add in texels */
551 lohi = 0;
552 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
553 int texel;
554 /* interpolate color */
555 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
556 /* add in texel */
557 lohi <<= 2;
558 lohi |= texel;
559 }
560
561 cc[1] = lohi;
562 }
563
564 Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
565 for (j = n_vect - 1; j >= 0; j--) {
566 /* add in alphas */
567 Q_SHL(hi, 5);
568 Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
569 }
570 for (j = n_vect - 1; j >= 0; j--) {
571 for (i = 0; i < n_comp - 1; i++) {
572 /* add in colors */
573 Q_SHL(hi, 5);
574 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
575 }
576 }
577 ((qword *)cc)[1] = hi;
98e75f2d 578}
579
580
581static void
582fxt1_quantize_HI (dword *cc,
2d262872 583 byte input[N_TEXELS][MAX_COMP],
584 byte reord[N_TEXELS][MAX_COMP], int n)
98e75f2d 585{
2d262872 586 const int n_vect = 6; /* highest vector number */
587 const int n_comp = 3; /* 3 components: R, G, B */
588 float b = 0.0F; /* phoudoin: silent compiler! */
589 float iv[MAX_COMP]; /* interpolation vector */
590 int i, k;
591 dword hihi; /* high quadword: hi dword */
592
593 int minSum = 2000; /* big enough */
594 int maxSum = -1; /* small enough */
595 int minCol = 0; /* phoudoin: silent compiler! */
596 int maxCol = 0; /* phoudoin: silent compiler! */
597
598 /* Our solution here is to find the darkest and brightest colors in
599 * the 8x4 tile and use those as the two representative colors.
600 * There are probably better algorithms to use (histogram-based).
601 */
602 for (k = 0; k < n; k++) {
603 int sum = 0;
604 for (i = 0; i < n_comp; i++) {
605 sum += reord[k][i];
606 }
607 if (minSum > sum) {
608 minSum = sum;
609 minCol = k;
610 }
611 if (maxSum < sum) {
612 maxSum = sum;
613 maxCol = k;
614 }
615 }
616
617 hihi = 0; /* cc-hi = "00" */
618 for (i = 0; i < n_comp; i++) {
619 /* add in colors */
620 hihi <<= 5;
621 hihi |= reord[maxCol][i] >> 3;
622 }
623 for (i = 0; i < n_comp; i++) {
624 /* add in colors */
625 hihi <<= 5;
626 hihi |= reord[minCol][i] >> 3;
627 }
628 cc[3] = hihi;
629 cc[0] = cc[1] = cc[2] = 0;
630
631 /* compute interpolation vector */
632 if (minCol != maxCol) {
633 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
634 }
635
636 /* add in texels */
637 for (k = N_TEXELS - 1; k >= 0; k--) {
638 int t = k * 3;
639 dword *kk = (dword *)((char *)cc + t / 8);
640 int texel = n_vect + 1; /* transparent black */
641
642 if (!ISTBLACK(input[k])) {
643 if (minCol != maxCol) {
644 /* interpolate color */
645 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
646 /* add in texel */
647 kk[0] |= texel << (t & 7);
648 }
649 } else {
650 /* add in texel */
651 kk[0] |= texel << (t & 7);
652 }
653 }
98e75f2d 654}
655
656
657static void
658fxt1_quantize_MIXED1 (dword *cc,
2d262872 659 byte input[N_TEXELS][MAX_COMP])
98e75f2d 660{
2d262872 661 const int n_vect = 2; /* highest vector number in each microtile */
662 const int n_comp = 3; /* 3 components: R, G, B */
663 byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
664 float b, iv[MAX_COMP]; /* interpolation vector */
665 int i, j, k;
666 qword hi; /* high quadword */
667 dword lohi, lolo; /* low quadword: hi dword, lo dword */
668
669 int minSum;
670 int maxSum;
671 int minColL = 0, maxColL = -1;
672 int minColR = 0, maxColR = -1;
673
674 /* Our solution here is to find the darkest and brightest colors in
675 * the 4x4 tile and use those as the two representative colors.
676 * There are probably better algorithms to use (histogram-based).
677 */
678 minSum = 2000; /* big enough */
679 maxSum = -1; /* small enough */
680 for (k = 0; k < N_TEXELS / 2; k++) {
681 if (!ISTBLACK(input[k])) {
682 int sum = 0;
683 for (i = 0; i < n_comp; i++) {
684 sum += input[k][i];
685 }
686 if (minSum > sum) {
687 minSum = sum;
688 minColL = k;
689 }
690 if (maxSum < sum) {
691 maxSum = sum;
692 maxColL = k;
693 }
694 }
695 }
696 minSum = 2000; /* big enough */
697 maxSum = -1; /* small enough */
698 for (; k < N_TEXELS; k++) {
699 if (!ISTBLACK(input[k])) {
700 int sum = 0;
701 for (i = 0; i < n_comp; i++) {
702 sum += input[k][i];
703 }
704 if (minSum > sum) {
705 minSum = sum;
706 minColR = k;
707 }
708 if (maxSum < sum) {
709 maxSum = sum;
710 maxColR = k;
711 }
712 }
713 }
714
715 /* left microtile */
716 if (maxColL == -1) {
717 /* all transparent black */
718 cc[0] = ~0u;
719 for (i = 0; i < n_comp; i++) {
720 vec[0][i] = 0;
721 vec[1][i] = 0;
722 }
723 } else {
724 cc[0] = 0;
725 for (i = 0; i < n_comp; i++) {
726 vec[0][i] = input[minColL][i];
727 vec[1][i] = input[maxColL][i];
728 }
729 if (minColL != maxColL) {
730 /* compute interpolation vector */
731 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
732
733 /* add in texels */
734 lolo = 0;
735 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
736 int texel = n_vect + 1; /* transparent black */
737 if (!ISTBLACK(input[k])) {
738 /* interpolate color */
739 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
740 }
741 /* add in texel */
742 lolo <<= 2;
743 lolo |= texel;
744 }
745 cc[0] = lolo;
746 }
747 }
748
749 /* right microtile */
750 if (maxColR == -1) {
751 /* all transparent black */
752 cc[1] = ~0u;
753 for (i = 0; i < n_comp; i++) {
754 vec[2][i] = 0;
755 vec[3][i] = 0;
756 }
757 } else {
758 cc[1] = 0;
759 for (i = 0; i < n_comp; i++) {
760 vec[2][i] = input[minColR][i];
761 vec[3][i] = input[maxColR][i];
762 }
763 if (minColR != maxColR) {
764 /* compute interpolation vector */
765 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
766
767 /* add in texels */
768 lohi = 0;
769 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
770 int texel = n_vect + 1; /* transparent black */
771 if (!ISTBLACK(input[k])) {
772 /* interpolate color */
773 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
774 }
775 /* add in texel */
776 lohi <<= 2;
777 lohi |= texel;
778 }
779 cc[1] = lohi;
780 }
781 }
782
783 Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
784 for (j = 2 * 2 - 1; j >= 0; j--) {
785 for (i = 0; i < n_comp; i++) {
786 /* add in colors */
787 Q_SHL(hi, 5);
788 Q_OR32(hi, vec[j][i] >> 3);
789 }
790 }
791 ((qword *)cc)[1] = hi;
98e75f2d 792}
793
794
795static void
796fxt1_quantize_MIXED0 (dword *cc,
2d262872 797 byte input[N_TEXELS][MAX_COMP])
98e75f2d 798{
2d262872 799 const int n_vect = 3; /* highest vector number in each microtile */
800 const int n_comp = 3; /* 3 components: R, G, B */
801 byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
802 float b, iv[MAX_COMP]; /* interpolation vector */
803 int i, j, k;
804 qword hi; /* high quadword */
805 dword lohi, lolo; /* low quadword: hi dword, lo dword */
806
807 int minColL = 0, maxColL = 0;
808 int minColR = 0, maxColR = 0;
98e75f2d 809#if 0
2d262872 810 int minSum;
811 int maxSum;
812
813 /* Our solution here is to find the darkest and brightest colors in
814 * the 4x4 tile and use those as the two representative colors.
815 * There are probably better algorithms to use (histogram-based).
816 */
817 minSum = 2000; /* big enough */
818 maxSum = -1; /* small enough */
819 for (k = 0; k < N_TEXELS / 2; k++) {
820 int sum = 0;
821 for (i = 0; i < n_comp; i++) {
822 sum += input[k][i];
823 }
824 if (minSum > sum) {
825 minSum = sum;
826 minColL = k;
827 }
828 if (maxSum < sum) {
829 maxSum = sum;
830 maxColL = k;
831 }
832 }
833 minSum = 2000; /* big enough */
834 maxSum = -1; /* small enough */
835 for (; k < N_TEXELS; k++) {
836 int sum = 0;
837 for (i = 0; i < n_comp; i++) {
838 sum += input[k][i];
839 }
840 if (minSum > sum) {
841 minSum = sum;
842 minColR = k;
843 }
844 if (maxSum < sum) {
845 maxSum = sum;
846 maxColR = k;
847 }
848 }
98e75f2d 849#else
2d262872 850 int minVal;
851 int maxVal;
852 int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
853 int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
854
855 /* Scan the channel with max variance for lo & hi
856 * and use those as the two representative colors.
857 */
858 minVal = 2000; /* big enough */
859 maxVal = -1; /* small enough */
860 for (k = 0; k < N_TEXELS / 2; k++) {
861 int t = input[k][maxVarL];
862 if (minVal > t) {
863 minVal = t;
864 minColL = k;
865 }
866 if (maxVal < t) {
867 maxVal = t;
868 maxColL = k;
869 }
870 }
871 minVal = 2000; /* big enough */
872 maxVal = -1; /* small enough */
873 for (; k < N_TEXELS; k++) {
874 int t = input[k][maxVarR];
875 if (minVal > t) {
876 minVal = t;
877 minColR = k;
878 }
879 if (maxVal < t) {
880 maxVal = t;
881 maxColR = k;
882 }
883 }
98e75f2d 884#endif
885
2d262872 886 /* left microtile */
887 cc[0] = 0;
888 for (i = 0; i < n_comp; i++) {
889 vec[0][i] = input[minColL][i];
890 vec[1][i] = input[maxColL][i];
891 }
892 if (minColL != maxColL) {
893 /* compute interpolation vector */
894 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
895
896 /* add in texels */
897 lolo = 0;
898 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
899 int texel;
900 /* interpolate color */
901 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
902 /* add in texel */
903 lolo <<= 2;
904 lolo |= texel;
905 }
906
907 /* funky encoding for LSB of green */
908 if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
909 for (i = 0; i < n_comp; i++) {
910 vec[1][i] = input[minColL][i];
911 vec[0][i] = input[maxColL][i];
912 }
913 lolo = ~lolo;
914 }
915
916 cc[0] = lolo;
917 }
918
919 /* right microtile */
920 cc[1] = 0;
921 for (i = 0; i < n_comp; i++) {
922 vec[2][i] = input[minColR][i];
923 vec[3][i] = input[maxColR][i];
924 }
925 if (minColR != maxColR) {
926 /* compute interpolation vector */
927 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
928
929 /* add in texels */
930 lohi = 0;
931 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
932 int texel;
933 /* interpolate color */
934 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
935 /* add in texel */
936 lohi <<= 2;
937 lohi |= texel;
938 }
939
940 /* funky encoding for LSB of green */
941 if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
942 for (i = 0; i < n_comp; i++) {
943 vec[3][i] = input[minColR][i];
944 vec[2][i] = input[maxColR][i];
945 }
946 lohi = ~lohi;
947 }
948
949 cc[1] = lohi;
950 }
951
952 Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
953 for (j = 2 * 2 - 1; j >= 0; j--) {
954 for (i = 0; i < n_comp; i++) {
955 /* add in colors */
956 Q_SHL(hi, 5);
957 Q_OR32(hi, vec[j][i] >> 3);
958 }
959 }
960 ((qword *)cc)[1] = hi;
98e75f2d 961}
962
963
964static void
965fxt1_quantize (dword *cc, const byte *lines[], int comps)
966{
2d262872 967 int trualpha;
968 byte reord[N_TEXELS][MAX_COMP];
969
970 byte input[N_TEXELS][MAX_COMP];
971 int i, k, l;
972
973 if (comps == 3) {
974 /* make the whole block opaque */
975 memset(input, -1, sizeof(input));
976 }
977
978 /* 8 texels each line */
979 for (l = 0; l < 4; l++) {
980 for (k = 0; k < 4; k++) {
981 for (i = 0; i < comps; i++) {
982 input[k + l * 4][i] = *lines[l]++;
983 }
984 }
985 for (; k < 8; k++) {
986 for (i = 0; i < comps; i++) {
987 input[k + l * 4 + 12][i] = *lines[l]++;
988 }
989 }
990 }
991
992 /* block layout:
993 * 00, 01, 02, 03, 08, 09, 0a, 0b
994 * 10, 11, 12, 13, 18, 19, 1a, 1b
995 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
996 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
997 */
998
999 /* [dBorca]
1000 * stupidity flows forth from this
1001 */
1002 l = N_TEXELS;
1003 trualpha = 0;
1004 if (comps == 4) {
1005 /* skip all transparent black texels */
1006 l = 0;
1007 for (k = 0; k < N_TEXELS; k++) {
1008 /* test all components against 0 */
1009 if (!ISTBLACK(input[k])) {
1010 /* texel is not transparent black */
1011 COPY_4UBV(reord[l], input[k]);
1012 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1013 /* non-opaque texel */
1014 trualpha = !0;
1015 }
1016 l++;
1017 }
1018 }
1019 }
98e75f2d 1020
2d262872 1021#if 0
1022 if (trualpha) {
1023 fxt1_quantize_ALPHA0(cc, input, reord, l);
1024 } else if (l == 0) {
1025 cc[0] = cc[1] = cc[2] = -1;
1026 cc[3] = 0;
1027 } else if (l < N_TEXELS) {
1028 fxt1_quantize_HI(cc, input, reord, l);
1029 } else {
1030 fxt1_quantize_CHROMA(cc, input);
1031 }
1032 (void)fxt1_quantize_ALPHA1;
1033 (void)fxt1_quantize_MIXED1;
1034 (void)fxt1_quantize_MIXED0;
98e75f2d 1035#else
2d262872 1036 if (trualpha) {
1037 fxt1_quantize_ALPHA1(cc, input);
1038 } else if (l == 0) {
1039 cc[0] = cc[1] = cc[2] = ~0u;
1040 cc[3] = 0;
1041 } else if (l < N_TEXELS) {
1042 fxt1_quantize_MIXED1(cc, input);
1043 } else {
1044 fxt1_quantize_MIXED0(cc, input);
1045 }
1046 (void)fxt1_quantize_ALPHA0;
1047 (void)fxt1_quantize_HI;
1048 (void)fxt1_quantize_CHROMA;
98e75f2d 1049#endif
2d262872 1050}
98e75f2d 1051
98e75f2d 1052
2d262872 1053
1054/**
1055 * Upscale an image by replication, not (typical) stretching.
1056 * We use this when the image width or height is less than a
1057 * certain size (4, 8) and we need to upscale an image.
1058 */
1059static void
1060upscale_teximage2d(int inWidth, int inHeight,
1061 int outWidth, int outHeight,
1062 int comps, const byte *src, int srcRowStride,
1063 byte *dest )
1064{
1065 int i, j, k;
1066
1067 assert(outWidth >= inWidth);
1068 assert(outHeight >= inHeight);
98e75f2d 1069#if 0
2d262872 1070 ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1071 ASSERT((outWidth & 3) == 0);
1072 ASSERT((outHeight & 3) == 0);
98e75f2d 1073#endif
98e75f2d 1074
2d262872 1075 for (i = 0; i < outHeight; i++) {
1076 const int ii = i % inHeight;
1077 for (j = 0; j < outWidth; j++) {
1078 const int jj = j % inWidth;
1079 for (k = 0; k < comps; k++) {
1080 dest[(i * outWidth + j) * comps + k]
1081 = src[ii * srcRowStride + jj * comps + k];
1082 }
1083 }
1084 }
1085}
98e75f2d 1086
2d262872 1087TAPI void TAPIENTRY
1088fxt1_encode (dword width, dword height, int comps,
1089 const void *source, int srcRowStride,
1090 void *dest, int destRowStride)
98e75f2d 1091{
2d262872 1092 dword x, y;
1093 const byte *data;
1094 dword *encoded = (dword *)dest;
1095 void *newSource = NULL, *newSourcetmp = NULL;
1096
1097 assert(comps == 3 || comps == 4);
1098
1099 if (comps == 3)
1100 newSource = reorder_source_3_alloc(source, width, height, srcRowStride);
1101 if (comps == 4)
1102 newSource = reorder_source_4_alloc(source, width, height, srcRowStride);
1103 if (!newSource)
1104 goto cleanUp;
1105 source = newSource;
1106
1107 /* Replicate image if width is not M8 or height is not M4 */
1108 if ((width & 7) | (height & 3)) {
1109 int newWidth = (width + 7) & ~7;
1110 int newHeight = (height + 3) & ~3;
1111 newSourcetmp = malloc(comps * newWidth * newHeight * sizeof(byte));
1112 free(newSource);
1113 newSource = newSourcetmp;
1114 if (!newSource) {
1115 goto cleanUp;
1116 }
1117 upscale_teximage2d(width, height, newWidth, newHeight,
1118 comps, (const byte *) source,
1119 srcRowStride, (byte *) newSource);
1120 source = newSource;
1121 width = newWidth;
1122 height = newHeight;
1123 srcRowStride = comps * newWidth;
1124 }
1125
1126 data = (const byte *) source;
1127 destRowStride = (destRowStride - width * 2) / 4;
1128 for (y = 0; y < height; y += 4) {
1129 dword offs = 0 + (y + 0) * srcRowStride;
1130 for (x = 0; x < width; x += 8) {
1131 const byte *lines[4];
1132 lines[0] = &data[offs];
1133 lines[1] = lines[0] + srcRowStride;
1134 lines[2] = lines[1] + srcRowStride;
1135 lines[3] = lines[2] + srcRowStride;
1136 offs += 8 * comps;
1137 fxt1_quantize(encoded, lines, comps);
1138 /* 128 bits per 8x4 block */
1139 encoded += 4;
1140 }
1141 encoded += destRowStride;
1142 }
1143
1144 cleanUp:
1145 free(newSource);
98e75f2d 1146}
1147
1148
1149/***************************************************************************\
1150 * FXT1 decoder
1151 *
1152 * The decoder is based on GL_3DFX_texture_compression_FXT1
1153 * specification and serves as a concept for the encoder.
1154\***************************************************************************/
1155
1156
1157/* lookup table for scaling 5 bit colors up to 8 bits */
1158static const byte _rgb_scale_5[] = {
2d262872 1159 0, 8, 16, 25, 33, 41, 49, 58,
1160 66, 74, 82, 90, 99, 107, 115, 123,
1161 132, 140, 148, 156, 165, 173, 181, 189,
1162 197, 206, 214, 222, 230, 239, 247, 255
98e75f2d 1163};
1164
1165/* lookup table for scaling 6 bit colors up to 8 bits */
1166static const byte _rgb_scale_6[] = {
2d262872 1167 0, 4, 8, 12, 16, 20, 24, 28,
1168 32, 36, 40, 45, 49, 53, 57, 61,
1169 65, 69, 73, 77, 81, 85, 89, 93,
1170 97, 101, 105, 109, 113, 117, 121, 125,
1171 130, 134, 138, 142, 146, 150, 154, 158,
1172 162, 166, 170, 174, 178, 182, 186, 190,
1173 194, 198, 202, 206, 210, 215, 219, 223,
1174 227, 231, 235, 239, 243, 247, 251, 255
98e75f2d 1175};
1176
1177
/* CC_SEL: the block viewed as dwords, shifted so that bit `which` becomes
 *         bit 0 (the caller masks the result).  Reads only, so const is kept.
 * UP5:    expand the low 5 bits of c to 8 bits.
 * UP6:    expand 5 bits of c plus one extra low bit b to 8 bits.
 * LERP:   rounded integer interpolation, step t of n from c0 to c1.  The
 *         whole expansion is parenthesized so that it can be used safely
 *         inside a larger expression.
 */
#define CC_SEL(cc, which) (((const dword *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
98e75f2d 1182
1183
1184static void
1185fxt1_decode_1HI (const byte *code, int t, byte *rgba)
1186{
2d262872 1187 const dword *cc;
1188
1189 t *= 3;
1190 cc = (const dword *)(code + t / 8);
1191 t = (cc[0] >> (t & 7)) & 7;
1192
1193 if (t == 7) {
1194 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1195 } else {
1196 byte r, g, b;
1197 cc = (const dword *)(code + 12);
1198 if (t == 0) {
1199 b = UP5(CC_SEL(cc, 0));
1200 g = UP5(CC_SEL(cc, 5));
1201 r = UP5(CC_SEL(cc, 10));
1202 } else if (t == 6) {
1203 b = UP5(CC_SEL(cc, 15));
1204 g = UP5(CC_SEL(cc, 20));
1205 r = UP5(CC_SEL(cc, 25));
1206 } else {
1207 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1208 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1209 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1210 }
1211 rgba[RCOMP] = r;
1212 rgba[GCOMP] = g;
1213 rgba[BCOMP] = b;
1214 rgba[ACOMP] = 255;
1215 }
98e75f2d 1216}
1217
1218
1219static void
1220fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba)
1221{
2d262872 1222 const dword *cc;
1223 dword kk;
1224
1225 cc = (const dword *)code;
1226 if (t & 16) {
1227 cc++;
1228 t &= 15;
1229 }
1230 t = (cc[0] >> (t * 2)) & 3;
1231
1232 t *= 15;
1233 cc = (const dword *)(code + 8 + t / 8);
1234 kk = cc[0] >> (t & 7);
1235 rgba[BCOMP] = UP5(kk);
1236 rgba[GCOMP] = UP5(kk >> 5);
1237 rgba[RCOMP] = UP5(kk >> 10);
1238 rgba[ACOMP] = 255;
98e75f2d 1239}
1240
1241
1242static void
1243fxt1_decode_1MIXED (const byte *code, int t, byte *rgba)
1244{
2d262872 1245 const dword *cc;
1246 dword col[2][3];
1247 int glsb, selb;
1248
1249 cc = (const dword *)code;
1250 if (t & 16) {
1251 t &= 15;
1252 t = (cc[1] >> (t * 2)) & 3;
1253 /* col 2 */
1254 col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6;
1255 col[0][GCOMP] = CC_SEL(cc, 99);
1256 col[0][RCOMP] = CC_SEL(cc, 104);
1257 /* col 3 */
1258 col[1][BCOMP] = CC_SEL(cc, 109);
1259 col[1][GCOMP] = CC_SEL(cc, 114);
1260 col[1][RCOMP] = CC_SEL(cc, 119);
1261 glsb = CC_SEL(cc, 126);
1262 selb = CC_SEL(cc, 33);
1263 } else {
1264 t = (cc[0] >> (t * 2)) & 3;
1265 /* col 0 */
1266 col[0][BCOMP] = CC_SEL(cc, 64);
1267 col[0][GCOMP] = CC_SEL(cc, 69);
1268 col[0][RCOMP] = CC_SEL(cc, 74);
1269 /* col 1 */
1270 col[1][BCOMP] = CC_SEL(cc, 79);
1271 col[1][GCOMP] = CC_SEL(cc, 84);
1272 col[1][RCOMP] = CC_SEL(cc, 89);
1273 glsb = CC_SEL(cc, 125);
1274 selb = CC_SEL(cc, 1);
1275 }
1276
1277 if (CC_SEL(cc, 124) & 1) {
1278 /* alpha[0] == 1 */
1279
1280 if (t == 3) {
1281 /* zero */
1282 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1283 } else {
1284 byte r, g, b;
1285 if (t == 0) {
1286 b = UP5(col[0][BCOMP]);
1287 g = UP5(col[0][GCOMP]);
1288 r = UP5(col[0][RCOMP]);
1289 } else if (t == 2) {
1290 b = UP5(col[1][BCOMP]);
1291 g = UP6(col[1][GCOMP], glsb);
1292 r = UP5(col[1][RCOMP]);
1293 } else {
1294 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1295 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1296 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1297 }
1298 rgba[RCOMP] = r;
1299 rgba[GCOMP] = g;
1300 rgba[BCOMP] = b;
1301 rgba[ACOMP] = 255;
1302 }
1303 } else {
1304 /* alpha[0] == 0 */
1305 byte r, g, b;
1306 if (t == 0) {
1307 b = UP5(col[0][BCOMP]);
1308 g = UP6(col[0][GCOMP], glsb ^ selb);
1309 r = UP5(col[0][RCOMP]);
1310 } else if (t == 3) {
1311 b = UP5(col[1][BCOMP]);
1312 g = UP6(col[1][GCOMP], glsb);
1313 r = UP5(col[1][RCOMP]);
1314 } else {
1315 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1316 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1317 UP6(col[1][GCOMP], glsb));
1318 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1319 }
1320 rgba[RCOMP] = r;
1321 rgba[GCOMP] = g;
1322 rgba[BCOMP] = b;
1323 rgba[ACOMP] = 255;
1324 }
98e75f2d 1325}
1326
1327
1328static void
1329fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba)
1330{
2d262872 1331 const dword *cc;
1332 byte r, g, b, a;
1333
1334 cc = (const dword *)code;
1335 if (CC_SEL(cc, 124) & 1) {
1336 /* lerp == 1 */
1337 dword col0[4];
1338
1339 if (t & 16) {
1340 t &= 15;
1341 t = (cc[1] >> (t * 2)) & 3;
1342 /* col 2 */
1343 col0[BCOMP] = (*(const dword *)(code + 11)) >> 6;
1344 col0[GCOMP] = CC_SEL(cc, 99);
1345 col0[RCOMP] = CC_SEL(cc, 104);
1346 col0[ACOMP] = CC_SEL(cc, 119);
1347 } else {
1348 t = (cc[0] >> (t * 2)) & 3;
1349 /* col 0 */
1350 col0[BCOMP] = CC_SEL(cc, 64);
1351 col0[GCOMP] = CC_SEL(cc, 69);
1352 col0[RCOMP] = CC_SEL(cc, 74);
1353 col0[ACOMP] = CC_SEL(cc, 109);
1354 }
1355
1356 if (t == 0) {
1357 b = UP5(col0[BCOMP]);
1358 g = UP5(col0[GCOMP]);
1359 r = UP5(col0[RCOMP]);
1360 a = UP5(col0[ACOMP]);
1361 } else if (t == 3) {
1362 b = UP5(CC_SEL(cc, 79));
1363 g = UP5(CC_SEL(cc, 84));
1364 r = UP5(CC_SEL(cc, 89));
1365 a = UP5(CC_SEL(cc, 114));
1366 } else {
1367 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1368 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1369 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1370 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1371 }
1372 } else {
1373 /* lerp == 0 */
1374
1375 if (t & 16) {
1376 cc++;
1377 t &= 15;
1378 }
1379 t = (cc[0] >> (t * 2)) & 3;
1380
1381 if (t == 3) {
1382 /* zero */
1383 r = g = b = a = 0;
1384 } else {
1385 dword kk;
1386 cc = (const dword *)code;
1387 a = UP5(cc[3] >> (t * 5 + 13));
1388 t *= 15;
1389 cc = (const dword *)(code + 8 + t / 8);
1390 kk = cc[0] >> (t & 7);
1391 b = UP5(kk);
1392 g = UP5(kk >> 5);
1393 r = UP5(kk >> 10);
1394 }
1395 }
1396 rgba[RCOMP] = r;
1397 rgba[GCOMP] = g;
1398 rgba[BCOMP] = b;
1399 rgba[ACOMP] = a;
98e75f2d 1400}
1401
1402
1403TAPI void TAPIENTRY
2d262872 1404fxt1_decode_1 (const void *texture, int stride, /* in pixels */
1405 int i, int j, byte *rgba)
98e75f2d 1406{
2d262872 1407 static void (*decode_1[]) (const byte *, int, byte *) = {
1408 fxt1_decode_1HI, /* cc-high = "00?" */
1409 fxt1_decode_1HI, /* cc-high = "00?" */
1410 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1411 fxt1_decode_1ALPHA, /* alpha = "011" */
1412 fxt1_decode_1MIXED, /* mixed = "1??" */
1413 fxt1_decode_1MIXED, /* mixed = "1??" */
1414 fxt1_decode_1MIXED, /* mixed = "1??" */
1415 fxt1_decode_1MIXED /* mixed = "1??" */
1416 };
1417
1418 const byte *code = (const byte *)texture +
1419 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1420 int mode = CC_SEL(code, 125);
1421 int t = i & 7;
1422
1423 if (t & 4) {
1424 t += 12;
1425 }
1426 t += (j & 3) * 4;
1427
1428 decode_1[mode](code, t, rgba);
98e75f2d 1429}