2 * Mesa 3-D graphics library
4 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
26 * \file texcompress_fxt1.c
27 * GL_3DFX_texture_compression_FXT1 support.
40 /***************************************************************************\
43 * The encoder was built by reversing the decoder,
44 * and is vaguely based on Texus2 by 3dfx. Note that this code
45 * is merely a proof of concept, since it is highly UNoptimized;
46 * moreover, it is sub-optimal due to initial conditions passed
47 * to Lloyd's algorithm (the interpolation modes are even worse).
48 \***************************************************************************/
/* Sizes and tuning constants used by the encoder routines below. */
51 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
52 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
53 #define N_TEXELS 32 /* number of texels in a block (always 32) */
54 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
55 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
56 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
57 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/*
 * ISTBLACK(v): true when the dword read at address v is zero.  The call
 * sites pass one texel (a MAX_COMP-byte row of the input array) and treat
 * the zero case as "transparent black".
 * NOTE(review): the macro reads byte storage through a dword pointer, so it
 * relies on dword covering exactly the texel bytes and on the access being
 * acceptable to the target/compiler -- confirm against the dword typedef.
 */
58 #define ISTBLACK(v) (*((dword *)(v)) == 0)
/*
 * Compare one texel against a set of candidate vectors.
 *
 * vec    candidate vectors, nv of them
 * nv     number of candidates to scan
 * input  the texel to match
 * nc     number of components compared per vector
 *
 * For every candidate j the loop accumulates into e the sum of squared
 * component differences between vec[j] and input.  err starts at 1e9 as the
 * initial bound.  The lines that compare e with err and produce the return
 * value are not part of this excerpt; callers OR the result into packed
 * index words, so the return is presumably the index of the closest
 * candidate -- confirm against the full source.
 */
62 fxt1_bestcol (float vec[][MAX_COMP], int nv,
63 byte input[MAX_COMP], int nc)
66 float err = 1e9; /* big enough */
68 for (j = 0; j < nv; j++) {
70 for (i = 0; i < nc; i++) {
71 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
/*
 * Measure how far each of the n texels in input lies from a single vector.
 *
 * vec    reference vector
 * input  texels to scan
 * nc     number of components compared
 * n      number of texels to scan
 *
 * For every texel k the loop accumulates the sum of squared component
 * differences between vec and input[k].  err starts at -1.0F, which points
 * to a maximum search; fxt1_lloyd stores the result in a variable named
 * worst and uses it as an index into input.  The selection and return lines
 * are not visible here, so treat "index of the farthest texel" as the
 * presumed contract -- confirm.
 */
84 fxt1_worst (float vec[MAX_COMP],
85 byte input[N_TEXELS][MAX_COMP], int nc, int n)
88 float err = -1.0F; /* small enough */
90 for (k = 0; k < n; k++) {
92 for (i = 0; i < nc; i++) {
93 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
/*
 * Per-component variance over the first n texels of input.
 *
 * variance  output array (fxt1_quantize_MIXED0 passes NULL here)
 * input     texels to analyse
 * nc        number of components to analyse
 * n         number of texels; teenth is 1.0 / n, so n is expected non-zero
 *
 * For each component the variance is formed as
 *    sx2 / n - (sx / n) * (sx / n)
 * where sx and sx2 are accumulated in lines not shown (presumably the sum
 * and the sum of squares of that component -- confirm).  maxvar starts at
 * -1 and the caller uses the int result as a component index, so the return
 * value is presumably the component with the largest variance -- confirm.
 */
106 fxt1_variance (double variance[MAX_COMP],
107 byte input[N_TEXELS][MAX_COMP], int nc, int n)
111 double var, maxvar = -1; /* small enough */
112 double teenth = 1.0 / n;
114 for (i = 0; i < nc; i++) {
116 for (k = 0; k < n; k++) {
121 var = sx2 * teenth - sx * sx * teenth * teenth;
/*
 * Pick nv starting vectors for the quantizer from n input texels.
 *
 * vec    receives the chosen vectors
 * nv     number of vectors wanted; the grid and blend variants divide by
 *        (nv - 1), so nv must be at least 2 for them
 * input  texels to choose from
 * nc     number of components per texel
 * n      number of valid texels in input
 *
 * Three strategies are visible in this excerpt (which of them is compiled
 * is presumably decided by preprocessor lines not shown -- confirm):
 *   - sample texels on an even grid, m = j * (n - 1) / (nv - 1);
 *   - collect distinct texels in hist (matched on hist[].key), copy the
 *     first lenh of them into vec and fill the remaining slots with vec[0];
 *   - blend between the texels at minCol and maxCol.
 * Callers invoke fxt1_lloyd only when the return value is non-zero.
 */
136 fxt1_choose (float vec[][MAX_COMP], int nv,
137 byte input[N_TEXELS][MAX_COMP], int nc, int n)
140 /* Choose colors from a grid.
144 for (j = 0; j < nv; j++) {
145 int m = j * (n - 1) / (nv - 1);
146 for (i = 0; i < nc; i++) {
147 vec[j][i] = input[m][i];
151 /* Our solution here is to find the darkest and brightest colors in
152 * the 8x4 tile and use those as the two representative colors.
153 * There are probably better algorithms to use (histogram-based).
156 int minSum = 2000; /* big enough */
157 int maxSum = -1; /* small enough */
158 int minCol = 0; /* phoudoin: silent compiler! */
159 int maxCol = 0; /* phoudoin: silent compiler! */
169 memset(hist, 0, sizeof(hist));
171 for (k = 0; k < n; k++) {
175 for (i = 0; i < nc; i++) {
180 for (l = 0; l < n; l++) {
189 } else if (hist[l].key == key) {
205 for (j = 0; j < lenh; j++) {
206 for (i = 0; i < nc; i++) {
207 vec[j][i] = (float)input[hist[j].idx][i];
210 for (; j < nv; j++) {
211 for (i = 0; i < nc; i++) {
212 vec[j][i] = vec[0][i];
/* Blend: j = 0 yields input[minCol], j = nv - 1 yields input[maxCol].  The
 * integer numerator carries a (nv - 1) / 2 rounding term before the float
 * division. */
218 for (j = 0; j < nv; j++) {
219 for (i = 0; i < nc; i++) {
220 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
/*
 * Refine the nv vectors in vec against the n texels in input, running at
 * most LL_N_REP passes.
 *
 * Each pass assigns every texel to its closest vector (accumulating sum[]
 * and cnt[]), then tests the accumulated error: the function returns
 * non-zero ("good match") when error < LL_RMS_E, or when the error shrank
 * by less than LL_RMS_D since the previous pass.  Otherwise every vector is
 * moved to the mean of its texels, or replaced by the texel picked by
 * fxt1_worst.  Returns 0 when the pass limit is reached.
 * The updates of cnt[], error and lasterror fall in lines not shown.
 */
230 fxt1_lloyd (float vec[][MAX_COMP], int nv,
231 byte input[N_TEXELS][MAX_COMP], int nc, int n)
233 /* Use the generalized lloyd's algorithm for VQ:
234 * find 4 color vectors.
236 * for each sample color
237 * sort to nearest vector.
239 * replace each vector with the centroid of its matching colors.
241 * repeat until RMS doesn't improve.
243 * if a color vector has no samples, or becomes the same as another
244 * vector, replace it with the color which is farthest from a sample.
246 * vec[][MAX_COMP] initial vectors and resulting colors
247 * nv number of resulting colors required
248 * input[N_TEXELS][MAX_COMP] input texels
249 * nc number of components in input / vec
250 * n number of input samples
253 int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
254 int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
255 float error, lasterror = 1e9;
260 for (rep = 0; rep < LL_N_REP; rep++) {
261 /* reset sums & counters */
262 for (j = 0; j < nv; j++) {
263 for (i = 0; i < nc; i++) {
270 /* scan whole block */
271 for (k = 0; k < n; k++) {
274 float err = 1e9; /* big enough */
275 /* determine best vector */
276 for (j = 0; j < nv; j++) {
277 float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
278 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
279 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
281 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
/* NOTE(review): fxt1_bestcol takes four parameters, yet this call passes
 * five.  It presumably sits in a disabled preprocessor branch (the inline
 * search above being the live one) -- confirm before enabling it. */
289 int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
292 /* add in closest color */
293 for (i = 0; i < nc; i++) {
294 sum[best][i] += input[k][i];
296 /* mark this vector as used */
298 /* accumulate error */
303 if ((error < LL_RMS_E) ||
304 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
305 return !0; /* good match */
309 /* move each vector to the barycenter of its closest colors */
310 for (j = 0; j < nv; j++) {
312 float div = 1.0F / cnt[j];
313 for (i = 0; i < nc; i++) {
314 vec[j][i] = div * sum[j][i];
317 /* this vec has no samples or is identical with a previous vec */
318 int worst = fxt1_worst(vec[j], input, nc, n);
319 for (i = 0; i < nc; i++) {
320 vec[j][i] = input[worst][i];
326 return 0; /* could not converge fast enough */
/*
 * Encode one block in CHROMA mode.
 *
 * cc     output block
 * input  the N_TEXELS texels of the block
 *
 * Four RGB base vectors are chosen with fxt1_choose and, when it returns
 * non-zero, refined with fxt1_lloyd.  The high quadword starts from the
 * mode value 4 and receives every vector component divided by 8.0F (which
 * maps 0..255 to 0..31); it is stored as the second qword of cc.  Each
 * texel then gets the result of fxt1_bestcol: texels N_TEXELS - 1 down to
 * N_TEXELS / 2 go into lohi, the rest into lolo.  The shift and final store
 * lines for hi, lohi and lolo are not part of this excerpt.
 */
331 fxt1_quantize_CHROMA (dword *cc,
332 byte input[N_TEXELS][MAX_COMP])
334 const int n_vect = 4; /* 4 base vectors to find */
335 const int n_comp = 3; /* 3 components: R, G, B */
336 float vec[MAX_VECT][MAX_COMP];
338 qword hi; /* high quadword */
339 dword lohi, lolo; /* low quadword: hi dword, lo dword */
341 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
342 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
345 Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
346 for (j = n_vect - 1; j >= 0; j--) {
347 for (i = 0; i < n_comp; i++) {
350 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
353 ((qword *)cc)[1] = hi;
356 /* right microtile */
357 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
359 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
362 for (; k >= 0; k--) {
364 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
/*
 * Encode one block in ALPHA mode with lerp = 0.
 *
 * cc     output block
 * input  all N_TEXELS texels of the block
 * reord  the texels that are not transparent black, packed at the front
 * n      number of valid texels in reord
 *
 * Three RGBA base vectors are fitted to the n texels of reord (fxt1_choose,
 * then fxt1_lloyd when fxt1_choose returns non-zero); a further vector slot
 * is reserved for "zero" by the first loop (its body is not shown).  The
 * high quadword starts from mode value 6, then takes the alpha components
 * and afterwards the colour components, each divided by 8.0F.  Per-texel
 * indices are looked up over n_vect + 1 vectors so that the zero vector can
 * be selected as well.
 */
372 fxt1_quantize_ALPHA0 (dword *cc,
373 byte input[N_TEXELS][MAX_COMP],
374 byte reord[N_TEXELS][MAX_COMP], int n)
376 const int n_vect = 3; /* 3 base vectors to find */
377 const int n_comp = 4; /* 4 components: R, G, B, A */
378 float vec[MAX_VECT][MAX_COMP];
380 qword hi; /* high quadword */
381 dword lohi, lolo; /* low quadword: hi dword, lo dword */
383 /* the last vector indicates zero */
384 for (i = 0; i < n_comp; i++) {
388 /* the first n texels in reord are guaranteed to be non-zero */
389 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
390 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
393 Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
394 for (j = n_vect - 1; j >= 0; j--) {
397 Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
399 for (j = n_vect - 1; j >= 0; j--) {
400 for (i = 0; i < n_comp - 1; i++) {
403 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
406 ((qword *)cc)[1] = hi;
409 /* right microtile */
410 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
412 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
415 for (; k >= 0; k--) {
417 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
/*
 * Encode one block in ALPHA mode with lerp = 1.
 *
 * cc     output block
 * input  the N_TEXELS texels of the block
 *
 * Visible steps:
 *   1. For each half of the block (texels 0 .. N_TEXELS/2 - 1 and
 *      N_TEXELS/2 .. N_TEXELS - 1) search for a min and a max texel; the
 *      search repeats while both indices coincide and nn_comp is non-zero
 *      (nn_comp is presumably reduced between rounds -- lines not shown).
 *   2. Of the four extrema, find the left/right pair with the smallest
 *      squared distance; their weighted mean (weights sumL, sumR) becomes
 *      the shared vector vec[1], the two remaining extrema become vec[0]
 *      (left) and vec[2] (right).
 *   3. Per half, build an interpolation vector with MAKEIVEC and map each
 *      texel with CALCCDOT.
 *   4. Pack the mode value 7, the alpha components and the colour
 *      components (each divided by 8.0F) into the high quadword.
 */
425 fxt1_quantize_ALPHA1 (dword *cc,
426 byte input[N_TEXELS][MAX_COMP])
428 const int n_vect = 3; /* highest vector number in each microtile */
429 const int n_comp = 4; /* 4 components: R, G, B, A */
430 float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
431 float b, iv[MAX_COMP]; /* interpolation vector */
433 qword hi; /* high quadword */
434 dword lohi, lolo; /* low quadword: hi dword, lo dword */
438 int minColL = 0, maxColL = 0;
439 int minColR = 0, maxColR = 0;
440 int sumL = 0, sumR = 0;
442 /* Our solution here is to find the darkest and brightest colors in
443 * the 4x4 tile and use those as the two representative colors.
444 * There are probably better algorithms to use (histogram-based).
447 while ((minColL == maxColL) && nn_comp) {
448 minSum = 2000; /* big enough */
449 maxSum = -1; /* small enough */
450 for (k = 0; k < N_TEXELS / 2; k++) {
452 for (i = 0; i < nn_comp; i++) {
470 while ((minColR == maxColR) && nn_comp) {
471 minSum = 2000; /* big enough */
472 maxSum = -1; /* small enough */
473 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
475 for (i = 0; i < nn_comp; i++) {
492 /* choose the common vector (yuck!) */
496 float err = 1e9; /* big enough */
497 float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
498 for (i = 0; i < n_comp; i++) {
499 tv[0][i] = input[minColL][i];
500 tv[1][i] = input[maxColL][i];
501 tv[2][i] = input[minColR][i];
502 tv[3][i] = input[maxColR][i];
504 for (j1 = 0; j1 < 2; j1++) {
505 for (j2 = 2; j2 < 4; j2++) {
507 for (i = 0; i < n_comp; i++) {
508 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
/* v1 indexes the left pair (0..1) and v2 the right pair (2..3), so
 * tv[1 - v1] and tv[5 - v2] are the extrema that were NOT merged.
 * NOTE(review): the shared vector divides by (sumL + sumR); both start at 0
 * and their updates are not visible here -- confirm the sum cannot be 0. */
517 for (i = 0; i < n_comp; i++) {
518 vec[0][i] = tv[1 - v1][i];
519 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
520 vec[2][i] = tv[5 - v2][i];
526 if (minColL != maxColL) {
527 /* compute interpolation vector */
528 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
532 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
534 /* interpolate color */
535 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
544 /* right microtile */
546 if (minColR != maxColR) {
547 /* compute interpolation vector */
548 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
552 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
554 /* interpolate color */
555 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
564 Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
565 for (j = n_vect - 1; j >= 0; j--) {
568 Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
570 for (j = n_vect - 1; j >= 0; j--) {
571 for (i = 0; i < n_comp - 1; i++) {
574 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
577 ((qword *)cc)[1] = hi;
/*
 * Encode one block in HI mode.
 *
 * cc     output block
 * input  all N_TEXELS texels of the block
 * reord  the texels that are not transparent black, packed at the front
 * n      number of valid texels in reord
 *
 * Visible steps: search the n texels of reord for a min and a max texel
 * (minCol, maxCol); pack both colours, each component shifted right by 3,
 * into hihi; clear the first three dwords of cc; then, for every texel from
 * N_TEXELS - 1 down to 0, OR its index into the bit stream at bit offset t
 * (address cc + t / 8 bytes, shift t & 7).  Transparent-black texels keep
 * the index n_vect + 1; other texels are mapped by CALCCDOT when the two
 * extrema differ.  How t is derived from k is not shown.
 * NOTE(review): kk is a dword pointer formed at a byte offset, so the
 * access is not necessarily dword-aligned -- confirm this is acceptable on
 * the supported targets.
 */
582 fxt1_quantize_HI (dword *cc,
583 byte input[N_TEXELS][MAX_COMP],
584 byte reord[N_TEXELS][MAX_COMP], int n)
586 const int n_vect = 6; /* highest vector number */
587 const int n_comp = 3; /* 3 components: R, G, B */
588 float b = 0.0F; /* phoudoin: silent compiler! */
589 float iv[MAX_COMP]; /* interpolation vector */
591 dword hihi; /* high quadword: hi dword */
593 int minSum = 2000; /* big enough */
594 int maxSum = -1; /* small enough */
595 int minCol = 0; /* phoudoin: silent compiler! */
596 int maxCol = 0; /* phoudoin: silent compiler! */
598 /* Our solution here is to find the darkest and brightest colors in
599 * the 8x4 tile and use those as the two representative colors.
600 * There are probably better algorithms to use (histogram-based).
602 for (k = 0; k < n; k++) {
604 for (i = 0; i < n_comp; i++) {
617 hihi = 0; /* cc-hi = "00" */
618 for (i = 0; i < n_comp; i++) {
621 hihi |= reord[maxCol][i] >> 3;
623 for (i = 0; i < n_comp; i++) {
626 hihi |= reord[minCol][i] >> 3;
629 cc[0] = cc[1] = cc[2] = 0;
631 /* compute interpolation vector */
632 if (minCol != maxCol) {
633 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
637 for (k = N_TEXELS - 1; k >= 0; k--) {
639 dword *kk = (dword *)((char *)cc + t / 8);
640 int texel = n_vect + 1; /* transparent black */
642 if (!ISTBLACK(input[k])) {
643 if (minCol != maxCol) {
644 /* interpolate color */
645 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
647 kk[0] |= texel << (t & 7);
651 kk[0] |= texel << (t & 7);
/*
 * Encode one block in MIXED mode, variant with a transparent-black index.
 *
 * cc     output block
 * input  the N_TEXELS texels of the block
 *
 * Each half of the block (first N_TEXELS / 2 texels, then the rest) is
 * handled on its own: transparent-black texels are skipped while searching
 * for the min and max texel, and they keep the index n_vect + 1 when the
 * indices are written.  maxColL / maxColR start at -1; the branches
 * commented "all transparent black" presumably test for that value -- the
 * test itself is not shown.  Other texels are mapped with CALCCDOT against
 * the interpolation vector of their half.  The high quadword starts from
 * 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2) and then takes all
 * four colours with every component shifted right by 3.
 */
658 fxt1_quantize_MIXED1 (dword *cc,
659 byte input[N_TEXELS][MAX_COMP])
661 const int n_vect = 2; /* highest vector number in each microtile */
662 const int n_comp = 3; /* 3 components: R, G, B */
663 byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
664 float b, iv[MAX_COMP]; /* interpolation vector */
666 qword hi; /* high quadword */
667 dword lohi, lolo; /* low quadword: hi dword, lo dword */
671 int minColL = 0, maxColL = -1;
672 int minColR = 0, maxColR = -1;
674 /* Our solution here is to find the darkest and brightest colors in
675 * the 4x4 tile and use those as the two representative colors.
676 * There are probably better algorithms to use (histogram-based).
678 minSum = 2000; /* big enough */
679 maxSum = -1; /* small enough */
680 for (k = 0; k < N_TEXELS / 2; k++) {
681 if (!ISTBLACK(input[k])) {
683 for (i = 0; i < n_comp; i++) {
696 minSum = 2000; /* big enough */
697 maxSum = -1; /* small enough */
698 for (; k < N_TEXELS; k++) {
699 if (!ISTBLACK(input[k])) {
701 for (i = 0; i < n_comp; i++) {
717 /* all transparent black */
719 for (i = 0; i < n_comp; i++) {
725 for (i = 0; i < n_comp; i++) {
726 vec[0][i] = input[minColL][i];
727 vec[1][i] = input[maxColL][i];
729 if (minColL != maxColL) {
730 /* compute interpolation vector */
731 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
735 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
736 int texel = n_vect + 1; /* transparent black */
737 if (!ISTBLACK(input[k])) {
738 /* interpolate color */
739 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
749 /* right microtile */
751 /* all transparent black */
753 for (i = 0; i < n_comp; i++) {
759 for (i = 0; i < n_comp; i++) {
760 vec[2][i] = input[minColR][i];
761 vec[3][i] = input[maxColR][i];
763 if (minColR != maxColR) {
764 /* compute interpolation vector */
765 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
769 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
770 int texel = n_vect + 1; /* transparent black */
771 if (!ISTBLACK(input[k])) {
772 /* interpolate color */
773 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
783 Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
784 for (j = 2 * 2 - 1; j >= 0; j--) {
785 for (i = 0; i < n_comp; i++) {
788 Q_OR32(hi, vec[j][i] >> 3);
791 ((qword *)cc)[1] = hi;
/*
 * Encode one block in MIXED mode, variant without a transparent index.
 *
 * cc     output block
 * input  the N_TEXELS texels of the block
 *
 * Two ways of picking the per-half extrema are visible (which one is
 * compiled is presumably decided by preprocessor lines not shown --
 * confirm): a scan driven by minSum / maxSum, and a scan of the single
 * channel returned by fxt1_variance for each half.  For each half the
 * texels are mapped with CALCCDOT against the interpolation vector of that
 * half.  After the indices are built, bit 1 of lolo (respectively lohi) is
 * compared with bit 2 of the XOR of the two green values; on a mismatch the
 * two colours of that half are stored in swapped order (the matching index
 * adjustment, if any, is in lines not shown).  The high quadword starts
 * from 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2) and then takes
 * all four colours with every component shifted right by 3.
 */
796 fxt1_quantize_MIXED0 (dword *cc,
797 byte input[N_TEXELS][MAX_COMP])
799 const int n_vect = 3; /* highest vector number in each microtile */
800 const int n_comp = 3; /* 3 components: R, G, B */
801 byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
802 float b, iv[MAX_COMP]; /* interpolation vector */
804 qword hi; /* high quadword */
805 dword lohi, lolo; /* low quadword: hi dword, lo dword */
807 int minColL = 0, maxColL = 0;
808 int minColR = 0, maxColR = 0;
813 /* Our solution here is to find the darkest and brightest colors in
814 * the 4x4 tile and use those as the two representative colors.
815 * There are probably better algorithms to use (histogram-based).
817 minSum = 2000; /* big enough */
818 maxSum = -1; /* small enough */
819 for (k = 0; k < N_TEXELS / 2; k++) {
821 for (i = 0; i < n_comp; i++) {
833 minSum = 2000; /* big enough */
834 maxSum = -1; /* small enough */
835 for (; k < N_TEXELS; k++) {
837 for (i = 0; i < n_comp; i++) {
852 int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
853 int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
855 /* Scan the channel with max variance for lo & hi
856 * and use those as the two representative colors.
858 minVal = 2000; /* big enough */
859 maxVal = -1; /* small enough */
860 for (k = 0; k < N_TEXELS / 2; k++) {
861 int t = input[k][maxVarL];
871 minVal = 2000; /* big enough */
872 maxVal = -1; /* small enough */
873 for (; k < N_TEXELS; k++) {
874 int t = input[k][maxVarR];
888 for (i = 0; i < n_comp; i++) {
889 vec[0][i] = input[minColL][i];
890 vec[1][i] = input[maxColL][i];
892 if (minColL != maxColL) {
893 /* compute interpolation vector */
894 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
898 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
900 /* interpolate color */
901 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
907 /* funky encoding for LSB of green */
908 if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
909 for (i = 0; i < n_comp; i++) {
910 vec[1][i] = input[minColL][i];
911 vec[0][i] = input[maxColL][i];
919 /* right microtile */
921 for (i = 0; i < n_comp; i++) {
922 vec[2][i] = input[minColR][i];
923 vec[3][i] = input[maxColR][i];
925 if (minColR != maxColR) {
926 /* compute interpolation vector */
927 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
931 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
933 /* interpolate color */
934 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
940 /* funky encoding for LSB of green */
941 if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
942 for (i = 0; i < n_comp; i++) {
943 vec[3][i] = input[minColR][i];
944 vec[2][i] = input[maxColR][i];
952 Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
953 for (j = 2 * 2 - 1; j >= 0; j--) {
954 for (i = 0; i < n_comp; i++) {
957 Q_OR32(hi, vec[j][i] >> 3);
960 ((qword *)cc)[1] = hi;
/*
 * Encode one block of N_TEXELS texels into cc, choosing the block mode.
 *
 * cc     output block
 * lines  four row pointers into the source image; they are advanced as the
 *        texels are read (*lines[l]++)
 * comps  number of components stored per source texel
 *
 * The input array is first filled with 0xFF bytes (memset with -1) so that
 * components the source does not provide read as fully opaque.  Texels are
 * then copied row by row into two index ranges (k + l * 4 and
 * k + l * 4 + 12; the header of the second inner loop is not shown).
 * Texels that are not transparent black are gathered into reord, l counting
 * them.  Two dispatch chains are visible, each followed by (void)
 * references to the functions of the other chain, so presumably only one of
 * them is compiled -- confirm:
 *    ALPHA0 / all-ones fill when l == 0 / HI when l < N_TEXELS / CHROMA
 *    ALPHA1 / all-ones fill when l == 0 / MIXED1 when l < N_TEXELS / MIXED0
 * The condition guarding the ALPHA call is not shown; it presumably depends
 * on the "non-opaque texel" test above.
 */
965 fxt1_quantize (dword *cc, const byte *lines[], int comps)
968 byte reord[N_TEXELS][MAX_COMP];
970 byte input[N_TEXELS][MAX_COMP];
974 /* make the whole block opaque */
975 memset(input, -1, sizeof(input));
978 /* 8 texels each line */
979 for (l = 0; l < 4; l++) {
980 for (k = 0; k < 4; k++) {
981 for (i = 0; i < comps; i++) {
982 input[k + l * 4][i] = *lines[l]++;
986 for (i = 0; i < comps; i++) {
987 input[k + l * 4 + 12][i] = *lines[l]++;
993 * 00, 01, 02, 03, 08, 09, 0a, 0b
994 * 10, 11, 12, 13, 18, 19, 1a, 1b
995 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
996 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1000 * stupidity flows forth from this
1005 /* skip all transparent black texels */
1007 for (k = 0; k < N_TEXELS; k++) {
1008 /* test all components against 0 */
1009 if (!ISTBLACK(input[k])) {
1010 /* texel is not transparent black */
1011 COPY_4UBV(reord[l], input[k]);
1012 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1013 /* non-opaque texel */
1023 fxt1_quantize_ALPHA0(cc, input, reord, l);
1024 } else if (l == 0) {
1025 cc[0] = cc[1] = cc[2] = -1;
1027 } else if (l < N_TEXELS) {
1028 fxt1_quantize_HI(cc, input, reord, l);
1030 fxt1_quantize_CHROMA(cc, input);
1032 (void)fxt1_quantize_ALPHA1;
1033 (void)fxt1_quantize_MIXED1;
1034 (void)fxt1_quantize_MIXED0;
1037 fxt1_quantize_ALPHA1(cc, input);
1038 } else if (l == 0) {
1039 cc[0] = cc[1] = cc[2] = ~0u;
1041 } else if (l < N_TEXELS) {
1042 fxt1_quantize_MIXED1(cc, input);
1044 fxt1_quantize_MIXED0(cc, input);
1046 (void)fxt1_quantize_ALPHA0;
1047 (void)fxt1_quantize_HI;
1048 (void)fxt1_quantize_CHROMA;
1055 * Upscale an image by replication, not (typical) stretching.
1056 * We use this when the image width or height is less than a
1057 * certain size (4, 8) and we need to upscale an image.
/*
 * Fill an outWidth x outHeight image by tiling the source image.
 *
 * inWidth, inHeight    size of the source image in texels
 * outWidth, outHeight  size of the destination image in texels
 * comps                bytes (components) per texel
 * src                  source texels
 * srcRowStride         distance between source rows, in bytes
 * dest                 destination buffer (declared on a line not shown);
 *                      written tightly packed, outWidth * comps per row
 *
 * Destination texel (i, j) is a copy of source texel
 * (i % inHeight, j % inWidth).  The assertions require the output to be at
 * least as large as the input; the ASSERT lines additionally require a
 * source dimension of 1 or 2 and output dimensions that are multiples of 4
 * (the two groups of checks are possibly alternatives -- confirm).
 */
1060 upscale_teximage2d(int inWidth, int inHeight,
1061 int outWidth, int outHeight,
1062 int comps, const byte *src, int srcRowStride,
1067 assert(outWidth >= inWidth);
1068 assert(outHeight >= inHeight);
1070 ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1071 ASSERT((outWidth & 3) == 0);
1072 ASSERT((outHeight & 3) == 0);
1075 for (i = 0; i < outHeight; i++) {
1076 const int ii = i % inHeight;
1077 for (j = 0; j < outWidth; j++) {
1078 const int jj = j % inWidth;
1079 for (k = 0; k < comps; k++) {
1080 dest[(i * outWidth + j) * comps + k]
1081 = src[ii * srcRowStride + jj * comps + k];
/*
 * Compress an image into FXT1 blocks.
 *
 * width, height  image size in texels
 * comps          components per texel; asserted to be 3 or 4
 * source         source texels
 * srcRowStride   distance between source rows, in bytes
 * dest           output buffer for the encoded blocks
 * destRowStride  distance between rows of output blocks, in bytes
 *
 * Visible steps: a reordered copy of the source may be made with
 * reorder_source_3_alloc / reorder_source_4_alloc (the condition is not
 * shown); when width is not a multiple of 8 or height not a multiple of 4,
 * a temporary image rounded up to those multiples is allocated and filled
 * by upscale_teximage2d, and srcRowStride becomes comps * newWidth.  The
 * image is then walked in steps of 8 x 4 texels and each block is passed to
 * fxt1_quantize together with four consecutive row pointers.
 * destRowStride is converted into the number of dwords to skip after each
 * row of blocks: (destRowStride - width * 2) / 4.
 * NOTE(review): the check of the malloc result, the update of
 * width / height / source after upscaling, and the release of newSource /
 * newSourcetmp all fall in lines not shown -- confirm they exist.
 */
1088 fxt1_encode (dword width, dword height, int comps,
1089 const void *source, int srcRowStride,
1090 void *dest, int destRowStride)
1094 dword *encoded = (dword *)dest;
1095 void *newSource = NULL, *newSourcetmp = NULL;
1097 assert(comps == 3 || comps == 4);
1100 newSource = reorder_source_3_alloc(source, width, height, srcRowStride);
1102 newSource = reorder_source_4_alloc(source, width, height, srcRowStride);
1107 /* Replicate image if width is not M8 or height is not M4 */
1108 if ((width & 7) | (height & 3)) {
1109 int newWidth = (width + 7) & ~7;
1110 int newHeight = (height + 3) & ~3;
1111 newSourcetmp = malloc(comps * newWidth * newHeight * sizeof(byte));
1113 newSource = newSourcetmp;
1117 upscale_teximage2d(width, height, newWidth, newHeight,
1118 comps, (const byte *) source,
1119 srcRowStride, (byte *) newSource);
1123 srcRowStride = comps * newWidth;
1126 data = (const byte *) source;
1127 destRowStride = (destRowStride - width * 2) / 4;
1128 for (y = 0; y < height; y += 4) {
1129 dword offs = 0 + (y + 0) * srcRowStride;
1130 for (x = 0; x < width; x += 8) {
1131 const byte *lines[4];
1132 lines[0] = &data[offs];
1133 lines[1] = lines[0] + srcRowStride;
1134 lines[2] = lines[1] + srcRowStride;
1135 lines[3] = lines[2] + srcRowStride;
1137 fxt1_quantize(encoded, lines, comps);
1138 /* 128 bits per 8x4 block */
1141 encoded += destRowStride;
1149 /***************************************************************************\
1152 * The decoder is based on GL_3DFX_texture_compression_FXT1
1153 * specification and serves as a concept for the encoder.
1154 \***************************************************************************/
1157 /* lookup table for scaling 5 bit colors up to 8 bits */
/* 32 entries; entry i equals i * 255 / 31 rounded to the nearest integer,
 * so 0 maps to 0 and 31 maps to 255.  Indexed through the UP5 macro. */
1158 static const byte _rgb_scale_5[] = {
1159 0, 8, 16, 25, 33, 41, 49, 58,
1160 66, 74, 82, 90, 99, 107, 115, 123,
1161 132, 140, 148, 156, 165, 173, 181, 189,
1162 197, 206, 214, 222, 230, 239, 247, 255
1165 /* lookup table for scaling 6 bit colors up to 8 bits */
/* 64 entries; entry i equals i * 255 / 63 rounded to the nearest integer,
 * so 0 maps to 0 and 63 maps to 255.  Indexed through the UP6 macro. */
1166 static const byte _rgb_scale_6[] = {
1167 0, 4, 8, 12, 16, 20, 24, 28,
1168 32, 36, 40, 45, 49, 53, 57, 61,
1169 65, 69, 73, 77, 81, 85, 89, 93,
1170 97, 101, 105, 109, 113, 117, 121, 125,
1171 130, 134, 138, 142, 146, 150, 154, 158,
1172 162, 166, 170, 174, 178, 182, 186, 190,
1173 194, 198, 202, 206, 210, 215, 219, 223,
1174 227, 231, 235, 239, 243, 247, 251, 255
/*
 * Bit-field helpers for the decoder.
 *
 * CC_SEL(cc, which)  treat cc as an array of dwords, pick the dword that
 *                    holds bit number `which` (which / 32) and shift it
 *                    right so that bit lands at position 0.  The result is
 *                    NOT masked; the higher bits of that dword remain, and a
 *                    field that crosses a dword boundary is not assembled.
 * UP5(c)             expand the low 5 bits of c to 8 bits via _rgb_scale_5.
 * UP6(c, b)          build a 6-bit value from the low 5 bits of c (upper
 *                    part) and the low bit of b (lowest bit), then expand
 *                    it to 8 bits via _rgb_scale_6.
 * LERP(n, t, c0, c1) integer blend with rounding: yields c0 for t == 0 and
 *                    c1 for t == n.  The expansion has no enclosing
 *                    parentheses and evaluates n and t more than once, so
 *                    use it only as a complete right-hand side with
 *                    side-effect-free arguments.
 */
1178 #define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))
1179 #define UP5(c) _rgb_scale_5[(c) & 31]
1180 #define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
1181 #define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)
/*
 * Decode one texel of a HI-mode block.
 *
 * code  start of the encoded block
 * t     texel selector on entry; how it is scaled to a bit offset happens
 *       on a line not shown
 * rgba  receives the decoded texel
 *
 * The 3-bit index of the texel is read from the bit stream at bit offset t
 * (byte code + t / 8, shift t & 7).  One index value produces an all-zero
 * RGBA texel (the test is not shown).  Otherwise the two 15-bit colours
 * stored in the dword at byte offset 12 are used: the colour at bits 0..14
 * for one index value (test not shown), the colour at bits 15..29 for index
 * 6, and LERP(6, t, ...) between them for the remaining indices.  The
 * stores of r, g, b into rgba are not shown.
 */
1185 fxt1_decode_1HI (const byte *code, int t, byte *rgba)
1190 cc = (const dword *)(code + t / 8);
1191 t = (cc[0] >> (t & 7)) & 7;
1194 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1197 cc = (const dword *)(code + 12);
1199 b = UP5(CC_SEL(cc, 0));
1200 g = UP5(CC_SEL(cc, 5));
1201 r = UP5(CC_SEL(cc, 10));
1202 } else if (t == 6) {
1203 b = UP5(CC_SEL(cc, 15));
1204 g = UP5(CC_SEL(cc, 20));
1205 r = UP5(CC_SEL(cc, 25));
1207 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1208 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1209 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
/*
 * Decode one texel of a CHROMA-mode block.
 *
 * code  start of the encoded block
 * t     texel selector on entry
 * rgba  receives the decoded texel
 *
 * A 2-bit colour index is taken from cc[0] at bit t * 2 (any adjustment of
 * cc or t for the second half of the block is on lines not shown).  The
 * selected colour is then read from the bit stream that starts at byte 8 of
 * the block, at bit offset t (t is presumably rescaled to the colour width
 * on a line not shown -- confirm), as three 5-bit fields: blue in the low
 * bits, then green, then red, each expanded with UP5.
 */
1220 fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba)
1225 cc = (const dword *)code;
1230 t = (cc[0] >> (t * 2)) & 3;
1233 cc = (const dword *)(code + 8 + t / 8);
1234 kk = cc[0] >> (t & 7);
1235 rgba[BCOMP] = UP5(kk);
1236 rgba[GCOMP] = UP5(kk >> 5);
1237 rgba[RCOMP] = UP5(kk >> 10);
/*
 * Decode one texel of a MIXED-mode block.
 *
 * code  start of the encoded block
 * t     texel selector on entry
 * rgba  receives the decoded texel
 *
 * Two parallel set-up branches are visible (the test that selects between
 * them is not shown; presumably it splits the block into its two halves):
 *   - index from cc[1]; colours from bit 94 (read as a dword at byte 11,
 *     shifted by 6) and bits 99, 104, 109, 114, 119; glsb from bit 126,
 *     selb from bit 33;
 *   - index from cc[0]; colours from bits 64, 69, 74, 79, 84, 89; glsb from
 *     bit 125, selb from bit 1.
 * Bit 124 then selects between two reconstruction schemes: the first
 * visible one has an all-zero RGBA case, col[0], col[1] (index 2) and the
 * plain average of both; the second has col[0], col[1] (index 3) and
 * LERP(3, t, ...) in between.  Green of col[1] is widened to 6 bits with
 * glsb; in the second scheme green of col[0] uses glsb ^ selb.
 * NOTE(review): the read at code + 11 is a dword access at an odd byte
 * offset -- confirm this is acceptable on the supported targets.
 */
1243 fxt1_decode_1MIXED (const byte *code, int t, byte *rgba)
1249 cc = (const dword *)code;
1252 t = (cc[1] >> (t * 2)) & 3;
1254 col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6;
1255 col[0][GCOMP] = CC_SEL(cc, 99);
1256 col[0][RCOMP] = CC_SEL(cc, 104);
1258 col[1][BCOMP] = CC_SEL(cc, 109);
1259 col[1][GCOMP] = CC_SEL(cc, 114);
1260 col[1][RCOMP] = CC_SEL(cc, 119);
1261 glsb = CC_SEL(cc, 126);
1262 selb = CC_SEL(cc, 33);
1264 t = (cc[0] >> (t * 2)) & 3;
1266 col[0][BCOMP] = CC_SEL(cc, 64);
1267 col[0][GCOMP] = CC_SEL(cc, 69);
1268 col[0][RCOMP] = CC_SEL(cc, 74);
1270 col[1][BCOMP] = CC_SEL(cc, 79);
1271 col[1][GCOMP] = CC_SEL(cc, 84);
1272 col[1][RCOMP] = CC_SEL(cc, 89);
1273 glsb = CC_SEL(cc, 125);
1274 selb = CC_SEL(cc, 1);
1277 if (CC_SEL(cc, 124) & 1) {
1282 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1286 b = UP5(col[0][BCOMP]);
1287 g = UP5(col[0][GCOMP]);
1288 r = UP5(col[0][RCOMP]);
1289 } else if (t == 2) {
1290 b = UP5(col[1][BCOMP]);
1291 g = UP6(col[1][GCOMP], glsb);
1292 r = UP5(col[1][RCOMP]);
1294 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1295 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1296 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1307 b = UP5(col[0][BCOMP]);
1308 g = UP6(col[0][GCOMP], glsb ^ selb);
1309 r = UP5(col[0][RCOMP]);
1310 } else if (t == 3) {
1311 b = UP5(col[1][BCOMP]);
1312 g = UP6(col[1][GCOMP], glsb);
1313 r = UP5(col[1][RCOMP]);
1315 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1316 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1317 UP6(col[1][GCOMP], glsb));
1318 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
/*
 * Decode one texel of an ALPHA-mode block.
 *
 * code  start of the encoded block
 * t     texel selector on entry
 * rgba  receives the decoded texel
 *
 * When bit 124 is set, each half of the block uses its own first colour
 * col0 (index from cc[1] with colour bits 94 / 99 / 104 and alpha at 119,
 * or index from cc[0] with colour bits 64 / 69 / 74 and alpha at 109; the
 * test that selects the half is not shown) together with a shared second
 * colour at bits 79 / 84 / 89 with alpha at 114.  One index value yields
 * col0 (test not shown), index 3 yields the shared colour, the others are
 * LERP(3, t, ...) between the two.
 * Otherwise a 2-bit index is read from cc[0], alpha comes from
 * cc[3] >> (t * 5 + 13), and the colour is read from the bit stream that
 * starts at byte 8 of the block (the scaling of t and the handling of the
 * remaining index value are on lines not shown).
 */
1329 fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba)
1334 cc = (const dword *)code;
1335 if (CC_SEL(cc, 124) & 1) {
1341 t = (cc[1] >> (t * 2)) & 3;
1343 col0[BCOMP] = (*(const dword *)(code + 11)) >> 6;
1344 col0[GCOMP] = CC_SEL(cc, 99);
1345 col0[RCOMP] = CC_SEL(cc, 104);
1346 col0[ACOMP] = CC_SEL(cc, 119);
1348 t = (cc[0] >> (t * 2)) & 3;
1350 col0[BCOMP] = CC_SEL(cc, 64);
1351 col0[GCOMP] = CC_SEL(cc, 69);
1352 col0[RCOMP] = CC_SEL(cc, 74);
1353 col0[ACOMP] = CC_SEL(cc, 109);
1357 b = UP5(col0[BCOMP]);
1358 g = UP5(col0[GCOMP]);
1359 r = UP5(col0[RCOMP]);
1360 a = UP5(col0[ACOMP]);
1361 } else if (t == 3) {
1362 b = UP5(CC_SEL(cc, 79));
1363 g = UP5(CC_SEL(cc, 84));
1364 r = UP5(CC_SEL(cc, 89));
1365 a = UP5(CC_SEL(cc, 114));
1367 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1368 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1369 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1370 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1379 t = (cc[0] >> (t * 2)) & 3;
1386 cc = (const dword *)code;
1387 a = UP5(cc[3] >> (t * 5 + 13));
1389 cc = (const dword *)(code + 8 + t / 8);
1390 kk = cc[0] >> (t & 7);
/*
 * Decode the texel at column i, row j of an FXT1 texture.
 *
 * texture  start of the encoded data
 * stride   width of the texture in pixels
 * i, j     texel coordinates
 * rgba     receives the decoded texel
 *
 * The block holding the texel is located as
 *    ((j / 4) * (stride / 8) + (i / 8)) * 16
 * bytes from the start, i.e. blocks cover 8 x 4 texels and occupy 16 bytes.
 * The block mode is taken from bit 125 upwards of the block and used as an
 * index into decode_1: entries 0-1 decode HI, 2 CHROMA, 3 ALPHA and 4-7
 * MIXED.  The computation of the per-block texel selector t from i and j is
 * on lines not shown.
 */
1404 fxt1_decode_1 (const void *texture, int stride, /* in pixels */
1405 int i, int j, byte *rgba)
1407 static void (*decode_1[]) (const byte *, int, byte *) = {
1408 fxt1_decode_1HI, /* cc-high = "00?" */
1409 fxt1_decode_1HI, /* cc-high = "00?" */
1410 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1411 fxt1_decode_1ALPHA, /* alpha = "011" */
1412 fxt1_decode_1MIXED, /* mixed = "1??" */
1413 fxt1_decode_1MIXED, /* mixed = "1??" */
1414 fxt1_decode_1MIXED, /* mixed = "1??" */
1415 fxt1_decode_1MIXED /* mixed = "1??" */
1418 const byte *code = (const byte *)texture +
1419 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1420 int mode = CC_SEL(code, 125);
1428 decode_1[mode](code, t, rgba);