5 * Copyright (C) 2004 Daniel Borca All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 /* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
26 * Added support for ARGB inputs.
38 /***************************************************************************\
41 * The encoder was built by reversing the decoder,
42 * and is vaguely based on Texus2 by 3dfx. Note that this code
43 * is merely a proof of concept, since it is highly UNoptimized;
44 * moreover, it is sub-optimal due to initial conditions passed
45 * to Lloyd's algorithm (the interpolation modes are even worse).
46 \***************************************************************************/
/* Encoder limits and Lloyd's-algorithm tuning constants. */
#define MAX_COMP 4   /* most components a texel can carry (R, G, B, A) */
#define MAX_VECT 4   /* most base vectors any quantizer has to find */
#define N_TEXELS 32  /* number of texels in a block (always 32) */
#define LL_N_REP 50  /* iteration cap for Lloyd's VQ */
#define LL_RMS_D 10  /* fault tolerance: largest error improvement still treated as converged */
#define LL_RMS_E 255 /* fault tolerance: an error below this is accepted as a good match */
#define ALPHA_TS 2   /* alpha threshold: alpha >= (255 - ALPHA_TS) is deemed opaque */

/* Non-zero when the four bytes at v are all zero (a transparent-black texel).
 * The texel is read through a dword pointer, so this presumably relies on
 * dword being 4 bytes wide and v being suitably aligned -- confirm against
 * the dword typedef.
 */
#define ISTBLACK(v) (*((dword *)(v)) == 0)

/* Copy one texel from SRC to DST as a single dword store.  The expansion is
 * fully parenthesized so it remains one expression wherever it is used
 * (e.g. next to a comparison or inside a larger expression).
 */
#define COPY_4UBV(DST, SRC) (*((dword *)(DST)) = *((dword *)(SRC)))
61 fxt1_bestcol (float vec[][MAX_COMP], int nv,
62 byte input[MAX_COMP], int nc)
65 float err = 1e9; /* big enough */
67 for (j = 0; j < nv; j++) {
69 for (i = 0; i < nc; i++) {
70 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
83 fxt1_worst (float vec[MAX_COMP],
84 byte input[N_TEXELS][MAX_COMP], int nc, int n)
87 float err = -1.0F; /* small enough */
89 for (k = 0; k < n; k++) {
91 for (i = 0; i < nc; i++) {
92 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
105 fxt1_variance (double variance[MAX_COMP],
106 byte input[N_TEXELS][MAX_COMP], int nc, int n)
110 double var, maxvar = -1; /* small enough */
111 double teenth = 1.0 / n;
113 for (i = 0; i < nc; i++) {
115 for (k = 0; k < n; k++) {
120 var = sx2 * teenth - sx * sx * teenth * teenth;
135 fxt1_choose (float vec[][MAX_COMP], int nv,
136 byte input[N_TEXELS][MAX_COMP], int nc, int n)
139 /* Choose colors from a grid.
143 for (j = 0; j < nv; j++) {
144 int m = j * (n - 1) / (nv - 1);
145 for (i = 0; i < nc; i++) {
146 vec[j][i] = input[m][i];
150 /* Our solution here is to find the darkest and brightest colors in
151 * the 8x4 tile and use those as the two representative colors.
152 * There are probably better algorithms to use (histogram-based).
156 int minSum = 2000; /* big enough */
158 int minSum = 2000000;
160 int maxSum = -1; /* small enough */
161 int minCol = 0; /* phoudoin: silent compiler! */
162 int maxCol = 0; /* phoudoin: silent compiler! */
172 memset(hist, 0, sizeof(hist));
174 for (k = 0; k < n; k++) {
178 for (i = 0; i < nc; i++) {
184 /* RGB to YCbCr conversion according to CCIR 601 specs
185 * Y = 0.299R+0.587G+0.114B
186 * Cr = 0.713(R - Y) = 0.500R-0.419G-0.081B
187 * Cb = 0.564(B - Y) = -0.169R-0.331G+0.500B
189 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
192 for (l = 0; l < n; l++) {
201 } else if (hist[l].key == key) {
217 for (j = 0; j < lenh; j++) {
218 for (i = 0; i < nc; i++) {
219 vec[j][i] = (float)input[hist[j].idx][i];
222 for (; j < nv; j++) {
223 for (i = 0; i < nc; i++) {
224 vec[j][i] = vec[0][i];
230 for (j = 0; j < nv; j++) {
231 for (i = 0; i < nc; i++) {
232 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
242 fxt1_lloyd (float vec[][MAX_COMP], int nv,
243 byte input[N_TEXELS][MAX_COMP], int nc, int n)
245 /* Use the generalized Lloyd's algorithm for VQ:
246 * find 4 color vectors.
248 * for each sample color
249 * sort to nearest vector.
251 * replace each vector with the centroid of its matching colors.
253 * repeat until RMS doesn't improve.
255 * if a color vector has no samples, or becomes the same as another
256 * vector, replace it with the sample color that is farthest from it.
258 * vec[][MAX_COMP] initial vectors and resulting colors
259 * nv number of resulting colors required
260 * input[N_TEXELS][MAX_COMP] input texels
261 * nc number of components in input / vec
262 * n number of input samples
265 int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
266 int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
267 float error, lasterror = 1e9;
272 for (rep = 0; rep < LL_N_REP; rep++) {
273 /* reset sums & counters */
274 for (j = 0; j < nv; j++) {
275 for (i = 0; i < nc; i++) {
282 /* scan whole block */
283 for (k = 0; k < n; k++) {
286 float err = 1e9; /* big enough */
287 /* determine best vector */
288 for (j = 0; j < nv; j++) {
289 float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
290 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
291 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
293 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
301 int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
303 /* add in closest color */
304 for (i = 0; i < nc; i++) {
305 sum[best][i] += input[k][i];
307 /* mark this vector as used */
309 /* accumulate error */
314 if ((error < LL_RMS_E) ||
315 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
316 return !0; /* good match */
320 /* move each vector to the barycenter of its closest colors */
321 for (j = 0; j < nv; j++) {
323 float div = 1.0F / cnt[j];
324 for (i = 0; i < nc; i++) {
325 vec[j][i] = div * sum[j][i];
328 /* this vec has no samples or is identical with a previous vec */
329 int worst = fxt1_worst(vec[j], input, nc, n);
330 for (i = 0; i < nc; i++) {
331 vec[j][i] = input[worst][i];
337 return 0; /* could not converge fast enough */
342 fxt1_quantize_CHROMA (dword *cc,
343 byte input[N_TEXELS][MAX_COMP])
345 const int n_vect = 4; /* 4 base vectors to find */
346 const int n_comp = 3; /* 3 components: R, G, B */
347 float vec[MAX_VECT][MAX_COMP];
349 qword hi; /* high quadword */
350 dword lohi, lolo; /* low quadword: hi dword, lo dword */
352 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
353 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
356 Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
357 for (j = n_vect - 1; j >= 0; j--) {
358 for (i = 0; i < n_comp; i++) {
361 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
364 ((qword *)cc)[1] = hi;
367 /* right microtile */
368 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
370 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
373 for (; k >= 0; k--) {
375 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
383 fxt1_quantize_ALPHA0 (dword *cc,
384 byte input[N_TEXELS][MAX_COMP],
385 byte reord[N_TEXELS][MAX_COMP], int n)
387 const int n_vect = 3; /* 3 base vectors to find */
388 const int n_comp = 4; /* 4 components: R, G, B, A */
389 float vec[MAX_VECT][MAX_COMP];
391 qword hi; /* high quadword */
392 dword lohi, lolo; /* low quadword: hi dword, lo dword */
394 /* the last vector indicates zero */
395 for (i = 0; i < n_comp; i++) {
399 /* the first n texels in reord are guaranteed to be non-zero */
400 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
401 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
404 Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
405 for (j = n_vect - 1; j >= 0; j--) {
408 Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
410 for (j = n_vect - 1; j >= 0; j--) {
411 for (i = 0; i < n_comp - 1; i++) {
414 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
417 ((qword *)cc)[1] = hi;
420 /* right microtile */
421 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
423 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
426 for (; k >= 0; k--) {
428 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
436 fxt1_quantize_ALPHA1 (dword *cc,
437 byte input[N_TEXELS][MAX_COMP])
439 const int n_vect = 3; /* highest vector number in each microtile */
440 const int n_comp = 4; /* 4 components: R, G, B, A */
441 float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
442 float b, iv[MAX_COMP]; /* interpolation vector */
444 qword hi; /* high quadword */
445 dword lohi, lolo; /* low quadword: hi dword, lo dword */
449 int minColL = 0, maxColL = 0;
450 int minColR = 0, maxColR = 0;
451 int sumL = 0, sumR = 0;
453 /* Our solution here is to find the darkest and brightest colors in
454 * the 4x4 tile and use those as the two representative colors.
455 * There are probably better algorithms to use (histogram-based).
458 minSum = 2000; /* big enough */
462 maxSum = -1; /* small enough */
463 for (k = 0; k < N_TEXELS / 2; k++) {
466 for (i = 0; i < n_comp; i++) {
470 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
483 minSum = 2000; /* big enough */
487 maxSum = -1; /* small enough */
488 for (; k < N_TEXELS; k++) {
491 for (i = 0; i < n_comp; i++) {
495 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
508 /* choose the common vector (yuck!) */
512 float err = 1e9; /* big enough */
513 float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
514 for (i = 0; i < n_comp; i++) {
515 tv[0][i] = input[minColL][i];
516 tv[1][i] = input[maxColL][i];
517 tv[2][i] = input[minColR][i];
518 tv[3][i] = input[maxColR][i];
520 for (j1 = 0; j1 < 2; j1++) {
521 for (j2 = 2; j2 < 4; j2++) {
523 for (i = 0; i < n_comp; i++) {
524 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
533 for (i = 0; i < n_comp; i++) {
534 vec[0][i] = tv[1 - v1][i];
535 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
536 vec[2][i] = tv[5 - v2][i];
542 if (minColL != maxColL) {
543 /* compute interpolation vector */
544 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
548 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
550 /* interpolate color */
551 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
560 /* right microtile */
562 if (minColR != maxColR) {
563 /* compute interpolation vector */
564 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
568 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
570 /* interpolate color */
571 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
580 Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
581 for (j = n_vect - 1; j >= 0; j--) {
584 Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
586 for (j = n_vect - 1; j >= 0; j--) {
587 for (i = 0; i < n_comp - 1; i++) {
590 Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
593 ((qword *)cc)[1] = hi;
598 fxt1_quantize_HI (dword *cc,
599 byte input[N_TEXELS][MAX_COMP],
600 byte reord[N_TEXELS][MAX_COMP], int n)
602 const int n_vect = 6; /* highest vector number */
603 const int n_comp = 3; /* 3 components: R, G, B */
604 float b = 0.0F; /* phoudoin: silent compiler! */
605 float iv[MAX_COMP]; /* interpolation vector */
607 dword hihi; /* high quadword: hi dword */
610 int minSum = 2000; /* big enough */
612 int minSum = 2000000;
614 int maxSum = -1; /* small enough */
615 int minCol = 0; /* phoudoin: silent compiler! */
616 int maxCol = 0; /* phoudoin: silent compiler! */
618 /* Our solution here is to find the darkest and brightest colors in
619 * the 8x4 tile and use those as the two representative colors.
620 * There are probably better algorithms to use (histogram-based).
622 for (k = 0; k < n; k++) {
625 for (i = 0; i < n_comp; i++) {
629 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
641 hihi = 0; /* cc-hi = "00" */
642 for (i = 0; i < n_comp; i++) {
645 hihi |= reord[maxCol][i] >> 3;
647 for (i = 0; i < n_comp; i++) {
650 hihi |= reord[minCol][i] >> 3;
653 cc[0] = cc[1] = cc[2] = 0;
655 /* compute interpolation vector */
656 if (minCol != maxCol) {
657 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
661 for (k = N_TEXELS - 1; k >= 0; k--) {
663 dword *kk = (dword *)((byte *)cc + t / 8);
664 int texel = n_vect + 1; /* transparent black */
666 if (!ISTBLACK(input[k])) {
667 if (minCol != maxCol) {
668 /* interpolate color */
669 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
671 kk[0] |= texel << (t & 7);
675 kk[0] |= texel << (t & 7);
682 fxt1_quantize_MIXED1 (dword *cc,
683 byte input[N_TEXELS][MAX_COMP])
685 const int n_vect = 2; /* highest vector number in each microtile */
686 const int n_comp = 3; /* 3 components: R, G, B */
687 byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
688 float b, iv[MAX_COMP]; /* interpolation vector */
690 qword hi; /* high quadword */
691 dword lohi, lolo; /* low quadword: hi dword, lo dword */
695 int minColL = 0, maxColL = -1;
696 int minColR = 0, maxColR = -1;
698 /* Our solution here is to find the darkest and brightest colors in
699 * the 4x4 tile and use those as the two representative colors.
700 * There are probably better algorithms to use (histogram-based).
703 minSum = 2000; /* big enough */
707 maxSum = -1; /* small enough */
708 for (k = 0; k < N_TEXELS / 2; k++) {
709 if (!ISTBLACK(input[k])) {
712 for (i = 0; i < n_comp; i++) {
716 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
729 minSum = 2000; /* big enough */
733 maxSum = -1; /* small enough */
734 for (; k < N_TEXELS; k++) {
735 if (!ISTBLACK(input[k])) {
738 for (i = 0; i < n_comp; i++) {
742 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
757 /* all transparent black */
759 for (i = 0; i < n_comp; i++) {
765 for (i = 0; i < n_comp; i++) {
766 vec[0][i] = input[minColL][i];
767 vec[1][i] = input[maxColL][i];
769 if (minColL != maxColL) {
770 /* compute interpolation vector */
771 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
775 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
776 int texel = n_vect + 1; /* transparent black */
777 if (!ISTBLACK(input[k])) {
778 /* interpolate color */
779 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
789 /* right microtile */
791 /* all transparent black */
793 for (i = 0; i < n_comp; i++) {
799 for (i = 0; i < n_comp; i++) {
800 vec[2][i] = input[minColR][i];
801 vec[3][i] = input[maxColR][i];
803 if (minColR != maxColR) {
804 /* compute interpolation vector */
805 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
809 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
810 int texel = n_vect + 1; /* transparent black */
811 if (!ISTBLACK(input[k])) {
812 /* interpolate color */
813 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
823 Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
824 for (j = 2 * 2 - 1; j >= 0; j--) {
825 for (i = 0; i < n_comp; i++) {
828 Q_OR32(hi, vec[j][i] >> 3);
831 ((qword *)cc)[1] = hi;
836 fxt1_quantize_MIXED0 (dword *cc,
837 byte input[N_TEXELS][MAX_COMP])
839 const int n_vect = 3; /* highest vector number in each microtile */
840 const int n_comp = 3; /* 3 components: R, G, B */
841 byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
842 float b, iv[MAX_COMP]; /* interpolation vector */
844 qword hi; /* high quadword */
845 dword lohi, lolo; /* low quadword: hi dword, lo dword */
847 int minColL = 0, maxColL = 0;
848 int minColR = 0, maxColR = 0;
853 /* Our solution here is to find the darkest and brightest colors in
854 * the 4x4 tile and use those as the two representative colors.
855 * There are probably better algorithms to use (histogram-based).
858 minSum = 2000; /* big enough */
862 maxSum = -1; /* small enough */
863 for (k = 0; k < N_TEXELS / 2; k++) {
866 for (i = 0; i < n_comp; i++) {
870 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
881 minSum = 2000; /* big enough */
882 maxSum = -1; /* small enough */
883 for (; k < N_TEXELS; k++) {
886 for (i = 0; i < n_comp; i++) {
890 sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
904 int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
905 int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
907 /* Scan the channel with max variance for lo & hi
908 * and use those as the two representative colors.
910 minVal = 2000; /* big enough */
911 maxVal = -1; /* small enough */
912 for (k = 0; k < N_TEXELS / 2; k++) {
913 int t = input[k][maxVarL];
923 minVal = 2000; /* big enough */
924 maxVal = -1; /* small enough */
925 for (; k < N_TEXELS; k++) {
926 int t = input[k][maxVarR];
940 for (i = 0; i < n_comp; i++) {
941 vec[0][i] = input[minColL][i];
942 vec[1][i] = input[maxColL][i];
944 if (minColL != maxColL) {
945 /* compute interpolation vector */
946 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
950 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
952 /* interpolate color */
953 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
959 /* funky encoding for LSB of green */
960 if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
961 for (i = 0; i < n_comp; i++) {
962 vec[1][i] = input[minColL][i];
963 vec[0][i] = input[maxColL][i];
971 /* right microtile */
973 for (i = 0; i < n_comp; i++) {
974 vec[2][i] = input[minColR][i];
975 vec[3][i] = input[maxColR][i];
977 if (minColR != maxColR) {
978 /* compute interpolation vector */
979 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
983 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
985 /* interpolate color */
986 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
992 /* funky encoding for LSB of green */
993 if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
994 for (i = 0; i < n_comp; i++) {
995 vec[3][i] = input[minColR][i];
996 vec[2][i] = input[maxColR][i];
1004 Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1005 for (j = 2 * 2 - 1; j >= 0; j--) {
1006 for (i = 0; i < n_comp; i++) {
1009 Q_OR32(hi, vec[j][i] >> 3);
1012 ((qword *)cc)[1] = hi;
1017 fxt1_quantize (dword *cc, const byte *lines[], int comps)
1020 byte reord[N_TEXELS][MAX_COMP];
1022 byte input[N_TEXELS][MAX_COMP];
1029 /* make the whole block opaque */
1030 memset(input, -1, sizeof(input));
1033 /* 8 texels each line */
1035 for (l = 0; l < 4; l++) {
1036 for (k = 0; k < 4; k++) {
1037 for (i = 0; i < comps; i++) {
1038 input[k + l * 4][i] = *lines[l]++;
1041 for (; k < 8; k++) {
1042 for (i = 0; i < comps; i++) {
1043 input[k + l * 4 + 12][i] = *lines[l]++;
1048 /* H.Morii - support for ARGB inputs */
1049 for (l = 0; l < 4; l++) {
1050 for (k = 0; k < 4; k++) {
1051 input[k + l * 4][2] = *lines[l]++;
1052 input[k + l * 4][1] = *lines[l]++;
1053 input[k + l * 4][0] = *lines[l]++;
1054 if (comps == 4) input[k + l * 4][3] = *lines[l]++;
1056 for (; k < 8; k++) {
1057 input[k + l * 4 + 12][2] = *lines[l]++;
1058 input[k + l * 4 + 12][1] = *lines[l]++;
1059 input[k + l * 4 + 12][0] = *lines[l]++;
1060 if (comps == 4) input[k + l * 4 + 12][3] = *lines[l]++;
1066 * 00, 01, 02, 03, 08, 09, 0a, 0b
1067 * 10, 11, 12, 13, 18, 19, 1a, 1b
1068 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1069 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1073 * stupidity flows forth from this
1078 /* skip all transparent black texels */
1080 for (k = 0; k < N_TEXELS; k++) {
1081 /* test all components against 0 */
1082 if (!ISTBLACK(input[k])) {
1083 /* texel is not transparent black */
1084 COPY_4UBV(reord[l], input[k]);
1085 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1086 /* non-opaque texel */
1096 fxt1_quantize_ALPHA0(cc, input, reord, l);
1097 } else if (l == 0) {
1098 cc[0] = cc[1] = cc[2] = -1;
1100 } else if (l < N_TEXELS) {
1101 fxt1_quantize_HI(cc, input, reord, l);
1103 fxt1_quantize_CHROMA(cc, input);
1105 (void)fxt1_quantize_ALPHA1;
1106 (void)fxt1_quantize_MIXED1;
1107 (void)fxt1_quantize_MIXED0;
1110 fxt1_quantize_ALPHA1(cc, input);
1111 } else if (l == 0) {
1112 cc[0] = cc[1] = cc[2] = 0xFFFFFFFF;
1114 } else if (l < N_TEXELS) {
1115 fxt1_quantize_MIXED1(cc, input);
1117 fxt1_quantize_MIXED0(cc, input);
1119 (void)fxt1_quantize_ALPHA0;
1120 (void)fxt1_quantize_HI;
1121 (void)fxt1_quantize_CHROMA;
1127 fxt1_encode (int width, int height, int comps,
1128 const void *source, int srcRowStride,
1129 void *dest, int destRowStride)
1133 dword *encoded = (dword *)dest;
1134 void *newSource = NULL;
1136 /* Replicate image if width is not a multiple of 8 or height is not a multiple of 4 */
1137 if ((width & 7) | (height & 3)) {
1138 int newWidth = (width + 7) & ~7;
1139 int newHeight = (height + 3) & ~3;
1140 newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));
1141 _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1142 comps, (const byte *)source,
1143 srcRowStride, (byte *)newSource);
1147 srcRowStride = comps * newWidth;
1150 data = (const byte *)source;
1151 destRowStride = (destRowStride - width * 2) / 4;
1152 for (y = 0; y < height; y += 4) {
1153 unsigned int offs = 0 + (y + 0) * srcRowStride;
1154 for (x = 0; x < width; x += 8) {
1155 const byte *lines[4];
1156 lines[0] = &data[offs];
1157 lines[1] = lines[0] + srcRowStride;
1158 lines[2] = lines[1] + srcRowStride;
1159 lines[3] = lines[2] + srcRowStride;
1161 fxt1_quantize(encoded, lines, comps);
1162 /* 128 bits per 8x4 block */
1165 encoded += destRowStride;
1168 if (newSource != NULL) {
1176 /***************************************************************************\
1179 * The decoder is based on GL_3DFX_texture_compression_FXT1
1180 * specification and serves as a concept for the encoder.
1181 \***************************************************************************/
1184 /* lookup table for scaling 5 bit colors up to 8 bits */
1185 static const byte _rgb_scale_5[] = {
1186 0, 8, 16, 25, 33, 41, 49, 58,
1187 66, 74, 82, 90, 99, 107, 115, 123,
1188 132, 140, 148, 156, 165, 173, 181, 189,
1189 197, 206, 214, 222, 230, 239, 247, 255
1192 /* lookup table for scaling 6 bit colors up to 8 bits */
1193 static const byte _rgb_scale_6[] = {
1194 0, 4, 8, 12, 16, 20, 24, 28,
1195 32, 36, 40, 45, 49, 53, 57, 61,
1196 65, 69, 73, 77, 81, 85, 89, 93,
1197 97, 101, 105, 109, 113, 117, 121, 125,
1198 130, 134, 138, 142, 146, 150, 154, 158,
1199 162, 166, 170, 174, 178, 182, 186, 190,
1200 194, 198, 202, 206, 210, 215, 219, 223,
1201 227, 231, 235, 239, 243, 247, 251, 255
/* Shift the dword of cc that holds bit number `which` right so that bit lands
 * in bit 0.  Higher bits are not masked off (callers mask, e.g. via UP5), and
 * a field that straddles a dword boundary is not assembled by this macro.
 */
#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))

/* Expand the low 5 bits of c to 8 bits through the _rgb_scale_5 table. */
#define UP5(c) _rgb_scale_5[(c) & 31]

/* Expand a 6-bit value to 8 bits through the _rgb_scale_6 table: the low
 * 5 bits of c form the upper part of the index, the low bit of b the LSB.
 */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]

/* Rounded integer interpolation between c0 (t == 0) and c1 (t == n).
 * The whole expansion is parenthesized so it can be embedded in larger
 * expressions without the trailing division capturing neighbouring operands.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))

/* Clear the texel at v with a single dword store (parenthesized so the
 * assignment stays one expression).
 */
#define ZERO_4UBV(v) (*((dword *)(v)) = 0)
1213 fxt1_decode_1HI (const byte *code, int t, byte *rgba)
1218 cc = (const dword *)(code + t / 8);
1219 t = (cc[0] >> (t & 7)) & 7;
1224 cc = (const dword *)(code + 12);
1226 rgba[BCOMP] = UP5(CC_SEL(cc, 0));
1227 rgba[GCOMP] = UP5(CC_SEL(cc, 5));
1228 rgba[RCOMP] = UP5(CC_SEL(cc, 10));
1229 } else if (t == 6) {
1230 rgba[BCOMP] = UP5(CC_SEL(cc, 15));
1231 rgba[GCOMP] = UP5(CC_SEL(cc, 20));
1232 rgba[RCOMP] = UP5(CC_SEL(cc, 25));
1234 rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1235 rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1236 rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1244 fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba)
1249 cc = (const dword *)code;
1254 t = (cc[0] >> (t * 2)) & 3;
1257 cc = (const dword *)(code + 8 + t / 8);
1258 kk = cc[0] >> (t & 7);
1259 rgba[BCOMP] = UP5(kk);
1260 rgba[GCOMP] = UP5(kk >> 5);
1261 rgba[RCOMP] = UP5(kk >> 10);
1267 fxt1_decode_1MIXED (const byte *code, int t, byte *rgba)
1273 cc = (const dword *)code;
1276 t = (cc[1] >> (t * 2)) & 3;
1278 col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6;
1279 col[0][GCOMP] = CC_SEL(cc, 99);
1280 col[0][RCOMP] = CC_SEL(cc, 104);
1282 col[1][BCOMP] = CC_SEL(cc, 109);
1283 col[1][GCOMP] = CC_SEL(cc, 114);
1284 col[1][RCOMP] = CC_SEL(cc, 119);
1285 glsb = CC_SEL(cc, 126);
1286 selb = CC_SEL(cc, 33);
1288 t = (cc[0] >> (t * 2)) & 3;
1290 col[0][BCOMP] = CC_SEL(cc, 64);
1291 col[0][GCOMP] = CC_SEL(cc, 69);
1292 col[0][RCOMP] = CC_SEL(cc, 74);
1294 col[1][BCOMP] = CC_SEL(cc, 79);
1295 col[1][GCOMP] = CC_SEL(cc, 84);
1296 col[1][RCOMP] = CC_SEL(cc, 89);
1297 glsb = CC_SEL(cc, 125);
1298 selb = CC_SEL(cc, 1);
1301 if (CC_SEL(cc, 124) & 1) {
1308 rgba[BCOMP] = UP5(col[0][BCOMP]);
1309 rgba[GCOMP] = UP5(col[0][GCOMP]);
1310 rgba[RCOMP] = UP5(col[0][RCOMP]);
1311 } else if (t == 2) {
1312 rgba[BCOMP] = UP5(col[1][BCOMP]);
1313 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1314 rgba[RCOMP] = UP5(col[1][RCOMP]);
1316 rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1317 rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1318 rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1326 rgba[BCOMP] = UP5(col[0][BCOMP]);
1327 rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
1328 rgba[RCOMP] = UP5(col[0][RCOMP]);
1329 } else if (t == 3) {
1330 rgba[BCOMP] = UP5(col[1][BCOMP]);
1331 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1332 rgba[RCOMP] = UP5(col[1][RCOMP]);
1334 rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1335 rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1336 UP6(col[1][GCOMP], glsb));
1337 rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1345 fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba)
1349 cc = (const dword *)code;
1350 if (CC_SEL(cc, 124) & 1) {
1356 t = (cc[1] >> (t * 2)) & 3;
1358 col0[BCOMP] = (*(const dword *)(code + 11)) >> 6;
1359 col0[GCOMP] = CC_SEL(cc, 99);
1360 col0[RCOMP] = CC_SEL(cc, 104);
1361 col0[ACOMP] = CC_SEL(cc, 119);
1363 t = (cc[0] >> (t * 2)) & 3;
1365 col0[BCOMP] = CC_SEL(cc, 64);
1366 col0[GCOMP] = CC_SEL(cc, 69);
1367 col0[RCOMP] = CC_SEL(cc, 74);
1368 col0[ACOMP] = CC_SEL(cc, 109);
1372 rgba[BCOMP] = UP5(col0[BCOMP]);
1373 rgba[GCOMP] = UP5(col0[GCOMP]);
1374 rgba[RCOMP] = UP5(col0[RCOMP]);
1375 rgba[ACOMP] = UP5(col0[ACOMP]);
1376 } else if (t == 3) {
1377 rgba[BCOMP] = UP5(CC_SEL(cc, 79));
1378 rgba[GCOMP] = UP5(CC_SEL(cc, 84));
1379 rgba[RCOMP] = UP5(CC_SEL(cc, 89));
1380 rgba[ACOMP] = UP5(CC_SEL(cc, 114));
1382 rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1383 rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1384 rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1385 rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1394 t = (cc[0] >> (t * 2)) & 3;
1400 cc = (const dword *)code;
1401 rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
1403 cc = (const dword *)(code + 8 + t / 8);
1404 kk = cc[0] >> (t & 7);
1405 rgba[BCOMP] = UP5(kk);
1406 rgba[GCOMP] = UP5(kk >> 5);
1407 rgba[RCOMP] = UP5(kk >> 10);
1414 fxt1_decode_1 (const void *texture, int stride,
1415 int i, int j, byte *rgba)
1417 static void (*decode_1[]) (const byte *, int, byte *) = {
1418 fxt1_decode_1HI, /* cc-high = "00?" */
1419 fxt1_decode_1HI, /* cc-high = "00?" */
1420 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1421 fxt1_decode_1ALPHA, /* alpha = "011" */
1422 fxt1_decode_1MIXED, /* mixed = "1??" */
1423 fxt1_decode_1MIXED, /* mixed = "1??" */
1424 fxt1_decode_1MIXED, /* mixed = "1??" */
1425 fxt1_decode_1MIXED /* mixed = "1??" */
1428 const byte *code = (const byte *)texture +
1429 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1430 int mode = CC_SEL(code, 125);
1438 decode_1[mode](code, t, rgba);
1442 extern int cc_chroma;
1443 extern int cc_alpha;
1445 extern int cc_mixed;
1446 static int *cctype[] = {