source/gles2glide64/src/GlideHQ/tc-1.1+/fxt1.c

   1 /*
   2  * FXT1 codec
   3  * Version:  1.1
   4  *
   5  * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11  * and/or sell copies of the Software, and to permit persons to whom the
  12  * Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included
  15  * in all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /* Copyright (C) 2007  Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
  26  * Added support for ARGB inputs.
  27  */
  28
  29
  30 #include <stdlib.h>
  31 #include <string.h>
  32
  33 #include "types.h"
  34 #include "internal.h"
  35 #include "fxt1.h"
  36
  37
  38 /***************************************************************************\
  39  * FXT1 encoder
  40  *
  41  * The encoder was built by reversing the decoder,
  42  * and is vaguely based on Texus2 by 3dfx. Note that this code
  43  * is merely a proof of concept, since it is highly UNoptimized;
  44  * moreover, it is sub-optimal due to initial conditions passed
  45  * to Lloyd's algorithm (the interpolation modes are even worse).
  46 \***************************************************************************/
  47
  48
/* Encoder limits and tuning knobs, followed by two whole-texel helpers. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
/* Upper bound on the number of passes fxt1_lloyd makes over a block. */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
/* fxt1_lloyd stops once the summed squared error improves by less than this. */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
/* fxt1_lloyd stops once the summed squared error drops below this. */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/* Non-zero when the dword read at v equals 0, i.e. the texel is "transparent
 * black".  The read goes through a dword pointer.
 * NOTE(review): assumes dword is exactly as wide as one MAX_COMP-byte texel
 * and that v is suitably aligned for it -- confirm against types.h.
 */
#define ISTBLACK(v) (*((dword *)(v)) == 0)
/* Copies one dword from SRC to DST.  The expansion is a bare assignment with
 * no enclosing parentheses, so use it only as a complete statement.
 */
#define COPY_4UBV(DST, SRC) *((dword *)(DST)) = *((dword *)(SRC))
  58
  59
  60 static int
  61 fxt1_bestcol (float vec[][MAX_COMP], int nv,
  62               byte input[MAX_COMP], int nc)
  63 {
  64     int i, j, best = -1;
  65     float err = 1e9; /* big enough */
  66
  67     for (j = 0; j < nv; j++) {
  68         float e = 0.0F;
  69         for (i = 0; i < nc; i++) {
  70             e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
  71         }
  72         if (e < err) {
  73             err = e;
  74             best = j;
  75         }
  76     }
  77
  78     return best;
  79 }
  80
  81
  82 static int
  83 fxt1_worst (float vec[MAX_COMP],
  84             byte input[N_TEXELS][MAX_COMP], int nc, int n)
  85 {
  86     int i, k, worst = -1;
  87     float err = -1.0F; /* small enough */
  88
  89     for (k = 0; k < n; k++) {
  90         float e = 0.0F;
  91         for (i = 0; i < nc; i++) {
  92             e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
  93         }
  94         if (e > err) {
  95             err = e;
  96             worst = k;
  97         }
  98     }
  99
 100     return worst;
 101 }
 102
 103
 104 static int
 105 fxt1_variance (double variance[MAX_COMP],
 106                byte input[N_TEXELS][MAX_COMP], int nc, int n)
 107 {
 108     int i, k, best = 0;
 109     dword sx, sx2;
 110     double var, maxvar = -1; /* small enough */
 111     double teenth = 1.0 / n;
 112
 113     for (i = 0; i < nc; i++) {
 114         sx = sx2 = 0;
 115         for (k = 0; k < n; k++) {
 116             int t = input[k][i];
 117             sx += t;
 118             sx2 += t * t;
 119         }
 120         var = sx2 * teenth - sx * sx * teenth * teenth;
 121         if (maxvar < var) {
 122             maxvar = var;
 123             best = i;
 124         }
 125         if (variance) {
 126             variance[i] = var;
 127         }
 128     }
 129
 130     return best;
 131 }
 132
 133
 134 static int
 135 fxt1_choose (float vec[][MAX_COMP], int nv,
 136              byte input[N_TEXELS][MAX_COMP], int nc, int n)
 137 {
 138 #if 0
 139     /* Choose colors from a grid.
 140      */
 141     int i, j;
 142
 143     for (j = 0; j < nv; j++) {
 144         int m = j * (n - 1) / (nv - 1);
 145         for (i = 0; i < nc; i++) {
 146             vec[j][i] = input[m][i];
 147         }
 148     }
 149 #else
 150     /* Our solution here is to find the darkest and brightest colors in
 151      * the 8x4 tile and use those as the two representative colors.
 152      * There are probably better algorithms to use (histogram-based).
 153      */
 154     int i, j, k;
 155 #ifndef YUV
 156     int minSum = 2000; /* big enough */
 157 #else
 158     int minSum = 2000000;
 159 #endif
 160     int maxSum = -1; /* small enough */
 161     int minCol = 0; /* phoudoin: silent compiler! */
 162     int maxCol = 0; /* phoudoin: silent compiler! */
 163
 164     struct {
 165         int flag;
 166         dword key;
 167         int freq;
 168         int idx;
 169     } hist[N_TEXELS];
 170     int lenh = 0;
 171
 172     memset(hist, 0, sizeof(hist));
 173
 174     for (k = 0; k < n; k++) {
 175         int l;
 176         dword key = 0;
 177         int sum = 0;
 178         for (i = 0; i < nc; i++) {
 179             key <<= 8;
 180             key |= input[k][i];
 181 #ifndef YUV
 182             sum += input[k][i];
 183 #else
 184             /* RGB to YUV conversion according to CCIR 601 specs
 185              * Y = 0.299R+0.587G+0.114B
 186              * U = 0.713(R - Y) = 0.500R-0.419G-0.081B
 187              * V = 0.564(B - Y) = -0.169R-0.331G+0.500B
 188              */
 189             sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
 190 #endif
 191         }
 192         for (l = 0; l < n; l++) {
 193             if (!hist[l].flag) {
 194                 /* alloc new slot */
 195                 hist[l].flag = !0;
 196                 hist[l].key = key;
 197                 hist[l].freq = 1;
 198                 hist[l].idx = k;
 199                 lenh = l + 1;
 200                 break;
 201             } else if (hist[l].key == key) {
 202                 hist[l].freq++;
 203                 break;
 204             }
 205         }
 206         if (minSum > sum) {
 207             minSum = sum;
 208             minCol = k;
 209         }
 210         if (maxSum < sum) {
 211             maxSum = sum;
 212             maxCol = k;
 213         }
 214     }
 215
 216     if (lenh <= nv) {
 217         for (j = 0; j < lenh; j++) {
 218             for (i = 0; i < nc; i++) {
 219                 vec[j][i] = (float)input[hist[j].idx][i];
 220             }
 221         }
 222         for (; j < nv; j++) {
 223             for (i = 0; i < nc; i++) {
 224                 vec[j][i] = vec[0][i];
 225             }
 226         }
 227         return 0;
 228     }
 229
 230     for (j = 0; j < nv; j++) {
 231         for (i = 0; i < nc; i++) {
 232             vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
 233         }
 234     }
 235 #endif
 236
 237     return !0;
 238 }
 239
 240
 241 static int
 242 fxt1_lloyd (float vec[][MAX_COMP], int nv,
 243             byte input[N_TEXELS][MAX_COMP], int nc, int n)
 244 {
 245     /* Use the generalized lloyd's algorithm for VQ:
 246      *     find 4 color vectors.
 247      *
 248      *     for each sample color
 249      *         sort to nearest vector.
 250      *
 251      *     replace each vector with the centroid of it's matching colors.
 252      *
 253      *     repeat until RMS doesn't improve.
 254      *
 255      *     if a color vector has no samples, or becomes the same as another
 256      *     vector, replace it with the color which is farthest from a sample.
 257      *
 258      * vec[][MAX_COMP]           initial vectors and resulting colors
 259      * nv                        number of resulting colors required
 260      * input[N_TEXELS][MAX_COMP] input texels
 261      * nc                        number of components in input / vec
 262      * n                         number of input samples
 263      */
 264
 265     int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 266     int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 267     float error, lasterror = 1e9;
 268
 269     int i, j, k, rep;
 270
 271     /* the quantizer */
 272     for (rep = 0; rep < LL_N_REP; rep++) {
 273         /* reset sums & counters */
 274         for (j = 0; j < nv; j++) {
 275             for (i = 0; i < nc; i++) {
 276                 sum[j][i] = 0;
 277             }
 278             cnt[j] = 0;
 279         }
 280         error = 0;
 281
 282         /* scan whole block */
 283         for (k = 0; k < n; k++) {
 284 #if 1
 285             int best = -1;
 286             float err = 1e9; /* big enough */
 287             /* determine best vector */
 288             for (j = 0; j < nv; j++) {
 289                 float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 290                           (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 291                           (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 292                 if (nc == 4) {
 293                     e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 294                 }
 295                 if (e < err) {
 296                     err = e;
 297                     best = j;
 298                 }
 299             }
 300 #else
 301             int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 302 #endif
 303             /* add in closest color */
 304             for (i = 0; i < nc; i++) {
 305                 sum[best][i] += input[k][i];
 306             }
 307             /* mark this vector as used */
 308             cnt[best]++;
 309             /* accumulate error */
 310             error += err;
 311         }
 312
 313         /* check RMS */
 314         if ((error < LL_RMS_E) ||
 315             ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 316             return !0; /* good match */
 317         }
 318         lasterror = error;
 319
 320         /* move each vector to the barycenter of its closest colors */
 321         for (j = 0; j < nv; j++) {
 322             if (cnt[j]) {
 323                 float div = 1.0F / cnt[j];
 324                 for (i = 0; i < nc; i++) {
 325                     vec[j][i] = div * sum[j][i];
 326                 }
 327             } else {
 328                 /* this vec has no samples or is identical with a previous vec */
 329                 int worst = fxt1_worst(vec[j], input, nc, n);
 330                 for (i = 0; i < nc; i++) {
 331                     vec[j][i] = input[worst][i];
 332                 }
 333             }
 334         }
 335     }
 336
 337     return 0; /* could not converge fast enough */
 338 }
 339
 340
 341 static void
 342 fxt1_quantize_CHROMA (dword *cc,
 343                       byte input[N_TEXELS][MAX_COMP])
 344 {
 345     const int n_vect = 4; /* 4 base vectors to find */
 346     const int n_comp = 3; /* 3 components: R, G, B */
 347     float vec[MAX_VECT][MAX_COMP];
 348     int i, j, k;
 349     qword hi; /* high quadword */
 350     dword lohi, lolo; /* low quadword: hi dword, lo dword */
 351
 352     if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 353         fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 354     }
 355
 356     Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 357     for (j = n_vect - 1; j >= 0; j--) {
 358         for (i = 0; i < n_comp; i++) {
 359             /* add in colors */
 360             Q_SHL(hi, 5);
 361             Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
 362         }
 363     }
 364     ((qword *)cc)[1] = hi;
 365
 366     lohi = lolo = 0;
 367     /* right microtile */
 368     for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 369         lohi <<= 2;
 370         lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 371     }
 372     /* left microtile */
 373     for (; k >= 0; k--) {
 374         lolo <<= 2;
 375         lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 376     }
 377     cc[1] = lohi;
 378     cc[0] = lolo;
 379 }
 380
 381
 382 static void
 383 fxt1_quantize_ALPHA0 (dword *cc,
 384                       byte input[N_TEXELS][MAX_COMP],
 385                       byte reord[N_TEXELS][MAX_COMP], int n)
 386 {
 387     const int n_vect = 3; /* 3 base vectors to find */
 388     const int n_comp = 4; /* 4 components: R, G, B, A */
 389     float vec[MAX_VECT][MAX_COMP];
 390     int i, j, k;
 391     qword hi; /* high quadword */
 392     dword lohi, lolo; /* low quadword: hi dword, lo dword */
 393
 394     /* the last vector indicates zero */
 395     for (i = 0; i < n_comp; i++) {
 396         vec[n_vect][i] = 0;
 397     }
 398
 399     /* the first n texels in reord are guaranteed to be non-zero */
 400     if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 401         fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 402     }
 403
 404     Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 405     for (j = n_vect - 1; j >= 0; j--) {
 406         /* add in alphas */
 407         Q_SHL(hi, 5);
 408         Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
 409     }
 410     for (j = n_vect - 1; j >= 0; j--) {
 411         for (i = 0; i < n_comp - 1; i++) {
 412             /* add in colors */
 413             Q_SHL(hi, 5);
 414             Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
 415         }
 416     }
 417     ((qword *)cc)[1] = hi;
 418
 419     lohi = lolo = 0;
 420     /* right microtile */
 421     for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 422         lohi <<= 2;
 423         lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 424     }
 425     /* left microtile */
 426     for (; k >= 0; k--) {
 427         lolo <<= 2;
 428         lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 429     }
 430     cc[1] = lohi;
 431     cc[0] = lolo;
 432 }
 433
 434
 435 static void
 436 fxt1_quantize_ALPHA1 (dword *cc,
 437                       byte input[N_TEXELS][MAX_COMP])
 438 {
 439     const int n_vect = 3; /* highest vector number in each microtile */
 440     const int n_comp = 4; /* 4 components: R, G, B, A */
 441     float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 442     float b, iv[MAX_COMP]; /* interpolation vector */
 443     int i, j, k;
 444     qword hi; /* high quadword */
 445     dword lohi, lolo; /* low quadword: hi dword, lo dword */
 446
 447     int minSum;
 448     int maxSum;
 449     int minColL = 0, maxColL = 0;
 450     int minColR = 0, maxColR = 0;
 451     int sumL = 0, sumR = 0;
 452
 453     /* Our solution here is to find the darkest and brightest colors in
 454      * the 4x4 tile and use those as the two representative colors.
 455      * There are probably better algorithms to use (histogram-based).
 456      */
 457 #ifndef YUV
 458     minSum = 2000; /* big enough */
 459 #else
 460     minSum = 2000000;
 461 #endif
 462     maxSum = -1; /* small enough */
 463     for (k = 0; k < N_TEXELS / 2; k++) {
 464         int sum = 0;
 465 #ifndef YUV
 466         for (i = 0; i < n_comp; i++) {
 467             sum += input[k][i];
 468         }
 469 #else
 470         sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
 471 #endif
 472         if (minSum > sum) {
 473             minSum = sum;
 474             minColL = k;
 475         }
 476         if (maxSum < sum) {
 477             maxSum = sum;
 478             maxColL = k;
 479         }
 480         sumL += sum;
 481     }
 482 #ifndef YUV
 483     minSum = 2000; /* big enough */
 484 #else
 485     minSum = 2000000;
 486 #endif
 487     maxSum = -1; /* small enough */
 488     for (; k < N_TEXELS; k++) {
 489         int sum = 0;
 490 #ifndef YUV
 491         for (i = 0; i < n_comp; i++) {
 492             sum += input[k][i];
 493         }
 494 #else
 495         sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
 496 #endif
 497         if (minSum > sum) {
 498             minSum = sum;
 499             minColR = k;
 500         }
 501         if (maxSum < sum) {
 502             maxSum = sum;
 503             maxColR = k;
 504         }
 505         sumR += sum;
 506     }
 507
 508     /* choose the common vector (yuck!) */
 509     {
 510         int j1, j2;
 511         int v1 = 0, v2 = 0;
 512         float err = 1e9; /* big enough */
 513         float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 514         for (i = 0; i < n_comp; i++) {
 515             tv[0][i] = input[minColL][i];
 516             tv[1][i] = input[maxColL][i];
 517             tv[2][i] = input[minColR][i];
 518             tv[3][i] = input[maxColR][i];
 519         }
 520         for (j1 = 0; j1 < 2; j1++) {
 521             for (j2 = 2; j2 < 4; j2++) {
 522                 float e = 0.0F;
 523                 for (i = 0; i < n_comp; i++) {
 524                     e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 525                 }
 526                 if (e < err) {
 527                     err = e;
 528                     v1 = j1;
 529                     v2 = j2;
 530                 }
 531             }
 532         }
 533         for (i = 0; i < n_comp; i++) {
 534             vec[0][i] = tv[1 - v1][i];
 535             vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 536             vec[2][i] = tv[5 - v2][i];
 537         }
 538     }
 539
 540     /* left microtile */
 541     cc[0] = 0;
 542     if (minColL != maxColL) {
 543         /* compute interpolation vector */
 544         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 545
 546         /* add in texels */
 547         lolo = 0;
 548         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 549             int texel;
 550             /* interpolate color */
 551             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 552             /* add in texel */
 553             lolo <<= 2;
 554             lolo |= texel;
 555         }
 556
 557         cc[0] = lolo;
 558     }
 559
 560     /* right microtile */
 561     cc[1] = 0;
 562     if (minColR != maxColR) {
 563         /* compute interpolation vector */
 564         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 565
 566         /* add in texels */
 567         lohi = 0;
 568         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 569             int texel;
 570             /* interpolate color */
 571             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 572             /* add in texel */
 573             lohi <<= 2;
 574             lohi |= texel;
 575         }
 576
 577         cc[1] = lohi;
 578     }
 579
 580     Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 581     for (j = n_vect - 1; j >= 0; j--) {
 582         /* add in alphas */
 583         Q_SHL(hi, 5);
 584         Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
 585     }
 586     for (j = n_vect - 1; j >= 0; j--) {
 587         for (i = 0; i < n_comp - 1; i++) {
 588             /* add in colors */
 589             Q_SHL(hi, 5);
 590             Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
 591         }
 592     }
 593     ((qword *)cc)[1] = hi;
 594 }
 595
 596
 597 static void
 598 fxt1_quantize_HI (dword *cc,
 599                   byte input[N_TEXELS][MAX_COMP],
 600                   byte reord[N_TEXELS][MAX_COMP], int n)
 601 {
 602     const int n_vect = 6; /* highest vector number */
 603     const int n_comp = 3; /* 3 components: R, G, B */
 604     float b = 0.0F;       /* phoudoin: silent compiler! */
 605     float iv[MAX_COMP];   /* interpolation vector */
 606     int i, k;
 607     dword hihi; /* high quadword: hi dword */
 608
 609 #ifndef YUV
 610     int minSum = 2000; /* big enough */
 611 #else
 612     int minSum = 2000000;
 613 #endif
 614     int maxSum = -1; /* small enough */
 615     int minCol = 0; /* phoudoin: silent compiler! */
 616     int maxCol = 0; /* phoudoin: silent compiler! */
 617
 618     /* Our solution here is to find the darkest and brightest colors in
 619      * the 8x4 tile and use those as the two representative colors.
 620      * There are probably better algorithms to use (histogram-based).
 621      */
 622     for (k = 0; k < n; k++) {
 623         int sum = 0;
 624 #ifndef YUV
 625         for (i = 0; i < n_comp; i++) {
 626             sum += reord[k][i];
 627         }
 628 #else
 629         sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
 630 #endif
 631         if (minSum > sum) {
 632             minSum = sum;
 633             minCol = k;
 634         }
 635         if (maxSum < sum) {
 636             maxSum = sum;
 637             maxCol = k;
 638         }
 639     }
 640
 641     hihi = 0; /* cc-hi = "00" */
 642     for (i = 0; i < n_comp; i++) {
 643         /* add in colors */
 644         hihi <<= 5;
 645         hihi |= reord[maxCol][i] >> 3;
 646     }
 647     for (i = 0; i < n_comp; i++) {
 648         /* add in colors */
 649         hihi <<= 5;
 650         hihi |= reord[minCol][i] >> 3;
 651     }
 652     cc[3] = hihi;
 653     cc[0] = cc[1] = cc[2] = 0;
 654
 655     /* compute interpolation vector */
 656     if (minCol != maxCol) {
 657         MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 658     }
 659
 660     /* add in texels */
 661     for (k = N_TEXELS - 1; k >= 0; k--) {
 662         int t = k * 3;
 663         dword *kk = (dword *)((byte *)cc + t / 8);
 664         int texel = n_vect + 1; /* transparent black */
 665
 666         if (!ISTBLACK(input[k])) {
 667             if (minCol != maxCol) {
 668                 /* interpolate color */
 669                 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 670                 /* add in texel */
 671                 kk[0] |= texel << (t & 7);
 672             }
 673         } else {
 674             /* add in texel */
 675             kk[0] |= texel << (t & 7);
 676         }
 677     }
 678 }
 679
 680
 681 static void
 682 fxt1_quantize_MIXED1 (dword *cc,
 683                       byte input[N_TEXELS][MAX_COMP])
 684 {
 685     const int n_vect = 2; /* highest vector number in each microtile */
 686     const int n_comp = 3; /* 3 components: R, G, B */
 687     byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 688     float b, iv[MAX_COMP]; /* interpolation vector */
 689     int i, j, k;
 690     qword hi; /* high quadword */
 691     dword lohi, lolo; /* low quadword: hi dword, lo dword */
 692
 693     int minSum;
 694     int maxSum;
 695     int minColL = 0, maxColL = -1;
 696     int minColR = 0, maxColR = -1;
 697
 698     /* Our solution here is to find the darkest and brightest colors in
 699      * the 4x4 tile and use those as the two representative colors.
 700      * There are probably better algorithms to use (histogram-based).
 701      */
 702 #ifndef YUV
 703     minSum = 2000; /* big enough */
 704 #else
 705     minSum = 2000000;
 706 #endif
 707     maxSum = -1; /* small enough */
 708     for (k = 0; k < N_TEXELS / 2; k++) {
 709         if (!ISTBLACK(input[k])) {
 710             int sum = 0;
 711 #ifndef YUV
 712             for (i = 0; i < n_comp; i++) {
 713                 sum += input[k][i];
 714             }
 715 #else
 716             sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
 717 #endif
 718             if (minSum > sum) {
 719                 minSum = sum;
 720                 minColL = k;
 721             }
 722             if (maxSum < sum) {
 723                 maxSum = sum;
 724                 maxColL = k;
 725             }
 726         }
 727     }
 728 #ifndef YUV
 729     minSum = 2000; /* big enough */
 730 #else
 731     minSum = 2000000;
 732 #endif
 733     maxSum = -1; /* small enough */
 734     for (; k < N_TEXELS; k++) {
 735         if (!ISTBLACK(input[k])) {
 736             int sum = 0;
 737 #ifndef YUV
 738             for (i = 0; i < n_comp; i++) {
 739                 sum += input[k][i];
 740             }
 741 #else
 742             sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
 743 #endif
 744             if (minSum > sum) {
 745                 minSum = sum;
 746                 minColR = k;
 747             }
 748             if (maxSum < sum) {
 749                 maxSum = sum;
 750                 maxColR = k;
 751             }
 752         }
 753     }
 754
 755     /* left microtile */
 756     if (maxColL == -1) {
 757         /* all transparent black */
 758         cc[0] = 0xFFFFFFFF;
 759         for (i = 0; i < n_comp; i++) {
 760             vec[0][i] = 0;
 761             vec[1][i] = 0;
 762         }
 763     } else {
 764         cc[0] = 0;
 765         for (i = 0; i < n_comp; i++) {
 766             vec[0][i] = input[minColL][i];
 767             vec[1][i] = input[maxColL][i];
 768         }
 769         if (minColL != maxColL) {
 770             /* compute interpolation vector */
 771             MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 772
 773             /* add in texels */
 774             lolo = 0;
 775             for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 776                 int texel = n_vect + 1; /* transparent black */
 777                 if (!ISTBLACK(input[k])) {
 778                     /* interpolate color */
 779                     CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 780                 }
 781                 /* add in texel */
 782                 lolo <<= 2;
 783                 lolo |= texel;
 784             }
 785             cc[0] = lolo;
 786         }
 787     }
 788
 789     /* right microtile */
 790     if (maxColR == -1) {
 791         /* all transparent black */
 792         cc[1] = 0xFFFFFFFF;
 793         for (i = 0; i < n_comp; i++) {
 794             vec[2][i] = 0;
 795             vec[3][i] = 0;
 796         }
 797     } else {
 798         cc[1] = 0;
 799         for (i = 0; i < n_comp; i++) {
 800             vec[2][i] = input[minColR][i];
 801             vec[3][i] = input[maxColR][i];
 802         }
 803         if (minColR != maxColR) {
 804             /* compute interpolation vector */
 805             MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 806
 807             /* add in texels */
 808             lohi = 0;
 809             for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 810                 int texel = n_vect + 1; /* transparent black */
 811                 if (!ISTBLACK(input[k])) {
 812                     /* interpolate color */
 813                     CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 814                 }
 815                 /* add in texel */
 816                 lohi <<= 2;
 817                 lohi |= texel;
 818             }
 819             cc[1] = lohi;
 820         }
 821     }
 822
 823     Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
 824     for (j = 2 * 2 - 1; j >= 0; j--) {
 825         for (i = 0; i < n_comp; i++) {
 826             /* add in colors */
 827             Q_SHL(hi, 5);
 828             Q_OR32(hi, vec[j][i] >> 3);
 829         }
 830     }
 831     ((qword *)cc)[1] = hi;
 832 }
 833
 834
 835 static void
 836 fxt1_quantize_MIXED0 (dword *cc,
 837                       byte input[N_TEXELS][MAX_COMP])
 838 {
 839     const int n_vect = 3; /* highest vector number in each microtile */
 840     const int n_comp = 3; /* 3 components: R, G, B */
 841     byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 842     float b, iv[MAX_COMP]; /* interpolation vector */
 843     int i, j, k;
 844     qword hi; /* high quadword */
 845     dword lohi, lolo; /* low quadword: hi dword, lo dword */
 846
 847     int minColL = 0, maxColL = 0;
 848     int minColR = 0, maxColR = 0;
 849 #if 0
 850     int minSum;
 851     int maxSum;
 852
 853     /* Our solution here is to find the darkest and brightest colors in
 854      * the 4x4 tile and use those as the two representative colors.
 855      * There are probably better algorithms to use (histogram-based).
 856      */
 857 #ifndef YUV
 858     minSum = 2000; /* big enough */
 859 #else
 860     minSum = 2000000;
 861 #endif
 862     maxSum = -1; /* small enough */
 863     for (k = 0; k < N_TEXELS / 2; k++) {
 864         int sum = 0;
 865 #ifndef YUV
 866         for (i = 0; i < n_comp; i++) {
 867             sum += input[k][i];
 868         }
 869 #else
 870         sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
 871 #endif
 872         if (minSum > sum) {
 873             minSum = sum;
 874             minColL = k;
 875         }
 876         if (maxSum < sum) {
 877             maxSum = sum;
 878             maxColL = k;
 879         }
 880     }
 881     minSum = 2000; /* big enough */
 882     maxSum = -1; /* small enough */
 883     for (; k < N_TEXELS; k++) {
 884         int sum = 0;
 885 #ifndef YUV
 886         for (i = 0; i < n_comp; i++) {
 887             sum += input[k][i];
 888         }
 889 #else
 890         sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
 891 #endif
 892         if (minSum > sum) {
 893             minSum = sum;
 894             minColR = k;
 895         }
 896         if (maxSum < sum) {
 897             maxSum = sum;
 898             maxColR = k;
 899         }
 900     }
 901 #else
 902     int minVal;
 903     int maxVal;
 904     int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
 905     int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
 906
 907     /* Scan the channel with max variance for lo & hi
 908      * and use those as the two representative colors.
 909      */
 910     minVal = 2000; /* big enough */
 911     maxVal = -1; /* small enough */
 912     for (k = 0; k < N_TEXELS / 2; k++) {
 913         int t = input[k][maxVarL];
 914         if (minVal > t) {
 915             minVal = t;
 916             minColL = k;
 917         }
 918         if (maxVal < t) {
 919             maxVal = t;
 920             maxColL = k;
 921         }
 922     }
 923     minVal = 2000; /* big enough */
 924     maxVal = -1; /* small enough */
 925     for (; k < N_TEXELS; k++) {
 926         int t = input[k][maxVarR];
 927         if (minVal > t) {
 928             minVal = t;
 929             minColR = k;
 930         }
 931         if (maxVal < t) {
 932             maxVal = t;
 933             maxColR = k;
 934         }
 935     }
 936 #endif
 937
 938     /* left microtile */
 939     cc[0] = 0;
 940     for (i = 0; i < n_comp; i++) {
 941         vec[0][i] = input[minColL][i];
 942         vec[1][i] = input[maxColL][i];
 943     }
 944     if (minColL != maxColL) {
 945         /* compute interpolation vector */
 946         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 947
 948         /* add in texels */
 949         lolo = 0;
 950         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 951             int texel;
 952             /* interpolate color */
 953             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 954             /* add in texel */
 955             lolo <<= 2;
 956             lolo |= texel;
 957         }
 958
 959         /* funky encoding for LSB of green */
 960         if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
 961             for (i = 0; i < n_comp; i++) {
 962                 vec[1][i] = input[minColL][i];
 963                 vec[0][i] = input[maxColL][i];
 964             }
 965             lolo = ~lolo;
 966         }
 967
 968         cc[0] = lolo;
 969     }
 970
 971     /* right microtile */
 972     cc[1] = 0;
 973     for (i = 0; i < n_comp; i++) {
 974         vec[2][i] = input[minColR][i];
 975         vec[3][i] = input[maxColR][i];
 976     }
 977     if (minColR != maxColR) {
 978         /* compute interpolation vector */
 979         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 980
 981         /* add in texels */
 982         lohi = 0;
 983         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 984             int texel;
 985             /* interpolate color */
 986             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 987             /* add in texel */
 988             lohi <<= 2;
 989             lohi |= texel;
 990         }
 991
 992         /* funky encoding for LSB of green */
 993         if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
 994             for (i = 0; i < n_comp; i++) {
 995                 vec[3][i] = input[minColR][i];
 996                 vec[2][i] = input[maxColR][i];
 997             }
 998             lohi = ~lohi;
 999         }
1000
1001         cc[1] = lohi;
1002     }
1003
1004     Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1005     for (j = 2 * 2 - 1; j >= 0; j--) {
1006         for (i = 0; i < n_comp; i++) {
1007             /* add in colors */
1008             Q_SHL(hi, 5);
1009             Q_OR32(hi, vec[j][i] >> 3);
1010         }
1011     }
1012     ((qword *)cc)[1] = hi;
1013 }
1014
1015
1016 static void
1017 fxt1_quantize (dword *cc, const byte *lines[], int comps)
1018 {
1019     int trualpha;
1020     byte reord[N_TEXELS][MAX_COMP];
1021
1022     byte input[N_TEXELS][MAX_COMP];
1023 #ifndef ARGB
1024     int i;
1025 #endif
1026     int k, l;
1027
1028     if (comps == 3) {
1029         /* make the whole block opaque */
1030         memset(input, -1, sizeof(input));
1031     }
1032
1033     /* 8 texels each line */
1034 #ifndef ARGB
1035     for (l = 0; l < 4; l++) {
1036         for (k = 0; k < 4; k++) {
1037             for (i = 0; i < comps; i++) {
1038                 input[k + l * 4][i] = *lines[l]++;
1039             }
1040         }
1041         for (; k < 8; k++) {
1042             for (i = 0; i < comps; i++) {
1043                 input[k + l * 4 + 12][i] = *lines[l]++;
1044             }
1045         }
1046     }
1047 #else
1048     /* H.Morii - support for ARGB inputs */
1049     for (l = 0; l < 4; l++) {
1050         for (k = 0; k < 4; k++) {
1051           input[k + l * 4][2] = *lines[l]++;
1052           input[k + l * 4][1] = *lines[l]++;
1053           input[k + l * 4][0] = *lines[l]++;
1054           if (comps == 4) input[k + l * 4][3] = *lines[l]++;
1055         }
1056         for (; k < 8; k++) {
1057           input[k + l * 4 + 12][2] = *lines[l]++;
1058           input[k + l * 4 + 12][1] = *lines[l]++;
1059           input[k + l * 4 + 12][0] = *lines[l]++;
1060           if (comps == 4) input[k + l * 4 + 12][3] = *lines[l]++;
1061         }
1062     }
1063 #endif
1064
1065     /* block layout:
1066      * 00, 01, 02, 03, 08, 09, 0a, 0b
1067      * 10, 11, 12, 13, 18, 19, 1a, 1b
1068      * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1069      * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1070      */
1071
1072     /* [dBorca]
1073      * stupidity flows forth from this
1074      */
1075     l = N_TEXELS;
1076     trualpha = 0;
1077     if (comps == 4) {
1078         /* skip all transparent black texels */
1079         l = 0;
1080         for (k = 0; k < N_TEXELS; k++) {
1081             /* test all components against 0 */
1082             if (!ISTBLACK(input[k])) {
1083                 /* texel is not transparent black */
1084                 COPY_4UBV(reord[l], input[k]);
1085                 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1086                     /* non-opaque texel */
1087                     trualpha = !0;
1088                 }
1089                 l++;
1090             }
1091         }
1092     }
1093
1094 #if 0
1095     if (trualpha) {
1096         fxt1_quantize_ALPHA0(cc, input, reord, l);
1097     } else if (l == 0) {
1098         cc[0] = cc[1] = cc[2] = -1;
1099         cc[3] = 0;
1100     } else if (l < N_TEXELS) {
1101         fxt1_quantize_HI(cc, input, reord, l);
1102     } else {
1103         fxt1_quantize_CHROMA(cc, input);
1104     }
1105     (void)fxt1_quantize_ALPHA1;
1106     (void)fxt1_quantize_MIXED1;
1107     (void)fxt1_quantize_MIXED0;
1108 #else
1109     if (trualpha) {
1110         fxt1_quantize_ALPHA1(cc, input);
1111     } else if (l == 0) {
1112         cc[0] = cc[1] = cc[2] = 0xFFFFFFFF;
1113         cc[3] = 0;
1114     } else if (l < N_TEXELS) {
1115         fxt1_quantize_MIXED1(cc, input);
1116     } else {
1117         fxt1_quantize_MIXED0(cc, input);
1118     }
1119     (void)fxt1_quantize_ALPHA0;
1120     (void)fxt1_quantize_HI;
1121     (void)fxt1_quantize_CHROMA;
1122 #endif
1123 }
1124
1125
1126 TAPI int TAPIENTRY
1127 fxt1_encode (int width, int height, int comps,
1128              const void *source, int srcRowStride,
1129              void *dest, int destRowStride)
1130 {
1131     int x, y;
1132     const byte *data;
1133     dword *encoded = (dword *)dest;
1134     void *newSource = NULL;
1135
1136     /* Replicate image if width is not M8 or height is not M4 */
1137     if ((width & 7) | (height & 3)) {
1138         int newWidth = (width + 7) & ~7;
1139         int newHeight = (height + 3) & ~3;
1140         newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));
1141         _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1142                                  comps, (const byte *)source,
1143                                  srcRowStride, (byte *)newSource);
1144         source = newSource;
1145         width = newWidth;
1146         height = newHeight;
1147         srcRowStride = comps * newWidth;
1148     }
1149
1150     data = (const byte *)source;
1151     destRowStride = (destRowStride - width * 2) / 4;
1152     for (y = 0; y < height; y += 4) {
1153         unsigned int offs = 0 + (y + 0) * srcRowStride;
1154         for (x = 0; x < width; x += 8) {
1155             const byte *lines[4];
1156             lines[0] = &data[offs];
1157             lines[1] = lines[0] + srcRowStride;
1158             lines[2] = lines[1] + srcRowStride;
1159             lines[3] = lines[2] + srcRowStride;
1160             offs += 8 * comps;
1161             fxt1_quantize(encoded, lines, comps);
1162             /* 128 bits per 8x4 block */
1163             encoded += 4;
1164         }
1165         encoded += destRowStride;
1166     }
1167
1168     if (newSource != NULL) {
1169         free(newSource);
1170     }
1171
1172     return 0;
1173 }
1174
1175
1176 /***************************************************************************\
1177  * FXT1 decoder
1178  *
1179  * The decoder is based on GL_3DFX_texture_compression_FXT1
1180  * specification and serves as a concept for the encoder.
1181 \***************************************************************************/
1182
1183
/* lookup table for scaling 5 bit colors up to 8 bits:
 * entry i holds i * 255 / 31 rounded to the nearest integer,
 * so 0 maps to 0 and 31 maps to 255
 */
static const byte _rgb_scale_5[] = {
    0,   8,   16,  25,  33,  41,  49,  58,
    66,  74,  82,  90,  99,  107, 115, 123,
    132, 140, 148, 156, 165, 173, 181, 189,
    197, 206, 214, 222, 230, 239, 247, 255
};

/* lookup table for scaling 6 bit colors up to 8 bits:
 * entry i holds i * 255 / 63 rounded to the nearest integer,
 * so 0 maps to 0 and 63 maps to 255
 */
static const byte _rgb_scale_6[] = {
    0,   4,   8,   12,  16,  20,  24,  28,
    32,  36,  40,  45,  49,  53,  57,  61,
    65,  69,  73,  77,  81,  85,  89,  93,
    97,  101, 105, 109, 113, 117, 121, 125,
    130, 134, 138, 142, 146, 150, 154, 158,
    162, 166, 170, 174, 178, 182, 186, 190,
    194, 198, 202, 206, 210, 215, 219, 223,
    227, 231, 235, 239, 243, 247, 251, 255
};
1203
1204
/* CC_SEL: view the code word as an array of 32-bit dwords and shift the one
 * containing bit `which` so that bit lands in bit 0.  The result is NOT
 * masked; callers mask it (e.g. through UP5/UP6) or rely on the field being
 * the topmost bits of its dword.
 */
#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of c to 8 bits */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* UP6: expand the 6-bit value made of the low 5 bits of c and LSB b */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* LERP: step t of n between c0 and c1, with rounding.  The whole expansion
 * is parenthesized so it can be used safely inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
/* ZERO_4UBV: clear a 4-byte vector; done bytewise so that v does not have
 * to be dword-aligned
 */
#define ZERO_4UBV(v) memset((v), 0, 4)
1210
1211
1212 static void
1213 fxt1_decode_1HI (const byte *code, int t, byte *rgba)
1214 {
1215     const dword *cc;
1216
1217     t *= 3;
1218     cc = (const dword *)(code + t / 8);
1219     t = (cc[0] >> (t & 7)) & 7;
1220
1221     if (t == 7) {
1222         ZERO_4UBV(rgba);
1223     } else {
1224         cc = (const dword *)(code + 12);
1225         if (t == 0) {
1226             rgba[BCOMP] = UP5(CC_SEL(cc, 0));
1227             rgba[GCOMP] = UP5(CC_SEL(cc, 5));
1228             rgba[RCOMP] = UP5(CC_SEL(cc, 10));
1229         } else if (t == 6) {
1230             rgba[BCOMP] = UP5(CC_SEL(cc, 15));
1231             rgba[GCOMP] = UP5(CC_SEL(cc, 20));
1232             rgba[RCOMP] = UP5(CC_SEL(cc, 25));
1233         } else {
1234             rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1235             rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1236             rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1237         }
1238         rgba[ACOMP] = 255;
1239     }
1240 }
1241
1242
1243 static void
1244 fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba)
1245 {
1246     const dword *cc;
1247     dword kk;
1248
1249     cc = (const dword *)code;
1250     if (t & 16) {
1251         cc++;
1252         t &= 15;
1253     }
1254     t = (cc[0] >> (t * 2)) & 3;
1255
1256     t *= 15;
1257     cc = (const dword *)(code + 8 + t / 8);
1258     kk = cc[0] >> (t & 7);
1259     rgba[BCOMP] = UP5(kk);
1260     rgba[GCOMP] = UP5(kk >> 5);
1261     rgba[RCOMP] = UP5(kk >> 10);
1262     rgba[ACOMP] = 255;
1263 }
1264
1265
1266 static void
1267 fxt1_decode_1MIXED (const byte *code, int t, byte *rgba)
1268 {
1269     const dword *cc;
1270     int col[2][3];
1271     int glsb, selb;
1272
1273     cc = (const dword *)code;
1274     if (t & 16) {
1275         t &= 15;
1276         t = (cc[1] >> (t * 2)) & 3;
1277         /* col 2 */
1278         col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6;
1279         col[0][GCOMP] = CC_SEL(cc, 99);
1280         col[0][RCOMP] = CC_SEL(cc, 104);
1281         /* col 3 */
1282         col[1][BCOMP] = CC_SEL(cc, 109);
1283         col[1][GCOMP] = CC_SEL(cc, 114);
1284         col[1][RCOMP] = CC_SEL(cc, 119);
1285         glsb = CC_SEL(cc, 126);
1286         selb = CC_SEL(cc, 33);
1287     } else {
1288         t = (cc[0] >> (t * 2)) & 3;
1289         /* col 0 */
1290         col[0][BCOMP] = CC_SEL(cc, 64);
1291         col[0][GCOMP] = CC_SEL(cc, 69);
1292         col[0][RCOMP] = CC_SEL(cc, 74);
1293         /* col 1 */
1294         col[1][BCOMP] = CC_SEL(cc, 79);
1295         col[1][GCOMP] = CC_SEL(cc, 84);
1296         col[1][RCOMP] = CC_SEL(cc, 89);
1297         glsb = CC_SEL(cc, 125);
1298         selb = CC_SEL(cc, 1);
1299     }
1300
1301     if (CC_SEL(cc, 124) & 1) {
1302         /* alpha[0] == 1 */
1303
1304         if (t == 3) {
1305             ZERO_4UBV(rgba);
1306         } else {
1307             if (t == 0) {
1308                 rgba[BCOMP] = UP5(col[0][BCOMP]);
1309                 rgba[GCOMP] = UP5(col[0][GCOMP]);
1310                 rgba[RCOMP] = UP5(col[0][RCOMP]);
1311             } else if (t == 2) {
1312                 rgba[BCOMP] = UP5(col[1][BCOMP]);
1313                 rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1314                 rgba[RCOMP] = UP5(col[1][RCOMP]);
1315             } else {
1316                 rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1317                 rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1318                 rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1319             }
1320             rgba[ACOMP] = 255;
1321         }
1322     } else {
1323         /* alpha[0] == 0 */
1324
1325         if (t == 0) {
1326             rgba[BCOMP] = UP5(col[0][BCOMP]);
1327             rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
1328             rgba[RCOMP] = UP5(col[0][RCOMP]);
1329         } else if (t == 3) {
1330             rgba[BCOMP] = UP5(col[1][BCOMP]);
1331             rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1332             rgba[RCOMP] = UP5(col[1][RCOMP]);
1333         } else {
1334             rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1335             rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1336                                      UP6(col[1][GCOMP], glsb));
1337             rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1338         }
1339         rgba[ACOMP] = 255;
1340     }
1341 }
1342
1343
1344 static void
1345 fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba)
1346 {
1347     const dword *cc;
1348
1349     cc = (const dword *)code;
1350     if (CC_SEL(cc, 124) & 1) {
1351         /* lerp == 1 */
1352         int col0[4];
1353
1354         if (t & 16) {
1355             t &= 15;
1356             t = (cc[1] >> (t * 2)) & 3;
1357             /* col 2 */
1358             col0[BCOMP] = (*(const dword *)(code + 11)) >> 6;
1359             col0[GCOMP] = CC_SEL(cc, 99);
1360             col0[RCOMP] = CC_SEL(cc, 104);
1361             col0[ACOMP] = CC_SEL(cc, 119);
1362         } else {
1363             t = (cc[0] >> (t * 2)) & 3;
1364             /* col 0 */
1365             col0[BCOMP] = CC_SEL(cc, 64);
1366             col0[GCOMP] = CC_SEL(cc, 69);
1367             col0[RCOMP] = CC_SEL(cc, 74);
1368             col0[ACOMP] = CC_SEL(cc, 109);
1369         }
1370
1371         if (t == 0) {
1372             rgba[BCOMP] = UP5(col0[BCOMP]);
1373             rgba[GCOMP] = UP5(col0[GCOMP]);
1374             rgba[RCOMP] = UP5(col0[RCOMP]);
1375             rgba[ACOMP] = UP5(col0[ACOMP]);
1376         } else if (t == 3) {
1377             rgba[BCOMP] = UP5(CC_SEL(cc, 79));
1378             rgba[GCOMP] = UP5(CC_SEL(cc, 84));
1379             rgba[RCOMP] = UP5(CC_SEL(cc, 89));
1380             rgba[ACOMP] = UP5(CC_SEL(cc, 114));
1381         } else {
1382             rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1383             rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1384             rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1385             rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1386         }
1387     } else {
1388         /* lerp == 0 */
1389
1390         if (t & 16) {
1391             cc++;
1392             t &= 15;
1393         }
1394         t = (cc[0] >> (t * 2)) & 3;
1395
1396         if (t == 3) {
1397             ZERO_4UBV(rgba);
1398         } else {
1399             dword kk;
1400             cc = (const dword *)code;
1401             rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
1402             t *= 15;
1403             cc = (const dword *)(code + 8 + t / 8);
1404             kk = cc[0] >> (t & 7);
1405             rgba[BCOMP] = UP5(kk);
1406             rgba[GCOMP] = UP5(kk >> 5);
1407             rgba[RCOMP] = UP5(kk >> 10);
1408         }
1409     }
1410 }
1411
1412
1413 TAPI void TAPIENTRY
1414 fxt1_decode_1 (const void *texture, int stride,
1415                int i, int j, byte *rgba)
1416 {
1417     static void (*decode_1[]) (const byte *, int, byte *) = {
1418         fxt1_decode_1HI,        /* cc-high   = "00?" */
1419         fxt1_decode_1HI,        /* cc-high   = "00?" */
1420         fxt1_decode_1CHROMA,    /* cc-chroma = "010" */
1421         fxt1_decode_1ALPHA,     /* alpha     = "011" */
1422         fxt1_decode_1MIXED,     /* mixed     = "1??" */
1423         fxt1_decode_1MIXED,     /* mixed     = "1??" */
1424         fxt1_decode_1MIXED,     /* mixed     = "1??" */
1425         fxt1_decode_1MIXED      /* mixed     = "1??" */
1426     };
1427
1428     const byte *code = (const byte *)texture +
1429                         ((j / 4) * (stride / 8) + (i / 8)) * 16;
1430     int mode = CC_SEL(code, 125);
1431     int t = i & 7;
1432
1433     if (t & 4) {
1434         t += 12;
1435     }
1436     t += (j & 3) * 4;
1437
1438     decode_1[mode](code, t, rgba);
1439
1440 #if VERBOSE
1441     {
1442         extern int cc_chroma;
1443         extern int cc_alpha;
1444         extern int cc_high;
1445         extern int cc_mixed;
1446         static int *cctype[] = {
1447             &cc_high,
1448             &cc_high,
1449             &cc_chroma,
1450             &cc_alpha,
1451             &cc_mixed,
1452             &cc_mixed,
1453             &cc_mixed,
1454             &cc_mixed
1455         };
1456         (*cctype[mode])++;
1457     }
1458 #endif
1459 }