fxt1.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /**
  26  * \file texcompress_fxt1.c
  27  * GL_3DFX_texture_compression_FXT1 support.
  28  */
  29
  30
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <assert.h>
  34
  35 #include "types.h"
  36 #include "internal.h"
  37 #include "fxt1.h"
  38
  39
  40 /***************************************************************************\
  41  * FXT1 encoder
  42  *
  43  * The encoder was built by reversing the decoder,
  44  * and is vaguely based on Texus2 by 3dfx. Note that this code
  45  * is merely a proof of concept, since it is highly UNoptimized;
  46  * moreover, it is sub-optimal due to initial conditions passed
  47  * to Lloyd's algorithm (the interpolation modes are even worse).
  48 \***************************************************************************/
  49
  50
  51 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
  52 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
  53 #define N_TEXELS 32 /* number of texels in a block (always 32) */
  54 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
  55 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
  56 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
  57 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
  58 #define ISTBLACK(v) (*((dword *)(v)) == 0)
  59
  60
  61 static int
  62 fxt1_bestcol (float vec[][MAX_COMP], int nv,
  63               byte input[MAX_COMP], int nc)
  64 {
  65    int i, j, best = -1;
  66    float err = 1e9; /* big enough */
  67
  68    for (j = 0; j < nv; j++) {
  69       float e = 0.0F;
  70       for (i = 0; i < nc; i++) {
  71          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
  72       }
  73       if (e < err) {
  74          err = e;
  75          best = j;
  76       }
  77    }
  78
  79    return best;
  80 }
  81
  82
  83 static int
  84 fxt1_worst (float vec[MAX_COMP],
  85             byte input[N_TEXELS][MAX_COMP], int nc, int n)
  86 {
  87    int i, k, worst = -1;
  88    float err = -1.0F; /* small enough */
  89
  90    for (k = 0; k < n; k++) {
  91       float e = 0.0F;
  92       for (i = 0; i < nc; i++) {
  93          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
  94       }
  95       if (e > err) {
  96          err = e;
  97          worst = k;
  98       }
  99    }
 100
 101    return worst;
 102 }
 103
 104
 105 static int
 106 fxt1_variance (double variance[MAX_COMP],
 107                byte input[N_TEXELS][MAX_COMP], int nc, int n)
 108 {
 109    int i, k, best = 0;
 110    int sx, sx2;
 111    double var, maxvar = -1; /* small enough */
 112    double teenth = 1.0 / n;
 113
 114    for (i = 0; i < nc; i++) {
 115       sx = sx2 = 0;
 116       for (k = 0; k < n; k++) {
 117          int t = input[k][i];
 118          sx += t;
 119          sx2 += t * t;
 120       }
 121       var = sx2 * teenth - sx * sx * teenth * teenth;
 122       if (maxvar < var) {
 123          maxvar = var;
 124          best = i;
 125       }
 126       if (variance) {
 127          variance[i] = var;
 128       }
 129    }
 130
 131    return best;
 132 }
 133
 134
 135 static int
 136 fxt1_choose (float vec[][MAX_COMP], int nv,
 137              byte input[N_TEXELS][MAX_COMP], int nc, int n)
 138 {
 139 #if 0
 140    /* Choose colors from a grid.
 141     */
 142    int i, j;
 143
 144    for (j = 0; j < nv; j++) {
 145       int m = j * (n - 1) / (nv - 1);
 146       for (i = 0; i < nc; i++) {
 147          vec[j][i] = input[m][i];
 148       }
 149    }
 150 #else
 151    /* Our solution here is to find the darkest and brightest colors in
 152     * the 8x4 tile and use those as the two representative colors.
 153     * There are probably better algorithms to use (histogram-based).
 154     */
 155    int i, j, k;
 156    int minSum = 2000; /* big enough */
 157    int maxSum = -1; /* small enough */
 158    int minCol = 0; /* phoudoin: silent compiler! */
 159    int maxCol = 0; /* phoudoin: silent compiler! */
 160
 161    struct {
 162       int flag;
 163       int key;
 164       int freq;
 165       int idx;
 166    } hist[N_TEXELS];
 167    int lenh = 0;
 168
 169    memset(hist, 0, sizeof(hist));
 170
 171    for (k = 0; k < n; k++) {
 172       int l;
 173       int key = 0;
 174       int sum = 0;
 175       for (i = 0; i < nc; i++) {
 176          key <<= 8;
 177          key |= input[k][i];
 178          sum += input[k][i];
 179       }
 180       for (l = 0; l < n; l++) {
 181          if (!hist[l].flag) {
 182             /* alloc new slot */
 183             hist[l].flag = !0;
 184             hist[l].key = key;
 185             hist[l].freq = 1;
 186             hist[l].idx = k;
 187             lenh = l + 1;
 188             break;
 189          } else if (hist[l].key == key) {
 190             hist[l].freq++;
 191             break;
 192          }
 193       }
 194       if (minSum > sum) {
 195          minSum = sum;
 196          minCol = k;
 197       }
 198       if (maxSum < sum) {
 199          maxSum = sum;
 200          maxCol = k;
 201       }
 202    }
 203
 204    if (lenh <= nv) {
 205       for (j = 0; j < lenh; j++) {
 206          for (i = 0; i < nc; i++) {
 207             vec[j][i] = (float)input[hist[j].idx][i];
 208          }
 209       }
 210       for (; j < nv; j++) {
 211          for (i = 0; i < nc; i++) {
 212             vec[j][i] = vec[0][i];
 213          }
 214       }
 215       return 0;
 216    }
 217
 218    for (j = 0; j < nv; j++) {
 219       for (i = 0; i < nc; i++) {
 220          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
 221       }
 222    }
 223 #endif
 224
 225    return !0;
 226 }
 227
 228
 229 static int
 230 fxt1_lloyd (float vec[][MAX_COMP], int nv,
 231             byte input[N_TEXELS][MAX_COMP], int nc, int n)
 232 {
 233    /* Use the generalized lloyd's algorithm for VQ:
 234     *     find 4 color vectors.
 235     *
 236     *     for each sample color
 237     *         sort to nearest vector.
 238     *
 239     *     replace each vector with the centroid of its matching colors.
 240     *
 241     *     repeat until RMS doesn't improve.
 242     *
 243     *     if a color vector has no samples, or becomes the same as another
 244     *     vector, replace it with the color which is farthest from a sample.
 245     *
 246     * vec[][MAX_COMP]           initial vectors and resulting colors
 247     * nv                        number of resulting colors required
 248     * input[N_TEXELS][MAX_COMP] input texels
 249     * nc                        number of components in input / vec
 250     * n                         number of input samples
 251     */
 252
 253    int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
 254    int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
 255    float error, lasterror = 1e9;
 256
 257    int i, j, k, rep;
 258
 259    /* the quantizer */
 260    for (rep = 0; rep < LL_N_REP; rep++) {
 261       /* reset sums & counters */
 262       for (j = 0; j < nv; j++) {
 263          for (i = 0; i < nc; i++) {
 264             sum[j][i] = 0;
 265          }
 266          cnt[j] = 0;
 267       }
 268       error = 0;
 269
 270       /* scan whole block */
 271       for (k = 0; k < n; k++) {
 272 #if 1
 273          int best = -1;
 274          float err = 1e9; /* big enough */
 275          /* determine best vector */
 276          for (j = 0; j < nv; j++) {
 277             float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
 278                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
 279                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
 280             if (nc == 4) {
 281                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
 282             }
 283             if (e < err) {
 284                err = e;
 285                best = j;
 286             }
 287          }
 288 #else
 289          int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
 290 #endif
 291          assert(best >= 0);
 292          /* add in closest color */
 293          for (i = 0; i < nc; i++) {
 294             sum[best][i] += input[k][i];
 295          }
 296          /* mark this vector as used */
 297          cnt[best]++;
 298          /* accumulate error */
 299          error += err;
 300       }
 301
 302       /* check RMS */
 303       if ((error < LL_RMS_E) ||
 304           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
 305          return !0; /* good match */
 306       }
 307       lasterror = error;
 308
 309       /* move each vector to the barycenter of its closest colors */
 310       for (j = 0; j < nv; j++) {
 311          if (cnt[j]) {
 312             float div = 1.0F / cnt[j];
 313             for (i = 0; i < nc; i++) {
 314                vec[j][i] = div * sum[j][i];
 315             }
 316          } else {
 317             /* this vec has no samples or is identical with a previous vec */
 318             int worst = fxt1_worst(vec[j], input, nc, n);
 319             for (i = 0; i < nc; i++) {
 320                vec[j][i] = input[worst][i];
 321             }
 322          }
 323       }
 324    }
 325
 326    return 0; /* could not converge fast enough */
 327 }
 328
 329
 330 static void
 331 fxt1_quantize_CHROMA (dword *cc,
 332                       byte input[N_TEXELS][MAX_COMP])
 333 {
 334    const int n_vect = 4; /* 4 base vectors to find */
 335    const int n_comp = 3; /* 3 components: R, G, B */
 336    float vec[MAX_VECT][MAX_COMP];
 337    int i, j, k;
 338    qword hi; /* high quadword */
 339    dword lohi, lolo; /* low quadword: hi dword, lo dword */
 340
 341    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
 342       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
 343    }
 344
 345    Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
 346    for (j = n_vect - 1; j >= 0; j--) {
 347       for (i = 0; i < n_comp; i++) {
 348          /* add in colors */
 349          Q_SHL(hi, 5);
 350          Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
 351       }
 352    }
 353    ((qword *)cc)[1] = hi;
 354
 355    lohi = lolo = 0;
 356    /* right microtile */
 357    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 358       lohi <<= 2;
 359       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 360    }
 361    /* left microtile */
 362    for (; k >= 0; k--) {
 363       lolo <<= 2;
 364       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
 365    }
 366    cc[1] = lohi;
 367    cc[0] = lolo;
 368 }
 369
 370
 371 static void
 372 fxt1_quantize_ALPHA0 (dword *cc,
 373                       byte input[N_TEXELS][MAX_COMP],
 374                       byte reord[N_TEXELS][MAX_COMP], int n)
 375 {
 376    const int n_vect = 3; /* 3 base vectors to find */
 377    const int n_comp = 4; /* 4 components: R, G, B, A */
 378    float vec[MAX_VECT][MAX_COMP];
 379    int i, j, k;
 380    qword hi; /* high quadword */
 381    dword lohi, lolo; /* low quadword: hi dword, lo dword */
 382
 383    /* the last vector indicates zero */
 384    for (i = 0; i < n_comp; i++) {
 385       vec[n_vect][i] = 0;
 386    }
 387
 388    /* the first n texels in reord are guaranteed to be non-zero */
 389    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
 390       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
 391    }
 392
 393    Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
 394    for (j = n_vect - 1; j >= 0; j--) {
 395       /* add in alphas */
 396       Q_SHL(hi, 5);
 397       Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
 398    }
 399    for (j = n_vect - 1; j >= 0; j--) {
 400       for (i = 0; i < n_comp - 1; i++) {
 401          /* add in colors */
 402          Q_SHL(hi, 5);
 403          Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
 404       }
 405    }
 406    ((qword *)cc)[1] = hi;
 407
 408    lohi = lolo = 0;
 409    /* right microtile */
 410    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
 411       lohi <<= 2;
 412       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 413    }
 414    /* left microtile */
 415    for (; k >= 0; k--) {
 416       lolo <<= 2;
 417       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
 418    }
 419    cc[1] = lohi;
 420    cc[0] = lolo;
 421 }
 422
 423
 424 static void
 425 fxt1_quantize_ALPHA1 (dword *cc,
 426                       byte input[N_TEXELS][MAX_COMP])
 427 {
 428    const int n_vect = 3; /* highest vector number in each microtile */
 429    const int n_comp = 4; /* 4 components: R, G, B, A */
 430    float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
 431    float b, iv[MAX_COMP]; /* interpolation vector */
 432    int i, j, k;
 433    qword hi; /* high quadword */
 434    dword lohi, lolo; /* low quadword: hi dword, lo dword */
 435
 436    int minSum;
 437    int maxSum;
 438    int minColL = 0, maxColL = 0;
 439    int minColR = 0, maxColR = 0;
 440    int sumL = 0, sumR = 0;
 441    int nn_comp;
 442    /* Our solution here is to find the darkest and brightest colors in
 443     * the 4x4 tile and use those as the two representative colors.
 444     * There are probably better algorithms to use (histogram-based).
 445     */
 446    nn_comp = n_comp;
 447    while ((minColL == maxColL) && nn_comp) {
 448        minSum = 2000; /* big enough */
 449        maxSum = -1; /* small enough */
 450        for (k = 0; k < N_TEXELS / 2; k++) {
 451            int sum = 0;
 452            for (i = 0; i < nn_comp; i++) {
 453                sum += input[k][i];
 454            }
 455            if (minSum > sum) {
 456                minSum = sum;
 457                minColL = k;
 458            }
 459            if (maxSum < sum) {
 460                maxSum = sum;
 461                maxColL = k;
 462            }
 463            sumL += sum;
 464        }
 465
 466        nn_comp--;
 467    }
 468
 469    nn_comp = n_comp;
 470    while ((minColR == maxColR) && nn_comp) {
 471        minSum = 2000; /* big enough */
 472        maxSum = -1; /* small enough */
 473        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
 474            int sum = 0;
 475            for (i = 0; i < nn_comp; i++) {
 476                sum += input[k][i];
 477            }
 478            if (minSum > sum) {
 479                minSum = sum;
 480                minColR = k;
 481            }
 482            if (maxSum < sum) {
 483                maxSum = sum;
 484                maxColR = k;
 485            }
 486            sumR += sum;
 487        }
 488
 489        nn_comp--;
 490    }
 491
 492    /* choose the common vector (yuck!) */
 493    {
 494       int j1, j2;
 495       int v1 = 0, v2 = 0;
 496       float err = 1e9; /* big enough */
 497       float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 498       for (i = 0; i < n_comp; i++) {
 499          tv[0][i] = input[minColL][i];
 500          tv[1][i] = input[maxColL][i];
 501          tv[2][i] = input[minColR][i];
 502          tv[3][i] = input[maxColR][i];
 503       }
 504       for (j1 = 0; j1 < 2; j1++) {
 505          for (j2 = 2; j2 < 4; j2++) {
 506             float e = 0.0F;
 507             for (i = 0; i < n_comp; i++) {
 508                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
 509             }
 510             if (e < err) {
 511                err = e;
 512                v1 = j1;
 513                v2 = j2;
 514             }
 515          }
 516       }
 517       for (i = 0; i < n_comp; i++) {
 518          vec[0][i] = tv[1 - v1][i];
 519          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
 520          vec[2][i] = tv[5 - v2][i];
 521       }
 522    }
 523
 524    /* left microtile */
 525    cc[0] = 0;
 526    if (minColL != maxColL) {
 527       /* compute interpolation vector */
 528       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 529
 530       /* add in texels */
 531       lolo = 0;
 532       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 533          int texel;
 534          /* interpolate color */
 535          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 536          /* add in texel */
 537          lolo <<= 2;
 538          lolo |= texel;
 539       }
 540
 541       cc[0] = lolo;
 542    }
 543
 544    /* right microtile */
 545    cc[1] = 0;
 546    if (minColR != maxColR) {
 547       /* compute interpolation vector */
 548       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
 549
 550       /* add in texels */
 551       lohi = 0;
 552       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 553          int texel;
 554          /* interpolate color */
 555          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 556          /* add in texel */
 557          lohi <<= 2;
 558          lohi |= texel;
 559       }
 560
 561       cc[1] = lohi;
 562    }
 563
 564    Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
 565    for (j = n_vect - 1; j >= 0; j--) {
 566       /* add in alphas */
 567       Q_SHL(hi, 5);
 568       Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
 569    }
 570    for (j = n_vect - 1; j >= 0; j--) {
 571       for (i = 0; i < n_comp - 1; i++) {
 572          /* add in colors */
 573          Q_SHL(hi, 5);
 574          Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
 575       }
 576    }
 577    ((qword *)cc)[1] = hi;
 578 }
 579
 580
 581 static void
 582 fxt1_quantize_HI (dword *cc,
 583                   byte input[N_TEXELS][MAX_COMP],
 584                   byte reord[N_TEXELS][MAX_COMP], int n)
 585 {
 586    const int n_vect = 6; /* highest vector number */
 587    const int n_comp = 3; /* 3 components: R, G, B */
 588    float b = 0.0F;       /* phoudoin: silent compiler! */
 589    float iv[MAX_COMP];   /* interpolation vector */
 590    int i, k;
 591    dword hihi; /* high quadword: hi dword */
 592
 593    int minSum = 2000; /* big enough */
 594    int maxSum = -1; /* small enough */
 595    int minCol = 0; /* phoudoin: silent compiler! */
 596    int maxCol = 0; /* phoudoin: silent compiler! */
 597
 598    /* Our solution here is to find the darkest and brightest colors in
 599     * the 8x4 tile and use those as the two representative colors.
 600     * There are probably better algorithms to use (histogram-based).
 601     */
 602    for (k = 0; k < n; k++) {
 603       int sum = 0;
 604       for (i = 0; i < n_comp; i++) {
 605          sum += reord[k][i];
 606       }
 607       if (minSum > sum) {
 608          minSum = sum;
 609          minCol = k;
 610       }
 611       if (maxSum < sum) {
 612          maxSum = sum;
 613          maxCol = k;
 614       }
 615    }
 616
 617    hihi = 0; /* cc-hi = "00" */
 618    for (i = 0; i < n_comp; i++) {
 619       /* add in colors */
 620       hihi <<= 5;
 621       hihi |= reord[maxCol][i] >> 3;
 622    }
 623    for (i = 0; i < n_comp; i++) {
 624       /* add in colors */
 625       hihi <<= 5;
 626       hihi |= reord[minCol][i] >> 3;
 627    }
 628    cc[3] = hihi;
 629    cc[0] = cc[1] = cc[2] = 0;
 630
 631    /* compute interpolation vector */
 632    if (minCol != maxCol) {
 633       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
 634    }
 635
 636    /* add in texels */
 637    for (k = N_TEXELS - 1; k >= 0; k--) {
 638       int t = k * 3;
 639       dword *kk = (dword *)((char *)cc + t / 8);
 640       int texel = n_vect + 1; /* transparent black */
 641
 642       if (!ISTBLACK(input[k])) {
 643          if (minCol != maxCol) {
 644             /* interpolate color */
 645             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 646             /* add in texel */
 647             kk[0] |= texel << (t & 7);
 648          }
 649       } else {
 650          /* add in texel */
 651          kk[0] |= texel << (t & 7);
 652       }
 653    }
 654 }
 655
 656
 657 static void
 658 fxt1_quantize_MIXED1 (dword *cc,
 659                       byte input[N_TEXELS][MAX_COMP])
 660 {
 661    const int n_vect = 2; /* highest vector number in each microtile */
 662    const int n_comp = 3; /* 3 components: R, G, B */
 663    byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 664    float b, iv[MAX_COMP]; /* interpolation vector */
 665    int i, j, k;
 666    qword hi; /* high quadword */
 667    dword lohi, lolo; /* low quadword: hi dword, lo dword */
 668
 669    int minSum;
 670    int maxSum;
 671    int minColL = 0, maxColL = -1;
 672    int minColR = 0, maxColR = -1;
 673
 674    /* Our solution here is to find the darkest and brightest colors in
 675     * the 4x4 tile and use those as the two representative colors.
 676     * There are probably better algorithms to use (histogram-based).
 677     */
 678    minSum = 2000; /* big enough */
 679    maxSum = -1; /* small enough */
 680    for (k = 0; k < N_TEXELS / 2; k++) {
 681       if (!ISTBLACK(input[k])) {
 682          int sum = 0;
 683          for (i = 0; i < n_comp; i++) {
 684             sum += input[k][i];
 685          }
 686          if (minSum > sum) {
 687             minSum = sum;
 688             minColL = k;
 689          }
 690          if (maxSum < sum) {
 691             maxSum = sum;
 692             maxColL = k;
 693          }
 694       }
 695    }
 696    minSum = 2000; /* big enough */
 697    maxSum = -1; /* small enough */
 698    for (; k < N_TEXELS; k++) {
 699       if (!ISTBLACK(input[k])) {
 700          int sum = 0;
 701          for (i = 0; i < n_comp; i++) {
 702             sum += input[k][i];
 703          }
 704          if (minSum > sum) {
 705             minSum = sum;
 706             minColR = k;
 707          }
 708          if (maxSum < sum) {
 709             maxSum = sum;
 710             maxColR = k;
 711          }
 712       }
 713    }
 714
 715    /* left microtile */
 716    if (maxColL == -1) {
 717       /* all transparent black */
 718       cc[0] = ~0u;
 719       for (i = 0; i < n_comp; i++) {
 720          vec[0][i] = 0;
 721          vec[1][i] = 0;
 722       }
 723    } else {
 724       cc[0] = 0;
 725       for (i = 0; i < n_comp; i++) {
 726          vec[0][i] = input[minColL][i];
 727          vec[1][i] = input[maxColL][i];
 728       }
 729       if (minColL != maxColL) {
 730          /* compute interpolation vector */
 731          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 732
 733          /* add in texels */
 734          lolo = 0;
 735          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 736             int texel = n_vect + 1; /* transparent black */
 737             if (!ISTBLACK(input[k])) {
 738                /* interpolate color */
 739                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 740             }
 741             /* add in texel */
 742             lolo <<= 2;
 743             lolo |= texel;
 744          }
 745          cc[0] = lolo;
 746       }
 747    }
 748
 749    /* right microtile */
 750    if (maxColR == -1) {
 751       /* all transparent black */
 752       cc[1] = ~0u;
 753       for (i = 0; i < n_comp; i++) {
 754          vec[2][i] = 0;
 755          vec[3][i] = 0;
 756       }
 757    } else {
 758       cc[1] = 0;
 759       for (i = 0; i < n_comp; i++) {
 760          vec[2][i] = input[minColR][i];
 761          vec[3][i] = input[maxColR][i];
 762       }
 763       if (minColR != maxColR) {
 764          /* compute interpolation vector */
 765          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 766
 767          /* add in texels */
 768          lohi = 0;
 769          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 770             int texel = n_vect + 1; /* transparent black */
 771             if (!ISTBLACK(input[k])) {
 772                /* interpolate color */
 773                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 774             }
 775             /* add in texel */
 776             lohi <<= 2;
 777             lohi |= texel;
 778          }
 779          cc[1] = lohi;
 780       }
 781    }
 782
 783    Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
 784    for (j = 2 * 2 - 1; j >= 0; j--) {
 785       for (i = 0; i < n_comp; i++) {
 786          /* add in colors */
 787          Q_SHL(hi, 5);
 788          Q_OR32(hi, vec[j][i] >> 3);
 789       }
 790    }
 791    ((qword *)cc)[1] = hi;
 792 }
 793
 794
 795 static void
 796 fxt1_quantize_MIXED0 (dword *cc,
 797                       byte input[N_TEXELS][MAX_COMP])
 798 {
 799    const int n_vect = 3; /* highest vector number in each microtile */
 800    const int n_comp = 3; /* 3 components: R, G, B */
 801    byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
 802    float b, iv[MAX_COMP]; /* interpolation vector */
 803    int i, j, k;
 804    qword hi; /* high quadword */
 805    dword lohi, lolo; /* low quadword: hi dword, lo dword */
 806
 807    int minColL = 0, maxColL = 0;
 808    int minColR = 0, maxColR = 0;
 809 #if 0
 810    int minSum;
 811    int maxSum;
 812
 813    /* Our solution here is to find the darkest and brightest colors in
 814     * the 4x4 tile and use those as the two representative colors.
 815     * There are probably better algorithms to use (histogram-based).
 816     */
 817    minSum = 2000; /* big enough */
 818    maxSum = -1; /* small enough */
 819    for (k = 0; k < N_TEXELS / 2; k++) {
 820       int sum = 0;
 821       for (i = 0; i < n_comp; i++) {
 822          sum += input[k][i];
 823       }
 824       if (minSum > sum) {
 825          minSum = sum;
 826          minColL = k;
 827       }
 828       if (maxSum < sum) {
 829          maxSum = sum;
 830          maxColL = k;
 831       }
 832    }
 833    minSum = 2000; /* big enough */
 834    maxSum = -1; /* small enough */
 835    for (; k < N_TEXELS; k++) {
 836       int sum = 0;
 837       for (i = 0; i < n_comp; i++) {
 838          sum += input[k][i];
 839       }
 840       if (minSum > sum) {
 841          minSum = sum;
 842          minColR = k;
 843       }
 844       if (maxSum < sum) {
 845          maxSum = sum;
 846          maxColR = k;
 847       }
 848    }
 849 #else
 850    int minVal;
 851    int maxVal;
 852    int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
 853    int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
 854
 855    /* Scan the channel with max variance for lo & hi
 856     * and use those as the two representative colors.
 857     */
 858    minVal = 2000; /* big enough */
 859    maxVal = -1; /* small enough */
 860    for (k = 0; k < N_TEXELS / 2; k++) {
 861       int t = input[k][maxVarL];
 862       if (minVal > t) {
 863          minVal = t;
 864          minColL = k;
 865       }
 866       if (maxVal < t) {
 867          maxVal = t;
 868          maxColL = k;
 869       }
 870    }
 871    minVal = 2000; /* big enough */
 872    maxVal = -1; /* small enough */
 873    for (; k < N_TEXELS; k++) {
 874       int t = input[k][maxVarR];
 875       if (minVal > t) {
 876          minVal = t;
 877          minColR = k;
 878       }
 879       if (maxVal < t) {
 880          maxVal = t;
 881          maxColR = k;
 882       }
 883    }
 884 #endif
 885
 886    /* left microtile */
 887    cc[0] = 0;
 888    for (i = 0; i < n_comp; i++) {
 889       vec[0][i] = input[minColL][i];
 890       vec[1][i] = input[maxColL][i];
 891    }
 892    if (minColL != maxColL) {
 893       /* compute interpolation vector */
 894       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
 895
 896       /* add in texels */
 897       lolo = 0;
 898       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
 899          int texel;
 900          /* interpolate color */
 901          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 902          /* add in texel */
 903          lolo <<= 2;
 904          lolo |= texel;
 905       }
 906
 907       /* funky encoding for LSB of green */
 908       if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
 909          for (i = 0; i < n_comp; i++) {
 910             vec[1][i] = input[minColL][i];
 911             vec[0][i] = input[maxColL][i];
 912          }
 913          lolo = ~lolo;
 914       }
 915
 916       cc[0] = lolo;
 917    }
 918
 919    /* right microtile */
 920    cc[1] = 0;
 921    for (i = 0; i < n_comp; i++) {
 922       vec[2][i] = input[minColR][i];
 923       vec[3][i] = input[maxColR][i];
 924    }
 925    if (minColR != maxColR) {
 926       /* compute interpolation vector */
 927       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
 928
 929       /* add in texels */
 930       lohi = 0;
 931       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
 932          int texel;
 933          /* interpolate color */
 934          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
 935          /* add in texel */
 936          lohi <<= 2;
 937          lohi |= texel;
 938       }
 939
 940       /* funky encoding for LSB of green */
 941       if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
 942          for (i = 0; i < n_comp; i++) {
 943             vec[3][i] = input[minColR][i];
 944             vec[2][i] = input[maxColR][i];
 945          }
 946          lohi = ~lohi;
 947       }
 948
 949       cc[1] = lohi;
 950    }
 951
 952    Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
 953    for (j = 2 * 2 - 1; j >= 0; j--) {
 954       for (i = 0; i < n_comp; i++) {
 955          /* add in colors */
 956          Q_SHL(hi, 5);
 957          Q_OR32(hi, vec[j][i] >> 3);
 958       }
 959    }
 960    ((qword *)cc)[1] = hi;
 961 }
 962
 963
 964 static void
 965 fxt1_quantize (dword *cc, const byte *lines[], int comps)
 966 {
 967    int trualpha;
 968    byte reord[N_TEXELS][MAX_COMP];
 969
 970    byte input[N_TEXELS][MAX_COMP];
 971    int i, k, l;
 972
 973    if (comps == 3) {
 974       /* make the whole block opaque */
 975       memset(input, -1, sizeof(input));
 976    }
 977
 978    /* 8 texels each line */
 979    for (l = 0; l < 4; l++) {
 980       for (k = 0; k < 4; k++) {
 981          for (i = 0; i < comps; i++) {
 982             input[k + l * 4][i] = *lines[l]++;
 983          }
 984       }
 985       for (; k < 8; k++) {
 986          for (i = 0; i < comps; i++) {
 987             input[k + l * 4 + 12][i] = *lines[l]++;
 988          }
 989       }
 990    }
 991
 992    /* block layout:
 993     * 00, 01, 02, 03, 08, 09, 0a, 0b
 994     * 10, 11, 12, 13, 18, 19, 1a, 1b
 995     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
 996     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
 997     */
 998
 999    /* [dBorca]
1000     * stupidity flows forth from this
1001     */
1002    l = N_TEXELS;
1003    trualpha = 0;
1004    if (comps == 4) {
1005       /* skip all transparent black texels */
1006       l = 0;
1007       for (k = 0; k < N_TEXELS; k++) {
1008          /* test all components against 0 */
1009          if (!ISTBLACK(input[k])) {
1010             /* texel is not transparent black */
1011             COPY_4UBV(reord[l], input[k]);
1012             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1013                /* non-opaque texel */
1014                trualpha = !0;
1015             }
1016             l++;
1017          }
1018       }
1019    }
1020
1021 #if 0
1022    if (trualpha) {
1023       fxt1_quantize_ALPHA0(cc, input, reord, l);
1024    } else if (l == 0) {
1025       cc[0] = cc[1] = cc[2] = -1;
1026       cc[3] = 0;
1027    } else if (l < N_TEXELS) {
1028       fxt1_quantize_HI(cc, input, reord, l);
1029    } else {
1030       fxt1_quantize_CHROMA(cc, input);
1031    }
1032    (void)fxt1_quantize_ALPHA1;
1033    (void)fxt1_quantize_MIXED1;
1034    (void)fxt1_quantize_MIXED0;
1035 #else
1036    if (trualpha) {
1037       fxt1_quantize_ALPHA1(cc, input);
1038    } else if (l == 0) {
1039       cc[0] = cc[1] = cc[2] = ~0u;
1040       cc[3] = 0;
1041    } else if (l < N_TEXELS) {
1042       fxt1_quantize_MIXED1(cc, input);
1043    } else {
1044       fxt1_quantize_MIXED0(cc, input);
1045    }
1046    (void)fxt1_quantize_ALPHA0;
1047    (void)fxt1_quantize_HI;
1048    (void)fxt1_quantize_CHROMA;
1049 #endif
1050 }
1051
1052
1053
1054 /**
1055  * Upscale an image by replication, not (typical) stretching.
1056  * We use this when the image width or height is less than a
1057  * certain size (4, 8) and we need to upscale an image.
1058  */
1059 static void
1060 upscale_teximage2d(int inWidth, int inHeight,
1061                    int outWidth, int outHeight,
1062                    int comps, const byte *src, int srcRowStride,
1063                    byte *dest )
1064 {
1065    int i, j, k;
1066
1067    assert(outWidth >= inWidth);
1068    assert(outHeight >= inHeight);
1069 #if 0
1070    ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1071    ASSERT((outWidth & 3) == 0);
1072    ASSERT((outHeight & 3) == 0);
1073 #endif
1074
1075    for (i = 0; i < outHeight; i++) {
1076       const int ii = i % inHeight;
1077       for (j = 0; j < outWidth; j++) {
1078          const int jj = j % inWidth;
1079          for (k = 0; k < comps; k++) {
1080             dest[(i * outWidth + j) * comps + k]
1081                = src[ii * srcRowStride + jj * comps + k];
1082          }
1083       }
1084    }
1085 }
1086
1087 TAPI void TAPIENTRY
1088 fxt1_encode (dword width, dword height, int comps,
1089              const void *source, int srcRowStride,
1090              void *dest, int destRowStride)
1091 {
1092    dword x, y;
1093    const byte *data;
1094    dword *encoded = (dword *)dest;
1095    void *newSource = NULL, *newSourcetmp = NULL;
1096
1097    assert(comps == 3 || comps == 4);
1098
1099    if (comps == 3)
1100        newSource = reorder_source_3_alloc(source, width, height, srcRowStride);
1101    if (comps == 4)
1102        newSource = reorder_source_4_alloc(source, width, height, srcRowStride);
1103    if (!newSource)
1104        goto cleanUp;
1105    source = newSource;
1106
1107    /* Replicate image if width is not M8 or height is not M4 */
1108    if ((width & 7) | (height & 3)) {
1109       int newWidth = (width + 7) & ~7;
1110       int newHeight = (height + 3) & ~3;
1111       newSourcetmp = malloc(comps * newWidth * newHeight * sizeof(byte));
1112       free(newSource);
1113       newSource = newSourcetmp;
1114       if (!newSource) {
1115          goto cleanUp;
1116       }
1117       upscale_teximage2d(width, height, newWidth, newHeight,
1118                          comps, (const byte *) source,
1119                          srcRowStride, (byte *) newSource);
1120       source = newSource;
1121       width = newWidth;
1122       height = newHeight;
1123       srcRowStride = comps * newWidth;
1124    }
1125
1126    data = (const byte *) source;
1127    destRowStride = (destRowStride - width * 2) / 4;
1128    for (y = 0; y < height; y += 4) {
1129       dword offs = 0 + (y + 0) * srcRowStride;
1130       for (x = 0; x < width; x += 8) {
1131          const byte *lines[4];
1132          lines[0] = &data[offs];
1133          lines[1] = lines[0] + srcRowStride;
1134          lines[2] = lines[1] + srcRowStride;
1135          lines[3] = lines[2] + srcRowStride;
1136          offs += 8 * comps;
1137          fxt1_quantize(encoded, lines, comps);
1138          /* 128 bits per 8x4 block */
1139          encoded += 4;
1140       }
1141       encoded += destRowStride;
1142    }
1143
1144  cleanUp:
1145    free(newSource);
1146 }
1147
1148
1149 /***************************************************************************\
1150  * FXT1 decoder
1151  *
1152  * The decoder is based on GL_3DFX_texture_compression_FXT1
1153  * specification and serves as a concept for the encoder.
1154 \***************************************************************************/
1155
1156
1157 /* lookup table for scaling 5 bit colors up to 8 bits */
1158 static const byte _rgb_scale_5[] = {
1159    0,   8,   16,  25,  33,  41,  49,  58,
1160    66,  74,  82,  90,  99,  107, 115, 123,
1161    132, 140, 148, 156, 165, 173, 181, 189,
1162    197, 206, 214, 222, 230, 239, 247, 255
1163 };
1164
1165 /* lookup table for scaling 6 bit colors up to 8 bits */
1166 static const byte _rgb_scale_6[] = {
1167    0,   4,   8,   12,  16,  20,  24,  28,
1168    32,  36,  40,  45,  49,  53,  57,  61,
1169    65,  69,  73,  77,  81,  85,  89,  93,
1170    97,  101, 105, 109, 113, 117, 121, 125,
1171    130, 134, 138, 142, 146, 150, 154, 158,
1172    162, 166, 170, 174, 178, 182, 186, 190,
1173    194, 198, 202, 206, 210, 215, 219, 223,
1174    227, 231, 235, 239, 243, 247, 251, 255
1175 };
1176
1177
/* CC_SEL: the block viewed as an array of dwords, shifted right so that
 *         bit `which` of the block becomes bit 0; the caller masks the
 *         result.  The block is only read, so it is accessed as const.
 */
#define CC_SEL(cc, which) (((const dword *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of c to 8 bits */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand the low 5 bits of c plus one extra low-order bit b to 8 bits */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: rounded blend of c0 and c1 with weights (n - t) / n and t / n.
 *       The whole replacement list is parenthesized so the macro can be
 *       used as an operand of a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1182
1183
1184 static void
1185 fxt1_decode_1HI (const byte *code, int t, byte *rgba)
1186 {
1187    const dword *cc;
1188
1189    t *= 3;
1190    cc = (const dword *)(code + t / 8);
1191    t = (cc[0] >> (t & 7)) & 7;
1192
1193    if (t == 7) {
1194       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1195    } else {
1196       byte r, g, b;
1197       cc = (const dword *)(code + 12);
1198       if (t == 0) {
1199          b = UP5(CC_SEL(cc, 0));
1200          g = UP5(CC_SEL(cc, 5));
1201          r = UP5(CC_SEL(cc, 10));
1202       } else if (t == 6) {
1203          b = UP5(CC_SEL(cc, 15));
1204          g = UP5(CC_SEL(cc, 20));
1205          r = UP5(CC_SEL(cc, 25));
1206       } else {
1207          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1208          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1209          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1210       }
1211       rgba[RCOMP] = r;
1212       rgba[GCOMP] = g;
1213       rgba[BCOMP] = b;
1214       rgba[ACOMP] = 255;
1215    }
1216 }
1217
1218
1219 static void
1220 fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba)
1221 {
1222    const dword *cc;
1223    dword kk;
1224
1225    cc = (const dword *)code;
1226    if (t & 16) {
1227       cc++;
1228       t &= 15;
1229    }
1230    t = (cc[0] >> (t * 2)) & 3;
1231
1232    t *= 15;
1233    cc = (const dword *)(code + 8 + t / 8);
1234    kk = cc[0] >> (t & 7);
1235    rgba[BCOMP] = UP5(kk);
1236    rgba[GCOMP] = UP5(kk >> 5);
1237    rgba[RCOMP] = UP5(kk >> 10);
1238    rgba[ACOMP] = 255;
1239 }
1240
1241
1242 static void
1243 fxt1_decode_1MIXED (const byte *code, int t, byte *rgba)
1244 {
1245    const dword *cc;
1246    dword col[2][3];
1247    int glsb, selb;
1248
1249    cc = (const dword *)code;
1250    if (t & 16) {
1251       t &= 15;
1252       t = (cc[1] >> (t * 2)) & 3;
1253       /* col 2 */
1254       col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6;
1255       col[0][GCOMP] = CC_SEL(cc, 99);
1256       col[0][RCOMP] = CC_SEL(cc, 104);
1257       /* col 3 */
1258       col[1][BCOMP] = CC_SEL(cc, 109);
1259       col[1][GCOMP] = CC_SEL(cc, 114);
1260       col[1][RCOMP] = CC_SEL(cc, 119);
1261       glsb = CC_SEL(cc, 126);
1262       selb = CC_SEL(cc, 33);
1263    } else {
1264       t = (cc[0] >> (t * 2)) & 3;
1265       /* col 0 */
1266       col[0][BCOMP] = CC_SEL(cc, 64);
1267       col[0][GCOMP] = CC_SEL(cc, 69);
1268       col[0][RCOMP] = CC_SEL(cc, 74);
1269       /* col 1 */
1270       col[1][BCOMP] = CC_SEL(cc, 79);
1271       col[1][GCOMP] = CC_SEL(cc, 84);
1272       col[1][RCOMP] = CC_SEL(cc, 89);
1273       glsb = CC_SEL(cc, 125);
1274       selb = CC_SEL(cc, 1);
1275    }
1276
1277    if (CC_SEL(cc, 124) & 1) {
1278       /* alpha[0] == 1 */
1279
1280       if (t == 3) {
1281          /* zero */
1282          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1283       } else {
1284          byte r, g, b;
1285          if (t == 0) {
1286             b = UP5(col[0][BCOMP]);
1287             g = UP5(col[0][GCOMP]);
1288             r = UP5(col[0][RCOMP]);
1289          } else if (t == 2) {
1290             b = UP5(col[1][BCOMP]);
1291             g = UP6(col[1][GCOMP], glsb);
1292             r = UP5(col[1][RCOMP]);
1293          } else {
1294             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1295             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1296             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1297          }
1298          rgba[RCOMP] = r;
1299          rgba[GCOMP] = g;
1300          rgba[BCOMP] = b;
1301          rgba[ACOMP] = 255;
1302       }
1303    } else {
1304       /* alpha[0] == 0 */
1305       byte r, g, b;
1306       if (t == 0) {
1307          b = UP5(col[0][BCOMP]);
1308          g = UP6(col[0][GCOMP], glsb ^ selb);
1309          r = UP5(col[0][RCOMP]);
1310       } else if (t == 3) {
1311          b = UP5(col[1][BCOMP]);
1312          g = UP6(col[1][GCOMP], glsb);
1313          r = UP5(col[1][RCOMP]);
1314       } else {
1315          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1316          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1317                         UP6(col[1][GCOMP], glsb));
1318          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1319       }
1320       rgba[RCOMP] = r;
1321       rgba[GCOMP] = g;
1322       rgba[BCOMP] = b;
1323       rgba[ACOMP] = 255;
1324    }
1325 }
1326
1327
1328 static void
1329 fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba)
1330 {
1331    const dword *cc;
1332    byte r, g, b, a;
1333
1334    cc = (const dword *)code;
1335    if (CC_SEL(cc, 124) & 1) {
1336       /* lerp == 1 */
1337       dword col0[4];
1338
1339       if (t & 16) {
1340          t &= 15;
1341          t = (cc[1] >> (t * 2)) & 3;
1342          /* col 2 */
1343          col0[BCOMP] = (*(const dword *)(code + 11)) >> 6;
1344          col0[GCOMP] = CC_SEL(cc, 99);
1345          col0[RCOMP] = CC_SEL(cc, 104);
1346          col0[ACOMP] = CC_SEL(cc, 119);
1347       } else {
1348          t = (cc[0] >> (t * 2)) & 3;
1349          /* col 0 */
1350          col0[BCOMP] = CC_SEL(cc, 64);
1351          col0[GCOMP] = CC_SEL(cc, 69);
1352          col0[RCOMP] = CC_SEL(cc, 74);
1353          col0[ACOMP] = CC_SEL(cc, 109);
1354       }
1355
1356       if (t == 0) {
1357          b = UP5(col0[BCOMP]);
1358          g = UP5(col0[GCOMP]);
1359          r = UP5(col0[RCOMP]);
1360          a = UP5(col0[ACOMP]);
1361       } else if (t == 3) {
1362          b = UP5(CC_SEL(cc, 79));
1363          g = UP5(CC_SEL(cc, 84));
1364          r = UP5(CC_SEL(cc, 89));
1365          a = UP5(CC_SEL(cc, 114));
1366       } else {
1367          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1368          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1369          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1370          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1371       }
1372    } else {
1373       /* lerp == 0 */
1374
1375       if (t & 16) {
1376          cc++;
1377          t &= 15;
1378       }
1379       t = (cc[0] >> (t * 2)) & 3;
1380
1381       if (t == 3) {
1382          /* zero */
1383          r = g = b = a = 0;
1384       } else {
1385          dword kk;
1386          cc = (const dword *)code;
1387          a = UP5(cc[3] >> (t * 5 + 13));
1388          t *= 15;
1389          cc = (const dword *)(code + 8 + t / 8);
1390          kk = cc[0] >> (t & 7);
1391          b = UP5(kk);
1392          g = UP5(kk >> 5);
1393          r = UP5(kk >> 10);
1394       }
1395    }
1396    rgba[RCOMP] = r;
1397    rgba[GCOMP] = g;
1398    rgba[BCOMP] = b;
1399    rgba[ACOMP] = a;
1400 }
1401
1402
1403 TAPI void TAPIENTRY
1404 fxt1_decode_1 (const void *texture, int stride, /* in pixels */
1405                int i, int j, byte *rgba)
1406 {
1407    static void (*decode_1[]) (const byte *, int, byte *) = {
1408       fxt1_decode_1HI,     /* cc-high   = "00?" */
1409       fxt1_decode_1HI,     /* cc-high   = "00?" */
1410       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1411       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1412       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1413       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1414       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1415       fxt1_decode_1MIXED   /* mixed     = "1??" */
1416    };
1417
1418    const byte *code = (const byte *)texture +
1419                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1420    int mode = CC_SEL(code, 125);
1421    int t = i & 7;
1422
1423    if (t & 4) {
1424       t += 12;
1425    }
1426    t += (j & 3) * 4;
1427
1428    decode_1[mode](code, t, rgba);
1429 }