98e75f2d |
1 | /* |
2 | * DXTn codec |
3 | * Version: 1.1 |
4 | * |
5 | * Copyright (C) 2004 Daniel Borca All Rights Reserved. |
6 | * |
7 | * this is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2, or (at your option) |
10 | * any later version. |
11 | * |
12 | * this is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | * GNU General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU General Public License |
18 | * along with GNU Make; see the file COPYING. If not, write to |
19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
20 | */ |
21 | |
22 | /* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net> |
23 | * Added support for ARGB inputs, DXT3,5 workaround for ATI Radeons, and |
24 | * YUV conversions to determine representative colors. |
25 | */ |
26 | |
27 | |
28 | #include <stdlib.h> |
29 | #include <string.h> |
30 | #include <assert.h> |
31 | |
32 | #include <stdio.h> |
33 | |
34 | #include "types.h" |
35 | #include "internal.h" |
36 | #include "dxtn.h" |
37 | |
38 | |
39 | /***************************************************************************\ |
40 | * DXTn encoder |
41 | * |
42 | * The encoder was built by reversing the decoder, |
43 | * and is vaguely based on FXT1 codec. Note that this code |
44 | * is merely a proof of concept, since it is highly UNoptimized! |
45 | \***************************************************************************/ |
46 | |
47 | |
/* Largest number of components a texel can carry in the local tile copy. */
#define MAX_COMP 4

/* Largest number of base vectors the encoder ever has to look for. */
#define MAX_VECT 4

/* A block always covers 4x4 = 16 texels. */
#define N_TEXELS 16

/* Pack the R, G and B components of texel `v` into one 5:6:5 word
 * (red in the top five bits, blue in the bottom five). */
#define COLOR565(v)                         \
    (word)((((v)[RCOMP] & 0xf8) << 8) |     \
           (((v)[GCOMP] & 0xfc) << 3) |     \
           ((v)[BCOMP] >> 3))
52 | |
53 | |
/* Translates a color interpolation step (0 = first endpoint) into the
 * 2-bit code stored in the block.
 *   row 0: four steps, the second endpoint (step 3) is stored as code 1;
 *   row 1: three steps, the second endpoint (step 2) is stored as code 1
 *          and code 3 is left for the caller (black / transparent).
 */
static const int dxtn_color_tlat[2][4] = {
    /* row 0 */ { 0, 2, 3, 1 },
    /* row 1 */ { 0, 2, 1, 3 }
};

/* Translates an alpha interpolation step into the 3-bit code stored in
 * a DXT5 block.
 *   row 0: eight steps, the second endpoint (step 7) is stored as code 1;
 *   row 1: six steps, the second endpoint (step 5) is stored as code 1
 *          and codes 6 and 7 map to themselves.
 */
static const int dxtn_alpha_tlat[2][8] = {
    /* row 0 */ { 0, 2, 3, 4, 5, 6, 7, 1 },
    /* row 1 */ { 0, 2, 3, 4, 5, 1, 6, 7 }
};
63 | |
64 | |
65 | static void |
66 | dxt1_rgb_quantize (dword *cc, const byte *lines[], int comps) |
67 | { |
68 | float b, iv[MAX_COMP]; /* interpolation vector */ |
69 | |
70 | dword hi; /* high doubleword */ |
71 | int color0, color1; |
72 | int n_vect; |
73 | const int n_comp = 3; |
74 | int black = 0; |
75 | |
76 | #ifndef YUV |
77 | int minSum = 2000; /* big enough */ |
78 | #else |
79 | int minSum = 2000000; |
80 | #endif |
81 | int maxSum = -1; /* small enough */ |
82 | int minCol = 0; /* phoudoin: silent compiler! */ |
83 | int maxCol = 0; /* phoudoin: silent compiler! */ |
84 | |
85 | byte input[N_TEXELS][MAX_COMP]; |
86 | int i, k, l; |
87 | |
88 | /* make the whole block opaque */ |
89 | /* we will NEVER reference ACOMP of any pixel */ |
90 | |
91 | /* 4 texels each line */ |
92 | #ifndef ARGB |
93 | for (l = 0; l < 4; l++) { |
94 | for (k = 0; k < 4; k++) { |
95 | for (i = 0; i < comps; i++) { |
96 | input[k + l * 4][i] = *lines[l]++; |
97 | } |
98 | } |
99 | } |
100 | #else |
101 | /* H.Morii - support for ARGB inputs */ |
102 | for (l = 0; l < 4; l++) { |
103 | for (k = 0; k < 4; k++) { |
104 | input[k + l * 4][2] = *lines[l]++; |
105 | input[k + l * 4][1] = *lines[l]++; |
106 | input[k + l * 4][0] = *lines[l]++; |
107 | if (comps == 4) input[k + l * 4][3] = *lines[l]++; |
108 | } |
109 | } |
110 | #endif |
111 | |
112 | /* Our solution here is to find the darkest and brightest colors in |
113 | * the 4x4 tile and use those as the two representative colors. |
114 | * There are probably better algorithms to use (histogram-based). |
115 | */ |
116 | for (k = 0; k < N_TEXELS; k++) { |
117 | int sum = 0; |
118 | #ifndef YUV |
119 | for (i = 0; i < n_comp; i++) { |
120 | sum += input[k][i]; |
121 | } |
122 | #else |
123 | /* RGB to YUV conversion according to CCIR 601 specs |
124 | * Y = 0.299R+0.587G+0.114B |
125 | * U = 0.713(R - Y) = 0.500R-0.419G-0.081B |
126 | * V = 0.564(B - Y) = -0.169R-0.331G+0.500B |
127 | */ |
128 | sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; |
129 | #endif |
130 | if (minSum > sum) { |
131 | minSum = sum; |
132 | minCol = k; |
133 | } |
134 | if (maxSum < sum) { |
135 | maxSum = sum; |
136 | maxCol = k; |
137 | } |
138 | if (sum == 0) { |
139 | black = 1; |
140 | } |
141 | } |
142 | |
143 | color0 = COLOR565(input[minCol]); |
144 | color1 = COLOR565(input[maxCol]); |
145 | |
146 | if (color0 == color1) { |
147 | /* we'll use 3-vector */ |
148 | cc[0] = color0 | (color1 << 16); |
149 | hi = black ? -1 : 0; |
150 | } else { |
151 | if (black && ((color0 == 0) || (color1 == 0))) { |
152 | /* we still can use 4-vector */ |
153 | black = 0; |
154 | } |
155 | |
156 | if (black ^ (color0 <= color1)) { |
157 | int aux; |
158 | aux = color0; |
159 | color0 = color1; |
160 | color1 = aux; |
161 | aux = minCol; |
162 | minCol = maxCol; |
163 | maxCol = aux; |
164 | } |
165 | n_vect = (color0 <= color1) ? 2 : 3; |
166 | |
167 | MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]); |
168 | |
169 | /* add in texels */ |
170 | cc[0] = color0 | (color1 << 16); |
171 | hi = 0; |
172 | for (k = N_TEXELS - 1; k >= 0; k--) { |
173 | int texel = 3; |
174 | int sum = 0; |
175 | if (black) { |
176 | for (i = 0; i < n_comp; i++) { |
177 | sum += input[k][i]; |
178 | } |
179 | } |
180 | if (!black || sum) { |
181 | /* interpolate color */ |
182 | CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); |
183 | texel = dxtn_color_tlat[black][texel]; |
184 | } |
185 | /* add in texel */ |
186 | hi <<= 2; |
187 | hi |= texel; |
188 | } |
189 | } |
190 | cc[1] = hi; |
191 | } |
192 | |
193 | |
194 | static void |
195 | dxt1_rgba_quantize (dword *cc, const byte *lines[], int comps) |
196 | { |
197 | float b, iv[MAX_COMP]; /* interpolation vector */ |
198 | |
199 | dword hi; /* high doubleword */ |
200 | int color0, color1; |
201 | int n_vect; |
202 | const int n_comp = 3; |
203 | int transparent = 0; |
204 | |
205 | #ifndef YUV |
206 | int minSum = 2000; /* big enough */ |
207 | #else |
208 | int minSum = 2000000; |
209 | #endif |
210 | int maxSum = -1; /* small enough */ |
211 | int minCol = 0; /* phoudoin: silent compiler! */ |
212 | int maxCol = 0; /* phoudoin: silent compiler! */ |
213 | |
214 | byte input[N_TEXELS][MAX_COMP]; |
215 | int i, k, l; |
216 | |
217 | if (comps == 3) { |
218 | /* make the whole block opaque */ |
219 | memset(input, -1, sizeof(input)); |
220 | } |
221 | |
222 | /* 4 texels each line */ |
223 | #ifndef ARGB |
224 | for (l = 0; l < 4; l++) { |
225 | for (k = 0; k < 4; k++) { |
226 | for (i = 0; i < comps; i++) { |
227 | input[k + l * 4][i] = *lines[l]++; |
228 | } |
229 | } |
230 | } |
231 | #else |
232 | /* H.Morii - support for ARGB inputs */ |
233 | for (l = 0; l < 4; l++) { |
234 | for (k = 0; k < 4; k++) { |
235 | input[k + l * 4][2] = *lines[l]++; |
236 | input[k + l * 4][1] = *lines[l]++; |
237 | input[k + l * 4][0] = *lines[l]++; |
238 | if (comps == 4) input[k + l * 4][3] = *lines[l]++; |
239 | } |
240 | } |
241 | #endif |
242 | |
243 | /* Our solution here is to find the darkest and brightest colors in |
244 | * the 4x4 tile and use those as the two representative colors. |
245 | * There are probably better algorithms to use (histogram-based). |
246 | */ |
247 | for (k = 0; k < N_TEXELS; k++) { |
248 | int sum = 0; |
249 | #ifndef YUV |
250 | for (i = 0; i < n_comp; i++) { |
251 | sum += input[k][i]; |
252 | } |
253 | #else |
254 | sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; |
255 | #endif |
256 | if (minSum > sum) { |
257 | minSum = sum; |
258 | minCol = k; |
259 | } |
260 | if (maxSum < sum) { |
261 | maxSum = sum; |
262 | maxCol = k; |
263 | } |
264 | if (input[k][ACOMP] < 128) { |
265 | transparent = 1; |
266 | } |
267 | } |
268 | |
269 | color0 = COLOR565(input[minCol]); |
270 | color1 = COLOR565(input[maxCol]); |
271 | |
272 | if (color0 == color1) { |
273 | /* we'll use 3-vector */ |
274 | cc[0] = color0 | (color1 << 16); |
275 | hi = transparent ? -1 : 0; |
276 | } else { |
277 | if (transparent ^ (color0 <= color1)) { |
278 | int aux; |
279 | aux = color0; |
280 | color0 = color1; |
281 | color1 = aux; |
282 | aux = minCol; |
283 | minCol = maxCol; |
284 | maxCol = aux; |
285 | } |
286 | n_vect = (color0 <= color1) ? 2 : 3; |
287 | |
288 | MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]); |
289 | |
290 | /* add in texels */ |
291 | cc[0] = color0 | (color1 << 16); |
292 | hi = 0; |
293 | for (k = N_TEXELS - 1; k >= 0; k--) { |
294 | int texel = 3; |
295 | if (input[k][ACOMP] >= 128) { |
296 | /* interpolate color */ |
297 | CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); |
298 | texel = dxtn_color_tlat[transparent][texel]; |
299 | } |
300 | /* add in texel */ |
301 | hi <<= 2; |
302 | hi |= texel; |
303 | } |
304 | } |
305 | cc[1] = hi; |
306 | } |
307 | |
308 | |
309 | static void |
310 | dxt3_rgba_quantize (dword *cc, const byte *lines[], int comps) |
311 | { |
312 | float b, iv[MAX_COMP]; /* interpolation vector */ |
313 | |
314 | dword lolo, lohi; /* low quadword: lo dword, hi dword */ |
315 | dword hihi; /* high quadword: high dword */ |
316 | int color0, color1; |
317 | const int n_vect = 3; |
318 | const int n_comp = 3; |
319 | |
320 | #ifndef YUV |
321 | int minSum = 2000; /* big enough */ |
322 | #else |
323 | int minSum = 2000000; |
324 | #endif |
325 | int maxSum = -1; /* small enough */ |
326 | int minCol = 0; /* phoudoin: silent compiler! */ |
327 | int maxCol = 0; /* phoudoin: silent compiler! */ |
328 | |
329 | byte input[N_TEXELS][MAX_COMP]; |
330 | int i, k, l; |
331 | |
332 | if (comps == 3) { |
333 | /* make the whole block opaque */ |
334 | memset(input, -1, sizeof(input)); |
335 | } |
336 | |
337 | /* 4 texels each line */ |
338 | #ifndef ARGB |
339 | for (l = 0; l < 4; l++) { |
340 | for (k = 0; k < 4; k++) { |
341 | for (i = 0; i < comps; i++) { |
342 | input[k + l * 4][i] = *lines[l]++; |
343 | } |
344 | } |
345 | } |
346 | #else |
347 | /* H.Morii - support for ARGB inputs */ |
348 | for (l = 0; l < 4; l++) { |
349 | for (k = 0; k < 4; k++) { |
350 | input[k + l * 4][2] = *lines[l]++; |
351 | input[k + l * 4][1] = *lines[l]++; |
352 | input[k + l * 4][0] = *lines[l]++; |
353 | if (comps == 4) input[k + l * 4][3] = *lines[l]++; |
354 | } |
355 | } |
356 | #endif |
357 | |
358 | /* Our solution here is to find the darkest and brightest colors in |
359 | * the 4x4 tile and use those as the two representative colors. |
360 | * There are probably better algorithms to use (histogram-based). |
361 | */ |
362 | for (k = 0; k < N_TEXELS; k++) { |
363 | int sum = 0; |
364 | #ifndef YUV |
365 | for (i = 0; i < n_comp; i++) { |
366 | sum += input[k][i]; |
367 | } |
368 | #else |
369 | sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; |
370 | #endif |
371 | if (minSum > sum) { |
372 | minSum = sum; |
373 | minCol = k; |
374 | } |
375 | if (maxSum < sum) { |
376 | maxSum = sum; |
377 | maxCol = k; |
378 | } |
379 | } |
380 | |
381 | /* add in alphas */ |
382 | lolo = lohi = 0; |
383 | for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { |
384 | /* add in alpha */ |
385 | lohi <<= 4; |
386 | lohi |= input[k][ACOMP] >> 4; |
387 | } |
388 | cc[1] = lohi; |
389 | for (; k >= 0; k--) { |
390 | /* add in alpha */ |
391 | lolo <<= 4; |
392 | lolo |= input[k][ACOMP] >> 4; |
393 | } |
394 | cc[0] = lolo; |
395 | |
396 | color0 = COLOR565(input[minCol]); |
397 | color1 = COLOR565(input[maxCol]); |
398 | |
399 | #ifdef RADEON |
400 | /* H.Morii - Workaround for ATI Radeon |
401 | * According to the OpenGL EXT_texture_compression_s3tc specs, |
402 | * the encoding of the RGB components for DXT3 and DXT5 formats |
403 | * use the non-transparent encodings of DXT1 but treated as |
404 | * though color0 > color1, regardless of the actual values of |
405 | * color0 and color1. ATI Radeons however require the values to |
406 | * be color0 > color1. |
407 | */ |
408 | if (color0 < color1) { |
409 | int aux; |
410 | aux = color0; |
411 | color0 = color1; |
412 | color1 = aux; |
413 | aux = minCol; |
414 | minCol = maxCol; |
415 | maxCol = aux; |
416 | } |
417 | #endif |
418 | |
419 | cc[2] = color0 | (color1 << 16); |
420 | |
421 | hihi = 0; |
422 | if (color0 != color1) { |
423 | MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]); |
424 | |
425 | /* add in texels */ |
426 | for (k = N_TEXELS - 1; k >= 0; k--) { |
427 | int texel; |
428 | /* interpolate color */ |
429 | CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); |
430 | texel = dxtn_color_tlat[0][texel]; |
431 | /* add in texel */ |
432 | hihi <<= 2; |
433 | hihi |= texel; |
434 | } |
435 | } |
436 | cc[3] = hihi; |
437 | } |
438 | |
439 | |
440 | static void |
441 | dxt5_rgba_quantize (dword *cc, const byte *lines[], int comps) |
442 | { |
443 | float b, iv[MAX_COMP]; /* interpolation vector */ |
444 | |
445 | qword lo; /* low quadword */ |
446 | dword hihi; /* high quadword: high dword */ |
447 | int color0, color1; |
448 | const int n_vect = 3; |
449 | const int n_comp = 3; |
450 | |
451 | #ifndef YUV |
452 | int minSum = 2000; /* big enough */ |
453 | #else |
454 | int minSum = 2000000; |
455 | #endif |
456 | int maxSum = -1; /* small enough */ |
457 | int minCol = 0; /* phoudoin: silent compiler! */ |
458 | int maxCol = 0; /* phoudoin: silent compiler! */ |
459 | int alpha0 = 2000; /* big enough */ |
460 | int alpha1 = -1; /* small enough */ |
461 | int anyZero = 0, anyOne = 0; |
462 | int a_vect; |
463 | |
464 | byte input[N_TEXELS][MAX_COMP]; |
465 | int i, k, l; |
466 | |
467 | if (comps == 3) { |
468 | /* make the whole block opaque */ |
469 | memset(input, -1, sizeof(input)); |
470 | } |
471 | |
472 | /* 4 texels each line */ |
473 | #ifndef ARGB |
474 | for (l = 0; l < 4; l++) { |
475 | for (k = 0; k < 4; k++) { |
476 | for (i = 0; i < comps; i++) { |
477 | input[k + l * 4][i] = *lines[l]++; |
478 | } |
479 | } |
480 | } |
481 | #else |
482 | /* H.Morii - support for ARGB inputs */ |
483 | for (l = 0; l < 4; l++) { |
484 | for (k = 0; k < 4; k++) { |
485 | input[k + l * 4][2] = *lines[l]++; |
486 | input[k + l * 4][1] = *lines[l]++; |
487 | input[k + l * 4][0] = *lines[l]++; |
488 | if (comps == 4) input[k + l * 4][3] = *lines[l]++; |
489 | } |
490 | } |
491 | #endif |
492 | |
493 | /* Our solution here is to find the darkest and brightest colors in |
494 | * the 4x4 tile and use those as the two representative colors. |
495 | * There are probably better algorithms to use (histogram-based). |
496 | */ |
497 | for (k = 0; k < N_TEXELS; k++) { |
498 | int sum = 0; |
499 | #ifndef YUV |
500 | for (i = 0; i < n_comp; i++) { |
501 | sum += input[k][i]; |
502 | } |
503 | #else |
504 | sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; |
505 | #endif |
506 | if (minSum > sum) { |
507 | minSum = sum; |
508 | minCol = k; |
509 | } |
510 | if (maxSum < sum) { |
511 | maxSum = sum; |
512 | maxCol = k; |
513 | } |
514 | if (alpha0 > input[k][ACOMP]) { |
515 | alpha0 = input[k][ACOMP]; |
516 | } |
517 | if (alpha1 < input[k][ACOMP]) { |
518 | alpha1 = input[k][ACOMP]; |
519 | } |
520 | if (input[k][ACOMP] == 0) { |
521 | anyZero = 1; |
522 | } |
523 | if (input[k][ACOMP] == 255) { |
524 | anyOne = 1; |
525 | } |
526 | } |
527 | |
528 | /* add in alphas */ |
529 | if (alpha0 == alpha1) { |
530 | /* we'll use 6-vector */ |
531 | cc[0] = alpha0 | (alpha1 << 8); |
532 | cc[1] = 0; |
533 | } else { |
534 | if (anyZero && ((alpha0 == 0) || (alpha1 == 0))) { |
535 | /* we still might use 8-vector */ |
536 | anyZero = 0; |
537 | } |
538 | if (anyOne && ((alpha0 == 255) || (alpha1 == 255))) { |
539 | /* we still might use 8-vector */ |
540 | anyOne = 0; |
541 | } |
542 | if ((anyZero | anyOne) ^ (alpha0 <= alpha1)) { |
543 | int aux; |
544 | aux = alpha0; |
545 | alpha0 = alpha1; |
546 | alpha1 = aux; |
547 | } |
548 | a_vect = (alpha0 <= alpha1) ? 5 : 7; |
549 | |
550 | /* compute interpolation vector */ |
551 | iv[ACOMP] = (float)a_vect / (alpha1 - alpha0); |
552 | b = -iv[ACOMP] * alpha0 + 0.5F; |
553 | |
554 | /* add in alphas */ |
555 | Q_MOV32(lo, 0); |
556 | for (k = N_TEXELS - 1; k >= 0; k--) { |
557 | int texel = -1; |
558 | if (anyZero | anyOne) { |
559 | if (input[k][ACOMP] == 0) { |
560 | texel = 6; |
561 | } else if (input[k][ACOMP] == 255) { |
562 | texel = 7; |
563 | } |
564 | } |
565 | /* interpolate alpha */ |
566 | if (texel == -1) { |
567 | float dot = input[k][ACOMP] * iv[ACOMP]; |
568 | texel = (int)(dot + b); |
569 | #if SAFECDOT |
570 | if (texel < 0) { |
571 | texel = 0; |
572 | } else if (texel > a_vect) { |
573 | texel = a_vect; |
574 | } |
575 | #endif |
576 | texel = dxtn_alpha_tlat[anyZero | anyOne][texel]; |
577 | } |
578 | /* add in texel */ |
579 | Q_SHL(lo, 3); |
580 | Q_OR32(lo, texel); |
581 | } |
582 | Q_SHL(lo, 16); |
583 | Q_OR32(lo, alpha0 | (alpha1 << 8)); |
584 | ((qword *)cc)[0] = lo; |
585 | } |
586 | |
587 | color0 = COLOR565(input[minCol]); |
588 | color1 = COLOR565(input[maxCol]); |
589 | |
590 | #ifdef RADEON /* H.Morii - Workaround for ATI Radeon */ |
591 | if (color0 < color1) { |
592 | int aux; |
593 | aux = color0; |
594 | color0 = color1; |
595 | color1 = aux; |
596 | aux = minCol; |
597 | minCol = maxCol; |
598 | maxCol = aux; |
599 | } |
600 | #endif |
601 | |
602 | cc[2] = color0 | (color1 << 16); |
603 | |
604 | hihi = 0; |
605 | if (color0 != color1) { |
606 | MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]); |
607 | |
608 | /* add in texels */ |
609 | for (k = N_TEXELS - 1; k >= 0; k--) { |
610 | int texel; |
611 | /* interpolate color */ |
612 | CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); |
613 | texel = dxtn_color_tlat[0][texel]; |
614 | /* add in texel */ |
615 | hihi <<= 2; |
616 | hihi |= texel; |
617 | } |
618 | } |
619 | cc[3] = hihi; |
620 | } |
621 | |
622 | |
623 | #define ENCODER(dxtn, n) \ |
624 | int TAPIENTRY \ |
625 | dxtn##_encode (int width, int height, int comps, \ |
626 | const void *source, int srcRowStride, \ |
627 | void *dest, int destRowStride) \ |
628 | { \ |
629 | int x, y; \ |
630 | const byte *data; \ |
631 | dword *encoded = (dword *)dest; \ |
632 | void *newSource = NULL; \ |
633 | \ |
634 | /* Replicate image if width is not M4 or height is not M4 */ \ |
635 | if ((width & 3) | (height & 3)) { \ |
636 | int newWidth = (width + 3) & ~3; \ |
637 | int newHeight = (height + 3) & ~3; \ |
638 | newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));\ |
639 | _mesa_upscale_teximage2d(width, height, newWidth, newHeight, \ |
640 | comps, (const byte *)source, \ |
641 | srcRowStride, (byte *)newSource); \ |
642 | source = newSource; \ |
643 | width = newWidth; \ |
644 | height = newHeight; \ |
645 | srcRowStride = comps * newWidth; \ |
646 | } \ |
647 | \ |
648 | data = (const byte *)source; \ |
649 | destRowStride = (destRowStride - width * n) / 4; \ |
650 | for (y = 0; y < height; y += 4) { \ |
651 | unsigned int offs = 0 + (y + 0) * srcRowStride; \ |
652 | for (x = 0; x < width; x += 4) { \ |
653 | const byte *lines[4]; \ |
654 | lines[0] = &data[offs]; \ |
655 | lines[1] = lines[0] + srcRowStride; \ |
656 | lines[2] = lines[1] + srcRowStride; \ |
657 | lines[3] = lines[2] + srcRowStride; \ |
658 | offs += 4 * comps; \ |
659 | dxtn##_quantize(encoded, lines, comps); \ |
660 | /* 4x4 block */ \ |
661 | encoded += n; \ |
662 | } \ |
663 | encoded += destRowStride; \ |
664 | } \ |
665 | \ |
666 | if (newSource != NULL) { \ |
667 | free(newSource); \ |
668 | } \ |
669 | \ |
670 | return 0; \ |
671 | } |
672 | |
673 | ENCODER(dxt1_rgb, 2) |
674 | ENCODER(dxt1_rgba, 2) |
675 | ENCODER(dxt3_rgba, 4) |
676 | ENCODER(dxt5_rgba, 4) |
677 | |
678 | |
679 | /***************************************************************************\ |
680 | * DXTn decoder |
681 | * |
682 | * The decoder is based on GL_EXT_texture_compression_s3tc |
683 | * specification and serves as a concept for the encoder. |
684 | \***************************************************************************/ |
685 | |
686 | |
687 | /* lookup table for scaling 4 bit colors up to 8 bits */ |
688 | static const byte _rgb_scale_4[] = { |
689 | 0, 17, 34, 51, 68, 85, 102, 119, |
690 | 136, 153, 170, 187, 204, 221, 238, 255 |
691 | }; |
692 | |
693 | /* lookup table for scaling 5 bit colors up to 8 bits */ |
694 | static const byte _rgb_scale_5[] = { |
695 | 0, 8, 16, 25, 33, 41, 49, 58, |
696 | 66, 74, 82, 90, 99, 107, 115, 123, |
697 | 132, 140, 148, 156, 165, 173, 181, 189, |
698 | 197, 206, 214, 222, 230, 239, 247, 255 |
699 | }; |
700 | |
701 | /* lookup table for scaling 6 bit colors up to 8 bits */ |
702 | static const byte _rgb_scale_6[] = { |
703 | 0, 4, 8, 12, 16, 20, 24, 28, |
704 | 32, 36, 40, 45, 49, 53, 57, 61, |
705 | 65, 69, 73, 77, 81, 85, 89, 93, |
706 | 97, 101, 105, 109, 113, 117, 121, 125, |
707 | 130, 134, 138, 142, 146, 150, 154, 158, |
708 | 162, 166, 170, 174, 178, 182, 186, 190, |
709 | 194, 198, 202, 206, 210, 215, 219, 223, |
710 | 227, 231, 235, 239, 243, 247, 251, 255 |
711 | }; |
712 | |
713 | |
/* View `cc` as an array of doublewords and shift bit number `which` of
 * that array down to bit 0; the caller masks off the bits it needs. */
#define CC_SEL(cc, which) \
    (((dword *)(cc))[(which) / 32] >> ((which) & 31))

/* Expand the low 4, 5 or 6 bits of `c` to an 8-bit component. */
#define UP4(c) _rgb_scale_4[(c) & 15]
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c) _rgb_scale_6[(c) & 63]

/* Clear one doubleword at `v` (used to zero a 4-component texel). */
#define ZERO_4UBV(v) *((dword *)(v)) = 0
719 | |
720 | |
721 | void TAPIENTRY |
722 | dxt1_rgb_decode_1 (const void *texture, int stride, |
723 | int i, int j, byte *rgba) |
724 | { |
725 | const byte *src = (const byte *)texture |
726 | + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8; |
727 | const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3; |
728 | if (code == 0) { |
729 | rgba[RCOMP] = UP5(CC_SEL(src, 11)); |
730 | rgba[GCOMP] = UP6(CC_SEL(src, 5)); |
731 | rgba[BCOMP] = UP5(CC_SEL(src, 0)); |
732 | } else if (code == 1) { |
733 | rgba[RCOMP] = UP5(CC_SEL(src, 27)); |
734 | rgba[GCOMP] = UP6(CC_SEL(src, 21)); |
735 | rgba[BCOMP] = UP5(CC_SEL(src, 16)); |
736 | } else { |
737 | const word col0 = src[0] | (src[1] << 8); |
738 | const word col1 = src[2] | (src[3] << 8); |
739 | if (col0 > col1) { |
740 | if (code == 2) { |
741 | rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3; |
742 | rgba[GCOMP] = (UP6(col0 >> 5) * 2 + UP6(col1 >> 5)) / 3; |
743 | rgba[BCOMP] = (UP5(col0 ) * 2 + UP5(col1 )) / 3; |
744 | } else { |
745 | rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3; |
746 | rgba[GCOMP] = (UP6(col0 >> 5) + 2 * UP6(col1 >> 5)) / 3; |
747 | rgba[BCOMP] = (UP5(col0 ) + 2 * UP5(col1 )) / 3; |
748 | } |
749 | } else { |
750 | if (code == 2) { |
751 | rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2; |
752 | rgba[GCOMP] = (UP6(col0 >> 5) + UP6(col1 >> 5)) / 2; |
753 | rgba[BCOMP] = (UP5(col0 ) + UP5(col1 )) / 2; |
754 | } else { |
755 | ZERO_4UBV(rgba); |
756 | } |
757 | } |
758 | } |
759 | rgba[ACOMP] = 255; |
760 | } |
761 | |
762 | |
763 | void TAPIENTRY |
764 | dxt1_rgba_decode_1 (const void *texture, int stride, |
765 | int i, int j, byte *rgba) |
766 | { |
767 | /* Same as rgb_dxt1 above, except alpha=0 if col0<=col1 and code=3. */ |
768 | const byte *src = (const byte *)texture |
769 | + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8; |
770 | const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3; |
771 | if (code == 0) { |
772 | rgba[RCOMP] = UP5(CC_SEL(src, 11)); |
773 | rgba[GCOMP] = UP6(CC_SEL(src, 5)); |
774 | rgba[BCOMP] = UP5(CC_SEL(src, 0)); |
775 | rgba[ACOMP] = 255; |
776 | } else if (code == 1) { |
777 | rgba[RCOMP] = UP5(CC_SEL(src, 27)); |
778 | rgba[GCOMP] = UP6(CC_SEL(src, 21)); |
779 | rgba[BCOMP] = UP5(CC_SEL(src, 16)); |
780 | rgba[ACOMP] = 255; |
781 | } else { |
782 | const word col0 = src[0] | (src[1] << 8); |
783 | const word col1 = src[2] | (src[3] << 8); |
784 | if (col0 > col1) { |
785 | if (code == 2) { |
786 | rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3; |
787 | rgba[GCOMP] = (UP6(col0 >> 5) * 2 + UP6(col1 >> 5)) / 3; |
788 | rgba[BCOMP] = (UP5(col0 ) * 2 + UP5(col1 )) / 3; |
789 | } else { |
790 | rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3; |
791 | rgba[GCOMP] = (UP6(col0 >> 5) + 2 * UP6(col1 >> 5)) / 3; |
792 | rgba[BCOMP] = (UP5(col0 ) + 2 * UP5(col1 )) / 3; |
793 | } |
794 | rgba[ACOMP] = 255; |
795 | } else { |
796 | if (code == 2) { |
797 | rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2; |
798 | rgba[GCOMP] = (UP6(col0 >> 5) + UP6(col1 >> 5)) / 2; |
799 | rgba[BCOMP] = (UP5(col0 ) + UP5(col1 )) / 2; |
800 | rgba[ACOMP] = 255; |
801 | } else { |
802 | ZERO_4UBV(rgba); |
803 | } |
804 | } |
805 | } |
806 | } |
807 | |
808 | |
809 | void TAPIENTRY |
810 | dxt3_rgba_decode_1 (const void *texture, int stride, |
811 | int i, int j, byte *rgba) |
812 | { |
813 | const byte *src = (const byte *)texture |
814 | + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16; |
815 | const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3; |
816 | const dword *cc = (const dword *)(src + 8); |
817 | if (code == 0) { |
818 | rgba[RCOMP] = UP5(CC_SEL(cc, 11)); |
819 | rgba[GCOMP] = UP6(CC_SEL(cc, 5)); |
820 | rgba[BCOMP] = UP5(CC_SEL(cc, 0)); |
821 | } else if (code == 1) { |
822 | rgba[RCOMP] = UP5(CC_SEL(cc, 27)); |
823 | rgba[GCOMP] = UP6(CC_SEL(cc, 21)); |
824 | rgba[BCOMP] = UP5(CC_SEL(cc, 16)); |
825 | } else if (code == 2) { |
826 | /* (col0 * (4 - code) + col1 * (code - 1)) / 3 */ |
827 | rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3; |
828 | rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) * 2 + UP6(CC_SEL(cc, 21))) / 3; |
829 | rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) * 2 + UP5(CC_SEL(cc, 16))) / 3; |
830 | } else { |
831 | rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3; |
832 | rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) + 2 * UP6(CC_SEL(cc, 21))) / 3; |
833 | rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) + 2 * UP5(CC_SEL(cc, 16))) / 3; |
834 | } |
835 | rgba[ACOMP] = UP4(src[((j & 3) * 4 + (i & 3)) / 2] >> ((i & 1) * 4)); |
836 | } |
837 | |
838 | |
839 | void TAPIENTRY |
840 | dxt5_rgba_decode_1 (const void *texture, int stride, |
841 | int i, int j, byte *rgba) |
842 | { |
843 | const byte *src = (const byte *)texture |
844 | + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16; |
845 | const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3; |
846 | const dword *cc = (const dword *)(src + 8); |
847 | const byte alpha0 = src[0]; |
848 | const byte alpha1 = src[1]; |
849 | const int alphaShift = (((j & 3) * 4) + (i & 3)) * 3 + 16; |
850 | const int acode = ((alphaShift == 31) |
851 | ? CC_SEL(src + 2, alphaShift - 16) |
852 | : CC_SEL(src, alphaShift)) & 0x7; |
853 | if (code == 0) { |
854 | rgba[RCOMP] = UP5(CC_SEL(cc, 11)); |
855 | rgba[GCOMP] = UP6(CC_SEL(cc, 5)); |
856 | rgba[BCOMP] = UP5(CC_SEL(cc, 0)); |
857 | } else if (code == 1) { |
858 | rgba[RCOMP] = UP5(CC_SEL(cc, 27)); |
859 | rgba[GCOMP] = UP6(CC_SEL(cc, 21)); |
860 | rgba[BCOMP] = UP5(CC_SEL(cc, 16)); |
861 | } else if (code == 2) { |
862 | /* (col0 * (4 - code) + col1 * (code - 1)) / 3 */ |
863 | rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3; |
864 | rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) * 2 + UP6(CC_SEL(cc, 21))) / 3; |
865 | rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) * 2 + UP5(CC_SEL(cc, 16))) / 3; |
866 | } else { |
867 | rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3; |
868 | rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) + 2 * UP6(CC_SEL(cc, 21))) / 3; |
869 | rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) + 2 * UP5(CC_SEL(cc, 16))) / 3; |
870 | } |
871 | if (acode == 0) { |
872 | rgba[ACOMP] = alpha0; |
873 | } else if (acode == 1) { |
874 | rgba[ACOMP] = alpha1; |
875 | } else if (alpha0 > alpha1) { |
876 | rgba[ACOMP] = ((8 - acode) * alpha0 + (acode - 1) * alpha1) / 7; |
877 | } else if (acode == 6) { |
878 | rgba[ACOMP] = 0; |
879 | } else if (acode == 7) { |
880 | rgba[ACOMP] = 255; |
881 | } else { |
882 | rgba[ACOMP] = ((6 - acode) * alpha0 + (acode - 1) * alpha1) / 5; |
883 | } |
884 | } |