Backport GPU Unai plugin from PCSX4ALL
[pcsx_rearmed.git] / plugins / gpu_unai / gpu_inner.h
1 /***************************************************************************
2 *   Copyright (C) 2010 PCSX4ALL Team                                      *
3 *   Copyright (C) 2010 Unai                                               *
4 *   Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com)          *
5 *                                                                         *
6 *   This program is free software; you can redistribute it and/or modify  *
7 *   it under the terms of the GNU General Public License as published by  *
8 *   the Free Software Foundation; either version 2 of the License, or     *
9 *   (at your option) any later version.                                   *
10 *                                                                         *
11 *   This program is distributed in the hope that it will be useful,       *
12 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14 *   GNU General Public License for more details.                          *
15 *                                                                         *
16 *   You should have received a copy of the GNU General Public License     *
17 *   along with this program; if not, write to the                         *
18 *   Free Software Foundation, Inc.,                                       *
19 *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
20 ***************************************************************************/
21
22 ///////////////////////////////////////////////////////////////////////////////
23 // Inner loop driver instantiation file
24
25 ///////////////////////////////////////////////////////////////////////////////
//  Option Masks (CF template parameter)
//  Each macro extracts one field from the compile-time option word 'CF'
//  that is in scope wherever the macro is used.
#define  CF_LIGHT     (((CF) >>  0) & 1) // Lighting
#define  CF_BLEND     (((CF) >>  1) & 1) // Blending
#define  CF_MASKCHECK (((CF) >>  2) & 1) // Mask bit check
#define  CF_BLENDMODE (((CF) >>  3) & 3) // Blend mode   0..3
#define  CF_TEXTMODE  (((CF) >>  5) & 3) // Texture mode 1..3 (0: texturing disabled)
#define  CF_GOURAUD   (((CF) >>  7) & 1) // Gouraud shading
#define  CF_MASKSET   (((CF) >>  8) & 1) // Mask bit set
#define  CF_DITHER    (((CF) >>  9) & 1) // Dithering
#define  CF_BLITMASK  (((CF) >> 10) & 1) // blit_mask check (skip rendering pixels
                                         //  that wouldn't end up displayed on
                                         //  low-res screen using simple downscaler)
38
39 //#ifdef __arm__
40 //#ifndef ENABLE_GPU_ARMV7
41 /* ARMv5 */
42 //#include "gpu_inner_blend_arm5.h"
43 //#else
44 /* ARMv7 optimized */
45 //#include "gpu_inner_blend_arm7.h"
46 //#endif
47 //#else
48 //#include "gpu_inner_blend.h"
49 //#endif
50
51 // TODO: use the arm-optimized gpu_inner_blends for arm builds
52 #include "gpu_inner_blend.h"
53
54 #include "gpu_inner_quantization.h"
55 #include "gpu_inner_light.h"
56
57 // If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16
58 // This is only for debugging/verification of low-precision colors in C.
59 // Low-precision Gouraud is intended for use by SIMD-optimized inner drivers
60 // which get/use Gouraud colors in SIMD registers.
61 //#define GPU_GOURAUD_LOW_PRECISION
62
// Number of fractional (fixed-point) bits carried by each GouraudColor channel
#if defined(GPU_GOURAUD_LOW_PRECISION)
#  define GPU_GOURAUD_FIXED_BITS 11
#else
#  define GPU_GOURAUD_FIXED_BITS 16
#endif
69
70 // Used to pass Gouraud colors to gpuPixelSpanFn() (lines)
71 struct GouraudColor {
72 #ifdef GPU_GOURAUD_LOW_PRECISION
73         u16 r, g, b;
74         s16 r_incr, g_incr, b_incr;
75 #else
76         u32 r, g, b;
77         s32 r_incr, g_incr, b_incr;
78 #endif
79 };
80
81 static inline u16 gpuGouraudColor15bpp(u32 r, u32 g, u32 b)
82 {
83         r >>= GPU_GOURAUD_FIXED_BITS;
84         g >>= GPU_GOURAUD_FIXED_BITS;
85         b >>= GPU_GOURAUD_FIXED_BITS;
86
87 #ifndef GPU_GOURAUD_LOW_PRECISION
88         // High-precision Gouraud colors are 8-bit + fractional
89         r >>= 3;  g >>= 3;  b >>= 3;
90 #endif
91
92         return r | (g << 5) | (b << 10);
93 }
94
95 ///////////////////////////////////////////////////////////////////////////////
96 //  GPU Pixel span operations generator gpuPixelSpanFn<>
97 //  Oct 2016: Created/adapted from old gpuPixelFn by senquack:
98 //  Original gpuPixelFn was used to draw lines one pixel at a time. I wrote
99 //  new line algorithms that draw lines using horizontal/vertical/diagonal
100 //  spans of pixels, necessitating new pixel-drawing function that could
101 //  not only render spans of pixels, but gouraud-shade them as well.
102 //  This speeds up line rendering and would allow tile-rendering (untextured
103 //  rectangles) to use the same set of functions. Since tiles are always
104 //  monochrome, they simply wouldn't use the extra set of 32 gouraud-shaded
105 //  gpuPixelSpanFn functions (TODO?).
106 //
107 // NOTE: While the PS1 framebuffer is 16 bit, we use 8-bit pointers here,
108 //       so that pDst can be incremented directly by 'incr' parameter
109 //       without having to shift it before use.
110 template<int CF>
111 static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
112 {
113         // Blend func can save an operation if it knows uSrc MSB is
114         //  unset. For untextured prims, this is always true.
115         const bool skip_uSrc_mask = true;
116
117         u16 col;
118         struct GouraudColor * gcPtr;
119         u32 r, g, b;
120         s32 r_incr, g_incr, b_incr;
121
122         if (CF_GOURAUD) {
123                 gcPtr = (GouraudColor*)data;
124                 r = gcPtr->r;  r_incr = gcPtr->r_incr;
125                 g = gcPtr->g;  g_incr = gcPtr->g_incr;
126                 b = gcPtr->b;  b_incr = gcPtr->b_incr;
127         } else {
128                 col = (u16)data;
129         }
130
131         do {
132                 if (!CF_GOURAUD)
133                 {   // NO GOURAUD
134                         if (!CF_MASKCHECK && !CF_BLEND) {
135                                 if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
136                                 else            { *(u16*)pDst = col;          }
137                         } else if (CF_MASKCHECK && !CF_BLEND) {
138                                 if (!(*(u16*)pDst & 0x8000)) {
139                                         if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
140                                         else            { *(u16*)pDst = col;          }
141                                 }
142                         } else {
143                                 u16 uDst = *(u16*)pDst;
144                                 if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; }
145
146                                 u16 uSrc = col;
147
148                                 if (CF_BLEND)
149                                         uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
150
151                                 if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; }
152                                 else            { *(u16*)pDst = uSrc;          }
153                         }
154
155                 } else
156                 {   // GOURAUD
157
158                         if (!CF_MASKCHECK && !CF_BLEND) {
159                                 col = gpuGouraudColor15bpp(r, g, b);
160                                 if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
161                                 else            { *(u16*)pDst = col;          }
162                         } else if (CF_MASKCHECK && !CF_BLEND) {
163                                 col = gpuGouraudColor15bpp(r, g, b);
164                                 if (!(*(u16*)pDst & 0x8000)) {
165                                         if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
166                                         else            { *(u16*)pDst = col;          }
167                                 }
168                         } else {
169                                 u16 uDst = *(u16*)pDst;
170                                 if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; }
171                                 col = gpuGouraudColor15bpp(r, g, b);
172
173                                 u16 uSrc = col;
174
175                                 // Blend func can save an operation if it knows uSrc MSB is
176                                 //  unset. For untextured prims, this is always true.
177                                 const bool skip_uSrc_mask = true;
178
179                                 if (CF_BLEND)
180                                         uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
181
182                                 if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; }
183                                 else            { *(u16*)pDst = uSrc;          }
184                         }
185                 }
186
187 endpixel:
188                 if (CF_GOURAUD) {
189                         r += r_incr;
190                         g += g_incr;
191                         b += b_incr;
192                 }
193                 pDst += incr;
194         } while (len-- > 1);
195
196         // Note from senquack: Normally, I'd prefer to write a 'do {} while (--len)'
197         //  loop, or even a for() loop, however, on MIPS platforms anything but the
198         //  'do {} while (len-- > 1)' tends to generate very unoptimal asm, with
199         //  many unneeded MULs/ADDs/branches at the ends of these functions.
200         //  If you change the loop structure above, be sure to compare the quality
201         //  of the generated code!!
202
203         if (CF_GOURAUD) {
204                 gcPtr->r = r;
205                 gcPtr->g = g;
206                 gcPtr->b = b;
207         }
208         return pDst;
209 }
210
211 static u8* PixelSpanNULL(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
212 {
213         #ifdef ENABLE_GPU_LOG_SUPPORT
214                 fprintf(stdout,"PixelSpanNULL()\n");
215         #endif
216         return pDst;
217 }
218
219 ///////////////////////////////////////////////////////////////////////////////
220 //  PixelSpan (lines) innerloops driver
221 typedef u8* (*PSD)(u8* dst, uintptr_t data, ptrdiff_t incr, size_t len);
222
223 const PSD gpuPixelSpanDrivers[64] =
224
225         // Array index | 'CF' template field | Field value
226         // ------------+---------------------+----------------
227         // Bit 0       | CF_BLEND            | off (0), on (1)
228         // Bit 1       | CF_MASKCHECK        | off (0), on (1)
229         // Bit 3:2     | CF_BLENDMODE        | 0..3
230         // Bit 4       | CF_MASKSET          | off (0), on (1)
231         // Bit 5       | CF_GOURAUD          | off (0), on (1)
232         //
233         // NULL entries are ones for which blending is disabled and blend-mode
234         //  field is non-zero, which is obviously invalid.
235
236         // Flat-shaded
237         gpuPixelSpanFn<0x00<<1>,         gpuPixelSpanFn<0x01<<1>,         gpuPixelSpanFn<0x02<<1>,         gpuPixelSpanFn<0x03<<1>,
238         PixelSpanNULL,                   gpuPixelSpanFn<0x05<<1>,         PixelSpanNULL,                   gpuPixelSpanFn<0x07<<1>,
239         PixelSpanNULL,                   gpuPixelSpanFn<0x09<<1>,         PixelSpanNULL,                   gpuPixelSpanFn<0x0B<<1>,
240         PixelSpanNULL,                   gpuPixelSpanFn<0x0D<<1>,         PixelSpanNULL,                   gpuPixelSpanFn<0x0F<<1>,
241
242         // Flat-shaded + PixelMSB (CF_MASKSET)
243         gpuPixelSpanFn<(0x00<<1)|0x100>, gpuPixelSpanFn<(0x01<<1)|0x100>, gpuPixelSpanFn<(0x02<<1)|0x100>, gpuPixelSpanFn<(0x03<<1)|0x100>,
244         PixelSpanNULL,                   gpuPixelSpanFn<(0x05<<1)|0x100>, PixelSpanNULL,                   gpuPixelSpanFn<(0x07<<1)|0x100>,
245         PixelSpanNULL,                   gpuPixelSpanFn<(0x09<<1)|0x100>, PixelSpanNULL,                   gpuPixelSpanFn<(0x0B<<1)|0x100>,
246         PixelSpanNULL,                   gpuPixelSpanFn<(0x0D<<1)|0x100>, PixelSpanNULL,                   gpuPixelSpanFn<(0x0F<<1)|0x100>,
247
248         // Gouraud-shaded (CF_GOURAUD)
249         gpuPixelSpanFn<(0x00<<1)|0x80>,  gpuPixelSpanFn<(0x01<<1)|0x80>,  gpuPixelSpanFn<(0x02<<1)|0x80>,  gpuPixelSpanFn<(0x03<<1)|0x80>,
250         PixelSpanNULL,                   gpuPixelSpanFn<(0x05<<1)|0x80>,  PixelSpanNULL,                   gpuPixelSpanFn<(0x07<<1)|0x80>,
251         PixelSpanNULL,                   gpuPixelSpanFn<(0x09<<1)|0x80>,  PixelSpanNULL,                   gpuPixelSpanFn<(0x0B<<1)|0x80>,
252         PixelSpanNULL,                   gpuPixelSpanFn<(0x0D<<1)|0x80>,  PixelSpanNULL,                   gpuPixelSpanFn<(0x0F<<1)|0x80>,
253
254         // Gouraud-shaded (CF_GOURAUD) + PixelMSB (CF_MASKSET)
255         gpuPixelSpanFn<(0x00<<1)|0x180>, gpuPixelSpanFn<(0x01<<1)|0x180>, gpuPixelSpanFn<(0x02<<1)|0x180>, gpuPixelSpanFn<(0x03<<1)|0x180>,
256         PixelSpanNULL,                   gpuPixelSpanFn<(0x05<<1)|0x180>, PixelSpanNULL,                   gpuPixelSpanFn<(0x07<<1)|0x180>,
257         PixelSpanNULL,                   gpuPixelSpanFn<(0x09<<1)|0x180>, PixelSpanNULL,                   gpuPixelSpanFn<(0x0B<<1)|0x180>,
258         PixelSpanNULL,                   gpuPixelSpanFn<(0x0D<<1)|0x180>, PixelSpanNULL,                   gpuPixelSpanFn<(0x0F<<1)|0x180>
259 };
260
261 ///////////////////////////////////////////////////////////////////////////////
262 //  GPU Tiles innerloops generator
263
264 template<int CF>
265 static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data)
266 {
267         if (!CF_MASKCHECK && !CF_BLEND) {
268                 if (CF_MASKSET) { data = data | 0x8000; }
269                 do { *pDst++ = data; } while (--count);
270         } else if (CF_MASKCHECK && !CF_BLEND) {
271                 if (CF_MASKSET) { data = data | 0x8000; }
272                 do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count);
273         } else
274         {
275                 // Blend func can save an operation if it knows uSrc MSB is
276                 //  unset. For untextured prims, this is always true.
277                 const bool skip_uSrc_mask = true;
278
279                 u16 uSrc, uDst;
280                 do
281                 {
282                         if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
283                         if (CF_MASKCHECK) { if (uDst&0x8000) goto endtile; }
284
285                         uSrc = data;
286
287                         if (CF_BLEND)
288                                 uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
289
290                         if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
291                         else            { *pDst = uSrc;          }
292
293                         //senquack - Did not apply "Silent Hill" mask-bit fix to here.
294                         // It is hard to tell from scarce documentation available and
295                         //  lack of comments in code, but I believe the tile-span
296                         //  functions here should not bother to preserve any source MSB,
297                         //  as they are not drawing from a texture.
298 endtile:
299                         pDst++;
300                 }
301                 while (--count);
302         }
303 }
304
305 static void TileNULL(u16 *pDst, u32 count, u16 data)
306 {
307         #ifdef ENABLE_GPU_LOG_SUPPORT
308                 fprintf(stdout,"TileNULL()\n");
309         #endif
310 }
311
312 ///////////////////////////////////////////////////////////////////////////////
313 //  Tiles innerloops driver
314 typedef void (*PT)(u16 *pDst, u32 count, u16 data);
315
316 // Template instantiation helper macros
317 #define TI(cf) gpuTileSpanFn<(cf)>
318 #define TN     TileNULL
319 #define TIBLOCK(ub) \
320         TI((ub)|0x00), TI((ub)|0x02), TI((ub)|0x04), TI((ub)|0x06), \
321         TN,            TI((ub)|0x0a), TN,            TI((ub)|0x0e), \
322         TN,            TI((ub)|0x12), TN,            TI((ub)|0x16), \
323         TN,            TI((ub)|0x1a), TN,            TI((ub)|0x1e)
324
325 const PT gpuTileSpanDrivers[32] = {
326         TIBLOCK(0<<8), TIBLOCK(1<<8)
327 };
328
329 #undef TI
330 #undef TN
331 #undef TIBLOCK
332
333
334 ///////////////////////////////////////////////////////////////////////////////
335 //  GPU Sprites innerloops generator
336
337 template<int CF>
338 static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0)
339 {
340         // Blend func can save an operation if it knows uSrc MSB is unset.
341         //  Untextured prims can always skip (source color always comes with MSB=0).
342         //  For textured prims, lighting funcs always return it unset. (bonus!)
343         const bool skip_uSrc_mask = (!CF_TEXTMODE) || CF_LIGHT;
344
345         u16 uSrc, uDst, srcMSB;
346         u32 u0_mask = gpu_unai.TextureWindow[2];
347
348         u8 r5, g5, b5;
349         if (CF_LIGHT) {
350                 r5 = gpu_unai.r5;
351                 g5 = gpu_unai.g5;
352                 b5 = gpu_unai.b5;
353         }
354
355         if (CF_TEXTMODE==3) {
356                 // Texture is accessed byte-wise, so adjust mask if 16bpp
357                 u0_mask <<= 1;
358         }
359
360         const u16 *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
361
362         do
363         {
364                 if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
365                 if (CF_MASKCHECK) if (uDst&0x8000) { goto endsprite; }
366
367                 if (CF_TEXTMODE==1) {  //  4bpp (CLUT)
368                         u8 rgb = pTxt[(u0 & u0_mask)>>1];
369                         uSrc = CBA_[(rgb>>((u0&1)<<2))&0xf];
370                 }
371                 if (CF_TEXTMODE==2) {  //  8bpp (CLUT)
372                         uSrc = CBA_[pTxt[u0 & u0_mask]];
373                 }
374                 if (CF_TEXTMODE==3) {  // 16bpp
375                         uSrc = *(u16*)(&pTxt[u0 & u0_mask]);
376                 }
377
378                 if (!uSrc) goto endsprite;
379
380                 //senquack - save source MSB, as blending or lighting macros will not
381                 //           (Silent Hill gray rectangles mask bit bug)
382                 if (CF_BLEND || CF_LIGHT) srcMSB = uSrc & 0x8000;
383                 
384                 if (CF_LIGHT)
385                         uSrc = gpuLightingTXT(uSrc, r5, g5, b5);
386
387                 if (CF_BLEND && srcMSB)
388                         uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
389
390                 if (CF_MASKSET)                { *pDst = uSrc | 0x8000; }
391                 else if (CF_BLEND || CF_LIGHT) { *pDst = uSrc | srcMSB; }
392                 else                           { *pDst = uSrc;          }
393
394 endsprite:
395                 u0 += (CF_TEXTMODE==3) ? 2 : 1;
396                 pDst++;
397         }
398         while (--count);
399 }
400
401 static void SpriteNULL(u16 *pDst, u32 count, u8* pTxt, u32 u0)
402 {
403         #ifdef ENABLE_GPU_LOG_SUPPORT
404                 fprintf(stdout,"SpriteNULL()\n");
405         #endif
406 }
407
408 ///////////////////////////////////////////////////////////////////////////////
409
410 ///////////////////////////////////////////////////////////////////////////////
411 //  Sprite innerloops driver
412 typedef void (*PS)(u16 *pDst, u32 count, u8* pTxt, u32 u0);
413
414 // Template instantiation helper macros
415 #define TI(cf) gpuSpriteSpanFn<(cf)>
416 #define TN     SpriteNULL
417 #define TIBLOCK(ub) \
418         TN,            TN,            TN,            TN,            TN,            TN,            TN,            TN,            \
419         TN,            TN,            TN,            TN,            TN,            TN,            TN,            TN,            \
420         TN,            TN,            TN,            TN,            TN,            TN,            TN,            TN,            \
421         TN,            TN,            TN,            TN,            TN,            TN,            TN,            TN,            \
422         TI((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \
423         TN,            TN,            TI((ub)|0x2a), TI((ub)|0x2b), TN,            TN,            TI((ub)|0x2e), TI((ub)|0x2f), \
424         TN,            TN,            TI((ub)|0x32), TI((ub)|0x33), TN,            TN,            TI((ub)|0x36), TI((ub)|0x37), \
425         TN,            TN,            TI((ub)|0x3a), TI((ub)|0x3b), TN,            TN,            TI((ub)|0x3e), TI((ub)|0x3f), \
426         TI((ub)|0x40), TI((ub)|0x41), TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \
427         TN,            TN,            TI((ub)|0x4a), TI((ub)|0x4b), TN,            TN,            TI((ub)|0x4e), TI((ub)|0x4f), \
428         TN,            TN,            TI((ub)|0x52), TI((ub)|0x53), TN,            TN,            TI((ub)|0x56), TI((ub)|0x57), \
429         TN,            TN,            TI((ub)|0x5a), TI((ub)|0x5b), TN,            TN,            TI((ub)|0x5e), TI((ub)|0x5f), \
430         TI((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \
431         TN,            TN,            TI((ub)|0x6a), TI((ub)|0x6b), TN,            TN,            TI((ub)|0x6e), TI((ub)|0x6f), \
432         TN,            TN,            TI((ub)|0x72), TI((ub)|0x73), TN,            TN,            TI((ub)|0x76), TI((ub)|0x77), \
433         TN,            TN,            TI((ub)|0x7a), TI((ub)|0x7b), TN,            TN,            TI((ub)|0x7e), TI((ub)|0x7f)
434
435 const PS gpuSpriteSpanDrivers[256] = {
436         TIBLOCK(0<<8), TIBLOCK(1<<8)
437 };
438
439 #undef TI
440 #undef TN
441 #undef TIBLOCK
442
443 ///////////////////////////////////////////////////////////////////////////////
444 //  GPU Polygon innerloops generator
445
//senquack - Newer version with following changes:
//           * Adapted to work with new poly routines in gpu_raster_polygon.h
//             adapted from DrHell GPU. They are less glitchy and use 22.10
//             fixed-point instead of original UNAI's 16.16.
//           * Texture coordinates are no longer packed together into one
//             unsigned int. This seems to lose too much accuracy (they each
//             end up being only 8.7 fixed-point that way) and pixel dropouts
//             were noticeable both with original code and current DrHell
//             adaptations. An example would be the sky in NFS3. Now, they are
//             stored in separate ints, using separate masks.
//           * Function is no longer INLINE, as it was always called
//             through a function pointer.
//           * Function now ensures the mask bit of source texture is preserved
//             across calls to blending functions (Silent Hill rectangles fix)
//           * November 2016: Large refactoring of blending/lighting when
//             JohnnyF added dithering. See gpu_inner_quantization.h and
//             relevant blend/light headers.
// (see README_senquack.txt)
464 template<int CF>
465 static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count)
466 {
467         // Blend func can save an operation if it knows uSrc MSB is unset.
468         //  Untextured prims can always skip this (src color MSB is always 0).
469         //  For textured prims, lighting funcs always return it unset. (bonus!)
470         const bool skip_uSrc_mask = (!CF_TEXTMODE) || CF_LIGHT;
471
472         u32 bMsk; if (CF_BLITMASK) bMsk = gpu_unai.blit_mask;
473
474         if (!CF_TEXTMODE)
475         {
476                 if (!CF_GOURAUD)
477                 {
478                         // UNTEXTURED, NO GOURAUD
479                         const u16 pix15 = gpu_unai.PixelData;
480                         do {
481                                 u16 uSrc, uDst;
482
483                                 // NOTE: Don't enable CF_BLITMASK  pixel skipping (speed hack)
484                                 //  on untextured polys. It seems to do more harm than good: see
485                                 //  gravestone text at end of Medieval intro sequence. -senquack
486                                 //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) { goto endpolynotextnogou; } }
487
488                                 if (CF_BLEND || CF_MASKCHECK) uDst = *pDst;
489                                 if (CF_MASKCHECK) { if (uDst&0x8000) { goto endpolynotextnogou; } }
490
491                                 uSrc = pix15;
492
493                                 if (CF_BLEND)
494                                         uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
495
496                                 if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
497                                 else            { *pDst = uSrc;          }
498
499 endpolynotextnogou:
500                                 pDst++;
501                         } while(--count);
502                 }
503                 else
504                 {
505                         // UNTEXTURED, GOURAUD
506                         u32 l_gCol = gpu_unai.gCol;
507                         u32 l_gInc = gpu_unai.gInc;
508
509                         do {
510                                 u16 uDst, uSrc;
511
512                                 // See note in above loop regarding CF_BLITMASK
513                                 //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolynotextgou; }
514
515                                 if (CF_BLEND || CF_MASKCHECK) uDst = *pDst;
516                                 if (CF_MASKCHECK) { if (uDst&0x8000) goto endpolynotextgou; }
517
518                                 if (CF_DITHER) {
519                                         // GOURAUD, DITHER
520
521                                         u32 uSrc24 = gpuLightingRGB24(l_gCol);
522                                         if (CF_BLEND)
523                                                 uSrc24 = gpuBlending24<CF_BLENDMODE>(uSrc24, uDst);
524                                         uSrc = gpuColorQuantization24<CF_DITHER>(uSrc24, pDst);
525                                 } else {
526                                         // GOURAUD, NO DITHER
527
528                                         uSrc = gpuLightingRGB(l_gCol);
529
530                                         if (CF_BLEND)
531                                                 uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
532                                 }
533
534                                 if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
535                                 else            { *pDst = uSrc;          }
536
537 endpolynotextgou:
538                                 pDst++;
539                                 l_gCol += l_gInc;
540                         }
541                         while (--count);
542                 }
543         }
544         else
545         {
546                 // TEXTURED
547
548                 u16 uDst, uSrc, srcMSB;
549
550                 //senquack - note: original UNAI code had gpu_unai.{u4/v4} packed into
551                 // one 32-bit unsigned int, but this proved to lose too much accuracy
552                 // (pixel drouputs noticeable in NFS3 sky), so now are separate vars.
553                 u32 l_u_msk = gpu_unai.u_msk;     u32 l_v_msk = gpu_unai.v_msk;
554                 u32 l_u = gpu_unai.u & l_u_msk;   u32 l_v = gpu_unai.v & l_v_msk;
555                 s32 l_u_inc = gpu_unai.u_inc;     s32 l_v_inc = gpu_unai.v_inc;
556
557                 const u16* TBA_ = gpu_unai.TBA;
558                 const u16* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
559
560                 u8 r5, g5, b5;
561                 u8 r8, g8, b8;
562
563                 u32 l_gInc, l_gCol;
564
565                 if (CF_LIGHT) {
566                         if (CF_GOURAUD) {
567                                 l_gInc = gpu_unai.gInc;
568                                 l_gCol = gpu_unai.gCol;
569                         } else {
570                                 if (CF_DITHER) {
571                                         r8 = gpu_unai.r8;
572                                         g8 = gpu_unai.g8;
573                                         b8 = gpu_unai.b8;
574                                 } else {
575                                         r5 = gpu_unai.r5;
576                                         g5 = gpu_unai.g5;
577                                         b5 = gpu_unai.b5;
578                                 }
579                         }
580                 }
581
582                 do
583                 {
584                         if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolytext; }
585                         if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
586                         if (CF_MASKCHECK) if (uDst&0x8000) { goto endpolytext; }
587
588                         //senquack - adapted to work with new 22.10 fixed point routines:
589                         //           (UNAI originally used 16.16)
590                         if (CF_TEXTMODE==1) {  //  4bpp (CLUT)
591                                 u32 tu=(l_u>>10);
592                                 u32 tv=(l_v<<1)&(0xff<<11);
593                                 u8 rgb=((u8*)TBA_)[tv+(tu>>1)];
594                                 uSrc=CBA_[(rgb>>((tu&1)<<2))&0xf];
595                                 if (!uSrc) goto endpolytext;
596                         }
597                         if (CF_TEXTMODE==2) {  //  8bpp (CLUT)
598                                 uSrc = CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])];
599                                 if (!uSrc) goto endpolytext;
600                         }
601                         if (CF_TEXTMODE==3) {  // 16bpp
602                                 uSrc = TBA_[(l_u>>10)+((l_v)&(0xff<<10))];
603                                 if (!uSrc) goto endpolytext;
604                         }
605
606                         // Save source MSB, as blending or lighting will not (Silent Hill)
607                         if (CF_BLEND || CF_LIGHT) srcMSB = uSrc & 0x8000;
608
609                         // When textured, only dither when LIGHT (texture blend) is enabled
610                         // LIGHT &&  BLEND => dither
611                         // LIGHT && !BLEND => dither
612                         //!LIGHT &&  BLEND => no dither
613                         //!LIGHT && !BLEND => no dither
614
615                         if (CF_DITHER && CF_LIGHT) {
616                                 u32 uSrc24;
617                                 if ( CF_GOURAUD)
618                                         uSrc24 = gpuLightingTXT24Gouraud(uSrc, l_gCol);
619                                 if (!CF_GOURAUD)
620                                         uSrc24 = gpuLightingTXT24(uSrc, r8, g8, b8);
621
622                                 if (CF_BLEND && srcMSB)
623                                         uSrc24 = gpuBlending24<CF_BLENDMODE>(uSrc24, uDst);
624
625                                 uSrc = gpuColorQuantization24<CF_DITHER>(uSrc24, pDst);
626                         } else
627                         {
628                                 if (CF_LIGHT) {
629                                         if ( CF_GOURAUD)
630                                                 uSrc = gpuLightingTXTGouraud(uSrc, l_gCol);
631                                         if (!CF_GOURAUD)
632                                                 uSrc = gpuLightingTXT(uSrc, r5, g5, b5);
633                                 }
634
635                                 if (CF_BLEND && srcMSB)
636                                         uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
637                         }
638
639                         if (CF_MASKSET)                { *pDst = uSrc | 0x8000; }
640                         else if (CF_BLEND || CF_LIGHT) { *pDst = uSrc | srcMSB; }
641                         else                           { *pDst = uSrc;          }
642 endpolytext:
643                         pDst++;
644                         l_u = (l_u + l_u_inc) & l_u_msk;
645                         l_v = (l_v + l_v_inc) & l_v_msk;
646                         if (CF_LIGHT && CF_GOURAUD) l_gCol += l_gInc;
647                 }
648                 while (--count);
649         }
650 }
651
652 static void PolyNULL(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count)
653 {
654         #ifdef ENABLE_GPU_LOG_SUPPORT
655                 fprintf(stdout,"PolyNULL()\n");
656         #endif
657 }
658
659 ///////////////////////////////////////////////////////////////////////////////
660 //  Polygon innerloops driver
    //
    //  PP is the function-pointer type of a polygon span driver: a function
    //  returning void that takes a gpu_unai_t by const reference, a u16
    //  pointer (pDst) and a u32 (count). PolyNULL has exactly this signature,
    //  and the gpuPolySpanDrivers table below is declared as an array of PP.
    //  NOTE(review): in the span loop above, pDst is the pixel write cursor
    //  and count the number of pixels left in the span; presumably every
    //  driver uses the arguments the same way.
661 typedef void (*PP)(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count);
662
663 // Template instantiation helper macros
    //
    //  TI(cf)      names the gpuPolySpanFn specialization for option mask cf.
    //  TN          names PolyNULL, used for option masks that get no
    //              specialization.
    //  TIBLOCK(ub) expands to 256 comma-separated table entries (32 rows of
    //              8), one for every low-byte value 0x00..0xff, each OR'ed
    //              with ub. The position of an entry inside the block equals
    //              its low byte, so entries must never be reordered.
    //
    //  Which low bytes fall back to TN (read off the rows below):
    //   - 0x00..0x7f: when bits 3-4 are zero every entry is instantiated;
    //     when bits 3-4 are non-zero only entries with bit 1 (CF_BLEND) set
    //     are instantiated.
    //   - 0x80..0xff: same rule, and bit 0 (CF_LIGHT) must also be set.
    //  NOTE(review): presumably bits 3-4 are CF_BLENDMODE and bit 7 is
    //  CF_GOURAUD; confirm against the option mask definitions at the top of
    //  this file.
664 #define TI(cf) gpuPolySpanFn<(cf)>
665 #define TN     PolyNULL
666 #define TIBLOCK(ub) \
667         TI((ub)|0x00), TI((ub)|0x01), TI((ub)|0x02), TI((ub)|0x03), TI((ub)|0x04), TI((ub)|0x05), TI((ub)|0x06), TI((ub)|0x07), \
668         TN,            TN,            TI((ub)|0x0a), TI((ub)|0x0b), TN,            TN,            TI((ub)|0x0e), TI((ub)|0x0f), \
669         TN,            TN,            TI((ub)|0x12), TI((ub)|0x13), TN,            TN,            TI((ub)|0x16), TI((ub)|0x17), \
670         TN,            TN,            TI((ub)|0x1a), TI((ub)|0x1b), TN,            TN,            TI((ub)|0x1e), TI((ub)|0x1f), \
671         TI((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \
672         TN,            TN,            TI((ub)|0x2a), TI((ub)|0x2b), TN,            TN,            TI((ub)|0x2e), TI((ub)|0x2f), \
673         TN,            TN,            TI((ub)|0x32), TI((ub)|0x33), TN,            TN,            TI((ub)|0x36), TI((ub)|0x37), \
674         TN,            TN,            TI((ub)|0x3a), TI((ub)|0x3b), TN,            TN,            TI((ub)|0x3e), TI((ub)|0x3f), \
675         TI((ub)|0x40), TI((ub)|0x41), TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \
676         TN,            TN,            TI((ub)|0x4a), TI((ub)|0x4b), TN,            TN,            TI((ub)|0x4e), TI((ub)|0x4f), \
677         TN,            TN,            TI((ub)|0x52), TI((ub)|0x53), TN,            TN,            TI((ub)|0x56), TI((ub)|0x57), \
678         TN,            TN,            TI((ub)|0x5a), TI((ub)|0x5b), TN,            TN,            TI((ub)|0x5e), TI((ub)|0x5f), \
679         TI((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \
680         TN,            TN,            TI((ub)|0x6a), TI((ub)|0x6b), TN,            TN,            TI((ub)|0x6e), TI((ub)|0x6f), \
681         TN,            TN,            TI((ub)|0x72), TI((ub)|0x73), TN,            TN,            TI((ub)|0x76), TI((ub)|0x77), \
682         TN,            TN,            TI((ub)|0x7a), TI((ub)|0x7b), TN,            TN,            TI((ub)|0x7e), TI((ub)|0x7f), \
683         TN,            TI((ub)|0x81), TN,            TI((ub)|0x83), TN,            TI((ub)|0x85), TN,            TI((ub)|0x87), \
684         TN,            TN,            TN,            TI((ub)|0x8b), TN,            TN,            TN,            TI((ub)|0x8f), \
685         TN,            TN,            TN,            TI((ub)|0x93), TN,            TN,            TN,            TI((ub)|0x97), \
686         TN,            TN,            TN,            TI((ub)|0x9b), TN,            TN,            TN,            TI((ub)|0x9f), \
687         TN,            TI((ub)|0xa1), TN,            TI((ub)|0xa3), TN,            TI((ub)|0xa5), TN,            TI((ub)|0xa7), \
688         TN,            TN,            TN,            TI((ub)|0xab), TN,            TN,            TN,            TI((ub)|0xaf), \
689         TN,            TN,            TN,            TI((ub)|0xb3), TN,            TN,            TN,            TI((ub)|0xb7), \
690         TN,            TN,            TN,            TI((ub)|0xbb), TN,            TN,            TN,            TI((ub)|0xbf), \
691         TN,            TI((ub)|0xc1), TN,            TI((ub)|0xc3), TN,            TI((ub)|0xc5), TN,            TI((ub)|0xc7), \
692         TN,            TN,            TN,            TI((ub)|0xcb), TN,            TN,            TN,            TI((ub)|0xcf), \
693         TN,            TN,            TN,            TI((ub)|0xd3), TN,            TN,            TN,            TI((ub)|0xd7), \
694         TN,            TN,            TN,            TI((ub)|0xdb), TN,            TN,            TN,            TI((ub)|0xdf), \
695         TN,            TI((ub)|0xe1), TN,            TI((ub)|0xe3), TN,            TI((ub)|0xe5), TN,            TI((ub)|0xe7), \
696         TN,            TN,            TN,            TI((ub)|0xeb), TN,            TN,            TN,            TI((ub)|0xef), \
697         TN,            TN,            TN,            TI((ub)|0xf3), TN,            TN,            TN,            TI((ub)|0xf7), \
698         TN,            TN,            TN,            TI((ub)|0xfb), TN,            TN,            TN,            TI((ub)|0xff)
699
700 const PP gpuPolySpanDrivers[2048] = {
            // Span driver lookup table, indexed by the option mask itself:
            // entry i is gpuPolySpanFn<i>, or PolyNULL where TIBLOCK leaves a
            // gap. Each TIBLOCK contributes 256 consecutive entries and the
            // eight blocks step bits 8-10 of the mask, giving
            // 8 * 256 = 2048 entries in total.
701         TIBLOCK(0<<8), TIBLOCK(1<<8), TIBLOCK(2<<8), TIBLOCK(3<<8),
702         TIBLOCK(4<<8), TIBLOCK(5<<8), TIBLOCK(6<<8), TIBLOCK(7<<8)
703 };
704
    // The helper macros are only needed to build the table above; drop them
    // so the short names do not leak into code that follows.
705 #undef TI
706 #undef TN
707 #undef TIBLOCK