plugins/gpu_unai/gpu_unai.h

   1 /***************************************************************************
   2 *   Copyright (C) 2010 PCSX4ALL Team                                      *
   3 *   Copyright (C) 2010 Unai                                               *
   4 *   Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com)          *
   5 *                                                                         *
   6 *   This program is free software; you can redistribute it and/or modify  *
   7 *   it under the terms of the GNU General Public License as published by  *
   8 *   the Free Software Foundation; either version 2 of the License, or     *
   9 *   (at your option) any later version.                                   *
  10 *                                                                         *
  11 *   This program is distributed in the hope that it will be useful,       *
  12 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  13 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  14 *   GNU General Public License for more details.                          *
  15 *                                                                         *
  16 *   You should have received a copy of the GNU General Public License     *
  17 *   along with this program; if not, write to the                         *
  18 *   Free Software Foundation, Inc.,                                       *
  19 *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
  20 ***************************************************************************/
  21
  22 #ifndef GPU_UNAI_H
  23 #define GPU_UNAI_H
  24
  25 #include <stdint.h>
  26 #include "gpu.h"
  27
  28 // Header shared between both standalone gpu_unai (gpu.cpp) and new
  29 // gpulib-compatible gpu_unai (gpulib_if.cpp)
  30 // -> Anything here should be for gpu_unai's private use. <-
  31
  32 ///////////////////////////////////////////////////////////////////////////////
  33 //  Compile Options
  34
  35 //#define ENABLE_GPU_NULL_SUPPORT   // Enables NullGPU support
//#define ENABLE_GPU_LOG_SUPPORT    // Enables GPU logger (very slow; intended only for Windows debugging)
  37 //#define ENABLE_GPU_ARMV7                      // Enables ARMv7 optimized assembly
  38
  39 //Poly routine options (default is integer math and accurate division)
  40 //#define GPU_UNAI_USE_FLOATMATH         // Use float math in poly routines
  41 //#define GPU_UNAI_USE_FLOAT_DIV_MULTINV // If GPU_UNAI_USE_FLOATMATH is defined,
  42                                          //  use multiply-by-inverse for division
  43 //#define GPU_UNAI_USE_INT_DIV_MULTINV   // If GPU_UNAI_USE_FLOATMATH is *not*
  44                                          //  defined, use old inaccurate division
  45
  46
// Force-inline specifiers. Both names expand to exactly the same sequence:
// internal linkage (`static`), `inline`, and the GCC-style `always_inline`
// attribute.
#define GPU_INLINE static inline __attribute__((always_inline))
#define INLINE     static inline __attribute__((always_inline))
  49
// Short fixed-width integer names used throughout gpu_unai.
// NOTE: these are preprocessor macros (not typedefs) that expand to the
// <stdint.h> types, so they textually replace these identifiers everywhere
// after this point in the translation unit.
#define u8  uint8_t
#define s8  int8_t
#define u16 uint16_t
#define s16 int16_t
#define u32 uint32_t
#define s32 int32_t
#define s64 int64_t
#define u64 uint64_t
  58
  59 typedef union {
  60         struct {
  61                 u16 r, g, b;
  62         } c;
  63         u64 raw;
  64 } gcol_t;
  65
  66 #ifndef NDEBUG
  67
  68 typedef struct {
  69         u32 v;
  70 } le32_t;
  71
  72 typedef struct {
  73         u16 v;
  74 } le16_t;
  75
  76 #define LExRead(v_) (v_.v)
  77
  78 #else
  79
  80 typedef u32 le32_t;
  81 typedef u16 le16_t;
  82 #define LExRead(v) (v)
  83
  84 #endif
  85
  86 static inline u32 le32_to_u32(le32_t le)
  87 {
  88         return LE32TOH(LExRead(le));
  89 }
  90
  91 static inline s32 le32_to_s32(le32_t le)
  92 {
  93         return (int32_t) LE32TOH(LExRead(le));
  94 }
  95
  96 static inline u32 le32_raw(le32_t le)
  97 {
  98         return LExRead(le);
  99 }
 100
 101 static inline le32_t u32_to_le32(u32 u)
 102 {
 103         return (le32_t){ HTOLE32(u) };
 104 }
 105
 106 static inline u16 le16_to_u16(le16_t le)
 107 {
 108         return LE16TOH(LExRead(le));
 109 }
 110
 111 static inline s16 le16_to_s16(le16_t le)
 112 {
 113         return (int16_t) LE16TOH(LExRead(le));
 114 }
 115
 116 static inline u16 le16_raw(le16_t le)
 117 {
 118         return LExRead(le);
 119 }
 120
 121 static inline le16_t u16_to_le16(u16 u)
 122 {
 123         return (le16_t){ HTOLE16(u) };
 124 }
 125
 126 union PtrUnion
 127 {
 128         le32_t  *U4;
 129         le16_t  *U2;
 130         u8   *U1;
 131         void *ptr;
 132 };
 133
 134 union GPUPacket
 135 {
 136         le32_t U4[16];
 137         le16_t U2[32];
 138         u8  U1[64];
 139 };
 140
 141 template<class T> static inline void SwapValues(T &x, T &y)
 142 {
 143         T tmp(x);  x = y;  y = tmp;
 144 }
 145
 146 template<typename T>
 147 static inline T Min2 (const T a, const T b)
 148 {
 149         return (a<b)?a:b;
 150 }
 151
 152 template<typename T>
 153 static inline T Min3 (const T a, const T b, const T c)
 154 {
 155         return  Min2(Min2(a,b),c);
 156 }
 157
 158 template<typename T>
 159 static inline T Max2 (const T a, const T b)
 160 {
 161         return  (a>b)?a:b;
 162 }
 163
 164 template<typename T>
 165 static inline T Max3 (const T a, const T b, const T c)
 166 {
 167         return  Max2(Max2(a,b),c);
 168 }
 169
 170
 171 ///////////////////////////////////////////////////////////////////////////////
 172 //  GPU Raster Macros
 173
 174 // Convert 24bpp color parameter of GPU command to 16bpp (15bpp + mask bit)
 175 #define GPU_RGB16(rgb) ((((rgb)&0xF80000)>>9)|(((rgb)&0xF800)>>6)|(((rgb)&0xF8)>>3))
 176
 177 // Sign-extend 11-bit coordinate command param
 178 #define GPU_EXPANDSIGN(x) (((s32)(x)<<(32-11))>>(32-11))
 179
 180 // Max difference between any two X or Y primitive coordinates
 181 #define CHKMAX_X 1024
 182 #define CHKMAX_Y 512
 183
 184 #define FRAME_BUFFER_SIZE       (1024*512*2)
 185 #define FRAME_WIDTH                       1024
 186 #define FRAME_HEIGHT              512
 187 #define FRAME_OFFSET(x,y)       (((y)<<10)+(x))
 188 #define FRAME_BYTE_STRIDE     2048
 189 #define FRAME_BYTES_PER_PIXEL 2
 190
 191 static inline s32 GPU_DIV(s32 rs, s32 rt)
 192 {
 193         return rt ? (rs / rt) : (0);
 194 }
 195
 196 // 'Unsafe' version of above that doesn't check for div-by-zero
 197 #define GPU_FAST_DIV(rs, rt) ((signed)(rs) / (signed)(rt))
 198
 199 // warning: gpu_arm.S asm uses this struct, update the asm if you change this
 200 struct gpu_unai_inner_t {
 201         le16_t* TBA;              // 00 Ptr to current texture in VRAM
 202         le16_t* CBA;              // 04 Ptr to current CLUT in VRAM
 203
 204         // 22.10 Fixed-pt texture coords, mask, scanline advance
 205         // NOTE: U,V are no longer packed together into one u32, this proved to be
 206         //  too imprecise, leading to pixel dropouts.  Example: NFS3's skybox.
 207         u32 u, v;                 // 08 not fractional for sprites
 208         u32 u_msk, v_msk;         // 10 always 22.10
 209         union {
 210           struct {
 211             s32 u_inc, v_inc;     // 18 poly uv increment, 22.10
 212           };
 213           struct {
 214             s32 y0, y1;           // 18 sprite y range
 215           };
 216         };
 217
 218         // Color for flat-shaded, texture-blended prims
 219         u8  r5, g5, b5, pad5;     // 20 5-bit light for undithered prims
 220         u8  r8, g8, b8, pad8;     // 24 8-bit light for dithered prims
 221
 222         // Color for Gouraud-shaded prims
 223         // Fixed-pt 8.8 rgb triplet
 224         // Packed fixed-pt 8.3:8.3:8.2 rgb triplet
 225         //  layout:  ccccccccXXXXXXXX for c in [r, g, b]
 226         //           ^ bit 16
 227         gcol_t gCol;       // 28
 228         gcol_t gInc;       // 30 Increment along scanline for gCol
 229
 230         // Color for flat-shaded, untextured prims
 231         u16 PixelData;     // 38 bgr555 color for untextured flat-shaded polys
 232
 233         u8 blit_mask;           // Determines what pixels to skip when rendering.
 234                                 //  Only useful on low-resolution devices using
 235                                 //  a simple pixel-dropping downscaler for PS1
 236                                 //  high-res modes. See 'pixel_skip' option.
 237
 238         u8 ilace_mask;          // Determines what lines to skip when rendering.
 239                                 //  Normally 0 when PS1 240 vertical res is in
 240                                 //  use and ilace_force is 0. When running in
 241                                 //  PS1 480 vertical res on a low-resolution
 242                                 //  device (320x240), will usually be set to 1
 243                                 //  so odd lines are not rendered. (Unless future
 244                                 //  full-screen scaling option is in use ..TODO)
 245 };
 246
 247 struct gpu_unai_t {
 248         u32 GPU_GP1;
 249         GPUPacket PacketBuffer;
 250         le16_t *vram;
 251
 252 #ifdef USE_GPULIB
 253         le16_t *downscale_vram;
 254 #endif
 255         ////////////////////////////////////////////////////////////////////////////
 256         // Variables used only by older standalone version of gpu_unai (gpu.cpp)
 257 #ifndef USE_GPULIB
 258         u32  GPU_GP0;
 259         u32  tex_window;       // Current texture window vals (set by GP0(E2h) cmd)
 260         s32  PacketCount;
 261         s32  PacketIndex;
 262         bool fb_dirty;         // Framebuffer is dirty (according to GPU)
 263
 264         //  Display status
 265         //  NOTE: Standalone older gpu_unai didn't care about horiz display range
 266         u16  DisplayArea[6];   // [0] : Start of display area (in VRAM) X
 267                                // [1] : Start of display area (in VRAM) Y
 268                                // [2] : Display mode resolution HORIZONTAL
 269                                // [3] : Display mode resolution VERTICAL
 270                                // [4] : Vertical display range (on TV) START
 271                                // [5] : Vertical display range (on TV) END
 272
 273         ////////////////////////////////////////////////////////////////////////////
 274         //  Dma Transfers info
 275         struct {
 276                 s32  px,py;
 277                 s32  x_end,y_end;
 278                 le16_t* pvram;
 279                 u32 *last_dma;     // Last dma pointer
 280                 bool FrameToRead;  // Load image in progress
 281                 bool FrameToWrite; // Store image in progress
 282         } dma;
 283
 284         ////////////////////////////////////////////////////////////////////////////
 285         //  Frameskip
 286         struct {
 287                 int  skipCount;    // Frame skip (0,1,2,3...)
 288                 bool isSkip;       // Skip frame (according to GPU)
 289                 bool skipFrame;    // Skip this frame (according to frame skip)
 290                 bool wasSkip;      // Skip frame old value (according to GPU)
 291                 bool skipGPU;      // Skip GPU primitives
 292         } frameskip;
 293 #endif
 294         // END of standalone gpu_unai variables
 295         ////////////////////////////////////////////////////////////////////////////
 296
 297         u32 TextureWindowCur;  // Current setting from last GP0(0xE2) cmd (raw form)
 298         u8  TextureWindow[4];  // [0] : Texture window offset X
 299                                // [1] : Texture window offset Y
 300                                // [2] : Texture window mask X
 301                                // [3] : Texture window mask Y
 302
 303         u16 DrawingArea[4];    // [0] : Drawing area top left X
 304                                // [1] : Drawing area top left Y
 305                                // [2] : Drawing area bottom right X
 306                                // [3] : Drawing area bottom right Y
 307
 308         s16 DrawingOffset[2];  // [0] : Drawing offset X (signed)
 309                                // [1] : Drawing offset Y (signed)
 310
 311         ////////////////////////////////////////////////////////////////////////////
 312         //  Inner Loop parameters
 313
 314         __attribute__((aligned(32)))
 315         gpu_unai_inner_t inn;
 316
 317         // End of inner Loop parameters
 318         ////////////////////////////////////////////////////////////////////////////
 319
 320         bool prog_ilace_flag;   // Tracks successive frames for 'prog_ilace' option
 321
 322         u8 BLEND_MODE;
 323         u8 TEXT_MODE;
 324         u8 Masking;
 325
 326         u16 PixelMSB;
 327
 328         gpu_unai_config_t config;
 329
 330         u8  LightLUT[32*32];    // 5-bit lighting LUT (gpu_inner_light.h)
 331         u32 DitherMatrix[64];   // Matrix of dither coefficients
 332 };
 333
 334 static __attribute__((aligned(32))) gpu_unai_t gpu_unai;
 335
 336 // Global config that frontend can alter.. Values are read in GPU_init().
 337 // TODO: if frontend menu modifies a setting, add a function that can notify
 338 // GPU plugin to use new setting.
 339 gpu_unai_config_t gpu_unai_config_ext;
 340
 341 ///////////////////////////////////////////////////////////////////////////////
 342 // Internal inline funcs to get option status: (Allows flexibility)
 343 static inline bool LightingEnabled()
 344 {
 345         return gpu_unai.config.lighting;
 346 }
 347
 348 static inline bool FastLightingEnabled()
 349 {
 350         return gpu_unai.config.fast_lighting;
 351 }
 352
 353 static inline bool BlendingEnabled()
 354 {
 355         return gpu_unai.config.blending;
 356 }
 357
 358 static inline bool DitheringEnabled()
 359 {
 360         return gpu_unai.config.dithering;
 361 }
 362
 363 static inline bool ForcedDitheringEnabled()
 364 {
 365         return gpu_unai.config.force_dithering;
 366 }
 367
 368 static inline bool ProgressiveInterlaceEnabled()
 369 {
 370 #ifdef USE_GPULIB
 371         // Using this old option greatly decreases quality of image. Disabled
 372         //  for now when using new gpulib, since it also adds more work in loops.
 373         return false;
 374 #else
 375         return gpu_unai.config.prog_ilace;
 376 #endif
 377 }
 378
 379 // For now, 320x240 output resolution is assumed, using simple line-skipping
 380 //  and pixel-skipping downscaler.
 381 // TODO: Flesh these out so they return useful values based on whether
 382 //       running on higher-res device or a resampling downscaler is enabled.
 383 static inline bool PixelSkipEnabled()
 384 {
 385         return gpu_unai.config.pixel_skip || gpu_unai.config.scale_hires;
 386 }
 387
 388 static inline bool LineSkipEnabled()
 389 {
 390         return true;
 391 }
 392
 393 #endif // GPU_UNAI_H