cdrom: change pause timing again
[pcsx_rearmed.git] / plugins / gpu_unai / gpulib_if.cpp
index 0064aaa..6816e2b 100644 (file)
@@ -2,6 +2,7 @@
 *   Copyright (C) 2010 PCSX4ALL Team                                      *
 *   Copyright (C) 2010 Unai                                               *
 *   Copyright (C) 2011 notaz                                              *
+*   Copyright (C) 2016 Senquack (dansilsby <AT> gmail <DOT> com)          *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
 ***************************************************************************/
 
+#include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "../gpulib/gpu.h"
-#include "arm_features.h"
-
-#define u8 uint8_t
-#define s8 int8_t
-#define u16 uint16_t
-#define s16 int16_t
-#define u32 uint32_t
-#define s32 int32_t
-#define s64 int64_t
-
-#define INLINE static
-
-#define        FRAME_BUFFER_SIZE  (1024*512*2)
-#define        FRAME_WIDTH        1024
-#define        FRAME_HEIGHT       512
-#define        FRAME_OFFSET(x,y)  (((y)<<10)+(x))
-
-#define isSkip 0 /* skip frame (info coming from GPU) */
-#define alt_fps 0
-static int linesInterlace;  /* internal lines interlace */
-static int force_interlace;
-
-static bool light = true; /* lighting */
-static bool blend = true; /* blending */
-static bool FrameToRead = false; /* load image in progress */
-static bool FrameToWrite = false; /* store image in progress */
-
-static bool enableAbbeyHack = false; /* Abe's Odyssey hack */
-
-static u8 BLEND_MODE;
-static u8 TEXT_MODE;
-static u8 Masking;
-
-static u16 PixelMSB;
-static u16 PixelData;
-
-///////////////////////////////////////////////////////////////////////////////
-//  GPU Global data
-///////////////////////////////////////////////////////////////////////////////
-
-//  Dma Transfers info
-static s32             px,py;
-static s32             x_end,y_end;
-static u16*  pvram;
-
-static s32 PacketCount;
-static s32 PacketIndex;
-
-//  Rasterizer status
-static u32 TextureWindow [4];
-static u32 DrawingArea   [4];
-static u32 DrawingOffset [2];
-
-static u16* TBA;
-static u16* CBA;
-
-//  Inner Loops
-static s32   u4, du4;
-static s32   v4, dv4;
-static s32   r4, dr4;
-static s32   g4, dg4;
-static s32   b4, db4;
-static u32   lInc;
-static u32   tInc, tMsk;
-
-union GPUPacket
-{
-       u32 U4[16];
-       s32 S4[16];
-       u16 U2[32];
-       s16 S2[32];
-       u8  U1[64];
-       s8  S1[64];
-};
-
-static GPUPacket PacketBuffer;
-static u16  *GPU_FrameBuffer;
-static u32   GPU_GP1;
-
-///////////////////////////////////////////////////////////////////////////////
-
-#include "../gpu_unai/gpu_fixedpoint.h"
-
-//  Inner loop driver instanciation file
-#include "../gpu_unai/gpu_inner.h"
 
-//  GPU Raster Macros
-#define        GPU_RGB16(rgb)        ((((rgb)&0xF80000)>>9)|(((rgb)&0xF800)>>6)|(((rgb)&0xF8)>>3))
+#ifdef THREAD_RENDERING // threaded build: the renderer entry points defined in this file are compiled under real_* names
+#include "../gpulib/gpulib_thread_if.h" // NOTE(review): presumably declares the real_* interface and provides the unprefixed entry points - confirm
+#define do_cmd_list real_do_cmd_list
+#define renderer_init real_renderer_init
+#define renderer_finish real_renderer_finish
+#define renderer_sync_ecmds real_renderer_sync_ecmds
+#define renderer_update_caches real_renderer_update_caches
+#define renderer_flush_queues real_renderer_flush_queues
+#define renderer_set_interlace real_renderer_set_interlace
+#define renderer_set_config real_renderer_set_config
+#define renderer_notify_res_change real_renderer_notify_res_change
+#define renderer_notify_update_lace real_renderer_notify_update_lace
+#define renderer_sync real_renderer_sync
+#define ex_regs scratch_ex_regs // renames every later ex_regs token, including the gpu.ex_regs access in gpuGP0Cmd_0xEx
+#endif // THREAD_RENDERING
 
-#define GPU_EXPANDSIGN(x)  (((s32)(x)<<21)>>21)
+//#include "port.h"
+#include "gpu_unai.h"
 
-#define CHKMAX_X 1024
-#define CHKMAX_Y 512
+// GPU fixed point math
+#include "gpu_fixedpoint.h"
 
-#define        GPU_SWAP(a,b,t) {(t)=(a);(a)=(b);(b)=(t);}
+// Inner loop driver instantiation file
+#include "gpu_inner.h"
 
 // GPU internal image drawing functions
-#include "../gpu_unai/gpu_raster_image.h"
+#include "gpu_raster_image.h"
 
 // GPU internal line drawing functions
-#include "../gpu_unai/gpu_raster_line.h"
+#include "gpu_raster_line.h"
 
 // GPU internal polygon drawing functions
-#include "../gpu_unai/gpu_raster_polygon.h"
+#include "gpu_raster_polygon.h"
 
 // GPU internal sprite drawing functions
-#include "../gpu_unai/gpu_raster_sprite.h"
+#include "gpu_raster_sprite.h"
 
 // GPU command buffer execution/store
-#include "../gpu_unai/gpu_command.h"
+#include "gpu_command.h"
 
 /////////////////////////////////////////////////////////////////////////////
 
+#define DOWNSCALE_VRAM_SIZE (1024 * 512 * 2 * 2 + 4096)
+
+INLINE void scale_640_to_320(le16_t *dest, const le16_t *src, bool isRGB24) {
+  size_t uCount = 320;
+
+  if(isRGB24) {
+    const uint8_t* src8 = (const uint8_t *)src;
+    uint8_t* dst8 = (uint8_t *)dest;
+
+    do {
+      *dst8++ = *src8++;
+      *dst8++ = *src8++;
+      *dst8++ = *src8;
+      src8 += 4;
+    } while(--uCount);
+  } else {
+    const le16_t* src16 = src;
+    le16_t* dst16 = dest;
+
+    do {
+      *dst16++ = *src16;
+      src16 += 2;
+    } while(--uCount);
+  }
+}
+
+INLINE void scale_512_to_320(le16_t *dest, const le16_t *src, bool isRGB24) {
+  size_t uCount = 64;
+
+  if(isRGB24) {
+    const uint8_t* src8 = (const uint8_t *)src;
+    uint8_t* dst8 = (uint8_t *)dest;
+
+    do {
+      *dst8++ = *src8++;
+      *dst8++ = *src8++;
+      *dst8++ = *src8++;
+      *dst8++ = *src8++;
+      *dst8++ = *src8++;
+      *dst8++ = *src8;
+      src8 += 4;
+      *dst8++ = *src8++;
+      *dst8++ = *src8++;
+      *dst8++ = *src8++;
+      *dst8++ = *src8++;
+      *dst8++ = *src8++;
+      *dst8++ = *src8;
+      src8 += 4;
+      *dst8++ = *src8++;
+      *dst8++ = *src8++;
+      *dst8++ = *src8;
+      src8 += 4;
+    } while(--uCount);
+  } else {
+    const le16_t* src16 = src;
+    le16_t* dst16 = dest;
+
+    do {
+      *dst16++ = *src16++;
+      *dst16++ = *src16;
+      src16 += 2;
+      *dst16++ = *src16++;
+      *dst16++ = *src16;
+      src16 += 2;
+      *dst16++ = *src16;
+      src16 += 2;
+    } while(--uCount);
+  }
+}
+
+static uint16_t *get_downscale_buffer(int *x, int *y, int *w, int *h, int *vram_h)
+{
+  le16_t *dest = gpu_unai.downscale_vram;
+  const le16_t *src = gpu_unai.vram;
+  bool isRGB24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false);
+  int stride = 1024, dstride = 1024, lines = *h, orig_w = *w;
+
+  // PS1 fb read wraps around (fixes black screen in 'Tobal no. 1')
+  unsigned int fb_mask = 1024 * 512 - 1;
+
+  if (*h > 240) {
+    *h /= 2;
+    stride *= 2;
+    lines = *h;
+
+    // Ensure start at a non-skipped line
+    while (*y & gpu_unai.ilace_mask) ++*y;
+  }
+
+  unsigned int fb_offset_src = (*y * dstride + *x) & fb_mask;
+  unsigned int fb_offset_dest = fb_offset_src;
+
+  if (*w == 512 || *w == 640) {
+    *w = 320;
+  }
+
+  switch(orig_w) {
+  case 640:
+    do {
+      scale_640_to_320(dest + fb_offset_dest, src + fb_offset_src, isRGB24);
+      fb_offset_src = (fb_offset_src + stride) & fb_mask;
+      fb_offset_dest = (fb_offset_dest + dstride) & fb_mask;
+    } while(--lines);
+
+    break;
+  case 512:
+    do {
+      scale_512_to_320(dest + fb_offset_dest, src + fb_offset_src, isRGB24);
+      fb_offset_src = (fb_offset_src + stride) & fb_mask;
+      fb_offset_dest = (fb_offset_dest + dstride) & fb_mask;
+    } while(--lines);
+    break;
+  default:
+    size_t size = isRGB24 ? *w * 3 : *w * 2;
+
+    do {
+      memcpy(dest + fb_offset_dest, src + fb_offset_src, size);
+      fb_offset_src = (fb_offset_src + stride) & fb_mask;
+      fb_offset_dest = (fb_offset_dest + dstride) & fb_mask;
+    } while(--lines);
+    break;
+  }
+
+  return (uint16_t *)gpu_unai.downscale_vram;
+}
+
+// Maps the downscale buffer through gpu.mmap if it is not mapped yet, then
+//  installs the gpu.get_downscale_buffer hook, or clears it when the mapping
+//  failed.
+static void map_downscale_buffer(void)
+{
+  if (gpu_unai.downscale_vram != NULL)
+    return; // already mapped
+
+  le16_t *mapped = (le16_t*)gpu.mmap(DOWNSCALE_VRAM_SIZE);
+  gpu_unai.downscale_vram = mapped;
+
+  if (mapped != NULL) {
+    gpu.get_downscale_buffer = get_downscale_buffer;
+    return;
+  }
+
+  fprintf(stderr, "failed to map downscale buffer\n");
+  gpu.get_downscale_buffer = NULL;
+}
+
+// Releases the downscale buffer through gpu.munmap and clears both the
+//  buffer pointer and the gpu.get_downscale_buffer hook. Does nothing when
+//  no buffer is mapped.
+static void unmap_downscale_buffer(void)
+{
+  if (gpu_unai.downscale_vram != NULL) {
+    gpu.munmap(gpu_unai.downscale_vram, DOWNSCALE_VRAM_SIZE);
+    gpu_unai.downscale_vram = NULL;
+    gpu.get_downscale_buffer = NULL;
+  }
+}
+
 int renderer_init(void)
 {
-       GPU_FrameBuffer = (u16 *)gpu.vram;
-
-       // s_invTable
-       for(int i=1;i<=(1<<TABLE_BITS);++i)
-       {
-               double v = 1.0 / double(i);
-               #ifdef GPU_TABLE_10_BITS
-               v *= double(0xffffffff>>1);
-               #else
-               v *= double(0x80000000);
-               #endif
-               s_invTable[i-1]=s32(v);
-       }
-
-       return 0;
+  memset((void*)&gpu_unai, 0, sizeof(gpu_unai)); // initialization starts from an all-zero gpu_unai state
+  gpu_unai.vram = (le16_t *)gpu.vram; // the renderer works on gpulib's VRAM pointer
+
+  // Original standalone gpu_unai initialized TextureWindow[]. I added the
+  //  same behavior here, since it seems unsafe to leave [2],[3] unset when
+  //  using HLE and Rearmed gpu_neon sets this similarly on init. -senquack
+  gpu_unai.TextureWindow[0] = 0;
+  gpu_unai.TextureWindow[1] = 0;
+  gpu_unai.TextureWindow[2] = 255;
+  gpu_unai.TextureWindow[3] = 255;
+  //senquack - new vars must be updated whenever texture window is changed:
+  //           (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h)
+  const u32 fb = FIXED_BITS;  // # of fractional fixed-pt bits of u4/v4
+  gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); // window mask in the integer bits, all fractional bits set
+  gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1);
+
+  // Configuration options (copied from the externally provided settings)
+  gpu_unai.config = gpu_unai_config_ext;
+  //senquack - disabled, not sure this is needed and would require modifying
+  // sprite-span functions, perhaps unnecessarily. No Abe's Oddysee hack was
+  // present in latest PCSX4ALL sources we were using.
+  //gpu_unai.config.enableAbbeyHack = gpu_unai_config_ext.abe_hack;
+  gpu_unai.ilace_mask = gpu_unai.config.ilace_force; // initial line-skip mask comes straight from the config
+
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+  // s_invTable: entry i-1 holds 1/i scaled by the constant selected below
+  for(int i=1;i<=(1<<TABLE_BITS);++i)
+  {
+    double v = 1.0 / double(i);
+#ifdef GPU_TABLE_10_BITS
+    v *= double(0xffffffff>>1);
+#else
+    v *= double(0x80000000);
+#endif
+    s_invTable[i-1]=s32(v);
+  }
+#endif
+
+  SetupLightLUT(); // NOTE(review): defined elsewhere; presumably builds the lighting lookup table - confirm
+  SetupDitheringConstants(); // NOTE(review): defined elsewhere - confirm what it prepares
+
+  if (gpu_unai.config.scale_hires) { // the downscale buffer is only mapped when hi-res scaling is configured
+    map_downscale_buffer();
+  }
+
+  return 0; // the only return path; a failed buffer mapping is not reported here
 }
 
 void renderer_finish(void)
 {
+  unmap_downscale_buffer(); // releases the downscale buffer; a no-op when none is mapped
 }
 
 void renderer_notify_res_change(void)
+{
+  if (PixelSkipEnabled()) {
+    // Set blit_mask for high horizontal resolutions. This allows skipping
+    //  rendering pixels that would never get displayed on low-resolution
+    //  platforms that use simple pixel-dropping scaler.
+
+    switch (gpu.screen.hres)
+    {
+      case 512: gpu_unai.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS
+      case 640: gpu_unai.blit_mask = 0xaa; break; // GPU_BlitWS
+      default:  gpu_unai.blit_mask = 0;    break;
+    }
+  } else {
+    gpu_unai.blit_mask = 0;
+  }
+
+  if (LineSkipEnabled()) {
+    // Set rendering line-skip (only render every other line in high-res
+    //  480 vertical mode, or, optionally, force it for all video modes)
+
+    if (gpu.screen.vres == 480) {
+      if (gpu_unai.config.ilace_force) {
+        gpu_unai.ilace_mask = 3; // Only need 1/4 of lines
+      } else {
+        gpu_unai.ilace_mask = 1; // Only need 1/2 of lines
+      }
+    } else {
+      // Vert resolution changed from 480 to lower one
+      gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
+    }
+  } else {
+    gpu_unai.ilace_mask = 0;
+  }
+
+  /*
+  printf("res change hres: %d   vres: %d   depth: %d   ilace_mask: %d\n",
+      gpu.screen.hres, gpu.screen.vres, (gpu.status & PSX_GPU_STATUS_RGB24) ? 24 : 15,
+      gpu_unai.ilace_mask);
+  */
+}
+
+void renderer_notify_scanout_change(int x, int y) // no-op in this renderer: x and y are unused
 {
 }
 
-void renderer_notify_scanout_x_change(int x, int w)
+#ifdef USE_GPULIB
+// Handles GP0 draw settings commands 0xE1...0xE6
+static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word)
 {
+  // Assume incoming GP0 command is 0xE1..0xE6, convert to 1..6
+  u8 num = (cmd_word >> 24) & 7;
+  gpu.ex_regs[num] = cmd_word; // Update gpulib register
+  switch (num) {
+    case 1: {
+      // GP0(E1h) - Draw Mode setting (aka "Texpage")
+      u32 cur_texpage = gpu_unai.GPU_GP1 & 0x7FF;
+      u32 new_texpage = cmd_word & 0x7FF;
+      if (cur_texpage != new_texpage) {
+        gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x7FF) | new_texpage;
+        gpuSetTexture(gpu_unai.GPU_GP1);
+      }
+    } break;
+
+    case 2: {
+      // GP0(E2h) - Texture Window setting
+      if (cmd_word != gpu_unai.TextureWindowCur) {
+        static const u8 TextureMask[32] = {
+          255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7,
+          127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7
+        };
+        gpu_unai.TextureWindowCur = cmd_word;
+        gpu_unai.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3;
+        gpu_unai.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3;
+        gpu_unai.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F];
+        gpu_unai.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F];
+        gpu_unai.TextureWindow[0] &= ~gpu_unai.TextureWindow[2];
+        gpu_unai.TextureWindow[1] &= ~gpu_unai.TextureWindow[3];
+
+        // Inner loop vars must be updated whenever texture window is changed:
+        const u32 fb = FIXED_BITS;  // # of fractional fixed-pt bits of u4/v4
+        gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1);
+        gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1);
+
+        gpuSetTexture(gpu_unai.GPU_GP1);
+      }
+    } break;
+
+    case 3: {
+      // GP0(E3h) - Set Drawing Area top left (X1,Y1)
+      gpu_unai.DrawingArea[0] = cmd_word         & 0x3FF;
+      gpu_unai.DrawingArea[1] = (cmd_word >> 10) & 0x3FF;
+    } break;
+
+    case 4: {
+      // GP0(E4h) - Set Drawing Area bottom right (X2,Y2)
+      gpu_unai.DrawingArea[2] = (cmd_word         & 0x3FF) + 1;
+      gpu_unai.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1;
+    } break;
+
+    case 5: {
+      // GP0(E5h) - Set Drawing Offset (X,Y)
+      gpu_unai.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11);
+      gpu_unai.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11);
+    } break;
+
+    case 6: {
+      // GP0(E6h) - Mask Bit Setting
+      gpu_unai.Masking  = (cmd_word & 0x2) <<  1;
+      gpu_unai.PixelMSB = (cmd_word & 0x1) <<  8;
+    } break;
+  }
 }
+#endif
 
+#include "../gpulib/gpu_timing.h"
 extern const unsigned char cmd_lengths[256];
 
-int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
+int do_cmd_list(u32 *list_, int list_len,
+ int *cycles_sum_out, int *cycles_last, int *last_cmd)
 {
-  unsigned int cmd = 0, len, i;
-  unsigned int *list_start = list;
-  unsigned int *list_end = list + list_len;
+  int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
+  u32 cmd = 0, len, i;
+  le32_t *list = (le32_t *)list_;
+  le32_t *list_start = list;
+  le32_t *list_end = list + list_len;
+
+  //TODO: set ilace_mask when resolution changes instead of every time,
+  // eliminate #ifdef below.
+  gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
 
-  linesInterlace = force_interlace;
 #ifdef HAVE_PRE_ARMV7 /* XXX */
-  linesInterlace |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE);
+  gpu_unai.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE);
 #endif
+  if (gpu_unai.config.scale_hires) {
+    gpu_unai.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE);
+  }
 
   for (; list < list_end; list += 1 + len)
   {
-    cmd = *list >> 24;
+    cmd = le32_to_u32(*list) >> 24;
     len = cmd_lengths[cmd];
     if (list + 1 + len > list_end) {
       cmd = -1;
@@ -190,258 +423,370 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
     }
 
     #define PRIM cmd
-    PacketBuffer.U4[0] = list[0];
+    gpu_unai.PacketBuffer.U4[0] = list[0];
     for (i = 1; i <= len; i++)
-      PacketBuffer.U4[i] = list[i];
+      gpu_unai.PacketBuffer.U4[i] = list[i];
+
+    PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer };
 
     switch (cmd)
     {
       case 0x02:
-        gpuClearImage();
+        gpuClearImage(packet);
+        gput_sum(cpu_cycles_sum, cpu_cycles,
+           gput_fill(le16_to_s16(packet.U2[4]) & 0x3ff, le16_to_s16(packet.U2[5]) & 0x1ff));
         break;
 
       case 0x20:
       case 0x21:
       case 0x22:
-      case 0x23:
-        gpuDrawF3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB]);
-        break;
+      case 0x23: {          // Monochrome 3-pt poly
+        PP driver = gpuPolySpanDrivers[
+          (gpu_unai.blit_mask?1024:0) |
+          Blending_Mode |
+          gpu_unai.Masking | Blending | gpu_unai.PixelMSB
+        ];
+        gpuDrawPolyF(packet, driver, false);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
+      } break;
 
       case 0x24:
       case 0x25:
       case 0x26:
-      case 0x27:
-        gpuSetCLUT   (PacketBuffer.U4[2] >> 16);
-        gpuSetTexture(PacketBuffer.U4[4] >> 16);
-        if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
-          gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB]);
-        else
-          gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB]);
-        break;
+      case 0x27: {          // Textured 3-pt poly
+        gpuSetCLUT   (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+        gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
+
+        u32 driver_idx =
+          (gpu_unai.blit_mask?1024:0) |
+          Dithering |
+          Blending_Mode | gpu_unai.TEXT_MODE |
+          gpu_unai.Masking | Blending | gpu_unai.PixelMSB;
+
+        if (!FastLightingEnabled()) {
+          driver_idx |= Lighting;
+        } else {
+          if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)))
+            driver_idx |= Lighting;
+        }
+
+        PP driver = gpuPolySpanDrivers[driver_idx];
+        gpuDrawPolyFT(packet, driver, false);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
+      } break;
 
       case 0x28:
       case 0x29:
       case 0x2A:
-      case 0x2B: {
-        const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB];
-        gpuDrawF3(gpuPolySpanDriver);
-        PacketBuffer.U4[1] = PacketBuffer.U4[4];
-        gpuDrawF3(gpuPolySpanDriver);
-        break;
-      }
+      case 0x2B: {          // Monochrome 4-pt poly
+        PP driver = gpuPolySpanDrivers[
+          (gpu_unai.blit_mask?1024:0) |
+          Blending_Mode |
+          gpu_unai.Masking | Blending | gpu_unai.PixelMSB
+        ];
+        gpuDrawPolyF(packet, driver, true); // is_quad = true
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
+      } break;
 
       case 0x2C:
       case 0x2D:
       case 0x2E:
-      case 0x2F: {
-        gpuSetCLUT   (PacketBuffer.U4[2] >> 16);
-        gpuSetTexture(PacketBuffer.U4[4] >> 16);
-        PP gpuPolySpanDriver;
-        if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
-          gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB];
-        else
-          gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB];
-        gpuDrawFT3(gpuPolySpanDriver);
-        PacketBuffer.U4[1] = PacketBuffer.U4[7];
-        PacketBuffer.U4[2] = PacketBuffer.U4[8];
-        gpuDrawFT3(gpuPolySpanDriver);
-        break;
-      }
+      case 0x2F: {          // Textured 4-pt poly
+        gpuSetCLUT   (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+        gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
+
+        u32 driver_idx =
+          (gpu_unai.blit_mask?1024:0) |
+          Dithering |
+          Blending_Mode | gpu_unai.TEXT_MODE |
+          gpu_unai.Masking | Blending | gpu_unai.PixelMSB;
+
+        if (!FastLightingEnabled()) {
+          driver_idx |= Lighting;
+        } else {
+          if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)))
+            driver_idx |= Lighting;
+        }
+
+        PP driver = gpuPolySpanDrivers[driver_idx];
+        gpuDrawPolyFT(packet, driver, true); // is_quad = true
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
+      } break;
 
       case 0x30:
       case 0x31:
       case 0x32:
-      case 0x33:
-        gpuDrawG3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB]);
-        break;
+      case 0x33: {          // Gouraud-shaded 3-pt poly
+        //NOTE: The '129' here is CF_GOURAUD | CF_LIGHT, however
+        // this is an untextured poly, so CF_LIGHT (texture blend)
+        // shouldn't apply. Until the original array of template
+        // instantiation ptrs is fixed, we're stuck with this. (TODO)
+        PP driver = gpuPolySpanDrivers[
+          (gpu_unai.blit_mask?1024:0) |
+          Dithering |
+          Blending_Mode |
+          gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
+        ];
+        gpuDrawPolyG(packet, driver, false);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
+      } break;
 
       case 0x34:
       case 0x35:
       case 0x36:
-      case 0x37:
-        gpuSetCLUT    (PacketBuffer.U4[2] >> 16);
-        gpuSetTexture (PacketBuffer.U4[5] >> 16);
-        gpuDrawGT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB]);
-        break;
+      case 0x37: {          // Gouraud-shaded, textured 3-pt poly
+        gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+        gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
+        PP driver = gpuPolySpanDrivers[
+          (gpu_unai.blit_mask?1024:0) |
+          Dithering |
+          Blending_Mode | gpu_unai.TEXT_MODE |
+          gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
+        ];
+        gpuDrawPolyGT(packet, driver, false);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
+      } break;
 
       case 0x38:
       case 0x39:
       case 0x3A:
-      case 0x3B: {
-        const PP gpuPolySpanDriver  = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB];
-        gpuDrawG3(gpuPolySpanDriver);
-        PacketBuffer.U4[0] = PacketBuffer.U4[6];
-        PacketBuffer.U4[1] = PacketBuffer.U4[7];
-        gpuDrawG3(gpuPolySpanDriver);
-        break;
-      }
+      case 0x3B: {          // Gouraud-shaded 4-pt poly
+        // See notes regarding '129' for 0x30..0x33 further above -senquack
+        PP driver = gpuPolySpanDrivers[
+          (gpu_unai.blit_mask?1024:0) |
+          Dithering |
+          Blending_Mode |
+          gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
+        ];
+        gpuDrawPolyG(packet, driver, true); // is_quad = true
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
+      } break;
 
       case 0x3C:
       case 0x3D:
       case 0x3E:
-      case 0x3F: {
-        gpuSetCLUT    (PacketBuffer.U4[2] >> 16);
-        gpuSetTexture (PacketBuffer.U4[5] >> 16);
-        const PP gpuPolySpanDriver  = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB];
-        gpuDrawGT3(gpuPolySpanDriver);
-        PacketBuffer.U4[0] = PacketBuffer.U4[9];
-        PacketBuffer.U4[1] = PacketBuffer.U4[10];
-        PacketBuffer.U4[2] = PacketBuffer.U4[11];
-        gpuDrawGT3(gpuPolySpanDriver);
-        break;
-      }
+      case 0x3F: {          // Gouraud-shaded, textured 4-pt poly
+        gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+        gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
+        PP driver = gpuPolySpanDrivers[
+          (gpu_unai.blit_mask?1024:0) |
+          Dithering |
+          Blending_Mode | gpu_unai.TEXT_MODE |
+          gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
+        ];
+        gpuDrawPolyGT(packet, driver, true); // is_quad = true
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
+      } break;
 
       case 0x40:
       case 0x41:
       case 0x42:
-      case 0x43:
-        gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
-        break;
-
-      case 0x48 ... 0x4F:
-      {
+      case 0x43: {          // Monochrome line
+        // Shift index right by one, as untextured prims don't use lighting
+        u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+        PSD driver = gpuPixelSpanDrivers[driver_idx];
+        gpuDrawLineF(packet, driver);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+      } break;
+
+      case 0x48 ... 0x4F: { // Monochrome line strip
         u32 num_vertexes = 1;
-        u32 *list_position = &(list[2]);
+        le32_t *list_position = &list[2];
 
-        gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
+        // Shift index right by one, as untextured prims don't use lighting
+        u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+        PSD driver = gpuPixelSpanDrivers[driver_idx];
+        gpuDrawLineF(packet, driver);
 
         while(1)
         {
-          PacketBuffer.U4[1] = PacketBuffer.U4[2];
-          PacketBuffer.U4[2] = *list_position++;
-          gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
+          gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2];
+          gpu_unai.PacketBuffer.U4[2] = *list_position++;
+          gpuDrawLineF(packet, driver);
+          gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
 
           num_vertexes++;
           if(list_position >= list_end) {
             cmd = -1;
             goto breakloop;
           }
-          if((*list_position & 0xf000f000) == 0x50005000)
+          if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
             break;
         }
 
         len += (num_vertexes - 2);
-        break;
-      }
+      } break;
 
       case 0x50:
       case 0x51:
       case 0x52:
-      case 0x53:
-        gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
-        break;
-
-      case 0x58 ... 0x5F:
-      {
+      case 0x53: {          // Gouraud-shaded line
+        // Shift index right by one, as untextured prims don't use lighting
+        u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+        // Index MSB selects Gouraud-shaded PixelSpanDriver:
+        driver_idx |= (1 << 5);
+        PSD driver = gpuPixelSpanDrivers[driver_idx];
+        gpuDrawLineG(packet, driver);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+      } break;
+
+      case 0x58 ... 0x5F: { // Gouraud-shaded line strip
         u32 num_vertexes = 1;
-        u32 *list_position = &(list[2]);
+        le32_t *list_position = &list[2];
 
-        gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
+        // Shift index right by one, as untextured prims don't use lighting
+        u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
+        // Index MSB selects Gouraud-shaded PixelSpanDriver:
+        driver_idx |= (1 << 5);
+        PSD driver = gpuPixelSpanDrivers[driver_idx];
+        gpuDrawLineG(packet, driver);
 
         while(1)
         {
-          PacketBuffer.U4[0] = PacketBuffer.U4[2];
-          PacketBuffer.U4[1] = PacketBuffer.U4[3];
-          PacketBuffer.U4[2] = *list_position++;
-          PacketBuffer.U4[3] = *list_position++;
-          gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
+          gpu_unai.PacketBuffer.U4[0] = gpu_unai.PacketBuffer.U4[2];
+          gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[3];
+          gpu_unai.PacketBuffer.U4[2] = *list_position++;
+          gpu_unai.PacketBuffer.U4[3] = *list_position++;
+          gpuDrawLineG(packet, driver);
+          gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
 
           num_vertexes++;
           if(list_position >= list_end) {
             cmd = -1;
             goto breakloop;
           }
-          if((*list_position & 0xf000f000) == 0x50005000)
+          if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
             break;
         }
 
         len += (num_vertexes - 2) * 2;
-        break;
-      }
+      } break;
 
       case 0x60:
       case 0x61:
       case 0x62:
-      case 0x63:
-        gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
-        break;
+      case 0x63: {          // Monochrome rectangle (variable size)
+        PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+        s32 w = 0, h = 0;
+        gpuDrawT(packet, driver, &w, &h);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+      } break;
 
       case 0x64:
       case 0x65:
       case 0x66:
-      case 0x67:
-        gpuSetCLUT    (PacketBuffer.U4[2] >> 16);
-        gpuSetTexture (GPU_GP1);
-        if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
-          gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7)  | PixelMSB]);
-        else
-          gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7)  | PixelMSB]);
-        break;
+      case 0x67: {          // Textured rectangle (variable size)
+        gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+        u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
+        s32 w = 0, h = 0;
+
+        //senquack - Only color 808080h-878787h allows skipping lighting calculation:
+        // This fixes Silent Hill running animation on loading screens:
+        // (On PSX, color values 0x00-0x7F darken the source texture's color,
+        //  0x81-FF lighten textures (ultimately clamped to 0x1F),
+        //  0x80 leaves source texture color unchanged, HOWEVER,
+        //   gpu_unai uses a simple lighting LUT whereby only the upper
+        //   5 bits of an 8-bit color are used, so 0x80-0x87 all behave as
+        //   0x80.
+        // 
+        // NOTE: I've changed all textured sprite draw commands here and
+        //  elsewhere to use proper behavior, but left poly commands
+        //  alone, I don't want to slow rendering down too much. (TODO)
+        //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
+        // Strip lower 3 bits of each color and determine if lighting should be used:
+        if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
+          driver_idx |= Lighting;
+        PS driver = gpuSpriteSpanDrivers[driver_idx];
+        gpuDrawS(packet, driver, &w, &h);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+      } break;
 
       case 0x68:
       case 0x69:
       case 0x6A:
-      case 0x6B:
-        PacketBuffer.U4[2] = 0x00010001;
-        gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
-        break;
+      case 0x6B: {          // Monochrome rectangle (1x1 dot)
+        gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001);
+        PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+        s32 w = 0, h = 0;
+        gpuDrawT(packet, driver, &w, &h);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
+      } break;
 
       case 0x70:
       case 0x71:
       case 0x72:
-      case 0x73:
-        PacketBuffer.U4[2] = 0x00080008;
-        gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
-        break;
+      case 0x73: {          // Monochrome rectangle (8x8)
+        gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008);
+        PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+        s32 w = 0, h = 0;
+        gpuDrawT(packet, driver, &w, &h);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+      } break;
 
       case 0x74:
       case 0x75:
       case 0x76:
-      case 0x77:
-        PacketBuffer.U4[3] = 0x00080008;
-        gpuSetCLUT    (PacketBuffer.U4[2] >> 16);
-        gpuSetTexture (GPU_GP1);
-        if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
-          gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7)  | PixelMSB]);
-        else
-          gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7)  | PixelMSB]);
-        break;
+      case 0x77: {          // Textured rectangle (8x8)
+        gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008);
+        gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+        u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
+        s32 w = 0, h = 0;
+
+        //senquack - Only color 808080h-878787h allows skipping lighting calculation:
+        //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
+        // Strip lower 3 bits of each color and determine if lighting should be used:
+        if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
+          driver_idx |= Lighting;
+        PS driver = gpuSpriteSpanDrivers[driver_idx];
+        gpuDrawS(packet, driver, &w, &h);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+      } break;
 
       case 0x78:
       case 0x79:
       case 0x7A:
-      case 0x7B:
-        PacketBuffer.U4[2] = 0x00100010;
-        gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]);
-        break;
+      case 0x7B: {          // Monochrome rectangle (16x16)
+        gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010);
+        PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+        s32 w = 0, h = 0;
+        gpuDrawT(packet, driver, &w, &h);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+      } break;
 
       case 0x7C:
       case 0x7D:
 #ifdef __arm__
-        if ((GPU_GP1 & 0x180) == 0 && (Masking | PixelMSB) == 0)
+        if ((gpu_unai.GPU_GP1 & 0x180) == 0 && (gpu_unai.Masking | gpu_unai.PixelMSB) == 0)
         {
-          gpuSetCLUT    (PacketBuffer.U4[2] >> 16);
-          gpuSetTexture (GPU_GP1);
-          gpuDrawS16();
+          s32 w = 0, h = 0;
+          gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+          gpuDrawS16(packet, &w, &h);
+          gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
           break;
         }
         // fallthrough
 #endif
       case 0x7E:
-      case 0x7F:
-        PacketBuffer.U4[3] = 0x00100010;
-        gpuSetCLUT    (PacketBuffer.U4[2] >> 16);
-        gpuSetTexture (GPU_GP1);
-        if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F))
-          gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7)  | PixelMSB]);
-        else
-          gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7)  | PixelMSB]);
-        break;
+      case 0x7F: {          // Textured rectangle (16x16)
+        gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010);
+        gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+        u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
+        s32 w = 0, h = 0;
+        //senquack - Only color 808080h-878787h allows skipping lighting calculation:
+        //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
+        // Strip lower 3 bits of each color and determine if lighting should be used:
+        if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
+          driver_idx |= Lighting;
+        PS driver = gpuSpriteSpanDrivers[driver_idx];
+        gpuDrawS(packet, driver, &w, &h);
+        gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+      } break;
 
+#ifdef TEST
       case 0x80:          //  vid -> vid
-        gpuMoveImage();   //  prim handles updateLace && skip
+        gpuMoveImage(packet);
         break;
-#ifdef TEST
+
       case 0xA0:          //  sys -> vid
       {
         u32 load_width = list[2] & 0xffff;
@@ -449,79 +794,38 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
         u32 load_size = load_width * load_height;
 
         len += load_size / 2;
-        break;
-      }
+      } break;
+
       case 0xC0:
         break;
 #else
-      case 0xA0:          //  sys ->vid
-      case 0xC0:          //  vid -> sys
+      case 0x1F:                   //  irq?
+      case 0x80 ... 0x9F:          //  vid -> vid
+      case 0xA0 ... 0xBF:          //  sys -> vid
+      case 0xC0 ... 0xDF:          //  vid -> sys
+        // Handled by gpulib
         goto breakloop;
 #endif
-      case 0xE1: {
-        const u32 temp = PacketBuffer.U4[0];
-        GPU_GP1 = (GPU_GP1 & ~0x000007FF) | (temp & 0x000007FF);
-        gpuSetTexture(temp);
-        gpu.ex_regs[1] = temp;
-        break;
-      }
-      case 0xE2: {
-        static const u8  TextureMask[32] = {
-          255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7,
-          127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7
-        };
-        const u32 temp = PacketBuffer.U4[0];
-        TextureWindow[0] = ((temp >> 10) & 0x1F) << 3;
-        TextureWindow[1] = ((temp >> 15) & 0x1F) << 3;
-        TextureWindow[2] = TextureMask[(temp >> 0) & 0x1F];
-        TextureWindow[3] = TextureMask[(temp >> 5) & 0x1F];
-        gpuSetTexture(GPU_GP1);
-        gpu.ex_regs[2] = temp;
-        break;
-      }
-      case 0xE3: {
-        const u32 temp = PacketBuffer.U4[0];
-        DrawingArea[0] = temp         & 0x3FF;
-        DrawingArea[1] = (temp >> 10) & 0x3FF;
-        gpu.ex_regs[3] = temp;
-        break;
-      }
-      case 0xE4: {
-        const u32 temp = PacketBuffer.U4[0];
-        DrawingArea[2] = (temp         & 0x3FF) + 1;
-        DrawingArea[3] = ((temp >> 10) & 0x3FF) + 1;
-        gpu.ex_regs[4] = temp;
-        break;
-      }
-      case 0xE5: {
-        const u32 temp = PacketBuffer.U4[0];
-        DrawingOffset[0] = ((s32)temp<<(32-11))>>(32-11);
-        DrawingOffset[1] = ((s32)temp<<(32-22))>>(32-11);
-        gpu.ex_regs[5] = temp;
-        break;
-      }
-      case 0xE6: {
-        const u32 temp = PacketBuffer.U4[0];
-        Masking = (temp & 0x2) <<  1;
-        PixelMSB =(temp & 0x1) <<  8;
-        gpu.ex_regs[6] = temp;
-        break;
-      }
+      case 0xE1 ... 0xE6: { // Draw settings
+        gpuGP0Cmd_0xEx(gpu_unai, le32_to_u32(gpu_unai.PacketBuffer.U4[0]));
+      } break;
     }
   }
 
 breakloop:
   gpu.ex_regs[1] &= ~0x1ff;
-  gpu.ex_regs[1] |= GPU_GP1 & 0x1ff;
+  gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff;
 
+  *cycles_sum_out += cpu_cycles_sum;
+  *cycles_last = cpu_cycles;
   *last_cmd = cmd;
   return list - list_start;
 }
 
-void renderer_sync_ecmds(uint32_t *ecmds)
+void renderer_sync_ecmds(u32 *ecmds)
 {
   int dummy;
-  do_cmd_list(&ecmds[1], 6, &dummy);
+  do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
 }
 
 void renderer_update_caches(int x, int y, int w, int h, int state_changed)
@@ -536,20 +840,33 @@ void renderer_set_interlace(int enable, int is_odd)
 {
 }
 
-#ifndef TEST
-
 #include "../../frontend/plugin_lib.h"
-
+// Handle any gpulib settings applicable to gpu_unai:
 void renderer_set_config(const struct rearmed_cbs *cbs)
 {
-  force_interlace = cbs->gpu_unai.lineskip;
-  enableAbbeyHack = cbs->gpu_unai.abe_hack;
-  light = !cbs->gpu_unai.no_light;
-  blend = !cbs->gpu_unai.no_blend;
+  gpu_unai.vram = (le16_t *)gpu.vram;
+  gpu_unai.config.ilace_force   = cbs->gpu_unai.ilace_force;
+  gpu_unai.config.pixel_skip    = cbs->gpu_unai.pixel_skip;
+  gpu_unai.config.lighting      = cbs->gpu_unai.lighting;
+  gpu_unai.config.fast_lighting = cbs->gpu_unai.fast_lighting;
+  gpu_unai.config.blending      = cbs->gpu_unai.blending;
+  gpu_unai.config.dithering     = cbs->gpu_unai.dithering;
+  gpu_unai.config.scale_hires   = cbs->gpu_unai.scale_hires;
+
+  gpu.state.downscale_enable    = gpu_unai.config.scale_hires;
+  if (gpu_unai.config.scale_hires) {
+    map_downscale_buffer();
+  } else {
+    unmap_downscale_buffer();
+  }
+}
 
-  GPU_FrameBuffer = (u16 *)gpu.vram;
+void renderer_sync(void)
+{
 }
 
-#endif
+void renderer_notify_update_lace(int updated)
+{
+}
 
 // vim:shiftwidth=2:expandtab