X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fgpu_unai%2Fgpulib_if.cpp;h=2dedbf83d48aff46cb8b6950fc6420fb40a5ec41;hp=0064aaa3742ba5679da9adde5e2e07864f86fea1;hb=HEAD;hpb=0b4038f8edd327a3a9a2fbdefbc25ece921bc2ab diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 0064aaa3..6816e2bd 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -2,6 +2,7 @@ * Copyright (C) 2010 PCSX4ALL Team * * Copyright (C) 2010 Unai * * Copyright (C) 2011 notaz * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -19,170 +20,402 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * ***************************************************************************/ +#include #include #include #include #include "../gpulib/gpu.h" -#include "arm_features.h" - -#define u8 uint8_t -#define s8 int8_t -#define u16 uint16_t -#define s16 int16_t -#define u32 uint32_t -#define s32 int32_t -#define s64 int64_t - -#define INLINE static - -#define FRAME_BUFFER_SIZE (1024*512*2) -#define FRAME_WIDTH 1024 -#define FRAME_HEIGHT 512 -#define FRAME_OFFSET(x,y) (((y)<<10)+(x)) - -#define isSkip 0 /* skip frame (info coming from GPU) */ -#define alt_fps 0 -static int linesInterlace; /* internal lines interlace */ -static int force_interlace; - -static bool light = true; /* lighting */ -static bool blend = true; /* blending */ -static bool FrameToRead = false; /* load image in progress */ -static bool FrameToWrite = false; /* store image in progress */ - -static bool enableAbbeyHack = false; /* Abe's Odyssey hack */ - -static u8 BLEND_MODE; -static u8 TEXT_MODE; -static u8 Masking; - -static u16 PixelMSB; -static u16 PixelData; - -/////////////////////////////////////////////////////////////////////////////// -// GPU Global data -/////////////////////////////////////////////////////////////////////////////// - -// Dma Transfers info -static s32 px,py; -static s32 x_end,y_end; -static u16* pvram; - -static s32 PacketCount; -static s32 PacketIndex; - -// Rasterizer status -static u32 TextureWindow [4]; -static u32 DrawingArea [4]; -static u32 DrawingOffset [2]; - -static u16* TBA; -static u16* CBA; - -// Inner Loops -static s32 u4, du4; -static s32 v4, dv4; -static s32 r4, dr4; -static s32 g4, dg4; -static s32 b4, db4; -static u32 lInc; -static u32 tInc, tMsk; - -union GPUPacket -{ - u32 U4[16]; - s32 S4[16]; - u16 U2[32]; - s16 S2[32]; - u8 U1[64]; - s8 S1[64]; -}; - -static GPUPacket PacketBuffer; -static u16 *GPU_FrameBuffer; -static u32 GPU_GP1; - -/////////////////////////////////////////////////////////////////////////////// - -#include "../gpu_unai/gpu_fixedpoint.h" - -// Inner loop driver instanciation file -#include "../gpu_unai/gpu_inner.h" -// GPU Raster Macros -#define GPU_RGB16(rgb) ((((rgb)&0xF80000)>>9)|(((rgb)&0xF800)>>6)|(((rgb)&0xF8)>>3)) +#ifdef THREAD_RENDERING +#include "../gpulib/gpulib_thread_if.h" +#define do_cmd_list real_do_cmd_list +#define renderer_init real_renderer_init +#define renderer_finish real_renderer_finish +#define renderer_sync_ecmds real_renderer_sync_ecmds +#define renderer_update_caches real_renderer_update_caches +#define renderer_flush_queues real_renderer_flush_queues +#define renderer_set_interlace real_renderer_set_interlace +#define renderer_set_config real_renderer_set_config +#define renderer_notify_res_change real_renderer_notify_res_change +#define renderer_notify_update_lace real_renderer_notify_update_lace +#define renderer_sync real_renderer_sync +#define ex_regs scratch_ex_regs +#endif -#define GPU_EXPANDSIGN(x) (((s32)(x)<<21)>>21) +//#include "port.h" +#include "gpu_unai.h" -#define CHKMAX_X 1024 -#define CHKMAX_Y 512 +// GPU fixed point math +#include "gpu_fixedpoint.h" -#define GPU_SWAP(a,b,t) {(t)=(a);(a)=(b);(b)=(t);} +// Inner loop driver instantiation file +#include "gpu_inner.h" // GPU internal image drawing functions -#include "../gpu_unai/gpu_raster_image.h" +#include "gpu_raster_image.h" // GPU internal line drawing functions -#include "../gpu_unai/gpu_raster_line.h" +#include "gpu_raster_line.h" // GPU internal polygon drawing functions -#include "../gpu_unai/gpu_raster_polygon.h" +#include "gpu_raster_polygon.h" // GPU internal sprite drawing functions -#include "../gpu_unai/gpu_raster_sprite.h" +#include "gpu_raster_sprite.h" // GPU command buffer execution/store -#include "../gpu_unai/gpu_command.h" +#include "gpu_command.h" ///////////////////////////////////////////////////////////////////////////// +#define DOWNSCALE_VRAM_SIZE (1024 * 512 * 2 * 2 + 4096) + +INLINE void scale_640_to_320(le16_t *dest, const le16_t *src, bool isRGB24) { + size_t uCount = 320; + + if(isRGB24) { + const uint8_t* src8 = (const uint8_t *)src; + uint8_t* dst8 = (uint8_t *)dest; + + do { + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8; + src8 += 4; + } while(--uCount); + } else { + const le16_t* src16 = src; + le16_t* dst16 = dest; + + do { + *dst16++ = *src16; + src16 += 2; + } while(--uCount); + } +} + +INLINE void scale_512_to_320(le16_t *dest, const le16_t *src, bool isRGB24) { + size_t uCount = 64; + + if(isRGB24) { + const uint8_t* src8 = (const uint8_t *)src; + uint8_t* dst8 = (uint8_t *)dest; + + do { + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8; + src8 += 4; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8; + src8 += 4; + *dst8++ = *src8++; + *dst8++ = *src8++; + *dst8++ = *src8; + src8 += 4; + } while(--uCount); + } else { + const le16_t* src16 = src; + le16_t* dst16 = dest; + + do { + *dst16++ = *src16++; + *dst16++ = *src16; + src16 += 2; + *dst16++ = *src16++; + *dst16++ = *src16; + src16 += 2; + *dst16++ = *src16; + src16 += 2; + } while(--uCount); + } +} + +static uint16_t *get_downscale_buffer(int *x, int *y, int *w, int *h, int *vram_h) +{ + le16_t *dest = gpu_unai.downscale_vram; + const le16_t *src = gpu_unai.vram; + bool isRGB24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false); + int stride = 1024, dstride = 1024, lines = *h, orig_w = *w; + + // PS1 fb read wraps around (fixes black screen in 'Tobal no. 1') + unsigned int fb_mask = 1024 * 512 - 1; + + if (*h > 240) { + *h /= 2; + stride *= 2; + lines = *h; + + // Ensure start at a non-skipped line + while (*y & gpu_unai.ilace_mask) ++*y; + } + + unsigned int fb_offset_src = (*y * dstride + *x) & fb_mask; + unsigned int fb_offset_dest = fb_offset_src; + + if (*w == 512 || *w == 640) { + *w = 320; + } + + switch(orig_w) { + case 640: + do { + scale_640_to_320(dest + fb_offset_dest, src + fb_offset_src, isRGB24); + fb_offset_src = (fb_offset_src + stride) & fb_mask; + fb_offset_dest = (fb_offset_dest + dstride) & fb_mask; + } while(--lines); + + break; + case 512: + do { + scale_512_to_320(dest + fb_offset_dest, src + fb_offset_src, isRGB24); + fb_offset_src = (fb_offset_src + stride) & fb_mask; + fb_offset_dest = (fb_offset_dest + dstride) & fb_mask; + } while(--lines); + break; + default: + size_t size = isRGB24 ? *w * 3 : *w * 2; + + do { + memcpy(dest + fb_offset_dest, src + fb_offset_src, size); + fb_offset_src = (fb_offset_src + stride) & fb_mask; + fb_offset_dest = (fb_offset_dest + dstride) & fb_mask; + } while(--lines); + break; + } + + return (uint16_t *)gpu_unai.downscale_vram; +} + +static void map_downscale_buffer(void) +{ + if (gpu_unai.downscale_vram) + return; + + gpu_unai.downscale_vram = (le16_t*)gpu.mmap(DOWNSCALE_VRAM_SIZE); + + if (gpu_unai.downscale_vram == NULL) { + fprintf(stderr, "failed to map downscale buffer\n"); + gpu.get_downscale_buffer = NULL; + } + else { + gpu.get_downscale_buffer = get_downscale_buffer; + } +} + +static void unmap_downscale_buffer(void) +{ + if (gpu_unai.downscale_vram == NULL) + return; + + gpu.munmap(gpu_unai.downscale_vram, DOWNSCALE_VRAM_SIZE); + gpu_unai.downscale_vram = NULL; + gpu.get_downscale_buffer = NULL; +} + int renderer_init(void) { - GPU_FrameBuffer = (u16 *)gpu.vram; - - // s_invTable - for(int i=1;i<=(1<>1); - #else - v *= double(0x80000000); - #endif - s_invTable[i-1]=s32(v); - } - - return 0; + memset((void*)&gpu_unai, 0, sizeof(gpu_unai)); + gpu_unai.vram = (le16_t *)gpu.vram; + + // Original standalone gpu_unai initialized TextureWindow[]. I added the + // same behavior here, since it seems unsafe to leave [2],[3] unset when + // using HLE and Rearmed gpu_neon sets this similarly on init. -senquack + gpu_unai.TextureWindow[0] = 0; + gpu_unai.TextureWindow[1] = 0; + gpu_unai.TextureWindow[2] = 255; + gpu_unai.TextureWindow[3] = 255; + //senquack - new vars must be updated whenever texture window is changed: + // (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h) + const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 + gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); + + // Configuration options + gpu_unai.config = gpu_unai_config_ext; + //senquack - disabled, not sure this is needed and would require modifying + // sprite-span functions, perhaps unnecessarily. No Abe Oddysey hack was + // present in latest PCSX4ALL sources we were using. + //gpu_unai.config.enableAbbeyHack = gpu_unai_config_ext.abe_hack; + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; + +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + // s_invTable + for(int i=1;i<=(1<>1); +#else + v *= double(0x80000000); +#endif + s_invTable[i-1]=s32(v); + } +#endif + + SetupLightLUT(); + SetupDitheringConstants(); + + if (gpu_unai.config.scale_hires) { + map_downscale_buffer(); + } + + return 0; } void renderer_finish(void) { + unmap_downscale_buffer(); } void renderer_notify_res_change(void) +{ + if (PixelSkipEnabled()) { + // Set blit_mask for high horizontal resolutions. This allows skipping + // rendering pixels that would never get displayed on low-resolution + // platforms that use simple pixel-dropping scaler. + + switch (gpu.screen.hres) + { + case 512: gpu_unai.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS + case 640: gpu_unai.blit_mask = 0xaa; break; // GPU_BlitWS + default: gpu_unai.blit_mask = 0; break; + } + } else { + gpu_unai.blit_mask = 0; + } + + if (LineSkipEnabled()) { + // Set rendering line-skip (only render every other line in high-res + // 480 vertical mode, or, optionally, force it for all video modes) + + if (gpu.screen.vres == 480) { + if (gpu_unai.config.ilace_force) { + gpu_unai.ilace_mask = 3; // Only need 1/4 of lines + } else { + gpu_unai.ilace_mask = 1; // Only need 1/2 of lines + } + } else { + // Vert resolution changed from 480 to lower one + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; + } + } else { + gpu_unai.ilace_mask = 0; + } + + /* + printf("res change hres: %d vres: %d depth: %d ilace_mask: %d\n", + gpu.screen.hres, gpu.screen.vres, (gpu.status & PSX_GPU_STATUS_RGB24) ? 24 : 15, + gpu_unai.ilace_mask); + */ +} + +void renderer_notify_scanout_change(int x, int y) { } -void renderer_notify_scanout_x_change(int x, int w) +#ifdef USE_GPULIB +// Handles GP0 draw settings commands 0xE1...0xE6 +static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) { + // Assume incoming GP0 command is 0xE1..0xE6, convert to 1..6 + u8 num = (cmd_word >> 24) & 7; + gpu.ex_regs[num] = cmd_word; // Update gpulib register + switch (num) { + case 1: { + // GP0(E1h) - Draw Mode setting (aka "Texpage") + u32 cur_texpage = gpu_unai.GPU_GP1 & 0x7FF; + u32 new_texpage = cmd_word & 0x7FF; + if (cur_texpage != new_texpage) { + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x7FF) | new_texpage; + gpuSetTexture(gpu_unai.GPU_GP1); + } + } break; + + case 2: { + // GP0(E2h) - Texture Window setting + if (cmd_word != gpu_unai.TextureWindowCur) { + static const u8 TextureMask[32] = { + 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7, + 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7 + }; + gpu_unai.TextureWindowCur = cmd_word; + gpu_unai.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3; + gpu_unai.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3; + gpu_unai.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F]; + gpu_unai.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F]; + gpu_unai.TextureWindow[0] &= ~gpu_unai.TextureWindow[2]; + gpu_unai.TextureWindow[1] &= ~gpu_unai.TextureWindow[3]; + + // Inner loop vars must be updated whenever texture window is changed: + const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 + gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); + + gpuSetTexture(gpu_unai.GPU_GP1); + } + } break; + + case 3: { + // GP0(E3h) - Set Drawing Area top left (X1,Y1) + gpu_unai.DrawingArea[0] = cmd_word & 0x3FF; + gpu_unai.DrawingArea[1] = (cmd_word >> 10) & 0x3FF; + } break; + + case 4: { + // GP0(E4h) - Set Drawing Area bottom right (X2,Y2) + gpu_unai.DrawingArea[2] = (cmd_word & 0x3FF) + 1; + gpu_unai.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1; + } break; + + case 5: { + // GP0(E5h) - Set Drawing Offset (X,Y) + gpu_unai.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11); + gpu_unai.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11); + } break; + + case 6: { + // GP0(E6h) - Mask Bit Setting + gpu_unai.Masking = (cmd_word & 0x2) << 1; + gpu_unai.PixelMSB = (cmd_word & 0x1) << 8; + } break; + } } +#endif +#include "../gpulib/gpu_timing.h" extern const unsigned char cmd_lengths[256]; -int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) +int do_cmd_list(u32 *list_, int list_len, + int *cycles_sum_out, int *cycles_last, int *last_cmd) { - unsigned int cmd = 0, len, i; - unsigned int *list_start = list; - unsigned int *list_end = list + list_len; + int cpu_cycles_sum = 0, cpu_cycles = *cycles_last; + u32 cmd = 0, len, i; + le32_t *list = (le32_t *)list_; + le32_t *list_start = list; + le32_t *list_end = list + list_len; + + //TODO: set ilace_mask when resolution changes instead of every time, + // eliminate #ifdef below. + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; - linesInterlace = force_interlace; #ifdef HAVE_PRE_ARMV7 /* XXX */ - linesInterlace |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE); + gpu_unai.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE); #endif + if (gpu_unai.config.scale_hires) { + gpu_unai.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE); + } for (; list < list_end; list += 1 + len) { - cmd = *list >> 24; + cmd = le32_to_u32(*list) >> 24; len = cmd_lengths[cmd]; if (list + 1 + len > list_end) { cmd = -1; @@ -190,258 +423,370 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) } #define PRIM cmd - PacketBuffer.U4[0] = list[0]; + gpu_unai.PacketBuffer.U4[0] = list[0]; for (i = 1; i <= len; i++) - PacketBuffer.U4[i] = list[i]; + gpu_unai.PacketBuffer.U4[i] = list[i]; + + PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer }; switch (cmd) { case 0x02: - gpuClearImage(); + gpuClearImage(packet); + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_fill(le16_to_s16(packet.U2[4]) & 0x3ff, le16_to_s16(packet.U2[5]) & 0x1ff)); break; case 0x20: case 0x21: case 0x22: - case 0x23: - gpuDrawF3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB]); - break; + case 0x23: { // Monochrome 3-pt poly + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Blending_Mode | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB + ]; + gpuDrawPolyF(packet, driver, false); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); + } break; case 0x24: case 0x25: case 0x26: - case 0x27: - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture(PacketBuffer.U4[4] >> 16); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB]); - else - gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB]); - break; + case 0x27: { // Textured 3-pt poly + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16); + + u32 driver_idx = + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB; + + if (!FastLightingEnabled()) { + driver_idx |= Lighting; + } else { + if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))) + driver_idx |= Lighting; + } + + PP driver = gpuPolySpanDrivers[driver_idx]; + gpuDrawPolyFT(packet, driver, false); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); + } break; case 0x28: case 0x29: case 0x2A: - case 0x2B: { - const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB]; - gpuDrawF3(gpuPolySpanDriver); - PacketBuffer.U4[1] = PacketBuffer.U4[4]; - gpuDrawF3(gpuPolySpanDriver); - break; - } + case 0x2B: { // Monochrome 4-pt poly + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Blending_Mode | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB + ]; + gpuDrawPolyF(packet, driver, true); // is_quad = true + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); + } break; case 0x2C: case 0x2D: case 0x2E: - case 0x2F: { - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture(PacketBuffer.U4[4] >> 16); - PP gpuPolySpanDriver; - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB]; - else - gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB]; - gpuDrawFT3(gpuPolySpanDriver); - PacketBuffer.U4[1] = PacketBuffer.U4[7]; - PacketBuffer.U4[2] = PacketBuffer.U4[8]; - gpuDrawFT3(gpuPolySpanDriver); - break; - } + case 0x2F: { // Textured 4-pt poly + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16); + + u32 driver_idx = + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB; + + if (!FastLightingEnabled()) { + driver_idx |= Lighting; + } else { + if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))) + driver_idx |= Lighting; + } + + PP driver = gpuPolySpanDrivers[driver_idx]; + gpuDrawPolyFT(packet, driver, true); // is_quad = true + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); + } break; case 0x30: case 0x31: case 0x32: - case 0x33: - gpuDrawG3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB]); - break; + case 0x33: { // Gouraud-shaded 3-pt poly + //NOTE: The '129' here is CF_GOURAUD | CF_LIGHT, however + // this is an untextured poly, so CF_LIGHT (texture blend) + // shouldn't apply. Until the original array of template + // instantiation ptrs is fixed, we're stuck with this. (TODO) + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | + gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB + ]; + gpuDrawPolyG(packet, driver, false); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); + } break; case 0x34: case 0x35: case 0x36: - case 0x37: - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (PacketBuffer.U4[5] >> 16); - gpuDrawGT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB]); - break; + case 0x37: { // Gouraud-shaded, textured 3-pt poly + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16); + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB + ]; + gpuDrawPolyGT(packet, driver, false); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); + } break; case 0x38: case 0x39: case 0x3A: - case 0x3B: { - const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB]; - gpuDrawG3(gpuPolySpanDriver); - PacketBuffer.U4[0] = PacketBuffer.U4[6]; - PacketBuffer.U4[1] = PacketBuffer.U4[7]; - gpuDrawG3(gpuPolySpanDriver); - break; - } + case 0x3B: { // Gouraud-shaded 4-pt poly + // See notes regarding '129' for 0x30..0x33 further above -senquack + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | + gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB + ]; + gpuDrawPolyG(packet, driver, true); // is_quad = true + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); + } break; case 0x3C: case 0x3D: case 0x3E: - case 0x3F: { - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (PacketBuffer.U4[5] >> 16); - const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB]; - gpuDrawGT3(gpuPolySpanDriver); - PacketBuffer.U4[0] = PacketBuffer.U4[9]; - PacketBuffer.U4[1] = PacketBuffer.U4[10]; - PacketBuffer.U4[2] = PacketBuffer.U4[11]; - gpuDrawGT3(gpuPolySpanDriver); - break; - } + case 0x3F: { // Gouraud-shaded, textured 4-pt poly + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16); + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB + ]; + gpuDrawPolyGT(packet, driver, true); // is_quad = true + gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); + } break; case 0x40: case 0x41: case 0x42: - case 0x43: - gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); - break; - - case 0x48 ... 0x4F: - { + case 0x43: { // Monochrome line + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineF(packet, driver); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); + } break; + + case 0x48 ... 0x4F: { // Monochrome line strip u32 num_vertexes = 1; - u32 *list_position = &(list[2]); + le32_t *list_position = &list[2]; - gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineF(packet, driver); while(1) { - PacketBuffer.U4[1] = PacketBuffer.U4[2]; - PacketBuffer.U4[2] = *list_position++; - gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); + gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2]; + gpu_unai.PacketBuffer.U4[2] = *list_position++; + gpuDrawLineF(packet, driver); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); num_vertexes++; if(list_position >= list_end) { cmd = -1; goto breakloop; } - if((*list_position & 0xf000f000) == 0x50005000) + if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000)) break; } len += (num_vertexes - 2); - break; - } + } break; case 0x50: case 0x51: case 0x52: - case 0x53: - gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); - break; - - case 0x58 ... 0x5F: - { + case 0x53: { // Gouraud-shaded line + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + // Index MSB selects Gouraud-shaded PixelSpanDriver: + driver_idx |= (1 << 5); + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineG(packet, driver); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); + } break; + + case 0x58 ... 0x5F: { // Gouraud-shaded line strip u32 num_vertexes = 1; - u32 *list_position = &(list[2]); + le32_t *list_position = &list[2]; - gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + // Index MSB selects Gouraud-shaded PixelSpanDriver: + driver_idx |= (1 << 5); + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineG(packet, driver); while(1) { - PacketBuffer.U4[0] = PacketBuffer.U4[2]; - PacketBuffer.U4[1] = PacketBuffer.U4[3]; - PacketBuffer.U4[2] = *list_position++; - PacketBuffer.U4[3] = *list_position++; - gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); + gpu_unai.PacketBuffer.U4[0] = gpu_unai.PacketBuffer.U4[2]; + gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[3]; + gpu_unai.PacketBuffer.U4[2] = *list_position++; + gpu_unai.PacketBuffer.U4[3] = *list_position++; + gpuDrawLineG(packet, driver); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); num_vertexes++; if(list_position >= list_end) { cmd = -1; goto breakloop; } - if((*list_position & 0xf000f000) == 0x50005000) + if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000)) break; } len += (num_vertexes - 2) * 2; - break; - } + } break; case 0x60: case 0x61: case 0x62: - case 0x63: - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); - break; + case 0x63: { // Monochrome rectangle (variable size) + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); + } break; case 0x64: case 0x65: case 0x66: - case 0x67: - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]); - else - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]); - break; + case 0x67: { // Textured rectangle (variable size) + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + s32 w = 0, h = 0; + + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + // This fixes Silent Hill running animation on loading screens: + // (On PSX, color values 0x00-0x7F darken the source texture's color, + // 0x81-FF lighten textures (ultimately clamped to 0x1F), + // 0x80 leaves source texture color unchanged, HOWEVER, + // gpu_unai uses a simple lighting LUT whereby only the upper + // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as + // 0x80. + // + // NOTE: I've changed all textured sprite draw commands here and + // elsewhere to use proper behavior, but left poly commands + // alone, I don't want to slow rendering down too much. (TODO) + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver, &w, &h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); + } break; case 0x68: case 0x69: case 0x6A: - case 0x6B: - PacketBuffer.U4[2] = 0x00010001; - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); - break; + case 0x6B: { // Monochrome rectangle (1x1 dot) + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001); + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); + } break; case 0x70: case 0x71: case 0x72: - case 0x73: - PacketBuffer.U4[2] = 0x00080008; - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); - break; + case 0x73: { // Monochrome rectangle (8x8) + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008); + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); + } break; case 0x74: case 0x75: case 0x76: - case 0x77: - PacketBuffer.U4[3] = 0x00080008; - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]); - else - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]); - break; + case 0x77: { // Textured rectangle (8x8) + gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + s32 w = 0, h = 0; + + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver, &w, &h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); + } break; case 0x78: case 0x79: case 0x7A: - case 0x7B: - PacketBuffer.U4[2] = 0x00100010; - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); - break; + case 0x7B: { // Monochrome rectangle (16x16) + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010); + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + s32 w = 0, h = 0; + gpuDrawT(packet, driver, &w, &h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); + } break; case 0x7C: case 0x7D: #ifdef __arm__ - if ((GPU_GP1 & 0x180) == 0 && (Masking | PixelMSB) == 0) + if ((gpu_unai.GPU_GP1 & 0x180) == 0 && (gpu_unai.Masking | gpu_unai.PixelMSB) == 0) { - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - gpuDrawS16(); + s32 w = 0, h = 0; + gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuDrawS16(packet, &w, &h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); break; } // fallthrough #endif case 0x7E: - case 0x7F: - PacketBuffer.U4[3] = 0x00100010; - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]); - else - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]); - break; + case 0x7F: { // Textured rectangle (16x16) + gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + s32 w = 0, h = 0; + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver, &w, &h); + gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); + } break; +#ifdef TEST case 0x80: // vid -> vid - gpuMoveImage(); // prim handles updateLace && skip + gpuMoveImage(packet); break; -#ifdef TEST + case 0xA0: // sys -> vid { u32 load_width = list[2] & 0xffff; @@ -449,79 +794,38 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) u32 load_size = load_width * load_height; len += load_size / 2; - break; - } + } break; + case 0xC0: break; #else - case 0xA0: // sys ->vid - case 0xC0: // vid -> sys + case 0x1F: // irq? + case 0x80 ... 0x9F: // vid -> vid + case 0xA0 ... 0xBF: // sys -> vid + case 0xC0 ... 0xDF: // vid -> sys + // Handled by gpulib goto breakloop; #endif - case 0xE1: { - const u32 temp = PacketBuffer.U4[0]; - GPU_GP1 = (GPU_GP1 & ~0x000007FF) | (temp & 0x000007FF); - gpuSetTexture(temp); - gpu.ex_regs[1] = temp; - break; - } - case 0xE2: { - static const u8 TextureMask[32] = { - 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7, - 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7 - }; - const u32 temp = PacketBuffer.U4[0]; - TextureWindow[0] = ((temp >> 10) & 0x1F) << 3; - TextureWindow[1] = ((temp >> 15) & 0x1F) << 3; - TextureWindow[2] = TextureMask[(temp >> 0) & 0x1F]; - TextureWindow[3] = TextureMask[(temp >> 5) & 0x1F]; - gpuSetTexture(GPU_GP1); - gpu.ex_regs[2] = temp; - break; - } - case 0xE3: { - const u32 temp = PacketBuffer.U4[0]; - DrawingArea[0] = temp & 0x3FF; - DrawingArea[1] = (temp >> 10) & 0x3FF; - gpu.ex_regs[3] = temp; - break; - } - case 0xE4: { - const u32 temp = PacketBuffer.U4[0]; - DrawingArea[2] = (temp & 0x3FF) + 1; - DrawingArea[3] = ((temp >> 10) & 0x3FF) + 1; - gpu.ex_regs[4] = temp; - break; - } - case 0xE5: { - const u32 temp = PacketBuffer.U4[0]; - DrawingOffset[0] = ((s32)temp<<(32-11))>>(32-11); - DrawingOffset[1] = ((s32)temp<<(32-22))>>(32-11); - gpu.ex_regs[5] = temp; - break; - } - case 0xE6: { - const u32 temp = PacketBuffer.U4[0]; - Masking = (temp & 0x2) << 1; - PixelMSB =(temp & 0x1) << 8; - gpu.ex_regs[6] = temp; - break; - } + case 0xE1 ... 0xE6: { // Draw settings + gpuGP0Cmd_0xEx(gpu_unai, le32_to_u32(gpu_unai.PacketBuffer.U4[0])); + } break; } } breakloop: gpu.ex_regs[1] &= ~0x1ff; - gpu.ex_regs[1] |= GPU_GP1 & 0x1ff; + gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff; + *cycles_sum_out += cpu_cycles_sum; + *cycles_last = cpu_cycles; *last_cmd = cmd; return list - list_start; } -void renderer_sync_ecmds(uint32_t *ecmds) +void renderer_sync_ecmds(u32 *ecmds) { int dummy; - do_cmd_list(&ecmds[1], 6, &dummy); + do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy); } void renderer_update_caches(int x, int y, int w, int h, int state_changed) @@ -536,20 +840,33 @@ void renderer_set_interlace(int enable, int is_odd) { } -#ifndef TEST - #include "../../frontend/plugin_lib.h" - +// Handle any gpulib settings applicable to gpu_unai: void renderer_set_config(const struct rearmed_cbs *cbs) { - force_interlace = cbs->gpu_unai.lineskip; - enableAbbeyHack = cbs->gpu_unai.abe_hack; - light = !cbs->gpu_unai.no_light; - blend = !cbs->gpu_unai.no_blend; + gpu_unai.vram = (le16_t *)gpu.vram; + gpu_unai.config.ilace_force = cbs->gpu_unai.ilace_force; + gpu_unai.config.pixel_skip = cbs->gpu_unai.pixel_skip; + gpu_unai.config.lighting = cbs->gpu_unai.lighting; + gpu_unai.config.fast_lighting = cbs->gpu_unai.fast_lighting; + gpu_unai.config.blending = cbs->gpu_unai.blending; + gpu_unai.config.dithering = cbs->gpu_unai.dithering; + gpu_unai.config.scale_hires = cbs->gpu_unai.scale_hires; + + gpu.state.downscale_enable = gpu_unai.config.scale_hires; + if (gpu_unai.config.scale_hires) { + map_downscale_buffer(); + } else { + unmap_downscale_buffer(); + } +} - GPU_FrameBuffer = (u16 *)gpu.vram; +void renderer_sync(void) +{ } -#endif +void renderer_notify_update_lace(int updated) +{ +} // vim:shiftwidth=2:expandtab