X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu_if.c;h=30faee256b06e92b525f95140050367fd7359c77;hb=308c6e678a2f0a56a9dee35307070550354f580c;hp=ca76fe2498af08e9b80b1cee4548e82cadb99eff;hpb=50f9355a2338111d940ed408f52fe1defe4df23e;p=pcsx_rearmed.git

diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c
index ca76fe24..30faee25 100644
--- a/plugins/gpu_neon/psx_gpu_if.c
+++ b/plugins/gpu_neon/psx_gpu_if.c
@@ -15,6 +15,7 @@ extern const unsigned char cmd_lengths[256];
 #define command_lengths cmd_lengths
 
 static unsigned int *ex_regs;
+static int initialized; // set to 1 after initialize_psx_gpu() has run on egpu; reset to 0 by renderer_finish()
 
 #define PCSX
 #define SET_Ex(r, v) \
@@ -30,76 +31,106 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd)
 {
   int ret;
 
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+  // the asm doesn't save callee-saved vector regs; clobbering q4-q7 here makes the compiler preserve them
+  __asm__ __volatile__("":::"q4","q5","q6","q7");
+#endif
+
   if (gpu.state.enhancement_active)
     ret = gpu_parse_enhanced(&egpu, list, count * 4, (u32 *)last_cmd);
   else
     ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
 
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+  __asm__ __volatile__("":::"q4","q5","q6","q7");
+#endif
+
   ex_regs[1] &= ~0x1ff;
   ex_regs[1] |= egpu.texture_settings & 0x1ff;
   return ret;
 }
 
-#define ENHANCEMENT_BUF_SIZE (1024 * 1024 * 2 * 4 + 4096)
+#define ENHANCEMENT_BUF_SIZE (1024 * 1024 * 2 * 4 + 4096 * 2) // bytes: 4x 1024*1024 16-bit buffers + 2 * 4096 of slack
 
-int renderer_init(void)
+static void *get_enhancement_bufer(int *x, int *y, int *w, int *h, // x/y/w/h: in/out, each doubled in place
+ int *vram_h) // vram_h: output, always set to 1024
 {
-  initialize_psx_gpu(&egpu, gpu.vram);
-  ex_regs = gpu.ex_regs;
+  uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x); // looked up with the caller's x, before it is doubled
+
+  *x *= 2; // from here on the rectangle is rewritten at twice its original size
+  *y *= 2;
+  *w = *w * 2;
+  *h = *h * 2;
+  *vram_h = 1024;
+  return ret; // pointer chosen by select_enhancement_buf_ptr() above
+}
+
+static void map_enhancement_buffer(void) // maps the buffer via gpu.mmap and sets or clears gpu.get_enhancement_bufer
+{
+  // currently we use 4x 1024*1024 buffers instead of single 2048*1024
+  // to be able to reuse 1024-width code better (triangle setup,
+  // dithering phase, lines).
+  egpu.enhancement_buf_ptr = gpu.mmap(ENHANCEMENT_BUF_SIZE);
+  if (egpu.enhancement_buf_ptr == NULL) { // NULL is treated as mapping failure
+    fprintf(stderr, "failed to map enhancement buffer\n");
+    gpu.get_enhancement_bufer = NULL; // no buffer, so no accessor callback is exposed
+  }
+  else {
+    egpu.enhancement_buf_ptr += 4096 / 2; // skip the start of the mapping; renderer_finish() undoes this offset
+    gpu.get_enhancement_bufer = get_enhancement_bufer;
+  }
+}
 
-  if (gpu.enhancement_bufer == NULL) {
-    // currently we use 4x 1024*1024 buffers instead of single 2048*1024
-    // to be able to reuse 1024-width code better (triangle setup,
-    // dithering phase, lines).
-    gpu.enhancement_bufer = mmap(NULL, ENHANCEMENT_BUF_SIZE,
-      PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-    if (gpu.enhancement_bufer == MAP_FAILED) {
-      printf("OOM for enhancement buffer\n");
-      gpu.enhancement_bufer = NULL;
-    }
+int renderer_init(void) // always returns 0
+{
+  if (gpu.vram != NULL) { // without vram, initialization is left to renderer_set_config()
+    initialize_psx_gpu(&egpu, gpu.vram);
+    initialized = 1;
   }
-  egpu.enhancement_buf_ptr = gpu.enhancement_bufer;
 
+  if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL) // map once, and only when an mmap callback is set
+    map_enhancement_buffer();
+
+  ex_regs = gpu.ex_regs; // file-scope alias used by do_cmd_list()
   return 0;
 }
 
 void renderer_finish(void)
 {
-  if (gpu.enhancement_bufer != NULL)
-    munmap(gpu.enhancement_bufer, ENHANCEMENT_BUF_SIZE);
-  gpu.enhancement_bufer = NULL;
+  if (egpu.enhancement_buf_ptr != NULL) {
+    egpu.enhancement_buf_ptr -= 4096 / 2; // back to the address gpu.mmap returned (map_enhancement_buffer() added this offset)
+    gpu.munmap(egpu.enhancement_buf_ptr, ENHANCEMENT_BUF_SIZE);
+  }
   egpu.enhancement_buf_ptr = NULL;
+  egpu.enhancement_current_buf_ptr = NULL; // presumably pointed into the buffer released above — confirm
+  initialized = 0; // renderer_set_config() re-runs initialize_psx_gpu() while this is 0
 }
 
 static __attribute__((noinline)) void
 sync_enhancement_buffers(int x, int y, int w, int h)
 {
-  int xt = egpu.enhancement_x_threshold;
+  const int step_x = 1024 / sizeof(egpu.enhancement_buf_by_x16); // x span per table entry (assumes 1-byte entries — confirm)
   u16 *src, *dst;
-  int wb, i;
+  int w1, fb_index; // w1: length of the current run, counted in step_x units
 
-  w += x & 7;
-  x &= ~7;
-  w = (w + 7) & ~7;
+  w += x & (step_x - 1); // grow w by the amount x is about to be rounded down
+  x &= ~(step_x - 1); // round x down to a step_x boundary (mask math assumes step_x is a power of 2)
+  w = (w + step_x - 1) & ~(step_x - 1); // round w up to a whole number of steps
   if (y + h > 512)
     h = 512 - y;
 
-  for (i = 0; i < 4 && w > 0; i++) {
-    if (x < 512) {
-      wb = w;
-      if (x + w > 512)
-        wb = 512 - x;
-      src = gpu.vram + xt * i + y * 1024 + x;
-      dst = egpu.enhancement_buf_ptr +
-        (1024*1024 + xt * 2) * i + (y * 1024 + x) * 2;
-      scale2x_tiles8(dst, src, wb / 8, h);
-    }
-
-    x -= xt;
-    if (x < 0) {
-      w += x;
-      x = 0;
-    }
+  while (w > 0) { // one pass per run of steps that share a table value
+    fb_index = egpu.enhancement_buf_by_x16[x / step_x]; // table value at the start of this run
+    for (w1 = 0; w > 0; w1++, w -= step_x) // extend the run while the table value stays the same
+      if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1])
+        break;
+
+    src = gpu.vram + y * 1024 + x; // source pixels, 1024 per row
+    dst = select_enhancement_buf_ptr(&egpu, x); // destination base chosen from the run's starting x
+    dst += (y * 1024 + x) * 2; // same offset formula as the removed code: (y * 1024 + x) * 2
+    scale2x_tiles8(dst, src, w1 * step_x / 8, h); // run width passed as a count of 8-pixel units
+
+    x += w1 * step_x; // continue after the run just handled
   }
 }
 
@@ -111,7 +142,7 @@ void renderer_sync_ecmds(uint32_t *ecmds)
 void renderer_update_caches(int x, int y, int w, int h)
 {
   update_texture_cache_region(&egpu, x, y, x + w - 1, y + h - 1);
-  if (gpu.state.enhancement_active && !gpu.status.rgb24)
+  if (gpu.state.enhancement_active && !(gpu.status & PSX_GPU_STATUS_RGB24)) // sync only while the RGB24 status bit is clear
     sync_enhancement_buffers(x, y, w, h);
 }
 
@@ -132,7 +163,11 @@ void renderer_set_interlace(int enable, int is_odd)
 void renderer_notify_res_change(void)
 {
   // note: must keep it multiple of 8
-  egpu.enhancement_x_threshold = gpu.screen.hres;
+  if (egpu.enhancement_x_threshold != gpu.screen.hres) // nothing to do unless hres changed
+  {
+    egpu.enhancement_x_threshold = gpu.screen.hres;
+    update_enhancement_buf_table_from_hres(&egpu); // presumably rebuilds enhancement_buf_by_x16 for the new threshold — confirm
+  }
 }
 
 #include "../../frontend/plugin_lib.h"
@@ -148,4 +183,14 @@ void renderer_set_config(const struct rearmed_cbs *cbs)
     sync_enhancement_buffers(0, 0, 1024, 512);
   }
   enhancement_was_on = cbs->gpu_neon.enhancement_enable;
+
+  if (!initialized) {
+    initialize_psx_gpu(&egpu, gpu.vram);
+    initialized = 1;
+  }
+
+  if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL)
+    map_enhancement_buffer();
+  if (cbs->pl_set_gpu_caps)
+    cbs->pl_set_gpu_caps(GPU_CAP_SUPPORTS_2X);
 }