Add a threaded renderer
author Justin Weiss <justin@justinweiss.com>
Mon, 14 Sep 2020 03:06:57 +0000 (20:06 -0700)
committer Justin Weiss <justin@justinweiss.com>
Fri, 23 Oct 2020 01:07:00 +0000 (18:07 -0700)
This change adds a gpulib implementation that accepts GPU commands and
runs them through a real gpulib implementation on a thread. Depending
on a setting, it can either force a sync every frame, or continue to
work until the next frame arrives.

15 files changed:
Makefile
Makefile.libretro
frontend/libretro.c
frontend/libretro_core_options.h
frontend/main.c
frontend/menu.c
frontend/plugin_lib.h
plugins/dfxvideo/gpulib_if.c
plugins/gpu-gles/gpulib_if.c
plugins/gpu_neon/psx_gpu_if.c
plugins/gpu_unai/gpulib_if.cpp
plugins/gpulib/gpu.c
plugins/gpulib/gpu.h
plugins/gpulib/gpulib_thread_if.c [new file with mode: 0644]
plugins/gpulib/gpulib_thread_if.h [new file with mode: 0644]

index 1496f4e..d6cb946 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -184,6 +184,10 @@ endif
 ifeq "$(BUILTIN_GPU)" "unai"
 CFLAGS += -DGPU_UNAI
 CFLAGS += -DUSE_GPULIB=1
+ifeq "$(THREAD_RENDERING)" "1"
+CFLAGS += -DTHREAD_RENDERING
+OBJS += plugins/gpulib/gpulib_thread_if.o
+endif
 #CFLAGS += -DINLINE="static __inline__"
 #CFLAGS += -Dasm="__asm__ __volatile__"
 OBJS += plugins/gpu_unai/gpulib_if.o
index 741ecfa..e58d63d 100644 (file)
@@ -217,6 +217,7 @@ else ifeq ($(platform), ctr)
 
 #      CFLAGS += -DPCSX
        BUILTIN_GPU = unai
+       THREAD_RENDERING = 1
        DYNAREC = ari64
        DRC_CACHE_BASE = 0
        ARCH = arm
index 93bfc28..323932a 100644 (file)
@@ -1818,6 +1818,21 @@ static void update_variables(bool in_flight)
          Config.SpuIrq = 1;
    }
 
+#ifdef THREAD_RENDERING
+   var.key = "pcsx_rearmed_gpu_thread_rendering";
+   var.value = NULL;
+
+   if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
+   {
+      if (strcmp(var.value, "disabled") == 0)
+         pl_rearmed_cbs.thread_rendering = THREAD_RENDERING_OFF;
+      else if (strcmp(var.value, "sync") == 0)
+         pl_rearmed_cbs.thread_rendering = THREAD_RENDERING_SYNC;
+      else if (strcmp(var.value, "async") == 0)
+         pl_rearmed_cbs.thread_rendering = THREAD_RENDERING_ASYNC;
+   }
+#endif
+
 #ifdef GPU_PEOPS
    var.value = NULL;
    var.key = "pcsx_rearmed_gpu_peops_odd_even_bit";
@@ -2031,7 +2046,7 @@ static void update_variables(bool in_flight)
             "pcsx_rearmed_gpu_unai_fast_lighting",
             "pcsx_rearmed_gpu_unai_ilace_force",
             "pcsx_rearmed_gpu_unai_pixel_skip",
-            "pcsx_rearmed_gpu_unai_scale_hires"
+            "pcsx_rearmed_gpu_unai_scale_hires",
          };
 
          option_display.visible = show_advanced_gpu_unai_settings;
index 941bd81..1075e8f 100644 (file)
@@ -972,6 +972,20 @@ struct retro_core_option_definition option_defs_us[] = {
       },
       "disabled",
    },
+#ifdef THREAD_RENDERING
+   {
+      "pcsx_rearmed_gpu_thread_rendering",
+      "Threaded Rendering",
+      "When enabled, runs GPU commands in a thread. Sync waits for drawing to finish before vsync. Async will not wait unless there's another frame behind it.",
+      {
+         { "disabled", NULL },
+         { "sync",  NULL },
+         { "async",  NULL },
+         { NULL, NULL},
+      },
+      "disabled",
+   },
+#endif
 #endif /* GPU UNAI Advanced Settings */
 
    {
index 51cb7bf..d3c7d40 100644 (file)
@@ -125,6 +125,8 @@ void emu_set_default_config(void)
        Config.SpuIrq = Config.RCntFix = Config.VSyncWA = 0;
        Config.PsxAuto = 1;
 
+       pl_rearmed_cbs.thread_rendering = 0;
+
        pl_rearmed_cbs.gpu_neon.allow_interlace = 2; // auto
        pl_rearmed_cbs.gpu_neon.enhancement_enable =
        pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0;
index e5c2738..bd56cf4 100644 (file)
@@ -420,6 +420,7 @@ static const struct {
        CE_INTVAL_N("adev0_is_nublike", in_adev_is_nublike[0]),
        CE_INTVAL_N("adev1_is_nublike", in_adev_is_nublike[1]),
        CE_INTVAL_V(frameskip, 3),
+       CE_INTVAL_P(thread_rendering),
        CE_INTVAL_P(gpu_peops.iUseDither),
        CE_INTVAL_P(gpu_peops.dwActFixes),
        CE_INTVAL_P(gpu_unai.ilace_force),
index 114aaad..71dfcb5 100644 (file)
@@ -1,6 +1,10 @@
 #ifndef __PLUGIN_LIB_H__
 #define __PLUGIN_LIB_H__
 
+#define THREAD_RENDERING_OFF   0
+#define THREAD_RENDERING_SYNC  1
+#define THREAD_RENDERING_ASYNC 2
+
 enum {
        DKEY_SELECT = 0,
        DKEY_L3,
@@ -70,6 +74,7 @@ struct rearmed_cbs {
        unsigned int *gpu_hcnt;
        unsigned int flip_cnt; // increment manually if not using pl_vout_flip
        unsigned int only_16bpp; // platform is 16bpp-only
+       unsigned int thread_rendering;
        struct {
                int   allow_interlace; // 0 off, 1 on, 2 guess
                int   enhancement_enable;
index bb3ad56..db0797c 100644 (file)
@@ -426,6 +426,14 @@ void renderer_set_interlace(int enable, int is_odd)
 {
 }
 
+/* gpulib calls this before it touches VRAM or resets; intentionally
+ * empty in this plugin. */
+void renderer_sync(void)
+{
+}
+
+/* Frame hook called from GPUupdateLace; intentionally empty in this
+ * plugin, so `updated` is ignored. */
+void renderer_notify_update_lace(int updated)
+{
+}
+
 #include "../../frontend/plugin_lib.h"
 
 void renderer_set_config(const struct rearmed_cbs *cbs)
index 1f4a23d..8cc1469 100644 (file)
@@ -769,3 +769,11 @@ static void fps_update(void)
   DisplayText(buf, 1);
  }
 }
+
+/* gpulib calls this before it touches VRAM or resets; intentionally
+ * empty in this plugin. */
+void renderer_sync(void)
+{
+}
+
+/* Frame hook called from GPUupdateLace; intentionally empty in this
+ * plugin, so `updated` is ignored. */
+void renderer_notify_update_lace(int updated)
+{
+}
index 3f3805a..81b9bae 100644 (file)
@@ -204,3 +204,9 @@ void renderer_set_config(const struct rearmed_cbs *cbs)
   }
 
 }
+/* gpulib calls this before it touches VRAM or resets; intentionally
+ * empty in this plugin. */
+void renderer_sync(void)
+{
+}
+/* Frame hook called from GPUupdateLace; intentionally empty in this
+ * plugin, so `updated` is ignored. */
+void renderer_notify_update_lace(int updated)
+{
+}
index e84eff5..588134d 100644 (file)
 #include <stdlib.h>
 #include <string.h>
 #include "../gpulib/gpu.h"
+
+#ifdef THREAD_RENDERING
+/* With threaded rendering compiled in, every gpulib entry point defined
+ * in this file is renamed to real_*, leaving the public names free for
+ * gpulib_thread_if.c. `ex_regs` is likewise redirected, so any
+ * gpu.ex_regs access below this point becomes gpu.scratch_ex_regs. */
+#include "../gpulib/gpulib_thread_if.h"
+#define do_cmd_list real_do_cmd_list
+#define renderer_init real_renderer_init
+#define renderer_finish real_renderer_finish
+#define renderer_sync_ecmds real_renderer_sync_ecmds
+#define renderer_update_caches real_renderer_update_caches
+#define renderer_flush_queues real_renderer_flush_queues
+#define renderer_set_interlace real_renderer_set_interlace
+#define renderer_set_config real_renderer_set_config
+#define renderer_notify_res_change real_renderer_notify_res_change
+#define renderer_notify_update_lace real_renderer_notify_update_lace
+#define renderer_sync real_renderer_sync
+#define ex_regs scratch_ex_regs
+#endif
+
 //#include "port.h"
 #include "gpu_unai.h"
 
@@ -802,4 +819,12 @@ void renderer_set_config(const struct rearmed_cbs *cbs)
   }
 }
 
+/* Intentionally empty. When THREAD_RENDERING is defined, the macros at
+ * the top of this file rename this definition to real_renderer_sync. */
+void renderer_sync(void)
+{
+}
+
+/* Intentionally empty; `updated` is ignored. When THREAD_RENDERING is
+ * defined, the macros at the top of this file rename this definition to
+ * real_renderer_notify_update_lace. */
+void renderer_notify_update_lace(int updated)
+{
+}
+
 // vim:shiftwidth=2:expandtab
index 007da65..ed37b71 100644 (file)
@@ -40,6 +40,8 @@ static void finish_vram_transfer(int is_read);
 
 static noinline void do_cmd_reset(void)
 {
+  renderer_sync();
+
   if (unlikely(gpu.cmd_len > 0))
     do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
   gpu.cmd_len = 0;
@@ -52,7 +54,6 @@ static noinline void do_cmd_reset(void)
 static noinline void do_reset(void)
 {
   unsigned int i;
-
   do_cmd_reset();
 
   memset(gpu.regs, 0, sizeof(gpu.regs));
@@ -370,6 +371,8 @@ static int do_vram_io(uint32_t *data, int count, int is_read)
   int l;
   count *= 2; // operate in 16bpp pixels
 
+  renderer_sync();
+
   if (gpu.dma.offset) {
     l = w - gpu.dma.offset;
     if (count < l)
@@ -714,12 +717,15 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
     case 1: // save
       if (gpu.cmd_len > 0)
         flush_cmd_buffer();
+
+      renderer_sync();
       memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
       memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
       memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
       freeze->ulStatus = gpu.status.reg;
       break;
     case 0: // load
+      renderer_sync();
       memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
       memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
       memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
@@ -752,6 +758,8 @@ void GPUupdateLace(void)
     return;
   }
 
+  renderer_notify_update_lace(0);
+
   if (!gpu.state.fb_dirty)
     return;
 
@@ -767,6 +775,7 @@ void GPUupdateLace(void)
   vout_update();
   gpu.state.fb_dirty = 0;
   gpu.state.blanked = 0;
+  renderer_notify_update_lace(1);
 }
 
 void GPUvBlank(int is_vblank, int lcf)
index d0f3bf8..64d2eec 100644 (file)
@@ -93,6 +93,7 @@ struct psx_gpu {
     uint32_t last_flip_frame;
     uint32_t pending_fill[3];
   } frameskip;
+  uint32_t scratch_ex_regs[8]; // for threaded rendering
   int useDithering:1; /* 0 - off , 1 - on */
   uint16_t *(*get_enhancement_bufer)
     (int *x, int *y, int *w, int *h, int *vram_h);
@@ -118,6 +119,8 @@ void renderer_flush_queues(void);
 void renderer_set_interlace(int enable, int is_odd);
 void renderer_set_config(const struct rearmed_cbs *config);
 void renderer_notify_res_change(void);
+void renderer_notify_update_lace(int updated);
+void renderer_sync(void);
 
 int  vout_init(void);
 int  vout_finish(void);
diff --git a/plugins/gpulib/gpulib_thread_if.c b/plugins/gpulib/gpulib_thread_if.c
new file mode 100644 (file)
index 0000000..f0f607d
--- /dev/null
@@ -0,0 +1,481 @@
+/**************************************************************************
+*   Copyright (C) 2020 The RetroArch Team                                 *
+*                                                                         *
+*   This program is free software; you can redistribute it and/or modify  *
+*   it under the terms of the GNU General Public License as published by  *
+*   the Free Software Foundation; either version 2 of the License, or     *
+*   (at your option) any later version.                                   *
+*                                                                         *
+*   This program is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+*   GNU General Public License for more details.                          *
+*                                                                         *
+*   You should have received a copy of the GNU General Public License     *
+*   along with this program; if not, write to the                         *
+*   Free Software Foundation, Inc.,                                       *
+*   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
+***************************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include "../gpulib/gpu.h"
+#include "../../frontend/plugin_lib.h"
+#include "gpulib_thread_if.h"
+
+/* One queued batch of GPU command words. */
+typedef struct {
+       uint32_t *cmd_list; /* heap copy of the words; freed when the slot is reused or the thread stops */
+       int count;          /* number of 32-bit words in cmd_list */
+       int last_cmd;       /* last command value reported by scan_cmd_list() for this batch */
+} video_thread_cmd;
+
+#define QUEUE_SIZE 0x2000
+
+/* Fixed-capacity ring buffer of command batches. */
+typedef struct {
+       size_t start; /* index of the next slot the render thread will consume */
+       size_t end;   /* index of the next slot the producer will fill */
+       size_t used;  /* number of occupied slots */
+       video_thread_cmd queue[QUEUE_SIZE];
+} video_thread_queue;
+
+/* Render thread handle, its synchronization objects, and the two
+ * command queues. */
+typedef struct {
+       pthread_t thread;
+       pthread_mutex_t queue_lock;      /* taken around accesses to the active queue */
+       pthread_cond_t cond_msg_avail;   /* signalled when work is queued or the thread should exit */
+       pthread_cond_t cond_msg_done;    /* signalled each time the render thread finishes a span of slots */
+       pthread_cond_t cond_queue_empty; /* signalled when the active queue's `used` drops to 0 */
+       video_thread_queue *queue;       /* active queue, drained by the render thread */
+       video_thread_queue *bg_queue;    /* receives new commands while hold_cmds is set */
+       bool running;                    /* render thread loop keeps going while this is true */
+} video_thread_state;
+
+static video_thread_state thread;
+static video_thread_queue queues[2]; /* storage behind thread.queue and thread.bg_queue */
+static int thread_rendering;         /* THREAD_RENDERING_* mode copied from the frontend config */
+static bool hold_cmds;               /* when set, new commands are appended to bg_queue */
+static bool needs_display;           /* frame-presentation bookkeeping; see renderer_notify_update_lace */
+
+/* Defined elsewhere; scan_cmd_list uses 1 + cmd_lengths[cmd] as the
+ * base word count of a command. */
+extern const unsigned char cmd_lengths[];
+
+/*
+ * Render thread entry point. `arg` is the video_thread_state to service.
+ * Loops until `running` is cleared: sleeps while the active queue is
+ * empty, claims a contiguous span of slots under the lock, executes
+ * them through real_do_cmd_list() with the lock released, then
+ * subtracts the span from `used` and wakes any waiters. Returns 0.
+ */
+static void *video_thread_main(void *arg) {
+       video_thread_state *thread = (video_thread_state *)arg;
+       video_thread_cmd *cmd;
+       int i;
+       static int processed = 0;
+
+       while(1) {
+               int result, last_cmd, start, end;
+               video_thread_queue *queue;
+               pthread_mutex_lock(&thread->queue_lock);
+
+               /* Sleep until there is work or we are asked to exit. */
+               while (!thread->queue->used && thread->running) {
+                       pthread_cond_wait(&thread->cond_msg_avail, &thread->queue_lock);
+               }
+
+               if (!thread->running) {
+                       pthread_mutex_unlock(&thread->queue_lock);
+                       break;
+               }
+
+               /* Claim [start, end). When the occupied region wraps, only the
+                * part up to the end of the array is taken now; the rest is
+                * picked up on the next pass of the outer loop. */
+               queue = thread->queue;
+               start = queue->start;
+               end = queue->end > queue->start ? queue->end : QUEUE_SIZE;
+               queue->start = end % QUEUE_SIZE;
+               pthread_mutex_unlock(&thread->queue_lock);
+
+               /* Run the claimed commands without holding the lock. */
+               for (i = start; i < end; i++) {
+                       cmd = &queue->queue[i];
+                       result = real_do_cmd_list(cmd->cmd_list, cmd->count, &last_cmd);
+
+                       if (result != cmd->count) {
+                               fprintf(stderr, "Processed wrong cmd count: expected %d, got %d\n", cmd->count, result);
+                       }
+
+#ifdef _3DS
+                       /* Periodically yield so as not to starve other threads */
+                       processed += cmd->count;
+                       if (processed >= 512) {
+                               svcSleepThread(1);
+                               processed %= 512;
+                       }
+#endif
+               }
+
+               /* Only now are the slots released, so `used` stays non-zero
+                * for the whole time the span is being executed. */
+               pthread_mutex_lock(&thread->queue_lock);
+               queue->used -= (end - start);
+
+               if (!queue->used)
+                       pthread_cond_signal(&thread->cond_queue_empty);
+
+               pthread_cond_signal(&thread->cond_msg_done);
+               pthread_mutex_unlock(&thread->queue_lock);
+       }
+
+       return 0;
+}
+
+/* If the background queue holds commands and the active queue has been
+ * fully drained, exchange the two, mark a frame as pending display and
+ * wake the render thread. Otherwise do nothing. */
+static void cmd_queue_swap() {
+       if (thread.bg_queue->used == 0)
+               return;
+
+       pthread_mutex_lock(&thread.queue_lock);
+       if (thread.queue->used == 0) {
+               video_thread_queue *drained = thread.queue;
+
+               thread.queue = thread.bg_queue;
+               thread.bg_queue = drained;
+               needs_display = true;
+               pthread_cond_signal(&thread.cond_msg_avail);
+       }
+       pthread_mutex_unlock(&thread.queue_lock);
+}
+
+/* Blocks until the render thread has emptied the active queue. Returns
+ * immediately when the thread is not running. */
+void renderer_wait() {
+       /* Unlocked peek: not strictly safe, but the render thread only
+        * ever lowers `used`, and the value is re-read under the lock
+        * below before we decide to sleep. */
+       if (!thread.running || !thread.queue->used)
+               return;
+
+       pthread_mutex_lock(&thread.queue_lock);
+       for (;;) {
+               if (!thread.queue->used)
+                       break;
+               pthread_cond_wait(&thread.cond_queue_empty, &thread.queue_lock);
+       }
+       pthread_mutex_unlock(&thread.queue_lock);
+}
+
+/* Drains both command queues so that VRAM reflects every command
+ * submitted so far. No-op when the render thread is not running. */
+void renderer_sync(void) {
+       bool idle;
+
+       if (!thread.running)
+               return;
+
+       /* Unlocked peek: not strictly safe, but the render thread only
+        * ever lowers `used`, and renderer_wait() re-checks under the
+        * lock. */
+       idle = !(thread.queue->used || thread.bg_queue->used);
+       if (idle)
+               return;
+
+       /* Both queues must be flushed: gpulib may be about to perform a
+        * DMA write that a queued command has to precede. Xenogears
+        * sprites, for example, draw a black rectangle over the area that
+        * is about to be DMA'd; if that draw lands after the DMA it wipes
+        * the transfer and the sprites flicker. Being fully up to date
+        * here may cost a dropped frame. */
+       renderer_wait();
+       cmd_queue_swap();
+       hold_cmds = false;
+       renderer_wait();
+}
+
+/*
+ * Drains outstanding work, stops and joins the render thread, destroys
+ * the synchronization objects and frees every command buffer held in
+ * either queue.
+ */
+static void video_thread_stop() {
+       size_t q, i;
+
+       renderer_sync();
+
+       if (thread.running) {
+               /* Clear the flag and signal while holding queue_lock. The
+                * render thread tests `running` under this lock right before it
+                * sleeps; doing this unlocked could land between its test and
+                * its wait, losing the wakeup and hanging pthread_join(). */
+               pthread_mutex_lock(&thread.queue_lock);
+               thread.running = false;
+               pthread_cond_signal(&thread.cond_msg_avail);
+               pthread_mutex_unlock(&thread.queue_lock);
+               pthread_join(thread.thread, NULL);
+       }
+
+       pthread_mutex_destroy(&thread.queue_lock);
+       pthread_cond_destroy(&thread.cond_msg_avail);
+       pthread_cond_destroy(&thread.cond_msg_done);
+       pthread_cond_destroy(&thread.cond_queue_empty);
+
+       /* Walk the backing array rather than thread.queue/thread.bg_queue:
+        * those pointers are still NULL if startup failed before they were
+        * assigned, and they only ever point into queues[] anyway. */
+       for (q = 0; q < sizeof(queues) / sizeof(queues[0]); q++) {
+               for (i = 0; i < QUEUE_SIZE; i++) {
+                       video_thread_cmd *cmd = &queues[q].queue[i];
+                       free(cmd->cmd_list);
+                       cmd->cmd_list = NULL;
+               }
+       }
+}
+
+/*
+ * Creates the synchronization objects and launches the render thread.
+ * On success thread.running is true. On failure everything that was
+ * created is destroyed again and thread.running stays false, so callers
+ * keep rendering synchronously.
+ */
+static void video_thread_start() {
+       fprintf(stdout, "Starting render thread\n");
+
+       if (pthread_mutex_init(&thread.queue_lock, NULL))
+               goto error;
+       if (pthread_cond_init(&thread.cond_msg_avail, NULL))
+               goto error_lock;
+       if (pthread_cond_init(&thread.cond_msg_done, NULL))
+               goto error_avail;
+       if (pthread_cond_init(&thread.cond_queue_empty, NULL))
+               goto error_done;
+
+       /* Publish the queues and the running flag BEFORE the thread is
+        * created: video_thread_main reads all three as soon as it starts,
+        * and would dereference NULL or exit at once if they were set
+        * afterwards. */
+       thread.queue = &queues[0];
+       thread.bg_queue = &queues[1];
+       thread.running = true;
+
+       if (pthread_create(&thread.thread, NULL, video_thread_main, &thread)) {
+               thread.running = false;
+               goto error_empty;
+       }
+
+       return;
+
+       /* Unwind only what was successfully initialized. */
+ error_empty:
+       pthread_cond_destroy(&thread.cond_queue_empty);
+ error_done:
+       pthread_cond_destroy(&thread.cond_msg_done);
+ error_avail:
+       pthread_cond_destroy(&thread.cond_msg_avail);
+ error_lock:
+       pthread_mutex_destroy(&thread.queue_lock);
+ error:
+       fprintf(stderr,"Failed to start rendering thread\n");
+}
+
+/*
+ * Copies `count` words from `list` and appends them as one batch to the
+ * background queue (while hold_cmds is set) or to the active queue.
+ * Blocks while the chosen queue is full. `last_cmd` is stored with the
+ * batch.
+ *
+ * If the copy cannot be allocated, the render thread is stopped and the
+ * batch is executed synchronously so that no command is lost.
+ */
+static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
+       video_thread_cmd *cmd;
+       uint32_t *cmd_list;
+       video_thread_queue *queue;
+       bool lock;
+
+       /* Never request zero elements: calloc(0, n) may legitimately return
+        * NULL, which would be mistaken for an out-of-memory condition. */
+       cmd_list = (uint32_t *)calloc(count > 0 ? count : 1, sizeof(uint32_t));
+
+       if (!cmd_list) {
+               int unused_last_cmd;
+
+               /* Out of memory, disable the thread and run sync from now on */
+               fprintf(stderr,"Failed to allocate render thread command list, stopping thread\n");
+               video_thread_stop();
+
+               /* video_thread_stop() drained everything queued earlier, so
+                * running this batch directly keeps commands in order. */
+               real_do_cmd_list(list, count, &unused_last_cmd);
+               return;
+       }
+
+       memcpy(cmd_list, list, count * sizeof(uint32_t));
+
+       if (hold_cmds && thread.bg_queue->used >= QUEUE_SIZE) {
+               /* If the bg queue is full, do a full sync to empty both queues
+                * and clear space. This should be very rare, I've only seen it in
+                * Tekken 3 post-battle-replay. */
+               renderer_sync();
+       }
+
+       /* renderer_sync() above clears hold_cmds, so re-read it here. The
+        * background queue is filled without taking the lock; the active
+        * queue is always accessed under it. */
+       if (hold_cmds) {
+               queue = thread.bg_queue;
+               lock = false;
+       } else {
+               queue = thread.queue;
+               lock = true;
+       }
+
+       if (lock) {
+               pthread_mutex_lock(&thread.queue_lock);
+
+               while (queue->used >= QUEUE_SIZE) {
+                       pthread_cond_wait(&thread.cond_msg_done, &thread.queue_lock);
+               }
+       }
+
+       cmd = &queue->queue[queue->end];
+       free(cmd->cmd_list); /* release the buffer left over from the slot's previous use */
+       cmd->cmd_list = cmd_list;
+       cmd->count = count;
+       cmd->last_cmd = last_cmd;
+       queue->end = (queue->end + 1) % QUEUE_SIZE;
+       queue->used++;
+
+       if (lock) {
+               pthread_cond_signal(&thread.cond_msg_avail);
+               pthread_mutex_unlock(&thread.queue_lock);
+       }
+}
+
+/*
+ * Walks `data` (`count` words) and returns how many leading words form
+ * complete commands that can be handed to the render thread, updating
+ * gpu.ex_regs along the way.
+ *
+ * Scanning stops, without consuming the command, at the first
+ * incomplete command (*last_cmd is set to -1) or at the first command
+ * in the 0xa0..0xdf range (image i/o). Otherwise *last_cmd receives the
+ * last command byte seen.
+ */
+static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
+{
+       int cmd = 0, pos = 0, len, v, idx;
+
+       while (pos < count) {
+               uint32_t *list = data + pos;
+               cmd = list[0] >> 24;
+               len = 1 + cmd_lengths[cmd];
+
+               switch (cmd) {
+                       case 0x02:
+                               break;
+                       case 0x24 ... 0x27:
+                       case 0x2c ... 0x2f:
+                       case 0x34 ... 0x37:
+                       case 0x3c ... 0x3f:
+                               /* The word that feeds the low 9 bits of ex_regs[1] sits
+                                * at index 4 or 5. Read it only if it lies inside the
+                                * supplied list: a truncated command must not cause a
+                                * read past the end, and it is rescanned once complete. */
+                               idx = 4 + ((cmd >> 4) & 1);
+                               if (pos + idx < count) {
+                                       gpu.ex_regs[1] &= ~0x1ff;
+                                       gpu.ex_regs[1] |= list[idx] & 0x1ff;
+                               }
+                               break;
+                       case 0x48 ... 0x4F:
+                               /* Variable length: extend up to the terminator word. */
+                               for (v = 3; pos + v < count; v++)
+                               {
+                                       if ((list[v] & 0xf000f000) == 0x50005000)
+                                               break;
+                               }
+                               len += v - 3;
+                               break;
+                       case 0x58 ... 0x5F:
+                               /* As above, but the terminator is looked for every
+                                * second word. */
+                               for (v = 4; pos + v < count; v += 2)
+                               {
+                                       if ((list[v] & 0xf000f000) == 0x50005000)
+                                               break;
+                               }
+                               len += v - 4;
+                               break;
+                       default:
+                               if ((cmd & 0xf8) == 0xe0)
+                                       gpu.ex_regs[cmd & 7] = list[0];
+                               break;
+               }
+
+               if (pos + len > count) {
+                       cmd = -1;
+                       break; /* incomplete cmd */
+               }
+               if (0xa0 <= cmd && cmd <= 0xdf)
+                       break; /* image i/o */
+
+               pos += len;
+       }
+
+       *last_cmd = cmd;
+       return pos;
+}
+
+/*
+ * gpulib entry point for a command list. With the render thread running,
+ * the list is scanned and the asynchronously-executable prefix is
+ * queued. Otherwise it is executed directly and the renderer's scratch
+ * ex_regs are copied back into gpu.ex_regs. Returns the number of words
+ * consumed.
+ */
+int do_cmd_list(uint32_t *list, int count, int *last_cmd) {
+       int consumed;
+
+       if (!thread.running) {
+               consumed = real_do_cmd_list(list, count, last_cmd);
+               memcpy(gpu.ex_regs, gpu.scratch_ex_regs, sizeof(gpu.ex_regs));
+               return consumed;
+       }
+
+       consumed = scan_cmd_list(list, count, last_cmd);
+       video_thread_queue_cmd(list, consumed, *last_cmd);
+       return consumed;
+}
+
+/* Starts the render thread when a threaded mode is already selected,
+ * then initializes the real renderer and returns its result. */
+int renderer_init(void) {
+       if (thread_rendering != 0)
+               video_thread_start();
+
+       return real_renderer_init();
+}
+
+/* Shuts down the render thread (if any) and then the real renderer. */
+void renderer_finish(void) {
+       /* Stop the thread first: video_thread_stop() drains the queues
+        * through real_do_cmd_list(), which must not run after the real
+        * renderer has been torn down. */
+       if (thread_rendering && thread.running) {
+               video_thread_stop();
+       }
+
+       real_renderer_finish();
+}
+
+/* Re-applies the e-commands. With the render thread running, words
+ * ecmds[1]..ecmds[6] go through do_cmd_list() so they are queued in
+ * order; otherwise the real renderer handles them directly. */
+void renderer_sync_ecmds(uint32_t * ecmds) {
+       int ignored_last_cmd;
+
+       if (!thread.running) {
+               real_renderer_sync_ecmds(ecmds);
+               return;
+       }
+
+       do_cmd_list(&ecmds[1], 6, &ignored_last_cmd);
+}
+
+/* Drains all queued commands, then forwards the (x, y, w, h) rectangle
+ * to the real renderer's cache update. */
+void renderer_update_caches(int x, int y, int w, int h) {
+       renderer_sync();
+       real_renderer_update_caches(x, y, w, h);
+}
+
+/* Forwards to the real renderer without waiting for the render thread. */
+void renderer_flush_queues(void) {
+       /* Called during DMA and updateLace. We want to sync if it's DMA,
+        * but not if it's updateLace. Instead of syncing here, there's a
+        * renderer_sync call during DMA. */
+       real_renderer_flush_queues();
+}
+
+/*
+ * Normally all GPU commands are processed before rendering the
+ * frame. For games that naturally run < 50/60fps, this is unnecessary
+ * -- it forces the game to render as if it was 60fps and leaves the
+ * GPU idle half the time on a 30fps game, for example.
+ *
+ * Allowing the renderer to wait until a frame is done before
+ * rendering it would give it double, triple, or quadruple the amount
+ * of time to finish before we have to wait for it.
+ *
+ * We can use a heuristic to figure out when to force a render.
+ *
+ * - If a frame isn't done when we're asked to render, wait for it and
+ *   put future GPU commands in a separate buffer (for the next frame)
+ *
+ * - If the frame is done, and had no future GPU commands, render it.
+ *
+ * - If we do have future GPU commands, it meant the frame took too
+ *   long to render and there's another frame waiting. Stop until the
+ *   first frame finishes, render it, and start processing the next
+ *   one.
+ *
+ * This may possibly add a frame or two of latency that shouldn't be
+ * different than the real device. It may skip rendering a frame
+ * entirely if a VRAM transfer happens while a frame is waiting, or in
+ * games that natively run at 60fps if frames are coming in too
+ * quickly to process. Depending on how the game treats "60fps," this
+ * may not be noticeable.
+ */
+/*
+ * Per-frame hook. GPUupdateLace calls it with updated == 0 before it
+ * tests gpu.state.fb_dirty, and with updated == 1 after the frame has
+ * been sent to vout_update().
+ *
+ * No-op without a render thread. In SYNC mode it simply drains
+ * everything. In ASYNC mode the updated == 1 call tries to promote the
+ * background queue, and the updated == 0 call decides whether this frame
+ * is presented by setting gpu.state.fb_dirty, hold_cmds and
+ * needs_display.
+ */
+void renderer_notify_update_lace(int updated) {
+       if (!thread.running) return;
+
+       if (thread_rendering == THREAD_RENDERING_SYNC) {
+               renderer_sync();
+               return;
+       }
+
+       if (updated) {
+               cmd_queue_swap();
+               return;
+       }
+
+       pthread_mutex_lock(&thread.queue_lock);
+       if (thread.bg_queue->used) {
+               /* We have commands for a future frame to run. Force a wait until
+                * the current frame is finished, and start processing the next
+                * frame after it's drawn (see the `updated` clause above). */
+               /* renderer_wait() takes queue_lock itself, so release it around
+                * the call. */
+               pthread_mutex_unlock(&thread.queue_lock);
+               renderer_wait();
+               pthread_mutex_lock(&thread.queue_lock);
+
+               /* We are no longer holding commands back, so the next frame may
+                * get mixed into the following frame. This is usually fine, but can
+                * result in frameskip-like effects for 60fps games. */
+               hold_cmds = false;
+               needs_display = true;
+               gpu.state.fb_dirty = true;
+       } else if (thread.queue->used) {
+               /* We are still drawing during a vblank. Cut off the current frame
+                * by sending new commands to the background queue and skip
+                * drawing our partly rendered frame to the display. */
+               hold_cmds = true;
+               needs_display = true;
+               gpu.state.fb_dirty = false;
+       } else if (needs_display && !thread.queue->used) {
+               /* We have processed all commands in the queue, render the
+                * buffer. We know we have something to render, because
+                * needs_display is true. */
+               hold_cmds = false;
+               needs_display = false;
+               gpu.state.fb_dirty = true;
+       } else {
+               /* Everything went normally, so do the normal thing. */
+       }
+
+       pthread_mutex_unlock(&thread.queue_lock);
+}
+
+/* Passes the interlace settings straight to the real renderer; no
+ * synchronization with the render thread is performed here. */
+void renderer_set_interlace(int enable, int is_odd) {
+       real_renderer_set_interlace(enable, is_odd);
+}
+
+/* Applies a new frontend configuration: drains pending work, records
+ * the requested threading mode, starts or stops the render thread to
+ * match it, then forwards the configuration to the real renderer. */
+void renderer_set_config(const struct rearmed_cbs *cbs) {
+       bool want_thread;
+
+       renderer_sync();
+
+       thread_rendering = cbs->thread_rendering;
+       want_thread = (thread_rendering != THREAD_RENDERING_OFF);
+
+       if (want_thread && !thread.running)
+               video_thread_start();
+       else if (!want_thread && thread.running)
+               video_thread_stop();
+
+       real_renderer_set_config(cbs);
+}
+
+/* Drains all queued commands before telling the real renderer that the
+ * resolution changed. */
+void renderer_notify_res_change(void) {
+       renderer_sync();
+       real_renderer_notify_res_change();
+}
diff --git a/plugins/gpulib/gpulib_thread_if.h b/plugins/gpulib/gpulib_thread_if.h
new file mode 100644 (file)
index 0000000..b1ea97f
--- /dev/null
@@ -0,0 +1,41 @@
+/**************************************************************************
+*   Copyright (C) 2020 The RetroArch Team                                 *
+*                                                                         *
+*   This program is free software; you can redistribute it and/or modify  *
+*   it under the terms of the GNU General Public License as published by  *
+*   the Free Software Foundation; either version 2 of the License, or     *
+*   (at your option) any later version.                                   *
+*                                                                         *
+*   This program is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+*   GNU General Public License for more details.                          *
+*                                                                         *
+*   You should have received a copy of the GNU General Public License     *
+*   along with this program; if not, write to the                         *
+*   Free Software Foundation, Inc.,                                       *
+*   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
+***************************************************************************/
+
+#ifndef __GPULIB_THREAD_H__
+#define __GPULIB_THREAD_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Entry points of the underlying renderer. A plugin built with
+ * THREAD_RENDERING renames its own gpulib functions to these names, and
+ * gpulib_thread_if.c calls them from its wrappers.
+ * NOTE(review): this header uses uint32_t and struct rearmed_cbs without
+ * including their declarations -- presumably the includer provides them;
+ * confirm. */
+int  real_do_cmd_list(uint32_t *list, int count, int *last_cmd);
+int  real_renderer_init(void);
+void real_renderer_finish(void);
+void real_renderer_sync_ecmds(uint32_t * ecmds);
+void real_renderer_update_caches(int x, int y, int w, int h);
+void real_renderer_flush_queues(void);
+void real_renderer_set_interlace(int enable, int is_odd);
+void real_renderer_set_config(const struct rearmed_cbs *config);
+void real_renderer_notify_res_change(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __GPULIB_THREAD_H__ */