1 /**************************************************************************
2 * Copyright (C) 2020 The RetroArch Team *
4 * This program is free software; you can redistribute it and/or modify *
5 * it under the terms of the GNU General Public License as published by *
6 * the Free Software Foundation; either version 2 of the License, or *
7 * (at your option) any later version. *
9 * This program is distributed in the hope that it will be useful, *
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
12 * GNU General Public License for more details. *
14 * You should have received a copy of the GNU General Public License *
15 * along with this program; if not, write to the *
16 * Free Software Foundation, Inc., *
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
18 ***************************************************************************/
24 #include "../gpulib/gpu.h"
25 #include "../../frontend/plugin_lib.h"
26 #include "gpulib_thread_if.h"
/* Boolean type used by this translation unit (pre-stdbool style). */
30 #define BOOL unsigned short
/* Ring-buffer capacity of each command queue, in commands. */
38 #define QUEUE_SIZE 0x2000
/* NOTE(review): the following members appear to be struct interiors
 * whose `struct {` / `} name;` lines are missing from this excerpt
 * (embedded numbering jumps 38->44, 44->49, 54->58) -- presumably
 * `video_thread_queue` (ring buffer + used/start/end bookkeeping) and
 * `video_thread_state` (thread handle, running flag, sync primitives,
 * fg/bg queue pointers).  Confirm against the full source. */
44 video_thread_cmd queue[QUEUE_SIZE];
49 pthread_mutex_t queue_lock;
50 pthread_cond_t cond_msg_avail;
51 pthread_cond_t cond_msg_done;
52 pthread_cond_t cond_queue_empty;
53 video_thread_queue *queue;
54 video_thread_queue *bg_queue;
/* Singleton render-thread state shared by every function below. */
58 static video_thread_state thread;
/* Backing storage for the foreground/background queue pair. */
59 static video_thread_queue queues[2];
/* Threading mode from rearmed_cbs (off / on / sync); see renderer_set_config. */
60 static int thread_rendering;
/* When TRUE, new commands are routed to the background queue. */
61 static BOOL hold_cmds;
/* TRUE when a fully processed frame is waiting to be displayed. */
62 static BOOL needs_display;
/* Per-opcode GPU command lengths, defined by gpulib. */
64 extern const unsigned char cmd_lengths[];
/* Render-thread entry point: repeatedly drains the foreground queue and
 * executes each queued command list via real_do_cmd_list().  `arg` is the
 * shared video_thread_state.
 * NOTE(review): several lines are missing from this excerpt (embedded
 * numbering jumps 68->72, 85->89, 106->113, etc.), so the outer loop,
 * the `int i` declaration, the yield body, and the final return are not
 * visible here -- confirm structure against the full source. */
66 static void *video_thread_main(void *arg) {
67 video_thread_state *thread = (video_thread_state *)arg;
68 video_thread_cmd *cmd;
/* Commands executed since the last yield; static so the count survives
 * iterations of the (not fully visible) outer loop. */
72 static int processed = 0;
76 int result, last_cmd, start, end;
77 video_thread_queue *queue;
78 pthread_mutex_lock(&thread->queue_lock);
/* Sleep until a producer signals cond_msg_avail, or until shutdown. */
80 while (!thread->queue->used && thread->running) {
81 pthread_cond_wait(&thread->cond_msg_avail, &thread->queue_lock);
/* Shutdown requested: release the lock and exit (the return statement
 * itself is not visible in this excerpt). */
84 if (!thread->running) {
85 pthread_mutex_unlock(&thread->queue_lock);
89 queue = thread->queue;
/* Claim a contiguous slice of the ring buffer: run to queue->end if it
 * lies ahead of start, otherwise to the wrap point QUEUE_SIZE.  Advance
 * `start` past the slice before dropping the lock so producers can keep
 * appending while this thread renders. */
91 end = queue->end > queue->start ? queue->end : QUEUE_SIZE;
92 queue->start = end % QUEUE_SIZE;
93 pthread_mutex_unlock(&thread->queue_lock);
/* Execute every command list in the claimed slice. */
95 for (i = start; i < end; i++) {
96 cmd = &queue->queue[i];
97 result = real_do_cmd_list(cmd->cmd_list, cmd->count, &last_cmd);
/* real_do_cmd_list should consume exactly cmd->count words. */
99 if (result != cmd->count) {
100 fprintf(stderr, "Processed wrong cmd count: expected %d, got %d\n", cmd->count, result);
104 /* Periodically yield so as not to starve other threads */
105 processed += cmd->count;
/* Yield body not visible in this excerpt (presumably sched_yield() and
 * a reset of `processed` -- confirm against the full source). */
106 if (processed >= 512) {
/* Retire the processed slice under the lock and wake any waiters. */
113 pthread_mutex_lock(&thread->queue_lock);
114 queue->used -= (end - start);
/* Wakes renderer_wait()/renderer_sync(); a guard on queue->used, if
 * any, is not visible in this excerpt. */
117 pthread_cond_signal(&thread->cond_queue_empty);
/* Wakes producers blocked on a full queue in video_thread_queue_cmd(). */
119 pthread_cond_signal(&thread->cond_msg_done);
120 pthread_mutex_unlock(&thread->queue_lock);
/* Swap the foreground and background queues, but only when the background
 * queue has pending commands and the foreground queue has fully drained.
 * On a successful swap, marks the frame as displayable and wakes the
 * render thread. */
126 static void cmd_queue_swap() {
127 video_thread_queue *tmp;
/* Nothing held back -- nothing to swap. */
128 if (!thread.bg_queue->used) return;
130 pthread_mutex_lock(&thread.queue_lock);
/* Only swap once the render thread has emptied the foreground queue. */
131 if (!thread.queue->used) {
/* NOTE(review): an assignment saving thread.queue into `tmp`
 * (orig. line 132) is missing from this excerpt; as shown, `tmp`
 * would be used uninitialized below -- confirm against full source. */
133 thread.queue = thread.bg_queue;
134 thread.bg_queue = tmp;
135 needs_display = TRUE;
136 pthread_cond_signal(&thread.cond_msg_avail);
138 pthread_mutex_unlock(&thread.queue_lock);
141 /* Waits for the main queue to completely finish. */
142 void renderer_wait() {
/* No render thread -- everything already ran synchronously. */
143 if (!thread.running) return;
145 /* Not completely safe, but should be fine since the render thread
146 * only decreases used, and we check again inside the lock. */
/* Lock-free fast path: early-return body (orig. lines 148-150) is not
 * visible in this excerpt. */
147 if (!thread.queue->used) {
151 pthread_mutex_lock(&thread.queue_lock);
/* Block until video_thread_main signals cond_queue_empty after
 * draining the foreground queue. */
153 while (thread.queue->used) {
154 pthread_cond_wait(&thread.cond_queue_empty, &thread.queue_lock);
157 pthread_mutex_unlock(&thread.queue_lock);
160 /* Waits for all GPU commands in both queues to finish, bringing VRAM
161 * completely up-to-date. */
162 void renderer_sync(void) {
/* Without a render thread, VRAM is always up-to-date. */
163 if (!thread.running) return;
165 /* Not completely safe, but should be fine since the render thread
166 * only decreases used, and we check again inside the lock. */
/* Fast path when both queues are idle; the early-return body and the
 * remainder of this function (presumably the actual flush/wait calls,
 * orig. lines 168-183) are not visible in this excerpt. */
167 if (!thread.queue->used && !thread.bg_queue->used) {
171 /* Flush both queues. This is necessary because gpulib could be
172 * trying to process a DMA write that a command in the queue should
173 * run beforehand. For example, Xenogears sprites write a black
174 * rectangle over the to-be-DMA'd spot in VRAM -- if this write
175 * happens after the DMA, it will clear the DMA, resulting in
176 * flickering sprites. We need to be totally up-to-date. This may
/* Stop the render thread, destroy its synchronization primitives, and
 * release every queued command list in both queues.
 * NOTE(review): this excerpt omits several lines (e.g. an `int i`
 * declaration, a likely renderer_sync() call, and the free() of each
 * cmd_list before it is NULLed at orig. 201/207) -- confirm against
 * the full source. */
184 static void video_thread_stop() {
188 if (thread.running) {
/* Clear the flag first, then wake the thread so its wait loop exits. */
189 thread.running = FALSE;
190 pthread_cond_signal(&thread.cond_msg_avail);
191 pthread_join(thread.thread, NULL);
194 pthread_mutex_destroy(&thread.queue_lock);
195 pthread_cond_destroy(&thread.cond_msg_avail);
196 pthread_cond_destroy(&thread.cond_msg_done);
197 pthread_cond_destroy(&thread.cond_queue_empty);
/* Drop references to all foreground-queue command lists. */
199 for (i = 0; i < QUEUE_SIZE; i++) {
200 video_thread_cmd *cmd = &thread.queue->queue[i];
202 cmd->cmd_list = NULL;
/* Same for the background queue. */
205 for (i = 0; i < QUEUE_SIZE; i++) {
206 video_thread_cmd *cmd = &thread.bg_queue->queue[i];
208 cmd->cmd_list = NULL;
/* Initialize the condition variables, mutex, and worker thread, then wire
 * up the foreground/background queue pointers and mark the thread running.
 * NOTE(review): the error-handling structure (presumably a goto/label
 * reached when any init call fails, plus a return and cleanup around
 * orig. lines 220-233) is not visible in this excerpt. */
212 static void video_thread_start() {
213 fprintf(stdout, "Starting render thread\n");
/* Any nonzero return from these pthread calls means startup failed. */
215 if (pthread_cond_init(&thread.cond_msg_avail, NULL) ||
216 pthread_cond_init(&thread.cond_msg_done, NULL) ||
217 pthread_cond_init(&thread.cond_queue_empty, NULL) ||
218 pthread_mutex_init(&thread.queue_lock, NULL) ||
219 pthread_create(&thread.thread, NULL, video_thread_main, &thread)) {
/* queues[0] starts as foreground, queues[1] as background. */
223 thread.queue = &queues[0];
224 thread.bg_queue = &queues[1];
226 thread.running = TRUE;
/* Failure path (label/cleanup not visible in this excerpt). */
230 fprintf(stderr,"Failed to start rendering thread\n");
/* Copy `count` words of the GPU command list into a heap buffer and append
 * it to either the foreground queue or, when hold_cmds is set, the
 * background queue, then wake the render thread.
 * NOTE(review): this excerpt omits lines including the `uint32_t *cmd_list;`
 * declaration, the allocation-failure branch body, a likely full sync on
 * the bg-queue-overflow path, and the `cmd->count = count;` assignment
 * (orig. 276) -- confirm against the full source. */
234 static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
235 video_thread_cmd *cmd;
237 video_thread_queue *queue;
/* Private copy: the caller's list may be reused before the render
 * thread gets to it. */
240 cmd_list = (uint32_t *)calloc(count, sizeof(uint32_t));
243 /* Out of memory, disable the thread and run sync from now on */
244 fprintf(stderr,"Failed to allocate render thread command list, stopping thread\n");
248 memcpy(cmd_list, list, count * sizeof(uint32_t));
250 if (hold_cmds && thread.bg_queue->used >= QUEUE_SIZE) {
251 /* If the bg queue is full, do a full sync to empty both queues
252 * and clear space. This should be very rare, I've only seen it in
253 * Tekken 3 post-battle-replay. */
/* Held-back commands go to the background (next-frame) queue... */
258 queue = thread.bg_queue;
/* ...otherwise straight to the queue the render thread is draining. */
261 queue = thread.queue;
266 pthread_mutex_lock(&thread.queue_lock);
/* Foreground queue full: wait for the render thread to retire a slice
 * (it signals cond_msg_done after each batch). */
268 while (queue->used >= QUEUE_SIZE) {
269 pthread_cond_wait(&thread.cond_msg_done, &thread.queue_lock);
/* Fill the slot at the ring's write index and advance it. */
273 cmd = &queue->queue[queue->end];
275 cmd->cmd_list = cmd_list;
277 cmd->last_cmd = last_cmd;
278 queue->end = (queue->end + 1) % QUEUE_SIZE;
282 pthread_cond_signal(&thread.cond_msg_avail);
283 pthread_mutex_unlock(&thread.queue_lock);
287 /* Slice off just the part of the list that can be handled async, and
/* Walk the command list, mirroring the side effects gpulib needs
 * immediately (ex_regs updates), and return the word count of the prefix
 * that is safe to hand to the render thread asynchronously.
 * NOTE(review): the switch/case skeleton dispatching on `cmd`, the
 * length-clamp cases, and the final bookkeeping/return (orig. 288-343
 * gaps) are not visible in this excerpt -- confirm against full source. */
289 static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
291 int cmd = 0, pos = 0, len, v;
293 while (pos < count) {
294 uint32_t *list = data + pos;
/* Total words for this command: opcode word + per-opcode payload. */
296 len = 1 + cmd_lengths[cmd];
/* Textured-primitive case (per the list[4 + ...] texpage word): update
 * the texture-page bits of ex_regs[1] eagerly. */
305 gpu.ex_regs[1] &= ~0x1ff;
306 gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
/* Poly-line case: scan for the 0x5000,5000 terminator, one word per
 * vertex... */
309 for (v = 3; pos + v < count; v++)
311 if ((list[v] & 0xf000f000) == 0x50005000)
/* ...and the textured poly-line variant, two words per vertex. */
317 for (v = 4; pos + v < count; v += 2)
319 if ((list[v] & 0xf000f000) == 0x50005000)
/* E1..E6 environment commands: mirror into ex_regs so gpulib state
 * stays current without waiting for the render thread. */
325 if ((cmd & 0xf8) == 0xe0)
326 gpu.ex_regs[cmd & 7] = list[0];
/* Command extends past the provided buffer -- stop before it. */
330 if (pos + len > count) {
332 break; /* incomplete cmd */
/* VRAM image transfers must be handled synchronously by the caller. */
334 if (0xa0 <= cmd && cmd <= 0xdf)
335 break; /* image i/o */
/* gpulib entry point for command lists.  With the render thread running,
 * scan off the async-safe prefix and queue it; otherwise execute the list
 * synchronously and refresh ex_regs from the renderer's scratch copy.
 * NOTE(review): the `int pos` declaration, the else framing between the
 * two branches, and the `return pos` (orig. 345-356 gaps) are not
 * visible in this excerpt. */
344 int do_cmd_list(uint32_t *list, int count, int *last_cmd) {
347 if (thread.running) {
348 pos = scan_cmd_list(list, count, last_cmd);
349 video_thread_queue_cmd(list, pos, *last_cmd);
/* Synchronous path (thread not running). */
351 pos = real_do_cmd_list(list, count, last_cmd);
352 memcpy(gpu.ex_regs, gpu.scratch_ex_regs, sizeof(gpu.ex_regs));
/* gpulib hook: start the render thread if threaded mode is configured,
 * then defer to the real renderer's init.  (Closing brace of the `if`
 * is not visible in this excerpt.) */
357 int renderer_init(void) {
358 if (thread_rendering) {
359 video_thread_start();
361 return real_renderer_init();
/* gpulib hook: tear down the real renderer, then (body not visible in
 * this excerpt -- presumably video_thread_stop()) shut down the render
 * thread if it was started by config. */
364 void renderer_finish(void) {
365 real_renderer_finish();
367 if (thread_rendering && thread.running) {
/* gpulib hook: replay the six E1..E6 environment commands.  When the
 * thread is running they go through do_cmd_list (so they are queued in
 * order with other commands); otherwise call the real renderer directly.
 * (The `int dummy;` declaration and else framing are not visible here.) */
372 void renderer_sync_ecmds(uint32_t * ecmds) {
373 if (thread.running) {
/* ecmds[0] is unused by convention; 6 words cover E1..E6. */
375 do_cmd_list(&ecmds[1], 6, &dummy);
377 real_renderer_sync_ecmds(ecmds);
/* gpulib hook: forward a VRAM-region cache invalidation to the real
 * renderer.  NOTE(review): orig. line 382 is missing from this excerpt
 * (likely a renderer_sync() call before the forward) -- confirm. */
381 void renderer_update_caches(int x, int y, int w, int h) {
383 real_renderer_update_caches(x, y, w, h);
/* gpulib hook: flush the real renderer's internal queues.  Deliberately
 * does NOT sync the thread queues here -- see the comment below. */
386 void renderer_flush_queues(void) {
387 /* Called during DMA and updateLace. We want to sync if it's DMA,
388 * but not if it's updateLace. Instead of syncing here, there's a
389 * renderer_sync call during DMA. */
390 real_renderer_flush_queues();
394 * Normally all GPU commands are processed before rendering the
395 * frame. For games that naturally run < 50/60fps, this is unnecessary
396 * -- it forces the game to render as if it was 60fps and leaves the
397 * GPU idle half the time on a 30fps game, for example.
399 * Allowing the renderer to wait until a frame is done before
400 * rendering it would give it double, triple, or quadruple the amount
401 * of time to finish before we have to wait for it.
403 * We can use a heuristic to figure out when to force a render.
405 * - If a frame isn't done when we're asked to render, wait for it and
406 * put future GPU commands in a separate buffer (for the next frame)
408 * - If the frame is done, and had no future GPU commands, render it.
410 * - If we do have future GPU commands, it meant the frame took too
411 * long to render and there's another frame waiting. Stop until the
412 * first frame finishes, render it, and start processing the next
415 * This may possibly add a frame or two of latency that shouldn't be
416 * different than the real device. It may skip rendering a frame
417 * entirely if a VRAM transfer happens while a frame is waiting, or in
418 * games that natively run at 60fps if frames are coming in too
419 * quickly to process. Depending on how the game treats "60fps," this
420 * may not be noticeable.
/* gpulib vblank hook implementing the frame-pacing heuristic described in
 * the long comment above: decide whether to display now, hold commands for
 * the next frame, or wait on the render thread.
 * NOTE(review): this excerpt omits several lines (the `updated` clause
 * around orig. 426-434, the hold_cmds assignments, renderer_wait() and
 * cmd_queue_swap() calls) -- only the visible logic is documented. */
422 void renderer_notify_update_lace(int updated) {
423 if (!thread.running) return;
/* Fully synchronous threading mode: branch body not visible here. */
425 if (thread_rendering == THREAD_RENDERING_SYNC) {
435 pthread_mutex_lock(&thread.queue_lock);
436 if (thread.bg_queue->used) {
437 /* We have commands for a future frame to run. Force a wait until
438 * the current frame is finished, and start processing the next
439 * frame after it's drawn (see the `updated` clause above). */
/* Lock is dropped across the wait (the renderer_wait() call itself is
 * not visible in this excerpt) and re-acquired after. */
440 pthread_mutex_unlock(&thread.queue_lock);
442 pthread_mutex_lock(&thread.queue_lock);
444 /* We are no longer holding commands back, so the next frame may
445 * get mixed into the following frame. This is usually fine, but can
446 * result in frameskip-like effects for 60fps games. */
448 needs_display = TRUE;
449 gpu.state.fb_dirty = TRUE;
450 } else if (thread.queue->used) {
451 /* We are still drawing during a vblank. Cut off the current frame
452 * by sending new commands to the background queue and skip
453 * drawing our partly rendered frame to the display. */
455 needs_display = TRUE;
456 gpu.state.fb_dirty = FALSE;
457 } else if (needs_display && !thread.queue->used) {
458 /* We have processed all commands in the queue, render the
459 * buffer. We know we have something to render, because
460 * needs_display is TRUE. */
462 needs_display = FALSE;
463 gpu.state.fb_dirty = TRUE;
465 /* Everything went normally, so do the normal thing. */
468 pthread_mutex_unlock(&thread.queue_lock);
/* gpulib hook: pass interlace state straight through to the real renderer
 * (no thread interaction needed; closing brace not visible in excerpt). */
471 void renderer_set_interlace(int enable, int is_odd) {
472 real_renderer_set_interlace(enable, is_odd);
/* gpulib hook: adopt the frontend's thread_rendering setting, starting or
 * stopping the render thread to match, then forward the config.
 * NOTE(review): a likely renderer_sync() before reading the new setting
 * (orig. 476) and the stop call in the else-if body (orig. 481) are not
 * visible in this excerpt. */
475 void renderer_set_config(const struct rearmed_cbs *cbs) {
477 thread_rendering = cbs->thread_rendering;
/* Transition: threading newly enabled. */
478 if (!thread.running && thread_rendering != THREAD_RENDERING_OFF) {
479 video_thread_start();
/* Transition: threading newly disabled (stop call not visible here). */
480 } else if (thread.running && thread_rendering == THREAD_RENDERING_OFF) {
483 real_renderer_set_config(cbs);
/* gpulib hook: forward display-resolution changes to the real renderer.
 * NOTE(review): orig. line 487 is missing from this excerpt (likely a
 * renderer_sync() call before the forward) -- confirm. */
486 void renderer_notify_res_change(void) {
488 real_renderer_notify_res_change();