update gpulib_thread_if
[pcsx_rearmed.git] / plugins / gpulib / gpulib_thread_if.c
index f0f607d..107ffd3 100644 (file)
 ***************************************************************************/
 
 #include <stdlib.h>
+#include <stdio.h>
 #include <string.h>
 #include <pthread.h>
 #include "../gpulib/gpu.h"
 #include "../../frontend/plugin_lib.h"
+#include "gpu.h"
+#include "gpu_timing.h"
 #include "gpulib_thread_if.h"
 
+#define FALSE 0 /* replaces the `false` literal used by the removed lines of this change */
+#define TRUE 1 /* replaces the `true` literal used by the removed lines of this change */
+#define BOOL unsigned short /* textual macro, not a typedef: every BOOL declaration expands to `unsigned short` */
+
 typedef struct {
        uint32_t *cmd_list;
        int count;
@@ -47,14 +54,15 @@ typedef struct {
        pthread_cond_t cond_queue_empty;
        video_thread_queue *queue;
        video_thread_queue *bg_queue;
-       bool running;
+       BOOL running;
 } video_thread_state;
 
 /* Single file-wide instance of the video thread's state (locks, condition
  * variables, queue pointers and the `running` flag). */
 static video_thread_state thread;
 /* Backing storage for the two command queues; video_thread_start() points
  * thread.queue at queues[0] and thread.bg_queue at queues[1]. */
 static video_thread_queue queues[2];
 static int thread_rendering;
-static bool hold_cmds;
-static bool needs_display;
 /* hold_cmds:     when set, video_thread_queue_cmd() appends new commands to
  *                thread.bg_queue instead of thread.queue.
  * needs_display: consulted by renderer_notify_update_lace() to decide
  *                whether a fully processed frame still has to be shown.
  * flushed:       set by renderer_sync() when it is about to flush a
  *                non-empty background queue; tested and cleared again in
  *                renderer_notify_update_lace(). */
+static BOOL hold_cmds;
+static BOOL needs_display;
+static BOOL flushed;
 
 /* Defined outside this file; scan_cmd_list() computes the length of a
  * command as 1 + cmd_lengths[cmd]. */
 extern const unsigned char cmd_lengths[];
 
@@ -62,10 +70,13 @@ static void *video_thread_main(void *arg) {
        video_thread_state *thread = (video_thread_state *)arg;
        video_thread_cmd *cmd;
        int i;
+
+#ifdef _3DS
        static int processed = 0;
+#endif /* _3DS */
 
        while(1) {
-               int result, last_cmd, start, end;
+               int result, cycles_dummy = 0, last_cmd, start, end;
                video_thread_queue *queue;
                pthread_mutex_lock(&thread->queue_lock);
 
@@ -86,8 +97,8 @@ static void *video_thread_main(void *arg) {
 
                for (i = start; i < end; i++) {
                        cmd = &queue->queue[i];
-                       result = real_do_cmd_list(cmd->cmd_list, cmd->count, &last_cmd);
-
+                       result = real_do_cmd_list(cmd->cmd_list, cmd->count,
+                                       &cycles_dummy, &cycles_dummy, &last_cmd);
                        if (result != cmd->count) {
                                fprintf(stderr, "Processed wrong cmd count: expected %d, got %d\n", cmd->count, result);
                        }
@@ -99,7 +110,7 @@ static void *video_thread_main(void *arg) {
                                svcSleepThread(1);
                                processed %= 512;
                        }
-#endif
+#endif /* _3DS */
                }
 
                pthread_mutex_lock(&thread->queue_lock);
@@ -124,7 +135,6 @@ static void cmd_queue_swap() {
                tmp = thread.queue;
                thread.queue = thread.bg_queue;
                thread.bg_queue = tmp;
-               needs_display = true;
                pthread_cond_signal(&thread.cond_msg_avail);
        }
        pthread_mutex_unlock(&thread.queue_lock);
@@ -160,6 +170,13 @@ void renderer_sync(void) {
                return;
        }
 
+       if (thread.bg_queue->used) {
+               /* When we flush the background queue, the vblank handler can't
+                * know that we had a frame pending, and we delay rendering too
+                * long. Force it. */
+               flushed = TRUE;
+       }
+
        /* Flush both queues. This is necessary because gpulib could be
         * trying to process a DMA write that a command in the queue should
         * run beforehand. For example, Xenogears sprites write a black
@@ -169,7 +186,7 @@ void renderer_sync(void) {
         * drop a frame. */
        renderer_wait();
        cmd_queue_swap();
-       hold_cmds = false;
+       hold_cmds = FALSE;
        renderer_wait();
 }
 
@@ -178,7 +195,7 @@ static void video_thread_stop() {
        renderer_sync();
 
        if (thread.running) {
-               thread.running = false;
+               thread.running = FALSE;
                pthread_cond_signal(&thread.cond_msg_avail);
                pthread_join(thread.thread, NULL);
        }
@@ -215,7 +232,7 @@ static void video_thread_start() {
        thread.queue = &queues[0];
        thread.bg_queue = &queues[1];
 
-       thread.running = true;
+       thread.running = TRUE;
        return;
 
  error:
@@ -227,7 +244,7 @@ static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
        video_thread_cmd *cmd;
        uint32_t *cmd_list;
        video_thread_queue *queue;
-       bool lock;
+       BOOL lock;
 
        cmd_list = (uint32_t *)calloc(count, sizeof(uint32_t));
 
@@ -248,10 +265,10 @@ static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
 
        if (hold_cmds) {
                queue = thread.bg_queue;
-               lock = false;
+               lock = FALSE;
        } else {
                queue = thread.queue;
-               lock = true;
+               lock = TRUE;
        }
 
        if (lock) {
@@ -278,41 +295,99 @@ static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
 
 /* Slice off just the part of the list that can be handled async, and
  * update ex_regs. */
-static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
+static int scan_cmd_list(uint32_t *data, int count,
+       int *cycles_sum_out, int *cycles_last, int *last_cmd)
 {
+       int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
        int cmd = 0, pos = 0, len, v;
 
        while (pos < count) {
                uint32_t *list = data + pos;
-               cmd = list[0] >> 24;
+               short *slist = (void *)list;
+               cmd = LE32TOH(list[0]) >> 24;
                len = 1 + cmd_lengths[cmd];
 
                switch (cmd) {
                        case 0x02:
+                               gput_sum(cpu_cycles_sum, cpu_cycles,
+                                       gput_fill(LE16TOH(slist[4]) & 0x3ff,
+                                               LE16TOH(slist[5]) & 0x1ff));
+                               break;
+                       case 0x20 ... 0x23:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
                                break;
                        case 0x24 ... 0x27:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
+                               gpu.ex_regs[1] &= ~0x1ff;
+                               gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+                               break;
+                       case 0x28 ... 0x2b:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
+                               break;
                        case 0x2c ... 0x2f:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
+                               gpu.ex_regs[1] &= ~0x1ff;
+                               gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+                               break;
+                       case 0x30 ... 0x33:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
+                               break;
                        case 0x34 ... 0x37:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
+                               gpu.ex_regs[1] &= ~0x1ff;
+                               gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+                               break;
+                       case 0x38 ... 0x3b:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
+                               break;
                        case 0x3c ... 0x3f:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
                                gpu.ex_regs[1] &= ~0x1ff;
-                               gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
+                               gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+                               break;
+                       case 0x40 ... 0x47:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
                                break;
                        case 0x48 ... 0x4F:
                                for (v = 3; pos + v < count; v++)
                                {
+                                       gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
                                        if ((list[v] & 0xf000f000) == 0x50005000)
                                                break;
                                }
                                len += v - 3;
                                break;
+                       case 0x50 ... 0x57:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+                               break;
                        case 0x58 ... 0x5F:
                                for (v = 4; pos + v < count; v += 2)
                                {
+                                       gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
                                        if ((list[v] & 0xf000f000) == 0x50005000)
                                                break;
                                }
                                len += v - 4;
                                break;
+                       case 0x60 ... 0x63:
+                               gput_sum(cpu_cycles_sum, cpu_cycles,
+                                       gput_sprite(LE16TOH(slist[4]) & 0x3ff,
+                                               LE16TOH(slist[5]) & 0x1ff));
+                               break;
+                       case 0x64 ... 0x67:
+                               gput_sum(cpu_cycles_sum, cpu_cycles,
+                                       gput_sprite(LE16TOH(slist[6]) & 0x3ff,
+                                               LE16TOH(slist[7]) & 0x1ff));
+                               break;
+                       case 0x68 ... 0x6b:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
+                               break;
+                       case 0x70 ... 0x77:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8));
+                               break;
+                       case 0x78 ... 0x7f:
+                               gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16));
+                               break;
                        default:
                                if ((cmd & 0xf8) == 0xe0)
                                        gpu.ex_regs[cmd & 7] = list[0];
@@ -323,24 +398,28 @@ static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
                        cmd = -1;
                        break; /* incomplete cmd */
                }
-               if (0xa0 <= cmd && cmd <= 0xdf)
+               if (0x80 <= cmd && cmd <= 0xdf)
                        break; /* image i/o */
 
                pos += len;
        }
 
+       *cycles_sum_out += cpu_cycles_sum;
+       *cycles_last = cpu_cycles;
        *last_cmd = cmd;
        return pos;
 }
 
-int do_cmd_list(uint32_t *list, int count, int *last_cmd) {
+int do_cmd_list(uint32_t *list, int count,
+ int *cycles_sum, int *cycles_last, int *last_cmd)
+{
        int pos = 0;
 
        if (thread.running) {
-               pos = scan_cmd_list(list, count, last_cmd);
+               pos = scan_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
                video_thread_queue_cmd(list, pos, *last_cmd);
        } else {
-               pos = real_do_cmd_list(list, count, last_cmd);
+               pos = real_do_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
                memcpy(gpu.ex_regs, gpu.scratch_ex_regs, sizeof(gpu.ex_regs));
        }
        return pos;
@@ -363,16 +442,16 @@ void renderer_finish(void) {
 
 /* Hand the words in `ecmds` to whichever renderer path is active.
  *
  * While the video thread is running, the six words starting at ecmds[1]
  * go through do_cmd_list(), which scans them and queues them for the
  * thread. Otherwise the call is forwarded unchanged to
  * real_renderer_sync_ecmds(). */
 void renderer_sync_ecmds(uint32_t * ecmds) {
        if (thread.running) {
-               int dummy;
-               do_cmd_list(&ecmds[1], 6, &dummy);
                /* None of do_cmd_list()'s outputs (cycle sum, last cycle
                 * count, last command) are used here, so all three
                 * out-parameters alias one throwaway int. It is initialised
                 * to 0 because scan_cmd_list() reads *cycles_last and adds
                 * into *cycles_sum_out. */
+               int dummy = 0;
+               do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
        } else {
                real_renderer_sync_ecmds(ecmds);
        }
 }
 
-void renderer_update_caches(int x, int y, int w, int h) {
+void renderer_update_caches(int x, int y, int w, int h, int state_changed) {
        /* Threaded wrapper around real_renderer_update_caches(): call
         * renderer_sync() first, then forward every argument unchanged.
         * This change adds the `state_changed` parameter and passes it
         * straight through to the real implementation. */
        renderer_sync();
-       real_renderer_update_caches(x, y, w, h);
+       real_renderer_update_caches(x, y, w, h, state_changed);
 }
 
 void renderer_flush_queues(void) {
@@ -425,7 +504,7 @@ void renderer_notify_update_lace(int updated) {
        }
 
        pthread_mutex_lock(&thread.queue_lock);
-       if (thread.bg_queue->used) {
+       if (thread.bg_queue->used || flushed) {
                /* We have commands for a future frame to run. Force a wait until
                 * the current frame is finished, and start processing the next
                 * frame after it's drawn (see the `updated` clause above). */
@@ -436,23 +515,24 @@ void renderer_notify_update_lace(int updated) {
                /* We are no longer holding commands back, so the next frame may
                 * get mixed into the following frame. This is usually fine, but can
                 * result in frameskip-like effects for 60fps games. */
-               hold_cmds = false;
-               needs_display = true;
-               gpu.state.fb_dirty = true;
+               flushed = FALSE;
+               hold_cmds = FALSE;
+               needs_display = TRUE;
+               gpu.state.fb_dirty = TRUE;
        } else if (thread.queue->used) {
                /* We are still drawing during a vblank. Cut off the current frame
                 * by sending new commands to the background queue and skip
                 * drawing our partly rendered frame to the display. */
-               hold_cmds = true;
-               needs_display = true;
-               gpu.state.fb_dirty = false;
+               hold_cmds = TRUE;
+               needs_display = TRUE;
+               gpu.state.fb_dirty = FALSE;
        } else if (needs_display && !thread.queue->used) {
                /* We have processed all commands in the queue, render the
                 * buffer. We know we have something to render, because
-                * needs_display is true. */
-               hold_cmds = false;
-               needs_display = false;
-               gpu.state.fb_dirty = true;
+                * needs_display is TRUE. */
+               hold_cmds = FALSE;
+               needs_display = FALSE;
+               gpu.state.fb_dirty = TRUE;
        } else {
                /* Everything went normally, so do the normal thing. */
        }