X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fgpulib%2Fgpulib_thread_if.c;h=107ffd3568a634cb29148557400223b9a31ca8ad;hb=025b6fde9b8ce688008227211168358b96e98d62;hp=f0f607d621cbfb12411b39eb839c04f7341ac99d;hpb=accedc82b01fe5834a805a9872405d51de1d5c06;p=pcsx_rearmed.git

diff --git a/plugins/gpulib/gpulib_thread_if.c b/plugins/gpulib/gpulib_thread_if.c
index f0f607d6..107ffd35 100644
--- a/plugins/gpulib/gpulib_thread_if.c
+++ b/plugins/gpulib/gpulib_thread_if.c
@@ -18,12 +18,19 @@
 ***************************************************************************/
 
 #include 
+#include 
 #include 
 #include 
 #include "../gpulib/gpu.h"
 #include "../../frontend/plugin_lib.h"
+#include "gpu.h"
+#include "gpu_timing.h"
 #include "gpulib_thread_if.h"
 
+#define FALSE 0
+#define TRUE 1
+#define BOOL unsigned short
+
 typedef struct {
 	uint32_t *cmd_list;
 	int count;
@@ -47,14 +54,15 @@ typedef struct {
 	pthread_cond_t cond_queue_empty;
 	video_thread_queue *queue;
 	video_thread_queue *bg_queue;
-	bool running;
+	BOOL running;
 } video_thread_state;
 
 static video_thread_state thread;
 static video_thread_queue queues[2];
 static int thread_rendering;
-static bool hold_cmds;
-static bool needs_display;
+static BOOL hold_cmds;
+static BOOL needs_display;
+static BOOL flushed;
 
 extern const unsigned char cmd_lengths[];
 
@@ -62,10 +70,13 @@ static void *video_thread_main(void *arg) {
 	video_thread_state *thread = (video_thread_state *)arg;
 	video_thread_cmd *cmd;
 	int i;
+
+#ifdef _3DS
 	static int processed = 0;
+#endif /* _3DS */
 
 	while(1) {
-		int result, last_cmd, start, end;
+		int result, cycles_dummy = 0, last_cmd, start, end;
 		video_thread_queue *queue;
 
 		pthread_mutex_lock(&thread->queue_lock);
@@ -86,8 +97,8 @@ static void *video_thread_main(void *arg) {
 		for (i = start; i < end; i++) {
 			cmd = &queue->queue[i];
 
-			result = real_do_cmd_list(cmd->cmd_list, cmd->count, &last_cmd);
-
+			result = real_do_cmd_list(cmd->cmd_list, cmd->count,
+					&cycles_dummy, &cycles_dummy, &last_cmd);
 			if (result != cmd->count) {
 				fprintf(stderr, "Processed wrong cmd count: expected %d, got %d\n", cmd->count, result);
 			}
@@ -99,7 +110,7 @@ static void *video_thread_main(void *arg) {
 			svcSleepThread(1);
 			processed %= 512;
 		}
-#endif
+#endif /* _3DS */
 	}
 
 	pthread_mutex_lock(&thread->queue_lock);
@@ -124,7 +135,6 @@ static void cmd_queue_swap() {
 		tmp = thread.queue;
 		thread.queue = thread.bg_queue;
 		thread.bg_queue = tmp;
-		needs_display = true;
 		pthread_cond_signal(&thread.cond_msg_avail);
 	}
 	pthread_mutex_unlock(&thread.queue_lock);
@@ -160,6 +170,13 @@ void renderer_sync(void) {
 		return;
 	}
 
+	if (thread.bg_queue->used) {
+		/* When we flush the background queue, the vblank handler can't
+		 * know that we had a frame pending, and we delay rendering too
+		 * long. Force it. */
+		flushed = TRUE;
+	}
+
 	/* Flush both queues. This is necessary because gpulib could be
 	 * trying to process a DMA write that a command in the queue should
	 * run beforehand. For example, Xenogears sprites write a black
@@ -169,7 +186,7 @@ void renderer_sync(void) {
 	 * drop a frame. */
 	renderer_wait();
 	cmd_queue_swap();
-	hold_cmds = false;
+	hold_cmds = FALSE;
 	renderer_wait();
 }
 
@@ -178,7 +195,7 @@ static void video_thread_stop() {
 	renderer_sync();
 
 	if (thread.running) {
-		thread.running = false;
+		thread.running = FALSE;
 		pthread_cond_signal(&thread.cond_msg_avail);
 		pthread_join(thread.thread, NULL);
 	}
@@ -215,7 +232,7 @@ static void video_thread_start() {
 	thread.queue = &queues[0];
 	thread.bg_queue = &queues[1];
 
-	thread.running = true;
+	thread.running = TRUE;
 	return;
 
 error:
@@ -227,7 +244,7 @@ static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
 	video_thread_cmd *cmd;
 	uint32_t *cmd_list;
 	video_thread_queue *queue;
-	bool lock;
+	BOOL lock;
 
 	cmd_list = (uint32_t *)calloc(count, sizeof(uint32_t));
 
@@ -248,10 +265,10 @@ static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
 
 	if (hold_cmds) {
 		queue = thread.bg_queue;
-		lock = false;
+		lock = FALSE;
 	} else {
 		queue = thread.queue;
-		lock = true;
+		lock = TRUE;
 	}
 
 	if (lock) {
@@ -278,41 +295,99 @@ static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
 
 /* Slice off just the part of the list that can be handled async, and
  * update ex_regs. */
-static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
+static int scan_cmd_list(uint32_t *data, int count,
+		int *cycles_sum_out, int *cycles_last, int *last_cmd)
 {
+	int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
 	int cmd = 0, pos = 0, len, v;
 
 	while (pos < count) {
 		uint32_t *list = data + pos;
-		cmd = list[0] >> 24;
+		short *slist = (void *)list;
+		cmd = LE32TOH(list[0]) >> 24;
 		len = 1 + cmd_lengths[cmd];
 
 		switch (cmd) {
 		case 0x02:
+			gput_sum(cpu_cycles_sum, cpu_cycles,
+				gput_fill(LE16TOH(slist[4]) & 0x3ff,
+					LE16TOH(slist[5]) & 0x1ff));
+			break;
+		case 0x20 ... 0x23:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
 			break;
 		case 0x24 ... 0x27:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
+			gpu.ex_regs[1] &= ~0x1ff;
+			gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+			break;
+		case 0x28 ... 0x2b:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
+			break;
 		case 0x2c ... 0x2f:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
+			gpu.ex_regs[1] &= ~0x1ff;
+			gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+			break;
+		case 0x30 ... 0x33:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
+			break;
 		case 0x34 ... 0x37:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
+			gpu.ex_regs[1] &= ~0x1ff;
+			gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+			break;
+		case 0x38 ... 0x3b:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
+			break;
 		case 0x3c ... 0x3f:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
 			gpu.ex_regs[1] &= ~0x1ff;
-			gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
+			gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+			break;
+		case 0x40 ... 0x47:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
 			break;
 		case 0x48 ... 0x4F:
 			for (v = 3; pos + v < count; v++) {
+				gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
 				if ((list[v] & 0xf000f000) == 0x50005000)
 					break;
 			}
 			len += v - 3;
 			break;
+		case 0x50 ... 0x57:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+			break;
 		case 0x58 ... 0x5F:
 			for (v = 4; pos + v < count; v += 2) {
+				gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
 				if ((list[v] & 0xf000f000) == 0x50005000)
 					break;
 			}
 			len += v - 4;
 			break;
+		case 0x60 ... 0x63:
+			gput_sum(cpu_cycles_sum, cpu_cycles,
+				gput_sprite(LE16TOH(slist[4]) & 0x3ff,
+					LE16TOH(slist[5]) & 0x1ff));
+			break;
+		case 0x64 ... 0x67:
+			gput_sum(cpu_cycles_sum, cpu_cycles,
+				gput_sprite(LE16TOH(slist[6]) & 0x3ff,
+					LE16TOH(slist[7]) & 0x1ff));
+			break;
+		case 0x68 ... 0x6b:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
+			break;
+		case 0x70 ... 0x77:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8));
+			break;
+		case 0x78 ... 0x7f:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16));
+			break;
 		default:
 			if ((cmd & 0xf8) == 0xe0)
 				gpu.ex_regs[cmd & 7] = list[0];
@@ -323,24 +398,28 @@ static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
 			cmd = -1;
 			break; /* incomplete cmd */
 		}
-		if (0xa0 <= cmd && cmd <= 0xdf)
+		if (0x80 <= cmd && cmd <= 0xdf)
 			break; /* image i/o */
 
 		pos += len;
 	}
 
+	*cycles_sum_out += cpu_cycles_sum;
+	*cycles_last = cpu_cycles;
 	*last_cmd = cmd;
 	return pos;
 }
 
-int do_cmd_list(uint32_t *list, int count, int *last_cmd) {
+int do_cmd_list(uint32_t *list, int count,
+		int *cycles_sum, int *cycles_last, int *last_cmd)
+{
 	int pos = 0;
 
 	if (thread.running) {
-		pos = scan_cmd_list(list, count, last_cmd);
+		pos = scan_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
 		video_thread_queue_cmd(list, pos, *last_cmd);
 	} else {
-		pos = real_do_cmd_list(list, count, last_cmd);
+		pos = real_do_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
 		memcpy(gpu.ex_regs, gpu.scratch_ex_regs, sizeof(gpu.ex_regs));
 	}
 	return pos;
@@ -363,16 +442,16 @@ void renderer_finish(void) {
 
 void renderer_sync_ecmds(uint32_t * ecmds) {
 	if (thread.running) {
-		int dummy;
-		do_cmd_list(&ecmds[1], 6, &dummy);
+		int dummy = 0;
+		do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
 	} else {
 		real_renderer_sync_ecmds(ecmds);
 	}
 }
 
-void renderer_update_caches(int x, int y, int w, int h) {
+void renderer_update_caches(int x, int y, int w, int h, int state_changed) {
 	renderer_sync();
-	real_renderer_update_caches(x, y, w, h);
+	real_renderer_update_caches(x, y, w, h, state_changed);
 }
 
 void renderer_flush_queues(void) {
@@ -425,7 +504,7 @@ void renderer_notify_update_lace(int updated) {
 	}
 
 	pthread_mutex_lock(&thread.queue_lock);
-	if (thread.bg_queue->used) {
+	if (thread.bg_queue->used || flushed) {
 		/* We have commands for a future frame to run. Force a wait until
 		 * the current frame is finished, and start processing the next
 		 * frame after it's drawn (see the `updated` clause above). */
@@ -436,23 +515,24 @@ void renderer_notify_update_lace(int updated) {
 		/* We are no longer holding commands back, so the next frame may
 		 * get mixed into the following frame. This is usually fine, but can
 		 * result in frameskip-like effects for 60fps games. */
-		hold_cmds = false;
-		needs_display = true;
-		gpu.state.fb_dirty = true;
+		flushed = FALSE;
+		hold_cmds = FALSE;
+		needs_display = TRUE;
+		gpu.state.fb_dirty = TRUE;
 	} else if (thread.queue->used) {
 		/* We are still drawing during a vblank. Cut off the current frame
 		 * by sending new commands to the background queue and skip
 		 * drawing our partly rendered frame to the display. */
-		hold_cmds = true;
-		needs_display = true;
-		gpu.state.fb_dirty = false;
+		hold_cmds = TRUE;
+		needs_display = TRUE;
+		gpu.state.fb_dirty = FALSE;
 	} else if (needs_display && !thread.queue->used) {
 		/* We have processed all commands in the queue, render the
 		 * buffer. We know we have something to render, because
-		 * needs_display is true. */
-		hold_cmds = false;
-		needs_display = false;
-		gpu.state.fb_dirty = true;
+		 * needs_display is TRUE. */
+		hold_cmds = FALSE;
+		needs_display = FALSE;
+		gpu.state.fb_dirty = TRUE;
 	} else {
 		/* Everything went normally, so do the normal thing. */
 	}
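
Note on the interface reworked above: do_cmd_list() and real_do_cmd_list() now report GPU cycle estimates through two out-parameters (a running sum plus per-command cycle state carried across calls) in addition to returning the number of words consumed. The minimal caller sketch below is illustrative only and not part of the patch; only the do_cmd_list() prototype is taken from the diff, while the feed_packet() helper, its buffer, and the printf reporting are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Prototype as changed by the patch (normally provided by gpulib's headers). */
int do_cmd_list(uint32_t *list, int count,
		int *cycles_sum, int *cycles_last, int *last_cmd);

/* Hypothetical helper: feed one command packet and report the estimates. */
void feed_packet(uint32_t *list, int count)
{
	int cycles_sum = 0;	/* accumulated GPU busy-time estimate */
	int cycles_last = 0;	/* per-command cycle state carried across calls */
	int last_cmd = 0;	/* last opcode seen, or -1 for an incomplete cmd */
	int pos;

	/* pos = number of 32-bit words consumed; as in scan_cmd_list() above,
	 * processing stops early on image i/o commands (0x80..0xdf) or on an
	 * incomplete command. */
	pos = do_cmd_list(list, count, &cycles_sum, &cycles_last, &last_cmd);

	printf("consumed %d/%d words, last cmd %d, ~%d cycles\n",
	       pos, count, last_cmd, cycles_sum);
}

(Linking this sketch requires the rest of gpulib; it is meant only to show the calling convention of the new five-argument interface.)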