X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fgpulib%2Fgpulib_thread_if.c;h=107ffd3568a634cb29148557400223b9a31ca8ad;hb=025b6fde9b8ce688008227211168358b96e98d62;hp=c95f5295ef37a28c72882c9d36236db573f2f019;hpb=a903b13150257ec490fe776fb5bc2e1fbc2a312e;p=pcsx_rearmed.git

diff --git a/plugins/gpulib/gpulib_thread_if.c b/plugins/gpulib/gpulib_thread_if.c
index c95f5295..107ffd35 100644
--- a/plugins/gpulib/gpulib_thread_if.c
+++ b/plugins/gpulib/gpulib_thread_if.c
@@ -23,6 +23,8 @@
 #include <pthread.h>
 #include "../gpulib/gpu.h"
 #include "../../frontend/plugin_lib.h"
+#include "gpu.h"
+#include "gpu_timing.h"
 #include "gpulib_thread_if.h"
 
 #define FALSE 0
@@ -60,6 +62,7 @@ static video_thread_queue queues[2];
 static int thread_rendering;
 static BOOL hold_cmds;
 static BOOL needs_display;
+static BOOL flushed;
 
 extern const unsigned char cmd_lengths[];
 
@@ -73,7 +76,7 @@ static void *video_thread_main(void *arg) {
 #endif /* _3DS */
 
     while(1) {
-        int result, last_cmd, start, end;
+        int result, cycles_dummy = 0, last_cmd, start, end;
         video_thread_queue *queue;
         pthread_mutex_lock(&thread->queue_lock);
 
@@ -94,8 +97,8 @@ static void *video_thread_main(void *arg) {
 
         for (i = start; i < end; i++) {
             cmd = &queue->queue[i];
-            result = real_do_cmd_list(cmd->cmd_list, cmd->count, &last_cmd);
-
+            result = real_do_cmd_list(cmd->cmd_list, cmd->count,
+                    &cycles_dummy, &cycles_dummy, &last_cmd);
             if (result != cmd->count) {
                 fprintf(stderr, "Processed wrong cmd count: expected %d, got %d\n", cmd->count, result);
             }
@@ -132,7 +135,6 @@ static void cmd_queue_swap() {
         tmp = thread.queue;
         thread.queue = thread.bg_queue;
         thread.bg_queue = tmp;
-        needs_display = TRUE;
         pthread_cond_signal(&thread.cond_msg_avail);
     }
     pthread_mutex_unlock(&thread.queue_lock);
@@ -168,6 +170,13 @@ void renderer_sync(void) {
         return;
     }
 
+    if (thread.bg_queue->used) {
+        /* When we flush the background queue, the vblank handler can't
+         * know that we had a frame pending, and we delay rendering too
+         * long. Force it. */
+        flushed = TRUE;
+    }
+
     /* Flush both queues. This is necessary because gpulib could be
      * trying to process a DMA write that a command in the queue should
      * run beforehand. For example, Xenogears sprites write a black
@@ -286,41 +295,99 @@ static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
 
 /* Slice off just the part of the list that can be handled async, and
  * update ex_regs. */
-static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
+static int scan_cmd_list(uint32_t *data, int count,
+        int *cycles_sum_out, int *cycles_last, int *last_cmd)
 {
+    int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
     int cmd = 0, pos = 0, len, v;
 
     while (pos < count) {
         uint32_t *list = data + pos;
-        cmd = list[0] >> 24;
+        short *slist = (void *)list;
+        cmd = LE32TOH(list[0]) >> 24;
         len = 1 + cmd_lengths[cmd];
 
         switch (cmd) {
             case 0x02:
+                gput_sum(cpu_cycles_sum, cpu_cycles,
+                        gput_fill(LE16TOH(slist[4]) & 0x3ff,
+                            LE16TOH(slist[5]) & 0x1ff));
+                break;
+            case 0x20 ... 0x23:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
                 break;
             case 0x24 ... 0x27:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
+                gpu.ex_regs[1] &= ~0x1ff;
+                gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+                break;
+            case 0x28 ... 0x2b:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
+                break;
             case 0x2c ... 0x2f:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
+                gpu.ex_regs[1] &= ~0x1ff;
+                gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+                break;
+            case 0x30 ... 0x33:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
+                break;
             case 0x34 ... 0x37:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
+                gpu.ex_regs[1] &= ~0x1ff;
+                gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+                break;
+            case 0x38 ... 0x3b:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
+                break;
             case 0x3c ... 0x3f:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
                 gpu.ex_regs[1] &= ~0x1ff;
-                gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
+                gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+                break;
+            case 0x40 ... 0x47:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
                 break;
             case 0x48 ... 0x4F:
                 for (v = 3; pos + v < count; v++) {
+                    gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
                     if ((list[v] & 0xf000f000) == 0x50005000)
                         break;
                 }
                 len += v - 3;
                 break;
+            case 0x50 ... 0x57:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+                break;
             case 0x58 ... 0x5F:
                 for (v = 4; pos + v < count; v += 2) {
+                    gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
                     if ((list[v] & 0xf000f000) == 0x50005000)
                         break;
                 }
                 len += v - 4;
                 break;
+            case 0x60 ... 0x63:
+                gput_sum(cpu_cycles_sum, cpu_cycles,
+                        gput_sprite(LE16TOH(slist[4]) & 0x3ff,
+                            LE16TOH(slist[5]) & 0x1ff));
+                break;
+            case 0x64 ... 0x67:
+                gput_sum(cpu_cycles_sum, cpu_cycles,
+                        gput_sprite(LE16TOH(slist[6]) & 0x3ff,
+                            LE16TOH(slist[7]) & 0x1ff));
+                break;
+            case 0x68 ... 0x6b:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
+                break;
+            case 0x70 ... 0x77:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8));
+                break;
+            case 0x78 ... 0x7f:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16));
+                break;
             default:
                 if ((cmd & 0xf8) == 0xe0)
                     gpu.ex_regs[cmd & 7] = list[0];
@@ -331,24 +398,28 @@ static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
             cmd = -1;
             break; /* incomplete cmd */
         }
-        if (0xa0 <= cmd && cmd <= 0xdf)
+        if (0x80 <= cmd && cmd <= 0xdf)
            break; /* image i/o */
 
         pos += len;
     }
 
+    *cycles_sum_out += cpu_cycles_sum;
+    *cycles_last = cpu_cycles;
     *last_cmd = cmd;
     return pos;
 }
 
-int do_cmd_list(uint32_t *list, int count, int *last_cmd) {
+int do_cmd_list(uint32_t *list, int count,
+        int *cycles_sum, int *cycles_last, int *last_cmd)
+{
     int pos = 0;
 
     if (thread.running) {
-        pos = scan_cmd_list(list, count, last_cmd);
+        pos = scan_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
         video_thread_queue_cmd(list, pos, *last_cmd);
     } else {
-        pos = real_do_cmd_list(list, count, last_cmd);
+        pos = real_do_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
         memcpy(gpu.ex_regs, gpu.scratch_ex_regs, sizeof(gpu.ex_regs));
     }
     return pos;
@@ -371,16 +442,16 @@ void renderer_finish(void) {
 
 void renderer_sync_ecmds(uint32_t * ecmds) {
     if (thread.running) {
-        int dummy;
-        do_cmd_list(&ecmds[1], 6, &dummy);
+        int dummy = 0;
+        do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
     } else {
         real_renderer_sync_ecmds(ecmds);
     }
 }
 
-void renderer_update_caches(int x, int y, int w, int h) {
+void renderer_update_caches(int x, int y, int w, int h, int state_changed) {
     renderer_sync();
-    real_renderer_update_caches(x, y, w, h);
+    real_renderer_update_caches(x, y, w, h, state_changed);
 }
 
 void renderer_flush_queues(void) {
@@ -433,7 +504,7 @@ void renderer_notify_update_lace(int updated) {
     }
 
     pthread_mutex_lock(&thread.queue_lock);
-    if (thread.bg_queue->used) {
+    if (thread.bg_queue->used || flushed) {
         /* We have commands for a future frame to run. Force a wait until
          * the current frame is finished, and start processing the next
         * frame after it's drawn (see the `updated` clause above). */
@@ -444,6 +515,7 @@ void renderer_notify_update_lace(int updated) {
         /* We are no longer holding commands back, so the next frame may
          * get mixed into the following frame. This is usually fine, but can
          * result in frameskip-like effects for 60fps games. */
+        flushed = FALSE;
         hold_cmds = FALSE;
         needs_display = TRUE;
         gpu.state.fb_dirty = TRUE;
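
The signature change above threads two cycle-accounting outputs through
do_cmd_list(). As a rough illustration only (not part of the patch), a caller
could use the new parameters as sketched below; process_dma_chunk() and
charge_gpu_cycles() are hypothetical names, and treating cycles_last as state
carried between calls follows how scan_cmd_list() reads and writes it in this
patch:

#include <stdint.h>

/* Assumed declarations: do_cmd_list() matches the patched signature above,
 * charge_gpu_cycles() is a made-up stand-in for whatever the emulator core
 * does with the cycle estimate. */
int do_cmd_list(uint32_t *list, int count,
        int *cycles_sum, int *cycles_last, int *last_cmd);
void charge_gpu_cycles(int cycles);

static int cycles_last;  /* timing state carried across calls */

static int process_dma_chunk(uint32_t *list, int count)
{
    int cycles_sum = 0, last_cmd = 0;
    int pos = do_cmd_list(list, count, &cycles_sum, &cycles_last, &last_cmd);

    /* cycles_sum now holds the gput_*() cost estimate accumulated for the
     * commands that were consumed (scanned for the worker thread, or
     * rendered synchronously when the thread is not running). */
    charge_gpu_cycles(cycles_sum);
    return pos;  /* number of 32-bit words consumed from the list */
}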