X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fgpulib%2Fgpulib_thread_if.c;h=107ffd3568a634cb29148557400223b9a31ca8ad;hb=025b6fde9b8ce688008227211168358b96e98d62;hp=c95f5295ef37a28c72882c9d36236db573f2f019;hpb=a903b13150257ec490fe776fb5bc2e1fbc2a312e;p=pcsx_rearmed.git

diff --git a/plugins/gpulib/gpulib_thread_if.c b/plugins/gpulib/gpulib_thread_if.c
index c95f5295..107ffd35 100644
--- a/plugins/gpulib/gpulib_thread_if.c
+++ b/plugins/gpulib/gpulib_thread_if.c
@@ -23,6 +23,8 @@
 #include <pthread.h>
 #include "../gpulib/gpu.h"
 #include "../../frontend/plugin_lib.h"
+#include "gpu.h"
+#include "gpu_timing.h"
 #include "gpulib_thread_if.h"
 
 #define FALSE 0
@@ -60,6 +62,7 @@ static video_thread_queue queues[2];
 static int thread_rendering;
 static BOOL hold_cmds;
 static BOOL needs_display;
+static BOOL flushed;
 
 extern const unsigned char cmd_lengths[];
 
@@ -73,7 +76,7 @@ static void *video_thread_main(void *arg) {
 #endif /* _3DS */
 
     while(1) {
-        int result, last_cmd, start, end;
+        int result, cycles_dummy = 0, last_cmd, start, end;
         video_thread_queue *queue;
         pthread_mutex_lock(&thread->queue_lock);
 
@@ -94,8 +97,8 @@ static void *video_thread_main(void *arg) {
 
         for (i = start; i < end; i++) {
             cmd = &queue->queue[i];
-            result = real_do_cmd_list(cmd->cmd_list, cmd->count, &last_cmd);
-
+            result = real_do_cmd_list(cmd->cmd_list, cmd->count,
+                    &cycles_dummy, &cycles_dummy, &last_cmd);
             if (result != cmd->count) {
                 fprintf(stderr, "Processed wrong cmd count: expected %d, got %d\n", cmd->count, result);
             }
@@ -132,7 +135,6 @@ static void cmd_queue_swap() {
         tmp = thread.queue;
         thread.queue = thread.bg_queue;
         thread.bg_queue = tmp;
-        needs_display = TRUE;
         pthread_cond_signal(&thread.cond_msg_avail);
     }
     pthread_mutex_unlock(&thread.queue_lock);
@@ -168,6 +170,13 @@ void renderer_sync(void) {
         return;
     }
 
+    if (thread.bg_queue->used) {
+        /* When we flush the background queue, the vblank handler can't
+         * know that we had a frame pending, and we delay rendering too
+         * long. Force it. */
+        flushed = TRUE;
+    }
+
     /* Flush both queues. This is necessary because gpulib could be
      * trying to process a DMA write that a command in the queue should
      * run beforehand. For example, Xenogears sprites write a black
@@ -286,41 +295,99 @@ static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
 
 /* Slice off just the part of the list that can be handled async, and
  * update ex_regs. */
-static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
+static int scan_cmd_list(uint32_t *data, int count,
+        int *cycles_sum_out, int *cycles_last, int *last_cmd)
 {
+    int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
     int cmd = 0, pos = 0, len, v;
 
     while (pos < count) {
         uint32_t *list = data + pos;
-        cmd = list[0] >> 24;
+        short *slist = (void *)list;
+        cmd = LE32TOH(list[0]) >> 24;
         len = 1 + cmd_lengths[cmd];
 
         switch (cmd) {
             case 0x02:
+                gput_sum(cpu_cycles_sum, cpu_cycles,
+                        gput_fill(LE16TOH(slist[4]) & 0x3ff,
+                            LE16TOH(slist[5]) & 0x1ff));
+                break;
+            case 0x20 ... 0x23:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
                 break;
             case 0x24 ... 0x27:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
+                gpu.ex_regs[1] &= ~0x1ff;
+                gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+                break;
+            case 0x28 ... 0x2b:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
+                break;
             case 0x2c ... 0x2f:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
+                gpu.ex_regs[1] &= ~0x1ff;
+                gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+                break;
+            case 0x30 ... 0x33:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
+                break;
             case 0x34 ... 0x37:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
+                gpu.ex_regs[1] &= ~0x1ff;
+                gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+                break;
+            case 0x38 ... 0x3b:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
+                break;
             case 0x3c ... 0x3f:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
                 gpu.ex_regs[1] &= ~0x1ff;
-                gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
+                gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+                break;
+            case 0x40 ... 0x47:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
                 break;
             case 0x48 ... 0x4F:
                 for (v = 3; pos + v < count; v++) {
+                    gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
                     if ((list[v] & 0xf000f000) == 0x50005000)
                         break;
                 }
                 len += v - 3;
                 break;
+            case 0x50 ... 0x57:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+                break;
             case 0x58 ... 0x5F:
                 for (v = 4; pos + v < count; v += 2) {
+                    gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
                     if ((list[v] & 0xf000f000) == 0x50005000)
                         break;
                 }
                 len += v - 4;
                 break;
+            case 0x60 ... 0x63:
+                gput_sum(cpu_cycles_sum, cpu_cycles,
+                        gput_sprite(LE16TOH(slist[4]) & 0x3ff,
+                            LE16TOH(slist[5]) & 0x1ff));
+                break;
+            case 0x64 ... 0x67:
+                gput_sum(cpu_cycles_sum, cpu_cycles,
+                        gput_sprite(LE16TOH(slist[6]) & 0x3ff,
+                            LE16TOH(slist[7]) & 0x1ff));
+                break;
+            case 0x68 ... 0x6b:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
+                break;
+            case 0x70 ... 0x77:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8));
+                break;
+            case 0x78 ... 0x7f:
+                gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16));
+                break;
             default:
                 if ((cmd & 0xf8) == 0xe0)
                     gpu.ex_regs[cmd & 7] = list[0];
@@ -331,24 +398,28 @@ static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
             cmd = -1;
             break; /* incomplete cmd */
         }
-        if (0xa0 <= cmd && cmd <= 0xdf)
+        if (0x80 <= cmd && cmd <= 0xdf)
            break; /* image i/o */
 
         pos += len;
     }
 
+    *cycles_sum_out += cpu_cycles_sum;
+    *cycles_last = cpu_cycles;
     *last_cmd = cmd;
     return pos;
 }
 
-int do_cmd_list(uint32_t *list, int count, int *last_cmd) {
+int do_cmd_list(uint32_t *list, int count,
+        int *cycles_sum, int *cycles_last, int *last_cmd)
+{
     int pos = 0;
 
     if (thread.running) {
-        pos = scan_cmd_list(list, count, last_cmd);
+        pos = scan_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
         video_thread_queue_cmd(list, pos, *last_cmd);
     } else {
-        pos = real_do_cmd_list(list, count, last_cmd);
+        pos = real_do_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
         memcpy(gpu.ex_regs, gpu.scratch_ex_regs, sizeof(gpu.ex_regs));
     }
     return pos;
@@ -371,16 +442,16 @@ void renderer_finish(void) {
 
 void renderer_sync_ecmds(uint32_t * ecmds) {
     if (thread.running) {
-        int dummy;
-        do_cmd_list(&ecmds[1], 6, &dummy);
+        int dummy = 0;
+        do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
     } else {
         real_renderer_sync_ecmds(ecmds);
     }
 }
 
-void renderer_update_caches(int x, int y, int w, int h) {
+void renderer_update_caches(int x, int y, int w, int h, int state_changed) {
     renderer_sync();
-    real_renderer_update_caches(x, y, w, h);
+    real_renderer_update_caches(x, y, w, h, state_changed);
 }
 
 void renderer_flush_queues(void) {
@@ -433,7 +504,7 @@ void renderer_notify_update_lace(int updated) {
     }
 
     pthread_mutex_lock(&thread.queue_lock);
-    if (thread.bg_queue->used) {
+    if (thread.bg_queue->used || flushed) {
         /* We have commands for a future frame to run. Force a wait until
          * the current frame is finished, and start processing the next
         * frame after it's drawn (see the `updated` clause above). */
@@ -444,6 +515,7 @@ void renderer_notify_update_lace(int updated) {
         /* We are no longer holding commands back, so the next frame may
          * get mixed into the following frame. This is usually fine, but can
          * result in frameskip-like effects for 60fps games. */
+        flushed = FALSE;
         hold_cmds = FALSE;
         needs_display = TRUE;
         gpu.state.fb_dirty = TRUE;
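
The signature change above threads two cycle-accounting outputs through
do_cmd_list(). As a rough illustration only (not part of the patch), a caller
could use the new parameters as sketched below; process_dma_chunk() and
charge_gpu_cycles() are hypothetical names, and treating cycles_last as state
carried between calls follows how scan_cmd_list() reads and writes it in this
patch:

#include <stdint.h>

/* Assumed declarations: do_cmd_list() matches the patched signature above,
 * charge_gpu_cycles() is a made-up stand-in for whatever the emulator core
 * does with the cycle estimate. */
int do_cmd_list(uint32_t *list, int count,
        int *cycles_sum, int *cycles_last, int *last_cmd);
void charge_gpu_cycles(int cycles);

static int cycles_last;  /* timing state carried across calls */

static int process_dma_chunk(uint32_t *list, int count)
{
    int cycles_sum = 0, last_cmd = 0;
    int pos = do_cmd_list(list, count, &cycles_sum, &cycles_last, &last_cmd);

    /* cycles_sum now holds the gput_*() cost estimate accumulated for the
     * commands that were consumed (scanned for the worker thread, or
     * rendered synchronously when the thread is not running). */
    charge_gpu_cycles(cycles_sum);
    return pos;  /* number of 32-bit words consumed from the list */
}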