X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fgpulib%2Fgpulib_thread_if.c;h=107ffd3568a634cb29148557400223b9a31ca8ad;hb=025b6fde9b8ce688008227211168358b96e98d62;hp=f0f607d621cbfb12411b39eb839c04f7341ac99d;hpb=accedc82b01fe5834a805a9872405d51de1d5c06;p=pcsx_rearmed.git

diff --git a/plugins/gpulib/gpulib_thread_if.c b/plugins/gpulib/gpulib_thread_if.c
index f0f607d6..107ffd35 100644
--- a/plugins/gpulib/gpulib_thread_if.c
+++ b/plugins/gpulib/gpulib_thread_if.c
@@ -18,12 +18,19 @@
 ***************************************************************************/
 
 #include 
+#include 
 #include 
 #include 
 #include "../gpulib/gpu.h"
 #include "../../frontend/plugin_lib.h"
+#include "gpu.h"
+#include "gpu_timing.h"
 #include "gpulib_thread_if.h"
 
+#define FALSE 0
+#define TRUE 1
+#define BOOL unsigned short
+
 typedef struct {
 	uint32_t *cmd_list;
 	int count;
@@ -47,14 +54,15 @@ typedef struct {
 	pthread_cond_t cond_queue_empty;
 	video_thread_queue *queue;
 	video_thread_queue *bg_queue;
-	bool running;
+	BOOL running;
 } video_thread_state;
 
 static video_thread_state thread;
 static video_thread_queue queues[2];
 static int thread_rendering;
-static bool hold_cmds;
-static bool needs_display;
+static BOOL hold_cmds;
+static BOOL needs_display;
+static BOOL flushed;
 
 extern const unsigned char cmd_lengths[];
 
@@ -62,10 +70,13 @@ static void *video_thread_main(void *arg) {
 	video_thread_state *thread = (video_thread_state *)arg;
 	video_thread_cmd *cmd;
 	int i;
+
+#ifdef _3DS
 	static int processed = 0;
+#endif /* _3DS */
 
 	while(1) {
-		int result, last_cmd, start, end;
+		int result, cycles_dummy = 0, last_cmd, start, end;
 		video_thread_queue *queue;
 
 		pthread_mutex_lock(&thread->queue_lock);
@@ -86,8 +97,8 @@ static void *video_thread_main(void *arg) {
 		for (i = start; i < end; i++) {
 			cmd = &queue->queue[i];
 
-			result = real_do_cmd_list(cmd->cmd_list, cmd->count, &last_cmd);
-
+			result = real_do_cmd_list(cmd->cmd_list, cmd->count,
+					&cycles_dummy, &cycles_dummy, &last_cmd);
 			if (result != cmd->count) {
 				fprintf(stderr, "Processed wrong cmd count: expected %d, got %d\n", cmd->count, result);
 			}
@@ -99,7 +110,7 @@ static void *video_thread_main(void *arg) {
 			svcSleepThread(1);
 			processed %= 512;
 		}
-#endif
+#endif /* _3DS */
 	}
 
 	pthread_mutex_lock(&thread->queue_lock);
@@ -124,7 +135,6 @@ static void cmd_queue_swap() {
 		tmp = thread.queue;
 		thread.queue = thread.bg_queue;
 		thread.bg_queue = tmp;
-		needs_display = true;
 		pthread_cond_signal(&thread.cond_msg_avail);
 	}
 	pthread_mutex_unlock(&thread.queue_lock);
@@ -160,6 +170,13 @@ void renderer_sync(void) {
 		return;
 	}
 
+	if (thread.bg_queue->used) {
+		/* When we flush the background queue, the vblank handler can't
+		 * know that we had a frame pending, and we delay rendering too
+		 * long. Force it. */
+		flushed = TRUE;
+	}
+
 	/* Flush both queues. This is necessary because gpulib could be
 	 * trying to process a DMA write that a command in the queue should
	 * run beforehand. For example, Xenogears sprites write a black
@@ -169,7 +186,7 @@ void renderer_sync(void) {
 	 * drop a frame. */
 	renderer_wait();
 	cmd_queue_swap();
-	hold_cmds = false;
+	hold_cmds = FALSE;
 	renderer_wait();
 }
 
@@ -178,7 +195,7 @@ static void video_thread_stop() {
 	renderer_sync();
 
 	if (thread.running) {
-		thread.running = false;
+		thread.running = FALSE;
 		pthread_cond_signal(&thread.cond_msg_avail);
 		pthread_join(thread.thread, NULL);
 	}
@@ -215,7 +232,7 @@ static void video_thread_start() {
 	thread.queue = &queues[0];
 	thread.bg_queue = &queues[1];
 
-	thread.running = true;
+	thread.running = TRUE;
 	return;
 
 error:
@@ -227,7 +244,7 @@ static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
 	video_thread_cmd *cmd;
 	uint32_t *cmd_list;
 	video_thread_queue *queue;
-	bool lock;
+	BOOL lock;
 
 	cmd_list = (uint32_t *)calloc(count, sizeof(uint32_t));
 
@@ -248,10 +265,10 @@ static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
 
 	if (hold_cmds) {
 		queue = thread.bg_queue;
-		lock = false;
+		lock = FALSE;
 	} else {
 		queue = thread.queue;
-		lock = true;
+		lock = TRUE;
 	}
 
 	if (lock) {
@@ -278,41 +295,99 @@ static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
 
 /* Slice off just the part of the list that can be handled async, and
  * update ex_regs. */
-static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
+static int scan_cmd_list(uint32_t *data, int count,
+		int *cycles_sum_out, int *cycles_last, int *last_cmd)
 {
+	int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
 	int cmd = 0, pos = 0, len, v;
 
 	while (pos < count) {
 		uint32_t *list = data + pos;
-		cmd = list[0] >> 24;
+		short *slist = (void *)list;
+		cmd = LE32TOH(list[0]) >> 24;
 		len = 1 + cmd_lengths[cmd];
 
 		switch (cmd) {
 		case 0x02:
+			gput_sum(cpu_cycles_sum, cpu_cycles,
+				gput_fill(LE16TOH(slist[4]) & 0x3ff,
+					LE16TOH(slist[5]) & 0x1ff));
+			break;
+		case 0x20 ... 0x23:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
 			break;
 		case 0x24 ... 0x27:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
+			gpu.ex_regs[1] &= ~0x1ff;
+			gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+			break;
+		case 0x28 ... 0x2b:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
+			break;
 		case 0x2c ... 0x2f:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
+			gpu.ex_regs[1] &= ~0x1ff;
+			gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+			break;
+		case 0x30 ... 0x33:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
+			break;
 		case 0x34 ... 0x37:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
+			gpu.ex_regs[1] &= ~0x1ff;
+			gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+			break;
+		case 0x38 ... 0x3b:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
+			break;
 		case 0x3c ... 0x3f:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
 			gpu.ex_regs[1] &= ~0x1ff;
-			gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
+			gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+			break;
+		case 0x40 ... 0x47:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
 			break;
 		case 0x48 ... 0x4F:
 			for (v = 3; pos + v < count; v++) {
+				gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
 				if ((list[v] & 0xf000f000) == 0x50005000)
 					break;
 			}
 			len += v - 3;
 			break;
+		case 0x50 ... 0x57:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+			break;
 		case 0x58 ... 0x5F:
 			for (v = 4; pos + v < count; v += 2) {
+				gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
 				if ((list[v] & 0xf000f000) == 0x50005000)
 					break;
 			}
 			len += v - 4;
 			break;
+		case 0x60 ... 0x63:
+			gput_sum(cpu_cycles_sum, cpu_cycles,
+				gput_sprite(LE16TOH(slist[4]) & 0x3ff,
+					LE16TOH(slist[5]) & 0x1ff));
+			break;
+		case 0x64 ... 0x67:
+			gput_sum(cpu_cycles_sum, cpu_cycles,
+				gput_sprite(LE16TOH(slist[6]) & 0x3ff,
+					LE16TOH(slist[7]) & 0x1ff));
+			break;
+		case 0x68 ... 0x6b:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
+			break;
+		case 0x70 ... 0x77:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8));
+			break;
+		case 0x78 ... 0x7f:
+			gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16));
+			break;
 		default:
 			if ((cmd & 0xf8) == 0xe0)
 				gpu.ex_regs[cmd & 7] = list[0];
@@ -323,24 +398,28 @@ static int scan_cmd_list(uint32_t *data, int count, int *last_cmd)
 			cmd = -1;
 			break; /* incomplete cmd */
 		}
-		if (0xa0 <= cmd && cmd <= 0xdf)
+		if (0x80 <= cmd && cmd <= 0xdf)
 			break; /* image i/o */
 
 		pos += len;
 	}
 
+	*cycles_sum_out += cpu_cycles_sum;
+	*cycles_last = cpu_cycles;
 	*last_cmd = cmd;
 	return pos;
 }
 
-int do_cmd_list(uint32_t *list, int count, int *last_cmd) {
+int do_cmd_list(uint32_t *list, int count,
+		int *cycles_sum, int *cycles_last, int *last_cmd)
+{
 	int pos = 0;
 
 	if (thread.running) {
-		pos = scan_cmd_list(list, count, last_cmd);
+		pos = scan_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
 		video_thread_queue_cmd(list, pos, *last_cmd);
 	} else {
-		pos = real_do_cmd_list(list, count, last_cmd);
+		pos = real_do_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
 		memcpy(gpu.ex_regs, gpu.scratch_ex_regs, sizeof(gpu.ex_regs));
 	}
 	return pos;
@@ -363,16 +442,16 @@ void renderer_finish(void) {
 
 void renderer_sync_ecmds(uint32_t * ecmds) {
 	if (thread.running) {
-		int dummy;
-		do_cmd_list(&ecmds[1], 6, &dummy);
+		int dummy = 0;
+		do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
 	} else {
 		real_renderer_sync_ecmds(ecmds);
 	}
 }
 
-void renderer_update_caches(int x, int y, int w, int h) {
+void renderer_update_caches(int x, int y, int w, int h, int state_changed) {
 	renderer_sync();
-	real_renderer_update_caches(x, y, w, h);
+	real_renderer_update_caches(x, y, w, h, state_changed);
 }
 
 void renderer_flush_queues(void) {
@@ -425,7 +504,7 @@ void renderer_notify_update_lace(int updated) {
 	}
 
 	pthread_mutex_lock(&thread.queue_lock);
-	if (thread.bg_queue->used) {
+	if (thread.bg_queue->used || flushed) {
 		/* We have commands for a future frame to run. Force a wait until
 		 * the current frame is finished, and start processing the next
 		 * frame after it's drawn (see the `updated` clause above). */
@@ -436,23 +515,24 @@ void renderer_notify_update_lace(int updated) {
 		/* We are no longer holding commands back, so the next frame may
 		 * get mixed into the following frame. This is usually fine, but can
 		 * result in frameskip-like effects for 60fps games. */
-		hold_cmds = false;
-		needs_display = true;
-		gpu.state.fb_dirty = true;
+		flushed = FALSE;
+		hold_cmds = FALSE;
+		needs_display = TRUE;
+		gpu.state.fb_dirty = TRUE;
 	} else if (thread.queue->used) {
 		/* We are still drawing during a vblank. Cut off the current frame
 		 * by sending new commands to the background queue and skip
 		 * drawing our partly rendered frame to the display. */
-		hold_cmds = true;
-		needs_display = true;
-		gpu.state.fb_dirty = false;
+		hold_cmds = TRUE;
+		needs_display = TRUE;
+		gpu.state.fb_dirty = FALSE;
 	} else if (needs_display && !thread.queue->used) {
 		/* We have processed all commands in the queue, render the
 		 * buffer. We know we have something to render, because
-		 * needs_display is true. */
-		hold_cmds = false;
-		needs_display = false;
-		gpu.state.fb_dirty = true;
+		 * needs_display is TRUE. */
+		hold_cmds = FALSE;
+		needs_display = FALSE;
+		gpu.state.fb_dirty = TRUE;
 	} else {
 		/* Everything went normally, so do the normal thing. */
 	}
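
Note on the interface reworked above: do_cmd_list() and real_do_cmd_list() now report GPU cycle estimates through two out-parameters (a running sum plus per-command cycle state carried across calls) in addition to returning the number of words consumed. The minimal caller sketch below is illustrative only and not part of the patch; only the do_cmd_list() prototype is taken from the diff, while the feed_packet() helper, its buffer, and the printf reporting are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Prototype as changed by the patch (normally provided by gpulib's headers). */
int do_cmd_list(uint32_t *list, int count,
		int *cycles_sum, int *cycles_last, int *last_cmd);

/* Hypothetical helper: feed one command packet and report the estimates. */
void feed_packet(uint32_t *list, int count)
{
	int cycles_sum = 0;	/* accumulated GPU busy-time estimate */
	int cycles_last = 0;	/* per-command cycle state carried across calls */
	int last_cmd = 0;	/* last opcode seen, or -1 for an incomplete cmd */
	int pos;

	/* pos = number of 32-bit words consumed; as in scan_cmd_list() above,
	 * processing stops early on image i/o commands (0x80..0xdf) or on an
	 * incomplete command. */
	pos = do_cmd_list(list, count, &cycles_sum, &cycles_last, &last_cmd);

	printf("consumed %d/%d words, last cmd %d, ~%d cycles\n",
	       pos, count, last_cmd, cycles_sum);
}

(Linking this sketch requires the rest of gpulib; it is meant only to show the calling convention of the new five-argument interface.)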