frontend/menu.o: CFLAGS += -DUSE_ASYNC_CDROM
USE_RTHREADS := 1
endif
+ifeq "$(USE_ASYNC_GPU)" "1"
+frontend/libretro.o: CFLAGS += -DUSE_ASYNC_GPU
+frontend/menu.o: CFLAGS += -DUSE_ASYNC_GPU
+USE_RTHREADS := 1
+endif
# dynarec
ifeq "$(DYNAREC)" "lightrec"
# builtin gpu
OBJS += plugins/gpulib/gpu.o plugins/gpulib/vout_pl.o plugins/gpulib/prim.o
+ifeq "$(USE_ASYNC_GPU)" "1"
+OBJS += plugins/gpulib/gpu_async.o
+plugins/gpulib/%.o: CFLAGS += -DUSE_ASYNC_GPU
+endif
ifeq "$(BUILTIN_GPU)" "neon"
CFLAGS += -DGPU_NEON
OBJS += plugins/gpu_neon/psx_gpu_if.o
plugins/dfxvideo/gpulib_if.o: CFLAGS += -fno-strict-aliasing
frontend/menu.o frontend/plugin_lib.o: CFLAGS += -DBUILTIN_GPU_PEOPS
OBJS += plugins/dfxvideo/gpulib_if.o
-ifeq "$(THREAD_RENDERING)" "1"
-CFLAGS += -DTHREAD_RENDERING
-OBJS += plugins/gpulib/gpulib_thread_if.o
-endif
endif
ifeq "$(BUILTIN_GPU)" "unai"
ifeq "$(ARCH)" "arm"
OBJS += plugins/gpu_unai/gpu_arm.o
endif
-ifeq "$(THREAD_RENDERING)" "1"
-CFLAGS += -DTHREAD_RENDERING
-OBJS += plugins/gpulib/gpulib_thread_if.o
-endif
ifneq "$(GPU_UNAI_NO_OLD)" "1"
OBJS += plugins/gpu_unai/old/if.o
else
HAVE_CHD ?= 1
HAVE_PHYSICAL_CDROM ?= 1
USE_ASYNC_CDROM ?= 1
+USE_ASYNC_GPU ?= 1
USE_LIBRETRO_VFS ?= 0
NDRC_THREAD ?= 1
GNU_LINKER ?= 1
ifeq ($(platform), unix)
TARGET := $(TARGET_NAME)_libretro.so
fpic := -fPIC
- THREAD_RENDERING = 1
ifeq ($(shell uname),Linux)
LIGHTREC_CUSTOM_MAP := 1
endif
NEED_SYSCONF := 1
HAVE_PHYSICAL_CDROM = 0
USE_ASYNC_CDROM = 0
+ USE_ASYNC_GPU = 0
# PSP
else ifeq ($(platform), psp1)
OBJS += frontend/3ds/utils.o
BUILTIN_GPU = unai
- THREAD_RENDERING = 1
DYNAREC = ari64
ARCH = arm
PARTIAL_LINKING = 1
CFLAGS += -DGEKKO -mcpu=750 -meabi -mhard-float
CFLAGS += -DNO_DYLIB
STATIC_LINKING := 1
- THREAD_RENDERING := 0
NO_PTHREAD := 1
NO_MMAP := 1
NO_POSIX_MEMALIGN := 1
LIBRT :=
HAVE_PHYSICAL_CDROM = 0
USE_ASYNC_CDROM = 0
+ USE_ASYNC_GPU = 0
# QNX
else ifeq ($(platform), qnx)
# but we don't want to include libretro-common's rthread object files here
USE_RTHREADS=0
USE_ASYNC_CDROM=0
+ USE_ASYNC_GPU=0
# so we disable some uses of threads within pcsx_rearmed.
# is this a good solution? I don't know!
else
LIBPTHREAD :=
USE_RTHREADS=0
USE_ASYNC_CDROM=0
+ USE_ASYNC_GPU=0
NO_PTHREAD=1
endif
DYNAREC =
fi
if [ "$multithreading" = "yes" ]; then
echo "USE_ASYNC_CDROM = 1" >> $config_mak
+ echo "USE_ASYNC_GPU = 1" >> $config_mak
echo "NDRC_THREAD = 1" >> $config_mak
fi
pl_rearmed_cbs.show_overscan = 0;
}
-#ifdef THREAD_RENDERING
+#ifdef USE_ASYNC_GPU
var.key = "pcsx_rearmed_gpu_thread_rendering";
var.value = NULL;
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
- if (strcmp(var.value, "disabled") == 0)
- pl_rearmed_cbs.thread_rendering = THREAD_RENDERING_OFF;
- else if (strcmp(var.value, "sync") == 0)
- pl_rearmed_cbs.thread_rendering = THREAD_RENDERING_SYNC;
- else if (strcmp(var.value, "async") == 0)
- pl_rearmed_cbs.thread_rendering = THREAD_RENDERING_ASYNC;
+ if (strcmp(var.value, "enabled") == 0)
+ pl_rearmed_cbs.thread_rendering = 1;
+ else
+ pl_rearmed_cbs.thread_rendering = 0;
}
#endif
"enabled",
#endif
},
-#ifdef THREAD_RENDERING
+#ifdef USE_ASYNC_GPU
{
"pcsx_rearmed_gpu_thread_rendering",
"Threaded Rendering",
NULL,
- "When enabled, runs GPU commands in a secondary thread. 'Synchronous' improves performance while maintaining proper frame pacing. 'Asynchronous' improves performance even further, but may cause dropped frames and increased latency. Produces best results with games that run natively at less than 60 frames per second.",
+ "When enabled, runs GPU commands in a secondary thread.",
NULL,
"video",
{
{ "disabled", NULL },
- { "sync", "Synchronous" },
- { "async", "Asynchronous" },
+ { "enabled", NULL },
{ NULL, NULL},
},
"disabled",
CE_INTVAL_N("adev1_is_nublike", in_adev_is_nublike[1]),
CE_INTVAL_V(frameskip, 4),
CE_INTVAL_PV(dithering, 2),
+ CE_INTVAL_P(thread_rendering),
CE_INTVAL_P(gpu_peops.dwActFixes),
CE_INTVAL_P(gpu_unai.old_renderer),
CE_INTVAL_P(gpu_unai.ilace_force),
"must save config and reload the game if changed"
#endif
;
+static const char h_sputhr[] = "Warning: has some known bugs\n";
// static const char h_gpu_peops[] = "Configure P.E.Op.S. SoftGL Driver V1.17";
// static const char h_gpu_peopsgl[]= "Configure P.E.Op.S. MesaGL Driver V1.78";
// static const char h_gpu_unai[] = "Configure Unai/PCSX4ALL Team plugin (new)";
{
mee_enum_h ("BIOS", 0, bios_sel, bioses, h_bios),
mee_enum ("BIOS logo (slow boot)", 0, slowboot_sel, men_bios_boot),
- mee_enum ("GPU Dithering", 0, pl_rearmed_cbs.dithering, men_gpu_dithering),
mee_enum_h ("GPU plugin", 0, gpu_plugsel, gpu_plugins, h_plugin_gpu),
+#ifdef USE_ASYNC_GPU
+ mee_onoff ("GPU multithreading", 0, pl_rearmed_cbs.thread_rendering, 1),
+#endif
+ mee_enum ("GPU dithering", 0, pl_rearmed_cbs.dithering, men_gpu_dithering),
mee_enum_h ("SPU plugin", 0, spu_plugsel, spu_plugins, h_plugin_spu),
+#ifndef C64X_DSP
+ mee_onoff_h ("SPU multithreading", MA_OPT_SPU_THREAD, spu_config.iUseThread, 1, h_sputhr),
+#endif
mee_handler ("Configure selected GPU plugin", menu_loop_pluginsel_options),
mee_handler_h ("Configure built-in SPU plugin", menu_loop_plugin_spu, h_spu),
mee_end,
{
static int sel = 0;
slowboot_sel = Config.SlowBoot;
+#ifndef C64X_DSP
+ me_enable(e_menu_plugin_options, MA_OPT_SPU_THREAD, spu_config.iThreadAvail);
+#endif
me_loop(e_menu_plugin_options, &sel);
Config.SlowBoot = slowboot_sel;
static const char h_restore_def[] = "Switches back to default / recommended\n"
"configuration";
static const char h_frameskip[] = "Warning: frameskip sometimes causes glitches\n";
-static const char h_sputhr[] = "Warning: has some known bugs\n";
static menu_entry e_menu_options[] =
{
mee_range ("CPU clock", MA_OPT_CPU_CLOCKS, cpu_clock, 20, 5000),
#ifdef C64X_DSP
mee_onoff_h ("Use C64x DSP for sound", MA_OPT_SPU_THREAD, spu_config.iUseThread, 1, h_sputhr),
-#else
- mee_onoff_h ("Threaded SPU", MA_OPT_SPU_THREAD, spu_config.iUseThread, 1, h_sputhr),
#endif
mee_handler_id("[Display]", MA_OPT_DISP_OPTS, menu_loop_gfx_options),
mee_handler ("[BIOS/Plugins]", menu_loop_plugin_options),
static int sel = 0;
me_enable(e_menu_options, MA_OPT_CPU_CLOCKS, cpu_clock_st > 0);
+#ifdef C64X_DSP
me_enable(e_menu_options, MA_OPT_SPU_THREAD, spu_config.iThreadAvail);
+#endif
me_enable(e_menu_options, MA_OPT_SAVECFG_GAME, ready_to_go && CdromId[0]);
me_loop(e_menu_options, &sel);
#ifndef __PLUGIN_LIB_H__
#define __PLUGIN_LIB_H__
-#define THREAD_RENDERING_OFF 0
-#define THREAD_RENDERING_SYNC 1
-#define THREAD_RENDERING_ASYNC 2
-
enum {
DKEY_SELECT = 0,
DKEY_L3,
USE_LIBRETRO_VFS ?= 0
USE_ASYNC_CDROM ?= 1
+USE_ASYNC_GPU ?= 1
USE_RTHREADS ?= 0
NDRC_THREAD ?= 1
COREFLAGS += -DUSE_ASYNC_CDROM
USE_RTHREADS := 1
endif
+ifeq ($(USE_ASYNC_GPU),1)
+SOURCES_C += $(GPU_DIR)/gpu_async.c \
+COREFLAGS += -DUSE_ASYNC_GPU
+USE_RTHREADS := 1
+endif
ifeq ($(USE_RTHREADS),1)
SOURCES_C += \
$(FRONTEND_DIR)/pcsxr-threads.c \
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
#endif
-#ifdef THREAD_RENDERING
-#include "../gpulib/gpulib_thread_if.h"
-#define do_cmd_list real_do_cmd_list
-#define renderer_init real_renderer_init
-#define renderer_finish real_renderer_finish
-#define renderer_sync_ecmds real_renderer_sync_ecmds
-#define renderer_update_caches real_renderer_update_caches
-#define renderer_flush_queues real_renderer_flush_queues
-#define renderer_set_interlace real_renderer_set_interlace
-#define renderer_set_config real_renderer_set_config
-#define renderer_notify_res_change real_renderer_notify_res_change
-#define renderer_notify_update_lace real_renderer_notify_update_lace
-#define renderer_sync real_renderer_sync
-#define ex_regs scratch_ex_regs
-#endif
-
#define u32 uint32_t
#define INFO_TW 0
{
}
-void renderer_notify_res_change(void)
-{
-}
-
-void renderer_notify_scanout_change(int x, int y)
+void renderer_notify_screen_change(const struct psx_gpu_screen *screen)
{
}
#include "../gpulib/gpu_timing.h"
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(uint32_t *list, int list_len,
+int renderer_do_cmd_list(uint32_t *list, int list_len, uint32_t *ex_regs,
int *cycles_sum_out, int *cycles_last, int *last_cmd)
{
int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
if (0x80 <= cmd && cmd < 0xe0)
break; // image i/o, forward to upper layer
else if ((cmd & 0xf8) == 0xe0)
- gpu.ex_regs[cmd & 7] = GETLE32(list);
+ ex_regs[cmd & 7] = GETLE32(list);
#endif
primTableJ[cmd]((void *)list);
}
breakloop:
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff;
+ ex_regs[1] &= ~0x1ff;
+ ex_regs[1] |= lGPUstatusRet & 0x1ff;
*cycles_sum_out += cpu_cycles_sum;
*cycles_last = cpu_cycles;
{
}
-void renderer_sync(void)
-{
-}
-
-void renderer_notify_update_lace(int updated)
-{
-}
-
#include "../../frontend/plugin_lib.h"
void renderer_set_config(const struct rearmed_cbs *cbs)
{
}
-void renderer_notify_res_change(void)
-{
-}
-
-void renderer_notify_scanout_change(int x, int y)
+void renderer_notify_screen_change(const struct psx_gpu_screen *screen)
{
}
extern const unsigned char cmd_lengths[256];
// XXX: mostly dupe code from soft peops
-int do_cmd_list(uint32_t *list, int list_len,
+int renderer_do_cmd_list(uint32_t *list, int list_len, uint32_t *ex_regs,
int *cycles_sum_out, int *cycles_last, int *last_cmd)
{
unsigned int cmd, len;
if (cmd == 0xa0 || cmd == 0xc0)
break; // image i/o, forward to upper layer
else if ((cmd & 0xf8) == 0xe0)
- gpu.ex_regs[cmd & 7] = list[0];
+ ex_regs[cmd & 7] = list[0];
#endif
primTableJ[cmd]((void *)list);
}
breakloop:
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff;
+ ex_regs[1] &= ~0x1ff;
+ ex_regs[1] |= lGPUstatusRet & 0x1ff;
*last_cmd = cmd;
return list - list_start;
DisplayText(buf, 1);
}
}
-
-void renderer_sync(void)
-{
-}
-
-void renderer_notify_update_lace(int updated)
-{
-}
edge_data_struct *span_edge_data, vec_4x32u *span_uvrg_offset);
void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram);
-u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
+u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *ex_regs,
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command);
void triangle_benchmark(psx_gpu_struct *psx_gpu);
s32 dummy0 = 0;
s32 dummy1 = 0;
u32 dummy2 = 0;
+ u32 ex_regs[8];
if((argc != 3) && (argc != 4))
{
init_counter();
#endif
- gpu_parse(psx_gpu, list, size, &dummy0, &dummy1, &dummy2);
+ gpu_parse(psx_gpu, list, size, ex_regs, &dummy0, &dummy1, &dummy2);
flush_render_block_buffer(psx_gpu);
clear_stats();
u32 cycles = get_counter();
#endif
- gpu_parse(psx_gpu, list, size, &dummy0, &dummy1, &dummy2);
+ gpu_parse(psx_gpu, list, size, ex_regs, &dummy0, &dummy1, &dummy2);
flush_render_block_buffer(psx_gpu);
#ifdef CYCLE_COUNTER
get_vertex_data_xy(vertex_number, offset16); \
set_vertex_color_constant(vertex_number, color) \
-#ifndef SET_Ex
-#define SET_Ex(r, v)
-#endif
-
static void textured_sprite(psx_gpu_struct *psx_gpu, const u32 *list,
s32 width, s32 height, u32 *cpu_cycles_sum, u32 *cpu_cycles)
{
render_triangle_p(psx_gpu, triangle.vertexes, current_command);
}
-u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
+u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *ex_regs,
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command)
{
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
psx_gpu->render_state_base &= ~RENDER_STATE_DITHER;
psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1;
- SET_Ex(1, list[0]);
+ ex_regs[1] = list[0];
break;
case 0xE2:
update_texture_ptr(psx_gpu);
}
- SET_Ex(2, list[0]);
+ ex_regs[2] = list[0];
break;
}
psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
psx_gpu->viewport_end_y);
#endif
- SET_Ex(3, list[0]);
+ ex_regs[3] = list[0];
break;
}
psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
psx_gpu->viewport_end_y);
#endif
- SET_Ex(4, list[0]);
+ ex_regs[4] = list[0];
break;
}
psx_gpu->offset_x = sign_extend_11bit(list[0]);
psx_gpu->offset_y = sign_extend_11bit(list[0] >> 11);
- SET_Ex(5, list[0]);
+ ex_regs[5] = list[0];
break;
}
psx_gpu->mask_msb = mask_msb;
}
- SET_Ex(6, list[0]);
+ ex_regs[6] = list[0];
break;
}
do_sprite_enhanced(psx_gpu, x, y, u, v, width_b, height_b, list[0]);
}
-u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
+u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *ex_regs,
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command)
{
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
psx_gpu->render_state_base &= ~RENDER_STATE_DITHER;
psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1;
- SET_Ex(1, list[0]);
+ ex_regs[1] = list[0];
break;
case 0xE2:
update_texture_ptr(psx_gpu);
}
- SET_Ex(2, list[0]);
+ ex_regs[2] = list[0];
break;
}
psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
psx_gpu->viewport_end_y);
#endif
- SET_Ex(3, list[0]);
+ ex_regs[3] = list[0];
break;
}
psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
psx_gpu->viewport_end_y);
#endif
- SET_Ex(4, list[0]);
+ ex_regs[4] = list[0];
break;
}
psx_gpu->offset_x = sign_extend_11bit(list[0]);
psx_gpu->offset_y = sign_extend_11bit(list[0] >> 11);
- SET_Ex(5, list[0]);
+ ex_regs[5] = list[0];
break;
}
psx_gpu->mask_msb = mask_msb;
}
- SET_Ex(6, list[0]);
+ ex_regs[6] = list[0];
break;
}
extern const unsigned char cmd_lengths[256];
#define command_lengths cmd_lengths
-static unsigned int *ex_regs;
static int initialized;
#define PCSX
-#define SET_Ex(r, v) \
- ex_regs[r] = v
static __attribute__((noinline)) void
sync_enhancement_buffers(int x, int y, int w, int h);
static psx_gpu_struct egpu __attribute__((aligned(256)));
-int do_cmd_list(uint32_t *list, int count,
+int renderer_do_cmd_list(uint32_t *list, int count, uint32_t *ex_regs,
int *cycles_sum, int *cycles_last, int *last_cmd)
{
int ret;
if (gpu.state.enhancement_active)
- ret = gpu_parse_enhanced(&egpu, list, count * 4,
+ ret = gpu_parse_enhanced(&egpu, list, count * 4, ex_regs,
cycles_sum, cycles_last, (u32 *)last_cmd);
else
- ret = gpu_parse(&egpu, list, count * 4,
+ ret = gpu_parse(&egpu, list, count * 4, ex_regs,
cycles_sum, cycles_last, (u32 *)last_cmd);
ex_regs[1] &= ~0x1ff;
if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL)
map_enhancement_buffer();
- ex_regs = gpu.ex_regs;
return 0;
}
{
s32 dummy0 = 0;
u32 dummy1 = 0;
- gpu_parse(&egpu, ecmds + 1, 6 * 4, &dummy0, &dummy0, &dummy1);
+ gpu_parse(&egpu, ecmds + 1, 6 * 4, ecmds, &dummy0, &dummy0, &dummy1);
}
void renderer_update_caches(int x, int y, int w, int h, int state_changed)
egpu.render_mode |= RENDER_INTERLACE_ODD;
}
-void renderer_notify_res_change(void)
+void renderer_notify_screen_change(const struct psx_gpu_screen *screen)
{
- renderer_notify_scanout_change(gpu.screen.src_x, gpu.screen.src_y);
-}
-
-void renderer_notify_scanout_change(int x, int y)
-{
- int vres = gpu.screen.vres;
+ int x = screen->src_x, y = screen->src_y;
+ int vres = screen->vres;
if (!gpu.state.enhancement_active || !egpu.enhancement_buf_ptr)
return;
- if (gpu.screen.y < 0)
- vres -= gpu.screen.y;
- update_enhancement_buf_scanouts(&egpu, x, y, gpu.screen.hres, vres);
+ if (screen->y < 0)
+ vres -= screen->y;
+ update_enhancement_buf_scanouts(&egpu, x, y, screen->hres, vres);
}
#include "../../frontend/plugin_lib.h"
}
}
-void renderer_sync(void)
-{
-}
-
-void renderer_notify_update_lace(int updated)
-{
-}
-
// vim:ts=2:sw=2:expandtab
#include "../gpulib/gpu.h"
#include "old/if.h"
-#ifdef THREAD_RENDERING
-#include "../gpulib/gpulib_thread_if.h"
-#define do_cmd_list real_do_cmd_list
-#define renderer_init real_renderer_init
-#define renderer_finish real_renderer_finish
-#define renderer_sync_ecmds real_renderer_sync_ecmds
-#define renderer_update_caches real_renderer_update_caches
-#define renderer_flush_queues real_renderer_flush_queues
-#define renderer_set_interlace real_renderer_set_interlace
-#define renderer_set_config real_renderer_set_config
-#define renderer_notify_res_change real_renderer_notify_res_change
-#define renderer_notify_update_lace real_renderer_notify_update_lace
-#define renderer_sync real_renderer_sync
-#define ex_regs scratch_ex_regs
-#endif
-
//#include "port.h"
#include "gpu_unai.h"
unmap_downscale_buffer();
}
-void renderer_notify_res_change(void)
+void renderer_notify_screen_change(const struct psx_gpu_screen *screen)
{
gpu_unai.inn.ilace_mask = gpu_unai.config.ilace_force;
*/
}
-void renderer_notify_scanout_change(int x, int y)
-{
-}
-
#ifdef USE_GPULIB
// Handles GP0 draw settings commands 0xE1...0xE6
static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word)
{
// Assume incoming GP0 command is 0xE1..0xE6, convert to 1..6
u8 num = (cmd_word >> 24) & 7;
- gpu.ex_regs[num] = cmd_word; // Update gpulib register
switch (num) {
case 1: {
// GP0(E1h) - Draw Mode setting (aka "Texpage")
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(u32 *list_, int list_len,
+int renderer_do_cmd_list(u32 *list_, int list_len, uint32_t *ex_regs,
int *cycles_sum_out, int *cycles_last, int *last_cmd)
{
int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
le32_t *list_start = list;
le32_t *list_end = list + list_len;
- if (IS_OLD_RENDERER())
- return oldunai_do_cmd_list(list_, list_len, cycles_sum_out, cycles_last, last_cmd);
+ if (IS_OLD_RENDERER()) {
+ return oldunai_do_cmd_list(list_, list_len, ex_regs,
+ cycles_sum_out, cycles_last, last_cmd);
+ }
for (; list < list_end; list += 1 + len)
{
goto breakloop;
#endif
case 0xE1 ... 0xE6: { // Draw settings
- gpuGP0Cmd_0xEx(gpu_unai, le32_to_u32(gpu_unai.PacketBuffer.U4[0]));
+ u32 cmd_word = le32_to_u32(gpu_unai.PacketBuffer.U4[0]);
+ ex_regs[(cmd_word >> 24) & 7] = cmd_word;
+ gpuGP0Cmd_0xEx(gpu_unai, cmd_word);
} break;
}
}
breakloop:
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff;
+ ex_regs[1] &= ~0x1ff;
+ ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff;
*cycles_sum_out += cpu_cycles_sum;
*cycles_last = cpu_cycles;
{
if (!IS_OLD_RENDERER()) {
int dummy;
- do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
+ renderer_do_cmd_list(&ecmds[1], 6, ecmds, &dummy, &dummy, &dummy);
}
else
oldunai_renderer_sync_ecmds(ecmds);
void renderer_set_interlace(int enable, int is_odd)
{
- renderer_notify_res_change();
+ renderer_notify_screen_change(&gpu.screen);
}
#include "../../frontend/plugin_lib.h"
oldunai_renderer_set_config(cbs);
}
-void renderer_sync(void)
-{
-}
-
-void renderer_notify_update_lace(int updated)
-{
-}
-
// vim:shiftwidth=2:expandtab
extern const unsigned char cmd_lengths[256];
-int oldunai_do_cmd_list(uint32_t *list, int list_len,
+int oldunai_do_cmd_list(uint32_t *list, int list_len, uint32_t *ex_regs,
int *cycles_sum_out, int *cycles_last, int *last_cmd)
{
unsigned int cmd = 0, len, i;
const u32 temp = PacketBuffer.U4[0];
GPU_GP1 = (GPU_GP1 & ~0x000007FF) | (temp & 0x000007FF);
gpuSetTexture(temp);
- gpu.ex_regs[1] = temp;
+ ex_regs[1] = temp;
break;
}
case 0xE2: {
TextureWindow[2] = TextureMask[(temp >> 0) & 0x1F];
TextureWindow[3] = TextureMask[(temp >> 5) & 0x1F];
gpuSetTexture(GPU_GP1);
- gpu.ex_regs[2] = temp;
+ ex_regs[2] = temp;
break;
}
case 0xE3: {
const u32 temp = PacketBuffer.U4[0];
DrawingArea[0] = temp & 0x3FF;
DrawingArea[1] = (temp >> 10) & 0x3FF;
- gpu.ex_regs[3] = temp;
+ ex_regs[3] = temp;
break;
}
case 0xE4: {
const u32 temp = PacketBuffer.U4[0];
DrawingArea[2] = (temp & 0x3FF) + 1;
DrawingArea[3] = ((temp >> 10) & 0x3FF) + 1;
- gpu.ex_regs[4] = temp;
+ ex_regs[4] = temp;
break;
}
case 0xE5: {
const u32 temp = PacketBuffer.U4[0];
DrawingOffset[0] = ((s32)temp<<(32-11))>>(32-11);
DrawingOffset[1] = ((s32)temp<<(32-22))>>(32-11);
- gpu.ex_regs[5] = temp;
+ ex_regs[5] = temp;
break;
}
case 0xE6: {
const u32 temp = PacketBuffer.U4[0];
Masking = (temp & 0x2) << 1;
PixelMSB =(temp & 0x1) << 8;
- gpu.ex_regs[6] = temp;
+ ex_regs[6] = temp;
break;
}
}
}
breakloop:
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= GPU_GP1 & 0x1ff;
+ ex_regs[1] &= ~0x1ff;
+ ex_regs[1] |= GPU_GP1 & 0x1ff;
*last_cmd = cmd;
return list - list_start;
void oldunai_renderer_sync_ecmds(uint32_t *ecmds)
{
int dummy;
- do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
+ renderer_do_cmd_list(&ecmds[1], 6, ecmds, &dummy, &dummy, &dummy);
}
#ifndef TEST
#ifndef GPU_UNAI_NO_OLD
+#include <stdint.h>
+
struct rearmed_cbs;
void oldunai_renderer_init(void);
-int oldunai_do_cmd_list(uint32_t *list, int list_len,
+int oldunai_do_cmd_list(uint32_t *list, int list_len, uint32_t *ex_regs,
int *cycles_sum_out, int *cycles_last, int *last_cmd);
void oldunai_renderer_sync_ecmds(uint32_t *ecmds);
void oldunai_renderer_set_config(const struct rearmed_cbs *cbs);
ifndef DEBUG
CFLAGS += -O2
endif
+CFLAGS += -DGPULIB_PLUGIN
include ../../config.mak
#include "gpu.h"
#include "gpu_timing.h"
+#include "gpu_async.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"
#include "../../include/compiler_features.h"
int *cycles_sum, int *cycles_last);
static noinline void finish_vram_transfer(struct psx_gpu *gpu, int is_read);
+// Wait until all outstanding GPU draw work is complete. When threaded
+// (async) rendering is active this drains the worker thread's queue;
+// otherwise it just flushes the renderer's own batching queues.
+static void sync_renderer(struct psx_gpu *gpu)
+{
+  if (gpu_async_enabled(gpu))
+    gpu_async_sync(gpu);
+  else
+    renderer_flush_queues();
+}
+
static noinline void do_cmd_reset(struct psx_gpu *gpu)
{
int dummy = 0;
- renderer_sync();
+
if (unlikely(gpu->cmd_len > 0))
do_cmd_buffer(gpu, gpu->cmd_buffer, gpu->cmd_len, &dummy, &dummy);
gpu->cmd_len = 0;
+ sync_renderer(gpu);
if (unlikely(gpu->dma.h > 0))
finish_vram_transfer(gpu, gpu->dma_start.is_read);
gpu->screen.vres = gpu->screen.h = 240;
gpu->screen.x = gpu->screen.y = 0;
renderer_sync_ecmds(gpu->ex_regs);
- renderer_notify_res_change();
+ renderer_notify_screen_change(&gpu->screen);
+ gpu_async_sync_ecmds(gpu);
}
static noinline void update_width(struct psx_gpu *gpu)
if (!gpu->frameskip.active && gpu->frameskip.pending_fill[0] != 0) {
int dummy = 0;
- do_cmd_list(gpu->frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
+ if (gpu_async_enabled(gpu))
+ (void)gpu_async_do_cmd_list(gpu, gpu->frameskip.pending_fill, 3,
+ &dummy, &dummy, &dummy);
+ else
+ renderer_do_cmd_list(gpu->frameskip.pending_fill, 3, gpu->ex_regs,
+ &dummy, &dummy, &dummy);
gpu->frameskip.pending_fill[0] = 0;
}
}
return 0;
}
else {
- fprintf(stderr, "could not map vram, expect crashes\n");
+ SysPrintf("could not map vram, expect crashes\n");
gpu.vram = NULL;
return -1;
}
{
long ret;
+ gpu_async_stop(&gpu);
renderer_finish();
ret = vout_finish();
if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
gpu.screen.src_x = src_x;
gpu.screen.src_y = src_y;
- renderer_notify_scanout_change(src_x, src_y);
+ if (gpu.async)
+ gpu_async_notify_screen_change(&gpu);
+ else
+ renderer_notify_screen_change(&gpu.screen);
if (gpu.frameskip.set) {
decide_frameskip_allow(&gpu);
if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
update_width(&gpu);
update_height(&gpu);
- renderer_notify_res_change();
+ if (gpu.async)
+ gpu_async_notify_screen_change(&gpu);
+ else
+ renderer_notify_screen_change(&gpu.screen);
break;
default:
if ((cmd & 0xf0) == 0x10)
int l;
count *= 2; // operate in 16bpp pixels
- renderer_sync();
+ //sync_renderer(gpu); // done in start_vram_transfer()
if (gpu->dma.offset) {
l = w - gpu->dma.offset;
gpu->dma.is_read = is_read;
gpu->dma_start = gpu->dma;
- renderer_flush_queues();
+ sync_renderer(gpu);
+
if (is_read) {
const uint16_t *mem = VRAM_MEM_XY(gpu->vram, gpu->dma.x, gpu->dma.y);
gpu->status |= PSX_GPU_STATUS_IMG;
gpu->gpu_state_change(PGS_VRAM_TRANSFER_END, 0);
}
-static void do_vram_copy(struct psx_gpu *gpu, const uint32_t *params, int *cpu_cycles)
+int do_vram_copy(uint16_t *vram, const uint32_t *ex_regs,
+ const uint32_t *params, int *cpu_cycles)
{
- const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
- const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
- const uint32_t dx = LE32TOH(params[1]) & 0x3FF;
- const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
- uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
- uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
- uint16_t msb = gpu->ex_regs[6] << 15;
- uint16_t *vram = gpu->vram;
+ const uint32_t sx = LE32TOH(params[1]) & 0x3FF;
+ const uint32_t sy = (LE32TOH(params[1]) >> 16) & 0x1FF;
+ const uint32_t dx = LE32TOH(params[2]) & 0x3FF;
+ const uint32_t dy = (LE32TOH(params[2]) >> 16) & 0x1FF;
+ uint32_t w = ((LE32TOH(params[3]) - 1) & 0x3FF) + 1;
+ uint32_t h = (((LE32TOH(params[3]) >> 16) - 1) & 0x1FF) + 1;
+ uint16_t msb = ex_regs[6] << 15;
uint16_t lbuf[128];
uint32_t x, y;
*cpu_cycles += gput_copy(w, h);
if (sx == dx && sy == dy && msb == 0)
- return;
+ return 4;
renderer_flush_queues();
}
renderer_update_caches(dx, dy, w, h, 0);
+ return 4;
}
static noinline int do_cmd_list_skip(struct psx_gpu *gpu, uint32_t *data,
case 0x02:
if ((LE32TOH(list[2]) & 0x3ff) > gpu->screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu->screen.h)
// clearing something large, don't skip
- do_cmd_list(list, 3, &dummy, &dummy, &dummy);
+ renderer_do_cmd_list(list, 3, gpu->ex_regs, &dummy, &dummy, &dummy);
else
memcpy(gpu->frameskip.pending_fill, list, 3 * 4);
break;
case 0x34 ... 0x37:
case 0x3c ... 0x3f:
gpu->ex_regs[1] &= ~0x1ff;
- gpu->ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
+ gpu->ex_regs[1] |= (LE32TOH(list[4 + ((cmd >> 4) & 1)]) >> 16) & 0x1ff;
break;
case 0x48 ... 0x4F:
for (v = 3; pos + v < count; v++)
pos += len;
}
- renderer_sync_ecmds(gpu->ex_regs);
+ if (gpu->async)
+ gpu_async_sync_ecmds(gpu);
+ else
+ renderer_sync_ecmds(gpu->ex_regs);
*last_cmd = cmd;
return pos;
}
cmd = -1; // incomplete cmd, can't consume yet
break;
}
- renderer_sync();
+ if (gpu->async)
+ break;
*cycles_sum += *cycles_last;
*cycles_last = 0;
- do_vram_copy(gpu, data + pos + 1, cycles_last);
+ do_vram_copy(gpu->vram, gpu->ex_regs, data + pos, cycles_last);
vram_dirty = 1;
pos += 4;
continue;
continue;
}
- // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
if (gpu->frameskip.active &&
(gpu->frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) {
+ // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
pos += do_cmd_list_skip(gpu, data + pos, count - pos, &cmd);
}
+ else if (gpu->async) {
+ pos += gpu_async_do_cmd_list(gpu, data + pos, count - pos,
+ cycles_sum, cycles_last, &cmd);
+ vram_dirty = 1;
+ }
else {
- pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
+ pos += renderer_do_cmd_list(data + pos, count - pos, gpu->ex_regs,
+ cycles_sum, cycles_last, &cmd);
vram_dirty = 1;
}
if (gpu.cmd_len > 0)
flush_cmd_buffer(&gpu);
- renderer_sync();
+ sync_renderer(&gpu);
memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
freeze->ulStatus = gpu.status;
break;
case 0: // load
- renderer_sync();
+ sync_renderer(&gpu);
memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
//memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
GPUwriteStatus((i << 24) | freeze->ulControl[i]);
renderer_sync_ecmds(gpu.ex_regs);
renderer_update_caches(0, 0, 1024, 512, 0);
+ gpu_async_sync_ecmds(&gpu);
break;
}
if (gpu.cmd_len > 0)
flush_cmd_buffer(&gpu);
- renderer_flush_queues();
#ifndef RAW_FB_DISPLAY
if (gpu.status & PSX_GPU_STATUS_BLANKING) {
return;
}
- renderer_notify_update_lace(0);
-
if (!gpu.state.fb_dirty)
return;
#endif
gpu.frameskip.frame_ready = 0;
}
+ sync_renderer(&gpu);
updated = vout_update();
if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
renderer_update_caches(0, 0, 1024, 512, 1);
gpu.state.fb_dirty = 0;
gpu.state.blanked = 0;
}
- renderer_notify_update_lace(1);
}
void GPUvBlank(int is_vblank, int lcf)
if (cbs->pl_vout_set_raw_vram)
cbs->pl_vout_set_raw_vram(gpu.vram);
+ sync_renderer(&gpu);
renderer_set_config(cbs);
vout_set_config(cbs);
+
+ if (cbs->thread_rendering)
+ gpu_async_start(&gpu);
+ else
+ gpu_async_stop(&gpu);
+}
+
+// for standalone plugins only
+#ifdef GPULIB_PLUGIN
+#include <stdarg.h>
+// printf-style logger writing to stderr. Compiled only for standalone
+// gpulib plugin builds (GPULIB_PLUGIN), where the emulator core that
+// normally provides SysPrintf is not linked in.
+void SysPrintf(const char *fmt, ...)
+{
+  va_list list;
+  va_start(list, fmt);
+  vfprintf(stderr, fmt, list);
+  va_end(list);
}
+#endif
// vim:shiftwidth=2:expandtab
#define PSX_GPU_STATUS_DMA(x) ((x) << 29)
#define PSX_GPU_STATUS_DMA_MASK (BIT(29) | BIT(30))
+struct psx_gpu_async;
+
+// Display/scanout geometry, split out of struct psx_gpu so it can be
+// passed as a unit to renderer_notify_screen_change() (and, presumably,
+// copied into the async worker's fake screen-change command -- see
+// union cmd_screen_change in gpu_async.c). Fields were narrowed from
+// int to short; all PSX screen coordinates fit comfortably in 16 bits.
+struct psx_gpu_screen {
+  short hres, vres;      // horizontal/vertical resolution
+  short x, y, w, h;      // NOTE(review): display area position/size -- confirm units
+  short x1, x2;          // horizontal display range registers
+  short y1, y2;          // vertical display range registers
+  short src_x, src_y;    // scanout source position in VRAM
+};
+
struct psx_gpu {
uint32_t regs[16];
uint16_t *vram;
uint32_t status;
uint32_t gp0;
uint32_t ex_regs[8];
- struct {
- int hres, vres;
- int x, y, w, h;
- int x1, x2;
- int y1, y2;
- int src_x, src_y;
- } screen;
+ struct psx_gpu_screen screen;
struct {
int x, y, w, h;
short int offset, is_read;
uint32_t last_flip_frame;
uint32_t pending_fill[3];
} frameskip;
- uint32_t scratch_ex_regs[8]; // for threaded rendering
uint32_t cmd_buffer[CMD_BUFFER_LEN];
+ struct psx_gpu_async *async;
void *(*get_enhancement_bufer)
(int *x, int *y, int *w, int *h, int *vram_h);
uint16_t *(*get_downscale_buffer)
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(uint32_t *list, int count,
- int *cycles_sum, int *cycles_last, int *last_cmd);
-
struct rearmed_cbs;
+// ex_regs: renderer should write Ex values for gpulib, never use them itself
+int renderer_do_cmd_list(uint32_t *list, int count, uint32_t *ex_regs,
+ int *cycles_sum, int *cycles_last, int *last_cmd);
+
int renderer_init(void);
void renderer_finish(void);
void renderer_sync_ecmds(uint32_t * ecmds);
void renderer_flush_queues(void);
void renderer_set_interlace(int enable, int is_odd);
void renderer_set_config(const struct rearmed_cbs *config);
-void renderer_notify_res_change(void);
-void renderer_notify_update_lace(int updated);
-void renderer_sync(void);
-void renderer_notify_scanout_change(int x, int y);
+void renderer_notify_screen_change(const struct psx_gpu_screen *screen);
int vout_init(void);
int vout_finish(void);
void vout_blank(void);
void vout_set_config(const struct rearmed_cbs *config);
+// helpers
+int do_vram_copy(uint16_t *vram, const uint32_t *ex_regs,
+ const uint32_t *params, int *cpu_cycles);
+
int prim_try_simplify_quad_t (void *simplified, const void *prim);
int prim_try_simplify_quad_gt(void *simplified, const void *prim);
void GPUgetScreenInfo(int *y, int *base_hres);
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_);
+void SysPrintf(const char *fmt, ...);
+
#ifdef __cplusplus
}
#endif
--- /dev/null
+/*
+ * (C) Gražvydas "notaz" Ignotas, 2025
+ *
+ * This work is licensed under the terms of any of these licenses
+ * (at your option):
+ * - GNU GPL, version 2 or later.
+ * - GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "gpu.h"
+#include "gpu_async.h"
+#include "gpu_timing.h"
+#include "../../include/arm_features.h"
+#include "../../include/compiler_features.h"
+#include "../../frontend/pcsxr-threads.h"
+
+//#define agpu_log gpu_log
+#define agpu_log(...)
+
+#define AGPU_BUF_LEN (128*1024/4u) // must be power of 2
+#define AGPU_BUF_MASK (AGPU_BUF_LEN - 1)
+#ifndef min
+#define min(a, b) ((b) < (a) ? (b) : (a))
+#endif
+
+// must be in 0xc0...0xdf range that can't appear in thread's real cmd stream
+#define FAKECMD_SCREEN_CHANGE 0xdfu
+
+#if defined(__aarch64__) || defined(HAVE_ARMV6)
+#define BARRIER() __asm__ __volatile__ ("dmb ishst" ::: "memory")
+#else
+#define BARRIER() __asm__ __volatile__ ("" ::: "memory")
+#endif
+
+enum waitmode {
+ waitmode_none = 0,
+ waitmode_progress,
+ waitmode_full,
+};
+
+// Shared state of the asynchronous GPU worker thread.
+// Single-producer (emulation thread) / single-consumer (worker thread)
+// command ring buffer; pos_added/pos_used are free-running word indexes,
+// masked with AGPU_BUF_MASK on each buffer access.
+struct psx_gpu_async
+{
+  uint32_t pos_added;      // write index, advanced only by the producer
+  uint32_t pos_used;       // read index, advanced only by the consumer
+  enum waitmode wait_mode; // what the producer is currently waiting for (under lock)
+  uint8_t exit;            // request the worker thread to terminate
+  uint8_t idle;            // worker is parked on cond_use (accessed under lock)
+  sthread_t *thread;
+  slock_t *lock;
+  scond_t *cond_use;       // signaled to wake the parked worker
+  scond_t *cond_add;       // signaled to unblock a waiting producer
+  uint32_t ex_regs[8]; // used by vram copy at least
+  uint32_t cmd_buffer[AGPU_BUF_LEN];
+};
+
+// Payload of the in-band FAKECMD_SCREEN_CHANGE pseudo-command: a snapshot
+// of the psx_gpu_screen fields, passed through the ring buffer so the
+// worker applies it in order with the draw commands. 4 words total.
+union cmd_screen_change
+{
+  uint32_t u32s[4];
+  struct {
+    uint32_t cmd;         // FAKECMD_SCREEN_CHANGE << 24
+    short x, y;           // copies of gpu->screen.x/.y
+    short src_x, src_y;   // copies of gpu->screen.src_x/.src_y
+    short hres, vres;     // copies of gpu->screen.hres/.vres
+  };
+};
+
+static int noinline do_notify_screen_change(struct psx_gpu *gpu,
+ const union cmd_screen_change *cmd);
+
+// Return how many free uint32 slots remain in the ring buffer.
+// pos_used is read with a volatile load because the consumer thread
+// advances it without taking the lock (lock-free SPSC fast path).
+static int calc_space_for_add(struct psx_gpu_async *agpu)
+{
+  int pos_used, space;
+  pos_used = *(volatile uint32_t *)&agpu->pos_used;
+  space = AGPU_BUF_LEN - (agpu->pos_added - pos_used);
+  assert(space >= 0);
+  assert(space <= AGPU_BUF_LEN);
+  return space;
+}
+
+// adds everything or nothing, else we may get incomplete cmd
+static int do_add(struct psx_gpu_async *agpu, const uint32_t *list, int len)
+{
+ int pos, space, left, retval = 0;
+ uint32_t pos_added = agpu->pos_added;
+
+ assert(len < AGPU_BUF_LEN);
+ space = calc_space_for_add(agpu);
+ if (space < len)
+ return 0;
+
+ pos = pos_added & AGPU_BUF_MASK;
+ left = AGPU_BUF_LEN - pos;
+ if (left < len) {
+ memset(&agpu->cmd_buffer[pos], 0, left * 4);
+ pos_added += left;
+ pos = 0;
+ space = calc_space_for_add(agpu);
+ }
+ if (space >= len) {
+ memcpy(&agpu->cmd_buffer[pos], list, len * 4);
+ pos_added += len;
+ retval = len;
+ }
+ BARRIER();
+ *(volatile uint32_t *)&agpu->pos_added = pos_added;
+ return retval;
+}
+
+// Like do_add() but blocks until the whole command fits.
+// NOTE(review): relies on the worker thread being active -- if the worker
+// were idle with a full buffer nobody would signal cond_add; the asserts
+// below check that precondition in debug builds.
+static void do_add_with_wait(struct psx_gpu_async *agpu, const uint32_t *list, int len)
+{
+  for (;;)
+  {
+    if (do_add(agpu, list, len))
+      break;
+    slock_lock(agpu->lock);
+    // wait for the consumer to free enough space for the whole command
+    while (len > AGPU_BUF_LEN - (agpu->pos_added - agpu->pos_used)) {
+      assert(!agpu->idle);
+      assert(agpu->wait_mode == waitmode_none);
+      agpu->wait_mode = waitmode_progress;
+      scond_wait(agpu->cond_add, agpu->lock);
+      agpu->wait_mode = waitmode_none;
+    }
+    slock_unlock(agpu->lock);
+  }
+}
+
+// Wake the worker thread if it is parked; caller must hold agpu->lock.
+static void run_thread_nolock(struct psx_gpu_async *agpu)
+{
+  if (!agpu->idle)
+    return;
+  agpu->idle = 0;
+  scond_signal(agpu->cond_use);
+}
+
+// Locking wrapper around run_thread_nolock().
+static void run_thread(struct psx_gpu_async *agpu)
+{
+  slock_lock(agpu->lock);
+  run_thread_nolock(agpu);
+  slock_unlock(agpu->lock);
+}
+
+// Producer-side command list scan: accounts GPU timing, maintains the
+// emulation thread's Ex register shadow (gpu->ex_regs) and forwards the
+// renderable commands into the ring buffer for the worker thread.
+// Stops early on the irq command (0x1f) and on image i/o (0xa0..0xdf),
+// leaving those to the caller.  Returns the number of words consumed
+// from list_data; *last_cmd gets the last parsed cmd or -1 on an
+// incomplete command.
+int gpu_async_do_cmd_list(struct psx_gpu *gpu, uint32_t *list_data, int list_len,
+    int *cpu_cycles_sum_out, int *cpu_cycles_last, int *last_cmd)
+{
+  uint32_t cyc_sum = 0, cyc = *cpu_cycles_last;
+  struct psx_gpu_async *agpu = gpu->async;
+  int dst_added = 0, dst_can_add = 1;
+  int rendered_anything = 0;
+  int cmd = -1, pos, len;
+
+  assert(agpu);
+  for (pos = 0; pos < list_len; pos += len)
+  {
+    const uint32_t *list = list_data + pos;
+    const int16_t *slist = (void *)list;
+    int rendered = 1, skip = 0;
+    int num_vertexes, w, h;
+
+    cmd = LE32TOH(list[0]) >> 24;
+    len = 1 + cmd_lengths[cmd];
+    if (pos + len > list_len) {
+      cmd = -1;
+      break; // incomplete cmd
+    }
+
+    switch (cmd) {
+    case 0x02:
+      w = LE16TOH(slist[4]) & 0x3FF;
+      h = LE16TOH(slist[5]) & 0x1FF;
+      gput_sum(cyc_sum, cyc, gput_fill(w, h));
+      break;
+    case 0x1f: // irq?
+      goto breakloop;
+    case 0x20 ... 0x23: gput_sum(cyc_sum, cyc, gput_poly_base()); break;
+    case 0x24 ... 0x27: gput_sum(cyc_sum, cyc, gput_poly_base_t()); goto do_texpage;
+    case 0x28 ... 0x2b: gput_sum(cyc_sum, cyc, gput_quad_base()); break;
+    case 0x2c ... 0x2f: gput_sum(cyc_sum, cyc, gput_quad_base_t()); goto do_texpage;
+    case 0x30 ... 0x33: gput_sum(cyc_sum, cyc, gput_poly_base_g()); break;
+    case 0x34 ... 0x37: gput_sum(cyc_sum, cyc, gput_poly_base_gt()); goto do_texpage;
+    case 0x38 ... 0x3b: gput_sum(cyc_sum, cyc, gput_quad_base_g()); break;
+    case 0x3c ... 0x3f: gput_sum(cyc_sum, cyc, gput_quad_base_gt());
+    do_texpage:
+      // textured prims update the texpage bits of E1; the texpage word is
+      // at a cmd-dependent offset (quads vs triangles)
+      gpu->ex_regs[1] &= ~0x1ff;
+      gpu->ex_regs[1] |= (LE32TOH(list[4 + ((cmd >> 4) & 1)]) >> 16) & 0x1ff;
+      break;
+    case 0x40 ... 0x47:
+      gput_sum(cyc_sum, cyc, gput_line(0));
+      break;
+    case 0x48 ... 0x4F:
+      // poly-line: scan for the 0x5xxx5xxx terminator to find the length
+      for (num_vertexes = 2; ; num_vertexes++)
+      {
+        gput_sum(cyc_sum, cyc, gput_line(0));
+        if (pos + num_vertexes + 1 >= list_len) {
+          cmd = -1;
+          goto breakloop;
+        }
+        if ((list[num_vertexes + 1] & LE32TOH(0xf000f000)) == LE32TOH(0x50005000))
+          break;
+      }
+      len += (num_vertexes - 2);
+      break;
+    case 0x50 ... 0x57:
+      gput_sum(cyc_sum, cyc, gput_line(0));
+      break;
+    case 0x58 ... 0x5f:
+      // shaded poly-line: 2 words per vertex
+      for (num_vertexes = 2; ; num_vertexes++)
+      {
+        gput_sum(cyc_sum, cyc, gput_line(0));
+        if (pos + num_vertexes*2 >= list_len) {
+          cmd = -1;
+          goto breakloop;
+        }
+        if ((list[num_vertexes * 2] & LE32TOH(0xf000f000)) == LE32TOH(0x50005000))
+          break;
+      }
+      len += (num_vertexes - 2) * 2;
+      break;
+    case 0x60 ... 0x63:
+      w = LE16TOH(slist[4]) & 0x3FF;
+      h = LE16TOH(slist[5]) & 0x1FF;
+      gput_sum(cyc_sum, cyc, gput_sprite(w, h));
+      break;
+    case 0x64 ... 0x67:
+      w = LE16TOH(slist[6]) & 0x3FF;
+      h = LE16TOH(slist[7]) & 0x1FF;
+      gput_sum(cyc_sum, cyc, gput_sprite(w, h));
+      break;
+    case 0x68 ... 0x6b: gput_sum(cyc_sum, cyc, gput_sprite(1, 1)); break;
+    case 0x70 ... 0x73:
+    case 0x74 ... 0x77: gput_sum(cyc_sum, cyc, gput_sprite(8, 8)); break;
+    case 0x78 ... 0x7b:
+    case 0x7C ... 0x7f: gput_sum(cyc_sum, cyc, gput_sprite(16, 16)); break;
+    case 0x80 ... 0x9f: // vid -> vid
+      w = ((LE16TOH(slist[6]) - 1) & 0x3ff) + 1;
+      h = ((LE16TOH(slist[7]) - 1) & 0x1ff) + 1;
+      gput_sum(cyc_sum, cyc, gput_copy(w, h));
+      break;
+    case 0xa0 ... 0xbf: // sys -> vid
+    case 0xc0 ... 0xdf: // vid -> sys
+      goto breakloop;
+    case 0xe0 ... 0xe7:
+      // Ex register writes: shadowed here, queued for the worker too
+      gpu->ex_regs[cmd & 7] = LE32TOH(list[0]);
+      rendered = 0;
+      break;
+    default:
+      // unknown cmd: counted as consumed but never queued
+      rendered = 0;
+      skip = 1;
+      break;
+    }
+    rendered_anything |= rendered;
+    if (dst_can_add) {
+      if (!skip) {
+        // once do_add() fails (buffer full) stop queueing, but keep
+        // parsing for timing; the remainder is queued below with a wait
+        int added = dst_can_add = do_add(agpu, list, len);
+        dst_added += added;
+      }
+      else
+        dst_added += len;
+    }
+  }
+breakloop:
+  // wake the worker if we queued actual draw work or fell behind
+  if (dst_added && (rendered_anything || dst_added < pos))
+    run_thread(agpu);
+  if (dst_added < pos) {
+    int left = pos - dst_added;
+    agpu_log(gpu, "agpu: wait %d left %d\n", agpu->pos_added - agpu->pos_used, left);
+    do_add_with_wait(agpu, list_data + dst_added, left);
+  }
+
+  *cpu_cycles_sum_out += cyc_sum;
+  *cpu_cycles_last = cyc;
+  *last_cmd = cmd;
+  return pos;
+}
+
+// Worker thread main loop: drains the ring buffer through
+// renderer_do_cmd_list(), handling the commands the renderer rejects
+// (vram->vram copy, the fake screen-change command) itself, and flushes
+// the renderer queues once the buffer runs empty.
+// STRHEAD_RET_TYPE/STRHEAD_RETURN presumably come from pcsxr-threads.h --
+// NOTE(review): confirm the spelling matches that header.
+static STRHEAD_RET_TYPE gpu_async_thread(void *unused)
+{
+  struct psx_gpu *gpup = &gpu;
+  struct psx_gpu_async *agpu = gpup->async;
+  int dirty = 0; // renderer has unflushed work since the last flush
+
+  assert(agpu);
+  slock_lock(agpu->lock);
+  while (!agpu->exit)
+  {
+    int len = agpu->pos_added - agpu->pos_used;
+    int pos, done, cycles_dummy = 0, cmd = -1;
+    assert(len >= 0);
+    if (len == 0 && !dirty) {
+      // fully drained and flushed: wake a full-sync waiter, then park
+      if (agpu->wait_mode == waitmode_full)
+        scond_signal(agpu->cond_add);
+      agpu->idle = 1;
+      scond_wait(agpu->cond_use, agpu->lock);
+      continue;
+    }
+    slock_unlock(agpu->lock);
+
+    if (len == 0 && dirty) {
+      // buffer empty but renderer still has queued work - flush it
+      renderer_flush_queues();
+      dirty = 0;
+      slock_lock(agpu->lock);
+      continue;
+    }
+
+    // process at most up to the buffer wrap point in one go
+    pos = agpu->pos_used & AGPU_BUF_MASK;
+    len = min(len, AGPU_BUF_LEN - pos);
+    done = renderer_do_cmd_list(agpu->cmd_buffer + pos, len, agpu->ex_regs,
+        &cycles_dummy, &cycles_dummy, &cmd);
+    if (done != len) {
+      // renderer stopped early - dispatch the command it can't handle
+      if (0x80 <= cmd && cmd < 0xa0)
+        done += do_vram_copy(gpup->vram, agpu->ex_regs,
+            agpu->cmd_buffer + pos + done, &cycles_dummy);
+      else if (cmd == FAKECMD_SCREEN_CHANGE)
+        done += do_notify_screen_change(gpup,
+            (const void *)(agpu->cmd_buffer + pos + done));
+      else if (0xa0 <= cmd && cmd < 0xec)
+        assert(0); // todo?
+      else
+        assert(0); // should not happen
+    }
+
+    dirty = 1;
+    assert(done > 0);
+    slock_lock(agpu->lock);
+    agpu->pos_used += done;
+    // unblock a producer waiting for buffer space
+    if (agpu->wait_mode == waitmode_progress)
+      scond_signal(agpu->cond_add);
+  }
+  slock_unlock(agpu->lock);
+  STRHEAD_RETURN();
+}
+
+// Producer side: queue a FAKECMD_SCREEN_CHANGE carrying the current
+// screen state so the worker applies it in order with the draw commands.
+// Only done while the enhancement renderer is active (gpu_neon needs it).
+void gpu_async_notify_screen_change(struct psx_gpu *gpu)
+{
+  union cmd_screen_change cmd;
+
+  if (!gpu->async || !gpu->state.enhancement_active) // gpu_neon only
+    return;
+  cmd.cmd = HTOLE32(FAKECMD_SCREEN_CHANGE << 24);
+  cmd.x = gpu->screen.x;
+  cmd.y = gpu->screen.y;
+  cmd.hres = gpu->screen.hres;
+  cmd.vres = gpu->screen.vres;
+  cmd.src_x = gpu->screen.src_x;
+  cmd.src_y = gpu->screen.src_y;
+  do_add_with_wait(gpu->async, cmd.u32s, sizeof(cmd) / 4);
+}
+
+// Worker-side handler for FAKECMD_SCREEN_CHANGE: rebuild the screen state
+// from the queued snapshot and hand it to the renderer.
+// Returns the number of buffer words consumed (the whole 4-word payload).
+static int noinline do_notify_screen_change(struct psx_gpu *gpu,
+    const union cmd_screen_change *cmd)
+{
+  struct psx_gpu_screen scr = gpu->screen;
+
+  scr.hres = cmd->hres;
+  scr.vres = cmd->vres;
+  scr.x = cmd->x;
+  scr.y = cmd->y;
+  scr.src_x = cmd->src_x;
+  scr.src_y = cmd->src_y;
+  renderer_notify_screen_change(&scr);
+  return sizeof(*cmd) / 4;
+}
+
+// Block until the worker has consumed the whole ring buffer and gone
+// idle, i.e. vram and renderer state are fully up to date.  No-op when
+// async mode is off or everything is already drained.
+void gpu_async_sync(struct psx_gpu *gpu)
+{
+  struct psx_gpu_async *agpu = gpu->async;
+
+  // unlocked fast path: only this (producer) thread adds work, so a
+  // drained+idle observation here is stable
+  if (!agpu || (agpu->idle && agpu->pos_added == agpu->pos_used))
+    return;
+  agpu_log(gpu, "agpu: stall %d\n", agpu->pos_added - agpu->pos_used);
+  slock_lock(agpu->lock);
+  if (agpu->idle && agpu->pos_added != agpu->pos_used)
+    run_thread_nolock(agpu);
+  // re-check the predicate in a loop: condition variables may wake
+  // spuriously, and a single wait would then return before the drain
+  while (!agpu->idle || agpu->pos_added != agpu->pos_used) {
+    assert(agpu->wait_mode == waitmode_none);
+    agpu->wait_mode = waitmode_full;
+    scond_wait(agpu->cond_add, agpu->lock);
+    agpu->wait_mode = waitmode_none;
+  }
+  slock_unlock(agpu->lock);
+  assert(agpu->pos_added == agpu->pos_used);
+  assert(agpu->idle);
+}
+
+// Queue the current E1-E6 register values (gpu->ex_regs[1..6]) so the
+// worker's parser state matches the emulation thread's.
+// NOTE(review): writes the buffer and pos_added directly, with no BARRIER
+// and no worker wakeup -- presumably only safe while the worker cannot be
+// racing on this range (e.g. right after gpu_async_start()); confirm all
+// callers satisfy that.  On a full buffer entries are silently dropped in
+// release builds (the assert only fires in debug).
+void gpu_async_sync_ecmds(struct psx_gpu *gpu)
+{
+  struct psx_gpu_async *agpu = gpu->async;
+  int i;
+
+  if (agpu) {
+    for (i = 0; i < 6 && agpu->pos_added - agpu->pos_used < AGPU_BUF_LEN; i++)
+      agpu->cmd_buffer[agpu->pos_added++ & AGPU_BUF_MASK] = gpu->ex_regs[i + 1];
+    assert(i == 6);
+  }
+}
+
+// Ask the worker to exit, join it, then release the synchronization
+// primitives and the structure itself.  Tolerates partially-constructed
+// state (NULL members) from a failed gpu_async_start().
+static void psx_gpu_async_free(struct psx_gpu_async *agpu)
+{
+  agpu->exit = 1;
+  if (agpu->lock) {
+    // wake the worker if it is parked so it can observe `exit`
+    slock_lock(agpu->lock);
+    if (agpu->cond_use)
+      scond_signal(agpu->cond_use);
+    slock_unlock(agpu->lock);
+  }
+  if (agpu->thread) {
+    sthread_join(agpu->thread);
+    agpu->thread = NULL;
+  }
+  if (agpu->cond_add) { scond_free(agpu->cond_add); agpu->cond_add = NULL; }
+  if (agpu->cond_use) { scond_free(agpu->cond_use); agpu->cond_use = NULL; }
+  if (agpu->lock) { slock_free(agpu->lock); agpu->lock = NULL; }
+  free(agpu);
+}
+
+// Drain the queue, terminate the worker thread and free all async state;
+// safe to call when async mode was never started.
+void gpu_async_stop(struct psx_gpu *gpu)
+{
+  struct psx_gpu_async *agpu = gpu->async;
+
+  if (agpu == NULL)
+    return;
+  gpu_async_sync(gpu);
+  gpu->async = NULL;
+  psx_gpu_async_free(agpu);
+}
+
+// Allocate the async state and spawn the worker thread.  On any failure
+// (allocation, primitives, thread creation) everything is released and
+// gpu->async stays NULL, leaving the synchronous path in effect.
+void gpu_async_start(struct psx_gpu *gpu)
+{
+  struct psx_gpu_async *agpu;
+  if (gpu->async)
+    return;
+
+  agpu = calloc(1, sizeof(*agpu));
+  if (agpu) {
+    agpu->lock = slock_new();
+    agpu->cond_add = scond_new();
+    agpu->cond_use = scond_new();
+    if (agpu->lock && agpu->cond_add && agpu->cond_use) {
+      // publish gpu->async before the thread starts - the thread reads it
+      gpu->async = agpu;
+      agpu->thread = pcsxr_sthread_create(gpu_async_thread, PCSXRT_GPU);
+    }
+    if (agpu->thread) {
+      // seed the worker's parser state with the current E1-E6 values
+      gpu_async_sync_ecmds(gpu);
+      return;
+    }
+  }
+
+  SysPrintf("gpu thread init failed\n");
+  gpu->async = NULL;
+  if (agpu)
+    psx_gpu_async_free(agpu);
+}
+
+// vim:shiftwidth=2:expandtab
--- /dev/null
+// Public interface of the asynchronous (threaded) gpulib GPU path.
+// When built without USE_ASYNC_GPU every entry point collapses to a
+// no-op macro so callers need no #ifdefs of their own.
+#ifndef __GPULIB_GPU_ASYNC_H__
+#define __GPULIB_GPU_ASYNC_H__
+
+#include <stdint.h>
+
+struct psx_gpu;
+struct psx_gpu_async;
+
+#ifdef USE_ASYNC_GPU
+
+// nonzero when the worker thread is up (gpu->async allocated)
+#define gpu_async_enabled(gpu) ((gpu)->async)
+
+int gpu_async_do_cmd_list(struct psx_gpu *gpu, uint32_t *list, int list_len,
+    int *cycles_sum_out, int *cycles_last, int *last_cmd);
+void gpu_async_start(struct psx_gpu *gpu);
+void gpu_async_stop(struct psx_gpu *gpu);
+void gpu_async_sync(struct psx_gpu *gpu);
+void gpu_async_sync_ecmds(struct psx_gpu *gpu);
+void gpu_async_notify_screen_change(struct psx_gpu *gpu);
+
+#else
+
+// synchronous build: report disabled, consume the whole list, do nothing
+#define gpu_async_enabled(gpu) 0
+#define gpu_async_do_cmd_list(gpu, list, list_len, c0, c1, cmd) (list_len)
+#define gpu_async_start(gpu)
+#define gpu_async_stop(gpu)
+#define gpu_async_sync(gpu) do {} while (0)
+#define gpu_async_sync_ecmds(gpu)
+#define gpu_async_notify_screen_change(gpu)
+
+#endif
+
+#endif // __GPULIB_GPU_ASYNC_H__
// very conservative and wrong
-#define gput_fill(w, h) (23 + (4 + (w) / 16u) * (h))
+#define gput_fill(w, h) (23 + (4 + ((w) + 15) / 16u) * (h))
#define gput_copy(w, h) ((w) * (h))
#define gput_poly_base() (23)
#define gput_poly_base_t() (gput_poly_base() + 90)
+++ /dev/null
-/**************************************************************************
-* Copyright (C) 2020 The RetroArch Team *
-* *
-* This program is free software; you can redistribute it and/or modify *
-* it under the terms of the GNU General Public License as published by *
-* the Free Software Foundation; either version 2 of the License, or *
-* (at your option) any later version. *
-* *
-* This program is distributed in the hope that it will be useful, *
-* but WITHOUT ANY WARRANTY; without even the implied warranty of *
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
-* GNU General Public License for more details. *
-* *
-* You should have received a copy of the GNU General Public License *
-* along with this program; if not, write to the *
-* Free Software Foundation, Inc., *
-* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
-***************************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <pthread.h>
-#include "../gpulib/gpu.h"
-#include "../../frontend/plugin_lib.h"
-#include "gpu.h"
-#include "gpu_timing.h"
-#include "gpulib_thread_if.h"
-
-extern void SysPrintf(const char *fmt, ...);
-
-#define FALSE 0
-#define TRUE 1
-#define BOOL unsigned short
-
-typedef struct {
- uint32_t *cmd_list;
- int count;
- int last_cmd;
-} video_thread_cmd;
-
-#define QUEUE_SIZE 0x2000
-
-typedef struct {
- size_t start;
- size_t end;
- size_t used;
- video_thread_cmd queue[QUEUE_SIZE];
-} video_thread_queue;
-
-typedef struct {
- pthread_t thread;
- pthread_mutex_t queue_lock;
- pthread_cond_t cond_msg_avail;
- pthread_cond_t cond_msg_done;
- pthread_cond_t cond_queue_empty;
- video_thread_queue *queue;
- video_thread_queue *bg_queue;
- BOOL running;
-} video_thread_state;
-
-static video_thread_state thread;
-static video_thread_queue queues[2];
-static int thread_rendering;
-static BOOL hold_cmds;
-static BOOL needs_display;
-static BOOL flushed;
-
-extern const unsigned char cmd_lengths[];
-
-static void *video_thread_main(void *arg) {
- video_thread_cmd *cmd;
- int i;
-
-#ifdef _3DS
- static int processed = 0;
-#endif /* _3DS */
-
-#if defined(__arm__) && defined(__ARM_FP)
- // RunFast mode
- uint32_t fpscr = ~0;
- __asm__ volatile("vmrs %0, fpscr" : "=r"(fpscr));
- fpscr &= ~0x00009f9f;
- fpscr |= 0x03000000; // DN | FZ
- __asm__ volatile("vmsr fpscr, %0" :: "r"(fpscr));
-#endif
-
- while(1) {
- int result, cycles_dummy = 0, last_cmd, start, end;
- video_thread_queue *queue;
- pthread_mutex_lock(&thread.queue_lock);
-
- while (!thread.queue->used && thread.running) {
- pthread_cond_wait(&thread.cond_msg_avail, &thread.queue_lock);
- }
-
- if (!thread.running) {
- pthread_mutex_unlock(&thread.queue_lock);
- break;
- }
-
- queue = thread.queue;
- start = queue->start;
- end = queue->end > queue->start ? queue->end : QUEUE_SIZE;
- queue->start = end % QUEUE_SIZE;
- pthread_mutex_unlock(&thread.queue_lock);
-
- for (i = start; i < end; i++) {
- cmd = &queue->queue[i];
- result = real_do_cmd_list(cmd->cmd_list, cmd->count,
- &cycles_dummy, &cycles_dummy, &last_cmd);
- if (result != cmd->count) {
- fprintf(stderr, "Processed wrong cmd count: expected %d, got %d\n", cmd->count, result);
- }
-
-#ifdef _3DS
- /* Periodically yield so as not to starve other threads */
- processed += cmd->count;
- if (processed >= 512) {
- svcSleepThread(1);
- processed %= 512;
- }
-#endif /* _3DS */
- }
-
- pthread_mutex_lock(&thread.queue_lock);
- queue->used -= (end - start);
-
- if (!queue->used)
- pthread_cond_signal(&thread.cond_queue_empty);
-
- pthread_cond_signal(&thread.cond_msg_done);
- pthread_mutex_unlock(&thread.queue_lock);
- }
-
- return 0;
-}
-
-static void cmd_queue_swap() {
- video_thread_queue *tmp;
- if (!thread.bg_queue->used) return;
-
- pthread_mutex_lock(&thread.queue_lock);
- if (!thread.queue->used) {
- tmp = thread.queue;
- thread.queue = thread.bg_queue;
- thread.bg_queue = tmp;
- pthread_cond_signal(&thread.cond_msg_avail);
- }
- pthread_mutex_unlock(&thread.queue_lock);
-}
-
-/* Waits for the main queue to completely finish. */
-void renderer_wait() {
- if (!thread.running) return;
-
- /* Not completely safe, but should be fine since the render thread
- * only decreases used, and we check again inside the lock. */
- if (!thread.queue->used) {
- return;
- }
-
- pthread_mutex_lock(&thread.queue_lock);
-
- while (thread.queue->used) {
- pthread_cond_wait(&thread.cond_queue_empty, &thread.queue_lock);
- }
-
- pthread_mutex_unlock(&thread.queue_lock);
-}
-
-/* Waits for all GPU commands in both queues to finish, bringing VRAM
- * completely up-to-date. */
-void renderer_sync(void) {
- if (!thread.running) return;
-
- /* Not completely safe, but should be fine since the render thread
- * only decreases used, and we check again inside the lock. */
- if (!thread.queue->used && !thread.bg_queue->used) {
- return;
- }
-
- if (thread.bg_queue->used) {
- /* When we flush the background queue, the vblank handler can't
- * know that we had a frame pending, and we delay rendering too
- * long. Force it. */
- flushed = TRUE;
- }
-
- /* Flush both queues. This is necessary because gpulib could be
- * trying to process a DMA write that a command in the queue should
- * run beforehand. For example, Xenogears sprites write a black
- * rectangle over the to-be-DMA'd spot in VRAM -- if this write
- * happens after the DMA, it will clear the DMA, resulting in
- * flickering sprites. We need to be totally up-to-date. This may
- * drop a frame. */
- renderer_wait();
- cmd_queue_swap();
- hold_cmds = FALSE;
- renderer_wait();
-}
-
-static void video_thread_stop() {
- int i;
- renderer_sync();
-
- if (thread.running) {
- thread.running = FALSE;
- pthread_cond_signal(&thread.cond_msg_avail);
- pthread_join(thread.thread, NULL);
- }
-
- pthread_mutex_destroy(&thread.queue_lock);
- pthread_cond_destroy(&thread.cond_msg_avail);
- pthread_cond_destroy(&thread.cond_msg_done);
- pthread_cond_destroy(&thread.cond_queue_empty);
-
- for (i = 0; i < QUEUE_SIZE; i++) {
- video_thread_cmd *cmd = &thread.queue->queue[i];
- free(cmd->cmd_list);
- cmd->cmd_list = NULL;
- }
-
- for (i = 0; i < QUEUE_SIZE; i++) {
- video_thread_cmd *cmd = &thread.bg_queue->queue[i];
- free(cmd->cmd_list);
- cmd->cmd_list = NULL;
- }
-}
-
-static void video_thread_start() {
- SysPrintf("Starting render thread\n");
-
- thread.queue = &queues[0];
- thread.bg_queue = &queues[1];
- thread.running = TRUE;
-
- if (pthread_cond_init(&thread.cond_msg_avail, NULL) ||
- pthread_cond_init(&thread.cond_msg_done, NULL) ||
- pthread_cond_init(&thread.cond_queue_empty, NULL) ||
- pthread_mutex_init(&thread.queue_lock, NULL) ||
- pthread_create(&thread.thread, NULL, video_thread_main, &thread)) {
- goto error;
- }
-
- return;
-
- error:
- SysPrintf("Failed to start rendering thread\n");
- thread.running = FALSE;
- video_thread_stop();
-}
-
-static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
- video_thread_cmd *cmd;
- uint32_t *cmd_list;
- video_thread_queue *queue;
- BOOL lock;
-
- cmd_list = (uint32_t *)calloc(count, sizeof(uint32_t));
-
- if (!cmd_list) {
- /* Out of memory, disable the thread and run sync from now on */
- SysPrintf("Failed to allocate render thread command list, stopping thread\n");
- video_thread_stop();
- }
-
- memcpy(cmd_list, list, count * sizeof(uint32_t));
-
- if (hold_cmds && thread.bg_queue->used >= QUEUE_SIZE) {
- /* If the bg queue is full, do a full sync to empty both queues
- * and clear space. This should be very rare, I've only seen it in
- * Tekken 3 post-battle-replay. */
- renderer_sync();
- }
-
- if (hold_cmds) {
- queue = thread.bg_queue;
- lock = FALSE;
- } else {
- queue = thread.queue;
- lock = TRUE;
- }
-
- if (lock) {
- pthread_mutex_lock(&thread.queue_lock);
-
- while (queue->used >= QUEUE_SIZE) {
- pthread_cond_wait(&thread.cond_msg_done, &thread.queue_lock);
- }
- }
-
- cmd = &queue->queue[queue->end];
- free(cmd->cmd_list);
- cmd->cmd_list = cmd_list;
- cmd->count = count;
- cmd->last_cmd = last_cmd;
- queue->end = (queue->end + 1) % QUEUE_SIZE;
- queue->used++;
-
- if (lock) {
- pthread_cond_signal(&thread.cond_msg_avail);
- pthread_mutex_unlock(&thread.queue_lock);
- }
-}
-
-/* Slice off just the part of the list that can be handled async, and
- * update ex_regs. */
-static int scan_cmd_list(uint32_t *data, int count,
- int *cycles_sum_out, int *cycles_last, int *last_cmd)
-{
- int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
- int cmd = 0, pos = 0, len, v;
-
- while (pos < count) {
- uint32_t *list = data + pos;
- short *slist = (void *)list;
- cmd = LE32TOH(list[0]) >> 24;
- len = 1 + cmd_lengths[cmd];
-
- switch (cmd) {
- case 0x02:
- gput_sum(cpu_cycles_sum, cpu_cycles,
- gput_fill(LE16TOH(slist[4]) & 0x3ff,
- LE16TOH(slist[5]) & 0x1ff));
- break;
- case 0x20 ... 0x23:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
- break;
- case 0x24 ... 0x27:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
- break;
- case 0x28 ... 0x2b:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
- break;
- case 0x2c ... 0x2f:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
- break;
- case 0x30 ... 0x33:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
- break;
- case 0x34 ... 0x37:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
- break;
- case 0x38 ... 0x3b:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
- break;
- case 0x3c ... 0x3f:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
- break;
- case 0x40 ... 0x47:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
- break;
- case 0x48 ... 0x4F:
- for (v = 3; pos + v < count; v++)
- {
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
- if ((list[v] & 0xf000f000) == 0x50005000)
- break;
- }
- len += v - 3;
- break;
- case 0x50 ... 0x57:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
- break;
- case 0x58 ... 0x5F:
- for (v = 4; pos + v < count; v += 2)
- {
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
- if ((list[v] & 0xf000f000) == 0x50005000)
- break;
- }
- len += v - 4;
- break;
- case 0x60 ... 0x63:
- gput_sum(cpu_cycles_sum, cpu_cycles,
- gput_sprite(LE16TOH(slist[4]) & 0x3ff,
- LE16TOH(slist[5]) & 0x1ff));
- break;
- case 0x64 ... 0x67:
- gput_sum(cpu_cycles_sum, cpu_cycles,
- gput_sprite(LE16TOH(slist[6]) & 0x3ff,
- LE16TOH(slist[7]) & 0x1ff));
- break;
- case 0x68 ... 0x6b:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
- break;
- case 0x70 ... 0x77:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8));
- break;
- case 0x78 ... 0x7f:
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16));
- break;
- default:
- if ((cmd & 0xf8) == 0xe0)
- gpu.ex_regs[cmd & 7] = list[0];
- break;
- }
-
- if (pos + len > count) {
- cmd = -1;
- break; /* incomplete cmd */
- }
- if (0x80 <= cmd && cmd <= 0xdf)
- break; /* image i/o */
-
- pos += len;
- }
-
- *cycles_sum_out += cpu_cycles_sum;
- *cycles_last = cpu_cycles;
- *last_cmd = cmd;
- return pos;
-}
-
-int do_cmd_list(uint32_t *list, int count,
- int *cycles_sum, int *cycles_last, int *last_cmd)
-{
- int pos = 0;
-
- if (thread.running) {
- pos = scan_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
- video_thread_queue_cmd(list, pos, *last_cmd);
- } else {
- pos = real_do_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
- memcpy(gpu.ex_regs, gpu.scratch_ex_regs, sizeof(gpu.ex_regs));
- }
- return pos;
-}
-
-int renderer_init(void) {
- if (thread_rendering) {
- video_thread_start();
- }
- return real_renderer_init();
-}
-
-void renderer_finish(void) {
- real_renderer_finish();
-
- if (thread_rendering && thread.running) {
- video_thread_stop();
- }
-}
-
-void renderer_sync_ecmds(uint32_t * ecmds) {
- if (thread.running) {
- int dummy = 0;
- do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
- } else {
- real_renderer_sync_ecmds(ecmds);
- }
-}
-
-void renderer_update_caches(int x, int y, int w, int h, int state_changed) {
- renderer_sync();
- real_renderer_update_caches(x, y, w, h, state_changed);
-}
-
-void renderer_flush_queues(void) {
- /* Called during DMA and updateLace. We want to sync if it's DMA,
- * but not if it's updateLace. Instead of syncing here, there's a
- * renderer_sync call during DMA. */
- real_renderer_flush_queues();
-}
-
-/*
- * Normally all GPU commands are processed before rendering the
- * frame. For games that naturally run < 50/60fps, this is unnecessary
- * -- it forces the game to render as if it was 60fps and leaves the
- * GPU idle half the time on a 30fps game, for example.
- *
- * Allowing the renderer to wait until a frame is done before
- * rendering it would give it double, triple, or quadruple the amount
- * of time to finish before we have to wait for it.
- *
- * We can use a heuristic to figure out when to force a render.
- *
- * - If a frame isn't done when we're asked to render, wait for it and
- * put future GPU commands in a separate buffer (for the next frame)
- *
- * - If the frame is done, and had no future GPU commands, render it.
- *
- * - If we do have future GPU commands, it meant the frame took too
- * long to render and there's another frame waiting. Stop until the
- * first frame finishes, render it, and start processing the next
- * one.
- *
- * This may possibly add a frame or two of latency that shouldn't be
- * different than the real device. It may skip rendering a frame
- * entirely if a VRAM transfer happens while a frame is waiting, or in
- * games that natively run at 60fps if frames are coming in too
- * quickly to process. Depending on how the game treats "60fps," this
- * may not be noticeable.
- */
-void renderer_notify_update_lace(int updated) {
- if (!thread.running) return;
-
- if (thread_rendering == THREAD_RENDERING_SYNC) {
- renderer_sync();
- return;
- }
-
- if (updated) {
- cmd_queue_swap();
- return;
- }
-
- pthread_mutex_lock(&thread.queue_lock);
- if (thread.bg_queue->used || flushed) {
- /* We have commands for a future frame to run. Force a wait until
- * the current frame is finished, and start processing the next
- * frame after it's drawn (see the `updated` clause above). */
- pthread_mutex_unlock(&thread.queue_lock);
- renderer_wait();
- pthread_mutex_lock(&thread.queue_lock);
-
- /* We are no longer holding commands back, so the next frame may
- * get mixed into the following frame. This is usually fine, but can
- * result in frameskip-like effects for 60fps games. */
- flushed = FALSE;
- hold_cmds = FALSE;
- needs_display = TRUE;
- gpu.state.fb_dirty = TRUE;
- } else if (thread.queue->used) {
- /* We are still drawing during a vblank. Cut off the current frame
- * by sending new commands to the background queue and skip
- * drawing our partly rendered frame to the display. */
- hold_cmds = TRUE;
- needs_display = TRUE;
- gpu.state.fb_dirty = FALSE;
- } else if (needs_display && !thread.queue->used) {
- /* We have processed all commands in the queue, render the
- * buffer. We know we have something to render, because
- * needs_display is TRUE. */
- hold_cmds = FALSE;
- needs_display = FALSE;
- gpu.state.fb_dirty = TRUE;
- } else {
- /* Everything went normally, so do the normal thing. */
- }
-
- pthread_mutex_unlock(&thread.queue_lock);
-}
-
-void renderer_set_interlace(int enable, int is_odd) {
- real_renderer_set_interlace(enable, is_odd);
-}
-
-void renderer_set_config(const struct rearmed_cbs *cbs) {
- renderer_sync();
- thread_rendering = cbs->thread_rendering;
- if (!thread.running && thread_rendering != THREAD_RENDERING_OFF) {
- video_thread_start();
- } else if (thread.running && thread_rendering == THREAD_RENDERING_OFF) {
- video_thread_stop();
- }
- real_renderer_set_config(cbs);
-}
-
-void renderer_notify_res_change(void) {
- renderer_sync();
- real_renderer_notify_res_change();
-}
+++ /dev/null
-/**************************************************************************
-* Copyright (C) 2020 The RetroArch Team *
-* *
-* This program is free software; you can redistribute it and/or modify *
-* it under the terms of the GNU General Public License as published by *
-* the Free Software Foundation; either version 2 of the License, or *
-* (at your option) any later version. *
-* *
-* This program is distributed in the hope that it will be useful, *
-* but WITHOUT ANY WARRANTY; without even the implied warranty of *
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
-* GNU General Public License for more details. *
-* *
-* You should have received a copy of the GNU General Public License *
-* along with this program; if not, write to the *
-* Free Software Foundation, Inc., *
-* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
-***************************************************************************/
-
-#ifndef __GPULIB_THREAD_H__
-#define __GPULIB_THREAD_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int real_do_cmd_list(uint32_t *list, int count,
- int *cycles_sum_out, int *cycles_last, int *last_cmd);
-int real_renderer_init(void);
-void real_renderer_finish(void);
-void real_renderer_sync_ecmds(uint32_t * ecmds);
-void real_renderer_update_caches(int x, int y, int w, int h, int state_changed);
-void real_renderer_flush_queues(void);
-void real_renderer_set_interlace(int enable, int is_odd);
-void real_renderer_set_config(const struct rearmed_cbs *config);
-void real_renderer_notify_res_change(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* __GPULIB_THREAD_H__ */
int main(int argc, char *argv[])
{
unsigned int start_cycles;
+ uint32_t ex_regs[8] = { 0, };
uint32_t *list;
int size, dummy;
FILE *state_file;
start_cycles = pcnt_get();
- do_cmd_list(list, size / 4, &dummy, &dummy);
+ renderer_do_cmd_list(list, size / 4, ex_regs, &dummy, &dummy, &dummy);
renderer_flush_queues();
printf("%u\n", pcnt_get() - start_cycles);