extern uint32_t GPUreadStatus(void);
extern uint32_t GPUreadData(void);
extern void GPUreadDataMem(uint32_t *, int);
-extern long GPUdmaChain(uint32_t *, uint32_t, uint32_t *);
+extern long GPUdmaChain(uint32_t *, uint32_t, uint32_t *, int32_t *);
extern void GPUupdateLace(void);
extern long GPUfreeze(uint32_t, void *);
extern void GPUvBlank(int, int);
typedef uint32_t (CALLBACK* GPUreadStatus)(void);\r
typedef uint32_t (CALLBACK* GPUreadData)(void);\r
typedef void (CALLBACK* GPUreadDataMem)(uint32_t *, int);\r
-typedef long (CALLBACK* GPUdmaChain)(uint32_t *,uint32_t, uint32_t *);\r
+typedef long (CALLBACK* GPUdmaChain)(uint32_t *, uint32_t, uint32_t *, int32_t *);\r
typedef void (CALLBACK* GPUupdateLace)(void);\r
typedef void (CALLBACK* GPUmakeSnapshot)(void);\r
typedef void (CALLBACK* GPUkeypressed)(int);\r
DMA_INTERRUPT(4);
}
+#if 0
// Taken from PEOPS SOFTGPU
static inline boolean CheckForEndlessLoop(u32 laddr, u32 *lUsedAddr) {
if (laddr == lUsedAddr[1]) return TRUE;
return size;
}
+#endif
void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU
- u32 *ptr, madr_next, *madr_next_p, size;
+ u32 *ptr, madr_next, *madr_next_p;
u32 words, words_left, words_max, words_copy;
- int do_walking;
+ int cycles_sum, cycles_last_cmd = 0, do_walking;
madr &= ~3;
switch (chcr) {
do_walking = Config.hacks.gpu_slow_list_walking;
madr_next_p = do_walking ? &madr_next : NULL;
- size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, madr_next_p);
- if ((int)size <= 0)
- size = gpuDmaChainSize(madr);
+ cycles_sum = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff,
+ madr_next_p, &cycles_last_cmd);
HW_DMA2_MADR = SWAPu32(madr_next);
// a hack for Judge Dredd which is annoyingly sensitive to timing
if (Config.hacks.gpu_timing1024)
- size = 1024;
+ cycles_sum = 1024;
- psxRegs.gpuIdleAfter = psxRegs.cycle + size + 16;
- set_event(PSXINT_GPUDMA, size);
+ psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd;
+ set_event(PSXINT_GPUDMA, cycles_sum);
+ //printf("%u dma2cf: %d,%d %08x\n", psxRegs.cycle, cycles_sum,
+ // cycles_last_cmd, HW_DMA2_MADR);
return;
default:
void gpuInterrupt() {
if (HW_DMA2_CHCR == SWAP32(0x01000401) && !(HW_DMA2_MADR & SWAP32(0x800000)))
{
- u32 size, madr_next = 0xffffff, madr = SWAPu32(HW_DMA2_MADR);
- size = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff, &madr_next);
+ u32 madr_next = 0xffffff, madr = SWAPu32(HW_DMA2_MADR);
+ int cycles_sum, cycles_last_cmd = 0;
+ cycles_sum = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff,
+ &madr_next, &cycles_last_cmd);
HW_DMA2_MADR = SWAPu32(madr_next);
- psxRegs.gpuIdleAfter = psxRegs.cycle + size + 64;
- set_event(PSXINT_GPUDMA, size);
+ if ((s32)(psxRegs.gpuIdleAfter - psxRegs.cycle) > 0)
+ cycles_sum += psxRegs.gpuIdleAfter - psxRegs.cycle;
+ psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd;
+ set_event(PSXINT_GPUDMA, cycles_sum);
+ //printf("%u dma2cn: %d,%d %08x\n", psxRegs.cycle, cycles_sum,
+ // cycles_last_cmd, HW_DMA2_MADR);
return;
}
if (HW_DMA2_CHCR & SWAP32(0x01000000))
#include "../gpulib/gpu_timing.h"
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd)
+int do_cmd_list(uint32_t *list, int list_len,
+ int *cycles_sum_out, int *cycles_last, int *last_cmd)
{
+ int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
unsigned int cmd = 0, len;
uint32_t *list_start = list;
uint32_t *list_end = list + list_len;
- u32 cpu_cycles = 0;
for (; list < list_end; list += 1 + len)
{
while(1)
{
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
if(list_position >= list_end) {
cmd = -1;
while(1)
{
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
if(list_position >= list_end) {
cmd = -1;
#ifdef TEST
case 0xA0: // sys -> vid
{
- u32 load_width = LE2HOST32(slist[4]);
- u32 load_height = LE2HOST32(slist[5]);
+ u32 load_width = LE2HOST16(slist[4]);
+ u32 load_height = LE2HOST16(slist[5]);
u32 load_size = load_width * load_height;
len += load_size / 2;
// timing
case 0x02:
- cpu_cycles += gput_fill(LE2HOST32(slist[4]) & 0x3ff,
- LE2HOST32(slist[5]) & 0x1ff);
+ gput_sum(cpu_cycles_sum, cpu_cycles,
+ gput_fill(LE2HOST16(slist[4]) & 0x3ff, LE2HOST16(slist[5]) & 0x1ff));
break;
- case 0x20 ... 0x23: cpu_cycles += gput_poly_base(); break;
- case 0x24 ... 0x27: cpu_cycles += gput_poly_base_t(); break;
- case 0x28 ... 0x2B: cpu_cycles += gput_quad_base(); break;
- case 0x2C ... 0x2F: cpu_cycles += gput_quad_base_t(); break;
- case 0x30 ... 0x33: cpu_cycles += gput_poly_base_g(); break;
- case 0x34 ... 0x37: cpu_cycles += gput_poly_base_gt(); break;
- case 0x38 ... 0x3B: cpu_cycles += gput_quad_base_g(); break;
- case 0x3C ... 0x3F: cpu_cycles += gput_quad_base_gt(); break;
- case 0x40 ... 0x47: cpu_cycles += gput_line(0); break;
- case 0x50 ... 0x57: cpu_cycles += gput_line(0); break;
+ case 0x20 ... 0x23: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); break;
+ case 0x24 ... 0x27: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); break;
+ case 0x28 ... 0x2B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); break;
+ case 0x2C ... 0x2F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break;
+ case 0x30 ... 0x33: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); break;
+ case 0x34 ... 0x37: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); break;
+ case 0x38 ... 0x3B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); break;
+ case 0x3C ... 0x3F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break;
+ case 0x40 ... 0x47: gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break;
+ case 0x50 ... 0x57: gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break;
case 0x60 ... 0x63:
- cpu_cycles += gput_sprite(LE2HOST32(slist[4]) & 0x3ff,
- LE2HOST32(slist[5]) & 0x1ff);
+ gput_sum(cpu_cycles_sum, cpu_cycles,
+ gput_sprite(LE2HOST16(slist[4]) & 0x3ff, LE2HOST16(slist[5]) & 0x1ff));
break;
case 0x64 ... 0x67:
- cpu_cycles += gput_sprite(LE2HOST32(slist[6]) & 0x3ff,
- LE2HOST32(slist[7]) & 0x1ff);
+ gput_sum(cpu_cycles_sum, cpu_cycles,
+ gput_sprite(LE2HOST16(slist[6]) & 0x3ff, LE2HOST16(slist[7]) & 0x1ff));
break;
- case 0x68 ... 0x6B: cpu_cycles += gput_sprite(1, 1); break;
+ case 0x68 ... 0x6B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); break;
case 0x70 ... 0x73:
- case 0x74 ... 0x77: cpu_cycles += gput_sprite(8, 8); break;
+ case 0x74 ... 0x77: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8)); break;
case 0x78 ... 0x7B:
- case 0x7C ... 0x7F: cpu_cycles += gput_sprite(16, 16); break;
+ case 0x7C ... 0x7F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16)); break;
}
}
gpu.ex_regs[1] &= ~0x1ff;
gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff;
- *cpu_cycles_out += cpu_cycles;
+ *cycles_sum_out += cpu_cycles_sum;
+ *cycles_last = cpu_cycles;
*last_cmd = cmd;
return list - list_start;
}
extern const unsigned char cmd_lengths[256];
// XXX: mostly dupe code from soft peops
-int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd)
+int do_cmd_list(uint32_t *list, int list_len,
+ int *cycles_sum_out, int *cycles_last, int *last_cmd)
{
unsigned int cmd, len;
unsigned int *list_start = list;
void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram);
u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
- s32 *cpu_cycles, u32 *last_command);
+ s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command);
void triangle_benchmark(psx_gpu_struct *psx_gpu);
#endif
u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
- s32 *cpu_cycles_out, u32 *last_command)
+ s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command)
{
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
- u32 current_command = 0, command_length, cpu_cycles = 0;
+ u32 current_command = 0, command_length;
+ u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last;
u32 *list_start = list;
u32 *list_end = list + (size / 4);
u32 color = list[0] & 0xFFFFFF;
do_fill(psx_gpu, x, y, width, height, color);
- cpu_cycles += gput_fill(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_fill(width, height));
break;
}
get_vertex_data_xy(2, 6);
render_triangle(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_poly_base();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
break;
}
get_vertex_data_xy_uv(2, 10);
render_triangle(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_poly_base_t();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
break;
}
render_triangle(psx_gpu, vertexes, current_command);
render_triangle(psx_gpu, &(vertexes[1]), current_command);
- cpu_cycles += gput_quad_base();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
break;
}
render_triangle(psx_gpu, vertexes, current_command);
render_triangle(psx_gpu, &(vertexes[1]), current_command);
- cpu_cycles += gput_quad_base_t();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
break;
}
get_vertex_data_xy_rgb(2, 8);
render_triangle(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_poly_base_g();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
break;
}
get_vertex_data_xy_uv_rgb(2, 12);
render_triangle(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_poly_base_gt();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
break;
}
render_triangle(psx_gpu, vertexes, current_command);
render_triangle(psx_gpu, &(vertexes[1]), current_command);
- cpu_cycles += gput_quad_base_g();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
break;
}
render_triangle(psx_gpu, vertexes, current_command);
render_triangle(psx_gpu, &(vertexes[1]), current_command);
- cpu_cycles += gput_quad_base_gt();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
break;
}
vertexes[1].y = list_s16[5] + psx_gpu->offset_y;
render_line(psx_gpu, vertexes, current_command, list[0], 0);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
break;
}
vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
render_line(psx_gpu, vertexes, current_command, list[0], 0);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
list_position++;
num_vertexes++;
vertexes[1].y = list_s16[7] + psx_gpu->offset_y;
render_line(psx_gpu, vertexes, current_command, 0, 0);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
break;
}
vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
render_line(psx_gpu, vertexes, current_command, 0, 0);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
list_position += 2;
num_vertexes++;
render_sprite(psx_gpu, x, y, 0, 0, &width, &height,
current_command, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
&width, &height, current_command, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
render_sprite(psx_gpu, x, y, 0, 0, &width, &height,
current_command, list[0]);
- cpu_cycles += gput_sprite(1, 1);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
break;
}
render_sprite(psx_gpu, x, y, 0, 0, &width, &height,
current_command, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
&width, &height, current_command, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
render_sprite(psx_gpu, x, y, 0, 0, &width, &height,
current_command, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
&width, &height, current_command, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
}
breakloop:
- *cpu_cycles_out += cpu_cycles;
+ *cpu_cycles_sum_out += cpu_cycles_sum;
+ *cpu_cycles_last = cpu_cycles;
*last_command = current_command;
return list - list_start;
}
#endif
u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
- s32 *cpu_cycles_out, u32 *last_command)
+ s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command)
{
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
- u32 current_command = 0, command_length, cpu_cycles = 0;
+ u32 current_command = 0, command_length;
+ u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last;
u32 *list_start = list;
u32 *list_end = list + (size / 4);
x &= ~0xF;
width = ((width + 0xF) & ~0xF);
- cpu_cycles += gput_fill(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_fill(width, height));
if (width == 0 || height == 0)
break;
get_vertex_data_xy(2, 6);
do_triangle_enhanced(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_poly_base();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
break;
}
get_vertex_data_xy_uv(2, 10);
do_triangle_enhanced(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_poly_base_t();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
break;
}
get_vertex_data_xy(3, 8);
do_quad_enhanced(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_quad_base();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
break;
}
uv_hack(vertexes, 4);
do_quad_enhanced(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_quad_base_t();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
break;
}
get_vertex_data_xy_rgb(2, 8);
do_triangle_enhanced(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_poly_base_g();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
break;
}
get_vertex_data_xy_uv_rgb(2, 12);
do_triangle_enhanced(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_poly_base_gt();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
break;
}
get_vertex_data_xy_rgb(3, 12);
do_quad_enhanced(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_quad_base_g();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
break;
}
uv_hack(vertexes, 4);
do_quad_enhanced(psx_gpu, vertexes, current_command);
- cpu_cycles += gput_quad_base_gt();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
break;
}
render_line(psx_gpu, vertexes, current_command, list[0], 0);
if (enhancement_enable(psx_gpu))
render_line(psx_gpu, vertexes, current_command, list[0], 1);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
break;
}
render_line(psx_gpu, vertexes, current_command, list[0], 0);
if (enhancement_enable(psx_gpu))
render_line(psx_gpu, vertexes, current_command, list[0], 1);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
list_position++;
num_vertexes++;
render_line(psx_gpu, vertexes, current_command, 0, 0);
if (enhancement_enable(psx_gpu))
render_line(psx_gpu, vertexes, current_command, 0, 1);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
break;
}
render_line(psx_gpu, vertexes, current_command, 0, 0);
if (enhancement_enable(psx_gpu))
render_line(psx_gpu, vertexes, current_command, 0, 1);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
list_position += 2;
num_vertexes++;
if (check_enhanced_range(psx_gpu, x, x + width))
do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
if (check_enhanced_range(psx_gpu, x, x + width))
do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
if (check_enhanced_range(psx_gpu, x, x + 1))
do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]);
- cpu_cycles += gput_sprite(1, 1);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
break;
}
if (check_enhanced_range(psx_gpu, x, x + 8))
do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
if (check_enhanced_range(psx_gpu, x, x + 8))
do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
if (check_enhanced_range(psx_gpu, x, x + 16))
do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
if (check_enhanced_range(psx_gpu, x, x + 16))
do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]);
- cpu_cycles += gput_sprite(width, height);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
enhancement_disable();
breakloop:
- *cpu_cycles_out += cpu_cycles;
+ *cpu_cycles_sum_out += cpu_cycles_sum;
+ *cpu_cycles_last = cpu_cycles;
*last_command = current_command;
return list - list_start;
}
static psx_gpu_struct egpu __attribute__((aligned(256)));
-int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd)
+int do_cmd_list(uint32_t *list, int count,
+ int *cycles_sum, int *cycles_last, int *last_cmd)
{
int ret;
#endif
if (gpu.state.enhancement_active)
- ret = gpu_parse_enhanced(&egpu, list, count * 4, cycles, (u32 *)last_cmd);
+ ret = gpu_parse_enhanced(&egpu, list, count * 4,
+ cycles_sum, cycles_last, (u32 *)last_cmd);
else
- ret = gpu_parse(&egpu, list, count * 4, cycles, (u32 *)last_cmd);
+ ret = gpu_parse(&egpu, list, count * 4,
+ cycles_sum, cycles_last, (u32 *)last_cmd);
#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
__asm__ __volatile__("":::"q4","q5","q6","q7");
{
s32 dummy0 = 0;
u32 dummy1 = 0;
- gpu_parse(&egpu, ecmds + 1, 6 * 4, &dummy0, &dummy1);
+ gpu_parse(&egpu, ecmds + 1, 6 * 4, &dummy0, &dummy0, &dummy1);
}
void renderer_update_caches(int x, int y, int w, int h, int state_changed)
#include "../gpulib/gpu_timing.h"
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd)
+int do_cmd_list(u32 *list_, int list_len,
+ int *cycles_sum_out, int *cycles_last, int *last_cmd)
{
+ int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
u32 cmd = 0, len, i;
- le32_t *list = (le32_t *)_list;
+ le32_t *list = (le32_t *)list_;
le32_t *list_start = list;
le32_t *list_end = list + list_len;
- u32 cpu_cycles = 0;
//TODO: set ilace_mask when resolution changes instead of every time,
// eliminate #ifdef below.
{
case 0x02:
gpuClearImage(packet);
- cpu_cycles += gput_fill(le16_to_s16(packet.U2[4]) & 0x3ff,
- le16_to_s16(packet.U2[5]) & 0x1ff);
+ gput_sum(cpu_cycles_sum, cpu_cycles,
+ gput_fill(le16_to_s16(packet.U2[4]) & 0x3ff, le16_to_s16(packet.U2[5]) & 0x1ff));
break;
case 0x20:
gpu_unai.Masking | Blending | gpu_unai.PixelMSB
];
gpuDrawPolyF(packet, driver, false);
- cpu_cycles += gput_poly_base();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
} break;
case 0x24:
PP driver = gpuPolySpanDrivers[driver_idx];
gpuDrawPolyFT(packet, driver, false);
- cpu_cycles += gput_poly_base_t();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
} break;
case 0x28:
gpu_unai.Masking | Blending | gpu_unai.PixelMSB
];
gpuDrawPolyF(packet, driver, true); // is_quad = true
- cpu_cycles += gput_quad_base();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
} break;
case 0x2C:
PP driver = gpuPolySpanDrivers[driver_idx];
gpuDrawPolyFT(packet, driver, true); // is_quad = true
- cpu_cycles += gput_quad_base_t();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
} break;
case 0x30:
gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
];
gpuDrawPolyG(packet, driver, false);
- cpu_cycles += gput_poly_base_g();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
} break;
case 0x34:
gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
];
gpuDrawPolyGT(packet, driver, false);
- cpu_cycles += gput_poly_base_gt();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
} break;
case 0x38:
gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
];
gpuDrawPolyG(packet, driver, true); // is_quad = true
- cpu_cycles += gput_quad_base_g();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
} break;
case 0x3C:
gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
];
gpuDrawPolyGT(packet, driver, true); // is_quad = true
- cpu_cycles += gput_quad_base_gt();
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
} break;
case 0x40:
u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
PSD driver = gpuPixelSpanDrivers[driver_idx];
gpuDrawLineF(packet, driver);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
} break;
case 0x48 ... 0x4F: { // Monochrome line strip
gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2];
gpu_unai.PacketBuffer.U4[2] = *list_position++;
gpuDrawLineF(packet, driver);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
num_vertexes++;
if(list_position >= list_end) {
driver_idx |= (1 << 5);
PSD driver = gpuPixelSpanDrivers[driver_idx];
gpuDrawLineG(packet, driver);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
} break;
case 0x58 ... 0x5F: { // Gouraud-shaded line strip
gpu_unai.PacketBuffer.U4[2] = *list_position++;
gpu_unai.PacketBuffer.U4[3] = *list_position++;
gpuDrawLineG(packet, driver);
- cpu_cycles += gput_line(0);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
num_vertexes++;
if(list_position >= list_end) {
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
s32 w = 0, h = 0;
gpuDrawT(packet, driver, &w, &h);
- cpu_cycles += gput_sprite(w, h);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
} break;
case 0x64:
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver, &w, &h);
- cpu_cycles += gput_sprite(w, h);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
} break;
case 0x68:
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
s32 w = 0, h = 0;
gpuDrawT(packet, driver, &w, &h);
- cpu_cycles += gput_sprite(1, 1);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
} break;
case 0x70:
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
s32 w = 0, h = 0;
gpuDrawT(packet, driver, &w, &h);
- cpu_cycles += gput_sprite(w, h);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
} break;
case 0x74:
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver, &w, &h);
- cpu_cycles += gput_sprite(w, h);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
} break;
case 0x78:
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
s32 w = 0, h = 0;
gpuDrawT(packet, driver, &w, &h);
- cpu_cycles += gput_sprite(w, h);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
} break;
case 0x7C:
s32 w = 0, h = 0;
gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
gpuDrawS16(packet, &w, &h);
- cpu_cycles += gput_sprite(w, h);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
break;
}
// fallthrough
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver, &w, &h);
- cpu_cycles += gput_sprite(w, h);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
} break;
#ifdef TEST
gpu.ex_regs[1] &= ~0x1ff;
gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff;
- *cpu_cycles_out += cpu_cycles;
+ *cycles_sum_out += cpu_cycles_sum;
+ *cycles_last = cpu_cycles;
*last_cmd = cmd;
return list - list_start;
}
void renderer_sync_ecmds(u32 *ecmds)
{
int dummy;
- do_cmd_list(&ecmds[1], 6, &dummy, &dummy);
+ do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
}
void renderer_update_caches(int x, int y, int w, int h, int state_changed)
struct psx_gpu gpu;
-static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles);
+static noinline int do_cmd_buffer(uint32_t *data, int count,
+ int *cycles_sum, int *cycles_last);
static void finish_vram_transfer(int is_read);
static noinline void do_cmd_reset(void)
int dummy = 0;
renderer_sync();
if (unlikely(gpu.cmd_len > 0))
- do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
+ do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
gpu.cmd_len = 0;
if (unlikely(gpu.dma.h > 0))
if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
int dummy = 0;
- do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy);
+ do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
gpu.frameskip.pending_fill[0] = 0;
}
}
case 0x02:
if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
// clearing something large, don't skip
- do_cmd_list(list, 3, &dummy, &dummy);
+ do_cmd_list(list, 3, &dummy, &dummy, &dummy);
else
memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
break;
return pos;
}
-static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles)
+static noinline int do_cmd_buffer(uint32_t *data, int count,
+ int *cycles_sum, int *cycles_last)
{
int cmd, pos;
uint32_t old_e3 = gpu.ex_regs[3];
cmd = -1; // incomplete cmd, can't consume yet
break;
}
- do_vram_copy(data + pos + 1, cpu_cycles);
+ *cycles_sum += *cycles_last;
+ *cycles_last = 0;
+ do_vram_copy(data + pos + 1, cycles_last);
vram_dirty = 1;
pos += 4;
continue;
if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
else {
- pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd);
+ pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
vram_dirty = 1;
}
static noinline void flush_cmd_buffer(void)
{
int dummy = 0, left;
- left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
+ left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
if (left > 0)
memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
if (left != gpu.cmd_len) {
if (unlikely(gpu.cmd_len > 0))
flush_cmd_buffer();
- left = do_cmd_buffer(mem, count, &dummy);
+ left = do_cmd_buffer(mem, count, &dummy, &dummy);
if (left)
log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
flush_cmd_buffer();
}
-long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, uint32_t *progress_addr)
+long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
+ uint32_t *progress_addr, int32_t *cycles_last_cmd)
{
uint32_t addr, *list, ld_addr = 0;
int len, left, count;
- int cpu_cycles = 0;
+ int cpu_cycles_sum = 0;
+ int cpu_cycles_last = 0;
preload(rambase + (start_addr & 0x1fffff) / 4);
addr = LE32TOH(list[0]) & 0xffffff;
preload(rambase + (addr & 0x1fffff) / 4);
- cpu_cycles += 10;
+ cpu_cycles_sum += 10;
if (len > 0)
- cpu_cycles += 5 + len;
+ cpu_cycles_sum += 5 + len;
- log_io(".chain %08lx #%d+%d %u\n",
- (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles);
+ log_io(".chain %08lx #%d+%d %u+%u\n",
+ (long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
if (unlikely(gpu.cmd_len > 0)) {
if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
log_anomaly("cmd_buffer overflow, likely garbage commands\n");
}
if (len) {
- left = do_cmd_buffer(list + 1, len, &cpu_cycles);
+ left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
if (left) {
memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
gpu.cmd_len = left;
}
}
+ //printf(" -> %d %d\n", cpu_cycles_sum, cpu_cycles_last);
gpu.state.last_list.frame = *gpu.state.frame_count;
gpu.state.last_list.hcnt = *gpu.state.hcnt;
- gpu.state.last_list.cycles = cpu_cycles;
+ gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
gpu.state.last_list.addr = start_addr;
- return cpu_cycles;
+ *cycles_last_cmd = cpu_cycles_last;
+ return cpu_cycles_sum;
}
void GPUreadDataMem(uint32_t *mem, int count)
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd);
+int do_cmd_list(uint32_t *list, int count,
+ int *cycles_sum, int *cycles_last, int *last_cmd);
struct rearmed_cbs;
long GPUinit(void);
long GPUshutdown(void);
void GPUwriteDataMem(uint32_t *mem, int count);
-long GPUdmaChain(uint32_t *rambase, uint32_t addr, uint32_t *progress_addr);
+long GPUdmaChain(uint32_t *rambase, uint32_t addr,
+ uint32_t *progress_addr, int32_t *cycles_last_cmd);
void GPUwriteData(uint32_t data);
void GPUreadDataMem(uint32_t *mem, int count);
uint32_t GPUreadData(void);
// Rough per-command GPU cycle-cost estimates — deliberately conservative and
// known to be inaccurate; used only to pace DMA/command timing, not for accuracy.
-#define gput_fill(w, h) (23 + (4 + (w) / 32u) * (h))
+#define gput_fill(w, h) (23 + (4 + (w) / 16u) * (h))
#define gput_copy(w, h) ((w) * (h))
#define gput_poly_base() (23)
#define gput_poly_base_t() (gput_poly_base() + 90)
#define gput_line(k) (8 + (k))
#define gput_sprite(w, h) (8 + ((w) / 2u) * (h))
+// sort of a workaround for lack of proper fifo emulation
+#define gput_sum(sum, cnt, new_cycles) do { \
+ sum += cnt; cnt = new_cycles; \
+} while (0)