{
}
+#include "../gpulib/gpu_timing.h"
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(uint32_t *list, int list_len, int *last_cmd)
+int do_cmd_list(uint32_t *list, int list_len, int *cpu_cycles_out, int *last_cmd)
{
unsigned int cmd = 0, len;
uint32_t *list_start = list;
uint32_t *list_end = list + list_len;
+ u32 cpu_cycles = 0;
for (; list < list_end; list += 1 + len)
{
+ short *slist = (void *)list;
cmd = GETLE32(list) >> 24;
len = cmd_lengths[cmd];
if (list + 1 + len > list_end) {
while(1)
{
+ cpu_cycles += gput_line(0);
+
if(list_position >= list_end) {
cmd = -1;
goto breakloop;
while(1)
{
+ cpu_cycles += gput_line(0);
+
if(list_position >= list_end) {
cmd = -1;
goto breakloop;
#ifdef TEST
case 0xA0: // sys -> vid
{
- short *slist = (void *)list;
u32 load_width = LE2HOST32(slist[4]);
u32 load_height = LE2HOST32(slist[5]);
u32 load_size = load_width * load_height;
break;
}
#endif
+
+ // timing
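+ // (rough per-primitive cost estimates from gpu_timing.h; the sum is
+ //  added to *cpu_cycles_out before returning)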
+ case 0x02:
+ cpu_cycles += gput_fill(LE2HOST32(slist[4]) & 0x3ff,
+ LE2HOST32(slist[5]) & 0x1ff);
+ break;
+ case 0x20 ... 0x23: cpu_cycles += gput_poly_base(); break;
+ case 0x24 ... 0x27: cpu_cycles += gput_poly_base_t(); break;
+ case 0x28 ... 0x2B: cpu_cycles += gput_quad_base(); break;
+ case 0x2C ... 0x2F: cpu_cycles += gput_quad_base_t(); break;
+ case 0x30 ... 0x33: cpu_cycles += gput_poly_base_g(); break;
+ case 0x34 ... 0x37: cpu_cycles += gput_poly_base_gt(); break;
+ case 0x38 ... 0x3B: cpu_cycles += gput_quad_base_g(); break;
+ case 0x3C ... 0x3F: cpu_cycles += gput_quad_base_gt(); break;
+ case 0x40 ... 0x47: cpu_cycles += gput_line(0); break;
+ case 0x50 ... 0x57: cpu_cycles += gput_line(0); break;
+ case 0x60 ... 0x63:
+ cpu_cycles += gput_sprite(LE2HOST32(slist[4]) & 0x3ff,
+ LE2HOST32(slist[5]) & 0x1ff);
+ break;
+ case 0x64 ... 0x67:
+ cpu_cycles += gput_sprite(LE2HOST32(slist[6]) & 0x3ff,
+ LE2HOST32(slist[7]) & 0x1ff);
+ break;
+ case 0x68 ... 0x6B: cpu_cycles += gput_sprite(1, 1); break;
+ case 0x70 ... 0x73:
+ case 0x74 ... 0x77: cpu_cycles += gput_sprite(8, 8); break;
+ case 0x78 ... 0x7B:
+ case 0x7C ... 0x7F: cpu_cycles += gput_sprite(16, 16); break;
}
}
gpu.ex_regs[1] &= ~0x1ff;
gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff;
+ *cpu_cycles_out += cpu_cycles;
*last_cmd = cmd;
return list - list_start;
}
cbs->pl_set_gpu_caps(0);
set_vram(gpu.vram);
}
+
+// vim:ts=2:shiftwidth=2:expandtab
extern const unsigned char cmd_lengths[256];
// XXX: mostly dupe code from soft peops
-int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
+int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd)
{
unsigned int cmd, len;
unsigned int *list_start = list;
void flush_render_block_buffer(psx_gpu_struct *psx_gpu);
void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram);
-u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command);
+u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
+ s32 *cpu_cycles, u32 *last_command);
void triangle_benchmark(psx_gpu_struct *psx_gpu);
FILE *state_file;
FILE *list_file;
u32 no_display = 0;
+ s32 dummy0 = 0;
+ u32 dummy1 = 0;
if((argc != 3) && (argc != 4))
{
init_counter();
#endif
- gpu_parse(psx_gpu, list, size, NULL);
+ gpu_parse(psx_gpu, list, size, &dummy0, &dummy1);
flush_render_block_buffer(psx_gpu);
clear_stats();
u32 cycles = get_counter();
#endif
- gpu_parse(psx_gpu, list, size, NULL);
+ gpu_parse(psx_gpu, list, size, &dummy0, &dummy1);
flush_render_block_buffer(psx_gpu);
#ifdef NEON_BUILD
#include <stdio.h>
#include "common.h"
+#include "../../gpulib/gpu_timing.h"
#ifndef command_lengths
const u8 command_lengths[256] =
#define SET_Ex(r, v)
#endif
-u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
+u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
+ s32 *cpu_cycles_out, u32 *last_command)
{
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
- u32 current_command = 0, command_length;
+ u32 current_command = 0, command_length, cpu_cycles = 0;
u32 *list_start = list;
u32 *list_end = list + (size / 4);
for(; list < list_end; list += 1 + command_length)
{
- s16 *list_s16 = (void *)list;
- current_command = *list >> 24;
- command_length = command_lengths[current_command];
- if (list + 1 + command_length > list_end) {
- current_command = (u32)-1;
- break;
- }
-
- switch(current_command)
- {
- case 0x00:
- break;
-
- case 0x02:
+ s16 *list_s16 = (void *)list;
+ current_command = *list >> 24;
+ command_length = command_lengths[current_command];
+ if (list + 1 + command_length > list_end) {
+ current_command = (u32)-1;
+ break;
+ }
+
+ switch(current_command)
+ {
+ case 0x00:
+ break;
+
+ case 0x02:
{
u32 x = list_s16[2] & 0x3FF;
u32 y = list_s16[3] & 0x1FF;
u32 color = list[0] & 0xFFFFFF;
do_fill(psx_gpu, x, y, width, height, color);
- break;
+ cpu_cycles += gput_fill(width, height);
+ break;
}
-
- case 0x20 ... 0x23:
+
+ case 0x20 ... 0x23:
{
set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
get_vertex_data_xy(2, 6);
render_triangle(psx_gpu, vertexes, current_command);
- break;
+ cpu_cycles += gput_poly_base();
+ break;
}
- case 0x24 ... 0x27:
+ case 0x24 ... 0x27:
{
set_clut(psx_gpu, list_s16[5]);
set_texture(psx_gpu, list_s16[9]);
get_vertex_data_xy_uv(2, 10);
render_triangle(psx_gpu, vertexes, current_command);
- break;
+ cpu_cycles += gput_poly_base_t();
+ break;
}
- case 0x28 ... 0x2B:
+ case 0x28 ... 0x2B:
{
set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
render_triangle(psx_gpu, vertexes, current_command);
render_triangle(psx_gpu, &(vertexes[1]), current_command);
- break;
+ cpu_cycles += gput_quad_base();
+ break;
}
- case 0x2C ... 0x2F:
+ case 0x2C ... 0x2F:
{
set_clut(psx_gpu, list_s16[5]);
set_texture(psx_gpu, list_s16[9]);
render_triangle(psx_gpu, vertexes, current_command);
render_triangle(psx_gpu, &(vertexes[1]), current_command);
- break;
+ cpu_cycles += gput_quad_base_t();
+ break;
}
- case 0x30 ... 0x33:
+ case 0x30 ... 0x33:
{
get_vertex_data_xy_rgb(0, 0);
get_vertex_data_xy_rgb(1, 4);
get_vertex_data_xy_rgb(2, 8);
render_triangle(psx_gpu, vertexes, current_command);
- break;
+ cpu_cycles += gput_poly_base_g();
+ break;
}
- case 0x34:
- case 0x35:
- case 0x36:
- case 0x37:
+ case 0x34 ... 0x37:
{
set_clut(psx_gpu, list_s16[5]);
set_texture(psx_gpu, list_s16[11]);
get_vertex_data_xy_uv_rgb(2, 12);
render_triangle(psx_gpu, vertexes, current_command);
- break;
+ cpu_cycles += gput_poly_base_gt();
+ break;
}
- case 0x38:
- case 0x39:
- case 0x3A:
- case 0x3B:
+ case 0x38 ... 0x3B:
{
get_vertex_data_xy_rgb(0, 0);
get_vertex_data_xy_rgb(1, 4);
render_triangle(psx_gpu, vertexes, current_command);
render_triangle(psx_gpu, &(vertexes[1]), current_command);
- break;
+ cpu_cycles += gput_quad_base_g();
+ break;
}
- case 0x3C:
- case 0x3D:
- case 0x3E:
- case 0x3F:
+ case 0x3C ... 0x3F:
{
set_clut(psx_gpu, list_s16[5]);
set_texture(psx_gpu, list_s16[11]);
render_triangle(psx_gpu, vertexes, current_command);
render_triangle(psx_gpu, &(vertexes[1]), current_command);
- break;
+ cpu_cycles += gput_quad_base_gt();
+ break;
}
- case 0x40 ... 0x47:
+ case 0x40 ... 0x47:
{
vertexes[0].x = list_s16[2] + psx_gpu->offset_x;
vertexes[0].y = list_s16[3] + psx_gpu->offset_y;
vertexes[1].y = list_s16[5] + psx_gpu->offset_y;
render_line(psx_gpu, vertexes, current_command, list[0], 0);
- break;
+ cpu_cycles += gput_line(0);
+ break;
}
- case 0x48 ... 0x4F:
+ case 0x48 ... 0x4F:
{
u32 num_vertexes = 1;
u32 *list_position = &(list[2]);
vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
render_line(psx_gpu, vertexes, current_command, list[0], 0);
+ cpu_cycles += gput_line(0);
list_position++;
num_vertexes++;
break;
}
- case 0x50 ... 0x57:
+ case 0x50 ... 0x57:
{
vertexes[0].r = list[0] & 0xFF;
vertexes[0].g = (list[0] >> 8) & 0xFF;
vertexes[1].y = list_s16[7] + psx_gpu->offset_y;
render_line(psx_gpu, vertexes, current_command, 0, 0);
- break;
+ cpu_cycles += gput_line(0);
+ break;
}
case 0x58 ... 0x5F:
vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
render_line(psx_gpu, vertexes, current_command, 0, 0);
+ cpu_cycles += gput_line(0);
list_position += 2;
num_vertexes++;
break;
}
- case 0x60 ... 0x63:
+ case 0x60 ... 0x63:
{
u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
u32 height = list_s16[5] & 0x1FF;
render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]);
- break;
+ cpu_cycles += gput_sprite(width, height);
+ break;
}
- case 0x64 ... 0x67:
+ case 0x64 ... 0x67:
{
u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height,
current_command, list[0]);
- break;
+ cpu_cycles += gput_sprite(width, height);
+ break;
}
- case 0x68:
- case 0x69:
- case 0x6A:
- case 0x6B:
+ case 0x68 ... 0x6B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]);
- break;
+ cpu_cycles += gput_sprite(1, 1);
+ break;
}
- case 0x70:
- case 0x71:
- case 0x72:
- case 0x73:
+ case 0x70 ... 0x73:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]);
- break;
+ cpu_cycles += gput_sprite(8, 8);
+ break;
}
- case 0x74:
- case 0x75:
- case 0x76:
- case 0x77:
+ case 0x74 ... 0x77:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8,
current_command, list[0]);
- break;
+ cpu_cycles += gput_sprite(8, 8);
+ break;
}
- case 0x78:
- case 0x79:
- case 0x7A:
- case 0x7B:
+ case 0x78 ... 0x7B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]);
- break;
+ cpu_cycles += gput_sprite(16, 16);
+ break;
}
- case 0x7C:
- case 0x7D:
- case 0x7E:
- case 0x7F:
+ case 0x7C ... 0x7F:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16,
current_command, list[0]);
- break;
+ cpu_cycles += gput_sprite(16, 16);
+ break;
}
#ifdef PCSX
render_block_copy(psx_gpu, (u16 *)&(list_s16[6]), load_x, load_y,
load_width, load_height, load_width);
- break;
+ break;
}
case 0xC0 ... 0xDF: // vid -> sys
break;
#endif
- case 0xE1:
+ case 0xE1:
set_texture(psx_gpu, list[0]);
if(list[0] & (1 << 9))
psx_gpu->render_state_base &= ~RENDER_STATE_DITHER;
psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1;
- SET_Ex(1, list[0]);
- break;
+ SET_Ex(1, list[0]);
+ break;
- case 0xE2:
+ case 0xE2:
{
// TODO: Clean
u32 texture_window_settings = list[0];
psx_gpu->offset_x = offset_x >> 21;
psx_gpu->offset_y = offset_y >> 21;
- SET_Ex(5, list[0]);
- break;
- }
+ SET_Ex(5, list[0]);
+ break;
+ }
- case 0xE6:
+ case 0xE6:
{
u32 mask_settings = list[0];
u16 mask_msb = mask_settings << 15;
psx_gpu->mask_msb = mask_msb;
}
- SET_Ex(6, list[0]);
- break;
+ SET_Ex(6, list[0]);
+ break;
}
- default:
- break;
- }
+ default:
+ break;
+ }
}
breakloop:
- if (last_command != NULL)
- *last_command = current_command;
+ *cpu_cycles_out += cpu_cycles;
+ *last_command = current_command;
return list - list_start;
}
#endif
u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
- u32 *last_command)
+ s32 *cpu_cycles_out, u32 *last_command)
{
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
- u32 current_command = 0, command_length;
+ u32 current_command = 0, command_length, cpu_cycles = 0;
u32 *list_start = list;
u32 *list_end = list + (size / 4);
x &= ~0xF;
width = ((width + 0xF) & ~0xF);
+ cpu_cycles += gput_fill(width, height);
if (width == 0 || height == 0)
break;
get_vertex_data_xy(2, 6);
do_triangle_enhanced(psx_gpu, vertexes, current_command);
+ cpu_cycles += gput_poly_base();
break;
}
get_vertex_data_xy_uv(2, 10);
do_triangle_enhanced(psx_gpu, vertexes, current_command);
+ cpu_cycles += gput_poly_base_t();
break;
}
get_vertex_data_xy(3, 8);
do_quad_enhanced(psx_gpu, vertexes, current_command);
+ cpu_cycles += gput_quad_base();
break;
}
uv_hack(vertexes, 4);
do_quad_enhanced(psx_gpu, vertexes, current_command);
+ cpu_cycles += gput_quad_base_t();
break;
}
get_vertex_data_xy_rgb(2, 8);
do_triangle_enhanced(psx_gpu, vertexes, current_command);
+ cpu_cycles += gput_poly_base_g();
break;
}
- case 0x34:
- case 0x35:
- case 0x36:
- case 0x37:
+ case 0x34 ... 0x37:
{
set_clut(psx_gpu, list_s16[5]);
set_texture(psx_gpu, list_s16[11]);
get_vertex_data_xy_uv_rgb(2, 12);
do_triangle_enhanced(psx_gpu, vertexes, current_command);
+ cpu_cycles += gput_poly_base_gt();
break;
}
- case 0x38:
- case 0x39:
- case 0x3A:
- case 0x3B:
+ case 0x38 ... 0x3B:
{
get_vertex_data_xy_rgb(0, 0);
get_vertex_data_xy_rgb(1, 4);
get_vertex_data_xy_rgb(3, 12);
do_quad_enhanced(psx_gpu, vertexes, current_command);
+ cpu_cycles += gput_quad_base_g();
break;
}
- case 0x3C:
- case 0x3D:
- case 0x3E:
- case 0x3F:
+ case 0x3C ... 0x3F:
{
set_clut(psx_gpu, list_s16[5]);
set_texture(psx_gpu, list_s16[11]);
uv_hack(vertexes, 4);
do_quad_enhanced(psx_gpu, vertexes, current_command);
+ cpu_cycles += gput_quad_base_gt();
break;
}
render_line(psx_gpu, vertexes, current_command, list[0], 0);
if (enhancement_enable(psx_gpu))
render_line(psx_gpu, vertexes, current_command, list[0], 1);
+ cpu_cycles += gput_line(0);
break;
}
render_line(psx_gpu, vertexes, current_command, list[0], 0);
if (enhancement_enable(psx_gpu))
render_line(psx_gpu, vertexes, current_command, list[0], 1);
+ cpu_cycles += gput_line(0);
list_position++;
num_vertexes++;
render_line(psx_gpu, vertexes, current_command, 0, 0);
if (enhancement_enable(psx_gpu))
render_line(psx_gpu, vertexes, current_command, 0, 1);
+ cpu_cycles += gput_line(0);
break;
}
render_line(psx_gpu, vertexes, current_command, 0, 0);
if (enhancement_enable(psx_gpu))
render_line(psx_gpu, vertexes, current_command, 0, 1);
+ cpu_cycles += gput_line(0);
list_position += 2;
num_vertexes++;
if (check_enhanced_range(psx_gpu, x, x + width))
do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]);
+ cpu_cycles += gput_sprite(width, height);
break;
}
if (check_enhanced_range(psx_gpu, x, x + width))
do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]);
+ cpu_cycles += gput_sprite(width, height);
break;
}
- case 0x68:
- case 0x69:
- case 0x6A:
- case 0x6B:
+ case 0x68 ... 0x6B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
if (check_enhanced_range(psx_gpu, x, x + 1))
do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]);
+ cpu_cycles += gput_sprite(1, 1);
break;
}
- case 0x70:
- case 0x71:
- case 0x72:
- case 0x73:
+ case 0x70 ... 0x73:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
if (check_enhanced_range(psx_gpu, x, x + 8))
do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]);
+ cpu_cycles += gput_sprite(8, 8);
break;
}
- case 0x74:
- case 0x75:
- case 0x76:
- case 0x77:
+ case 0x74 ... 0x77:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
if (check_enhanced_range(psx_gpu, x, x + 8))
do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]);
+ cpu_cycles += gput_sprite(8, 8);
break;
}
- case 0x78:
- case 0x79:
- case 0x7A:
- case 0x7B:
+ case 0x78 ... 0x7B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
if (check_enhanced_range(psx_gpu, x, x + 16))
do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]);
+ cpu_cycles += gput_sprite(16, 16);
break;
}
- case 0x7C:
- case 0x7D:
- case 0x7E:
- case 0x7F:
+ case 0x7C ... 0x7F:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
if (check_enhanced_range(psx_gpu, x, x + 16))
do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]);
+ cpu_cycles += gput_sprite(16, 16);
break;
}
enhancement_disable();
breakloop:
- if (last_command != NULL)
- *last_command = current_command;
+ *cpu_cycles_out += cpu_cycles;
+ *last_command = current_command;
return list - list_start;
}
static psx_gpu_struct egpu __attribute__((aligned(256)));
-int do_cmd_list(uint32_t *list, int count, int *last_cmd)
+int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd)
{
int ret;
#endif
if (gpu.state.enhancement_active)
- ret = gpu_parse_enhanced(&egpu, list, count * 4, (u32 *)last_cmd);
+ ret = gpu_parse_enhanced(&egpu, list, count * 4, cycles, (u32 *)last_cmd);
else
- ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
+ ret = gpu_parse(&egpu, list, count * 4, cycles, (u32 *)last_cmd);
#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
__asm__ __volatile__("":::"q4","q5","q6","q7");
void renderer_sync_ecmds(uint32_t *ecmds)
{
- gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL);
+ s32 dummy0 = 0;
+ u32 dummy1 = 0;
+ gpu_parse(&egpu, ecmds + 1, 6 * 4, &dummy0, &dummy1);
}
void renderer_update_caches(int x, int y, int w, int h, int state_changed)
}
#endif
+#include "../gpulib/gpu_timing.h"
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(u32 *_list, int list_len, int *last_cmd)
+int do_cmd_list(u32 *_list, int list_len, int *cpu_cycles_out, int *last_cmd)
{
u32 cmd = 0, len, i;
le32_t *list = (le32_t *)_list;
le32_t *list_start = list;
le32_t *list_end = list + list_len;
+ u32 cpu_cycles = 0;
//TODO: set ilace_mask when resolution changes instead of every time,
// eliminate #ifdef below.
{
case 0x02:
gpuClearImage(packet);
+ cpu_cycles += gput_fill(le16_to_s16(packet.U2[4]) & 0x3ff,
+ le16_to_s16(packet.U2[5]) & 0x1ff);
break;
case 0x20:
gpu_unai.Masking | Blending | gpu_unai.PixelMSB
];
gpuDrawPolyF(packet, driver, false);
+ cpu_cycles += gput_poly_base();
} break;
case 0x24:
PP driver = gpuPolySpanDrivers[driver_idx];
gpuDrawPolyFT(packet, driver, false);
+ cpu_cycles += gput_poly_base_t();
} break;
case 0x28:
gpu_unai.Masking | Blending | gpu_unai.PixelMSB
];
gpuDrawPolyF(packet, driver, true); // is_quad = true
+ cpu_cycles += gput_quad_base();
} break;
case 0x2C:
PP driver = gpuPolySpanDrivers[driver_idx];
gpuDrawPolyFT(packet, driver, true); // is_quad = true
+ cpu_cycles += gput_quad_base_t();
} break;
case 0x30:
gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
];
gpuDrawPolyG(packet, driver, false);
+ cpu_cycles += gput_poly_base_g();
} break;
case 0x34:
gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
];
gpuDrawPolyGT(packet, driver, false);
+ cpu_cycles += gput_poly_base_gt();
} break;
case 0x38:
gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
];
gpuDrawPolyG(packet, driver, true); // is_quad = true
+ cpu_cycles += gput_quad_base_g();
} break;
case 0x3C:
gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
];
gpuDrawPolyGT(packet, driver, true); // is_quad = true
+ cpu_cycles += gput_quad_base_gt();
} break;
case 0x40:
u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
PSD driver = gpuPixelSpanDrivers[driver_idx];
gpuDrawLineF(packet, driver);
+ cpu_cycles += gput_line(0);
} break;
case 0x48 ... 0x4F: { // Monochrome line strip
gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2];
gpu_unai.PacketBuffer.U4[2] = *list_position++;
gpuDrawLineF(packet, driver);
+ cpu_cycles += gput_line(0);
num_vertexes++;
if(list_position >= list_end) {
driver_idx |= (1 << 5);
PSD driver = gpuPixelSpanDrivers[driver_idx];
gpuDrawLineG(packet, driver);
+ cpu_cycles += gput_line(0);
} break;
case 0x58 ... 0x5F: { // Gouraud-shaded line strip
gpu_unai.PacketBuffer.U4[2] = *list_position++;
gpu_unai.PacketBuffer.U4[3] = *list_position++;
gpuDrawLineG(packet, driver);
+ cpu_cycles += gput_line(0);
num_vertexes++;
if(list_position >= list_end) {
case 0x63: { // Monochrome rectangle (variable size)
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
gpuDrawT(packet, driver);
+ cpu_cycles += gput_sprite(le16_to_u16(packet.U2[4]) & 0x3ff,
+ le16_to_u16(packet.U2[5]) & 0x1ff);
} break;
case 0x64:
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver);
+ cpu_cycles += gput_sprite(le16_to_u16(packet.U2[6]) & 0x3ff,
+ le16_to_u16(packet.U2[7]) & 0x1ff);
} break;
case 0x68:
gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001);
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
gpuDrawT(packet, driver);
+ cpu_cycles += gput_sprite(1, 1);
} break;
case 0x70:
gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008);
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
gpuDrawT(packet, driver);
+ cpu_cycles += gput_sprite(8, 8);
} break;
case 0x74:
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver);
+ cpu_cycles += gput_sprite(8, 8);
} break;
case 0x78:
gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010);
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
gpuDrawT(packet, driver);
+ cpu_cycles += gput_sprite(16, 16);
} break;
case 0x7C:
{
gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
gpuDrawS16(packet);
+ cpu_cycles += gput_sprite(16, 16);
break;
}
// fallthrough
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver);
+ cpu_cycles += gput_sprite(16, 16);
} break;
#ifdef TEST
gpu.ex_regs[1] &= ~0x1ff;
gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff;
+ *cpu_cycles_out += cpu_cycles;
*last_cmd = cmd;
return list - list_start;
}
void renderer_sync_ecmds(u32 *ecmds)
{
- int dummy;
- do_cmd_list(&ecmds[1], 6, &dummy);
+ int dummy = 0;
+ do_cmd_list(&ecmds[1], 6, &dummy, &dummy);
}
void renderer_update_caches(int x, int y, int w, int h, int state_changed)
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
+int do_cmd_list(unsigned int *list, int list_len, int *cycles, int *last_cmd)
{
unsigned int cmd = 0, len, i;
unsigned int *list_start = list;
void renderer_sync_ecmds(uint32_t *ecmds)
{
- int dummy;
- do_cmd_list(&ecmds[1], 6, &dummy);
+ int dummy = 0;
+ do_cmd_list(&ecmds[1], 6, &dummy, &dummy);
}
void renderer_update_caches(int x, int y, int w, int h, int state_changed)
#include <stdlib.h>
#include <string.h>
#include "gpu.h"
+#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"
struct psx_gpu gpu;
-static noinline int do_cmd_buffer(uint32_t *data, int count);
+static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles);
static void finish_vram_transfer(int is_read);
static noinline void do_cmd_reset(void)
{
+ int dummy = 0;
if (unlikely(gpu.cmd_len > 0))
- do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
+ do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
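+ // the cycle estimate for these flushed leftovers is discarded via dummy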
gpu.cmd_len = 0;
if (unlikely(gpu.dma.h > 0))
gpu.frameskip.active = 0;
if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
- int dummy;
- do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
+ int dummy = 0;
+ do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy);
gpu.frameskip.pending_fill[0] = 0;
}
}
gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
}
-static void do_vram_copy(const uint32_t *params)
+static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
{
const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
uint16_t lbuf[128];
uint32_t x, y;
+ *cpu_cycles += gput_copy(w, h);
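+ // cost is charged even if the copy below is skipped as a no-op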
if (sx == dx && sy == dy && msb == 0)
return;
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
- int cmd = 0, pos = 0, len, dummy, v;
+ int cmd = 0, pos = 0, len, dummy = 0, v;
int skip = 1;
gpu.frameskip.pending_fill[0] = 0;
case 0x02:
if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
// clearing something large, don't skip
- do_cmd_list(list, 3, &dummy);
+ do_cmd_list(list, 3, &dummy, &dummy);
else
memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
break;
return pos;
}
-static noinline int do_cmd_buffer(uint32_t *data, int count)
+static noinline int do_cmd_buffer(uint32_t *data, int count, int *cpu_cycles)
{
int cmd, pos;
uint32_t old_e3 = gpu.ex_regs[3];
cmd = -1; // incomplete cmd, can't consume yet
break;
}
- do_vram_copy(data + pos + 1);
+ do_vram_copy(data + pos + 1, cpu_cycles);
vram_dirty = 1;
pos += 4;
continue;
if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
else {
- pos += do_cmd_list(data + pos, count - pos, &cmd);
+ pos += do_cmd_list(data + pos, count - pos, cpu_cycles, &cmd);
vram_dirty = 1;
}
static noinline void flush_cmd_buffer(void)
{
- int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
+ int dummy = 0, left;
+ left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy);
if (left > 0)
memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
if (left != gpu.cmd_len) {
void GPUwriteDataMem(uint32_t *mem, int count)
{
- int left;
+ int dummy = 0, left;
log_io("gpu_dma_write %p %d\n", mem, count);
if (unlikely(gpu.cmd_len > 0))
flush_cmd_buffer();
- left = do_cmd_buffer(mem, count);
+ left = do_cmd_buffer(mem, count, &dummy);
if (left)
log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
{
uint32_t addr, *list, ld_addr = 0;
int len, left, count;
- long cpu_cycles = 0;
+ int cpu_cycles = 0;
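+ // also accumulates the per-command estimates from do_cmd_buffer() below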
preload(rambase + (start_addr & 0x1fffff) / 4);
}
if (len) {
- left = do_cmd_buffer(list + 1, len);
+ left = do_cmd_buffer(list + 1, len, &cpu_cycles);
if (left) {
memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
gpu.cmd_len = left;
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(uint32_t *list, int count, int *last_cmd);
+int do_cmd_list(uint32_t *list, int count, int *cycles, int *last_cmd);
struct rearmed_cbs;
--- /dev/null
+
+// very conservative and wrong
+#define gput_fill(w, h) (23 + (4 + (w) / 16u) * (h))
+#define gput_copy(w, h) ((w) * (h))
+#define gput_poly_base() (23)
+#define gput_poly_base_t() (gput_poly_base() + 90)
+#define gput_poly_base_g() (gput_poly_base() + 144)
+#define gput_poly_base_gt() (gput_poly_base() + 225)
+#define gput_quad_base() gput_poly_base()
+#define gput_quad_base_t() gput_poly_base_t()
+#define gput_quad_base_g() gput_poly_base_g()
+#define gput_quad_base_gt() gput_poly_base_gt()
+#define gput_line(k) (8 + (k))
+#define gput_sprite(w, h) (8 + ((w) / 2u) * (h))
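+
+// Renderers sum these while parsing a command list and report the total
+// through do_cmd_list()'s cycles argument, e.g.:
+//   cpu_cycles += gput_sprite(w, h);
+//   ...
+//   *cpu_cycles_out += cpu_cycles;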
+
pcnt_init();
renderer_init();
- memcpy(gpu.vram, state.vram, sizeof(gpu.vram));
+ memcpy(gpu.vram, state.vram, 1024*512*2);
if ((state.gpu_register[8] & 0x24) == 0x24)
renderer_set_interlace(1, !(state.status >> 31));
start_cycles = pcnt_get();
- do_cmd_list(list, size / 4, &dummy);
+ do_cmd_list(list, size / 4, &dummy, &dummy);
renderer_flush_queues();
printf("%u\n", pcnt_get() - start_cycles);