extern const unsigned char cmd_lengths[256];
-void do_cmd_list(unsigned int *list, int list_len)
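+// Walks a GP0 command list. Returns the number of 32-bit words consumed;
+// *last_cmd receives the last command seen, or -1 if the list ended
+// mid-command.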
+int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
{
- unsigned int cmd, len;
-
+ unsigned int cmd = 0, len;
+ unsigned int *list_start = list;
unsigned int *list_end = list + list_len;
for (; list < list_end; list += 1 + len)
{
cmd = *list >> 24;
len = cmd_lengths[cmd];
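+ // don't run a command whose parameter words extend past the buffer end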
+ if (list + 1 + len > list_end) {
+ cmd = -1;
+ break;
+ }
+
+#ifndef TEST
+ if (cmd == 0xa0 || cmd == 0xc0)
+ break; // image i/o, forward to upper layer
+ else if ((cmd & 0xf8) == 0xe0)
+ gpu.ex_regs[cmd & 7] = list[0];
+#endif
primTableJ[cmd]((void *)list);
switch(cmd)
{
case 0x48 ... 0x4F:
{
- u32 num_vertexes = 1;
- u32 *list_position = &(list[2]);
+ u32 num_vertexes = 2;
+ u32 *list_position = &(list[3]);
while(1)
{
num_vertexes++;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2);
-
+ len += (num_vertexes - 2);
break;
}
case 0x58 ... 0x5F:
{
- u32 num_vertexes = 1;
- u32 *list_position = &(list[2]);
+ u32 num_vertexes = 2;
+ u32 *list_position = &(list[4]);
while(1)
{
num_vertexes++;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2) * 2;
-
+ len += (num_vertexes - 2) * 2;
break;
}
#endif
}
}
+
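+ // sync gpulib's E1 shadow with the texpage bits the rasterizer keeps in
+ // its status word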
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff;
+
+ *last_cmd = cmd;
+ return list - list_start;
}
void renderer_sync_ecmds(uint32_t *ecmds)
extern const unsigned char cmd_lengths[256];
-void do_cmd_list(unsigned int *list, int list_len)
+// XXX: mostly dupe code from soft peops
+int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
{
- unsigned int cmd, len;
-
+ unsigned int cmd = 0, len;
+ unsigned int *list_start = list;
unsigned int *list_end = list + list_len;
for (; list < list_end; list += 1 + len)
{
cmd = *list >> 24;
len = cmd_lengths[cmd];
+ if (list + 1 + len > list_end) {
+ cmd = -1;
+ break;
+ }
+
+#ifndef TEST
+ if (cmd == 0xa0 || cmd == 0xc0)
+ break; // image i/o, forward to upper layer
+ else if ((cmd & 0xf8) == 0xe0)
+ gpu.ex_regs[cmd & 7] = list[0];
+#endif
primTableJ[cmd]((void *)list);
switch(cmd)
{
case 0x48 ... 0x4F:
{
- uint32_t num_vertexes = 1;
- uint32_t *list_position = &(list[2]);
+ uint32_t num_vertexes = 2;
+ uint32_t *list_position = &(list[3]);
while(1)
{
num_vertexes++;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2);
-
+ len += (num_vertexes - 2);
break;
}
case 0x58 ... 0x5F:
{
- uint32_t num_vertexes = 1;
- uint32_t *list_position = &(list[2]);
+ uint32_t num_vertexes = 2;
+ uint32_t *list_position = &(list[4]);
while(1)
{
num_vertexes++;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2) * 2;
-
+ len += (num_vertexes - 2) * 2;
break;
}
#endif
}
}
+
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff;
+
+ *last_cmd = cmd;
+ return list - list_start;
}
void renderer_sync_ecmds(uint32_t *ecmds)
void flush_render_block_buffer(psx_gpu_struct *psx_gpu);
void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram);
-void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size);
+u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command);
void triangle_benchmark(psx_gpu_struct *psx_gpu);
init_counter();
#endif
- gpu_parse(psx_gpu, list, size);
+ gpu_parse(psx_gpu, list, size, NULL);
flush_render_block_buffer(psx_gpu);
clear_stats();
u32 cycles = get_counter();
#endif
- gpu_parse(psx_gpu, list, size);
+ gpu_parse(psx_gpu, list, size, NULL);
flush_render_block_buffer(psx_gpu);
#ifdef NEON_BUILD
get_vertex_data_xy(vertex_number, offset16); \
set_vertex_color_constant(vertex_number, color) \
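+// SET_Ex(r, v) lets the including file mirror E1-E6 register writes;
+// it defaults to a no-op.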
+#ifndef SET_Ex
+#define SET_Ex(r, v)
+#endif
+
vertex_struct vertexes[4] __attribute__((aligned(32)));
-void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
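+// Returns the number of words consumed; stores the last command seen to
+// *last_command when it is non-NULL.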
+u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
{
- u32 current_command, command_length;
-
+ u32 current_command = 0, command_length;
+
+ u32 *list_start = list;
u32 *list_end = list + (size / 4);
for(; list < list_end; list += 1 + command_length)
{
s16 *list_s16 = (void *)list;
current_command = *list >> 24;
command_length = command_lengths[current_command];
+ if (list + 1 + command_length > list_end) {
+ current_command = (u32)-1;
+ break;
+ }
switch(current_command)
{
render_block_move(psx_gpu, list_s16[2] & 0x3FF, list_s16[3] & 0x1FF,
list_s16[4] & 0x3FF, list_s16[5] & 0x1FF, list_s16[6], list_s16[7]);
break;
-
+
+#ifdef PCSX
+ case 0xA0: // sys -> vid
+ case 0xC0: // vid -> sys
+ goto breakloop;
+#else
case 0xA0: // sys -> vid
{
u32 load_x = list_s16[2] & 0x3FF;
load_width, load_height, load_width);
break;
}
-
+
case 0xC0: // vid -> sys
break;
-
+#endif
+
case 0xE1:
set_texture(psx_gpu, list[0] & 0x1FF);
psx_gpu->render_state_base &= ~RENDER_STATE_DITHER;
psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1;
+ SET_Ex(1, list[0]);
break;
case 0xE2:
update_texture_ptr(psx_gpu);
}
+ SET_Ex(2, list[0]);
break;
}
psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
psx_gpu->viewport_end_y);
#endif
+ SET_Ex(3, list[0]);
break;
case 0xE4:
psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
psx_gpu->viewport_end_y);
#endif
+ SET_Ex(4, list[0]);
break;
case 0xE5:
psx_gpu->offset_x = offset_x >> 21;
psx_gpu->offset_y = offset_y >> 21;
+ SET_Ex(5, list[0]);
break;
}
psx_gpu->mask_msb = mask_msb;
}
+ SET_Ex(6, list[0]);
break;
}
break;
}
}
+
+#ifdef PCSX
+breakloop:
+#endif
+ if (last_command != NULL)
+ *last_command = current_command;
+ return list - list_start;
}
extern const unsigned char cmd_lengths[256];
#define command_lengths cmd_lengths
+static unsigned int *ex_regs;
+
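+// build the included parser in PCSX mode (stop on image i/o commands) and
+// mirror E1-E6 writes into gpulib's shadow registers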
+#define PCSX
+#define SET_Ex(r, v) \
+ ex_regs[r] = v
+
#include "psx_gpu/psx_gpu.c"
#include "psx_gpu/psx_gpu_parse.c"
#include "../gpulib/gpu.h"
static psx_gpu_struct egpu __attribute__((aligned(256)));
-void do_cmd_list(uint32_t *list, int count)
+int do_cmd_list(uint32_t *list, int count, int *last_cmd)
{
- gpu_parse(&egpu, list, count * 4);
+ int ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
+
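+ // keep gpulib's E1 shadow in sync with the rasterizer's texture state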
+ ex_regs[1] &= ~0x1ff;
+ ex_regs[1] |= egpu.texture_settings & 0x1ff;
+ return ret;
}
int renderer_init(void)
{
initialize_psx_gpu(&egpu, gpu.vram);
+ ex_regs = gpu.ex_regs;
return 0;
}
void renderer_sync_ecmds(uint32_t *ecmds)
{
- gpu_parse(&egpu, ecmds + 1, 6 * 4);
+ gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL);
}
void renderer_update_caches(int x, int y, int w, int h)
extern const unsigned char cmd_lengths[256];
-void do_cmd_list(unsigned int *list, int list_len)
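+// same contract as the other renderers: returns words consumed and reports
+// the last command through *last_cmd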
+int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
{
- unsigned int cmd, len;
+ unsigned int cmd = 0, len;
+ unsigned int *list_start = list;
unsigned int *list_end = list + list_len;
linesInterlace = force_interlace;
for (; list < list_end; list += 1 + len)
{
cmd = *list >> 24;
len = cmd_lengths[cmd];
+ if (list + 1 + len > list_end) {
+ cmd = -1;
+ break;
+ }
+
+#ifndef TEST
+ if (cmd == 0xa0 || cmd == 0xc0)
+ break; // image i/o, forward to upper layer
+ else if ((cmd & 0xf8) == 0xe0)
+ gpu.ex_regs[cmd & 7] = list[0];
+#endif
switch(cmd)
{
while(1)
{
- if((*list_position & 0xf000f000) == 0x50005000 || list_position >= list_end)
- break;
-
PacketBuffer.U4[1] = PacketBuffer.U4[2];
PacketBuffer.U4[2] = *list_position++;
gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
num_vertexes++;
+ if(list_position >= list_end || (*list_position & 0xf000f000) == 0x50005000)
+ break;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2);
-
+ len += (num_vertexes - 2);
break;
}
while(1)
{
- if((*list_position & 0xf000f000) == 0x50005000 || list_position >= list_end)
- break;
-
PacketBuffer.U4[0] = PacketBuffer.U4[2];
PacketBuffer.U4[1] = PacketBuffer.U4[3];
PacketBuffer.U4[2] = *list_position++;
gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]);
num_vertexes++;
+ if(list_position >= list_end || (*list_position & 0xf000f000) == 0x50005000)
+ break;
}
- if(num_vertexes > 2)
- len += (num_vertexes - 2) * 2;
-
+ len += (num_vertexes - 2) * 2;
break;
}
break;
}
}
+
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= GPU_GP1 & 0x1ff;
+
+ *last_cmd = cmd;
+ return list - list_start;
}
void renderer_sync_ecmds(uint32_t *ecmds)
gpu.frameskip.active = 0;
}
-static noinline void decide_frameskip_allow(uint32_t cmd_e3)
+static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
// no frameskip if it decides to draw to display area,
// but not for interlace since it'll most likely always do that
gpu.frameskip.allow = gpu.status.interlace ||
(uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
(uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
+ return gpu.frameskip.allow;
}
static noinline void get_gpu_info(uint32_t data)
gpu.dma_start.w, gpu.dma_start.h);
}
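+// Like do_cmd_list(), but while frameskip allows it nothing is rendered;
+// only the shadow registers and the frameskip decision are updated. Large
+// screen clears are drawn anyway (see the 0x02 case).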
+static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
+{
+ int cmd = 0, pos = 0, len, dummy;
+ int skip = 1;
+
+ while (pos < count && skip) {
+ uint32_t *list = data + pos;
+ cmd = list[0] >> 24;
+ len = 1 + cmd_lengths[cmd];
+ if (pos + len > count) {
+ cmd = -1;
+ break; // incomplete cmd
+ }
+
+ if (cmd == 0x02) {
+ if ((list[2] & 0x3ff) > gpu.screen.w || ((list[2] >> 16) & 0x1ff) > gpu.screen.h)
+ // clearing something large, don't skip
+ do_cmd_list(data + pos, 3, &dummy);
+ }
+ else if ((cmd & 0xf4) == 0x24) {
+ // flat textured prim
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= list[4] & 0x1ff;
+ }
+ else if ((cmd & 0xf4) == 0x34) {
+ // shaded textured prim
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= list[5] & 0x1ff;
+ }
+ else if (cmd == 0xe3)
+ skip = decide_frameskip_allow(list[0]);
+
+ if ((cmd & 0xf8) == 0xe0)
+ gpu.ex_regs[cmd & 7] = list[0];
+
+ if (cmd == 0xa0 || cmd == 0xc0)
+ break; // image i/o
+ pos += len;
+ }
+
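+ // hand the tracked E* state to the renderer so its state stays consistent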
+ renderer_sync_ecmds(gpu.ex_regs);
+ *last_cmd = cmd;
+ return pos;
+}
+
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
- int len, cmd, start, pos;
+ int cmd, pos;
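+ // snapshot E3 (draw area) to re-evaluate the frameskip decision if it
+ // changes during this buffer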
+ uint32_t old_e3 = gpu.ex_regs[3];
int vram_dirty = 0;
// process buffer
- for (start = pos = 0; pos < count; )
+ for (pos = 0; pos < count; )
{
- cmd = -1;
- len = 0;
-
- if (gpu.dma.h) {
+ if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
+ vram_dirty = 1;
pos += do_vram_io(data + pos, count - pos, 0);
if (pos == count)
break;
- start = pos;
- }
-
- // do look-ahead pass to detect SR changes and VRAM i/o
- while (pos < count) {
- uint32_t *list = data + pos;
- cmd = list[0] >> 24;
- len = 1 + cmd_lengths[cmd];
-
- //printf(" %3d: %02x %d\n", pos, cmd, len);
- if ((cmd & 0xf4) == 0x24) {
- // flat textured prim
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= list[4] & 0x1ff;
- }
- else if ((cmd & 0xf4) == 0x34) {
- // shaded textured prim
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= list[5] & 0x1ff;
- }
- else if (cmd == 0xe3)
- decide_frameskip_allow(list[0]);
-
- if (2 <= cmd && cmd < 0xc0)
- vram_dirty = 1;
- else if ((cmd & 0xf8) == 0xe0)
- gpu.ex_regs[cmd & 7] = list[0];
-
- if (pos + len > count) {
- cmd = -1;
- break; // incomplete cmd
- }
- if (cmd == 0xa0 || cmd == 0xc0)
- break; // image i/o
- pos += len;
- }
-
- if (pos - start > 0) {
- if (!gpu.frameskip.active || !gpu.frameskip.allow)
- do_cmd_list(data + start, pos - start);
- start = pos;
}
+ cmd = data[pos] >> 24;
if (cmd == 0xa0 || cmd == 0xc0) {
// consume vram write/read cmd
+ if (pos + 3 > count) {
+ cmd = -1; // incomplete vram i/o cmd, wait for more data
+ break;
+ }
start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
- pos += len;
+ pos += 3;
+ continue;
}
- else if (cmd == -1)
+
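+ // while a frame is being skipped only scan the list, otherwise render it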
+ if (gpu.frameskip.active && gpu.frameskip.allow)
+ pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
+ else {
+ pos += do_cmd_list(data + pos, count - pos, &cmd);
+ vram_dirty = 1;
+ }
+
+ if (cmd == -1)
+ // incomplete cmd
break;
}
gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;
- if (gpu.frameskip.active)
- renderer_sync_ecmds(gpu.ex_regs);
gpu.state.fb_dirty |= vram_dirty;
+ if (old_e3 != gpu.ex_regs[3])
+ decide_frameskip_allow(gpu.ex_regs[3]);
+
return count - pos;
}
extern const unsigned char cmd_lengths[256];
-void do_cmd_list(uint32_t *list, int count);
+int do_cmd_list(uint32_t *list, int count, int *last_cmd);
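+// Hypothetical caller sketch (do_cmd_buffer in gpu.c is the real consumer):
+//   int last, pos = 0;
+//   while (pos < count) {
+//     pos += do_cmd_list(data + pos, count - pos, &last);
+//     if (last == 0xa0 || last == 0xc0)
+//       break; // image i/o: start the vram transfer, then use do_vram_io()
+//     if (last == -1)
+//       break; // truncated command, keep the leftover words for later
+//   }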
struct rearmed_cbs;