/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define unlikely(x) __builtin_expect((x), 0)
#define noinline __attribute__((noinline))

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu __attribute__((aligned(2048)));

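// return to the power-on state: GPUSTAT reads 0x14802000 (command/DMA
// ready flags set, display blanked), GP1(3) display-disable latched,
// 256x240 display mode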
static noinline void do_reset(void)
{
  memset(gpu.regs, 0, sizeof(gpu.regs));
  memset(gpu.ex_regs, 0, sizeof(gpu.ex_regs));
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

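// GP1(6)/GP1(7) program the display range in GPU video clock ticks;
// the dot clock dividers of the 256/320/512/640 modes (10/8/5/4) all
// work out to 2560 ticks for a full-width line, hence the constant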
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

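// skip at most frameskip.set frames in a row; *advice is an external
// hint from the frontend that can switch skipping on early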
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;
}

static noinline void decide_frameskip_allow(uint32_t cmd_e3)
{
  // no frameskip if it decides to draw to display area,
  // but not for interlace since it'll most likely always do that
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  // one unsigned compare per axis: values below screen.x/y wrap to huge
  // numbers, so this tests "outside [start, start + size)" in one go
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
}

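// GP1(0x10) info queries: 02 = texture window (e2), 03/04 = draw area
// top-left/bottom-right (e3/e4), 05 = draw offset (e5), 06 apparently
// mirrors e5 here, 07 = GPU version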
static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
  case 0x02:
  case 0x03:
  case 0x04:
  case 0x05:
    gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
    break;
  case 0x06:
    gpu.gp0 = gpu.ex_regs[5] & 0xfffff;
    break;
  case 0x07:
    gpu.gp0 = 2;
    break;
  default:
    gpu.gp0 = 0;
    break;
  }
}

long GPUinit(void)
{
  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  do_reset();
  return ret;
}

long GPUshutdown(void)
{
  return vout_finish();
}

void GPUwriteStatus(uint32_t data)
{
  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    // filter out repeated writes, except for reset (0) and display start
    // (5), which have side effects even with an unchanged value
    if (cmd != 0 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
  case 0x00:
    do_reset();
    break;
  case 0x03:
    gpu.status.blanking = data & 1;
    break;
  case 0x04:
    gpu.status.dma = data & 3;
    break;
  case 0x05:
    // display start; also serves as the frame flip point for frameskip
    gpu.screen.x = data & 0x3ff;
    gpu.screen.y = (data >> 10) & 0x3ff;
    if (gpu.frameskip.set) {
      decide_frameskip_allow(gpu.ex_regs[3]);
      if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
        decide_frameskip();
        gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
      }
    }
    break;
  case 0x06:
    gpu.screen.x1 = data & 0xfff;
    gpu.screen.x2 = (data >> 12) & 0xfff;
    update_width();
    break;
  case 0x07:
    gpu.screen.y1 = data & 0x3ff;
    gpu.screen.y2 = (data >> 10) & 0x3ff;
    update_height();
    break;
  case 0x08:
    gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
    gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
    gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
    update_width();
    update_height();
    break;
  default:
    if ((cmd & 0xf0) == 0x10)
      get_gpu_info(data);
    break;
  }
}

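// number of parameter words following each GP0 command word;
// a complete packet is 1 + cmd_lengths[cmd] words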
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

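// VRAM is a single 1024x512 plane of 16bpp pixels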
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  if (gpu.dma.offset) {
    // finish a partially transferred line from the previous call
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0 && count > 0) {
    y &= 511;
    do_vram_line(x, y, sdata, count, is_read);
    o = count;
    count = 0;
  }
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

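// GP0(0xa0)/GP0(0xc0): pos_word = x | (y << 16), size_word = w | (h << 16)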
static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = size_word & 0x3ff;
  gpu.dma.h = (size_word >> 16) & 0x1ff;
  gpu.dma.offset = 0;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }
  else {
    renderer_invalidate_caches(gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

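// batch as long a run of contiguous draw commands as possible for the
// renderer; stop at VRAM i/o commands (0xa0/0xc0) and incomplete packets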
static int check_cmd(uint32_t *data, int count)
{
  int len, cmd, start, pos;
  int vram_dirty = 0;

  // process buffer
  for (start = pos = 0; pos < count; )
  {
    cmd = -1;
    len = 0;

    if (gpu.dma.h) {
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
      start = pos;
    }

    // do look-ahead pass to detect SR changes and VRAM i/o
    while (pos < count) {
      uint32_t *list = data + pos;
      cmd = list[0] >> 24;
      len = 1 + cmd_lengths[cmd];

      //printf("  %3d: %02x %d\n", pos, cmd, len);
      if ((cmd & 0xf4) == 0x24) {
        // flat textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4] & 0x1ff;
      }
      else if ((cmd & 0xf4) == 0x34) {
        // shaded textured prim
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[5] & 0x1ff;
      }
      else if (cmd == 0xe3)
        decide_frameskip_allow(list[0]);

      if (2 <= cmd && cmd < 0xc0)
        vram_dirty = 1;
      else if ((cmd & 0xf8) == 0xe0)
        gpu.ex_regs[cmd & 7] = list[0];

      if (pos + len > count) {
        cmd = -1;
        break; // incomplete cmd
      }
      if (cmd == 0xa0 || cmd == 0xc0)
        break; // image i/o
      pos += len;
    }

    if (pos - start > 0) {
      if (!gpu.frameskip.active || !gpu.frameskip.allow)
        do_cmd_list(data + start, pos - start);
      start = pos;
    }

    if (cmd == 0xa0 || cmd == 0xc0) {
      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], cmd == 0xc0);
      pos += len;
    }
    else if (cmd == -1)
      break;
  }

  // mirror the software-visible state into GPUSTAT: bits 0-10 come from
  // the e1 draw mode reg, bits 11-12 from the e6 mask settings
  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  if (gpu.frameskip.active)
    renderer_sync_ecmds(gpu.ex_regs);
  gpu.state.fb_dirty |= vram_dirty;

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = check_cmd(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = check_cmd(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

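// a DMA linked list node is (word_count << 24) | next_addr, followed by
// word_count words of GP0 data; next_addr == 0xffffff ends the list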
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list;
  uint32_t *llist_entry = NULL;
  int len, left, count;
  long cpu_cycles = 0;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  // ff7 sends its main list twice, detect this and mark the head of the
  // previously sent list so that the duplicate walk terminates immediately
  if (*gpu.state.frame_count == gpu.state.last_list.frame &&
      *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
      gpu.state.last_list.cycles > 2048)
  {
    llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
    *llist_entry |= 0x800000;
  }

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; addr != 0xffffff; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    // loop detection marker
    // (bit23 set causes DMA error on real machine, so
    //  unlikely to be ever set by the game)
    list[0] |= 0x800000;

    if (len) {
      left = check_cmd(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    if (addr & 0x800000)
      break;
  }

  // remove loop detection markers
  addr = start_addr & 0x1fffff;
  while (count-- > 0) {
    list = rambase + addr / 4;
    addr = list[0] & 0x1fffff;
    list[0] &= ~0x800000;
  }
  if (llist_entry)
    *llist_entry &= ~0x800000;

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

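// savestate block in the PSEmu Pro style shared by other GPU plugins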
struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
  case 1: // save
    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
    memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
    memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
    freeze->ulStatus = gpu.status.reg;
    break;
  case 0: // load
    renderer_invalidate_caches(0, 0, 1024, 512);
    memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
    memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
    memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
    gpu.status.reg = freeze->ulStatus;
    for (i = 8; i > 0; i--) {
      // regs[i] was just flipped, so the real value written below always
      // differs and won't be filtered out as a duplicate
      gpu.regs[i] ^= 1; // avoid reg change detection
      GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
    }
    renderer_sync_ecmds(gpu.ex_regs);
    break;
  }

  return 1;
}

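// expected to be called by the core once per emulated vblank; presents
// the frame unless the display is blanked, unchanged, or being skipped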
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking || !gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      // no complete frame was rendered; keep waiting, but force one
      // through if ~9 frames have passed without a flip
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

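// pcsx_rearmed-specific hook: the frontend hands in its counters and
// configuration instead of going through the standard plugin interface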
void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab