Merge pull request #633 from libretro/revert-631-autoframeskip
[pcsx_rearmed.git] / plugins / gpulib / gpu.c
/*
 * (C) Gražvydas "notaz" Ignotas, 2011-2012
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h> /* for calloc */

#include "gpu.h"

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __GNUC__
#define unlikely(x) __builtin_expect((x), 0)
#define preload __builtin_prefetch
#define noinline __attribute__((noinline))
#else
#define unlikely(x) (x) /* must still expand to the condition, not nothing */
#define preload(...)
#define noinline
#endif

#define gpu_log(fmt, ...) \
  printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)

//#define log_io gpu_log
#define log_io(...)
//#define log_anomaly gpu_log
#define log_anomaly(...)

struct psx_gpu gpu;

static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);

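/*
 * Flush whatever is left in the command buffer and close out any
 * in-flight VRAM transfer; used by GP1(01h) and the full GP1(00h) reset.
 */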
static noinline void do_cmd_reset(void)
{
  renderer_sync();

  if (unlikely(gpu.cmd_len > 0))
    do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  gpu.cmd_len = 0;

  if (unlikely(gpu.dma.h > 0))
    finish_vram_transfer(gpu.dma_start.is_read);
  gpu.dma.h = 0;
}

static noinline void do_reset(void)
{
  unsigned int i;
  do_cmd_reset();

  memset(gpu.regs, 0, sizeof(gpu.regs));
  for (i = 0; i < ARRAY_SIZE(gpu.ex_regs); i++)
    gpu.ex_regs[i] = (0xe0 + i) << 24;
  gpu.status.reg = 0x14802000;
  gpu.gp0 = 0;
  gpu.regs[3] = 1;
  gpu.screen.hres = gpu.screen.w = 256;
  gpu.screen.vres = gpu.screen.h = 240;
}

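/*
 * GP1(06h) gives the horizontal display range in video-clock ticks;
 * a range of 2560 ticks appears to correspond to a full scanline, so
 * the visible width is hres scaled by (x2 - x1) / 2560.
 */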
static noinline void update_width(void)
{
  int sw = gpu.screen.x2 - gpu.screen.x1;
  if (sw <= 0 || sw >= 2560)
    // full width
    gpu.screen.w = gpu.screen.hres;
  else
    gpu.screen.w = sw * gpu.screen.hres / 2560;
}

static noinline void update_height(void)
{
  // TODO: emulate this properly..
  int sh = gpu.screen.y2 - gpu.screen.y1;
  if (gpu.status.dheight)
    sh *= 2;
  if (sh <= 0 || sh > gpu.screen.vres)
    sh = gpu.screen.vres;

  gpu.screen.h = sh;
}

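/*
 * Frameskip state machine, advanced once per flip: while skipping,
 * count the dropped frames; otherwise mark a frame as ready to display.
 * Skipping starts on the frontend's advice or continues until "set"
 * consecutive frames have been dropped. A fill (cmd 0x02) deferred
 * during a skipped frame is replayed once skipping ends.
 */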
static noinline void decide_frameskip(void)
{
  if (gpu.frameskip.active)
    gpu.frameskip.cnt++;
  else {
    gpu.frameskip.cnt = 0;
    gpu.frameskip.frame_ready = 1;
  }

  if (!gpu.frameskip.active && *gpu.frameskip.advice)
    gpu.frameskip.active = 1;
  else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
    gpu.frameskip.active = 1;
  else
    gpu.frameskip.active = 0;

  if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
    int dummy;
    do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy);
    gpu.frameskip.pending_fill[0] = 0;
  }
}

static noinline int decide_frameskip_allow(uint32_t cmd_e3)
{
  // don't skip if drawing lands in the displayed area, except under
  // interlace, where that happens nearly all the time anyway
  uint32_t x = cmd_e3 & 0x3ff;
  uint32_t y = (cmd_e3 >> 10) & 0x3ff;
  gpu.frameskip.allow = gpu.status.interlace ||
    (uint32_t)(x - gpu.screen.x) >= (uint32_t)gpu.screen.w ||
    (uint32_t)(y - gpu.screen.y) >= (uint32_t)gpu.screen.h;
  return gpu.frameskip.allow;
}

static noinline void get_gpu_info(uint32_t data)
{
  switch (data & 0x0f) {
    case 0x02:
    case 0x03:
    case 0x04:
      gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
      break;
    case 0x05:
    case 0x06:
      gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
      break;
    case 0x07:
      gpu.gp0 = 2;
      break;
    default:
      gpu.gp0 = 0;
      break;
  }
}

// double, for overdraw guard
#define VRAM_SIZE ((1024 * 512 * 2 * 2) + 4096)

// Minimum 16-byte VRAM alignment needed by the pixel-skipping
// renderer/downscaler gpu_unai uses in high-res modes:
#ifdef GCW_ZERO
  // On the GCW Zero (MIPS), align to 8192 bytes (one TLB entry) to reduce
  // the number of TLB refills. (Change this if it ever gets large-page support.)
  #define VRAM_ALIGN 8192
#else
  #define VRAM_ALIGN 16
#endif

// vram ptr received from mmap/malloc/alloc (will deallocate using this)
static uint16_t *vram_ptr_orig = NULL;

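/*
 * Both allocation paths below over-allocate by VRAM_ALIGN-1 bytes,
 * skip the 4 KB guard region in front, then round the pointer up with
 * the usual (p + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1) trick.
 */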
#ifdef GPULIB_USE_MMAP
static int map_vram(void)
{
  gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE + (VRAM_ALIGN-1));
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  }
  else {
    fprintf(stderr, "could not map vram, expect crashes\n");
    return -1;
  }
}
#else
static int allocate_vram(void)
{
  gpu.vram = vram_ptr_orig = (uint16_t*)calloc(VRAM_SIZE + (VRAM_ALIGN-1), 1);
  if (gpu.vram != NULL) {
    // 4kb guard in front
    gpu.vram += (4096 / 2);
    // Align
    gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
    return 0;
  } else {
    fprintf(stderr, "could not allocate vram, expect crashes\n");
    return -1;
  }
}

// non-mmap builds allocate instead; keep the name the delayed-map path uses
static int map_vram(void)
{
  return allocate_vram();
}
#endif

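/*
 * Note: frame_count/hcnt point at gpu.zero until the frontend installs
 * the real counters via GPUrearmedCallbacks(), so early gpu_log output
 * just prints zeros.
 */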
long GPUinit(void)
{
#ifndef GPULIB_USE_MMAP
  if (gpu.vram == NULL) {
    if (allocate_vram() != 0) {
      printf("ERROR: could not allocate VRAM, exiting..\n");
      exit(1);
    }
  }
#endif

  //extern uint32_t hSyncCount;    // in psxcounters.cpp
  //extern uint32_t frame_counter; // in psxcounters.cpp
  //gpu.state.hcnt = &hSyncCount;
  //gpu.state.frame_count = &frame_counter;

  int ret;
  ret = vout_init();
  ret |= renderer_init();

  gpu.state.frame_count = &gpu.zero;
  gpu.state.hcnt = &gpu.zero;
  gpu.frameskip.active = 0;
  gpu.cmd_len = 0;
  do_reset();

  /*if (gpu.mmap != NULL) {
    if (map_vram() != 0)
      ret = -1;
  }*/
  return ret;
}

long GPUshutdown(void)
{
  long ret;

  renderer_finish();
  ret = vout_finish();

  if (vram_ptr_orig != NULL) {
#ifdef GPULIB_USE_MMAP
    gpu.munmap(vram_ptr_orig, VRAM_SIZE);
#else
    free(vram_ptr_orig);
#endif
  }
  vram_ptr_orig = gpu.vram = NULL;

  return ret;
}

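/*
 * GP1 (control) write: the top byte selects the command. Repeated
 * writes of an unchanged value are dropped early, except for cmds
 * 0x00, 0x01 and 0x05, which always have side effects.
 */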
void GPUwriteStatus(uint32_t data)
{
  //senquack TODO: Would it be wise to add a cmd buffer flush here, since
  // status settings can affect commands already in the buffer?

  static const short hres[8] = { 256, 368, 320, 384, 512, 512, 640, 640 };
  static const short vres[4] = { 240, 480, 256, 480 };
  uint32_t cmd = data >> 24;

  if (cmd < ARRAY_SIZE(gpu.regs)) {
    if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
      return;
    gpu.regs[cmd] = data;
  }

  gpu.state.fb_dirty = 1;

  switch (cmd) {
    case 0x00:
      do_reset();
      break;
    case 0x01:
      do_cmd_reset();
      break;
    case 0x03:
      gpu.status.blanking = data & 1;
      break;
    case 0x04:
      gpu.status.dma = data & 3;
      break;
    case 0x05:
      gpu.screen.x = data & 0x3ff;
      gpu.screen.y = (data >> 10) & 0x1ff;
      if (gpu.frameskip.set) {
        decide_frameskip_allow(gpu.ex_regs[3]);
        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
          decide_frameskip();
          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
        }
      }
      break;
    case 0x06:
      gpu.screen.x1 = data & 0xfff;
      gpu.screen.x2 = (data >> 12) & 0xfff;
      update_width();
      break;
    case 0x07:
      gpu.screen.y1 = data & 0x3ff;
      gpu.screen.y2 = (data >> 10) & 0x3ff;
      update_height();
      break;
    case 0x08:
      // display mode: bits 0-5 map to status bits 17-22, bit 6 to status bit 16
      gpu.status.reg = (gpu.status.reg & ~0x7f0000) | ((data & 0x3f) << 17) | ((data & 0x40) << 10);
      gpu.screen.hres = hres[(gpu.status.reg >> 16) & 7];
      gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
      update_width();
      update_height();
      renderer_notify_res_change();
      break;
    default:
      if ((cmd & 0xf0) == 0x10)
        get_gpu_info(data);
      break;
  }

#ifdef GPUwriteStatus_ext
  GPUwriteStatus_ext(data);
#endif
}

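/*
 * Payload words following each GP0 opcode (total packet length is
 * 1 + cmd_lengths[cmd]). Variable-length commands are special-cased
 * by the parsers: line strips (0x48-0x4f, 0x58-0x5f) grow until a
 * 0x5xxx5xxx terminator, and VRAM i/o (0xa0/0xc0) is sized by its
 * two header words.
 */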
const unsigned char cmd_lengths[256] =
{
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  3, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 8, 8, 8, 8, // 20
  5, 5, 5, 5, 8, 8, 8, 8, 7, 7, 7, 7, 11, 11, 11, 11,
  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 40
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, // 60
  1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2,
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // c0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // e0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

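// VRAM is a 1024x512 grid of 16bpp pixels; index it by (x, y).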
#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]

static inline void do_vram_line(int x, int y, uint16_t *mem, int l, int is_read)
{
  uint16_t *vram = VRAM_MEM_XY(x, y);
  if (is_read)
    memcpy(mem, vram, l * 2);
  else
    memcpy(vram, mem, l * 2);
}

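/*
 * Move up to `count` words between the active VRAM transfer rectangle
 * and `data`. A partially transferred line is resumed via dma.offset,
 * y wraps at 512 lines, and the return value is the number of words
 * actually consumed.
 */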
static int do_vram_io(uint32_t *data, int count, int is_read)
{
  int count_initial = count;
  uint16_t *sdata = (uint16_t *)data;
  int x = gpu.dma.x, y = gpu.dma.y;
  int w = gpu.dma.w, h = gpu.dma.h;
  int o = gpu.dma.offset;
  int l;
  count *= 2; // operate in 16bpp pixels

  renderer_sync();

  if (gpu.dma.offset) {
    l = w - gpu.dma.offset;
    if (count < l)
      l = count;

    do_vram_line(x + o, y, sdata, l, is_read);

    if (o + l < w)
      o += l;
    else {
      o = 0;
      y++;
      h--;
    }
    sdata += l;
    count -= l;
  }

  for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
    y &= 511;
    do_vram_line(x, y, sdata, w, is_read);
  }

  if (h > 0) {
    if (count > 0) {
      y &= 511;
      do_vram_line(x, y, sdata, count, is_read);
      o = count;
      count = 0;
    }
  }
  else
    finish_vram_transfer(is_read);
  gpu.dma.y = y;
  gpu.dma.h = h;
  gpu.dma.offset = o;

  return count_initial - count / 2;
}

static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
{
  if (gpu.dma.h)
    log_anomaly("start_vram_transfer while old unfinished\n");

  gpu.dma.x = pos_word & 0x3ff;
  gpu.dma.y = (pos_word >> 16) & 0x1ff;
  gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
  gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
  gpu.dma.offset = 0;
  gpu.dma.is_read = is_read;
  gpu.dma_start = gpu.dma;

  renderer_flush_queues();
  if (is_read) {
    gpu.status.img = 1;
    // XXX: wrong for width 1
    memcpy(&gpu.gp0, VRAM_MEM_XY(gpu.dma.x, gpu.dma.y), 4);
    gpu.state.last_vram_read_frame = *gpu.state.frame_count;
  }

  log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
    gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
}

static void finish_vram_transfer(int is_read)
{
  if (is_read)
    gpu.status.img = 0;
  else
    renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
        gpu.dma_start.w, gpu.dma_start.h);
}

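/*
 * Command-list walker used while a frame is being skipped: nothing is
 * drawn, but state that outlives the frame is still tracked (e0-e7
 * regs, the texture page bits from textured-prim words, a pending
 * fill). Stops early when an e3 write makes skipping disallowed or
 * when image i/o is reached.
 */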
static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
{
  int cmd = 0, pos = 0, len, dummy, v;
  int skip = 1;

  gpu.frameskip.pending_fill[0] = 0;

  while (pos < count && skip) {
    uint32_t *list = data + pos;
    cmd = list[0] >> 24;
    len = 1 + cmd_lengths[cmd];

    switch (cmd) {
      case 0x02:
        if ((int)(list[2] & 0x3ff) > gpu.screen.w || (int)((list[2] >> 16) & 0x1ff) > gpu.screen.h)
          // clearing something large, don't skip
          do_cmd_list(list, 3, &dummy);
        else
          memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
        break;
      case 0x24 ... 0x27:
      case 0x2c ... 0x2f:
      case 0x34 ... 0x37:
      case 0x3c ... 0x3f:
        gpu.ex_regs[1] &= ~0x1ff;
        gpu.ex_regs[1] |= list[4 + ((cmd >> 4) & 1)] & 0x1ff;
        break;
      case 0x48 ... 0x4f:
        for (v = 3; pos + v < count; v++)
        {
          if ((list[v] & 0xf000f000) == 0x50005000)
            break;
        }
        len += v - 3;
        break;
      case 0x58 ... 0x5f:
        for (v = 4; pos + v < count; v += 2)
        {
          if ((list[v] & 0xf000f000) == 0x50005000)
            break;
        }
        len += v - 4;
        break;
      default:
        if (cmd == 0xe3)
          skip = decide_frameskip_allow(list[0]);
        if ((cmd & 0xf8) == 0xe0)
          gpu.ex_regs[cmd & 7] = list[0];
        break;
    }

    if (pos + len > count) {
      cmd = -1;
      break; // incomplete cmd
    }
    if (0xa0 <= cmd && cmd <= 0xdf)
      break; // image i/o

    pos += len;
  }

  renderer_sync_ecmds(gpu.ex_regs);
  *last_cmd = cmd;
  return pos;
}

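/*
 * Top-level GP0 word consumer: feeds VRAM i/o, whole command packets,
 * or the skip walker, and returns how many words could not be consumed
 * yet (an incomplete packet stays buffered for the next call).
 */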
static noinline int do_cmd_buffer(uint32_t *data, int count)
{
  int cmd, pos;
  uint32_t old_e3 = gpu.ex_regs[3];
  int vram_dirty = 0;

  // process buffer
  for (pos = 0; pos < count; )
  {
    if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
      vram_dirty = 1;
      pos += do_vram_io(data + pos, count - pos, 0);
      if (pos == count)
        break;
    }

    cmd = data[pos] >> 24;
    if (0xa0 <= cmd && cmd <= 0xdf) {
      if (unlikely((pos + 2) >= count)) {
        // incomplete vram write/read cmd, can't consume yet
        cmd = -1;
        break;
      }

      // consume vram write/read cmd
      start_vram_transfer(data[pos + 1], data[pos + 2], (cmd & 0xe0) == 0xc0);
      pos += 3;
      continue;
    }

    // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
    if (gpu.frameskip.active && (gpu.frameskip.allow || ((data[pos] >> 24) & 0xf0) == 0xe0))
      pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
    else {
      pos += do_cmd_list(data + pos, count - pos, &cmd);
      vram_dirty = 1;
    }

    if (cmd == -1)
      // incomplete cmd
      break;
  }

  gpu.status.reg &= ~0x1fff;
  gpu.status.reg |= gpu.ex_regs[1] & 0x7ff;
  gpu.status.reg |= (gpu.ex_regs[6] & 3) << 11;

  gpu.state.fb_dirty |= vram_dirty;

  if (old_e3 != gpu.ex_regs[3])
    decide_frameskip_allow(gpu.ex_regs[3]);

  return count - pos;
}

static void flush_cmd_buffer(void)
{
  int left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len);
  if (left > 0)
    memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
  gpu.cmd_len = left;
}

void GPUwriteDataMem(uint32_t *mem, int count)
{
  int left;

  log_io("gpu_dma_write %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  left = do_cmd_buffer(mem, count);
  if (left)
    log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
}

void GPUwriteData(uint32_t data)
{
  log_io("gpu_write %08x\n", data);
  gpu.cmd_buffer[gpu.cmd_len++] = data;
  if (gpu.cmd_len >= CMD_BUFFER_LEN)
    flush_cmd_buffer();
}

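/*
 * Walk a DMA linked list: each node's first word holds the payload
 * length in the top byte and the next-node address in the low 24 bits;
 * an address with bit 23 set terminates the list. After LD_THRESHOLD
 * nodes, bit 23 is also used to mark visited nodes so a looping list
 * can be detected; the marks are removed afterwards.
 */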
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
  uint32_t addr, *list, ld_addr = 0;
  int len, left, count;
  long cpu_cycles = 0;

  preload(rambase + (start_addr & 0x1fffff) / 4);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  log_io("gpu_dma_chain\n");
  addr = start_addr & 0xffffff;
  for (count = 0; (addr & 0x800000) == 0; count++)
  {
    list = rambase + (addr & 0x1fffff) / 4;
    len = list[0] >> 24;
    addr = list[0] & 0xffffff;
    preload(rambase + (addr & 0x1fffff) / 4);

    cpu_cycles += 10;
    if (len > 0)
      cpu_cycles += 5 + len;

    log_io(".chain %08x #%d\n", (list - rambase) * 4, len);

    if (len) {
      left = do_cmd_buffer(list + 1, len);
      if (left)
        log_anomaly("GPUdmaChain: discarded %d/%d words\n", left, len);
    }

    #define LD_THRESHOLD (8 * 1024)
    if (count >= LD_THRESHOLD) {
      if (count == LD_THRESHOLD) {
        ld_addr = addr;
        continue;
      }

      // loop detection marker
      // (bit23 set causes DMA error on real machine, so
      //  unlikely to be ever set by the game)
      list[0] |= 0x800000;
    }
  }

  if (ld_addr != 0) {
    // remove loop detection markers
    count -= LD_THRESHOLD + 2;
    addr = ld_addr & 0x1fffff;
    while (count-- > 0) {
      list = rambase + addr / 4;
      addr = list[0] & 0x1fffff;
      list[0] &= ~0x800000;
    }
  }

  gpu.state.last_list.frame = *gpu.state.frame_count;
  gpu.state.last_list.hcnt = *gpu.state.hcnt;
  gpu.state.last_list.cycles = cpu_cycles;
  gpu.state.last_list.addr = start_addr;

  return cpu_cycles;
}

void GPUreadDataMem(uint32_t *mem, int count)
{
  log_io("gpu_dma_read %p %d\n", mem, count);

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  if (gpu.dma.h)
    do_vram_io(mem, count, 1);
}

uint32_t GPUreadData(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.gp0;
  if (gpu.dma.h)
    do_vram_io(&ret, 1, 1);

  log_io("gpu_read %08x\n", ret);
  return ret;
}

uint32_t GPUreadStatus(void)
{
  uint32_t ret;

  if (unlikely(gpu.cmd_len > 0))
    flush_cmd_buffer();

  ret = gpu.status.reg;
  log_io("gpu_read_status %08x\n", ret);
  return ret;
}

struct GPUFreeze
{
  uint32_t ulFreezeVersion;           // should be always 1 for now (set by main emu)
  uint32_t ulStatus;                  // current gpu status
  uint32_t ulControl[256];            // latest control register values
  unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
};

long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
{
  int i;

  switch (type) {
    case 1: // save
      if (gpu.cmd_len > 0)
        flush_cmd_buffer();

      renderer_sync();
      memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
      memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
      memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
      freeze->ulStatus = gpu.status.reg;
      break;
    case 0: // load
      renderer_sync();
      memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
      memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
      memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
      gpu.status.reg = freeze->ulStatus;
      gpu.cmd_len = 0;
      for (i = 8; i > 0; i--) {
        gpu.regs[i] ^= 1; // avoid reg change detection
        GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
      }
      renderer_sync_ecmds(gpu.ex_regs);
      renderer_update_caches(0, 0, 1024, 512);
      break;
  }

  return 1;
}

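/*
 * Per-vsync housekeeping: flush buffered commands, honor blanking, and
 * flip the output if the framebuffer changed. With frameskip enabled a
 * flip is still forced after 9 frames without a ready frame so the
 * display can't stall indefinitely.
 */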
void GPUupdateLace(void)
{
  if (gpu.cmd_len > 0)
    flush_cmd_buffer();
  renderer_flush_queues();

  if (gpu.status.blanking) {
    if (!gpu.state.blanked) {
      vout_blank();
      gpu.state.blanked = 1;
      gpu.state.fb_dirty = 1;
    }
    return;
  }

  renderer_notify_update_lace(0);

  if (!gpu.state.fb_dirty)
    return;

  if (gpu.frameskip.set) {
    if (!gpu.frameskip.frame_ready) {
      if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9)
        return;
      gpu.frameskip.active = 0;
    }
    gpu.frameskip.frame_ready = 0;
  }

  vout_update();
  gpu.state.fb_dirty = 0;
  gpu.state.blanked = 0;
  renderer_notify_update_lace(1);
}

void GPUvBlank(int is_vblank, int lcf)
{
  int interlace = gpu.state.allow_interlace
    && gpu.status.interlace && gpu.status.dheight;
  // interlace doesn't look nice on progressive displays,
  // so we have this "auto" mode here for games that don't read vram
  if (gpu.state.allow_interlace == 2
      && *gpu.state.frame_count - gpu.state.last_vram_read_frame > 1)
  {
    interlace = 0;
  }
  if (interlace || interlace != gpu.state.old_interlace) {
    gpu.state.old_interlace = interlace;

    if (gpu.cmd_len > 0)
      flush_cmd_buffer();
    renderer_flush_queues();
    renderer_set_interlace(interlace, !lcf);
  }
}

#include "../../frontend/plugin_lib.h"

void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
{
  gpu.frameskip.set = cbs->frameskip;
  gpu.frameskip.advice = &cbs->fskip_advice;
  gpu.frameskip.active = 0;
  gpu.frameskip.frame_ready = 1;
  gpu.state.hcnt = cbs->gpu_hcnt;
  gpu.state.frame_count = cbs->gpu_frame_count;
  gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
  gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;

  gpu.useDithering = cbs->gpu_neon.allow_dithering;
  gpu.mmap = cbs->mmap;
  gpu.munmap = cbs->munmap;

  // delayed vram mmap
  if (gpu.vram == NULL)
    map_vram();

  if (cbs->pl_vout_set_raw_vram)
    cbs->pl_vout_set_raw_vram(gpu.vram);
  renderer_set_config(cbs);
  vout_set_config(cbs);
}

// vim:shiftwidth=2:expandtab