*/
#include <stdio.h>
+#include <assert.h>
#include <sys/mman.h>
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
extern const unsigned char cmd_lengths[256];
#define command_lengths cmd_lengths
static unsigned int *ex_regs;
+static int initialized;
#define PCSX
#define SET_Ex(r, v) \
ex_regs[r] = v
+static __attribute__((noinline)) void
+sync_enhancement_buffers(int x, int y, int w, int h);
+
+#include "../gpulib/gpu.h"
#include "psx_gpu/psx_gpu.c"
#include "psx_gpu/psx_gpu_parse.c"
-#include "../gpulib/gpu.h"
static psx_gpu_struct egpu __attribute__((aligned(256)));
-int do_cmd_list(uint32_t *list, int count, int *last_cmd)
+int do_cmd_list(uint32_t *list, int count,
+ int *cycles_sum, int *cycles_last, int *last_cmd)
{
int ret;
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+ // the asm doesn't bother to save callee-save vector regs, so do it here
+ __asm__ __volatile__("":::"q4","q5","q6","q7");
+#endif
+
if (gpu.state.enhancement_active)
- ret = gpu_parse_enhanced(&egpu, list, count * 4, (u32 *)last_cmd);
+ ret = gpu_parse_enhanced(&egpu, list, count * 4,
+ cycles_sum, cycles_last, (u32 *)last_cmd);
else
- ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
+ ret = gpu_parse(&egpu, list, count * 4,
+ cycles_sum, cycles_last, (u32 *)last_cmd);
+
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+ __asm__ __volatile__("":::"q4","q5","q6","q7");
+#endif
ex_regs[1] &= ~0x1ff;
ex_regs[1] |= egpu.texture_settings & 0x1ff;
return ret;
}
-#define ENHANCEMENT_BUF_SIZE (1024 * 1024 * 2 * 4 + 4096)
+#define ENHANCEMENT_BUF_SIZE (1024 * 1024 * 2 * 4 + 4096 * 2)
-int renderer_init(void)
+static void *get_enhancement_bufer(int *x, int *y, int *w, int *h,
+ int *vram_h)
{
- initialize_psx_gpu(&egpu, gpu.vram);
- ex_regs = gpu.ex_regs;
+ uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x, *y);
+ if (ret == NULL)
+ return NULL;
+
+ *x *= 2;
+ *y *= 2;
+ *w = *w * 2;
+ *h = *h * 2;
+ *vram_h = 1024;
+ return ret;
+}
- if (gpu.enhancement_bufer == NULL) {
- // currently we use 4x 1024*1024 buffers instead of single 2048*1024
- // to be able to reuse 1024-width code better (triangle setup,
- // dithering phase, lines).
- gpu.enhancement_bufer = mmap(NULL, ENHANCEMENT_BUF_SIZE,
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (gpu.enhancement_bufer == MAP_FAILED) {
- printf("OOM for enhancement buffer\n");
- gpu.enhancement_bufer = NULL;
- }
+static void map_enhancement_buffer(void)
+{
+ // currently we use 4x 1024*1024 buffers instead of single 2048*1024
+ // to be able to reuse 1024-width code better (triangle setup,
+ // dithering phase, lines).
+ egpu.enhancement_buf_ptr = gpu.mmap(ENHANCEMENT_BUF_SIZE);
+ if (egpu.enhancement_buf_ptr == NULL) {
+ fprintf(stderr, "failed to map enhancement buffer\n");
+ gpu.get_enhancement_bufer = NULL;
+ }
+ else {
+ egpu.enhancement_buf_ptr += 4096 / 2;
+ gpu.get_enhancement_bufer = get_enhancement_bufer;
+ }
+}
+
+int renderer_init(void)
+{
+ if (gpu.vram != NULL) {
+ initialize_psx_gpu(&egpu, gpu.vram);
+ initialized = 1;
}
- egpu.enhancement_buf_ptr = gpu.enhancement_bufer;
+ if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL)
+ map_enhancement_buffer();
+
+ ex_regs = gpu.ex_regs;
return 0;
}
void renderer_finish(void)
{
- if (gpu.enhancement_bufer != NULL)
- munmap(gpu.enhancement_bufer, ENHANCEMENT_BUF_SIZE);
- gpu.enhancement_bufer = NULL;
+ if (egpu.enhancement_buf_ptr != NULL) {
+ egpu.enhancement_buf_ptr -= 4096 / 2;
+ gpu.munmap(egpu.enhancement_buf_ptr, ENHANCEMENT_BUF_SIZE);
+ }
egpu.enhancement_buf_ptr = NULL;
+ egpu.enhancement_current_buf_ptr = NULL;
+ initialized = 0;
}
static __attribute__((noinline)) void
sync_enhancement_buffers(int x, int y, int w, int h)
{
- int xt = egpu.enhancement_x_threshold;
- u16 *src, *dst;
- int wb, i;
-
- w += x & 7;
- x &= ~7;
- w = (w + 7) & ~7;
- if (y + h > 512)
- h = 512 - y;
-
- for (i = 0; i < 4 && w > 0; i++) {
- if (x < 512) {
- wb = w;
- if (x + w > 512)
- wb = 512 - x;
- src = gpu.vram + xt * i + y * 1024 + x;
- dst = egpu.enhancement_buf_ptr +
- (1024*1024 + xt * 2) * i + (y * 1024 + x) * 2;
- scale2x_tiles8(dst, src, wb / 8, h);
- }
-
- x -= xt;
- if (x < 0) {
- w += x;
- x = 0;
- }
+ int i, right = x + w, bottom = y + h;
+ const u16 *src = gpu.vram;
+ // use these because the scanout struct may hold reduced w, h
+ // due to intersection stuff, see the update_enhancement_buf_scanouts() mess
+ int s_w = max(gpu.screen.hres, gpu.screen.w);
+ int s_h = gpu.screen.vres;
+ if (gpu.screen.y < 0)
+ s_h -= gpu.screen.y;
+ s_w = min(s_w, 512);
+ for (i = 0; i < ARRAY_SIZE(egpu.enhancement_scanouts); i++) {
+ const struct psx_gpu_scanout *s = &egpu.enhancement_scanouts[i];
+ u16 *dst = select_enhancement_buf_by_index(&egpu, i);
+ int x1, x2, y1, y2;
+ if (s->w == 0) continue;
+ if (s->x >= right) continue;
+ if (s->x + s_w <= x) continue;
+ if (s->y >= bottom) continue;
+ if (s->y + s_h <= y) continue;
+ x1 = max(x, s->x);
+ x2 = min(right, s->x + s_w);
+ y1 = max(y, s->y);
+ y2 = min(bottom, s->y + s_h);
+ // 16-byte align for the asm version
+ x2 += x1 & 7;
+ x1 &= ~7;
+ scale2x_tiles8(dst + y1 * 1024*2 + x1 * 2,
+ src + y1 * 1024 + x1, (x2 - x1 + 7) / 8u, y2 - y1);
}
}
void renderer_sync_ecmds(uint32_t *ecmds)
{
- gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL);
+ s32 dummy0 = 0;
+ u32 dummy1 = 0;
+ gpu_parse(&egpu, ecmds + 1, 6 * 4, &dummy0, &dummy0, &dummy1);
}
-void renderer_update_caches(int x, int y, int w, int h)
+void renderer_update_caches(int x, int y, int w, int h, int state_changed)
{
update_texture_cache_region(&egpu, x, y, x + w - 1, y + h - 1);
- if (gpu.state.enhancement_active && !gpu.status.rgb24)
+
+ if (gpu.state.enhancement_active) {
+ if (state_changed) {
+ int vres = gpu.screen.vres;
+ if (gpu.screen.y < 0)
+ vres -= gpu.screen.y;
+ memset(egpu.enhancement_scanouts, 0, sizeof(egpu.enhancement_scanouts));
+ egpu.enhancement_scanout_eselect = 0;
+ update_enhancement_buf_scanouts(&egpu,
+ gpu.screen.src_x, gpu.screen.src_y, gpu.screen.hres, vres);
+ return;
+ }
sync_enhancement_buffers(x, y, w, h);
+ }
}
void renderer_flush_queues(void)
void renderer_notify_res_change(void)
{
- // note: must keep it multiple of 8
- egpu.enhancement_x_threshold = gpu.screen.hres;
+ renderer_notify_scanout_change(gpu.screen.src_x, gpu.screen.src_y);
+}
+
+void renderer_notify_scanout_change(int x, int y)
+{
+ int vres = gpu.screen.vres;
+ if (!gpu.state.enhancement_active || !egpu.enhancement_buf_ptr)
+ return;
+
+ if (gpu.screen.y < 0)
+ vres -= gpu.screen.y;
+ update_enhancement_buf_scanouts(&egpu, x, y, gpu.screen.hres, vres);
}
#include "../../frontend/plugin_lib.h"
void renderer_set_config(const struct rearmed_cbs *cbs)
{
- static int enhancement_was_on;
+ if (!initialized) {
+ initialize_psx_gpu(&egpu, gpu.vram);
+ initialized = 1;
+ }
+ if (cbs->pl_set_gpu_caps)
+ cbs->pl_set_gpu_caps(GPU_CAP_SUPPORTS_2X);
disable_main_render = cbs->gpu_neon.enhancement_no_main;
- if (egpu.enhancement_buf_ptr != NULL && cbs->gpu_neon.enhancement_enable
- && !enhancement_was_on)
- {
- sync_enhancement_buffers(0, 0, 1024, 512);
+ if (gpu.state.enhancement_enable) {
+ if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL)
+ map_enhancement_buffer();
}
- enhancement_was_on = cbs->gpu_neon.enhancement_enable;
}
+
+// vim:ts=2:sw=2:expandtab