From 9ee0fd5b333039b1140d90f935aa9299825f1e42 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 19 Aug 2012 22:39:49 +0300 Subject: [PATCH] start mmap'ing vram, with hugetlb if possible --- frontend/common/plat.h | 4 +++ frontend/linux/plat.c | 47 ++++++++++++++++++++++++++++++---- frontend/plugin_lib.c | 13 ++++++++++ frontend/plugin_lib.h | 2 ++ plugins/dfxvideo/gpulib_if.c | 10 ++++++-- plugins/gpu-gles/gpulib_if.c | 11 ++++++-- plugins/gpu_neon/psx_gpu_if.c | 44 ++++++++++++++++++++----------- plugins/gpu_unai/gpulib_if.cpp | 2 ++ plugins/gpulib/gpu.c | 44 ++++++++++++++++++++++++++++--- plugins/gpulib/gpu.h | 5 ++-- 10 files changed, 152 insertions(+), 30 deletions(-) diff --git a/frontend/common/plat.h b/frontend/common/plat.h index 0a9fc0b2..416f8ac7 100644 --- a/frontend/common/plat.h +++ b/frontend/common/plat.h @@ -45,6 +45,10 @@ int plat_is_dir(const char *path); int plat_wait_event(int *fds_hnds, int count, int timeout_ms); void plat_sleep_ms(int ms); +void *plat_mmap(unsigned long addr, size_t size, int need_exec); +void *plat_mremap(void *ptr, size_t oldsize, size_t newsize); +void plat_munmap(void *ptr, size_t size); + /* timers, to be used for time diff and must refer to the same clock */ unsigned int plat_get_ticks_ms(void); unsigned int plat_get_ticks_us(void); diff --git a/frontend/linux/plat.c b/frontend/linux/plat.c index b7152b55..044084ed 100644 --- a/frontend/linux/plat.c +++ b/frontend/linux/plat.c @@ -17,9 +17,17 @@ #include #include #include +#include #include "../common/plat.h" +/* XXX: maybe unhardcode pagesize? 
*/ +#define HUGETLB_PAGESIZE (2 * 1024 * 1024) +#define HUGETLB_THRESHOLD (HUGETLB_PAGESIZE / 2) +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 /* arch specific */ +#endif + int plat_is_dir(const char *path) { @@ -126,16 +134,34 @@ int plat_wait_event(int *fds_hnds, int count, int timeout_ms) return ret; } -void *plat_mmap(unsigned long addr, size_t size) +void *plat_mmap(unsigned long addr, size_t size, int need_exec) { + static int hugetlb_disabled; + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; void *req, *ret; req = (void *)addr; - ret = mmap(req, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (need_exec) + prot |= PROT_EXEC; + if (size >= HUGETLB_THRESHOLD && !hugetlb_disabled) + flags |= MAP_HUGETLB; + + ret = mmap(req, size, prot, flags, -1, 0); + if (ret == MAP_FAILED && (flags & MAP_HUGETLB)) { + fprintf(stderr, + "warning: failed to do hugetlb mmap (%p, %zu): %d\n", + req, size, errno); + hugetlb_disabled = 1; + flags &= ~MAP_HUGETLB; + ret = mmap(req, size, prot, flags, -1, 0); + } if (ret == MAP_FAILED) return NULL; - if (ret != req) - printf("warning: mmaped to %p, requested %p\n", ret, req); + + if (req != NULL && ret != req) + fprintf(stderr, + "warning: mmaped to %p, requested %p\n", ret, req); return ret; } @@ -155,7 +181,18 @@ void *plat_mremap(void *ptr, size_t oldsize, size_t newsize) void plat_munmap(void *ptr, size_t size) { - munmap(ptr, size); + int ret; + + ret = munmap(ptr, size); + if (ret != 0 && (size & (HUGETLB_PAGESIZE - 1))) { + // perhaps an autorounded hugetlb mapping? 
+ size = (size + HUGETLB_PAGESIZE - 1) & ~(HUGETLB_PAGESIZE - 1); + ret = munmap(ptr, size); + } + if (ret != 0) { + fprintf(stderr, + "munmap(%p, %zu) failed: %d\n", ptr, size, errno); + } } /* lprintf */ diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 4dbb9a7d..c2e2ab4c 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -21,6 +21,7 @@ #include "linux/fbdev.h" #include "common/fonts.h" #include "common/input.h" +#include "common/plat.h" #include "menu.h" #include "main.h" #include "plat.h" @@ -484,12 +485,24 @@ static void pl_get_layer_pos(int *x, int *y, int *w, int *h) *h = g_layer_h; } +static void *pl_mmap(unsigned int size) +{ + return plat_mmap(0, size, 0); +} + +static void pl_munmap(void *ptr, unsigned int size) +{ + plat_munmap(ptr, size); +} + struct rearmed_cbs pl_rearmed_cbs = { pl_get_layer_pos, pl_vout_open, pl_vout_set_mode, pl_vout_flip, pl_vout_close, + pl_mmap, + pl_munmap, }; /* watchdog */ diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 7687bf84..1701d06c 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -44,6 +44,8 @@ struct rearmed_cbs { void *(*pl_vout_set_mode)(int w, int h, int bpp); void *(*pl_vout_flip)(void); void (*pl_vout_close)(void); + void *(*mmap)(unsigned int size); + void (*munmap)(void *ptr, unsigned int size); // these are only used by some frontends void (*pl_vout_raw_flip)(int x, int y); void (*pl_vout_set_raw_vram)(void *vram); diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index 6f425bdd..50130f6f 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -265,9 +265,9 @@ long lLowerpart; ///////////////////////////////////////////////////////////////////////////// -int renderer_init(void) +static void set_vram(void *vram) { - psxVub=(void *)gpu.vram; + psxVub=vram; psxVsb=(signed char *)psxVub; // different ways of accessing PSX VRAM psxVsw=(signed short *)psxVub; @@ -276,6 +276,11 @@ int renderer_init(void) 
psxVul=(uint32_t *)psxVub; psxVuw_eom=psxVuw+1024*512; // pre-calc of end of vram +} + +int renderer_init(void) +{ + set_vram(gpu.vram); PSXDisplay.RGB24 = FALSE; // init some stuff PSXDisplay.Interlaced = FALSE; @@ -416,4 +421,5 @@ void renderer_set_config(const struct rearmed_cbs *cbs) { iUseDither = cbs->gpu_peops.iUseDither; dwActFixes = cbs->gpu_peops.dwActFixes; + set_vram(gpu.vram); } diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index 09dc2009..d245c44b 100644 --- a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -479,10 +479,15 @@ switch((gdata>>24)&0xff) static int is_opened; -int renderer_init(void) +static void set_vram(void *vram) { - psxVub=(void *)gpu.vram; + psxVub=vram; psxVuw=(unsigned short *)psxVub; +} + +int renderer_init(void) +{ + set_vram(gpu.vram); PSXDisplay.RGB24 = FALSE; // init some stuff PSXDisplay.Interlaced = FALSE; @@ -710,6 +715,8 @@ void renderer_set_config(const struct rearmed_cbs *cbs_) bUseFastMdec = cbs->gpu_peopsgl.bUseFastMdec; iTexGarbageCollection = cbs->gpu_peopsgl.iTexGarbageCollection; iVRamSize = cbs->gpu_peopsgl.iVRamSize; + + set_vram(gpu.vram); } void SetAspectRatio(void) diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index ca76fe24..1b4dcc55 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -15,6 +15,7 @@ extern const unsigned char cmd_lengths[256]; #define command_lengths cmd_lengths static unsigned int *ex_regs; +static int initialized; #define PCSX #define SET_Ex(r, v) \ @@ -42,33 +43,38 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd) #define ENHANCEMENT_BUF_SIZE (1024 * 1024 * 2 * 4 + 4096) -int renderer_init(void) +static void map_enhancement_buffer(void) { - initialize_psx_gpu(&egpu, gpu.vram); - ex_regs = gpu.ex_regs; + // currently we use 4x 1024*1024 buffers instead of single 2048*1024 + // to be able to reuse 1024-width code better (triangle setup, + // dithering phase, lines). 
+ gpu.enhancement_bufer = gpu.mmap(ENHANCEMENT_BUF_SIZE); + if (gpu.enhancement_bufer == NULL) + fprintf(stderr, "failed to map enhancement buffer\n"); + egpu.enhancement_buf_ptr = gpu.enhancement_bufer; +} - if (gpu.enhancement_bufer == NULL) { - // currently we use 4x 1024*1024 buffers instead of single 2048*1024 - // to be able to reuse 1024-width code better (triangle setup, - // dithering phase, lines). - gpu.enhancement_bufer = mmap(NULL, ENHANCEMENT_BUF_SIZE, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (gpu.enhancement_bufer == MAP_FAILED) { - printf("OOM for enhancement buffer\n"); - gpu.enhancement_bufer = NULL; - } +int renderer_init(void) +{ + if (gpu.vram != NULL) { + initialize_psx_gpu(&egpu, gpu.vram); + initialized = 1; } - egpu.enhancement_buf_ptr = gpu.enhancement_bufer; + if (gpu.mmap != NULL && gpu.enhancement_bufer == NULL) + map_enhancement_buffer(); + + ex_regs = gpu.ex_regs; return 0; } void renderer_finish(void) { if (gpu.enhancement_bufer != NULL) - munmap(gpu.enhancement_bufer, ENHANCEMENT_BUF_SIZE); + gpu.munmap(gpu.enhancement_bufer, ENHANCEMENT_BUF_SIZE); gpu.enhancement_bufer = NULL; egpu.enhancement_buf_ptr = NULL; + initialized = 0; } static __attribute__((noinline)) void @@ -148,4 +154,12 @@ void renderer_set_config(const struct rearmed_cbs *cbs) sync_enhancement_buffers(0, 0, 1024, 512); } enhancement_was_on = cbs->gpu_neon.enhancement_enable; + + if (!initialized) { + initialize_psx_gpu(&egpu, gpu.vram); + initialized = 1; + } + + if (gpu.enhancement_bufer == NULL) + map_enhancement_buffer(); } diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 2f741ad2..de167214 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -533,6 +533,8 @@ void renderer_set_config(const struct rearmed_cbs *cbs) enableAbbeyHack = cbs->gpu_unai.abe_hack; light = !cbs->gpu_unai.no_light; blend = !cbs->gpu_unai.no_blend; + + GPU_FrameBuffer = (u16 *)gpu.vram; } #endif diff 
--git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index e133f07e..b61bff60 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -24,7 +24,7 @@ //#define log_anomaly gpu_log #define log_anomaly(...) -struct psx_gpu gpu __attribute__((aligned(2048))); +struct psx_gpu gpu; static noinline int do_cmd_buffer(uint32_t *data, int count); static void finish_vram_transfer(int is_read); @@ -133,6 +133,22 @@ static noinline void get_gpu_info(uint32_t data) } } +// double, for overdraw guard +#define VRAM_SIZE (1024 * 512 * 2 * 2) + +static int map_vram(void) +{ + gpu.vram = gpu.mmap(VRAM_SIZE); + if (gpu.vram != NULL) { + gpu.vram += 4096 / 2; + return 0; + } + else { + fprintf(stderr, "could not map vram, expect crashes\n"); + return -1; + } +} + long GPUinit(void) { int ret; @@ -145,13 +161,26 @@ long GPUinit(void) gpu.cmd_len = 0; do_reset(); + if (gpu.mmap != NULL) { + if (map_vram() != 0) + ret = -1; + } return ret; } long GPUshutdown(void) { + long ret; + renderer_finish(); - return vout_finish(); + ret = vout_finish(); + if (gpu.vram != NULL) { + gpu.vram -= 4096 / 2; + gpu.munmap(gpu.vram, VRAM_SIZE); + } + gpu.vram = NULL; + + return ret; } void GPUwriteStatus(uint32_t data) @@ -584,13 +613,13 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze) case 1: // save if (gpu.cmd_len > 0) flush_cmd_buffer(); - memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram)); + memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2); memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs)); memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs)); freeze->ulStatus = gpu.status.reg; break; case 0: // load - memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram)); + memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2); memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs)); memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs)); gpu.status.reg = freeze->ulStatus; @@ -673,6 +702,13 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) gpu.state.allow_interlace = 
cbs->gpu_neon.allow_interlace; gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable; + gpu.mmap = cbs->mmap; + gpu.munmap = cbs->munmap; + + // delayed vram mmap + if (gpu.vram == NULL) + map_vram(); + if (cbs->pl_vout_set_raw_vram) cbs->pl_vout_set_raw_vram(gpu.vram); renderer_set_config(cbs); diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 5ad2a461..78a89907 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -17,10 +17,9 @@ extern "C" { #define CMD_BUFFER_LEN 1024 struct psx_gpu { - uint16_t vram[1024 * 512]; - uint16_t guard[1024 * 512]; // overdraw guard uint32_t cmd_buffer[CMD_BUFFER_LEN]; uint32_t regs[16]; + uint16_t *vram; union { uint32_t reg; struct { @@ -90,6 +89,8 @@ struct psx_gpu { uint32_t pending_fill[3]; } frameskip; uint16_t *enhancement_bufer; + void *(*mmap)(unsigned int size); + void (*munmap)(void *ptr, unsigned int size); }; extern struct psx_gpu gpu; -- 2.39.5