From d0b9b0df7ed0dc36aabe30e2d584c27c8c32a3d9 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 9 Jan 2016 02:56:28 +0200 Subject: [PATCH] get rid of pthreads --- loader/Makefile | 6 +- loader/emu.c | 120 ++++++++++++----------- loader/header.h | 6 +- loader/llibc.c | 208 ++++++++++++++++++++++++++++++++++++++++ loader/llibc.h | 7 ++ loader/patches.c | 2 +- loader/sys_cacheflush.S | 26 ----- loader/sys_cacheflush.h | 1 - loader/syscalls.S | 88 +++++++++++++++++ loader/syscalls.h | 23 +++++ 10 files changed, 399 insertions(+), 88 deletions(-) create mode 100644 loader/llibc.c create mode 100644 loader/llibc.h delete mode 100644 loader/sys_cacheflush.S delete mode 100644 loader/sys_cacheflush.h create mode 100644 loader/syscalls.S create mode 100644 loader/syscalls.h diff --git a/loader/Makefile b/loader/Makefile index 90b6839..eff5e2f 100644 --- a/loader/Makefile +++ b/loader/Makefile @@ -3,7 +3,7 @@ CROSS_COMPILE ?= arm-linux- CC = $(CROSS_COMPILE)gcc AS = $(CROSS_COMPILE)as CFLAGS += -Wall -ggdb -DLOADER -LDFLAGS += -ggdb -lpthread -lrt +LDFLAGS += -ggdb ifndef DEBUG CFLAGS += -O2 -fno-strict-aliasing LDFLAGS += -O2 @@ -20,7 +20,7 @@ ASFLAGS += --32 endif ifeq "$(ARCH)" "arm" ASFLAGS += -mfloat-abi=soft -OBJ += sys_cacheflush.o emu_arm.o +OBJ += syscalls.o emu_arm.o endif ifdef PND CFLAGS += -DPND @@ -38,7 +38,7 @@ vpath %.s = ../common/ TARGET_S = ginge_sloader$(TAG) TARGET_D = ginge_dyn$(TAG) -OBJ += emu.o host.o host_fb.o cmn.o +OBJ += emu.o host.o host_fb.o cmn.o llibc.o OBJ_S += $(OBJ) loader.o loader_$(ARCH).o patches.o OBJ_D += $(OBJ) dl.o diff --git a/loader/emu.c b/loader/emu.c index f14b0fc..64c2c99 100644 --- a/loader/emu.c +++ b/loader/emu.c @@ -21,19 +21,22 @@ #include #include #include -#include #include #include +#include #include #include +#include #include #include +#include #include "header.h" #include "../common/host_fb.h" #include "../common/cmn.h" -#include "sys_cacheflush.h" +#include "syscalls.h" #include "realfuncs.h" +#include "llibc.h" #if 
(DBG & 2) && !(DBG & 4) #define LOG_IO_UNK @@ -56,7 +59,7 @@ #endif #ifdef LOG_SEGV -#define segvlog printf +#define segvlog g_printf #else #define segvlog(...) #endif @@ -70,8 +73,10 @@ typedef unsigned int u32; typedef unsigned short u16; typedef unsigned char u8; -static pthread_mutex_t fb_mutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t fb_cond = PTHREAD_COND_INITIALIZER; +#define THREAD_STACK_SIZE 0x200000 + +static int fb_sync_thread_paused; +static int fb_sync_thread_futex; static struct { u32 dstctrl; @@ -147,7 +152,7 @@ static void log_io(const char *pfx, u32 a, u32 d, int size) if ((a & ~0xffff) == 0x7f000000) reg = regnames[a & 0xffff]; - printf(fmt, pfx, a, d, reg); + g_printf(fmt, pfx, a, d, reg); } #endif @@ -188,9 +193,9 @@ static void blt_tr(void *dst, void *src, u32 trc, int w) u32 *r = &blitter.dstctrl; \ int i; \ for (i = 0; i < 4*4; i++, r++) { \ - printf("%08x ", *r); \ + g_printf("%08x ", *r); \ if ((i & 3) == 3) \ - printf("\n"); \ + g_printf("\n"); \ } \ } @@ -281,8 +286,10 @@ static void blitter_do(void) } } - if (to_screen) - pthread_cond_signal(&fb_cond); + if (to_screen) { + fb_sync_thread_futex = 1; + g_futex_raw(&fb_sync_thread_futex, FUTEX_WAKE, 1, NULL); + } return; bad_blit: @@ -329,65 +336,55 @@ static void mlc_flip(void *src, int bpp, int stride) } } -#define ts_add_nsec(ts, ns) { \ - ts.tv_nsec += ns; \ - if (ts.tv_nsec >= 1000000000) { \ - ts.tv_sec++; \ - ts.tv_nsec -= 1000000000; \ - } \ -} - -static int fb_sync_thread_paused; - static void *fb_sync_thread(void *arg) { + unsigned long sigmask[2] = { ~0ul, ~0ul }; + struct timespec ts = { 0, 0 }; int invalid_fb_addr = 1; int manual_refresh = 0; int frame_counter = 0; - struct timespec ts; - int ret, wait_ret; + int wait_ret; + + // this thread can't run any signal handlers since the + // app's stack/tls stuff will never be set up here + sigmask[0] &= ~(1ul << (SIGSEGV - 1)); + g_rt_sigprocmask_raw(SIG_SETMASK, sigmask, NULL, sizeof(sigmask)); - //ret = 
pthread_setschedprio(pthread_self(), -1); - //log("pthread_setschedprio %d\n", ret); //ret = setpriority(PRIO_PROCESS, 0, -1); //log("setpriority %d\n", ret); - ret = clock_gettime(CLOCK_REALTIME, &ts); - if (ret != 0) { - perror(PFX "clock_gettime"); - exit(1); - } + // tell the main thread we're done init + fb_sync_thread_futex = 0; + g_futex_raw(&fb_sync_thread_futex, FUTEX_WAKE, 1, NULL); while (1) { u8 *gp2x_fb, *gp2x_fb_end; - ret = pthread_mutex_lock(&fb_mutex); - wait_ret = pthread_cond_timedwait(&fb_cond, &fb_mutex, &ts); - ret |= pthread_mutex_unlock(&fb_mutex); + wait_ret = g_futex_raw(&fb_sync_thread_futex, FUTEX_WAIT, 0, &ts); - if (ret != 0) { - err("fb_thread: mutex error: %d\n", ret); - sleep(1); - goto check_keys; - } - if (wait_ret != 0 && wait_ret != ETIMEDOUT) { - err("fb_thread: cond error: %d\n", wait_ret); - sleep(1); + // this is supposed to be done atomically, but to make life + // easier ignore it for now, race impact is low anyway + fb_sync_thread_futex = 0; + + if (wait_ret != 0 && wait_ret != -EWOULDBLOCK + && wait_ret != -ETIMEDOUT) + { + err("fb_thread: futex error: %d\n", wait_ret); + g_sleep(1); goto check_keys; } if (fb_sync_thread_paused) { - ts_add_nsec(ts, 100000000); + ts.tv_nsec = 100000000; goto check_keys; } - if (wait_ret != ETIMEDOUT) { - clock_gettime(CLOCK_REALTIME, &ts); - ts_add_nsec(ts, 50000000); + if (wait_ret == 0) { + ts.tv_nsec = 50000000; manual_refresh++; if (manual_refresh == 2) dbg("fb_thread: switch to manual refresh\n"); } else { - ts_add_nsec(ts, 16666667); + ts.tv_nsec = 16666667; if (manual_refresh > 1) dbg("fb_thread: switch to auto refresh\n"); manual_refresh = 0; @@ -574,7 +571,7 @@ static u32 xread32(u32 a) switch (a_) { case 0x0a00: // TCOUNT, 1/7372800s - clock_gettime(CLOCK_REALTIME, &ts); + g_clock_gettime_raw(CLOCK_REALTIME, &ts); t64 = (u64)ts.tv_sec * 1000000000 + ts.tv_nsec; // t * 7372800.0 / 1000000000 * 0x100000000 ~= t * 31665935 t64 *= 31665935; @@ -639,9 +636,11 @@ static void 
xwrite16(u32 a, u32 d) return; case 0x2914: mmsp2.mlc_stl_adrh = d; - if (mmsp2.mlc_stl_adr != mmsp2.old_mlc_stl_adr) + if (mmsp2.mlc_stl_adr != mmsp2.old_mlc_stl_adr) { // ask for refresh - pthread_cond_signal(&fb_cond); + fb_sync_thread_futex = 1; + g_futex_raw(&fb_sync_thread_futex, FUTEX_WAKE, 1, NULL); + } mmsp2.old_mlc_stl_adr = mmsp2.mlc_stl_adr; return; case 0x2958: @@ -682,9 +681,11 @@ static void xwrite32(u32 a, u32 d) case 0x4038: // MLCADDRESS0 case 0x406c: // MLCADDRESS1 pollux.mlc_stl_adr = d; - if (d != mmsp2.old_mlc_stl_adr) + if (d != mmsp2.old_mlc_stl_adr) { // ask for refresh - pthread_cond_signal(&fb_cond); + fb_sync_thread_futex = 1; + g_futex_raw(&fb_sync_thread_futex, FUTEX_WAKE, 1, NULL); + } mmsp2.old_mlc_stl_adr = d; return; case 0x403c: // MLCPALETTE0 @@ -971,7 +972,6 @@ void emu_init(void *map_bottom) .sa_sigaction = segv_sigaction, .sa_flags = SA_SIGINFO, }; - pthread_t tid; void *pret; int ret; @@ -1021,12 +1021,22 @@ void emu_init(void *map_bottom) exit(1); } - ret = pthread_create(&tid, NULL, fb_sync_thread, NULL); - if (ret != 0) { - err("failed to create fb_sync_thread: %d\n", ret); + pret = mmap(NULL, THREAD_STACK_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_GROWSDOWN, -1, 0); + if (pret == MAP_FAILED) { + perror(PFX "mmap thread stack"); + exit(1); + } + fb_sync_thread_futex = 1; + ret = g_clone(CLONE_VM | CLONE_FS | CLONE_FILES + | CLONE_SIGHAND | CLONE_THREAD, + (char *)pret + THREAD_STACK_SIZE, 0, 0, 0, + fb_sync_thread); + if (ret == 0 || ret == -1) { + perror(PFX "start fb thread"); exit(1); } - pthread_detach(tid); + g_futex_raw(&fb_sync_thread_futex, FUTEX_WAIT, 1, NULL); // defaults mmsp2.mlc_stl_adr = 0x03101000; // fb2 is at 0x03381000 diff --git a/loader/header.h b/loader/header.h index f24457f..4bc6e1b 100644 --- a/loader/header.h +++ b/loader/header.h @@ -1,9 +1,11 @@ #ifndef INCLUDE_sQt5fY5eUJn5tKV0IBTDxK0zqQutTqTp #define INCLUDE_sQt5fY5eUJn5tKV0IBTDxK0zqQutTqTp 1 +#include "llibc.h" 
+ #define PFX "ginge: " -#define err(f, ...) fprintf(stderr, PFX f, ##__VA_ARGS__) -#define log(f, ...) fprintf(stdout, PFX f, ##__VA_ARGS__) +#define err(f, ...) g_fprintf(2, PFX f, ##__VA_ARGS__) +#define log(f, ...) g_fprintf(1, PFX f, ##__VA_ARGS__) #ifdef DBG #define dbg log #define dbg_c printf diff --git a/loader/llibc.c b/loader/llibc.c new file mode 100644 index 0000000..484b52d --- /dev/null +++ b/loader/llibc.c @@ -0,0 +1,208 @@ +/* + * GINGE - GINGE Is Not Gp2x Emulator + * (C) notaz, 2016 + * + * This work is licensed under the MAME license, see COPYING file for details. + */ +#include +#include +#include + +#include "syscalls.h" +#include "llibc.h" + +// lame, broken and slow, but enough for ginge's needs +static void format_number(char **dst_, int dst_len, unsigned int n, + char fmt, int justify, int zeropad) +{ + char buf[32], *p = buf, *dst; + int printing = 0; + unsigned int div; + unsigned int t; + unsigned int w; + int spaces; + int neg = 0; + int left; + + w = justify < 0 ? -justify : justify; + if (w >= 32) + w = 31; + + switch (fmt) { + case 'i': + case 'd': + if ((signed int)n < 0) { + n = -n; + neg = 1; + } + case 'u': + div = 1000000000; + left = 10; + while (w > left) { + *p++ = ' '; + w--; + continue; + } + while (left > 0) { + t = n / div; + n -= t * div; + div /= 10; + if (t || left == 1) { + if (neg && t && !printing) { + *p++ = '-'; + if (w > 0) w--; + } + printing = 1; + } + if (printing) + *p++ = t + '0'; + else if (w >= left) { + *p++ = ' '; + w--; + } + left--; + } + break; + + case 'p': + w = 8; + zeropad = 1; + case 'x': + left = 8; + while (w > left) { + *p++ = zeropad ? '0' : ' '; + w--; + continue; + } + while (left > 0) { + t = n >> (left * 4 - 4); + t &= 0x0f; + if (t || left == 1) + printing = 1; + if (printing) + *p++ = t < 10 ? t + '0' : t + 'a' - 10; + else if (w >= left) { + *p++ = zeropad ? 
'0' : ' '; + w--; + } + left--; + } + break; + + default: + memcpy(buf, "", 9); + break; + } + *p = 0; + + spaces = 0; + p = buf; + if (justify < 0) { + while (*p == ' ') { + spaces++; + p++; + } + } + + dst = *dst_; + while (*p != 0 && dst_len > 1) { + *dst++ = *p++; + dst_len--; + } + while (spaces > 0 && dst_len > 1) { + *dst++ = ' '; + spaces--; + dst_len--; + } + *dst = 0; + *dst_ = dst; +} + +int parse_dec(const char **p_) +{ + const char *p = *p_; + int neg = 0; + int r = 0; + + if (*p == '-') { + neg = 1; + p++; + } + + while ('0' <= *p && *p <= '9') { + r = r * 10 + *p - '0'; + p++; + } + + *p_ = p; + return neg ? -r : r; +} + +void g_fprintf(int fd, const char *fmt, ...) +{ + char buf[256], *d = buf; + const char *s = fmt; + int left = sizeof(buf); + int justify; + int zeropad; + va_list ap; + + va_start(ap, fmt); + while (*s != 0 && left > 1) { + if (*s != '%') { + *d++ = *s++; + left--; + continue; + } + s++; + if (*s == 0) + break; + if (*s == '%') { + *d++ = *s++; + left--; + continue; + } + + zeropad = *s == '0'; + justify = parse_dec(&s); + if (*s == 'l') + s++; // ignore for now + if (*s == 's') { + char *ns = va_arg(ap, char *); + int len = strlen(ns); + while (justify > len && left > 1) { + *d++ = ' '; + justify--; + left--; + } + if (len > left - 1) { + // truncate to fit; the last byte is reserved for the terminator + len = left - 1; + } + memcpy(d, ns, len); + d += len; + left -= len; + while (justify < -len && left > 1) { + *d++ = ' '; + justify++; + left--; + } + s++; + continue; + } + format_number(&d, left, va_arg(ap, int), *s++, justify, zeropad); + left = buf + sizeof(buf) - d; // format_number advanced d; resync + } + *d = 0; + va_end(ap); + + g_write_raw(fd, buf, d - buf); +} + +void g_sleep(unsigned int seconds) +{ + struct timespec ts = { seconds, 0 }; + g_nanosleep_raw(&ts, NULL); +} + +// vim:shiftwidth=2:expandtab diff --git a/loader/llibc.h b/loader/llibc.h new file mode 100644 index 0000000..5381972 --- /dev/null +++ b/loader/llibc.h @@ -0,0 +1,7 @@ +void g_fprintf(int fd, const char *fmt, ...) 
+ __attribute__((format(printf, 2, 3))); + +#define g_printf(fmt, ...) \ + g_fprintf(1, fmt, ##__VA_ARGS__) + +void g_sleep(unsigned int seconds); diff --git a/loader/patches.c b/loader/patches.c index c8ff2fd..a6e3192 100644 --- a/loader/patches.c +++ b/loader/patches.c @@ -7,7 +7,7 @@ #include #include "header.h" -#include "sys_cacheflush.h" +#include "syscalls.h" #include "override.c" diff --git a/loader/sys_cacheflush.S b/loader/sys_cacheflush.S deleted file mode 100644 index d6ecebe..0000000 --- a/loader/sys_cacheflush.S +++ /dev/null @@ -1,26 +0,0 @@ -@ vim:filetype=armasm -#include - - -.global sys_cacheflush @ const void *start_addr, const void *end_addr - -sys_cacheflush: - mov r2, #0 -#ifdef __ARM_EABI__ - /* EABI version */ - str r7, [sp, #-4]! - mov r7, #(__ARM_NR_cacheflush & 0xff) -#if (__ARM_NR_cacheflush & 0x00ff00) - orr r7, r7, #(__ARM_NR_cacheflush & 0x00ff00) -#endif -#if (__ARM_NR_cacheflush & 0xff0000) - orr r7, r7, #(__ARM_NR_cacheflush & 0xff0000) -#endif - swi 0 - ldr r7, [sp], #4 -#else - /* OABI */ - swi __ARM_NR_cacheflush -#endif - bx lr - diff --git a/loader/sys_cacheflush.h b/loader/sys_cacheflush.h deleted file mode 100644 index a35c00e..0000000 --- a/loader/sys_cacheflush.h +++ /dev/null @@ -1 +0,0 @@ -void sys_cacheflush(const void *start_addr, const void *end_addr); diff --git a/loader/syscalls.S b/loader/syscalls.S new file mode 100644 index 0000000..e72e7d2 --- /dev/null +++ b/loader/syscalls.S @@ -0,0 +1,88 @@ +@ vim:filetype=armasm +#include + + +.global sys_cacheflush @ const void *start_addr, const void *end_addr +sys_cacheflush: + mov r2, #0 +#ifdef __ARM_EABI__ + /* EABI version */ + str r7, [sp, #-4]! 
+ mov r7, #(__ARM_NR_cacheflush & 0xff) +#if (__ARM_NR_cacheflush & 0x00ff00) + orr r7, r7, #(__ARM_NR_cacheflush & 0x00ff00) +#endif +#if (__ARM_NR_cacheflush & 0xff0000) + orr r7, r7, #(__ARM_NR_cacheflush & 0xff0000) +#endif + swi 0 + ldr r7, [sp], #4 +#else + /* OABI */ + swi __ARM_NR_cacheflush +#endif + bx lr + + +#ifdef __ARM_EABI__ +#error hm? +#endif + +.global g_syscall +g_syscall: + mov r12, sp + stmfd sp!, {r4, r5, r6} + ldmia r12, {r4, r5, r6} + swi __NR_syscall + ldmfd sp!, {r4, r5, r6} + +.global g_syscall_error +g_syscall_error: + cmn r0, #4096 + bxcc lr + stmfd sp!, {r4, lr} + rsb r4, r0, #0 + bl __errno_location + str r4, [r0] + mov r0, #-1 + ldmfd sp!, {r4, pc} + +.global g_clone +g_clone: + ldr r12,[sp, #4] @ arg6 - convenience func ptr + str r4, [sp, #-4]! + ldr r4, [sp, #4] @ arg5 + swi __NR_clone + tst r0, r0 + bxeq r12 @ child + cmn r0, #4096 + ldr r4, [sp], #4 + bcs g_syscall_error + bx lr + +@ raw - no errno +.macro raw_syscall_easy name nr +.global \name +\name: + swi \nr + bx lr +.endm + +.macro raw_syscall name nr +.global \name +\name: + mov r12, sp + stmfd sp!, {r4, r5, r6, lr} + ldmia r12, {r4, r5, r6} + swi \nr + ldmfd sp!, {r4, r5, r6, pc} @ restore saved regs and return +.endm + +raw_syscall_easy g_open_raw, __NR_open +raw_syscall_easy g_read_raw, __NR_read +raw_syscall_easy g_write_raw, __NR_write +raw_syscall_easy g_futex_raw, __NR_futex +raw_syscall_easy g_nanosleep_raw, __NR_nanosleep +raw_syscall_easy g_clock_gettime_raw, __NR_clock_gettime +raw_syscall_easy g_rt_sigprocmask_raw, __NR_rt_sigprocmask + diff --git a/loader/syscalls.h b/loader/syscalls.h new file mode 100644 index 0000000..e82e0fa --- /dev/null +++ b/loader/syscalls.h @@ -0,0 +1,23 @@ +#include + +struct timespec; + +void sys_cacheflush(const void *start_addr, const void *end_addr); + +long g_syscall(long number, ...); + +// arg6 is func ptr, for convenience +long g_clone(unsigned long flags, void *child_stack, ...); + +int g_syscall_error(long kret); + +// raw - no errno handling +long 
g_open_raw(const char *pathname, int flags, ...); +long g_read_raw(int fd, void *buf, size_t count); +long g_write_raw(int fd, const void *buf, size_t count); +long g_futex_raw(int *uaddr, int op, int val, + const struct timespec *timeout); +long g_nanosleep_raw(const struct timespec *req, struct timespec *rem); +long g_clock_gettime_raw(int clk_id, struct timespec *tp); +long g_rt_sigprocmask_raw(int how, const void *set, void *oldset, + size_t sigsetsize); -- 2.39.2