pico/cd/pcm.o: CFLAGS += -fno-strict-aliasing
pico/cd/LC89510.o: CFLAGS += -fno-strict-aliasing
pico/cd/gfx_cd.o: CFLAGS += -fno-strict-aliasing
+ifeq (1,$(use_sh2drc))
+ifneq (,$(findstring -flto,$(CFLAGS)))
+# When the DRC is used, memory.o and sh2soc.o keep the SH2 SR in a global
+# register variable to avoid saving and reloading it. This collides with LTO,
+# so LTO is switched off for these two objects.
+pico/32x/memory.o: CFLAGS += -fno-lto
+pico/32x/sh2soc.o: CFLAGS += -fno-lto
+endif
+endif
# fame needs ~2GB of RAM to compile on gcc 4.8
# on x86, this is reduced by ~300MB when debug info is off (but not on ARM)
STATIC_LINKING:= 0
TARGET_NAME := picodrive
LIBM := -lm
-GIT_VERSION ?= " $(shell git rev-parse --short HEAD || echo unknown)"
+GIT_VERSION ?= $(shell git rev-parse --short HEAD || echo unknown)
-ifneq ($(GIT_VERSION)," unknown")
+# GIT_VERSION no longer carries quotes or a leading blank, so compare against
+# the bare fallback word; otherwise the test is always true and "unknown"
+# would be compiled in as the version string.
+ifneq ($(GIT_VERSION),unknown)
CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\"
endif
use_fame = 1
use_drz80 = 0
use_cz80 = 1
+ use_sh2drc = 1
# Windows
else
#define host_arg2reg(rd, arg) \
rd = arg
+#define emith_rw_offs_max() 0xff
+
/* SH2 drc specific */
/* pushes r12 for eabi alignment */
#define emith_sh2_drc_entry() \
#define emith_flush() /**/
#define host_instructions_updated(base, end) __builtin___clear_cache(base, end)
#define emith_jump_patch_size() 8
+#define emith_rw_offs_max() 0xff
// SH2 drc specific
// NB: for adcf and sbcf, carry-in must be dealt with separately (see there)
static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub)
{
- if (sub && rd == FNZ && rt && rs) // is this cmp_r_r?
+ if (sub && rd == FNZ && rt > AT && rs > AT) // is this cmp_r_r?
emith_flg_rs = rs, emith_flg_rt = rt;
else emith_flg_rs = emith_flg_rt = 0;
// NB: mips32r2 has EXT and INS
#define emith_clear_msb(d, s, count) /* bits to clear */ do { \
u32 t; \
- if ((count) > 16) { \
+ if ((count) >= 16) { \
t = (count) - 16; \
t = 0xffff >> t; \
emith_and_r_r_imm(d, s, t); \
// NB: mips32r2 has SYNCI
#define host_instructions_updated(base, end) __builtin___clear_cache(base, end)
#define emith_jump_patch_size() 4
+#define emith_rw_offs_max() 0x7fff
// SH2 drc specific
#define emith_sh2_drc_entry() do { \
#define host_instructions_updated(base, end)
+#define emith_rw_offs_max() 0xffffffff
+
#ifdef __x86_64__
#define HOST_REGS 16
static int rcache_get_tmp(void);
static void rcache_free_tmp(int hr);
-// Note: cache_regs[] must have at least the amount of REG and TEMP registers
-// used by handlers in worst case (currently 4).
+// Note: cache_regs[] must contain at least as many HRF_REG registers as the
+// handlers use in the worst case (currently 4).
// Register assignment goes by ABI convention. Caller save registers are TEMP,
// the others are either static or REG. SR must be static, R0 very recommended.
// VBR, PC, PR must not be static (read from context in utils).
// NB may return either REG or TEMP
static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, u32 *offs)
{
- uptr omask = 0xff; // offset mask, XXX: ARM oriented..
+ uptr omask = emith_rw_offs_max(); // offset mask
u32 mask = 0;
u32 a;
int poffs;
static void sh2_generate_utils(void)
{
- int arg0, arg1, arg2, arg3, sr, tmp;
+ int arg0, arg1, arg2, arg3, sr, tmp, tmp2;
host_arg2reg(arg0, 0);
host_arg2reg(arg1, 1);
emith_sub_r_imm(tmp, 4*2);
rcache_clean();
// push SR
- tmp = rcache_get_reg_arg(0, SHR_SP, NULL);
- emith_add_r_imm(tmp, 4);
+ tmp = rcache_get_reg_arg(0, SHR_SP,&tmp2);
+ emith_add_r_r_imm(tmp, tmp2, 4);
tmp = rcache_get_reg_arg(1, SHR_SR, NULL);
emith_clear_msb(tmp, tmp, 22);
emith_move_r_r_ptr(arg2, CONTEXT_REG);
- rcache_invalidate();
+ rcache_invalidate_tmp();
emith_call(p32x_sh2_write32); // XXX: use sh2_drc_write32?
// push PC
rcache_get_reg_arg(0, SHR_SP, NULL);
emith_ctx_read(arg1, SHR_PC * 4);
emith_move_r_r_ptr(arg2, CONTEXT_REG);
- rcache_invalidate();
+ rcache_invalidate_tmp();
emith_call(p32x_sh2_write32);
// update I, cycles, do callback
emith_ctx_read(arg1, offsetof(SH2, pending_level));
// fetch oldest write to address from fifo, but stop when reaching the present
idx = sh2_poll_rd[hix];
while (idx != sh2_poll_wr[hix] && CYCLES_GE(cycles, fifo[idx].cycles)) {
-// int oidx = idx;
p = &fifo[idx];
idx = (idx+1) % PFIFO_SZ;
- if (CYCLES_GT(cycles, p->cycles+80)) {
- // drop older fifo stores that may cause synchronisation problems.
- // NB unfortunately this cycle diff is quite sensitive:
- // observed in Brutal Unleashed: min 80, observed in Afterburner: max 110
- sh2_poll_rd[hix] = idx;
- } else if (p->a == a) {
- // replace current data with fifo value and discard fifo entry
- if (cpu != p->cpu) {
+ if (cpu != p->cpu) {
+ if (CYCLES_GT(cycles, p->cycles+80)) {
+ // drop older fifo stores that may cause synchronisation problems.
+ sh2_poll_rd[hix] = idx;
+ } else if (p->a == a) {
+ // replace current data with fifo value and discard fifo entry
d = p->d;
p->a = -1;
-// if (oidx == sh2_poll_rd[hix])
-// sh2_poll_rd[hix] = idx;
+ break;
}
- break;
}
}
return d;
{
int hix = (a >> 1) % PFIFO_CNT;
struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix];
- struct sh2_poll_fifo *p = &fifo[sh2_poll_wr[hix]];
struct sh2_poll_fifo *q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ];
int cpu = sh2 ? sh2->is_slave+1 : 0;
// intermediate values that may cause synchronisation problems.
// NB this can take an eternity on m68k: mov.b <addr1.l>,<addr2.l> needs
// 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head)
- if (q->a == a && !CYCLES_GT(cycles,q->cycles+30)) {
+ if (q->a == a && sh2_poll_wr[hix] != sh2_poll_rd[hix] && !CYCLES_GT(cycles,q->cycles+30)) {
q->d = d;
} else {
// store write to poll address in fifo
+ fifo[sh2_poll_wr[hix]] =
+ (struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu };
sh2_poll_wr[hix] = (sh2_poll_wr[hix]+1) % PFIFO_SZ;
if (sh2_poll_wr[hix] == sh2_poll_rd[hix])
// fifo overflow, discard oldest value
sh2_poll_rd[hix] = (sh2_poll_rd[hix]+1) % PFIFO_SZ;
- *p = (struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu };
}
}
sh2_drc_mem_setup(&msh2);
sh2_drc_mem_setup(&ssh2);
+ memset(sh2_poll_rd, 0, sizeof(sh2_poll_rd));
+ memset(sh2_poll_wr, 0, sizeof(sh2_poll_wr));
// z80 hack
z80_map_set(z80_write_map, 0x8000, 0xffff, z80_md_bank_write_32x, 1);
# compile with target C compiler and extract value from .rodata section
compile_rodata ()
{
- $CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1
+ $CC $CFLAGS -I .. -shared /tmp/getoffs.c -o /tmp/getoffs.o || exit 1
# find the name of the .rodata section (in case -fdata-sections is used)
rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' |
sed 's/^[^.]*././;s/ .*//')