From: kub
Date: Tue, 30 Jul 2019 18:55:48 +0000 (+0200)
Subject: sh2 drc: add mipsel backend for MIPS32 Release 1 (for JZ47xx)
X-Git-Tag: v2.00~844
X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d80a5fd2ab743382f734346760fadd2dc44955f1;p=picodrive.git

sh2 drc: add mipsel backend for MIPS32 Release 1 (for JZ47xx)
---
diff --git a/Makefile b/Makefile
index 1b2aab41..62accf77 100644
--- a/Makefile
+++ b/Makefile
@@ -55,6 +55,9 @@ use_cz80 ?= 1
 ifneq (,$(findstring 86,$(ARCH)))
 use_sh2drc ?= 1
 endif
+ifneq (,$(findstring mips,$(ARCH)))
+use_sh2drc ?= 1
+endif
 endif
 
 -include Makefile.local
@@ -245,7 +248,7 @@ pico/carthw_cfg.c: pico/carthw.cfg
 # random deps
 pico/carthw/svp/compiler.o : cpu/drc/emit_arm.c
 cpu/sh2/compiler.o : cpu/drc/emit_arm.c
-cpu/sh2/compiler.o : cpu/drc/emit_x86.c
+cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c
 cpu/sh2/mame/sh2pico.o : cpu/sh2/mame/sh2.c
 pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c pico/pico_int.h
 pico/memory.o pico/cd/memory.o pico/32x/memory.o : pico/pico_int.h pico/memory.h
diff --git a/config.gcw0 b/config.gcw0
new file mode 100644
index 00000000..1d2ccef0
--- /dev/null
+++ b/config.gcw0
@@ -0,0 +1,16 @@
+# Automatically generated by configure
+# Configured with: './configure' '--platform=generic'
+CC = mipsel-gcw0-linux-uclibc-gcc
+CXX = mipsel-gcw0-linux-uclibc-g++
+AS = mipsel-gcw0-linux-uclibc-as
+STRIP = mipsel-gcw0-linux-uclibc-strip
+CFLAGS += -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/
+CFLAGS += -I${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL
+CFLAGS += -D_GNU_SOURCE=1 -D_REENTRANT -Wno-unused-result -fno-stack-protector
+ASFLAGS +=
+LDFLAGS +=
+LDLIBS += -B${HOME}/opt/gcw0-toolchain/usr/lib -Wl,-rpath-link=${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/lib -Wl,-rpath-link=${HOME}/opt/gcw0-toolchain/usr/mipsel-gcw0-linux-uclibc/sysroot/lib -lSDL -lasound -lpng -lz -lm -lstdc++ -ldl
+
+ARCH = mipsel
+PLATFORM = opendingux
+SOUND_DRIVERS = sdl
diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c
index 0eb2d972..72542a3f 100644
--- a/cpu/drc/emit_arm.c
+++ b/cpu/drc/emit_arm.c
@@ -1098,11 +1098,14 @@ static inline void emith_pool_adjust(int pool_index, int move_offs)
 #define emith_jump_cond_patchable(cond, target) \
 	emith_jump_cond(cond, target)
 
-#define emith_jump_patch(ptr, target) do { \
+#define emith_jump_patch(ptr, target) ({ \
 	u32 *ptr_ = ptr; \
 	u32 val_ = (u32 *)(target) - ptr_ - 2; \
 	*ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \
-} while (0)
+	(u8 *)ptr; \
+})
+
+#define emith_jump_patch_size() 4
 
 #define emith_jump_at(ptr, target) do { \
 	u32 val_ = (u32 *)(target) - (u32 *)(ptr) - 2; \
diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c
new file mode 100644
index 00000000..f56b89a3
--- /dev/null
+++ b/cpu/drc/emit_mips.c
@@ -0,0 +1,1464 @@
+/*
+ * Basic macros to emit MIPS II/MIPS32 Release 1 instructions and some utils
+ * Copyright (C) 2019 kub
+ *
+ * This work is licensed under the terms of MAME license.
+ * See COPYING file in the top-level directory.
+ */ +#define HOST_REGS 32 +#define CONTEXT_REG 23 // s7 +#define RET_REG 2 // v0 + +// NB: the ubiquitous JZ74[46]0 uses MIPS32 Release 1, a slight MIPS II superset + +// registers usable for user code: r1-r25, others reserved or special +#define Z0 0 // zero register +#define GP 28 // global pointer +#define SP 29 // stack pointer +#define FP 30 // frame pointer +#define LR 31 // link register +// internally used by code emitter: +#define AT 1 // used to hold intermediate results +#define FNZ 15 // emulated processor flags: N (bit 31) ,Z (all bits) +#define FC 24 // emulated processor flags: C (bit 0), others 0 +#define FV 25 // emulated processor flags: Nt^Ns (bit 31). others ? + + +// unified conditions; virtual, not corresponding to anything real on MIPS +#define DCOND_EQ 0x0 +#define DCOND_NE 0x1 +#define DCOND_HS 0x2 +#define DCOND_LO 0x3 +#define DCOND_MI 0x4 +#define DCOND_PL 0x5 +#define DCOND_VS 0x6 +#define DCOND_VC 0x7 +#define DCOND_HI 0x8 +#define DCOND_LS 0x9 +#define DCOND_GE 0xa +#define DCOND_LT 0xb +#define DCOND_GT 0xc +#define DCOND_LE 0xd + +#define DCOND_CS DCOND_LO +#define DCOND_CC DCOND_HS + +// unified insn +#define MIPS_INSN(op, rs, rt, rd, sa, fn) \ + (((op)<<26)|((rs)<<21)|((rt)<<16)|((rd)<<11)|((sa)<<6)|((fn)<<0)) + +#define _ 0 // marker for "field unused" +#define __(n) o##n // enum marker for "undefined" + +// opcode field (encoded in op) +enum { OP__FN=000, OP__RT, OP_J, OP_JAL, OP_BEQ, OP_BNE, OP_BLEZ, OP_BGTZ }; +enum { OP_ADDI=010, OP_ADDIU, OP_SLTI, OP_SLTIU, OP_ANDI, OP_ORI, OP_XORI, OP_LUI }; +enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR }; +enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, __(54), __(55), OP_SWR }; +// function field (encoded in fn if opcode = OP__FN) +enum { FN_SLL=000, __(01), FN_SRL, FN_SRA, FN_SLLV, __(05), FN_SRLV, FN_SRAV }; +enum { FN_MFHI=020, FN_MTHI, FN_MFLO, FN_MTLO }; +enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU }; +enum { FN_ADD=040, FN_ADDU, FN_SUB, FN_SUBU, FN_AND, FN_OR, FN_XOR, FN_NOR }; +enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017, FN_SLT=052, FN_SLTU }; +// rt field (encoded in rt if opcode = OP__RT) +enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; + +#define MIPS_NOP 000 // null operation: SLL r0, r0, #0 + +// arithmetic/logical + +#define MIPS_OP_REG(op, rd, rs, rt) \ + MIPS_INSN(OP__FN, rs, rt, rd, _, op) // R-type, SPECIAL +#define MIPS_OP_IMM(op, rt, rs, imm) \ + MIPS_INSN(op, rs, rt, _, _, (u16)(imm)) // I-type + +// rd = rt OP rs +#define MIPS_ADD_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_ADDU, rd, rs, rt) +#define MIPS_SUB_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_SUBU, rd, rs, rt) + +#define MIPS_NEG_REG(rd, rt) \ + MIPS_SUB_REG(rd, Z0, rt) + +#define MIPS_XOR_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_XOR, rd, rs, rt) +#define MIPS_OR_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_OR, rd, rs, rt) +#define MIPS_AND_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_AND, rd, rs, rt) +#define MIPS_NOR_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_NOR, rd, rs, rt) + +#define MIPS_MOVE_REG(rd, rs) \ + MIPS_OR_REG(rd, rs, Z0) +#define MIPS_MVN_REG(rd, rs) \ + MIPS_NOR_REG(rd, rs, Z0) + +// rd = rt SHIFT rs +#define MIPS_LSL_REG(rd, rt, rs) \ + MIPS_OP_REG(FN_SLLV, rd, rs, rt) +#define MIPS_LSR_REG(rd, rt, rs) \ + MIPS_OP_REG(FN_SRLV, rd, rs, rt) +#define MIPS_ASR_REG(rd, rt, rs) \ + MIPS_OP_REG(FN_SRAV, rd, rs, rt) + +// rd = (rs < rt) +#define MIPS_SLT_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_SLT, rd, rs, rt) +#define MIPS_SLTU_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_SLTU, rd, rs, rt) + +// rt = rs OP imm16 +#define MIPS_ADD_IMM(rt, 
rs, imm16) \ + MIPS_OP_IMM(OP_ADDIU, rt, rs, imm16) + +#define MIPS_XOR_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_XORI, rt, rs, imm16) +#define MIPS_OR_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_ORI, rt, rs, imm16) +#define MIPS_AND_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_ANDI, rt, rs, imm16) + +// rt = (imm16 << (0|16)) +#define MIPS_MOV_IMM(rt, imm16) \ + MIPS_OP_IMM(OP_ORI, rt, Z0, imm16) +#define MIPS_MOVT_IMM(rt, imm16) \ + MIPS_OP_IMM(OP_LUI, rt, _, imm16) + +// rd = rt SHIFT imm5 +#define MIPS_LSL_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SLL) +#define MIPS_LSR_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SRL) +#define MIPS_ASR_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SRA) + +// rt = (rs < imm16) +#define MIPS_SLT_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_SLTI, rt, rs, imm16) +#define MIPS_SLTU_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_SLTIU, rt, rs, imm16) + +// multiplication + +#define MIPS_MULT(rt, rs) \ + MIPS_OP_REG(FN_MULT, _, rs, rt) +#define MIPS_MULTU(rt, rs) \ + MIPS_OP_REG(FN_MULTU, _, rs, rt) +#define MIPS_MFLO(rd) \ + MIPS_OP_REG(FN_MFLO, rd, _, _) +#define MIPS_MFHI(rd) \ + MIPS_OP_REG(FN_MFHI, rd, _, _) + +// branching + +#define MIPS_J(abs26) \ + MIPS_INSN(OP_J, _,_,_,_, (abs26) >> 2) // J-type +#define MIPS_JAL(abs26) \ + MIPS_INSN(OP_JAL, _,_,_,_, (abs26) >> 2) +#define MIPS_JR(rs) \ + MIPS_OP_REG(FN_JR,_,rs,_) +#define MIPS_JALR(rd, rs) \ + MIPS_OP_REG(FN_JALR,rd,rs,_) + +// conditional branches; no condition code, these compare rs against rt or Z0 +#define MIPS_BEQ (OP_BEQ << 5) +#define MIPS_BNE (OP_BNE << 5) +#define MIPS_BLE (OP_BLEZ << 5) +#define MIPS_BGT (OP_BGTZ << 5) +#define MIPS_BLT ((OP__RT << 5)|RT_BLTZ) +#define MIPS_BGE ((OP__RT << 5)|RT_BGEZ) +#define MIPS_BGTL ((OP__RT << 5)|RT_BLTZAL) +#define MIPS_BGEL ((OP__RT << 5)|RT_BGEZAL) + +#define MIPS_BCONDZ(cond, rs, offs16) \ + MIPS_OP_IMM((cond >> 5), (cond & 0x1f), rs, (offs16) >> 2) +#define MIPS_B(offs16) \ + MIPS_BCONDZ(MIPS_BEQ, Z0, offs16) +#define MIPS_BL(offs16) \ + MIPS_BCONDZ(MIPS_BGEL, Z0, offs16) + +// load/store indexed base + +#define MIPS_LW(rt, rs, offs16) \ + MIPS_INSN(OP_LW, rs, rt, _,_, (u16)(offs16)) +#define MIPS_LH(rt, rs, offs16) \ + MIPS_INSN(OP_LH, rs, rt, _,_, (u16)(offs16)) +#define MIPS_LB(rt, rs, offs16) \ + MIPS_INSN(OP_LB, rs, rt, _,_, (u16)(offs16)) +#define MIPS_LHU(rt, rs, offs16) \ + MIPS_INSN(OP_LHU, rs, rt, _,_, (u16)(offs16)) +#define MIPS_LBU(rt, rs, offs16) \ + MIPS_INSN(OP_LBU, rs, rt, _,_, (u16)(offs16)) + +#define MIPS_SW(rt, rs, offs16) \ + MIPS_INSN(OP_SW, rs, rt, _,_, (u16)(offs16)) +#define MIPS_SH(rt, rs, offs16) \ + MIPS_INSN(OP_SH, rs, rt, _,_, (u16)(offs16)) +#define MIPS_SB(rt, rs, offs16) \ + MIPS_INSN(OP_SB, rs, rt, _,_, (u16)(offs16)) + +// XXX: tcache_ptr type for SVP and SH2 compilers differs.. 
+#define EMIT_PTR(ptr, x) \ + do { \ + *(u32 *)(ptr) = x; \ + ptr = (void *)((u8 *)(ptr) + sizeof(u32)); \ + } while (0) + +// FIFO for 2 instructions, for delay slot handling +u32 emith_last_insns[2] = { -1,-1 }; +int emith_last_idx; + +#define EMIT_PUSHOP() \ + do { \ + emith_last_idx ^= 1; \ + if (emith_last_insns[emith_last_idx] != -1) \ + EMIT_PTR(tcache_ptr, emith_last_insns[emith_last_idx]);\ + emith_last_insns[emith_last_idx] = -1; \ + } while (0) + +#define EMIT(op) \ + do { \ + EMIT_PUSHOP(); \ + emith_last_insns[emith_last_idx] = op; \ + COUNT_OP; \ + } while (0) + +#define emith_flush() \ + do { \ + int i; for (i = 0; i < 2; i++) EMIT_PUSHOP(); \ + } while (0) + +#define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr + \ + (emith_last_insns[0] != -1) + (emith_last_insns[1] != -1)) + +// delay slot stuff +static int emith_is_j(u32 op) // J, JAL + { return ((op>>26) & 076) == OP_J; } +static int emith_is_jr(u32 op) // JR, JALR + { return (op>>26) == OP__FN && (op & 076) == FN_JR; } +static int emith_is_b(u32 op) // B + { return ((op>>26) & 074) == OP_BEQ || + ((op>>26) == OP__RT && ((op>>16) & 036) == RT_BLTZ); } +// register usage for dependency evaluation XXX better do this as in emit_arm? +static uint64_t emith_has_rs[3] = // OP__FN, OP__RT, others + { 0x00fffffffffa0ff0ULL, 0x000fff0fUL, 0xffffffff0f007f30ULL }; +static uint64_t emith_has_rt[3] = // OP__FN, OP__RT, others + { 0xff00fffffff00cffULL, 0x00000000UL, 0x8000ff0000000030ULL }; +static uint64_t emith_has_rd[3] = // OP__FN, OP__RT, others (rt instead of rd) + { 0xff00fffffff50fffULL, 0x00000000UL, 0x119100ff0f00ff00ULL }; +#define emith_has_(rx,ix,op,sa,m) \ + (emith_has_##rx[ix] & (1ULL << (((op)>>(sa)) & (m)))) +static int emith_rs(u32 op) + { if ((op>>26) == OP__FN) + return emith_has_(rs,0,op, 0,0x3f) ? (op>>21)&0x1f : 0; + if ((op>>26) == OP__RT) + return emith_has_(rs,1,op,16,0x1f) ? (op>>21)&0x1f : 0; + return emith_has_(rs,2,op,26,0x3f) ? (op>>21)&0x1f : 0; + } +static int emith_rt(u32 op) + { if ((op>>26) == OP__FN) + return emith_has_(rt,0,op, 0,0x3f) ? (op>>16)&0x1f : 0; + if ((op>>26) == OP__RT) + return 0; + return emith_has_(rt,2,op,26,0x3f) ? (op>>16)&0x1f : 0; + } +static int emith_rd(u32 op) + { if ((op>>26) == OP__FN) + return emith_has_(rd,0,op, 0,0x3f) ? (op>>11)&0x1f :-1; + if ((op>>26) == OP__RT) + return -1; + return emith_has_(rd,2,op,26,0x3f) ? (op>>16)&0x1f :-1; + } + +static int emith_b_isswap(u32 bop, u32 lop) +{ + if (emith_is_j(bop)) + return bop; + else if (emith_is_jr(bop) && emith_rd(lop) != emith_rs(bop)) + return bop; + else if (emith_is_b(bop) && emith_rd(lop) != emith_rs(bop)) + if ((bop & 0xffff) != 0x7fff) // displacement overflow? 
+ return (bop & 0xffff0000) | ((bop & 0xffff)+1); + return 0; +} + +// emit branch, trying to fill the delay slot with one of the last insns +static void *emith_branch(u32 op) +{ + int idx = emith_last_idx; + u32 op1 = emith_last_insns[idx], op2 = emith_last_insns[idx^1]; + u32 bop = 0; + void *bp; + + // check last insn (op1) + if (op1 != -1 && op1) + bop = emith_b_isswap(op, op1); + // if not, check older insn (op2); mustn't interact with op1 to overtake + if (!bop && op2 != -1 && op2 && emith_rd(op1) != emith_rd(op2) && + emith_rs(op1) != emith_rd(op2) && emith_rt(op1) != emith_rd(op2) && + emith_rs(op2) != emith_rd(op1) && emith_rt(op2) != emith_rd(op1)) { + idx ^= 1; + bop = emith_b_isswap(op, op2); + } + + if (bop) { // can swap + if (emith_last_insns[idx^1] != -1) + EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]); + bp = tcache_ptr; + EMIT_PTR(tcache_ptr, bop); COUNT_OP; + EMIT_PTR(tcache_ptr, emith_last_insns[idx]); + emith_last_insns[0] = emith_last_insns[1] = -1; + } else { // can't swap + emith_flush(); + bp = tcache_ptr; + EMIT_PTR(tcache_ptr, op); COUNT_OP; + EMIT_PTR(tcache_ptr, MIPS_NOP); COUNT_OP; + } + return bp; +} + +// if-then-else conditional execution helpers +#define JMP_POS(ptr) \ + ptr = emith_branch(MIPS_BCONDZ(cond_m, cond_r, 0)); + +#define JMP_EMIT(cond, ptr) { \ + u32 val_ = emith_insn_ptr() - (u8 *)(ptr) - 4; \ + EMIT_PTR(ptr, MIPS_BCONDZ(cond_m, cond_r, val_ & 0x0003ffff)); \ + emith_flush(); /* NO delay slot handling across jump targets */ \ +} + +#define JMP_EMIT_NC(ptr) { \ + u32 val_ = emith_insn_ptr() - (u8 *)(ptr) - 4; \ + EMIT_PTR(ptr, MIPS_B(val_ & 0x0003ffff)); \ + emith_flush(); \ +} + +#define EMITH_JMP_START(cond) { \ + int cond_r, cond_m = emith_cond_check(cond, &cond_r); \ + u8 *cond_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP_END(cond) \ + JMP_EMIT(cond, cond_ptr); \ +} + +#define EMITH_JMP3_START(cond) { \ + int cond_r, cond_m = emith_cond_check(cond, &cond_r); \ + u8 *cond_ptr, *else_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP3_MID(cond) \ + JMP_POS(else_ptr); \ + JMP_EMIT(cond, cond_ptr); + +#define EMITH_JMP3_END() \ + JMP_EMIT_NC(else_ptr); \ +} + +// "simple" jump (no more then a few insns) +// ARM32 will use conditional instructions here +#define EMITH_SJMP_START EMITH_JMP_START +#define EMITH_SJMP_END EMITH_JMP_END + +#define EMITH_SJMP3_START EMITH_JMP3_START +#define EMITH_SJMP3_MID EMITH_JMP3_MID +#define EMITH_SJMP3_END EMITH_JMP3_END + +#define EMITH_SJMP2_START(cond) \ + EMITH_SJMP3_START(cond) +#define EMITH_SJMP2_MID(cond) \ + EMITH_SJMP3_MID(cond) +#define EMITH_SJMP2_END(cond) \ + EMITH_SJMP3_END() + + +// flag register emulation. this is modelled after arm/x86. +// the FNZ register stores the result of the last flag setting operation for +// N and Z flag, used for EQ,NE,MI,PL branches. +// the FC register stores the C flag (used for HI,HS,LO,LS,CC,CS). +// the FV register stores information for V flag calculation (used for +// GT,GE,LT,LE,VC,VS). V flag is costly and only fully calculated when needed. +// the core registers may be temp registers, since the condition after calls +// is undefined anyway. + +// flag emulation creates 2 (ie cmp #0/beq) up to 9 (ie adcf/ble) extra insns. +// flag handling shortcuts may reduce this by 1-4 insns, see emith_cond_check() +int emith_flg_rs, emith_flg_rt; // registers used in FNZ=rs-rt (aka cmp_r_r) +int emith_flg_noV; // V flag known not to be set + +// store minimal cc information: rd, rt^rs, carry +// NB: the result *must* first go to FNZ, in case rd == rs or rd == rt. 
+// NB: for adcf and sbcf, carry-in must be dealt with separately (see there) +static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) +{ + if (sub && rd == FNZ && rt && rs) // is this cmp_r_r? + emith_flg_rs = rs, emith_flg_rt = rt; + else emith_flg_rs = emith_flg_rt = 0; + + if (sub) // C = sub:rt 0) // Nt^Ns + EMIT(MIPS_XOR_REG(FV, rt, rs)); + else if (imm < 0) + EMIT(MIPS_NOR_REG(FV, rt, Z0)); + else if (imm > 0) + EMIT(MIPS_OR_REG(FV, rt, Z0)); // Nt^Ns in FV, bit 31 + else emith_flg_noV = 1; // imm #0, never overflows + // full V = Nd^Nt^Ns^C calculation is deferred until really needed + + if (rd != FNZ) + EMIT(MIPS_MOVE_REG(rd, FNZ)); // N,Z via result value in FNZ +} + +// data processing, register +#define emith_move_r_r_ptr(d, s) \ + EMIT(MIPS_MOVE_REG(d, s)) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) + +#define emith_move_r_r(d, s) \ + emith_move_r_r_ptr(d, s) +#define emith_move_r_r_c(cond, d, s) \ + emith_move_r_r(d, s) + +#define emith_mvn_r_r(d, s) \ + EMIT(MIPS_MVN_REG(d, s)) + +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_ADD_REG(d, s1, AT)); \ + } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) \ + emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) + +#define emith_add_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSR_IMM(AT, s2, simm)); \ + EMIT(MIPS_ADD_REG(d, s1, AT)); \ + } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ +} while (0) + +#define emith_addf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(MIPS_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_addf_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSR_IMM(AT, s2, simm)); \ + EMIT(MIPS_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(MIPS_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_sub_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_SUB_REG(d, s1, AT)); \ + } else EMIT(MIPS_SUB_REG(d, s1, s2)); \ +} while (0) + +#define emith_subf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_SUB_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 1); \ + } else { \ + EMIT(MIPS_SUB_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + } \ +} while (0) + +#define emith_or_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_OR_REG(d, s1, AT)); \ + } else EMIT(MIPS_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_XOR_REG(d, s1, AT)); \ + } else EMIT(MIPS_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSR_IMM(AT, s2, simm)); \ + EMIT(MIPS_XOR_REG(d, s1, AT)); \ + } else EMIT(MIPS_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_and_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_AND_REG(d, s1, AT)); \ + } else EMIT(MIPS_AND_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) + +#define 
emith_eor_r_r_lsr(d, s, lsrimm) \ + emith_eor_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_add_r_r_r(d, s1, s2) \ + emith_add_r_r_r_lsl(d, s1, s2, 0) + +#define emith_addf_r_r_r(d, s1, s2) \ + emith_addf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sub_r_r_r(d, s1, s2) \ + emith_sub_r_r_r_lsl(d, s1, s2, 0) + +#define emith_subf_r_r_r(d, s1, s2) \ + emith_subf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_or_r_r_r(d, s1, s2) \ + emith_or_r_r_r_lsl(d, s1, s2, 0) + +#define emith_eor_r_r_r(d, s1, s2) \ + emith_eor_r_r_r_lsl(d, s1, s2, 0) + +#define emith_and_r_r_r(d, s1, s2) \ + emith_and_r_r_r_lsl(d, s1, s2, 0) + +#define emith_add_r_r_ptr(d, s) \ + emith_add_r_r_r_lsl_ptr(d, d, s, 0) +#define emith_add_r_r(d, s) \ + emith_add_r_r_r(d, d, s) + +#define emith_sub_r_r(d, s) \ + emith_sub_r_r_r(d, d, s) + +#define emith_neg_r_r(d, s) \ + EMIT(MIPS_NEG_REG(d, s)) + +#define emith_adc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s1, FC); \ + emith_add_r_r_r(d, AT, s2); \ +} while (0) + +#define emith_adc_r_r(d, s) \ + emith_adc_r_r_r(d, d, s) + +// NB: the incoming C can cause its own outgoing C if s2+C=0 (or s1+C=0 FWIW) +// moreover, s2 is 0 if there is C, so no other C can be generated. +#define emith_adcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ + emith_add_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_sbcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ + emith_sub_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_and_r_r(d, s) \ + emith_and_r_r_r(d, d, s) +#define emith_and_r_r_c(cond, d, s) \ + emith_and_r_r(d, s) + +#define emith_or_r_r(d, s) \ + emith_or_r_r_r(d, d, s) + +#define emith_eor_r_r(d, s) \ + emith_eor_r_r_r(d, d, s) + +#define emith_tst_r_r_ptr(d, s) \ + emith_and_r_r_r(FNZ, d, s) +#define emith_tst_r_r(d, s) \ + emith_tst_r_r_ptr(d, s) + +#define emith_teq_r_r(d, s) \ + emith_eor_r_r_r(FNZ, d, s) + +#define emith_cmp_r_r(d, s) \ + emith_subf_r_r_r(FNZ, d, s) + +#define emith_addf_r_r(d, s) \ + emith_addf_r_r_r(d, d, s) + +#define emith_subf_r_r(d, s) \ + emith_subf_r_r_r(d, d, s) + +#define emith_adcf_r_r(d, s) \ + emith_adcf_r_r_r(d, d, s) + +#define emith_sbcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, d, s) + +#define emith_negcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, Z0, s) + + +// move immediate +static void emith_move_imm(int r, uintptr_t imm) +{ + if ((s16)imm != imm) { + int s = Z0; + if (imm >> 16) { + EMIT(MIPS_MOVT_IMM(r, imm >> 16)); + s = r; + } + if ((u16)imm) + EMIT(MIPS_OR_IMM(r, s, (u16)imm)); + } else + EMIT(MIPS_ADD_IMM(r, Z0, imm)); +} + +#define emith_move_r_ptr_imm(r, imm) \ + emith_move_imm(r, (uintptr_t)(imm)) + +#define emith_move_r_imm(r, imm) \ + emith_move_imm(r, (u32)(imm)) +#define emith_move_r_imm_c(cond, r, imm) \ + emith_move_r_imm(r, imm) + + +// arithmetic, immediate +static void emith_arith_imm(int op, int rd, int rs, u32 imm) +{ + if ((s16)imm != imm) { + emith_move_r_imm(AT, imm); + EMIT(MIPS_OP_REG(FN_ADD + (op-OP_ADDI), rd, rs, AT)); + } else if (imm || rd != rs) + EMIT(MIPS_OP_IMM(op, rd, rs, imm)); +} + +#define emith_add_r_imm(r, imm) \ + emith_add_r_r_imm(r, r, imm) +#define emith_add_r_imm_c(cond, r, imm) \ + emith_add_r_imm(r, imm) + +#define emith_addf_r_imm(r, imm) \ + emith_addf_r_r_imm(r, imm) + +#define emith_sub_r_imm(r, imm) \ + emith_sub_r_r_imm(r, r, imm) +#define emith_sub_r_imm_c(cond, r, imm) 
\ + emith_sub_r_imm(r, imm) + +#define emith_subf_r_imm(r, imm) \ + emith_subf_r_r_imm(r, r, imm) + +#define emith_adc_r_imm(r, imm) \ + emith_adc_r_r_imm(r, r, imm); + +#define emith_adcf_r_imm(r, imm) \ + emith_adcf_r_r_imm(r, r, imm) + +#define emith_cmp_r_imm(r, imm) \ + emith_subf_r_r_imm(FNZ, r, (s16)imm) + + +#define emith_add_r_r_ptr_imm(d, s, imm) \ + emith_arith_imm(OP_ADDIU, d, s, imm) + +#define emith_add_r_r_imm(d, s, imm) \ + emith_add_r_r_ptr_imm(d, s, imm) + +#define emith_addf_r_r_imm(d, s, imm) do { \ + emith_add_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, 0, imm, 0); \ +} while (0) + +#define emith_adc_r_r_imm(d, s, imm) do { \ + emith_add_r_r_r(AT, s, FC); \ + emith_add_r_r_imm(d, AT, imm); \ +} while (0) + +#define emith_adcf_r_r_imm(d, s, imm) do { \ + emith_add_r_r_r(FNZ, s, FC); \ + EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ + emith_add_r_r_imm(FNZ, FNZ, imm); \ + emith_set_arith_flags(d, s, 0, imm, 0); \ + emith_or_r_r(FC, AT); \ +} while (0) + +// NB: no SUBI in MIPS II, since ADDI takes a signed imm +#define emith_sub_r_r_imm(d, s, imm) \ + emith_add_r_r_imm(d, s, -(imm)) +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_sub_r_r_imm(d, s, imm) + +#define emith_subf_r_r_imm(d, s, imm) do { \ + emith_sub_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, 0, imm, 1); \ +} while (0) + +// logical, immediate +static void emith_log_imm(int op, int rd, int rs, u32 imm) +{ + if (imm >> 16) { + emith_move_r_imm(AT, imm); + EMIT(MIPS_OP_REG(FN_AND + (op-OP_ANDI), rd, rs, AT)); + } else if (op == OP_ANDI || imm || rd != rs) + EMIT(MIPS_OP_IMM(op, rd, rs, imm)); +} + +#define emith_and_r_imm(r, imm) \ + emith_log_imm(OP_ANDI, r, r, imm) + +#define emith_or_r_imm(r, imm) \ + emith_log_imm(OP_ORI, r, r, imm) +#define emith_or_r_imm_c(cond, r, imm) \ + emith_or_r_imm(r, imm) + +#define emith_eor_r_imm_ptr(r, imm) \ + emith_log_imm(OP_XORI, r, r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_ptr(r, imm) + +#define emith_eor_r_imm(r, imm) \ + emith_eor_r_imm_ptr(r, imm) +#define emith_eor_r_imm_c(cond, r, imm) \ + emith_eor_r_imm(r, imm) + +/* NB: BIC #imm not available in MIPS; use AND #~imm instead */ +#define emith_bic_r_imm(r, imm) \ + emith_log_imm(OP_ANDI, r, r, ~(imm)) +#define emith_bic_r_imm_c(cond, r, imm) \ + emith_bic_r_imm(r, imm) + +#define emith_tst_r_imm(r, imm) \ + emith_log_imm(OP_ANDI, FNZ, r, imm) +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_tst_r_imm(r, imm) + +#define emith_and_r_r_imm(d, s, imm) \ + emith_log_imm(OP_ANDI, d, s, imm) + +#define emith_or_r_r_imm(d, s, imm) \ + emith_log_imm(OP_ORI, d, s, imm) + +#define emith_eor_r_r_imm(d, s, imm) \ + emith_log_imm(OP_XORI, d, s, imm) + +// shift +#define emith_lsl(d, s, cnt) \ + EMIT(MIPS_LSL_IMM(d, s, cnt)) + +#define emith_lsr(d, s, cnt) \ + EMIT(MIPS_LSR_IMM(d, s, cnt)) + +#define emith_asr(d, s, cnt) \ + EMIT(MIPS_ASR_IMM(d, s, cnt)) + +// NB: mips32r2 has ROT (SLR with R bit set) +#define emith_ror(d, s, cnt) do { \ + EMIT(MIPS_LSL_IMM(AT, s, 32-(cnt))); \ + EMIT(MIPS_LSR_IMM(d, s, cnt)); \ + EMIT(MIPS_OR_REG(d, d, AT)); \ +} while (0) +#define emith_ror_c(cond, d, s, cnt) \ + emith_ror(d, s, cnt) + +#define emith_rol(d, s, cnt) do { \ + EMIT(MIPS_LSR_IMM(AT, s, 32-(cnt))); \ + EMIT(MIPS_LSL_IMM(d, s, cnt)); \ + EMIT(MIPS_OR_REG(d, d, AT)); \ +} while (0) + +// NB: all flag setting shifts make V undefined +// NB: mips32r2 has EXT (useful for extracting C) +#define emith_lslf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsl(d, s, cnt-1); \ + _s 
= d; \ + } \ + if ((cnt) > 0) { \ + emith_lsr(FC, _s, 31); \ + emith_lsl(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_lsrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_lsr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_asrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_asr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_asr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_rolf(d, s, cnt) do { \ + emith_rol(d, s, cnt); \ + emith_and_r_r_imm(FC, d, 1); \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_rorf(d, s, cnt) do { \ + emith_ror(d, s, cnt); \ + emith_lsr(FC, d, 31); \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_rolcf(d) do { \ + emith_lsr(AT, d, 31); \ + emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ +} while (0) + +#define emith_rorcf(d) do { \ + emith_and_r_r_imm(AT, d, 1); \ + emith_lsr(d, d, 1); \ + emith_lsl(FC, FC, 31); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ +} while (0) + +// signed/unsigned extend +// NB: mips32r2 has EXT and INS +#define emith_clear_msb(d, s, count) /* bits to clear */ do { \ + u32 t; \ + if ((count) > 16) { \ + t = (count) - 16; \ + t = 0xffff >> t; \ + emith_and_r_r_imm(d, s, t); \ + } else { \ + emith_lsl(d, s, count); \ + emith_lsr(d, d, count); \ + } \ +} while (0) +#define emith_clear_msb_c(cond, d, s, count) \ + emith_clear_msb(d, s, count) + +// NB: mips32r2 has SE[BH]H +#define emith_sext(d, s, count) /* bits to keep */ do { \ + emith_lsl(d, s, 32-(count)); \ + emith_asr(d, d, 32-(count)); \ +} while (0) + +// multiply Rd = Rn*Rm (+ Ra); NB: next 2 insns after MFLO/MFHI mustn't be MULT +static u8 *last_lohi; +static void emith_lohi_nops(void) +{ + u32 d; + while ((d = emith_insn_ptr() - last_lohi) < 8 && d >= 0) EMIT(MIPS_NOP); +} + +#define emith_mul(d, s1, s2) do { \ + emith_lohi_nops(); \ + EMIT(MIPS_MULTU(s1, s2)); \ + EMIT(MIPS_MFLO(d)); \ + last_lohi = emith_insn_ptr(); \ +} while (0) + +#define emith_mul_u64(dlo, dhi, s1, s2) do { \ + emith_lohi_nops(); \ + EMIT(MIPS_MULTU(s1, s2)); \ + EMIT(MIPS_MFLO(dlo)); \ + EMIT(MIPS_MFHI(dhi)); \ + last_lohi = emith_insn_ptr(); \ +} while (0) + +#define emith_mul_s64(dlo, dhi, s1, s2) do { \ + emith_lohi_nops(); \ + EMIT(MIPS_MULT(s1, s2)); \ + EMIT(MIPS_MFLO(dlo)); \ + EMIT(MIPS_MFHI(dhi)); \ + last_lohi = emith_insn_ptr(); \ +} while (0) + +#define emith_mula_s64(dlo, dhi, s1, s2) do { \ + int t_ = rcache_get_tmp(); \ + emith_lohi_nops(); \ + EMIT(MIPS_MULT(s1, s2)); \ + EMIT(MIPS_MFLO(AT)); \ + emith_add_r_r(dlo, AT); \ + EMIT(MIPS_SLTU_REG(t_, dlo, AT)); \ + EMIT(MIPS_MFHI(AT)); \ + last_lohi = emith_insn_ptr(); \ + emith_add_r_r(dhi, AT); \ + emith_add_r_r(dhi, t_); \ + rcache_free_tmp(t_); \ +} while (0) +#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \ + emith_mula_s64(dlo, dhi, s1, s2) + +// load/store. 
offs has 16 bits signed, which is currently sufficient +#define emith_read_r_r_offs_ptr(r, rs, offs) \ + EMIT(MIPS_LW(r, rs, offs)) +#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_read_r_r_offs_ptr(r, rs, offs) + +#define emith_read_r_r_offs(r, rs, offs) \ + emith_read_r_r_offs_ptr(r, rs, offs) +#define emith_read_r_r_offs_c(cond, r, rs, offs) \ + emith_read_r_r_offs(r, rs, offs) + +#define emith_read_r_r_r_ptr(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LW(r, AT, 0)); \ +} while (0) + +#define emith_read_r_r_r(r, rs, rm) \ + emith_read_r_r_r_ptr(r, rs, rm) +#define emith_read_r_r_r_c(cond, r, rs, rm) \ + emith_read_r_r_r(r, rs, rm) + +#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \ + emith_add_r_r_r(rs, rs, rm); \ + EMIT(MIPS_LW(r, rs, 0)); \ +} while (0) +#define emith_read_r_r_r_wb(r, rs, rm) \ + emith_read_r_r_r_ptr_wb(r, rs, rm) + +#define emith_read8_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LBU(r, rs, offs)) +#define emith_read8_r_r_offs_c(cond, r, rs, offs) \ + emith_read8_r_r_offs(r, rs, offs) + +#define emith_read8_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LBU(r, AT, 0)); \ +} while (0) +#define emith_read8_r_r_r_c(cond, r, rs, rm) \ + emith_read8_r_r_r(r, rs, rm) + +#define emith_read16_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LHU(r, rs, offs)) +#define emith_read16_r_r_offs_c(cond, r, rs, offs) \ + emith_read16_r_r_offs(r, rs, offs) + +#define emith_read16_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LHU(r, AT, 0)); \ +} while (0) +#define emith_read16_r_r_r_c(cond, r, rs, rm) \ + emith_read16_r_r_r(r, rs, rm) + +#define emith_read8s_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LB(r, rs, offs)) +#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \ + emith_read8s_r_r_offs(r, rs, offs) + +#define emith_read8s_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LB(r, AT, 0)); \ +} while (0) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + emith_read8s_r_r_r(r, rs, rm) + +#define emith_read16s_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LH(r, rs, offs)) +#define emith_read16s_r_r_offs_c(cond, r, rs, offs) \ + emith_read16s_r_r_offs(r, rs, offs) + +#define emith_read16s_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_LH(r, AT, 0)); \ +} while (0) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + emith_read16s_r_r_r(r, rs, rm) + + +#define emith_write_r_r_offs_ptr(r, rs, offs) \ + EMIT(MIPS_SW(r, rs, offs)) +#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_write_r_r_offs_ptr(r, rs, offs) + +#define emith_write_r_r_r_ptr(r, rs, rm) do { \ + emith_add_r_r_r(AT, rs, rm); \ + EMIT(MIPS_SW(r, AT, 0)); \ +} while (0) +#define emith_write_r_r_r_ptr_c(cond, r, rs, rm) \ + emith_write_r_r_r_ptr(r, rs, rm) + +#define emith_write_r_r_offs(r, rs, offs) \ + emith_write_r_r_offs_ptr(r, rs, offs) +#define emith_write_r_r_offs_c(cond, r, rs, offs) \ + emith_write_r_r_offs(r, rs, offs) + +#define emith_write_r_r_r(r, rs, rm) \ + emith_write_r_r_r_ptr(r, rs, rm) +#define emith_write_r_r_r_c(cond, r, rs, rm) \ + emith_write_r_r_r(r, rs, rm) + +#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \ + emith_add_r_r_r(rs, rs, rm); \ + EMIT(MIPS_SW(r, rs, 0)); \ +} while (0) +#define emith_write_r_r_r_wb(r, rs, rm) \ + emith_write_r_r_r_ptr_wb(r, rs, rm) + +#define emith_ctx_read_ptr(r, offs) \ + emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_read(r, offs) \ + emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_c(cond, r, offs) \ + emith_ctx_read(r, offs) + 
+#define emith_ctx_write_ptr(r, offs) \ + emith_write_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_write(r, offs) \ + emith_write_r_r_offs(r, CONTEXT_REG, offs) + +#define emith_ctx_read_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_read(r_, offs_); \ +} while (0) + +#define emith_ctx_write_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_write(r_, offs_); \ +} while (0) + +// function call handling +#define emith_save_caller_regs(mask) do { \ + int _c; u32 _m = mask & 0x300fffc; /* r2-r15,r24-r25 */ \ + if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align */ \ + int _s = count_bits(_m) * 4, _o = _s; \ + if (_s) emith_sub_r_imm(SP, _s); \ + for (_c = HOST_REGS; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ +} while (0) + +#define emith_restore_caller_regs(mask) do { \ + int _c; u32 _m = mask & 0x300fffc; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; \ + int _s = count_bits(_m) * 4, _o = 0; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ + if (_s) emith_add_r_imm(SP, _s); \ +} while (0) + +#define host_arg2reg(rd, arg) \ + rd = (arg+4) + +#define emith_pass_arg_r(arg, reg) \ + emith_move_r_r(arg, reg) + +#define emith_pass_arg_imm(arg, imm) \ + emith_move_r_imm(arg, imm) + +// branching +#define emith_invert_branch(cond) /* inverted conditional branch */ \ + (((cond) >> 5) == OP__RT ? (cond) ^ 0x01 : (cond) ^ 0x20) + +// evaluate the emulated condition, returns a register/branch type pair +static int emith_cond_check(int cond, int *r) +{ + int b = 0; + + // shortcut for comparing 2 registers + if (emith_flg_rs || emith_flg_rt) switch (cond) { + case DCOND_LS: EMIT(MIPS_SLTU_REG(AT, emith_flg_rs, emith_flg_rt)); + *r = AT, b = MIPS_BEQ; break; // s <= t unsigned + case DCOND_HI: EMIT(MIPS_SLTU_REG(AT, emith_flg_rs, emith_flg_rt)); + *r = AT, b = MIPS_BNE; break; // s > t unsigned + case DCOND_LT: EMIT(MIPS_SLT_REG(AT, emith_flg_rt, emith_flg_rs)); + *r = AT, b = MIPS_BNE; break; // s < t + case DCOND_GE: EMIT(MIPS_SLT_REG(AT, emith_flg_rt, emith_flg_rs)); + *r = AT, b = MIPS_BEQ; break; // s >= t + case DCOND_LE: EMIT(MIPS_SLT_REG(AT, emith_flg_rs, emith_flg_rt)); + *r = AT, b = MIPS_BEQ; break; // s <= t + case DCOND_GT: EMIT(MIPS_SLT_REG(AT, emith_flg_rs, emith_flg_rt)); + *r = AT, b = MIPS_BNE; break; // s > t + } + + // shortcut for V known to be 0 + if (!b && emith_flg_noV) switch (cond) { + case DCOND_VS: *r = Z0; b = MIPS_BNE; break; // never + case DCOND_VC: *r = Z0; b = MIPS_BEQ; break; // always + case DCOND_LT: *r = FNZ, b = MIPS_BLT; break; // N + case DCOND_GE: *r = FNZ, b = MIPS_BGE; break; // !N + case DCOND_LE: *r = FNZ, b = MIPS_BLE; break; // N || Z + case DCOND_GT: *r = FNZ, b = MIPS_BGT; break; // !N && !Z + } + + // the full monty if no shortcut + if (!b) switch (cond) { + // conditions using NZ + case DCOND_EQ: *r = FNZ; b = MIPS_BEQ; break; // Z + case DCOND_NE: *r = FNZ; b = MIPS_BNE; break; // !Z + case DCOND_MI: *r = FNZ; b = MIPS_BLT; break; // N + case DCOND_PL: *r = FNZ; b = MIPS_BGE; break; // !N + // conditions using C + case DCOND_LO: *r = FC; b = MIPS_BNE; break; // C + case DCOND_HS: *r = FC; b = MIPS_BEQ; break; // !C + // conditions using CZ + case DCOND_LS: // C || Z + case DCOND_HI: // !C && 
!Z + EMIT(MIPS_ADD_IMM(AT, FC, (u16)-1)); // !C && !Z + EMIT(MIPS_AND_REG(AT, FNZ, AT)); + *r = AT, b = (cond == DCOND_HI ? MIPS_BNE : MIPS_BEQ); + break; + + // conditions using V + case DCOND_VS: // V + case DCOND_VC: // !V + EMIT(MIPS_XOR_REG(AT, FV, FNZ)); // V = Nt^Ns^Nd^C + EMIT(MIPS_LSR_IMM(AT, AT, 31)); + EMIT(MIPS_XOR_REG(AT, AT, FC)); + *r = AT, b = (cond == DCOND_VS ? MIPS_BNE : MIPS_BEQ); + break; + // conditions using VNZ + case DCOND_LT: // N^V + case DCOND_GE: // !(N^V) + EMIT(MIPS_LSR_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(MIPS_XOR_REG(AT, FC, AT)); + *r = AT, b = (cond == DCOND_LT ? MIPS_BNE : MIPS_BEQ); + break; + case DCOND_LE: // (N^V) || Z + case DCOND_GT: // !(N^V) && !Z + EMIT(MIPS_LSR_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(MIPS_XOR_REG(AT, FC, AT)); + EMIT(MIPS_ADD_IMM(AT, AT, (u16)-1)); // !(Nd^V) && !Z + EMIT(MIPS_AND_REG(AT, FNZ, AT)); + *r = AT, b = (cond == DCOND_GT ? MIPS_BNE : MIPS_BEQ); + break; + } + return b; +} + +// NB: assumes all targets are in the same 256MB segment +#define emith_jump(target) \ + emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)) +#define emith_jump_patchable(target) \ + emith_jump(target) + +// NB: MIPS conditional branches have only +/- 128KB range +#define emith_jump_cond(cond, target) do { \ + int r_, mcond_ = emith_cond_check(cond, &r_); \ + u32 disp_ = (u8 *)target - emith_insn_ptr() - 4; \ + if (disp_ >= 0xfffe0000 || disp_ <= 0x0001ffff) { /* can use near B */ \ + emith_branch(MIPS_BCONDZ(mcond_,r_,disp_ & 0x0003ffff)); \ + } else { /* far branch if near branch isn't possible */ \ + mcond_ = emith_invert_branch(mcond_); \ + u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0)); \ + emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ + EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, emith_insn_ptr()-bp-4)); \ + } \ +} while (0) + +#define emith_jump_cond_patchable(cond, target) do { \ + int r_, mcond_ = emith_cond_check(cond, &r_); \ + mcond_ = emith_invert_branch(mcond_); \ + u8 *bp = emith_branch(MIPS_BCONDZ(mcond_, r_, 0));\ + emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ + EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, emith_insn_ptr()-bp-4)); \ +} while (0) + +// NB: returns position of patch for cache maintenance +#define emith_jump_patch(ptr, target) ({ \ + u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \ + while ((ptr_[0] & 0xf8000000) != OP_J << 26) ptr_ ++; \ + EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \ + (u8 *)(ptr_-1); \ +}) + +#define emith_jump_reg(r) \ + emith_branch(MIPS_JR(r)) +#define emith_jump_reg_c(cond, r) \ + emith_jump_reg(r) + +#define emith_jump_ctx(offs) do { \ + emith_ctx_read_ptr(AT, offs); \ + emith_jump_reg(AT); \ +} while (0) +#define emith_jump_ctx_c(cond, offs) \ + emith_jump_ctx(offs) + +#define emith_call(target) \ + emith_branch(MIPS_JAL((uintptr_t)target & 0x0fffffff)) +#define emith_call_cond(cond, target) \ + emith_call(target) + +#define emith_call_reg(r) \ + emith_branch(MIPS_JALR(LR, r)) + +#define emith_call_ctx(offs) do { \ + emith_ctx_read_ptr(AT, offs); \ + emith_call_reg(AT); \ +} while (0) + +#define emith_call_link(r, target) do { \ + EMIT(MIPS_BL(4)); EMIT(MIPS_ADD_IMM(r, LR, 8)); emith_flush(); \ + emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ +} while (0) + +#define emith_call_cleanup() /**/ + +#define emith_ret() \ + emith_branch(MIPS_JR(LR)) +#define emith_ret_c(cond) \ + emith_ret() + +#define emith_ret_to_ctx(offs) \ + emith_ctx_write_ptr(LR, offs) + +// NB: ABI SP alignment is 8 for compatibility with MIPS IV +#define 
emith_push_ret(r) do { \ + emith_sub_r_imm(SP, 8+16); /* reserve new arg save area (16) */ \ + emith_write_r_r_offs(LR, SP, 4+16); \ + if ((r) >= 0) emith_write_r_r_offs(r, SP, 0+16); \ +} while (0) + +#define emith_pop_and_ret(r) do { \ + if ((r) >= 0) emith_read_r_r_offs(r, SP, 0+16); \ + emith_read_r_r_offs(LR, SP, 4+16); \ + emith_add_r_imm(SP, 8+16); \ + emith_ret(); \ +} while (0) + + +// emitter ABI stuff +#define emith_pool_check() /**/ +#define emith_pool_commit(j) /**/ +// NB: mips32r2 has SYNCI +#define host_instructions_updated(base, end) __builtin___clear_cache(base, end) +#define emith_jump_patch_size() 4 + +// SH2 drc specific +#define emith_sh2_drc_entry() do { \ + int _c; u32 _m = 0xd0ff0000; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 8 */ \ + int _s = count_bits(_m) * 4 + 16, _o = _s; /* 16 byte arg save area */ \ + if (_s) emith_sub_r_imm(SP, _s); \ + for (_c = HOST_REGS; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ +} while (0) +#define emith_sh2_drc_exit() do { \ + int _c; u32 _m = 0xd0ff0000; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; \ + int _s = count_bits(_m) * 4 + 16, _o = 16; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ + if (_s) emith_add_r_imm(SP, _s); \ + emith_ret(); \ +} while (0) + +// NB: assumes a is in arg0, tab, func and mask are temp +#define emith_sh2_rcall(a, tab, func, mask) do { \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + emith_add_r_r_r_lsl_ptr(tab, tab, mask, 3); \ + emith_read_r_r_offs_ptr(func, tab, 0); \ + emith_read_r_r_offs(mask, tab, 4); \ + emith_addf_r_r_r/*_ptr*/(func, func, func); \ +} while (0) + +// NB: assumes a, val are in arg0 and arg1, tab and func are temp +#define emith_sh2_wcall(a, val, tab, func) do { \ + emith_lsr(func, a, SH2_WRITE_SHIFT); \ + emith_lsl(func, func, 2); \ + emith_read_r_r_r_ptr(func, tab, func); \ + emith_move_r_r_ptr(6, CONTEXT_REG); /* arg2 */ \ + emith_jump_reg(func); \ +} while (0) + +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + /* if (sr < 0) return */ \ + emith_cmp_r_imm(sr, 0); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ + emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_HI); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ + EMITH_SJMP_END(DCOND_HI); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul(t1, t2, t1); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + +/* + * if Q + * t = carry(Rn += Rm) + * else + * t = carry(Rn -= Rm) + * T ^= t + */ +#define emith_sh2_div1_step(rn, rm, sr) do { \ + emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ + EMITH_JMP3_START(DCOND_EQ); \ + emith_addf_r_r(rn, rm); \ + 
EMITH_JMP3_MID(DCOND_EQ); \ + emith_subf_r_r(rn, rm); \ + EMITH_JMP3_END(); \ + emith_eor_r_r(sr, FC); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \ + emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \ + emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \ + EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ + emith_lsr(rn, ml, 31); \ + emith_addf_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \ + EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \ + emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_LE); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +#define emith_write_sr(sr, srcr) do { \ + emith_lsr(sr, sr, 10); \ + emith_or_r_r_r_lsl(sr, sr, srcr, 22); \ + emith_ror(sr, sr, 22); \ +} while (0) + +#define emith_carry_to_t(srr, is_sub) do { \ + emith_lsr(sr, sr, 1); \ + emith_adc_r_r(sr, sr); \ +} while (0) + +#define emith_tpop_carry(sr, is_sub) do { \ + emith_and_r_r_imm(FC, sr, 1); \ + emith_lsr(sr, sr, 1); \ +} while (0) + +#define emith_tpush_carry(sr, is_sub) \ + emith_adc_r_r(sr, sr) + +#ifdef T +// T bit handling +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +static void emith_clr_t_cond(int sr) +{ + emith_bic_r_imm(sr, T); +} + +static void emith_set_t_cond(int sr, int cond) +{ + EMITH_SJMP_START(emith_invert_cond(cond)); + emith_or_r_imm_c(cond, sr, T); + EMITH_SJMP_END(emith_invert_cond(cond)); +} + +#define emith_get_t_cond() -1 + +#define emith_sync_t(sr) ((void)sr) + +#define emith_invalidate_t() + +static void emith_set_t(int sr, int val) +{ + if (val) + emith_or_r_imm(sr, T); + else + emith_bic_r_imm(sr, T); +} + +static int emith_tst_t(int sr, int tf) +{ + emith_tst_r_imm(sr, T); + return tf ? 
DCOND_NE: DCOND_EQ; +} +#endif diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 0a31d894..a40c0f8c 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -869,11 +869,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_jump_cond_patchable(cond, target) \ emith_jump_cond(cond, target) -#define emith_jump_patch(ptr, target) do { \ +#define emith_jump_patch(ptr, target) ({ \ u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \ u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 2 : 1; \ EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ -} while (0) + ptr; \ +}) + +#define emith_jump_patch_size() 6 #define emith_jump_at(ptr, target) do { \ u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 5); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 3b03d0c2..01fc6ae1 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -466,6 +466,47 @@ static cache_reg_t cache_regs[] = { { 7, HRF_REG }, }; +#elif defined(__mips__) +#include "../drc/emit_mips.c" + +static guest_reg_t guest_regs[] = { + // SHR_R0 .. SHR_SP + {GRF_STATIC, 20} , {GRF_STATIC, 21} , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + { 0 } , { 0 } , { 0 } , { 0 } , + // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, + // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, + { 0 } , { 0 } , { 0 } , {GRF_STATIC, 22} , + { 0 } , { 0 } , { 0 } , { 0 } , +}; + +// MIPS ABI: params: r4-r7, return: r2-r3, temp: r1(at),r8-r15,r24-r25,r31(ra), +// saved: r16-r23,r30, reserved: r0(zero), r26-r27(irq), r28(gp), r29(sp) +// r1,r15,r24,r25 are used internally by the code emitter +static cache_reg_t cache_regs[] = { + { 14, HRF_TEMP }, // temps + { 13, HRF_TEMP }, + { 12, HRF_TEMP }, + { 11, HRF_TEMP }, + { 10, HRF_TEMP }, + { 9, HRF_TEMP }, + { 8, HRF_TEMP }, + { 7, HRF_TEMP }, // params + { 6, HRF_TEMP }, + { 5, HRF_TEMP }, + { 4, HRF_TEMP }, + { 3, HRF_TEMP }, // RET_REG + { 2, HRF_TEMP }, + { 22, HRF_LOCKED }, // statics + { 21, HRF_LOCKED }, + { 20, HRF_LOCKED }, + { 19, HRF_REG }, // other regs + { 18, HRF_REG }, + { 17, HRF_REG }, + { 16, HRF_REG }, +}; + #elif defined(__i386__) #include "../drc/emit_x86.c" @@ -1050,9 +1091,12 @@ static void dr_block_link(struct block_entry *be, struct block_link *bl, int emi dbg(2, "- %slink from %p to pc %08x entry %p", emit_jump ? "":"early ", bl->jump, bl->target_pc, be->tcache_ptr); - if (emit_jump) - emith_jump_patch(bl->jump, be->tcache_ptr); - // could sync arm caches here, but that's unnecessary + if (emit_jump) { + u8 *jump = emith_jump_patch(bl->jump, be->tcache_ptr); + // only needs sync if patch is possibly crossing cacheline (assume 16 byte) + if ((uintptr_t)jump >>4 != ((uintptr_t)jump+emith_jump_patch_size()-1) >>4) + host_instructions_updated(jump, jump+emith_jump_patch_size()); + } // move bl to block_entry bl->target = be; @@ -1069,9 +1113,9 @@ static void dr_block_unlink(struct block_link *bl, int emit_jump) if (bl->target) { if (emit_jump) { - emith_jump_patch(bl->jump, sh2_drc_dispatcher); + u8 *jump = emith_jump_patch(bl->jump, sh2_drc_dispatcher); // update cpu caches since the previous jump target doesn't exist anymore - host_instructions_updated(bl->jump, bl->jump+4); + host_instructions_updated(jump, jump+emith_jump_patch_size()); } if (bl->prev) @@ -4128,8 +4172,9 @@ end_op: struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? 
opd-1 : opd; u32 target_pc = opd_b->imm; int cond = -1; - void *target = NULL; int ctaken = 0; + void *target = NULL; + int patchable = 0; if (OP_ISBRACND(opd_b->op)) ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; @@ -4182,11 +4227,12 @@ end_op: branch_patch_pc[branch_patch_count] = target_pc; branch_patch_ptr[branch_patch_count] = target; branch_patch_count++; - } - else + patchable = 1; + } else dbg(1, "warning: too many local branches"); } #endif + if (target == NULL) { // can't resolve branch locally, make a block exit @@ -4204,14 +4250,24 @@ end_op: } else #endif target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); + patchable = 1; } - if (cond != -1) { - emith_jump_cond_patchable(cond, target); - } - else if (target != NULL) { - rcache_invalidate(); - emith_jump_patchable(target); + // create branch + if (patchable) { + if (cond != -1) + emith_jump_cond_patchable(cond, target); + else if (target != NULL) { + rcache_invalidate(); + emith_jump_patchable(target); + } + } else { + if (cond != -1) + emith_jump_cond(cond, target); + else if (target != NULL) { + rcache_invalidate(); + emith_jump(target); + } } // branch not taken, correct cycle count diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 38e47c0b..09f4ae97 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -36,6 +36,8 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, // XXX MUST match definitions in cpu/sh2/compiler.c #if defined(__arm__) #define DRC_SR_REG r10 +#elif defined(__mips__) +#define DRC_SR_REG s6 #elif defined(__i386__) #define DRC_SR_REG edi #elif defined(__x86_64__) diff --git a/platform/common/common.mak b/platform/common/common.mak index 331e7124..5afc0171 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -169,7 +169,7 @@ DEFINES += DRC_DEBUG=$(drc_debug) SRCS_COMMON += $(R)cpu/sh2/mame/sh2dasm.c DASM = $(R)platform/libpicofe/linux/host_dasm.c DASMLIBS = -lbfd -lopcodes -liberty -ifeq "$(ARCH)" "arm" +ifeq ("$(ARCH)",$(filter "$(ARCH)","arm" "mipsel")) ifeq ($(filter_out $(shell $(CC) --print-file-name=libbfd.so),"/"),) DASM = $(R)platform/common/host_dasm.c DASMLIBS = diff --git a/platform/common/disarm.c b/platform/common/disarm.c index 2e7c04e7..80655877 100644 --- a/platform/common/disarm.c +++ b/platform/common/disarm.c @@ -435,7 +435,7 @@ static int software_interrupt(unsigned int pc, unsigned int insn, char *buf, siz return 1; } -int disarm(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len) { if ((insn & 0x0fffffd0) == 0x012fff10) return branch_and_exchange(pc, insn, buf, buf_len); diff --git a/platform/common/disarm.h b/platform/common/disarm.h index 2ea4ccc3..b8634f68 100644 --- a/platform/common/disarm.h +++ b/platform/common/disarm.h @@ -23,6 +23,6 @@ #ifndef DISARM_H #define DISARM_H -int disarm(unsigned int pc, unsigned int insn, char *buf, unsigned int buf_len); +int disarm(uintptr_t long pc, uint32_t, char *buf, unsigned int buf_len); #endif /* DISARM_H */ diff --git a/platform/common/dismips.c b/platform/common/dismips.c new file mode 100644 index 00000000..af71b095 --- /dev/null +++ b/platform/common/dismips.c @@ -0,0 +1,346 @@ +/* + * very basic mips disassembler for MIPS32/MIPS64 Release 1, only for picodrive + * Copyright (C) 2019 kub + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. 
+ */ + +// XXX unimplemented: SYSCALL, BREAK, SYNC, SDBBP, T*, CACHE, PREF, +// MOVF/MOVT, LWC*/LDC*, SWC*/SDC*, COP*. +// however, it's certainly good enough for anything picodrive DRC throws at it. + +#include +#include +#include +#include + +#include "dismips.h" + + +static char *const register_names[32] = { + "$zero", + "$at", + "$v0", + "$v1", + "$a0", + "$a1", + "$a2", + "$a3", + "$t0", + "$t1", + "$t2", + "$t3", + "$t4", + "$t5", + "$t6", + "$t7", + "$s0", + "$s1", + "$s2", + "$s3", + "$s4", + "$s5", + "$s6", + "$s7", + "$t8", + "$t9", + "$k0", + "$k1", + "$gp", + "$sp", + "$fp", + "$ra" +}; + + +enum insn_type { + REG_DTS, REG_TS, // 3, 2, or 1 regs + REG_DS, REG_D, REG_S, + S_IMM_DT, // 2 regs with shift amount + B_IMM_S, B_IMM_TS, // pc-relative branches with 1 or 2 regs + J_IMM, // region-relative jump + A_IMM_TS, // arithmetic immediate with 1 or 2 regs + L_IMM_T, L_IMM_TS, // logical immediate with 2 regs + M_IMM_TS, // memory indexed with 2 regs +}; + +struct insn { + unsigned char op; + enum insn_type type; + char *name; +}; + +// ATTN: these array MUST be sorted by op (decode relies on it) + +// instructions with opcode SPECIAL (R-type) +#define OP_SPECIAL 0x00 +static const struct insn special_insns[] = { + {0x00, S_IMM_DT, "sll"}, + {0x02, S_IMM_DT, "srl"}, + {0x03, S_IMM_DT, "sra"}, + {0x04, REG_DTS, "sllv"}, + {0x06, REG_DTS, "srlv"}, + {0x07, REG_DTS, "srav"}, + {0x08, REG_S, "jr"}, + {0x09, REG_DS, "jalr"}, + {0x0a, REG_DTS, "movz"}, + {0x0b, REG_DTS, "movn"}, +// {0x0c, , "syscall"}, +// {0x0d, , "break"}, +// {0x0f, , "sync"}, + {0x10, REG_D, "mfhi"}, + {0x11, REG_S, "mthi"}, + {0x12, REG_D, "mflo"}, + {0x13, REG_S, "mtlo"}, + {0x14, REG_DTS, "dsllv"}, + {0x16, REG_DTS, "dslrv"}, + {0x17, REG_DTS, "dsrav"}, + {0x18, REG_TS, "mult"}, + {0x19, REG_TS, "multu"}, + {0x1A, REG_TS, "div"}, + {0x1B, REG_TS, "divu"}, + {0x1C, REG_TS, "dmult"}, + {0x1D, REG_TS, "dmultu"}, + {0x1E, REG_TS, "ddiv"}, + {0x1F, REG_TS, "ddivu"}, + {0x20, REG_DTS, "add"}, + {0x21, REG_DTS, "addu"}, + {0x22, REG_DTS, "sub"}, + {0x23, REG_DTS, "subu"}, + {0x24, REG_DTS, "and"}, + {0x25, REG_DTS, "or"}, + {0x26, REG_DTS, "xor"}, + {0x27, REG_DTS, "nor"}, + {0x2A, REG_DTS, "slt"}, + {0x2B, REG_DTS, "sltu"}, + {0x2C, REG_DTS, "dadd"}, + {0x2D, REG_DTS, "daddu"}, + {0x2E, REG_DTS, "dsub"}, + {0x2F, REG_DTS, "dsubu"}, +// {0x30, REG_TS, "tge" }, +// {0x31, REG_TS, "tgeu" }, +// {0x32, REG_TS, "tlt" }, +// {0x33, REG_TS, "tltu" }, +// {0x34, REG_TS, "teq" }, +// {0x36, REG_TS, "tne" }, + {0x38, S_IMM_DT, "dsll"}, + {0x3A, S_IMM_DT, "dsrl"}, + {0x3B, S_IMM_DT, "dsra"}, + {0x3D, S_IMM_DT, "dsll32"}, + {0x3E, S_IMM_DT, "dsrl32"}, + {0x3F, S_IMM_DT, "dsra32"}, +}; + +// instructions with opcode SPECIAL2 (R-type) +#define OP_SPECIAL2 0x1C +static const struct insn special2_insns[] = { + {0x00, REG_TS, "madd" }, + {0x01, REG_TS, "maddu" }, + {0x02, REG_TS, "mul" }, + {0x04, REG_TS, "msub" }, + {0x05, REG_TS, "msubu" }, + {0x20, REG_DS, "clz" }, + {0x21, REG_DS, "clo" }, + {0x24, REG_DS, "dclz" }, + {0x25, REG_DS, "dclo" }, +}; + +// instructions with opcode REGIMM (I-type) +#define OP_REGIMM 0x01 +static const struct insn regimm_insns[] = { + {0x00, B_IMM_S, "bltz"}, + {0x01, B_IMM_S, "bgez"}, + {0x02, B_IMM_S, "bltzl"}, + {0x03, B_IMM_S, "bgezl"}, +// {0x08, , "tgei"}, +// {0x09, , "tgeiu"}, +// {0x0a, , "tlti"}, +// {0x0b, , "tltiu"}, +// {0x0c, , "teqi"}, +// {0x0e, , "tnei"}, + {0x10, B_IMM_S, "bltzal"}, + {0x11, B_IMM_S, "bgezal"}, + {0x12, B_IMM_S, "bltzall"}, + {0x13, B_IMM_S, "bgezall"}, + {0x13, 
B_IMM_S, "bgezall"}, +}; + +// instructions with other opcodes (I-type) +static const struct insn immediate_insns[] = { + {0x02, J_IMM, "j"}, + {0x03, J_IMM, "jal"}, + {0x04, B_IMM_TS, "beq"}, + {0x05, B_IMM_TS, "bne"}, + {0x06, B_IMM_S, "blez"}, + {0x07, B_IMM_S, "bgtz"}, + {0x08, A_IMM_TS, "addi"}, + {0x09, A_IMM_TS, "addiu"}, + {0x0A, A_IMM_TS, "slti"}, + {0x0B, A_IMM_TS, "sltiu"}, + {0x0C, L_IMM_TS, "andi"}, + {0x0D, L_IMM_TS, "ori"}, + {0x0E, L_IMM_TS, "xori"}, + {0x0F, L_IMM_T, "lui"}, + {0x14, B_IMM_TS, "beql"}, + {0x15, B_IMM_TS, "bnel"}, + {0x16, B_IMM_S, "blezl"}, + {0x17, B_IMM_S, "bgtzl"}, + {0x18, A_IMM_TS, "daddi"}, + {0x19, A_IMM_TS, "daddiu"}, + {0x1A, M_IMM_TS, "ldl"}, + {0x1B, M_IMM_TS, "ldr"}, + {0x20, M_IMM_TS, "lb"}, + {0x21, M_IMM_TS, "lh"}, + {0x22, M_IMM_TS, "lwl"}, + {0x23, M_IMM_TS, "lw"}, + {0x24, M_IMM_TS, "lbu"}, + {0x25, M_IMM_TS, "lhu"}, + {0x26, M_IMM_TS, "lwr"}, + {0x27, M_IMM_TS, "lwu"}, + {0x28, M_IMM_TS, "sb"}, + {0x29, M_IMM_TS, "sh"}, + {0x2A, M_IMM_TS, "swl"}, + {0x2B, M_IMM_TS, "sw"}, + {0x2C, M_IMM_TS, "sdl"}, + {0x2D, M_IMM_TS, "sdr"}, + {0x2E, M_IMM_TS, "swr"}, +// {0x2F, , "cache"}, + {0x30, M_IMM_TS, "ll"}, +// {0x31, , "lwc1"}, +// {0x32, , "lwc2"}, +// {0x33, , "pref"}, + {0x34, M_IMM_TS, "lld"}, +// {0x35, , "ldc1"}, +// {0x36, , "ldc2"}, + {0x37, M_IMM_TS, "ld"}, + {0x38, M_IMM_TS, "sc"}, +// {0x39, , "swc1"}, +// {0x3A, , "swc2"}, + {0x3C, M_IMM_TS, "scd"}, +// {0x3D, , "sdc1"}, +// {0x3E, , "sdc2"}, + {0x3F, M_IMM_TS, "sd"}, +}; + +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*a)) + +// find instruction description for insn +static const struct insn *decode_insn(uint32_t insn) +{ + uint32_t op = insn >> 26; + const struct insn *pi; + int l = 0, r = 0; + + if (op == OP_SPECIAL) { + op = insn & 0x3f; + pi = special_insns; + r = ARRAY_SIZE(special_insns)-1; + } else if (op == OP_SPECIAL2) { + op = insn & 0x3f; + pi = special2_insns; + r = ARRAY_SIZE(special2_insns)-1; + } else if (op == OP_REGIMM) { + op = (insn>>16) & 0x1f; + pi = regimm_insns; + r = ARRAY_SIZE(regimm_insns)-1; + } else { + pi = immediate_insns; + r = ARRAY_SIZE(immediate_insns)-1; + } + + while (l <= r) { + int m = (l+r) / 2; + if (pi[m].op == op) + return pi+m; + else if (pi[m].op < op) + l = m+1; + else + r = m-1; + } + return NULL; +} + +// calculate target for pc-relative branches +static unsigned long b_target(unsigned long pc, uint32_t insn) +{ + return pc + 4 + (int16_t)insn * 4; +} + +// calculate target for region-relative branches +static unsigned long j_target(unsigned long pc, uint32_t insn) +{ + return (pc & ~0x0fffffffL) | ((insn & 0x03ffffff) << 2); +} + +// main disassembler function +int dismips(uintptr_t pc, uint32_t insn, char *buf, unsigned int buflen) +{ + const struct insn *pi = decode_insn(insn); + char *rs = register_names[(insn >> 21) & 0x1f]; + char *rt = register_names[(insn >> 16) & 0x1f]; + char *rd = register_names[(insn >> 11) & 0x1f]; + int sa = (insn >> 6) & 0x1f; + int imm = (int16_t) insn; + + if (pi == NULL) { + snprintf(buf, buflen, "0x%x", insn); + return 0; + } + + switch (pi->type) { + case REG_DTS: + if ((insn & 0x3f) == 0x25 /*OR*/ && (insn & 0x1f0000) == 0 /*zero*/) + snprintf(buf, buflen, "move %s, %s", rd, rs); + else + snprintf(buf, buflen, "%s %s, %s, %s", pi->name, rd, rs, rt); + break; + case REG_TS: + snprintf(buf, buflen, "%s %s, %s", pi->name, rs, rt); + break; + case REG_DS: + snprintf(buf, buflen, "%s %s, %s", pi->name, rd, rs); + break; + case REG_D: + snprintf(buf, buflen, "%s %s", pi->name, rd); + break; + case REG_S: + 
snprintf(buf, buflen, "%s %s", pi->name, rs); + break; + case S_IMM_DT: + if (insn == 0x00000000) + snprintf(buf, buflen, "nop"); + else + snprintf(buf, buflen, "%s %s, %s, %d", pi->name, rd, rt, sa); + break; + case B_IMM_S: + snprintf(buf, buflen, "%s %s, 0x%lx", pi->name, rs, b_target(pc, insn)); + break; + case B_IMM_TS: + snprintf(buf, buflen, "%s %s, %s, 0x%lx", pi->name, rs, rt, b_target(pc, insn)); + break; + case J_IMM: + snprintf(buf, buflen, "%s 0x%lx", pi->name, j_target(pc, insn)); + break; + case A_IMM_TS: + if (abs(imm) < 1000) + snprintf(buf, buflen, "%s %s, %s, %d", pi->name, rt, rs, imm); + else + snprintf(buf, buflen, "%s %s, %s, 0x%x", pi->name, rt, rs, imm); + break; + case L_IMM_T: + snprintf(buf, buflen, "%s %s, 0x%x", pi->name, rt, (uint16_t)imm); + break; + case L_IMM_TS: + if ((insn >> 26) == 0x34 /*ORI*/ && (insn & 0x03e00000) == 0 /*zero*/) + snprintf(buf, buflen, "li %s, 0x%x", rt, (uint16_t)imm); + else + snprintf(buf, buflen, "%s %s, %s, 0x%x", pi->name, rt, rs, (uint16_t)imm); + break; + case M_IMM_TS: + snprintf(buf, buflen, "%s %s, %d(%s)", pi->name, rt, imm, rs); + break; + } + return 1; +} + diff --git a/platform/common/dismips.h b/platform/common/dismips.h new file mode 100644 index 00000000..e6338def --- /dev/null +++ b/platform/common/dismips.h @@ -0,0 +1,6 @@ +#ifndef DISMIPS_H +#define DISMIPS_H + +int dismips(uintptr_t pc, uint32_t insn, char *buf, unsigned int buf_len); + +#endif /* DISMIPS_H */ diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 8af5afa8..887d7836 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -29,7 +29,7 @@ void pemu_prep_defconfig(void) void pemu_validate_config(void) { -#if !defined(__arm__) && !defined(__i386__) && !defined(__x86_64__) +#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__i386__) && !defined(__x86_64__) PicoIn.opt &= ~POPT_EN_DRC; #endif }
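
As a quick cross-check of the MIPS pieces added above, the field packing used by MIPS_INSN()/MIPS_OP_IMM() in cpu/drc/emit_mips.c can be fed straight into the new disassembler from platform/common/dismips.c. The sketch below is only an illustration and not part of the patch: mips_insn() is a hypothetical local stand-in for the MIPS_INSN macro, the program is assumed to be built together with dismips.c, and the expected strings follow my reading of the dismips.c tables.

/* round-trip check: encode with the MIPS_INSN field layout, decode with dismips() */
#include <stdint.h>
#include <stdio.h>

#include "dismips.h"	/* platform/common/dismips.h, added by this commit */

/* same bit layout as MIPS_INSN(op, rs, rt, rd, sa, fn) in emit_mips.c */
static uint32_t mips_insn(uint32_t op, uint32_t rs, uint32_t rt,
	uint32_t rd, uint32_t sa, uint32_t fn)
{
	return (op << 26) | (rs << 21) | (rt << 16) | (rd << 11) | (sa << 6) | fn;
}

int main(void)
{
	/* I-type immediates ride in the low 16 bits, just as MIPS_OP_IMM packs them */
	uint32_t insns[] = {
		mips_insn(0x00,  4, 5, 2, 0, 0x21),	/* addu  $v0, $a0, $a1   */
		mips_insn(0x09,  0, 8, 0, 0, 100),	/* addiu $t0, $zero, 100 */
		mips_insn(0x23, 23, 2, 0, 0, 16),	/* lw    $v0, 16($s7)    */
	};
	char buf[64];
	int i;

	for (i = 0; i < 3; i++) {
		dismips(0, insns[i], buf, sizeof(buf));
		printf("%08x  %s\n", insns[i], buf);
	}
	return 0;
}

If I read the decode tables right, this prints 00851021 addu $v0, $a0, $a1, 24080064 addiu $t0, $zero, 100 and 8ee20010 lw $v0, 16($s7), i.e. the same encodings the emitter macros produce for the JZ47xx-targeted backend.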