[subrepo]
remote = https://github.com/pcercuei/lightrec.git
branch = master
- commit = 30bad28d7a2b2903cd7f3d8024ae7a34a0c8b482
- parent = 0141267e1c5e17c27548f6ad57c7acc22e589990
+ commit = 7545b5a7995be9e7b70e786a6b534004ea26c999
+ parent = 2fba93f2853c57240f031adb4712acbd2a066d34
method = merge
cmdver = 0.4.3
static const char *opcode_flags[] = {
"switched branch/DS",
- "unload Rs",
- "unload Rt",
- "unload Rd",
"sync point",
};
"No div check",
};
-static int print_flags(char *buf, size_t len, u16 flags,
+/* Helper for print_flags(): append one flag token to the output buffer.
+ * Emits "(<arg1><arg2>" for the first token and ", <arg1><arg2>" for all
+ * subsequent ones, clearing *first so later calls take the separator form.
+ * Returns the byte count reported by snprintf() (which may exceed 'len'
+ * on truncation, per snprintf semantics). */
+static size_t do_snprintf(char *buf, size_t len, bool *first,
+			  const char *arg1, const char *arg2)
+{
+	size_t bytes;
+
+	if (*first)
+		bytes = snprintf(buf, len, "(%s%s", arg1, arg2);
+	else
+		bytes = snprintf(buf, len, ", %s%s", arg1, arg2);
+
+	*first = false;
+
+	return bytes;
+}
+
+/* Disassembly prefixes for the early-unload register annotations,
+ * indexed by (reg op - 1): LIGHTREC_REG_UNLOAD -> "-",
+ * LIGHTREC_REG_DISCARD -> "*", LIGHTREC_REG_CLEAN -> "~". */
+static const char * const reg_op_token[3] = {
+	"-", "*", "~",
+};
+
+static int print_flags(char *buf, size_t len, const struct opcode *op,
const char **array, size_t array_size,
bool is_io)
{
unsigned int i, io_mode;
size_t count = 0, bytes;
bool first = true;
+ u32 flags = op->flags;
+ unsigned int reg_op;
for (i = 0; i < array_size + ARRAY_SIZE(opcode_flags); i++) {
if (!(flags & BIT(i)))
else
flag_name = array[i - ARRAY_SIZE(opcode_flags)];
- if (first)
- bytes = snprintf(buf, len, "(%s", flag_name);
- else
- bytes = snprintf(buf, len, ", %s", flag_name);
-
- first = false;
+ bytes = do_snprintf(buf, len, &first, "", flag_name);
buf += bytes;
len -= bytes;
count += bytes;
if (io_mode > 0) {
io_mode_name = opcode_io_modes[io_mode - 1];
- if (first)
- bytes = snprintf(buf, len, "(%s", io_mode_name);
- else
- bytes = snprintf(buf, len, ", %s", io_mode_name);
+ bytes = do_snprintf(buf, len, &first, "", io_mode_name);
+ buf += bytes;
+ len -= bytes;
+ count += bytes;
+ }
+ }
+
+ if (OPT_EARLY_UNLOAD) {
+ reg_op = LIGHTREC_FLAGS_GET_RS(flags);
+ if (reg_op) {
+ bytes = do_snprintf(buf, len, &first,
+ reg_op_token[reg_op - 1],
+ lightrec_reg_name(op->i.rs));
+ buf += bytes;
+ len -= bytes;
+ count += bytes;
+ }
+
+ reg_op = LIGHTREC_FLAGS_GET_RT(flags);
+ if (reg_op) {
+ bytes = do_snprintf(buf, len, &first,
+ reg_op_token[reg_op - 1],
+ lightrec_reg_name(op->i.rt));
+ buf += bytes;
+ len -= bytes;
+ count += bytes;
+ }
- first = false;
+ reg_op = LIGHTREC_FLAGS_GET_RD(flags);
+ if (reg_op) {
+ bytes = do_snprintf(buf, len, &first,
+ reg_op_token[reg_op - 1],
+ lightrec_reg_name(op->r.rd));
buf += bytes;
len -= bytes;
count += bytes;
std_opcodes[c.i.op],
(pc & 0xf0000000) | (c.j.imm << 2));
case OP_BEQ:
+ if (c.i.rs == c.i.rt) {
+ *flags_ptr = opcode_branch_flags;
+ *nb_flags = ARRAY_SIZE(opcode_branch_flags);
+ return snprintf(buf, len, "b 0x%x",
+ pc + 4 + ((s16)c.i.imm << 2));
+ }
+ fallthrough;
case OP_BNE:
case OP_BLEZ:
case OP_BGTZ:
count2 = 0;
}
- print_flags(buf3, sizeof(buf3), op->flags, flags_ptr, nb_flags,
- is_io);
+ print_flags(buf3, sizeof(buf3), op, flags_ptr, nb_flags, is_io);
printf("0x%08x (0x%x)\t%s%*c%s%*c%s\n", pc, i << 2,
buf, 30 - (int)count, ' ', buf2, 30 - (int)count2, ' ', buf3);
#include "debug.h"
#include "lightrec.h"
+#include "lightrec-config.h"
#ifndef __packed
#define __packed __attribute__((packed))
/* Flags for all opcodes */
#define LIGHTREC_NO_DS BIT(0)
-#define LIGHTREC_UNLOAD_RS BIT(1)
-#define LIGHTREC_UNLOAD_RT BIT(2)
-#define LIGHTREC_UNLOAD_RD BIT(3)
-#define LIGHTREC_SYNC BIT(4)
+#define LIGHTREC_SYNC BIT(1)
/* Flags for load/store opcodes */
-#define LIGHTREC_SMC BIT(5)
-#define LIGHTREC_NO_INVALIDATE BIT(6)
-#define LIGHTREC_NO_MASK BIT(7)
+#define LIGHTREC_SMC BIT(2)
+#define LIGHTREC_NO_INVALIDATE BIT(3)
+#define LIGHTREC_NO_MASK BIT(4)
/* I/O mode for load/store opcodes */
-#define LIGHTREC_IO_MODE_LSB 8
+#define LIGHTREC_IO_MODE_LSB 5
#define LIGHTREC_IO_MODE(x) ((x) << LIGHTREC_IO_MODE_LSB)
#define LIGHTREC_IO_UNKNOWN 0x0
#define LIGHTREC_IO_DIRECT 0x1
(((x) & LIGHTREC_IO_MASK) >> LIGHTREC_IO_MODE_LSB)
/* Flags for branches */
-#define LIGHTREC_EMULATE_BRANCH BIT(5)
-#define LIGHTREC_LOCAL_BRANCH BIT(6)
+#define LIGHTREC_EMULATE_BRANCH BIT(2)
+#define LIGHTREC_LOCAL_BRANCH BIT(3)
/* Flags for div/mult opcodes */
-#define LIGHTREC_NO_LO BIT(5)
-#define LIGHTREC_NO_HI BIT(6)
-#define LIGHTREC_NO_DIV_CHECK BIT(7)
+#define LIGHTREC_NO_LO BIT(2)
+#define LIGHTREC_NO_HI BIT(3)
+#define LIGHTREC_NO_DIV_CHECK BIT(4)
+
+#define LIGHTREC_REG_RS_LSB 26
+#define LIGHTREC_REG_RS(x) ((x) << LIGHTREC_REG_RS_LSB)
+#define LIGHTREC_REG_RS_MASK LIGHTREC_REG_RS(0x3)
+#define LIGHTREC_FLAGS_GET_RS(x) \
+ (((x) & LIGHTREC_REG_RS_MASK) >> LIGHTREC_REG_RS_LSB)
+
+#define LIGHTREC_REG_RT_LSB 28
+#define LIGHTREC_REG_RT(x) ((x) << LIGHTREC_REG_RT_LSB)
+#define LIGHTREC_REG_RT_MASK LIGHTREC_REG_RT(0x3)
+#define LIGHTREC_FLAGS_GET_RT(x) \
+ (((x) & LIGHTREC_REG_RT_MASK) >> LIGHTREC_REG_RT_LSB)
+
+#define LIGHTREC_REG_RD_LSB 30
+#define LIGHTREC_REG_RD(x) ((x) << LIGHTREC_REG_RD_LSB)
+#define LIGHTREC_REG_RD_MASK LIGHTREC_REG_RD(0x3)
+#define LIGHTREC_FLAGS_GET_RD(x) \
+ (((x) & LIGHTREC_REG_RD_MASK) >> LIGHTREC_REG_RD_LSB)
+
+#define LIGHTREC_REG_NOOP 0x0
+#define LIGHTREC_REG_UNLOAD 0x1
+#define LIGHTREC_REG_DISCARD 0x2
+#define LIGHTREC_REG_CLEAN 0x3
struct block;
struct opcode_i i;
struct opcode_j j;
};
- u16 flags;
+ u32 flags;
};
void lightrec_print_disassembly(const struct block *block, const u32 *code);
+/* op_flag_*() accessors: each tests one opcode flag, gated on the
+ * compile-time optimization knob that can set it.  When the relevant
+ * OPT_* macro is 0 the whole test folds to constant false, letting the
+ * compiler drop the dependent code paths entirely. */
+static inline _Bool op_flag_no_ds(u32 flags)
+{
+	return OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS);
+}
+
+static inline _Bool op_flag_sync(u32 flags)
+{
+	return OPT_LOCAL_BRANCHES && (flags & LIGHTREC_SYNC);
+}
+
+static inline _Bool op_flag_smc(u32 flags)
+{
+	return OPT_FLAG_STORES && (flags & LIGHTREC_SMC);
+}
+
+static inline _Bool op_flag_no_invalidate(u32 flags)
+{
+	/* NO_INVALIDATE can be produced by either the I/O or the stores
+	 * flagging pass, hence the double gate. */
+	return (OPT_FLAG_IO || OPT_FLAG_STORES) &&
+		(flags & LIGHTREC_NO_INVALIDATE);
+}
+
+static inline _Bool op_flag_no_mask(u32 flags)
+{
+	return OPT_FLAG_IO && (flags & LIGHTREC_NO_MASK);
+}
+
+static inline _Bool op_flag_emulate_branch(u32 flags)
+{
+	return OPT_DETECT_IMPOSSIBLE_BRANCHES &&
+		(flags & LIGHTREC_EMULATE_BRANCH);
+}
+
+static inline _Bool op_flag_local_branch(u32 flags)
+{
+	return OPT_LOCAL_BRANCHES && (flags & LIGHTREC_LOCAL_BRANCH);
+}
+
+static inline _Bool op_flag_no_lo(u32 flags)
+{
+	return OPT_FLAG_MULT_DIV && (flags & LIGHTREC_NO_LO);
+}
+
+static inline _Bool op_flag_no_hi(u32 flags)
+{
+	return OPT_FLAG_MULT_DIV && (flags & LIGHTREC_NO_HI);
+}
+
+static inline _Bool op_flag_no_div_check(u32 flags)
+{
+	return OPT_FLAG_MULT_DIV && (flags & LIGHTREC_NO_DIV_CHECK);
+}
+
+
#endif /* __DISASSEMBLER_H__ */
u32 link, bool update_cycles)
{
struct regcache *reg_cache = state->reg_cache;
- u32 cycles = state->cycles;
jit_state_t *_jit = block->_jit;
const struct opcode *op = &block->opcode_list[offset],
*next = &block->opcode_list[offset + 1];
+ u32 cycles = state->cycles + lightrec_cycles_of_opcode(op->c);
+ u16 offset_after_eob;
jit_note(__FILE__, __LINE__);
}
if (has_delay_slot(op->c) &&
- !(op->flags & (LIGHTREC_NO_DS | LIGHTREC_LOCAL_BRANCH))) {
+ !op_flag_no_ds(op->flags) && !op_flag_local_branch(op->flags)) {
cycles += lightrec_cycles_of_opcode(next->c);
/* Recompile the delay slot */
lightrec_rec_opcode(state, block, offset + 1);
}
- /* Store back remaining registers */
- lightrec_storeback_regs(reg_cache, _jit);
+ /* Clean the remaining registers */
+ lightrec_clean_regs(reg_cache, _jit);
jit_movr(JIT_V0, reg_new_pc);
pr_debug("EOB: %u cycles\n", cycles);
}
- if (offset - !!(op->flags & LIGHTREC_NO_DS) < block->nb_ops - 1)
+ offset_after_eob = offset + 1 +
+ (has_delay_slot(op->c) && !op_flag_no_ds(op->flags));
+
+ if (offset_after_eob < block->nb_ops)
state->branches[state->nb_branches++] = jit_b();
}
union code c = block->opcode_list[offset].c;
u32 cycles = state->cycles;
- if (!after_op)
- cycles -= lightrec_cycles_of_opcode(c);
+ if (after_op)
+ cycles += lightrec_cycles_of_opcode(c);
- lightrec_storeback_regs(reg_cache, _jit);
+ lightrec_clean_regs(reg_cache, _jit);
jit_movi(JIT_V0, block->pc + (offset << 2));
jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
31, get_branch_pc(block, offset, 2), true);
}
+/* Apply the early-unload annotations carried by the opcode at 'offset':
+ * for each of rd/rt/rs, perform the per-register operation (unload,
+ * discard, or clean) that was encoded into op->flags by the optimizer.
+ * NOTE(review): the bool passed to lightrec_clean_reg_if_loaded()
+ * presumably selects "also unload" (true) vs "clean only" (false) —
+ * confirm against its definition in regcache.c. */
+static void lightrec_do_early_unload(struct lightrec_cstate *state,
+				     const struct block *block, u16 offset)
+{
+	struct regcache *reg_cache = state->reg_cache;
+	const struct opcode *op = &block->opcode_list[offset];
+	jit_state_t *_jit = block->_jit;
+	unsigned int i;
+	u8 reg;
+	struct {
+		u8 reg, op;
+	} reg_ops[3] = {
+		{ op->r.rd, LIGHTREC_FLAGS_GET_RD(op->flags), },
+		{ op->i.rt, LIGHTREC_FLAGS_GET_RT(op->flags), },
+		{ op->i.rs, LIGHTREC_FLAGS_GET_RS(op->flags), },
+	};
+
+	for (i = 0; i < ARRAY_SIZE(reg_ops); i++) {
+		reg = reg_ops[i].reg;
+
+		switch (reg_ops[i].op) {
+		case LIGHTREC_REG_UNLOAD:
+			lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, true);
+			break;
+
+		case LIGHTREC_REG_DISCARD:
+			lightrec_discard_reg_if_loaded(reg_cache, reg);
+			break;
+
+		case LIGHTREC_REG_CLEAN:
+			lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, false);
+			break;
+		default:
+			/* LIGHTREC_REG_NOOP: nothing to do for this register */
+			break;
+		};
+	}
+}
+
+
static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset,
jit_code_t code, u32 link, bool unconditional, bool bz)
{
const struct opcode *op = &block->opcode_list[offset],
*next = &block->opcode_list[offset + 1];
jit_node_t *addr;
- u8 link_reg;
- u32 target_offset, cycles = state->cycles;
+ u8 link_reg, rs, rt;
bool is_forward = (s16)op->i.imm >= -1;
+ int op_cycles = lightrec_cycles_of_opcode(op->c);
+ u32 target_offset, cycles = state->cycles + op_cycles;
u32 next_pc;
jit_note(__FILE__, __LINE__);
- if (!(op->flags & LIGHTREC_NO_DS))
+ if (!op_flag_no_ds(op->flags))
cycles += lightrec_cycles_of_opcode(next->c);
- state->cycles = 0;
+ state->cycles = -op_cycles;
+
+ if (!unconditional) {
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT);
+ rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache,
+ _jit, op->i.rt, REG_EXT);
+
+ /* Unload dead registers before evaluating the branch */
+ if (OPT_EARLY_UNLOAD)
+ lightrec_do_early_unload(state, block, offset);
+ }
if (cycles)
jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
if (!unconditional) {
- u8 rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT),
- rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache,
- _jit, op->i.rt, REG_EXT);
-
/* Generate the branch opcode */
addr = jit_new_node_pww(code, NULL, rs, rt);
regs_backup = lightrec_regcache_enter_branch(reg_cache);
}
- if (op->flags & LIGHTREC_LOCAL_BRANCH) {
- if (next && !(op->flags & LIGHTREC_NO_DS)) {
- /* Recompile the delay slot */
- if (next->opcode)
- lightrec_rec_opcode(state, block, offset + 1);
- }
+ if (op_flag_local_branch(op->flags)) {
+ /* Recompile the delay slot */
+ if (next && next->opcode && !op_flag_no_ds(op->flags))
+ lightrec_rec_opcode(state, block, offset + 1);
if (link) {
/* Update the $ra register */
lightrec_free_reg(reg_cache, link_reg);
}
- /* Store back remaining registers */
- lightrec_storeback_regs(reg_cache, _jit);
+ /* Clean remaining registers */
+ lightrec_clean_regs(reg_cache, _jit);
target_offset = offset + 1 + (s16)op->i.imm
- - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
+ - !!op_flag_no_ds(op->flags);
pr_debug("Adding local branch to offset 0x%x\n",
target_offset << 2);
branch = &state->local_branches[
branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0);
}
- if (!(op->flags & LIGHTREC_LOCAL_BRANCH) || !is_forward) {
+ if (!op_flag_local_branch(op->flags) || !is_forward) {
next_pc = get_branch_pc(block, offset, 1 + (s16)op->i.imm);
lightrec_emit_end_of_block(state, block, offset, -1, next_pc,
31, link, false);
lightrec_free_reg(reg_cache, link_reg);
}
- if (!(op->flags & LIGHTREC_NO_DS) && next->opcode)
+ if (!op_flag_no_ds(op->flags) && next->opcode)
lightrec_rec_opcode(state, block, offset + 1);
}
}
{
struct regcache *reg_cache = state->reg_cache;
union code c = block->opcode_list[offset].c;
- u16 flags = block->opcode_list[offset].flags;
+ u32 flags = block->opcode_list[offset].flags;
u8 reg_lo = get_mult_div_lo(c);
u8 reg_hi = get_mult_div_hi(c);
jit_state_t *_jit = block->_jit;
rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
- if (!(flags & LIGHTREC_NO_LO))
+ if (!op_flag_no_lo(flags))
lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
else if (__WORDSIZE == 32)
lo = lightrec_alloc_reg_temp(reg_cache, _jit);
- if (!(flags & LIGHTREC_NO_HI))
+ if (!op_flag_no_hi(flags))
hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT);
if (__WORDSIZE == 32) {
/* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
* operation if the MULT was detected a 32-bit only. */
- if (!(flags & LIGHTREC_NO_HI)) {
+ if (!op_flag_no_hi(flags)) {
if (is_signed)
jit_qmulr(lo, hi, rs, rt);
else
}
} else {
/* On 64-bit systems, do a 64*64->64 bit operation. */
- if (flags & LIGHTREC_NO_LO) {
+ if (op_flag_no_lo(flags)) {
jit_mulr(hi, rs, rt);
jit_rshi(hi, hi, 32);
} else {
jit_mulr(lo, rs, rt);
/* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
- if (!(flags & LIGHTREC_NO_HI))
+ if (!op_flag_no_hi(flags))
jit_rshi(hi, lo, 32);
}
}
lightrec_free_reg(reg_cache, rs);
lightrec_free_reg(reg_cache, rt);
- if (!(flags & LIGHTREC_NO_LO) || __WORDSIZE == 32)
+ if (!op_flag_no_lo(flags) || __WORDSIZE == 32)
lightrec_free_reg(reg_cache, lo);
- if (!(flags & LIGHTREC_NO_HI))
+ if (!op_flag_no_hi(flags))
lightrec_free_reg(reg_cache, hi);
}
{
struct regcache *reg_cache = state->reg_cache;
union code c = block->opcode_list[offset].c;
- u16 flags = block->opcode_list[offset].flags;
- bool no_check = flags & LIGHTREC_NO_DIV_CHECK;
+ u32 flags = block->opcode_list[offset].flags;
+ bool no_check = op_flag_no_div_check(flags);
u8 reg_lo = get_mult_div_lo(c);
u8 reg_hi = get_mult_div_hi(c);
jit_state_t *_jit = block->_jit;
rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
- if (!(flags & LIGHTREC_NO_LO))
+ if (!op_flag_no_lo(flags))
lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
- if (!(flags & LIGHTREC_NO_HI))
+ if (!op_flag_no_hi(flags))
hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, 0);
/* Jump to special handler if dividing by zero */
if (!no_check)
branch = jit_beqi(rt, 0);
- if (flags & LIGHTREC_NO_LO) {
+ if (op_flag_no_lo(flags)) {
if (is_signed)
jit_remr(hi, rs, rt);
else
jit_remr_u(hi, rs, rt);
- } else if (flags & LIGHTREC_NO_HI) {
+ } else if (op_flag_no_hi(flags)) {
if (is_signed)
jit_divr(lo, rs, rt);
else
jit_patch(branch);
- if (!(flags & LIGHTREC_NO_LO)) {
+ if (!op_flag_no_lo(flags)) {
if (is_signed) {
jit_lti(lo, rs, 0);
jit_lshi(lo, lo, 1);
}
}
- if (!(flags & LIGHTREC_NO_HI))
+ if (!op_flag_no_hi(flags))
jit_movr(hi, rs);
jit_patch(to_end);
lightrec_free_reg(reg_cache, rs);
lightrec_free_reg(reg_cache, rt);
- if (!(flags & LIGHTREC_NO_LO))
+ if (!op_flag_no_lo(flags))
lightrec_free_reg(reg_cache, lo);
- if (!(flags & LIGHTREC_NO_HI))
+ if (!op_flag_no_hi(flags))
lightrec_free_reg(reg_cache, hi);
}
{
struct regcache *reg_cache = state->reg_cache;
jit_state_t *_jit = block->_jit;
- u8 tmp, tmp2;
+ u8 tmp;
tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
jit_ldxi(tmp, LIGHTREC_REG_STATE,
offsetof(struct lightrec_state, wrappers_eps[wrapper]));
if (with_arg) {
- tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
- jit_movi(tmp2, arg);
-
- jit_stxi_i(offsetof(struct lightrec_state, c_wrapper_arg),
- LIGHTREC_REG_STATE, tmp2);
-
- lightrec_free_reg(reg_cache, tmp2);
+ jit_prepare();
+ jit_pushargi(arg);
}
lightrec_regcache_mark_live(reg_cache, _jit);
struct regcache *reg_cache = state->reg_cache;
jit_state_t *_jit = block->_jit;
union code c = block->opcode_list[offset].c;
- u16 flags = block->opcode_list[offset].flags;
+ u32 flags = block->opcode_list[offset].flags;
bool is_tagged = LIGHTREC_FLAGS_GET_IO_MODE(flags);
u32 lut_entry;
s16 imm = (s16)c.i.imm;
s32 simm = (s32)imm << (1 - lut_is_32bit(state));
s32 lut_offt = offsetof(struct lightrec_state, code_lut);
- bool no_mask = op->flags & LIGHTREC_NO_MASK;
+ bool no_mask = op_flag_no_mask(op->flags);
bool add_imm = c.i.imm &&
((!state->mirrors_mapped && !no_mask) || (invalidate &&
((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt))));
jit_note(__FILE__, __LINE__);
rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
if (state->offset_ram || state->offset_scratch)
lightrec_free_reg(reg_cache, tmp2);
}
- rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
-
if (is_big_endian() && swap_code && c.i.rt) {
tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
const struct block *block, u16 offset,
jit_code_t code, jit_code_t swap_code)
{
- u16 flags = block->opcode_list[offset].flags;
- bool no_invalidate = (flags & LIGHTREC_NO_INVALIDATE) ||
+ u32 flags = block->opcode_list[offset].flags;
+ bool no_invalidate = op_flag_no_invalidate(flags) ||
state->state->invalidate_from_dma_only;
switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
struct opcode *op = &block->opcode_list[offset];
jit_state_t *_jit = block->_jit;
u8 rs, rt, addr_reg, flags = REG_EXT;
- bool no_mask = op->flags & LIGHTREC_NO_MASK;
+ bool no_mask = op_flag_no_mask(op->flags);
union code c = op->c;
s16 imm;
u16 offset, jit_code_t code, jit_code_t swap_code,
bool is_unsigned)
{
- u16 flags = block->opcode_list[offset].flags;
+ u32 flags = block->opcode_list[offset].flags;
switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
case LIGHTREC_IO_RAM:
call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MTC);
if (c.i.op == OP_CP0 &&
- !(block->opcode_list[offset].flags & LIGHTREC_NO_DS) &&
+ !op_flag_no_ds(block->opcode_list[offset].flags) &&
(c.r.rd == 12 || c.r.rd == 13))
lightrec_emit_end_of_block(state, block, offset, -1,
get_ds_pc(block, offset, 1),
lightrec_free_reg(reg_cache, rt);
- if (!(block->opcode_list[offset].flags & LIGHTREC_NO_DS) &&
+ if (!op_flag_no_ds(block->opcode_list[offset].flags) &&
(c.r.rd == 12 || c.r.rd == 13))
lightrec_emit_eob(state, block, offset + 1, true);
}
const struct opcode *op = &block->opcode_list[offset];
jit_state_t *_jit = block->_jit;
lightrec_rec_func_t f;
+ u16 unload_offset;
- if (op->flags & LIGHTREC_SYNC) {
- jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
+ if (op_flag_sync(op->flags)) {
+ if (state->cycles)
+ jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
state->cycles = 0;
lightrec_storeback_regs(reg_cache, _jit);
(*f)(state, block, offset);
}
- if (unlikely(op->flags & LIGHTREC_UNLOAD_RD)) {
- lightrec_clean_reg_if_loaded(reg_cache, _jit, op->r.rd, true);
- pr_debug("Cleaning RD reg %s\n", lightrec_reg_name(op->r.rd));
- }
- if (unlikely(op->flags & LIGHTREC_UNLOAD_RS)) {
- lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, true);
- pr_debug("Cleaning RS reg %s\n", lightrec_reg_name(op->i.rt));
- }
- if (unlikely(op->flags & LIGHTREC_UNLOAD_RT)) {
- lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true);
- pr_debug("Cleaning RT reg %s\n", lightrec_reg_name(op->i.rt));
+ if (OPT_EARLY_UNLOAD) {
+ unload_offset = offset +
+ (has_delay_slot(op->c) && !op_flag_no_ds(op->flags));
+
+ lightrec_do_early_unload(state, block, unload_offset);
}
}
inter->op = next_op(inter);
inter->offset++;
- if (inter->op->flags & LIGHTREC_SYNC) {
+ if (op_flag_sync(inter->op->flags)) {
inter->state->current_cycle += inter->cycles;
inter->cycles = 0;
}
if (!inter->delay_slot) {
cycles = lightrec_cycles_of_opcode(inter->op->c);
- if (has_delay_slot(inter->op->c) &&
- !(inter->op->flags & LIGHTREC_NO_DS))
+ if (!op_flag_no_ds(inter->op->flags) &&
+ has_delay_slot(inter->op->c))
cycles += lightrec_cycles_of_opcode(next_op(inter)->c);
inter->cycles += cycles;
if (link)
state->regs.gpr[31] = old_pc + 8;
- if (inter->op->flags & LIGHTREC_NO_DS)
+ if (op_flag_no_ds(inter->op->flags))
return pc;
return int_delay_slot(inter, pc, true);
static u32 int_jumpr(struct interpreter *inter, u8 link_reg)
{
struct lightrec_state *state = inter->state;
- u32 old_pc, next_pc = state->regs.gpr[inter->op->r.rs];
+ u32 old_pc = int_get_branch_pc(inter);
+ u32 next_pc = state->regs.gpr[inter->op->r.rs];
- if (link_reg) {
- old_pc = int_get_branch_pc(inter);
- state->regs.gpr[link_reg] = old_pc + 8;
+ if (op_flag_emulate_branch(inter->op->flags) && inter->offset) {
+ inter->cycles -= lightrec_cycles_of_opcode(inter->op->c);
+ return old_pc;
}
- if (inter->op->flags & LIGHTREC_NO_DS)
+ if (link_reg)
+ state->regs.gpr[link_reg] = old_pc + 8;
+
+ if (op_flag_no_ds(inter->op->flags))
return next_pc;
return int_delay_slot(inter, next_pc, true);
static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc)
{
- if (!inter->delay_slot &&
- (inter->op->flags & LIGHTREC_LOCAL_BRANCH) &&
+ if (!inter->delay_slot && op_flag_local_branch(inter->op->flags) &&
(s16)inter->op->c.i.imm >= 0) {
next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2);
next_pc = lightrec_emulate_block(inter->state, inter->block, next_pc);
{
u32 next_pc = pc + 4 + ((s16)code.i.imm << 2);
+ if (op_flag_emulate_branch(inter->op->flags) && inter->offset) {
+ inter->cycles -= lightrec_cycles_of_opcode(inter->op->c);
+ return pc;
+ }
+
update_cycles_before_branch(inter);
- if (inter->op->flags & LIGHTREC_NO_DS) {
+ if (op_flag_no_ds(inter->op->flags)) {
if (branch)
return int_do_branch(inter, pc, next_pc);
else
if (branch)
return int_do_branch(inter, pc, next_pc);
- if (inter->op->flags & LIGHTREC_EMULATE_BRANCH)
+ if (op_flag_emulate_branch(inter->op->flags))
return pc + 8;
else
return jump_after_branch(inter);
/* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause),
* return early so that the emulator will be able to check software
* interrupt status. */
- if (!(inter->op->flags & LIGHTREC_NO_DS) &&
+ if (!op_flag_no_ds(inter->op->flags) &&
op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13))
return int_get_ds_pc(inter, 1);
else
{
u32 next_pc;
- if (likely(!(inter->op->flags & LIGHTREC_SMC)))
+ if (likely(!op_flag_smc(inter->op->flags)))
return int_io(inter, false);
lightrec_rw(inter->state, inter->op->c,
u8 reg_hi = get_mult_div_hi(inter->op->c);
u64 res = (s64)rs * (s64)rt;
- if (!(inter->op->flags & LIGHTREC_NO_HI))
+ if (!op_flag_no_hi(inter->op->flags))
reg_cache[reg_hi] = res >> 32;
- if (!(inter->op->flags & LIGHTREC_NO_LO))
+ if (!op_flag_no_lo(inter->op->flags))
reg_cache[reg_lo] = res;
return jump_next(inter);
u8 reg_hi = get_mult_div_hi(inter->op->c);
u64 res = (u64)rs * (u64)rt;
- if (!(inter->op->flags & LIGHTREC_NO_HI))
+ if (!op_flag_no_hi(inter->op->flags))
reg_cache[reg_hi] = res >> 32;
- if (!(inter->op->flags & LIGHTREC_NO_LO))
+ if (!op_flag_no_lo(inter->op->flags))
reg_cache[reg_lo] = res;
return jump_next(inter);
hi = rs % rt;
}
- if (!(inter->op->flags & LIGHTREC_NO_HI))
+ if (!op_flag_no_hi(inter->op->flags))
reg_cache[reg_hi] = hi;
- if (!(inter->op->flags & LIGHTREC_NO_LO))
+ if (!op_flag_no_lo(inter->op->flags))
reg_cache[reg_lo] = lo;
return jump_next(inter);
hi = rs % rt;
}
- if (!(inter->op->flags & LIGHTREC_NO_HI))
+ if (!op_flag_no_hi(inter->op->flags))
reg_cache[reg_hi] = hi;
- if (!(inter->op->flags & LIGHTREC_NO_LO))
+ if (!op_flag_no_lo(inter->op->flags))
reg_cache[reg_lo] = lo;
return jump_next(inter);
u32 target_cycle;
u32 exit_flags;
u32 old_cycle_counter;
- u32 c_wrapper_arg;
struct block *dispatcher, *c_wrapper_block;
void *c_wrappers[C_WRAPPERS_COUNT];
void *wrappers_eps[C_WRAPPERS_COUNT];
};
u32 lightrec_rw(struct lightrec_state *state, union code op,
- u32 addr, u32 data, u16 *flags,
+ u32 addr, u32 data, u32 *flags,
struct block *block);
void lightrec_free_block(struct lightrec_state *state, struct block *block);
{
u16 flags = block->opcode_list[offset].flags;
- offset += !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS));
+ offset += op_flag_no_ds(flags);
return block->pc + (offset + imm << 2);
}
{
u16 flags = block->opcode_list[offset].flags;
- offset -= !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS));
+ offset -= op_flag_no_ds(flags);
return block->pc + (offset + imm << 2);
}
union code lightrec_read_opcode(struct lightrec_state *state, u32 pc);
-struct block * lightrec_get_block(struct lightrec_state *state, u32 pc);
int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block);
void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block);
return (OPT_FLAG_MULT_DIV && c.r.imm) ? c.r.imm : REG_HI;
}
+/* Return the larger of two signed 16-bit values. */
+static inline s16 s16_max(s16 a, s16 b)
+{
+	return a > b ? a : b;
+}
+
#endif /* __LIGHTREC_PRIVATE_H__ */
}
u32 lightrec_rw(struct lightrec_state *state, union code op,
- u32 addr, u32 data, u16 *flags, struct block *block)
+ u32 addr, u32 data, u32 *flags, struct block *block)
{
const struct lightrec_mem_map *map;
const struct lightrec_mem_map_ops *ops;
}
static void lightrec_rw_helper(struct lightrec_state *state,
- union code op, u16 *flags,
+ union code op, u32 *flags,
struct block *block)
{
u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs],
}
}
-static void lightrec_rw_cb(struct lightrec_state *state)
+static void lightrec_rw_cb(struct lightrec_state *state, u32 arg)
{
- lightrec_rw_helper(state, (union code)state->c_wrapper_arg, NULL, NULL);
+ lightrec_rw_helper(state, (union code) arg, NULL, NULL);
}
-static void lightrec_rw_generic_cb(struct lightrec_state *state)
+static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg)
{
struct block *block;
struct opcode *op;
bool was_tagged;
- u32 arg = state->c_wrapper_arg;
u16 offset = (u16)arg;
block = lightrec_find_block_from_lut(state->block_cache,
"for recompilation\n", block->pc);
block->flags |= BLOCK_SHOULD_RECOMPILE;
+ lut_write(state, lut_offset(block->pc), NULL);
}
}
lightrec_mtc2(state, op.r.rd, data);
}
-static void lightrec_mtc_cb(struct lightrec_state *state)
+static void lightrec_mtc_cb(struct lightrec_state *state, u32 arg)
{
- union code op = (union code) state->c_wrapper_arg;
+ union code op = (union code) arg;
lightrec_mtc(state, op, state->regs.gpr[op.r.rt]);
}
(*state->ops.cop2_op)(state, op.opcode);
}
-static void lightrec_cp_cb(struct lightrec_state *state)
+static void lightrec_cp_cb(struct lightrec_state *state, u32 arg)
{
- lightrec_cp(state, (union code) state->c_wrapper_arg);
+ lightrec_cp(state, (union code) arg);
}
static void lightrec_syscall_cb(struct lightrec_state *state)
lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK);
}
-struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
+static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
{
struct block *block = lightrec_find_block(state->block_cache, pc);
}
static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
- void (*f)(struct lightrec_state *))
+ void (*f)(struct lightrec_state *, u32), u32 arg)
{
state->current_cycle = state->target_cycle - cycles_delta;
- (*f)(state);
+ (*f)(state, arg);
return state->target_cycle - state->current_cycle;
}
for (i = 0; i < NUM_TEMPS; i++)
jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i));
+ jit_getarg(JIT_R1, jit_arg());
+
/* Jump to the trampoline */
to_tramp = jit_jmpi();
jit_pushargr(LIGHTREC_REG_STATE);
jit_pushargr(LIGHTREC_REG_CYCLE);
jit_pushargr(JIT_R0);
+ jit_pushargr(JIT_R1);
jit_finishi(c_function_wrapper);
jit_retval_i(LIGHTREC_REG_CYCLE);
continue;
}
- cstate->cycles += lightrec_cycles_of_opcode(elm->c);
-
if (should_emulate(elm)) {
pr_debug("Branch at offset 0x%x will be emulated\n",
i << 2);
lightrec_emit_eob(cstate, block, i, false);
- skip_next = !(elm->flags & LIGHTREC_NO_DS);
+ skip_next = !op_flag_no_ds(elm->flags);
} else {
lightrec_rec_opcode(cstate, block, i);
- skip_next = has_delay_slot(elm->c) &&
- !(elm->flags & LIGHTREC_NO_DS);
+ skip_next = !op_flag_no_ds(elm->flags) && has_delay_slot(elm->c);
#if _WIN32
/* FIXME: GNU Lightning on Windows seems to use our
* mapped registers as temporaries. Until the actual bug
lightrec_regcache_mark_live(cstate->reg_cache, _jit);
#endif
}
+
+ cstate->cycles += lightrec_cycles_of_opcode(elm->c);
}
for (i = 0; i < cstate->nb_branches; i++)
pr_err("Unable to find branch target\n");
}
- jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, eob_wrapper_func));
-
- jit_jmpr(JIT_R0);
-
+ jit_patch_abs(jit_jmpi(), state->eob_wrapper_func);
jit_ret();
jit_epilog();
state->current_cycle = ~state->current_cycle;
lightrec_print_info(state);
+ lightrec_free_block_cache(state->block_cache);
+ lightrec_free_block(state, state->dispatcher);
+ lightrec_free_block(state, state->c_wrapper_block);
+
if (ENABLE_THREADED_COMPILER) {
lightrec_free_recompiler(state->rec);
lightrec_reaper_destroy(state->reaper);
lightrec_free_cstate(state->cstate);
}
- lightrec_free_block_cache(state->block_cache);
- lightrec_free_block(state, state->dispatcher);
- lightrec_free_block(state, state->c_wrapper_block);
finish_jit();
if (ENABLE_CODE_BUFFER && state->tlsf)
tlsf_destroy(state->tlsf);
case OP_SPECIAL_MFLO:
return BIT(REG_LO);
case OP_SPECIAL_SLL:
+ if (!op.r.imm)
+ return 0;
+ fallthrough;
case OP_SPECIAL_SRL:
case OP_SPECIAL_SRA:
return BIT(op.r.rt);
case OP_LUI:
return 0;
case OP_BEQ:
+ if (op.i.rs == op.i.rt)
+ return 0;
+ fallthrough;
case OP_BNE:
case OP_LWL:
case OP_LWR:
return BIT(REG_HI);
case OP_SPECIAL_MTLO:
return BIT(REG_LO);
+ case OP_SPECIAL_SLL:
+ if (!op.r.imm)
+ return 0;
+ fallthrough;
default:
return BIT(op.r.rd);
}
union code c;
unsigned int i;
- if (list[offset].flags & LIGHTREC_SYNC)
+ if (op_flag_sync(list[offset].flags))
return -1;
for (i = offset; i > 0; i--) {
return i - 1;
}
- if ((list[i - 1].flags & LIGHTREC_SYNC) ||
+ if (op_flag_sync(list[i - 1].flags) ||
has_delay_slot(c) ||
opcode_reads_register(c, reg))
break;
unsigned int i;
union code c;
- if (list[offset].flags & LIGHTREC_SYNC)
+ if (op_flag_sync(list[offset].flags))
return -1;
for (i = offset; ; i++) {
return i;
}
- if ((list[i].flags & LIGHTREC_SYNC) ||
+ if (op_flag_sync(list[i].flags) ||
has_delay_slot(c) || opcode_writes_register(c, reg))
break;
}
{
unsigned int i;
- if (list[offset].flags & LIGHTREC_SYNC)
+ if (op_flag_sync(list[offset].flags))
return false;
for (i = offset + 1; ; i++) {
return true;
if (has_delay_slot(list[i].c)) {
- if (list[i].flags & LIGHTREC_NO_DS ||
+ if (op_flag_no_ds(list[i].flags) ||
opcode_reads_register(list[i + 1].c, reg))
return false;
known |= BIT(0);
v[0] = 0;
- if (op->flags & LIGHTREC_SYNC)
+ if (op_flag_sync(op->flags))
return BIT(0);
switch (c.i.op) {
*op = &block->opcode_list[offset];
int reader;
- if (!(op->flags & LIGHTREC_SYNC) && (known & BIT(op->i.rt)) &&
+ if (!op_flag_sync(op->flags) && (known & BIT(op->i.rt)) &&
values[op->i.rt] == op->i.imm << 16) {
pr_debug("Converting duplicated LUI to NOP\n");
op->opcode = 0x0;
}
}
+/* Optimizer pass: rewrite J opcodes whose absolute target happens to be
+ * within signed 16-bit branch range of the instruction as an always-taken
+ * BEQ $zero, $zero, so the later local-branch handling can apply to them.
+ * Always returns 0 (no failure mode). */
+static int lightrec_transform_branches(struct lightrec_state *state,
+				       struct block *block)
+{
+	struct opcode *op;
+	unsigned int i;
+	s32 offset;
+
+	for (i = 0; i < block->nb_ops; i++) {
+		op = &block->opcode_list[i];
+
+		switch (op->i.op) {
+		case OP_J:
+			/* Transform J opcode into BEQ $zero, $zero if possible. */
+			/* Branch offset in instruction units, relative to the
+			 * slot right after this opcode, as BEQ encodes it. */
+			offset = (s32)((block->pc & 0xf0000000) >> 2 | op->j.imm)
+				- (s32)(block->pc >> 2) - (s32)i - 1;
+
+			if (offset == (s16)offset) {
+				pr_debug("Transform J into BEQ $zero, $zero\n");
+				op->i.op = OP_BEQ;
+				op->i.rs = 0;
+				op->i.rt = 0;
+				op->i.imm = offset;
+
+			}
+		default: /* fall-through */
+			break;
+		}
+	}
+
+	return 0;
+}
+
+
static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
struct opcode *list = block->opcode_list;
struct opcode *list, *next = &block->opcode_list[0];
unsigned int i;
union code op, next_op;
- u8 flags;
+ u32 flags;
for (i = 0; i < block->nb_ops - 1; i++) {
list = next;
next_op = next->c;
op = list->c;
- if (!has_delay_slot(op) ||
- list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) ||
+ if (!has_delay_slot(op) || op_flag_no_ds(list->flags) ||
+ op_flag_emulate_branch(list->flags) ||
op.opcode == 0 || next_op.opcode == 0)
continue;
if (i && has_delay_slot(block->opcode_list[i - 1].c) &&
- !(block->opcode_list[i - 1].flags & LIGHTREC_NO_DS))
+ !op_flag_no_ds(block->opcode_list[i - 1].flags))
continue;
- if ((list->flags & LIGHTREC_SYNC) ||
- (next->flags & LIGHTREC_SYNC))
+ if (op_flag_sync(list->flags) || op_flag_sync(next->flags))
continue;
switch (list->i.op) {
static int lightrec_detect_impossible_branches(struct lightrec_state *state,
struct block *block)
{
- struct opcode *op, *next = &block->opcode_list[0];
+ struct opcode *op, *list = block->opcode_list, *next = &list[0];
unsigned int i;
int ret = 0;
+ s16 offset;
for (i = 0; i < block->nb_ops - 1; i++) {
op = next;
- next = &block->opcode_list[i + 1];
+ next = &list[i + 1];
if (!has_delay_slot(op->c) ||
(!load_in_delay_slot(next->c) &&
continue;
}
+ offset = i + 1 + (s16)op->i.imm;
+ if (load_in_delay_slot(next->c) &&
+ (offset >= 0 && offset < block->nb_ops) &&
+ !opcode_reads_register(list[offset].c, next->c.i.rt)) {
+ /* The 'impossible' branch is a local branch - we can
+ * verify here that the first opcode of the target does
+ * not use the target register of the delay slot */
+
+ pr_debug("Branch at offset 0x%x has load delay slot, "
+ "but is local and dest opcode does not read "
+ "dest register\n", i << 2);
+ continue;
+ }
+
op->flags |= LIGHTREC_EMULATE_BRANCH;
- if (op == block->opcode_list) {
+ if (op == list) {
pr_debug("First opcode of block PC 0x%08x is an impossible branch\n",
block->pc);
bool should_emulate(const struct opcode *list)
{
- return has_delay_slot(list->c) &&
- (list->flags & LIGHTREC_EMULATE_BRANCH);
+ return op_flag_emulate_branch(list->flags) && has_delay_slot(list->c);
+}
+
+/* True if this opcode encodes its destination register in the 'rd' field
+ * (OP_SPECIAL and OP_META_MOV) rather than in 'rt'. */
+static bool op_writes_rd(union code c)
+{
+ switch (c.i.op) {
+ case OP_SPECIAL:
+ case OP_META_MOV:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Tag opcode 'op' with the register operation 'reg_op' (one of
+ * LIGHTREC_REG_UNLOAD / LIGHTREC_REG_CLEAN / LIGHTREC_REG_DISCARD) for
+ * register 'reg', storing it in whichever flag slot (RD, RS or RT) matches
+ * how the opcode references that register. Emits a debug message if the
+ * opcode does not touch 'reg' at all. */
+static void lightrec_add_reg_op(struct opcode *op, u8 reg, u32 reg_op)
+{
+ if (op_writes_rd(op->c) && reg == op->r.rd)
+ op->flags |= LIGHTREC_REG_RD(reg_op);
+ else if (op->i.rs == reg)
+ op->flags |= LIGHTREC_REG_RS(reg_op);
+ else if (op->i.rt == reg)
+ op->flags |= LIGHTREC_REG_RT(reg_op);
+ else
+ pr_debug("Cannot add unload/clean/discard flag: "
+ "opcode does not touch register %s!\n",
+ lightrec_reg_name(reg));
 }
static void lightrec_add_unload(struct opcode *op, u8 reg)
{
- if (op->i.op == OP_SPECIAL && reg == op->r.rd)
- op->flags |= LIGHTREC_UNLOAD_RD;
+ lightrec_add_reg_op(op, reg, LIGHTREC_REG_UNLOAD);
+}
- if (op->i.rs == reg)
- op->flags |= LIGHTREC_UNLOAD_RS;
- if (op->i.rt == reg)
- op->flags |= LIGHTREC_UNLOAD_RT;
+/* Flag register 'reg' to be discarded at opcode 'op' (dropped from the
+ * register cache; per the discard rules above, without a writeback). */
+static void lightrec_add_discard(struct opcode *op, u8 reg)
+{
+ lightrec_add_reg_op(op, reg, LIGHTREC_REG_DISCARD);
+}
+
+/* Flag register 'reg' to be cleaned at opcode 'op' (written back while
+ * remaining loaded — see the dirty/loaded handling in the caller). */
+static void lightrec_add_clean(struct opcode *op, u8 reg)
+{
+ lightrec_add_reg_op(op, reg, LIGHTREC_REG_CLEAN);
+}
+
+/* Flush the early-unload tracking state at a sync point: every register with
+ * a recorded last read or last write gets an unload flag at the opcode of its
+ * most recent use, then both 34-entry tables (presumably 32 GPRs plus LO/HI —
+ * TODO confirm) are reset to -1 via 0xff byte fill. */
+static void
+lightrec_early_unload_sync(struct opcode *list, s16 *last_r, s16 *last_w)
+{
+ unsigned int reg;
+ s16 offset;
+
+ for (reg = 0; reg < 34; reg++) {
+ offset = s16_max(last_w[reg], last_r[reg]);
+
+ if (offset >= 0)
+ lightrec_add_unload(&list[offset], reg);
+ }
+
+ memset(last_r, 0xff, sizeof(*last_r) * 34);
+ memset(last_w, 0xff, sizeof(*last_w) * 34);
 }
static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
{
- unsigned int i, offset;
+ u16 i, offset;
struct opcode *op;
+ s16 last_r[34], last_w[34], last_sync = 0, next_sync = 0;
+ u64 mask_r, mask_w, dirty = 0, loaded = 0;
u8 reg;
- for (reg = 1; reg < 34; reg++) {
- int last_r_id = -1, last_w_id = -1;
+ memset(last_r, 0xff, sizeof(last_r));
+ memset(last_w, 0xff, sizeof(last_w));
- for (i = 0; i < block->nb_ops; i++) {
- union code c = block->opcode_list[i].c;
+ /*
+ * Clean if:
+ * - the register is dirty, and is read again after a branch opcode
+ *
+ * Unload if:
+ * - the register is dirty or loaded, and is not read again
+ * - the register is dirty or loaded, and is written again after a branch opcode
+ * - the next opcode has the SYNC flag set
+ *
+ * Discard if:
+ * - the register is dirty or loaded, and is written again
+ */
- if (opcode_reads_register(c, reg))
- last_r_id = i;
- if (opcode_writes_register(c, reg))
- last_w_id = i;
+ for (i = 0; i < block->nb_ops; i++) {
+ op = &block->opcode_list[i];
+
+ if (op_flag_sync(op->flags) || should_emulate(op)) {
+ /* The next opcode has the SYNC flag set, or is a branch
+ * that should be emulated: unload all registers. */
+ lightrec_early_unload_sync(block->opcode_list, last_r, last_w);
+ dirty = 0;
+ loaded = 0;
}
- if (last_w_id > last_r_id)
- offset = (unsigned int)last_w_id;
- else if (last_r_id >= 0)
- offset = (unsigned int)last_r_id;
- else
- continue;
+ if (next_sync == i) {
+ last_sync = i;
+ pr_debug("Last sync: 0x%x\n", last_sync << 2);
+ }
- op = &block->opcode_list[offset];
+ if (has_delay_slot(op->c)) {
+ next_sync = i + 1 + !op_flag_no_ds(op->flags);
+ pr_debug("Next sync: 0x%x\n", next_sync << 2);
+ }
- if (has_delay_slot(op->c) && (op->flags & LIGHTREC_NO_DS))
- offset++;
+ mask_r = opcode_read_mask(op->c);
+ mask_w = opcode_write_mask(op->c);
- if (offset == block->nb_ops)
- continue;
+ for (reg = 0; reg < 34; reg++) {
+ if (mask_r & BIT(reg)) {
+ if (dirty & BIT(reg) && last_w[reg] < last_sync) {
+ /* The register is dirty, and is read
+ * again after a branch: clean it */
+
+ lightrec_add_clean(&block->opcode_list[last_w[reg]], reg);
+ dirty &= ~BIT(reg);
+ loaded |= BIT(reg);
+ }
+
+ last_r[reg] = i;
+ }
+
+ if (mask_w & BIT(reg)) {
+ if ((dirty & BIT(reg) && last_w[reg] < last_sync) ||
+ (loaded & BIT(reg) && last_r[reg] < last_sync)) {
+ /* The register is dirty or loaded, and
+ * is written again after a branch:
+ * unload it */
+
+ offset = s16_max(last_w[reg], last_r[reg]);
+ lightrec_add_unload(&block->opcode_list[offset], reg);
+ dirty &= ~BIT(reg);
+ loaded &= ~BIT(reg);
+ } else if (!(mask_r & BIT(reg)) &&
+ ((dirty & BIT(reg) && last_w[reg] > last_sync) ||
+ (loaded & BIT(reg) && last_r[reg] > last_sync))) {
+ /* The register is dirty or loaded, and
+ * is written again: discard it */
+
+ offset = s16_max(last_w[reg], last_r[reg]);
+ lightrec_add_discard(&block->opcode_list[offset], reg);
+ dirty &= ~BIT(reg);
+ loaded &= ~BIT(reg);
+ }
+
+ last_w[reg] = i;
+ }
- lightrec_add_unload(&block->opcode_list[offset], reg);
+ }
+
+ dirty |= mask_w;
+ loaded |= mask_r;
}
+ /* Unload all registers that are dirty or loaded at the end of block. */
+ lightrec_early_unload_sync(block->opcode_list, last_r, last_w);
+
return 0;
}
"requiring invalidation\n",
list->opcode);
list->flags |= LIGHTREC_NO_INVALIDATE;
+ list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
}
/* Detect writes whose destination address is inside the
kunseg_val = kunseg(val);
psx_map = lightrec_get_map_idx(state, kunseg_val);
+ list->flags &= ~LIGHTREC_IO_MASK;
+
switch (psx_map) {
case PSX_MAP_KERNEL_USER_RAM:
if (val == kunseg_val)
mask |= opcode_read_mask(op->c);
mask |= opcode_write_mask(op->c);
- if (op->flags & LIGHTREC_SYNC)
+ if (op_flag_sync(op->flags))
sync = true;
switch (op->i.op) {
case OP_BGTZ:
case OP_REGIMM:
/* TODO: handle backwards branches too */
- if (!last &&
- (op->flags & LIGHTREC_LOCAL_BRANCH) &&
+ if (!last && op_flag_local_branch(op->flags) &&
(s16)op->c.i.imm >= 0) {
branch_offset = i + 1 + (s16)op->c.i.imm
- - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
+ - !!op_flag_no_ds(op->flags);
reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
mask, sync, mflo, false);
if (op->r.rs != 31)
return reg;
- if (!sync &&
- !(op->flags & LIGHTREC_NO_DS) &&
+ if (!sync && !op_flag_no_ds(op->flags) &&
(next->i.op == OP_SPECIAL) &&
((!mflo && next->r.op == OP_SPECIAL_MFHI) ||
(mflo && next->r.op == OP_SPECIAL_MFLO)))
case OP_BGTZ:
case OP_REGIMM:
/* TODO: handle backwards branches too */
- if ((op->flags & LIGHTREC_LOCAL_BRANCH) &&
- (s16)op->c.i.imm >= 0) {
+ if (op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) {
branch_offset = i + 1 + (s16)op->c.i.imm
- - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
+ - !!op_flag_no_ds(op->flags);
lightrec_replace_lo_hi(block, branch_offset, last, lo);
lightrec_replace_lo_hi(block, i + 1, branch_offset, lo);
/* Don't support opcodes in delay slots */
if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
- (list->flags & LIGHTREC_NO_DS)) {
+ op_flag_no_ds(list->flags)) {
continue;
}
IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
+ IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_branches),
IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches),
IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops),
IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots),
}
}
+/* Drop the cached native mapping of guest register 'reg' if one exists;
+ * no-op otherwise. NOTE(review): lightrec_discard_nreg is not visible here —
+ * presumably it releases the mapping without writing the value back; confirm
+ * against regcache.c. */
+void lightrec_discard_reg_if_loaded(struct regcache *cache, u8 reg)
+{
+ struct native_register *nreg;
+
+ nreg = find_mapped_reg(cache, reg, false);
+ if (nreg)
+ lightrec_discard_nreg(nreg);
+}
+
struct native_register * lightrec_regcache_enter_branch(struct regcache *cache)
{
struct native_register *backup;
void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit,
u8 reg, _Bool unload);
+void lightrec_discard_reg_if_loaded(struct regcache *cache, u8 reg);
u8 lightrec_alloc_reg_in_address(struct regcache *cache,
jit_state_t *_jit, u8 reg, s16 offset);
{
struct lightrec_registers *regs;
- lightrec_plugin_shutdown();
- lightrec_plugin_init();
-
regs = lightrec_get_registers(lightrec_state);
+ /* Invalidate all blocks */
+ lightrec_invalidate_all(lightrec_state);
+
+ /* Reset registers */
+ memset(regs, 0, sizeof(*regs));
+
regs->cp0[12] = 0x10900000; // COP0 enabled | BEV = 1 | TS = 1
regs->cp0[15] = 0x00000002; // PRevID = Revision ID, same as R3000A