case OP_SWL:
case OP_SW:
case OP_SWR:
+ case OP_META_LWU:
+ case OP_META_SWU:
return BIT(op.i.rs) | BIT(op.i.rt);
case OP_META:
return BIT(op.m.rs);
case OP_LBU:
case OP_LHU:
case OP_LWR:
+ case OP_META_LWU:
return BIT(op.i.rt);
case OP_JAL:
return BIT(31);
if (opcode_writes_register(list[i].c, reg))
return true;
+ if (is_syscall(list[i].c))
+ return false;
+
if (has_delay_slot(list[i].c)) {
if (op_flag_no_ds(list[i].flags) ||
opcode_reads_register(list[i + 1].c, reg))
return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
}
-bool opcode_is_mfc(union code op)
+static bool opcode_is_mfc(union code op)
{
switch (op.i.op) {
case OP_CP0:
return false;
}
-bool opcode_is_load(union code op)
+static bool opcode_is_load(union code op)
{
switch (op.i.op) {
case OP_LB:
case OP_LHU:
case OP_LWR:
case OP_LWC2:
+ case OP_META_LWU:
return true;
default:
return false;
case OP_SWL:
case OP_SWR:
case OP_SWC2:
+ case OP_META_SWU:
return true;
default:
return false;
}
}
+bool opcode_has_load_delay(union code op) /* Reports whether op is treated as having a load delay. */
+{
+ return (opcode_is_load(op) && op.i.rt && op.i.op != OP_LWC2) /* a load whose rt field is non-zero, OP_LWC2 excluded... */
+ || opcode_is_mfc(op); /* ...or any opcode that opcode_is_mfc() accepts */
+}
+
static u8 opcode_get_io_size(union code op)
{
switch (op.i.op) {
case OP_LBU:
case OP_LHU:
case OP_LWR:
+ case OP_META_LWU:
return false;
default:
return true;
return;
}
- if (op->i.imm != 0 || op->i.rt == 0 || offset == block->nb_ops - 1)
+ if (op->i.imm != 0 || op->i.rt == 0 || is_delay_slot(list, offset))
return;
reader = find_next_reader(list, offset + 1, op->i.rt);
case OP_SWL:
case OP_SW:
case OP_SWR:
+ case OP_META_SWU:
if (is_known_zero(v, op->i.rt))
op->i.rt = 0;
fallthrough;
case OP_LWR:
case OP_LWC2:
case OP_SWC2:
+ case OP_META_LWU:
if (is_known(v, op->i.rs)
&& kunseg(v[op->i.rs].value) == 0)
op->i.rs = 0;
}
}
+static void maybe_remove_load_delay(struct opcode *op) /* Clears LIGHTREC_LOAD_DELAY from op->flags when op is a load that op_flag_load_delay() reports as flagged. */
+{
+ if (op_flag_load_delay(op->flags) && opcode_is_load(op->c)) /* any other opcode is left untouched */
+ op->flags &= ~LIGHTREC_LOAD_DELAY;
+}
+
static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
struct opcode *op, *list = block->opcode_list;
struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
unsigned int i;
bool local;
+ int idx;
u8 tmp;
for (i = 0; i < block->nb_ops; i++) {
/* Transform all opcodes detected as useless to real NOPs
* (0x0: SLL r0, r0, #0) */
if (op->opcode != 0 && is_nop(op->c)) {
- pr_debug("Converting useless opcode 0x%08x to NOP\n",
- op->opcode);
+ pr_debug("Converting useless opcode "X32_FMT" to NOP\n",
+ op->opcode);
op->opcode = 0x0;
}
(v[op->i.rs].value ^ v[op->i.rt].value)) {
pr_debug("Found never-taken BEQ\n");
+ if (!op_flag_no_ds(op->flags))
+ maybe_remove_load_delay(&list[i + 1]);
+
local = op_flag_local_branch(op->flags);
op->opcode = 0;
op->flags = 0;
v[op->i.rs].value == v[op->i.rt].value) {
pr_debug("Found never-taken BNE\n");
+ if (!op_flag_no_ds(op->flags))
+ maybe_remove_load_delay(&list[i + 1]);
+
local = op_flag_local_branch(op->flags);
op->opcode = 0;
op->flags = 0;
v[op->i.rs].value & BIT(31)) {
pr_debug("Found never-taken BGTZ\n");
+ if (!op_flag_no_ds(op->flags))
+ maybe_remove_load_delay(&list[i + 1]);
+
local = op_flag_local_branch(op->flags);
op->opcode = 0;
op->flags = 0;
break;
case OP_LUI:
- if (i == 0 || !has_delay_slot(list[i - 1].c))
+ if (!is_delay_slot(list, i))
lightrec_modify_lui(block, i);
lightrec_remove_useless_lui(block, i, v);
- if (i == 0 || !has_delay_slot(list[i - 1].c))
+ if (!is_delay_slot(list, i))
lightrec_lui_to_movi(block, i);
break;
}
}
break;
+ case OP_LWL:
+ case OP_LWR:
+ if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+ idx = find_next_reader(list, i + 1, op->i.rt);
+ if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+ && list[idx].i.rs == op->i.rs
+ && list[idx].i.rt == op->i.rt
+ && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+ /* Replace a LWL/LWR combo with a META_LWU */
+ if (op->i.op == OP_LWL)
+ op->i.imm -= 3;
+ op->i.op = OP_META_LWU;
+ list[idx].opcode = 0;
+ pr_debug("Convert LWL/LWR to LWU\n");
+ }
+ }
+ break;
+ case OP_SWL:
+ case OP_SWR:
+ if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+ idx = find_next_reader(list, i + 1, op->i.rt);
+ if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+ && list[idx].i.rs == op->i.rs
+ && list[idx].i.rt == op->i.rt
+ && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+ /* Replace a SWL/SWR combo with a META_SWU */
+ if (op->i.op == OP_SWL)
+ op->i.imm -= 3;
+ op->i.op = OP_META_SWU;
+ list[idx].opcode = 0;
+ pr_debug("Convert SWL/SWR to SWU\n");
+ }
+ }
+ break;
case OP_REGIMM:
switch (op->r.rt) {
case OP_REGIMM_BLTZ:
} else {
pr_debug("Found never-taken BLTZ/BGEZ\n");
+ if (!op_flag_no_ds(op->flags))
+ maybe_remove_load_delay(&list[i + 1]);
+
local = op_flag_local_branch(op->flags);
op->opcode = 0;
op->flags = 0;
break;
}
- pr_debug("Multiply by power-of-two: %u\n",
+ pr_debug("Multiply by power-of-two: %"PRIu32"\n",
v[op->r.rt].value);
if (op->r.op == OP_SPECIAL_MULT)
for (i = 0; i < block->nb_ops; i++) {
op = &list[i];
- if (!opcode_is_load(op->c) || !op->c.i.rt || op->c.i.op == OP_LWC2)
+ if (!opcode_has_load_delay(op->c))
continue;
if (!is_delay_slot(list, i)) {
switch (next.i.op) {
case OP_LWL:
case OP_LWR:
- case OP_REGIMM:
- case OP_BEQ:
- case OP_BNE:
- case OP_BLEZ:
- case OP_BGTZ:
continue;
}
+ if (has_delay_slot(next))
+ continue;
+
if (opcode_reads_register(next, c.i.rt)
&& !opcode_writes_register(next, c.i.rs)) {
pr_debug("Swapping opcodes at offset 0x%x to "
offset = i + 1 + (s16)list->c.i.imm;
- pr_debug("Found local branch to offset 0x%x\n", offset << 2);
+ pr_debug("Found local branch to offset 0x%"PRIx32"\n", offset << 2);
ds = get_delay_slot(block->opcode_list, i);
if (op_flag_load_delay(ds->flags) && opcode_is_load(ds->c)) {
* registers as address will never hit a code page. */
if (list->i.rs >= 28 && list->i.rs <= 29 &&
!state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
- pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n",
+ pr_debug("Flaging opcode "X32_FMT" as not requiring invalidation\n",
list->opcode);
list->flags |= LIGHTREC_NO_INVALIDATE;
}
list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI);
}
- if (reg_lo > 0 && reg_lo != REG_LO) {
+ if (0/* Broken */ && reg_lo > 0 && reg_lo != REG_LO) {
pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n",
lightrec_reg_name(reg_lo), list->r.rs, list->r.rt);
list->r.rd = 0;
}
- if (reg_hi > 0 && reg_hi != REG_HI) {
+ if (0/* Broken */ && reg_hi > 0 && reg_hi != REG_HI) {
pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n",
lightrec_reg_name(reg_hi), list->r.rs, list->r.rt);
if (i == ARRAY_SIZE(memset_code) - 1) {
/* success! */
- pr_debug("Block at PC 0x%x is a memset\n", block->pc);
+ pr_debug("Block at "PC_FMT" is a memset\n", block->pc);
block_set_flags(block,
BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE);