case OP_SWL:
case OP_SW:
case OP_SWR:
+ case OP_META_LWU:
+ case OP_META_SWU:
return BIT(op.i.rs) | BIT(op.i.rt);
case OP_META:
return BIT(op.m.rs);
case OP_LBU:
case OP_LHU:
case OP_LWR:
+ case OP_META_LWU:
return BIT(op.i.rt);
case OP_JAL:
return BIT(31);
case OP_LHU:
case OP_LWR:
case OP_LWC2:
+ case OP_META_LWU:
return true;
default:
return false;
case OP_SWL:
case OP_SWR:
case OP_SWC2:
+ case OP_META_SWU:
return true;
default:
return false;
case OP_LBU:
case OP_LHU:
case OP_LWR:
+ case OP_META_LWU:
return false;
default:
return true;
pr_debug("Convert LHU+SLL+SRA to LH\n");
v[ldop->i.rt].known = 0;
- v[ldop->i.rt].sign = 0xffffff80 << 24 - curr->r.imm;
+ v[ldop->i.rt].sign = 0xffffff80 << (24 - curr->r.imm);
}
}
}
}
+/*
+ * Flag a LUI and the next reader of its target register with LIGHTREC_MOVI.
+ *
+ * Nothing is done unless the opcode at 'offset' is a LUI, find_next_reader()
+ * returns a strictly positive index for the LUI's rt register, and the opcode
+ * at that index is an ORI, ADDI or ADDIU whose rs and rt fields are equal and
+ * whose immediate is non-zero.
+ *
+ * NOTE(review): LIGHTREC_MOVI presumably lets the code generator emit the
+ * pair as one immediate load - confirm against the emitter.
+ */
+static void lightrec_lui_to_movi(struct block *block, unsigned int offset)
+{
+	struct opcode *list = block->opcode_list;
+	struct opcode *lui = &list[offset];
+	struct opcode *reader;
+	int idx;
+
+	if (lui->i.op != OP_LUI)
+		return;
+
+	idx = find_next_reader(list, offset + 1, lui->i.rt);
+	if (idx <= 0)
+		return;
+
+	reader = &list[idx];
+
+	/* Only these three immediate opcodes are candidates. */
+	if (reader->i.op != OP_ORI &&
+	    reader->i.op != OP_ADDI &&
+	    reader->i.op != OP_ADDIU)
+		return;
+
+	/* The reader must update the register in place with a non-zero immediate. */
+	if (reader->i.rs != reader->i.rt || !reader->i.imm)
+		return;
+
+	reader->flags |= LIGHTREC_MOVI;
+	lui->flags |= LIGHTREC_MOVI;
+}
+
+
static void lightrec_modify_lui(struct block *block, unsigned int offset)
{
union code c, *lui = &block->opcode_list[offset].c;
}
pr_debug("Convert LUI at offset 0x%x to kuseg\n",
- i - 1 << 2);
+ (i - 1) << 2);
lui->i.imm = kunseg(lui->i.imm << 16) >> 16;
break;
}
case OP_SWL:
case OP_SW:
case OP_SWR:
+ case OP_META_SWU:
if (is_known_zero(v, op->i.rt))
op->i.rt = 0;
fallthrough;
case OP_LWR:
case OP_LWC2:
case OP_SWC2:
+ case OP_META_LWU:
if (is_known(v, op->i.rs)
&& kunseg(v[op->i.rs].value) == 0)
op->i.rs = 0;
}
}
+/*
+ * Clear LIGHTREC_LOAD_DELAY from 'op' when op_flag_load_delay() reports the
+ * flag as set and opcode_is_load() says the opcode is a load. Any other
+ * opcode is left untouched.
+ */
+static void maybe_remove_load_delay(struct opcode *op)
+{
+	if (!op_flag_load_delay(op->flags))
+		return;
+
+	if (!opcode_is_load(op->c))
+		return;
+
+	op->flags &= ~LIGHTREC_LOAD_DELAY;
+}
+
static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
struct opcode *op, *list = block->opcode_list;
struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
unsigned int i;
bool local;
+ int idx;
u8 tmp;
for (i = 0; i < block->nb_ops; i++) {
(v[op->i.rs].value ^ v[op->i.rt].value)) {
pr_debug("Found never-taken BEQ\n");
+ if (!op_flag_no_ds(op->flags))
+ maybe_remove_load_delay(&list[i + 1]);
+
local = op_flag_local_branch(op->flags);
op->opcode = 0;
op->flags = 0;
v[op->i.rs].value == v[op->i.rt].value) {
pr_debug("Found never-taken BNE\n");
+ if (!op_flag_no_ds(op->flags))
+ maybe_remove_load_delay(&list[i + 1]);
+
local = op_flag_local_branch(op->flags);
op->opcode = 0;
op->flags = 0;
v[op->i.rs].value & BIT(31)) {
pr_debug("Found never-taken BGTZ\n");
+ if (!op_flag_no_ds(op->flags))
+ maybe_remove_load_delay(&list[i + 1]);
+
local = op_flag_local_branch(op->flags);
op->opcode = 0;
op->flags = 0;
if (i == 0 || !has_delay_slot(list[i - 1].c))
lightrec_modify_lui(block, i);
lightrec_remove_useless_lui(block, i, v);
+ if (i == 0 || !has_delay_slot(list[i - 1].c))
+ lightrec_lui_to_movi(block, i);
break;
/* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
}
}
break;
+ case OP_LWL:
+ case OP_LWR:
+ if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+ idx = find_next_reader(list, i + 1, op->i.rt);
+ if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+ && list[idx].i.rs == op->i.rs
+ && list[idx].i.rt == op->i.rt
+ && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+ /* Replace a LWL/LWR combo with a META_LWU */
+ if (op->i.op == OP_LWL)
+ op->i.imm -= 3;
+ op->i.op = OP_META_LWU;
+ list[idx].opcode = 0;
+ pr_debug("Convert LWL/LWR to LWU\n");
+ }
+ }
+ break;
+ case OP_SWL:
+ case OP_SWR:
+ if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+ idx = find_next_reader(list, i + 1, op->i.rt);
+ if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+ && list[idx].i.rs == op->i.rs
+ && list[idx].i.rt == op->i.rt
+ && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+ /* Replace a SWL/SWR combo with a META_SWU */
+ if (op->i.op == OP_SWL)
+ op->i.imm -= 3;
+ op->i.op = OP_META_SWU;
+ list[idx].opcode = 0;
+ pr_debug("Convert SWL/SWR to SWU\n");
+ }
+ }
+ break;
case OP_REGIMM:
switch (op->r.rt) {
case OP_REGIMM_BLTZ:
} else {
pr_debug("Found never-taken BLTZ/BGEZ\n");
+ if (!op_flag_no_ds(op->flags))
+ maybe_remove_load_delay(&list[i + 1]);
+
local = op_flag_local_branch(op->flags);
op->opcode = 0;
op->flags = 0;
/* Assume that all I/O operations that target
* $sp or $gp will always only target a mapped
* memory (RAM, BIOS, scratchpad). */
- list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
+ if (state->opt_flags & LIGHTREC_OPT_SP_GP_HIT_RAM)
+ list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
+ else
+ list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
}
fallthrough;
return 0;
}
+/*
+ * Decide whether the block gets the BLOCK_PRELOAD_PC flag.
+ *
+ * The opcodes are scanned in order, stopping at the first one for which
+ * op_flag_sync() is true. BLOCK_PRELOAD_PC is set in block->flags, and the
+ * scan ends, as soon as one of the following is met:
+ *  - J or JAL;
+ *  - BLTZAL or BGEZAL;
+ *  - any other REGIMM opcode, BEQ, BNE, BLEZ or BGTZ that is not flagged
+ *    as a local branch;
+ *  - JALR with a non-zero rd;
+ *  - SYSCALL or BREAK.
+ *
+ * 'state' is not used. The function always returns 0.
+ */
+static int lightrec_test_preload_pc(struct lightrec_state *state, struct block *block)
+{
+	unsigned int i;
+	union code c;
+	u32 flags;
+
+	for (i = 0; i < block->nb_ops; i++) {
+		c = block->opcode_list[i].c;
+		flags = block->opcode_list[i].flags;
+
+		if (op_flag_sync(flags))
+			break;
+
+		switch (c.i.op) {
+		case OP_J:
+		case OP_JAL:
+			block->flags |= BLOCK_PRELOAD_PC;
+			return 0;
+
+		case OP_REGIMM:
+			switch (c.r.rt) {
+			case OP_REGIMM_BLTZAL:
+			case OP_REGIMM_BGEZAL:
+				block->flags |= BLOCK_PRELOAD_PC;
+				return 0;
+			default:
+				break;
+			}
+			fallthrough;
+		case OP_BEQ:
+		case OP_BNE:
+		case OP_BLEZ:
+		case OP_BGTZ:
+			if (!op_flag_local_branch(flags)) {
+				block->flags |= BLOCK_PRELOAD_PC;
+				return 0;
+			}
+			/*
+			 * Stop here for local branches. Falling into OP_SPECIAL
+			 * would decode the low bits of the branch immediate as
+			 * a SPECIAL function code and could flag the block by
+			 * accident.
+			 */
+			break;
+
+		case OP_SPECIAL:
+			switch (c.r.op) {
+			case OP_SPECIAL_JALR:
+				if (c.r.rd) {
+					block->flags |= BLOCK_PRELOAD_PC;
+					return 0;
+				}
+				break;
+			case OP_SPECIAL_SYSCALL:
+			case OP_SPECIAL_BREAK:
+				block->flags |= BLOCK_PRELOAD_PC;
+				return 0;
+			default:
+				break;
+			}
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+
/*
 * Table of optimizer pass callbacks. Every entry takes the lightrec state and
 * the block to process, and returns an int.
 * NOTE(review): IF_OPT and the code that walks this table are defined outside
 * this view - confirm how disabled options and return values are handled.
 */
static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
IF_OPT(OPT_FLAG_IO, &lightrec_flag_io),
IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
+ IF_OPT(OPT_PRELOAD_PC, &lightrec_test_preload_pc), /* may set BLOCK_PRELOAD_PC in block->flags */
};
int lightrec_optimize(struct lightrec_state *state, struct block *block)