deps/lightning/lib/jit_size.o \
deps/lightning/lib/lightning.o \
deps/lightrec/blockcache.o \
+ deps/lightrec/constprop.o \
deps/lightrec/disassembler.o \
deps/lightrec/emitter.o \
deps/lightrec/interpreter.o \
[subrepo]
remote = https://github.com/pcercuei/lightrec.git
branch = master
- commit = 4e55a15656deb7b2529546af114668fb5dc6870e
- parent = fc7764f123b6445060e56dd27152fffefdec9404
+ commit = 3ff589bcb7d52b3a091fe0b922ba02a0b1a7f095
+ parent = aced3eb3fcaa0fe13c44c4dd196cdab42555fd98
method = merge
cmdver = 0.4.3
cmake_minimum_required(VERSION 3.0)
-project(lightrec LANGUAGES C VERSION 0.5)
+project(lightrec LANGUAGES C VERSION 0.7)
set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries")
if (NOT BUILD_SHARED_LIBS)
list(APPEND LIGHTREC_SOURCES
blockcache.c
+ constprop.c
emitter.c
interpreter.c
lightrec.c
)
list(APPEND LIGHTREC_HEADERS
blockcache.h
+ constprop.h
debug.h
disassembler.h
emitter.h
option(ENABLE_FIRST_PASS "Run the interpreter as first-pass optimization" ON)
-option(ENABLE_THREADED_COMPILER "Enable threaded compiler" ON)
+option(ENABLE_THREADED_COMPILER "Enable threaded compiler" OFF)
if (ENABLE_THREADED_COMPILER)
list(APPEND LIGHTREC_SOURCES recompiler.c reaper.c)
target_link_libraries(${PROJECT_NAME} PRIVATE ${PTHREAD_LIBRARIES})
endif (ENABLE_THREADED_COMPILER)
-option(ENABLE_CODE_BUFFER "Enable external code buffer" OFF)
+option(ENABLE_CODE_BUFFER "Enable external code buffer" ON)
if (ENABLE_CODE_BUFFER)
target_sources(${PROJECT_NAME} PRIVATE tlsf/tlsf.c)
target_include_directories(${PROJECT_NAME} PRIVATE tlsf)
--- /dev/null
+// SPDX-License-Identifier: LGPL-2.1-or-later
+/*
+ * Copyright (C) 2022 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#include "constprop.h"
+#include "disassembler.h"
+#include "lightrec-private.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+static u32 get_min_value(const struct constprop_data *d)
+{
+	/* Min value: all sign bits set to 1, all unknown bits cleared,
+	 * except the MSB which is set to 1 when unknown */
+ return (d->value & d->known) | d->sign | (~d->known & BIT(31));
+}
+
+static u32 get_max_value(const struct constprop_data *d)
+{
+ /* Max value: all sign bits to 0, all unknown bits to 1 */
+ return ((d->value & d->known) | ~d->known) & ~d->sign;
+}
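+
+/* Example: a value whose upper halfword is known to be 0x1234 and whose low
+ * halfword is unknown is described by { .value = 0x12340000,
+ * .known = 0xffff0000, .sign = 0 }; get_min_value() then returns 0x12340000
+ * and get_max_value() returns 0x1234ffff. */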
+
+static u32 lightrec_same_sign(const struct constprop_data *d1,
+ const struct constprop_data *d2)
+{
+ u32 min1, min2, max1, max2, a, b, c, d;
+
+ min1 = get_min_value(d1);
+ max1 = get_max_value(d1);
+ min2 = get_min_value(d2);
+ max2 = get_max_value(d2);
+
+ a = min1 + min2;
+ b = min1 + max2;
+ c = max1 + min2;
+ d = max1 + max2;
+
+ return ((a & b & c & d) | (~a & ~b & ~c & ~d)) & BIT(31);
+}
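+
+/* lightrec_same_sign() computes the four corner sums min1+min2, min1+max2,
+ * max1+min2 and max1+max2; the result is non-zero only if bit 31 agrees
+ * across all four, i.e. if the sum of the two tracked values is guaranteed
+ * to have a known sign. */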
+
+static u32 lightrec_get_sign_mask(const struct constprop_data *d)
+{
+ u32 imm;
+
+ if (d->sign)
+ return d->sign;
+
+ imm = (d->value & BIT(31)) ? d->value : ~d->value;
+ imm = ~(imm & d->known);
+ if (imm)
+ imm = 32 - clz32(imm);
+
+ return imm < 32 ? GENMASK(31, imm) : 0;
+}
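+
+/* lightrec_get_sign_mask() returns the mask of bits guaranteed to be copies
+ * of the sign bit: e.g. for a fully-known value of 5 (binary 101), bits
+ * 3..31 all equal the sign bit, so the mask is GENMASK(31, 3) = 0xfffffff8. */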
+
+static void lightrec_propagate_addi(u32 rs, u32 rd,
+ const struct constprop_data *d,
+ struct constprop_data *v)
+{
+ u32 end, bit, sum, min, mask, imm, value;
+ struct constprop_data result = {
+ .value = v[rd].value,
+ .known = v[rd].known,
+ .sign = v[rd].sign,
+ };
+ bool carry = false;
+
+ /* clear unknown bits to ease processing */
+ v[rs].value &= v[rs].known;
+ value = d->value & d->known;
+
+ mask = ~(lightrec_get_sign_mask(d) & lightrec_get_sign_mask(&v[rs]));
+ end = mask ? 32 - clz32(mask) : 0;
+
+ for (bit = 0; bit < 32; bit++) {
+ if (v[rs].known & d->known & BIT(bit)) {
+ /* the bits are known - compute the resulting bit and
+ * the carry */
+ sum = ((u32)carry << bit) + (v[rs].value & BIT(bit))
+ + (value & BIT(bit));
+
+ if (sum & BIT(bit))
+ result.value |= BIT(bit);
+ else
+ result.value &= ~BIT(bit);
+
+ result.known |= BIT(bit);
+ result.sign &= ~BIT(bit);
+ carry = sum & BIT(bit + 1);
+ continue;
+ }
+
+ if (bit >= end) {
+ /* We're past the last significant bits of the values
+ * (extra sign bits excepted).
+ * The destination register will be sign-extended
+ * starting from here (if no carry) or from the next
+ * bit (if carry).
+ * If the source registers are not sign-extended and we
+ * have no carry, the algorithm is done here. */
+
+ if ((v[rs].sign | d->sign) & BIT(bit)) {
+ mask = GENMASK(31, bit);
+
+ if (lightrec_same_sign(&v[rs], d)) {
+					/* Theoretical minimum and maximum values
+ * have the same sign; therefore the
+ * sign bits are known. */
+ min = get_min_value(&v[rs])
+ + get_min_value(d);
+ result.value = (min & mask)
+ | (result.value & ~mask);
+ result.known |= mask << carry;
+ result.sign = 0;
+ } else {
+ /* min/max have different signs. */
+ result.sign = mask << 1;
+ result.known &= ~mask;
+ }
+ break;
+ } else if (!carry) {
+ /* Past end bit, no carry; we're done here. */
+ break;
+ }
+ }
+
+ result.known &= ~BIT(bit);
+ result.sign &= ~BIT(bit);
+
+ /* Found an unknown bit in one of the registers.
+ * If the carry and the bit in the other register are both zero,
+ * we can continue the algorithm. */
+ if (!carry && (((d->known & ~value)
+ | (v[rs].known & ~v[rs].value)) & BIT(bit)))
+ continue;
+
+ /* We have an unknown bit in one of the source registers, and we
+ * may generate a carry: there's nothing to do. Everything from
+ * this bit till the next known 0 bit or sign bit will be marked
+ * as unknown. The algorithm can then restart at the following
+ * bit. */
+
+ imm = (v[rs].known & d->known & ~v[rs].value & ~value)
+ | v[rs].sign | d->sign;
+
+ imm &= GENMASK(31, bit);
+ imm = imm ? ctz32(imm) : 31;
+ mask = GENMASK(imm, bit);
+ result.known &= ~mask;
+ result.sign &= ~mask;
+
+ bit = imm;
+ carry = false;
+ }
+
+ v[rd] = result;
+}
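+
+/* lightrec_propagate_addi: with both operands fully known this reduces to a
+ * plain addition, e.g. rs = 0x10 plus a fully-known 0x20 yields a fully-known
+ * 0x30. With partially-known operands, an unknown bit that may generate a
+ * carry marks everything up to the next known-0 or sign bit as unknown. */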
+
+static void lightrec_propagate_sub(u32 rs, u32 rt, u32 rd,
+ struct constprop_data *v)
+{
+ struct constprop_data d = {
+ .value = ~v[rt].value,
+ .known = v[rt].known,
+ .sign = v[rt].sign,
+ };
+ u32 imm, mask, bit;
+
+ /* Negate the known Rt value, then propagate as a regular ADD. */
+
+ for (bit = 0; bit < 32; bit++) {
+ if (!(d.known & BIT(bit))) {
+ /* Unknown bit - mark bits unknown up to the next known 0 */
+
+ imm = (d.known & ~d.value) | d.sign;
+ imm &= GENMASK(31, bit);
+ imm = imm ? ctz32(imm) : 31;
+ mask = GENMASK(imm, bit);
+ d.known &= ~mask;
+ d.sign &= ~mask;
+ break;
+ }
+
+ if (!(d.value & BIT(bit))) {
+ /* Bit is 0: we can set our carry, and the algorithm is done. */
+ d.value |= BIT(bit);
+ break;
+ }
+
+ /* Bit is 1 - set to 0 and continue algorithm */
+ d.value &= ~BIT(bit);
+ }
+
+ lightrec_propagate_addi(rs, rd, &d, v);
+}
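+
+/* SUB is computed as rs + ~rt + 1: the loop above folds the +1 increment
+ * into the complemented Rt bits where they are known, so the result can be
+ * fed through the regular ADD propagation unchanged. */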
+
+static void lightrec_propagate_slt(u32 rs, u32 rd, bool is_signed,
+ const struct constprop_data *d,
+ struct constprop_data *v)
+{
+ unsigned int bit;
+
+ if (is_signed && (v[rs].known & d->known
+ & (v[rs].value ^ d->value) & BIT(31))) {
+		/* If doing a signed comparison and the two sign bits (bit 31)
+		 * are known to be opposite, we can deduce the result. */
+ v[rd].value = v[rs].value >> 31;
+ v[rd].known = 0xffffffff;
+ v[rd].sign = 0;
+ return;
+ }
+
+ for (bit = 32; bit > 0; bit--) {
+ if (!(v[rs].known & d->known & BIT(bit - 1))) {
+ /* One bit is unknown and we cannot figure out which
+ * value is smaller. We still know that the upper 31
+ * bits are zero. */
+ v[rd].value = 0;
+ v[rd].known = 0xfffffffe;
+ v[rd].sign = 0;
+ break;
+ }
+
+ /* The two bits are equal - continue to the next bit. */
+ if (~(v[rs].value ^ d->value) & BIT(bit - 1))
+ continue;
+
+ /* The two bits aren't equal; we can therefore deduce which
+ * value is smaller. */
+ v[rd].value = !(v[rs].value & BIT(bit - 1));
+ v[rd].known = 0xffffffff;
+ v[rd].sign = 0;
+ break;
+ }
+
+ if (bit == 0) {
+ /* rs == rt and all bits are known */
+ v[rd].value = 0;
+ v[rd].known = 0xffffffff;
+ v[rd].sign = 0;
+ }
+}
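+
+/* lightrec_propagate_slt: e.g. comparing a register fully known to be 5
+ * against a fully-known 9, scanning from the MSB down, the first differing
+ * bit (bit 3) is set in the second operand only, so rd is known to be 1
+ * (5 < 9). */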
+
+void lightrec_consts_propagate(const struct opcode *list,
+ unsigned int idx,
+ struct constprop_data *v)
+{
+ union code c;
+ u32 imm;
+
+ if (idx == 0)
+ return;
+
+ /* Register $zero is always, well, zero */
+ v[0].value = 0;
+ v[0].sign = 0;
+ v[0].known = 0xffffffff;
+
+ if (op_flag_sync(list[idx].flags)) {
+ memset(&v[1], 0, sizeof(*v) * 31);
+ return;
+ }
+
+ if (idx > 1 && !op_flag_sync(list[idx - 1].flags)) {
+ c = list[idx - 2].c;
+
+ switch (c.i.op) {
+ case OP_BNE:
+ /* After a BNE $zero + delay slot, we know that the
+ * branch wasn't taken, and therefore the other register
+ * is zero. */
+ if (c.i.rs == 0) {
+ v[c.i.rt].value = 0;
+ v[c.i.rt].sign = 0;
+ v[c.i.rt].known = 0xffffffff;
+ } else if (c.i.rt == 0) {
+ v[c.i.rs].value = 0;
+ v[c.i.rs].sign = 0;
+ v[c.i.rs].known = 0xffffffff;
+ }
+ break;
+ case OP_BLEZ:
+ v[c.i.rs].value &= ~BIT(31);
+ v[c.i.rs].known |= BIT(31);
+ fallthrough;
+ case OP_BEQ:
+ /* TODO: handle non-zero? */
+ break;
+ case OP_REGIMM:
+ switch (c.r.rt) {
+ case OP_REGIMM_BLTZ:
+ case OP_REGIMM_BLTZAL:
+ v[c.i.rs].value &= ~BIT(31);
+ v[c.i.rs].known |= BIT(31);
+ break;
+ case OP_REGIMM_BGEZ:
+ case OP_REGIMM_BGEZAL:
+ v[c.i.rs].value |= BIT(31);
+ v[c.i.rs].known |= BIT(31);
+ /* TODO: handle non-zero? */
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ c = list[idx - 1].c;
+
+ switch (c.i.op) {
+ case OP_SPECIAL:
+ switch (c.r.op) {
+ case OP_SPECIAL_SLL:
+ v[c.r.rd].value = v[c.r.rt].value << c.r.imm;
+ v[c.r.rd].known = (v[c.r.rt].known << c.r.imm)
+ | (BIT(c.r.imm) - 1);
+ v[c.r.rd].sign = v[c.r.rt].sign << c.r.imm;
+ break;
+
+ case OP_SPECIAL_SRL:
+ v[c.r.rd].value = v[c.r.rt].value >> c.r.imm;
+ v[c.r.rd].known = (v[c.r.rt].known >> c.r.imm)
+				| ((BIT(c.r.imm) - 1) << (32 - c.r.imm));
+ v[c.r.rd].sign = c.r.imm ? 0 : v[c.r.rt].sign;
+ break;
+
+ case OP_SPECIAL_SRA:
+ v[c.r.rd].value = (s32)v[c.r.rt].value >> c.r.imm;
+ v[c.r.rd].known = (s32)v[c.r.rt].known >> c.r.imm;
+ v[c.r.rd].sign = (s32)v[c.r.rt].sign >> c.r.imm;
+ break;
+
+ case OP_SPECIAL_SLLV:
+ if ((v[c.r.rs].known & 0x1f) == 0x1f) {
+ imm = v[c.r.rs].value & 0x1f;
+ v[c.r.rd].value = v[c.r.rt].value << imm;
+ v[c.r.rd].known = (v[c.r.rt].known << imm)
+ | (BIT(imm) - 1);
+ v[c.r.rd].sign = v[c.r.rt].sign << imm;
+ } else {
+ v[c.r.rd].known = 0;
+ v[c.r.rd].sign = 0;
+ }
+ break;
+
+ case OP_SPECIAL_SRLV:
+ if ((v[c.r.rs].known & 0x1f) == 0x1f) {
+ imm = v[c.r.rs].value & 0x1f;
+ v[c.r.rd].value = v[c.r.rt].value >> imm;
+ v[c.r.rd].known = (v[c.r.rt].known >> imm)
+					| ((BIT(imm) - 1) << (32 - imm));
+ if (imm)
+ v[c.r.rd].sign = 0;
+ } else {
+ v[c.r.rd].known = 0;
+ v[c.r.rd].sign = 0;
+ }
+ break;
+
+ case OP_SPECIAL_SRAV:
+ if ((v[c.r.rs].known & 0x1f) == 0x1f) {
+ imm = v[c.r.rs].value & 0x1f;
+ v[c.r.rd].value = (s32)v[c.r.rt].value >> imm;
+ v[c.r.rd].known = (s32)v[c.r.rt].known >> imm;
+ v[c.r.rd].sign = (s32)v[c.r.rt].sign >> imm;
+ } else {
+ v[c.r.rd].known = 0;
+ v[c.r.rd].sign = 0;
+ }
+ break;
+
+ case OP_SPECIAL_ADD:
+ case OP_SPECIAL_ADDU:
+ if (is_known_zero(v, c.r.rs))
+ v[c.r.rd] = v[c.r.rt];
+ else if (is_known_zero(v, c.r.rt))
+ v[c.r.rd] = v[c.r.rs];
+ else
+ lightrec_propagate_addi(c.r.rs, c.r.rd, &v[c.r.rt], v);
+ break;
+
+ case OP_SPECIAL_SUB:
+ case OP_SPECIAL_SUBU:
+ if (c.r.rs == c.r.rt) {
+ v[c.r.rd].value = 0;
+ v[c.r.rd].known = 0xffffffff;
+ v[c.r.rd].sign = 0;
+ } else {
+ lightrec_propagate_sub(c.r.rs, c.r.rt, c.r.rd, v);
+ }
+ break;
+
+ case OP_SPECIAL_AND:
+ v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known)
+ | (~v[c.r.rt].value & v[c.r.rt].known)
+ | (~v[c.r.rs].value & v[c.r.rs].known);
+ v[c.r.rd].value = v[c.r.rt].value & v[c.r.rs].value & v[c.r.rd].known;
+ v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
+ break;
+
+ case OP_SPECIAL_OR:
+ v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known)
+ | (v[c.r.rt].value & v[c.r.rt].known)
+ | (v[c.r.rs].value & v[c.r.rs].known);
+ v[c.r.rd].value = (v[c.r.rt].value | v[c.r.rs].value) & v[c.r.rd].known;
+ v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
+ break;
+
+ case OP_SPECIAL_XOR:
+ v[c.r.rd].value = v[c.r.rt].value ^ v[c.r.rs].value;
+ v[c.r.rd].known = v[c.r.rt].known & v[c.r.rs].known;
+ v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
+ break;
+
+ case OP_SPECIAL_NOR:
+ v[c.r.rd].known = (v[c.r.rt].known & v[c.r.rs].known)
+ | (v[c.r.rt].value & v[c.r.rt].known)
+ | (v[c.r.rs].value & v[c.r.rs].known);
+ v[c.r.rd].value = ~(v[c.r.rt].value | v[c.r.rs].value) & v[c.r.rd].known;
+ v[c.r.rd].sign = v[c.r.rt].sign & v[c.r.rs].sign;
+ break;
+
+ case OP_SPECIAL_SLT:
+ case OP_SPECIAL_SLTU:
+ lightrec_propagate_slt(c.r.rs, c.r.rd,
+ c.r.op == OP_SPECIAL_SLT,
+ &v[c.r.rt], v);
+ break;
+
+ case OP_SPECIAL_MULT:
+ case OP_SPECIAL_MULTU:
+ case OP_SPECIAL_DIV:
+ case OP_SPECIAL_DIVU:
+ if (OPT_FLAG_MULT_DIV && c.r.rd) {
+ v[c.r.rd].known = 0;
+ v[c.r.rd].sign = 0;
+ }
+ if (OPT_FLAG_MULT_DIV && c.r.imm) {
+ v[c.r.imm].known = 0;
+ v[c.r.imm].sign = 0;
+ }
+ break;
+
+ case OP_SPECIAL_MFLO:
+ case OP_SPECIAL_MFHI:
+ v[c.r.rd].known = 0;
+ v[c.r.rd].sign = 0;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case OP_META_MULT2:
+ case OP_META_MULTU2:
+ if (OPT_FLAG_MULT_DIV && c.r.rd) {
+ if (c.r.op < 32) {
+ v[c.r.rd].value = v[c.r.rs].value << c.r.op;
+ v[c.r.rd].known = (v[c.r.rs].known << c.r.op)
+ | (BIT(c.r.op) - 1);
+ v[c.r.rd].sign = v[c.r.rs].sign << c.r.op;
+ } else {
+ v[c.r.rd].value = 0;
+ v[c.r.rd].known = 0xffffffff;
+ v[c.r.rd].sign = 0;
+ }
+ }
+
+ if (OPT_FLAG_MULT_DIV && c.r.imm) {
+ if (c.r.op >= 32) {
+			v[c.r.imm].value = v[c.r.rs].value << (c.r.op - 32);
+			v[c.r.imm].known = (v[c.r.rs].known << (c.r.op - 32))
+				| (BIT(c.r.op - 32) - 1);
+			v[c.r.imm].sign = v[c.r.rs].sign << (c.r.op - 32);
+		} else if (c.i.op == OP_META_MULT2) {
+			v[c.r.imm].value = (s32)v[c.r.rs].value >> (32 - c.r.op);
+			v[c.r.imm].known = (s32)v[c.r.rs].known >> (32 - c.r.op);
+			v[c.r.imm].sign = (s32)v[c.r.rs].sign >> (32 - c.r.op);
+		} else {
+			v[c.r.imm].value = v[c.r.rs].value >> (32 - c.r.op);
+			v[c.r.imm].known = v[c.r.rs].known >> (32 - c.r.op);
+			v[c.r.imm].sign = v[c.r.rs].sign >> (32 - c.r.op);
+ }
+ }
+ break;
+
+ case OP_REGIMM:
+ break;
+
+ case OP_ADDI:
+ case OP_ADDIU:
+ if (c.i.imm) {
+ struct constprop_data d = {
+ .value = (s32)(s16)c.i.imm,
+ .known = 0xffffffff,
+ .sign = 0,
+ };
+
+ lightrec_propagate_addi(c.i.rs, c.i.rt, &d, v);
+ } else {
+ /* immediate is zero - that's just a register copy. */
+ v[c.i.rt] = v[c.i.rs];
+ }
+ break;
+
+ case OP_SLTI:
+ case OP_SLTIU:
+ {
+ struct constprop_data d = {
+ .value = (s32)(s16)c.i.imm,
+ .known = 0xffffffff,
+ .sign = 0,
+ };
+
+ lightrec_propagate_slt(c.i.rs, c.i.rt,
+ c.i.op == OP_SLTI, &d, v);
+ }
+ break;
+
+ case OP_ANDI:
+ v[c.i.rt].value = v[c.i.rs].value & c.i.imm;
+ v[c.i.rt].known = v[c.i.rs].known | ~c.i.imm;
+ v[c.i.rt].sign = 0;
+ break;
+
+ case OP_ORI:
+ v[c.i.rt].value = v[c.i.rs].value | c.i.imm;
+ v[c.i.rt].known = v[c.i.rs].known | c.i.imm;
+ v[c.i.rt].sign = (v[c.i.rs].sign & 0xffff) ? 0xffff0000 : v[c.i.rs].sign;
+ break;
+
+ case OP_XORI:
+ v[c.i.rt].value = v[c.i.rs].value ^ c.i.imm;
+ v[c.i.rt].known = v[c.i.rs].known;
+ v[c.i.rt].sign = (v[c.i.rs].sign & 0xffff) ? 0xffff0000 : v[c.i.rs].sign;
+ break;
+
+ case OP_LUI:
+ v[c.i.rt].value = c.i.imm << 16;
+ v[c.i.rt].known = 0xffffffff;
+ v[c.i.rt].sign = 0;
+ break;
+
+ case OP_CP0:
+ switch (c.r.rs) {
+ case OP_CP0_MFC0:
+ case OP_CP0_CFC0:
+ v[c.r.rt].known = 0;
+ v[c.r.rt].sign = 0;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case OP_CP2:
+ if (c.r.op == OP_CP2_BASIC) {
+ switch (c.r.rs) {
+ case OP_CP2_BASIC_MFC2:
+ switch (c.r.rd) {
+ case 1:
+ case 3:
+ case 5:
+ case 8:
+ case 9:
+ case 10:
+ case 11:
+ /* Signed 16-bit */
+ v[c.r.rt].known = 0;
+ v[c.r.rt].sign = 0xffff8000;
+ break;
+ case 7:
+ case 16:
+ case 17:
+ case 18:
+ case 19:
+ /* Unsigned 16-bit */
+ v[c.r.rt].value = 0;
+ v[c.r.rt].known = 0xffff0000;
+ v[c.r.rt].sign = 0;
+ break;
+ default:
+ /* 32-bit */
+ v[c.r.rt].known = 0;
+ v[c.r.rt].sign = 0;
+ break;
+ }
+ break;
+ case OP_CP2_BASIC_CFC2:
+ switch (c.r.rd) {
+ case 4:
+ case 12:
+ case 20:
+ case 26:
+ case 27:
+ case 29:
+ case 30:
+ /* Signed 16-bit */
+ v[c.r.rt].known = 0;
+ v[c.r.rt].sign = 0xffff8000;
+ break;
+ default:
+ /* 32-bit */
+ v[c.r.rt].known = 0;
+ v[c.r.rt].sign = 0;
+ break;
+ }
+ break;
+ }
+ }
+ break;
+ case OP_LB:
+ v[c.i.rt].known = 0;
+ v[c.i.rt].sign = 0xffffff80;
+ break;
+ case OP_LH:
+ v[c.i.rt].known = 0;
+ v[c.i.rt].sign = 0xffff8000;
+ break;
+ case OP_LBU:
+ v[c.i.rt].value = 0;
+ v[c.i.rt].known = 0xffffff00;
+ v[c.i.rt].sign = 0;
+ break;
+ case OP_LHU:
+ v[c.i.rt].value = 0;
+ v[c.i.rt].known = 0xffff0000;
+ v[c.i.rt].sign = 0;
+ break;
+ case OP_LWL:
+ case OP_LWR:
+ /* LWL/LWR don't write the full register if the address is
+ * unaligned, so we only need to know the low 2 bits */
+ if (v[c.i.rs].known & 0x3) {
+ imm = (v[c.i.rs].value & 0x3) * 8;
+
+ if (c.i.op == OP_LWL) {
+ imm = BIT(24 - imm) - 1;
+ v[c.i.rt].sign &= ~imm;
+ } else {
+ imm = imm ? GENMASK(31, 32 - imm) : 0;
+ v[c.i.rt].sign = 0;
+ }
+ v[c.i.rt].known &= ~imm;
+ break;
+ }
+ fallthrough;
+ case OP_LW:
+ v[c.i.rt].known = 0;
+ v[c.i.rt].sign = 0;
+ break;
+ case OP_META_MOV:
+ v[c.r.rd] = v[c.r.rs];
+ break;
+ case OP_META_EXTC:
+ v[c.i.rt].value = (s32)(s8)v[c.i.rs].value;
+ if (v[c.i.rs].known & BIT(7)) {
+ v[c.i.rt].known = v[c.i.rs].known | 0xffffff00;
+ v[c.i.rt].sign = 0;
+ } else {
+ v[c.i.rt].known = v[c.i.rs].known & 0x7f;
+ v[c.i.rt].sign = 0xffffff80;
+ }
+ break;
+
+ case OP_META_EXTS:
+ v[c.i.rt].value = (s32)(s16)v[c.i.rs].value;
+ if (v[c.i.rs].known & BIT(15)) {
+ v[c.i.rt].known = v[c.i.rs].known | 0xffff0000;
+ v[c.i.rt].sign = 0;
+ } else {
+ v[c.i.rt].known = v[c.i.rs].known & 0x7fff;
+ v[c.i.rt].sign = 0xffff8000;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ /* Reset register 0 which may have been used as a target */
+ v[0].value = 0;
+ v[0].sign = 0;
+ v[0].known = 0xffffffff;
+}
+
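+/* If every address the register may hold (plus the immediate offset) falls
+ * within a single memory map, return that map; the caller can then access it
+ * directly instead of going through the generic I/O path. */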
+enum psx_map
+lightrec_get_constprop_map(const struct lightrec_state *state,
+ const struct constprop_data *v, u8 reg, s16 imm)
+{
+ const struct lightrec_mem_map *map;
+ unsigned int i;
+ u32 min, max;
+
+ min = get_min_value(&v[reg]) + imm;
+ max = get_max_value(&v[reg]) + imm;
+
+ /* Handle the case where max + imm overflows */
+ if ((min & 0xe0000000) != (max & 0xe0000000))
+ return PSX_MAP_UNKNOWN;
+
+ pr_debug("Min: 0x%08x max: 0x%08x Known: 0x%08x Sign: 0x%08x\n",
+ min, max, v[reg].known, v[reg].sign);
+
+ min = kunseg(min);
+ max = kunseg(max);
+
+ for (i = 0; i < state->nb_maps; i++) {
+ map = &state->maps[i];
+
+ if (min >= map->pc && min < map->pc + map->length
+ && max >= map->pc && max < map->pc + map->length)
+ return (enum psx_map) i;
+ }
+
+ return PSX_MAP_UNKNOWN;
+}
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2022 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#ifndef __LIGHTREC_CONSTPROP_H__
+#define __LIGHTREC_CONSTPROP_H__
+
+#include "lightrec.h"
+
+#define LIGHTREC_CONSTPROP_INITIALIZER { { 0, 0xffffffff, 0 }, }
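+
+/* Entry 0 describes register $zero (value 0, all bits known); the remaining
+ * entries are zero-initialized, i.e. fully unknown. */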
+
+struct opcode;
+
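+/* Per-register tracked state: "known" flags which bits of "value" are
+ * meaningful, and "sign" flags bits that are known to be sign-extension
+ * copies of one another (e.g. 0xffff8000 after a LH). */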
+struct constprop_data {
+ u32 value;
+ u32 known;
+ u32 sign;
+};
+
+static inline _Bool is_known(const struct constprop_data *v, u8 reg)
+{
+ return v[reg].known == 0xffffffff;
+}
+
+static inline _Bool bits_are_known_zero(const struct constprop_data *v,
+ u8 reg, u32 mask)
+{
+ return !(~v[reg].known & mask) && !(v[reg].value & mask);
+}
+
+static inline _Bool is_known_zero(const struct constprop_data *v, u8 reg)
+{
+ return bits_are_known_zero(v, reg, 0xffffffff);
+}
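+
+/* E.g. after ANDI rt, rs, 0xff, all bits cleared by the mask become known
+ * zeroes, so bits_are_known_zero(v, rt, 0xffffff00) holds. */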
+
+void lightrec_consts_propagate(const struct opcode *list,
+ unsigned int idx,
+ struct constprop_data *v);
+
+enum psx_map
+lightrec_get_constprop_map(const struct lightrec_state *state,
+ const struct constprop_data *v, u8 reg, s16 imm);
+
+#endif /* __LIGHTREC_CONSTPROP_H__ */
return snprintf(buf, len, "%s%s,%s",
special_opcodes[c.r.op],
lightrec_reg_name(c.r.rd),
- lightrec_reg_name(c.r.rt));
+ lightrec_reg_name(c.r.rs));
case OP_SPECIAL_SYSCALL:
case OP_SPECIAL_BREAK:
return snprintf(buf, len, "%s", special_opcodes[c.r.op]);
static void rec_REGIMM(struct lightrec_cstate *state, const struct block *block, u16 offset);
static void rec_CP0(struct lightrec_cstate *state, const struct block *block, u16 offset);
static void rec_CP2(struct lightrec_cstate *state, const struct block *block, u16 offset);
+static void rec_cp2_do_mtc2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset, u8 reg, u8 in_reg);
+static void rec_cp2_do_mfc2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset,
+ u8 reg, u8 out_reg);
static void unknown_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
jit_patch_abs(jit_jmpi(), state->state->eob_wrapper_func);
}
+static void update_ra_register(struct regcache *reg_cache, jit_state_t *_jit,
+ u8 ra_reg, u32 pc, u32 link)
+{
+ u8 link_reg;
+
+ link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg, 0);
+ lightrec_load_imm(reg_cache, _jit, link_reg, pc, link);
+ lightrec_free_reg(reg_cache, link_reg);
+}
+
static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
const struct block *block, u16 offset,
s8 reg_new_pc, u32 imm, u8 ra_reg,
jit_note(__FILE__, __LINE__);
- if (link) {
- /* Update the $ra register */
- u8 link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg, 0);
- jit_movi(link_reg, link);
- lightrec_free_reg(reg_cache, link_reg);
- }
+ if (link && ra_reg != reg_new_pc)
+ update_ra_register(reg_cache, _jit, ra_reg, block->pc, link);
- if (reg_new_pc < 0) {
- reg_new_pc = lightrec_alloc_reg(reg_cache, _jit, JIT_V0);
- lightrec_lock_reg(reg_cache, _jit, reg_new_pc);
+ if (reg_new_pc < 0)
+ lightrec_load_next_pc_imm(reg_cache, _jit, block->pc, imm);
+ else
+ lightrec_load_next_pc(reg_cache, _jit, reg_new_pc);
- jit_movi(reg_new_pc, imm);
+ if (link && ra_reg == reg_new_pc) {
+ /* Handle the special case: JALR $r0, $r0
+ * In that case the target PC should be the old value of the
+ * register. */
+ update_ra_register(reg_cache, _jit, ra_reg, block->pc, link);
}
if (has_delay_slot(op->c) &&
/* Clean the remaining registers */
lightrec_clean_regs(reg_cache, _jit);
- jit_movr(JIT_V0, reg_new_pc);
-
if (cycles && update_cycles) {
jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
pr_debug("EOB: %u cycles\n", cycles);
lightrec_clean_regs(reg_cache, _jit);
- jit_movi(JIT_V0, block->pc + (offset << 2));
+ lightrec_load_imm(reg_cache, _jit, JIT_V0, block->pc,
+ block->pc + (offset << 2));
jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
lightrec_jump_to_eob(state, _jit);
}
-static u8 get_jr_jalr_reg(struct lightrec_cstate *state, const struct block *block, u16 offset)
-{
- struct regcache *reg_cache = state->reg_cache;
- jit_state_t *_jit = block->_jit;
- const struct opcode *op = &block->opcode_list[offset];
- u8 rs;
-
- rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
- lightrec_lock_reg(reg_cache, _jit, rs);
-
- return rs;
-}
-
static void rec_special_JR(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
- u8 rs = get_jr_jalr_reg(state, block, offset);
+ union code c = block->opcode_list[offset].c;
_jit_name(block->_jit, __func__);
- lightrec_emit_end_of_block(state, block, offset, rs, 0, 31, 0, true);
+ lightrec_emit_end_of_block(state, block, offset, c.r.rs, 0, 31, 0, true);
}
static void rec_special_JALR(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
- u8 rs = get_jr_jalr_reg(state, block, offset);
union code c = block->opcode_list[offset].c;
_jit_name(block->_jit, __func__);
- lightrec_emit_end_of_block(state, block, offset, rs, 0, c.r.rd,
+ lightrec_emit_end_of_block(state, block, offset, c.r.rs, 0, c.r.rd,
get_branch_pc(block, offset, 2), true);
}
const struct opcode *op = &block->opcode_list[offset],
*next = &block->opcode_list[offset + 1];
jit_node_t *addr;
- u8 link_reg, rs, rt;
bool is_forward = (s16)op->i.imm >= -1;
int op_cycles = lightrec_cycles_of_opcode(op->c);
u32 target_offset, cycles = state->cycles + op_cycles;
bool no_indirection = false;
u32 next_pc;
+ u8 rs, rt;
jit_note(__FILE__, __LINE__);
if (!op_flag_no_ds(op->flags) && next->opcode)
lightrec_rec_opcode(state, block, offset + 1);
- if (link) {
- /* Update the $ra register */
- link_reg = lightrec_alloc_reg_out(reg_cache, _jit, 31, 0);
- jit_movi(link_reg, link);
- lightrec_free_reg(reg_cache, link_reg);
- }
+ if (link)
+ update_ra_register(reg_cache, _jit, 31, block->pc, link);
/* Clean remaining registers */
lightrec_clean_regs(reg_cache, _jit);
lightrec_regcache_leave_branch(reg_cache, regs_backup);
- if (bz && link) {
- /* Update the $ra register */
- link_reg = lightrec_alloc_reg_out(reg_cache, _jit,
- 31, REG_EXT);
- jit_movi(link_reg, (s32)link);
- lightrec_free_reg(reg_cache, link_reg);
- }
+ if (bz && link)
+ update_ra_register(reg_cache, _jit, 31, block->pc, link);
if (!op_flag_no_ds(op->flags) && next->opcode)
lightrec_rec_opcode(state, block, offset + 1);
if (!op_flag_no_lo(flags)) {
if (is_signed) {
- jit_lti(lo, rs, 0);
+ jit_ltr(lo, rs, rt);
jit_lshi(lo, lo, 1);
jit_subi(lo, lo, 1);
} else {
- jit_movi(lo, 0xffffffff);
+ jit_subi(lo, rt, 1);
}
}
((!state->mirrors_mapped && !no_mask) || (invalidate &&
((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt))));
bool need_tmp = !no_mask || addr_offset || add_imm || invalidate;
+ bool swc2 = c.i.op == OP_SWC2;
+ u8 in_reg = swc2 ? REG_CP2_TEMP : c.i.rt;
- rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);
rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
if (need_tmp)
tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
addr_reg2 = addr_reg;
}
- if (is_big_endian() && swap_code && c.i.rt) {
+ if (is_big_endian() && swap_code && in_reg) {
tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
jit_new_node_ww(swap_code, tmp3, rt);
union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
jit_node_t *to_not_ram, *to_end;
- u8 tmp, tmp2, rs, rt;
+ bool swc2 = c.i.op == OP_SWC2;
+ u8 tmp, tmp2, rs, rt, in_reg = swc2 ? REG_CP2_TEMP : c.i.rt;
s16 imm;
jit_note(__FILE__, __LINE__);
lightrec_free_reg(reg_cache, tmp2);
}
- rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);
- if (is_big_endian() && swap_code && c.i.rt) {
+ if (is_big_endian() && swap_code && in_reg) {
tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
jit_new_node_ww(swap_code, tmp2, rt);
union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
jit_node_t *to_not_ram, *to_end;
- u8 tmp, tmp2, tmp3, rs, rt;
+ bool swc2 = c.i.op == OP_SWC2;
+ u8 tmp, tmp2, tmp3, masked_reg, rs, rt;
+ u8 in_reg = swc2 ? REG_CP2_TEMP : c.i.rt;
jit_note(__FILE__, __LINE__);
lightrec_free_reg(reg_cache, rs);
tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
- to_not_ram = jit_bgti(tmp2, ram_size);
+ if (state->offset_ram != state->offset_scratch) {
+ to_not_ram = jit_bgti(tmp2, ram_size);
+ masked_reg = tmp2;
+ } else {
+ jit_lti_u(tmp, tmp2, ram_size);
+ jit_movnr(tmp, tmp2, tmp);
+ masked_reg = tmp;
+ }
/* Compute the offset to the code LUT */
- jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3);
+ if (c.i.op == OP_SW)
+ jit_andi(tmp, masked_reg, RAM_SIZE - 1);
+ else
+ jit_andi(tmp, masked_reg, (RAM_SIZE - 1) & ~3);
+
if (!lut_is_32bit(state))
jit_lshi(tmp, tmp, 1);
jit_addr(tmp, LIGHTREC_REG_STATE, tmp);
jit_movi(tmp, state->offset_ram);
to_end = jit_b();
+ jit_patch(to_not_ram);
}
- jit_patch(to_not_ram);
-
if (state->offset_ram || state->offset_scratch)
jit_movi(tmp, state->offset_scratch);
lightrec_free_reg(reg_cache, tmp);
lightrec_free_reg(reg_cache, tmp3);
- rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);
- if (is_big_endian() && swap_code && c.i.rt) {
+ if (is_big_endian() && swap_code && in_reg) {
tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
jit_new_node_ww(swap_code, tmp, rt);
jit_code_t code, jit_code_t swap_code)
{
u32 flags = block->opcode_list[offset].flags;
+ u32 mode = LIGHTREC_FLAGS_GET_IO_MODE(flags);
bool no_invalidate = op_flag_no_invalidate(flags) ||
state->state->invalidate_from_dma_only;
+ union code c = block->opcode_list[offset].c;
+ bool is_swc2 = c.i.op == OP_SWC2;
+
+ if (is_swc2) {
+ switch (mode) {
+ case LIGHTREC_IO_RAM:
+ case LIGHTREC_IO_SCRATCH:
+ case LIGHTREC_IO_DIRECT:
+ case LIGHTREC_IO_DIRECT_HW:
+ rec_cp2_do_mfc2(state, block, offset, c.i.rt, REG_CP2_TEMP);
+ break;
+ default:
+ break;
+ }
+ }
- switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
+ switch (mode) {
case LIGHTREC_IO_RAM:
rec_store_ram(state, block, offset, code,
swap_code, !no_invalidate);
break;
default:
rec_io(state, block, offset, true, false);
- break;
+ return;
}
+
+ if (is_swc2)
+ lightrec_discard_reg_if_loaded(state->reg_cache, REG_CP2_TEMP);
}
static void rec_SB(struct lightrec_cstate *state,
const struct block *block, u16 offset)
{
- _jit_name(block->_jit, __func__);
+ union code c = block->opcode_list[offset].c;
+
+ _jit_name(block->_jit, c.i.op == OP_SWC2 ? "rec_SWC2" : "rec_SW");
rec_store(state, block, offset,
jit_code_stxi_i, jit_code_bswapr_ui);
}
rec_io(state, block, offset, true, false);
}
-static void rec_SWC2(struct lightrec_cstate *state,
- const struct block *block, u16 offset)
-{
- _jit_name(block->_jit, __func__);
- rec_io(state, block, offset, false, false);
-}
-
static void rec_load_memory(struct lightrec_cstate *cstate,
const struct block *block, u16 offset,
jit_code_t code, jit_code_t swap_code, bool is_unsigned,
struct regcache *reg_cache = cstate->reg_cache;
struct opcode *op = &block->opcode_list[offset];
jit_state_t *_jit = block->_jit;
- u8 rs, rt, addr_reg, flags = REG_EXT;
+ u8 rs, rt, out_reg, addr_reg, flags = REG_EXT;
bool no_mask = op_flag_no_mask(op->flags);
union code c = op->c;
s16 imm;
- if (!c.i.rt)
+ if (c.i.op == OP_LWC2)
+ out_reg = REG_CP2_TEMP;
+ else if (c.i.rt)
+ out_reg = c.i.rt;
+ else
return;
if (is_unsigned)
flags |= REG_ZEXT;
rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
- rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
+ rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
if (!cstate->state->mirrors_mapped && c.i.imm && !no_mask) {
jit_addi(rt, rs, (s16)c.i.imm);
union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2;
- u8 tmp, rs, rt, addr_reg, flags = REG_EXT;
+ u8 tmp, rs, rt, out_reg, addr_reg, flags = REG_EXT;
s16 imm;
- if (!c.i.rt)
+ if (c.i.op == OP_LWC2)
+ out_reg = REG_CP2_TEMP;
+ else if (c.i.rt)
+ out_reg = c.i.rt;
+ else
return;
if (is_unsigned)
jit_note(__FILE__, __LINE__);
rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
- rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
+ rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
if ((state->offset_ram == state->offset_bios &&
state->offset_ram == state->offset_scratch &&
u16 offset, jit_code_t code, jit_code_t swap_code,
bool is_unsigned)
{
- u32 flags = block->opcode_list[offset].flags;
+ const struct opcode *op = &block->opcode_list[offset];
+ u32 flags = op->flags;
switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
case LIGHTREC_IO_RAM:
break;
default:
rec_io(state, block, offset, false, true);
- break;
+ return;
+ }
+
+ if (op->i.op == OP_LWC2) {
+ rec_cp2_do_mtc2(state, block, offset, op->i.rt, REG_CP2_TEMP);
+ lightrec_discard_reg_if_loaded(state->reg_cache, REG_CP2_TEMP);
}
}
static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
+ union code c = block->opcode_list[offset].c;
jit_code_t code;
if (is_big_endian() && __WORDSIZE == 64)
else
code = jit_code_ldxi_i;
- _jit_name(block->_jit, __func__);
+ _jit_name(block->_jit, c.i.op == OP_LWC2 ? "rec_LWC2" : "rec_LW");
rec_load(state, block, offset, code, jit_code_bswapr_ui, false);
}
-static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset)
-{
- _jit_name(block->_jit, __func__);
- rec_io(state, block, offset, false, false);
-}
-
static void rec_break_syscall(struct lightrec_cstate *state,
const struct block *block, u16 offset,
u32 exit_code)
jit_state_t *_jit = block->_jit;
jit_note(__FILE__, __LINE__);
- lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
+
+ if (c.i.op != OP_SWC2)
+ lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MFC);
}
return cp2c_i_offset(reg) + is_big_endian() * 2;
}
-static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
- const struct block *block, u16 offset)
+static void rec_cp2_do_mfc2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset,
+ u8 reg, u8 out_reg)
{
struct regcache *reg_cache = state->reg_cache;
- const union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
const u32 zext_regs = 0x300f0080;
u8 rt, tmp, tmp2, tmp3, out, flags;
- u8 reg = c.r.rd == 15 ? 14 : c.r.rd;
unsigned int i;
_jit_name(block->_jit, __func__);
}
flags = (zext_regs & BIT(reg)) ? REG_ZEXT : REG_EXT;
- rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, flags);
+ rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
+
+ if (reg == 15)
+ reg = 14;
switch (reg) {
case 1:
lightrec_free_reg(reg_cache, rt);
}
+static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
+{
+ const union code c = block->opcode_list[offset].c;
+
+ rec_cp2_do_mfc2(state, block, offset, c.r.rd, c.r.rt);
+}
+
static void rec_cp2_basic_CFC2(struct lightrec_cstate *state,
const struct block *block, u16 offset)
{
lightrec_free_reg(reg_cache, rt);
}
-static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
- const struct block *block, u16 offset)
+static void rec_cp2_do_mtc2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset,
+ u8 reg, u8 in_reg)
{
struct regcache *reg_cache = state->reg_cache;
- const union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
jit_node_t *loop, *to_loop;
u8 rt, tmp, tmp2, flags = 0;
return;
}
- if (c.r.rd == 31)
+ if (reg == 31)
return;
- if (c.r.rd == 30)
+ if (reg == 30)
flags |= REG_EXT;
- rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, flags);
- switch (c.r.rd) {
+ switch (reg) {
case 15:
tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
jit_ldxi_i(tmp, LIGHTREC_REG_STATE, cp2d_i_offset(13));
lightrec_free_reg(reg_cache, tmp2);
break;
default:
- jit_stxi_i(cp2d_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
+ jit_stxi_i(cp2d_i_offset(reg), LIGHTREC_REG_STATE, rt);
break;
}
lightrec_free_reg(reg_cache, rt);
}
+static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
+{
+ const union code c = block->opcode_list[offset].c;
+
+ rec_cp2_do_mtc2(state, block, offset, c.r.rd, c.r.rt);
+}
+
static void rec_cp2_basic_CTC2(struct lightrec_cstate *state,
const struct block *block, u16 offset)
{
const struct block *block, u16 offset)
{
struct regcache *reg_cache = state->reg_cache;
- union code c = block->opcode_list[offset].c;
+ const struct opcode *op = &block->opcode_list[offset];
+ union code c = op->c;
jit_state_t *_jit = block->_jit;
+ bool unload_rd;
u8 rs, rd;
_jit_name(block->_jit, __func__);
jit_note(__FILE__, __LINE__);
- if (c.r.rs)
+
+ unload_rd = OPT_EARLY_UNLOAD
+ && LIGHTREC_FLAGS_GET_RD(op->flags) == LIGHTREC_REG_UNLOAD;
+
+ if (c.r.rs || unload_rd)
rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
- rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, REG_EXT);
- if (c.r.rs == 0)
- jit_movi(rd, 0);
- else
- jit_extr_i(rd, rs);
+ if (unload_rd) {
+ /* If the destination register will be unloaded right after the
+ * MOV meta-opcode, we don't actually need to write any host
+ * register - we can just store the source register directly to
+ * the register cache, at the offset corresponding to the
+ * destination register. */
+ lightrec_discard_reg_if_loaded(reg_cache, c.r.rd);
+
+ jit_stxi_i(offsetof(struct lightrec_state, regs.gpr)
+			   + (c.r.rd << 2), LIGHTREC_REG_STATE, rs);
- if (c.r.rs)
lightrec_free_reg(reg_cache, rs);
- lightrec_free_reg(reg_cache, rd);
+ } else {
+ rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, REG_EXT);
+
+ if (c.r.rs == 0)
+ jit_movi(rd, 0);
+ else
+ jit_extr_i(rd, rs);
+
+ lightrec_free_reg(reg_cache, rd);
+ }
+
+ if (c.r.rs || unload_rd)
+ lightrec_free_reg(reg_cache, rs);
}
static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state,
[OP_SWL] = rec_SWL,
[OP_SW] = rec_SW,
[OP_SWR] = rec_SWR,
- [OP_LWC2] = rec_LWC2,
- [OP_SWC2] = rec_SWC2,
+ [OP_LWC2] = rec_LW,
+ [OP_SWC2] = rec_SW,
[OP_META_MOV] = rec_meta_MOV,
[OP_META_EXTC] = rec_meta_EXTC_EXTS,
struct lightrec_state *state = inter->state;
const struct opcode *op = inter->op;
- lightrec_mtc(state, op->c, state->regs.gpr[op->r.rt]);
+ lightrec_mtc(state, op->c, op->r.rd, state->regs.gpr[op->r.rt]);
/* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause),
* return early so that the emulator will be able to check software
#define REG_LO 32
#define REG_HI 33
+#define REG_CP2_TEMP (offsetof(struct lightrec_state, cp2_temp_reg) / sizeof(u32))
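+/* Pseudo-register used to carry the word loaded/stored by LWC2/SWC2 between
+ * the memory access and the MTC2/MFC2 emitters. Its index is derived from
+ * the offset of cp2_temp_reg in struct lightrec_state, so it cannot collide
+ * with the GPR, LO or HI indices. */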
/* Definition of jit_state_t (avoids inclusion of <lightning.h>) */
struct jit_node;
struct lightrec_state {
struct lightrec_registers regs;
- uintptr_t wrapper_regs[NUM_TEMPS];
+ u32 cp2_temp_reg;
u32 next_pc;
+ uintptr_t wrapper_regs[NUM_TEMPS];
u32 current_cycle;
u32 target_cycle;
u32 exit_flags;
void remove_from_code_lut(struct blockcache *cache, struct block *block);
-enum psx_map
-lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr);
-
const struct lightrec_mem_map *
lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr);
return block->pc + (offset + imm << 2);
}
-void lightrec_mtc(struct lightrec_state *state, union code op, u32 data);
+void lightrec_mtc(struct lightrec_state *state, union code op, u8 reg, u32 data);
u32 lightrec_mfc(struct lightrec_state *state, union code op);
void lightrec_rfe(struct lightrec_state *state);
void lightrec_cp(struct lightrec_state *state, union code op);
#endif
}
+static inline _Bool can_sign_extend(s32 value, u8 order)
+{
+	return (u32)(value >> (order - 1)) + 1 < 2;
+}
+
+static inline _Bool can_zero_extend(u32 value, u8 order)
+{
+ return (value >> order) == 0;
+}
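+
+/* E.g. can_sign_extend(-64, 8) and can_zero_extend(200, 8) both hold, since
+ * either value survives a round-trip through an 8-bit truncation of the
+ * corresponding signedness. */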
+
#endif /* __LIGHTREC_PRIVATE_H__ */
}
}
-enum psx_map
+static enum psx_map
lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr)
{
const struct lightrec_mem_map *map;
if (op.i.op == OP_CP0)
return state->regs.cp0[op.r.rd];
- else if (op.r.rs == OP_CP2_BASIC_MFC2)
+
+ if (op.i.op == OP_SWC2) {
+ val = lightrec_mfc2(state, op.i.rt);
+ } else if (op.r.rs == OP_CP2_BASIC_MFC2)
val = lightrec_mfc2(state, op.r.rd);
else {
val = state->regs.cp2c[op.r.rd];
{
u32 rt = lightrec_mfc(state, op);
- if (op.r.rt)
+ if (op.i.op == OP_SWC2)
+ state->cp2_temp_reg = rt;
+ else if (op.r.rt)
state->regs.gpr[op.r.rt] = rt;
}
}
}
-void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
+void lightrec_mtc(struct lightrec_state *state, union code op, u8 reg, u32 data)
{
if (op.i.op == OP_CP0) {
- lightrec_mtc0(state, op.r.rd, data);
+ lightrec_mtc0(state, reg, data);
} else {
- if (op.r.rs == OP_CP2_BASIC_CTC2)
- lightrec_ctc2(state, op.r.rd, data);
+ if (op.i.op == OP_LWC2 || op.r.rs != OP_CP2_BASIC_CTC2)
+ lightrec_mtc2(state, reg, data);
else
- lightrec_mtc2(state, op.r.rd, data);
+ lightrec_ctc2(state, reg, data);
if (state->ops.cop2_notify)
(*state->ops.cop2_notify)(state, op.opcode, data);
static void lightrec_mtc_cb(struct lightrec_state *state, u32 arg)
{
union code op = (union code) arg;
+ u32 data;
+ u8 reg;
+
+ if (op.i.op == OP_LWC2) {
+ data = state->cp2_temp_reg;
+ reg = op.i.rt;
+ } else {
+ data = state->regs.gpr[op.r.rt];
+ reg = op.r.rd;
+ }
- lightrec_mtc(state, op, state->regs.gpr[op.r.rt]);
+ lightrec_mtc(state, op, reg, data);
}
void lightrec_rfe(struct lightrec_state *state)
void *func;
int err;
- for (;;) {
+ do {
func = lut_read(state, lut_offset(pc));
if (func && func != state->get_next_block)
break;
} else {
lightrec_recompiler_add(state->rec, block);
}
-
- if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
- state->current_cycle >= state->target_cycle)
- break;
- }
+ } while (state->exit_flags == LIGHTREC_EXIT_NORMAL
+ && state->current_cycle < state->target_cycle);
state->next_pc = pc;
return func;
*size = (unsigned int) new_code_size;
+ if (state->ops.code_inv)
+ state->ops.code_inv(code, new_code_size);
+
return code;
}
jit_prolog();
jit_frame(256);
+ jit_getarg(LIGHTREC_REG_STATE, jit_arg());
+ jit_getarg(JIT_V0, jit_arg());
jit_getarg(JIT_V1, jit_arg());
jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg());
for (i = 0; i < NUM_REGS; i++)
jit_movr(JIT_V(i + FIRST_REG), JIT_V(i + FIRST_REG));
- /* Pass lightrec_state structure to blocks, using the last callee-saved
- * register that Lightning provides */
- jit_movi(LIGHTREC_REG_STATE, (intptr_t) state);
-
loop = jit_label();
/* Call the block's code */
jit_movr(LIGHTREC_REG_CYCLE, JIT_V0);
}
+ /* Reset JIT_V0 to the next PC */
+ jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, next_pc));
+
/* If we get non-NULL, loop */
jit_patch_at(jit_bnei(JIT_V1, 0), loop);
block->_jit = _jit;
lightrec_regcache_reset(cstate->reg_cache);
+ lightrec_preload_pc(cstate->reg_cache);
+
cstate->cycles = 0;
cstate->nb_local_branches = 0;
cstate->nb_targets = 0;
u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
{
- s32 (*func)(void *, s32) = (void *)state->dispatcher->function;
+ s32 (*func)(struct lightrec_state *, u32, void *, s32) = (void *)state->dispatcher->function;
void *block_trace;
s32 cycles_delta;
if (block_trace) {
cycles_delta = state->target_cycle - state->current_cycle;
- cycles_delta = (*func)(block_trace, cycles_delta);
+ cycles_delta = (*func)(state, state->next_pc,
+ block_trace, cycles_delta);
state->current_cycle = state->target_cycle - cycles_delta;
}
void (*cop2_op)(struct lightrec_state *state, u32 op);
void (*enable_ram)(struct lightrec_state *state, _Bool enable);
_Bool (*hw_direct)(u32 kaddr, _Bool is_write, u8 size);
+ void (*code_inv)(void *addr, u32 len);
};
struct lightrec_registers {
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
+#include "constprop.h"
#include "lightrec-config.h"
#include "disassembler.h"
#include "lightrec.h"
for (i = offset; ; i++) {
c = list[i].c;
- if (opcode_reads_register(c, reg)) {
- if (i > 0 && has_delay_slot(list[i - 1].c))
- break;
-
+ if (opcode_reads_register(c, reg))
return i;
- }
- if (op_flag_sync(list[i].flags) ||
- has_delay_slot(c) || opcode_writes_register(c, reg))
+ if (op_flag_sync(list[i].flags)
+ || (op_flag_no_ds(list[i].flags) && has_delay_slot(c))
+ || is_delay_slot(list, i)
+ || opcode_writes_register(c, reg))
break;
}
{
unsigned int i;
- if (op_flag_sync(list[offset].flags))
+ if (op_flag_sync(list[offset].flags) || is_delay_slot(list, offset))
return false;
for (i = offset + 1; ; i++) {
return false;
}
-static u32 lightrec_propagate_consts(const struct opcode *op,
- const struct opcode *prev,
- u32 known, u32 *v)
+static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset,
+ struct constprop_data *v)
{
- union code c = prev->c;
-
- /* Register $zero is always, well, zero */
- known |= BIT(0);
- v[0] = 0;
-
- if (op_flag_sync(op->flags))
- return BIT(0);
-
- switch (c.i.op) {
- case OP_SPECIAL:
- switch (c.r.op) {
- case OP_SPECIAL_SLL:
- if (known & BIT(c.r.rt)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = v[c.r.rt] << c.r.imm;
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_SRL:
- if (known & BIT(c.r.rt)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = v[c.r.rt] >> c.r.imm;
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_SRA:
- if (known & BIT(c.r.rt)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm;
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_SLLV:
- if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f);
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_SRLV:
- if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f);
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_SRAV:
- if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = (s32)v[c.r.rt]
- >> (v[c.r.rs] & 0x1f);
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_ADD:
- case OP_SPECIAL_ADDU:
- if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs];
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_SUB:
- case OP_SPECIAL_SUBU:
- if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = v[c.r.rt] - v[c.r.rs];
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_AND:
- if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = v[c.r.rt] & v[c.r.rs];
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_OR:
- if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = v[c.r.rt] | v[c.r.rs];
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_XOR:
- if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs];
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_NOR:
- if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]);
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_SLT:
- if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt];
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_SLTU:
- if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = v[c.r.rs] < v[c.r.rt];
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_SPECIAL_MULT:
- case OP_SPECIAL_MULTU:
- case OP_SPECIAL_DIV:
- case OP_SPECIAL_DIVU:
- if (OPT_FLAG_MULT_DIV && c.r.rd)
- known &= ~BIT(c.r.rd);
- if (OPT_FLAG_MULT_DIV && c.r.imm)
- known &= ~BIT(c.r.imm);
- break;
- case OP_SPECIAL_MFLO:
- case OP_SPECIAL_MFHI:
- known &= ~BIT(c.r.rd);
- break;
- default:
- break;
- }
- break;
- case OP_META_MULT2:
- case OP_META_MULTU2:
- if (OPT_FLAG_MULT_DIV && (known & BIT(c.r.rs))) {
- if (c.r.rd) {
- known |= BIT(c.r.rd);
-
- if (c.r.op < 32)
- v[c.r.rd] = v[c.r.rs] << c.r.op;
- else
- v[c.r.rd] = 0;
- }
-
- if (c.r.imm) {
- known |= BIT(c.r.imm);
-
- if (c.r.op >= 32)
- v[c.r.imm] = v[c.r.rs] << (c.r.op - 32);
- else if (c.i.op == OP_META_MULT2)
- v[c.r.imm] = (s32) v[c.r.rs] >> (32 - c.r.op);
- else
- v[c.r.imm] = v[c.r.rs] >> (32 - c.r.op);
- }
- } else {
- if (OPT_FLAG_MULT_DIV && c.r.rd)
- known &= ~BIT(c.r.rd);
- if (OPT_FLAG_MULT_DIV && c.r.imm)
- known &= ~BIT(c.r.imm);
- }
- break;
- case OP_REGIMM:
- break;
- case OP_ADDI:
- case OP_ADDIU:
- if (known & BIT(c.i.rs)) {
- known |= BIT(c.i.rt);
- v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm;
- } else {
- known &= ~BIT(c.i.rt);
- }
- break;
- case OP_SLTI:
- if (known & BIT(c.i.rs)) {
- known |= BIT(c.i.rt);
- v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm;
- } else {
- known &= ~BIT(c.i.rt);
- }
- break;
- case OP_SLTIU:
- if (known & BIT(c.i.rs)) {
- known |= BIT(c.i.rt);
- v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm;
- } else {
- known &= ~BIT(c.i.rt);
- }
- break;
- case OP_ANDI:
- if (known & BIT(c.i.rs)) {
- known |= BIT(c.i.rt);
- v[c.i.rt] = v[c.i.rs] & c.i.imm;
- } else {
- known &= ~BIT(c.i.rt);
- }
- break;
- case OP_ORI:
- if (known & BIT(c.i.rs)) {
- known |= BIT(c.i.rt);
- v[c.i.rt] = v[c.i.rs] | c.i.imm;
- } else {
- known &= ~BIT(c.i.rt);
- }
- break;
- case OP_XORI:
- if (known & BIT(c.i.rs)) {
- known |= BIT(c.i.rt);
- v[c.i.rt] = v[c.i.rs] ^ c.i.imm;
- } else {
- known &= ~BIT(c.i.rt);
- }
- break;
- case OP_LUI:
- known |= BIT(c.i.rt);
- v[c.i.rt] = c.i.imm << 16;
- break;
- case OP_CP0:
- switch (c.r.rs) {
- case OP_CP0_MFC0:
- case OP_CP0_CFC0:
- known &= ~BIT(c.r.rt);
- break;
- }
- break;
- case OP_CP2:
- if (c.r.op == OP_CP2_BASIC) {
- switch (c.r.rs) {
- case OP_CP2_BASIC_MFC2:
- case OP_CP2_BASIC_CFC2:
- known &= ~BIT(c.r.rt);
- break;
- }
- }
- break;
- case OP_LB:
- case OP_LH:
- case OP_LWL:
- case OP_LW:
- case OP_LBU:
- case OP_LHU:
- case OP_LWR:
- case OP_LWC2:
- known &= ~BIT(c.i.rt);
- break;
- case OP_META_MOV:
- if (known & BIT(c.r.rs)) {
- known |= BIT(c.r.rd);
- v[c.r.rd] = v[c.r.rs];
- } else {
- known &= ~BIT(c.r.rd);
- }
- break;
- case OP_META_EXTC:
- if (known & BIT(c.i.rs)) {
- known |= BIT(c.i.rt);
- v[c.i.rt] = (s32)(s8)v[c.i.rs];
- } else {
- known &= ~BIT(c.i.rt);
- }
- break;
- case OP_META_EXTS:
- if (known & BIT(c.i.rs)) {
- known |= BIT(c.i.rt);
- v[c.i.rt] = (s32)(s16)v[c.i.rs];
- } else {
- known &= ~BIT(c.i.rt);
- }
- break;
- default:
- break;
- }
-
- return known;
-}
-
-static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset)
-{
- struct opcode *prev, *prev2 = NULL, *curr = &list[offset];
+ struct opcode *ldop = NULL, *curr = &list[offset], *next;
struct opcode *to_change, *to_nop;
int idx, idx2;
if (curr->r.imm != 24 && curr->r.imm != 16)
return;
- idx = find_prev_writer(list, offset, curr->r.rt);
+ if (is_delay_slot(list, offset))
+ return;
+
+ idx = find_next_reader(list, offset + 1, curr->r.rd);
if (idx < 0)
return;
- prev = &list[idx];
+ next = &list[idx];
- if (prev->i.op != OP_SPECIAL || prev->r.op != OP_SPECIAL_SLL ||
- prev->r.imm != curr->r.imm || prev->r.rd != curr->r.rt)
+ if (next->i.op != OP_SPECIAL || next->r.op != OP_SPECIAL_SRA ||
+ next->r.imm != curr->r.imm || next->r.rt != curr->r.rd)
return;
- if (prev->r.rd != prev->r.rt && curr->r.rd != curr->r.rt) {
+ if (curr->r.rd != curr->r.rt && next->r.rd != next->r.rt) {
/* sll rY, rX, 16
* ...
- * srl rZ, rY, 16 */
+ * sra rZ, rY, 16 */
- if (!reg_is_dead(list, offset, curr->r.rt) ||
- reg_is_read_or_written(list, idx, offset, curr->r.rd))
+ if (!reg_is_dead(list, idx, curr->r.rd) ||
+ reg_is_read_or_written(list, offset, idx, next->r.rd))
return;
/* If rY is dead after the SRL, and rZ is not used after the SLL,
* we can change rY to rZ */
pr_debug("Detected SLL/SRA with middle temp register\n");
- prev->r.rd = curr->r.rd;
- curr->r.rt = prev->r.rd;
+ curr->r.rd = next->r.rd;
+ next->r.rt = curr->r.rd;
}
- /* We got a SLL/SRA combo. If imm #16, that's a cast to u16.
- * If imm #24 that's a cast to u8.
+ /* We got a SLL/SRA combo. If imm #16, that's a cast to s16.
+ * If imm #24 that's a cast to s8.
*
* First of all, make sure that the target register of the SLL is not
- * read before the SRA. */
+ * read after the SRA. */
- if (prev->r.rd == prev->r.rt) {
+ if (curr->r.rd == curr->r.rt) {
/* sll rX, rX, 16
* ...
- * srl rY, rX, 16 */
- to_change = curr;
- to_nop = prev;
+ * sra rY, rX, 16 */
+ to_change = next;
+ to_nop = curr;
/* rX is used after the SRA - we cannot convert it. */
- if (prev->r.rd != curr->r.rd && !reg_is_dead(list, offset, prev->r.rd))
+ if (curr->r.rd != next->r.rd && !reg_is_dead(list, idx, curr->r.rd))
return;
} else {
/* sll rY, rX, 16
* ...
- * srl rY, rY, 16 */
- to_change = prev;
- to_nop = curr;
+ * sra rY, rY, 16 */
+ to_change = curr;
+ to_nop = next;
}
- idx2 = find_prev_writer(list, idx, prev->r.rt);
+ idx2 = find_prev_writer(list, offset, curr->r.rt);
if (idx2 >= 0) {
/* Note that PSX games sometimes do casts after
* a LHU or LBU; in this case we can change the
* load opcode to a LH or LB, and the cast can
* be changed to a MOV or a simple NOP. */
- prev2 = &list[idx2];
+ ldop = &list[idx2];
- if (curr->r.rd != prev2->i.rt &&
- !reg_is_dead(list, offset, prev2->i.rt))
- prev2 = NULL;
- else if (curr->r.imm == 16 && prev2->i.op == OP_LHU)
- prev2->i.op = OP_LH;
- else if (curr->r.imm == 24 && prev2->i.op == OP_LBU)
- prev2->i.op = OP_LB;
+ if (next->r.rd != ldop->i.rt &&
+ !reg_is_dead(list, idx, ldop->i.rt))
+ ldop = NULL;
+ else if (curr->r.imm == 16 && ldop->i.op == OP_LHU)
+ ldop->i.op = OP_LH;
+ else if (curr->r.imm == 24 && ldop->i.op == OP_LBU)
+ ldop->i.op = OP_LB;
else
- prev2 = NULL;
+ ldop = NULL;
- if (prev2) {
- if (curr->r.rd == prev2->i.rt) {
+ if (ldop) {
+ if (next->r.rd == ldop->i.rt) {
to_change->opcode = 0;
- } else if (reg_is_dead(list, offset, prev2->i.rt) &&
- !reg_is_read_or_written(list, idx2 + 1, offset, curr->r.rd)) {
+ } else if (reg_is_dead(list, idx, ldop->i.rt) &&
+ !reg_is_read_or_written(list, idx2 + 1, idx, next->r.rd)) {
/* The target register of the SRA is dead after the
* LBU/LHU; we can change the target register of the
* LBU/LHU to the one of the SRA. */
- prev2->i.rt = curr->r.rd;
+ v[ldop->i.rt].known = 0;
+ v[ldop->i.rt].sign = 0;
+ ldop->i.rt = next->r.rd;
to_change->opcode = 0;
} else {
to_change->i.op = OP_META_MOV;
- to_change->r.rd = curr->r.rd;
- to_change->r.rs = prev2->i.rt;
+ to_change->r.rd = next->r.rd;
+ to_change->r.rs = ldop->i.rt;
}
if (to_nop->r.imm == 24)
pr_debug("Convert LBU+SLL+SRA to LB\n");
else
pr_debug("Convert LHU+SLL+SRA to LH\n");
+
+ v[ldop->i.rt].known = 0;
+			v[ldop->i.rt].sign = 0xffffff80 << (24 - curr->r.imm);
}
}
- if (!prev2) {
+ if (!ldop) {
pr_debug("Convert SLL/SRA #%u to EXT%c\n",
- prev->r.imm,
- prev->r.imm == 24 ? 'C' : 'S');
+ curr->r.imm, curr->r.imm == 24 ? 'C' : 'S');
- if (to_change == prev) {
- to_change->i.rs = prev->r.rt;
- to_change->i.rt = curr->r.rd;
+ if (to_change == curr) {
+ to_change->i.rs = curr->r.rt;
+ to_change->i.rt = next->r.rd;
} else {
- to_change->i.rt = curr->r.rd;
- to_change->i.rs = prev->r.rt;
+ to_change->i.rt = next->r.rd;
+ to_change->i.rs = curr->r.rt;
}
if (to_nop->r.imm == 24)
to_nop->opcode = 0;
}
-static void lightrec_remove_useless_lui(struct block *block, unsigned int offset,
- u32 known, u32 *values)
+static void
+lightrec_remove_useless_lui(struct block *block, unsigned int offset,
+ const struct constprop_data *v)
{
struct opcode *list = block->opcode_list,
*op = &block->opcode_list[offset];
int reader;
- if (!op_flag_sync(op->flags) && (known & BIT(op->i.rt)) &&
- values[op->i.rt] == op->i.imm << 16) {
+ if (!op_flag_sync(op->flags) && is_known(v, op->i.rt) &&
+ v[op->i.rt].value == op->i.imm << 16) {
pr_debug("Converting duplicated LUI to NOP\n");
op->opcode = 0x0;
return;
}
- if (op->i.imm != 0 || op->i.rt == 0)
+ if (op->i.imm != 0 || op->i.rt == 0 || offset == block->nb_ops - 1)
return;
reader = find_next_reader(list, offset + 1, op->i.rt);
return popcount32(value) == 1;
}
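+/* Rewrite operands that constant propagation proved to be zero so that they
+ * reference $zero directly; later transformations can then simplify or drop
+ * the opcode. */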
+static void lightrec_patch_known_zero(struct opcode *op,
+ const struct constprop_data *v)
+{
+ switch (op->i.op) {
+ case OP_SPECIAL:
+ switch (op->r.op) {
+ case OP_SPECIAL_JR:
+ case OP_SPECIAL_JALR:
+ case OP_SPECIAL_MTHI:
+ case OP_SPECIAL_MTLO:
+ if (is_known_zero(v, op->r.rs))
+ op->r.rs = 0;
+ break;
+ default:
+ if (is_known_zero(v, op->r.rs))
+ op->r.rs = 0;
+ fallthrough;
+ case OP_SPECIAL_SLL:
+ case OP_SPECIAL_SRL:
+ case OP_SPECIAL_SRA:
+ if (is_known_zero(v, op->r.rt))
+ op->r.rt = 0;
+ break;
+ case OP_SPECIAL_SYSCALL:
+ case OP_SPECIAL_BREAK:
+ case OP_SPECIAL_MFHI:
+ case OP_SPECIAL_MFLO:
+ break;
+ }
+ break;
+ case OP_CP0:
+ switch (op->r.rs) {
+ case OP_CP0_MTC0:
+ case OP_CP0_CTC0:
+ if (is_known_zero(v, op->r.rt))
+ op->r.rt = 0;
+ break;
+ default:
+ break;
+ }
+ break;
+ case OP_CP2:
+ if (op->r.op == OP_CP2_BASIC) {
+ switch (op->r.rs) {
+ case OP_CP2_BASIC_MTC2:
+ case OP_CP2_BASIC_CTC2:
+ if (is_known_zero(v, op->r.rt))
+ op->r.rt = 0;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case OP_BEQ:
+ case OP_BNE:
+ if (is_known_zero(v, op->i.rt))
+ op->i.rt = 0;
+ fallthrough;
+ case OP_REGIMM:
+ case OP_BLEZ:
+ case OP_BGTZ:
+ case OP_ADDI:
+ case OP_ADDIU:
+ case OP_SLTI:
+ case OP_SLTIU:
+ case OP_ANDI:
+ case OP_ORI:
+ case OP_XORI:
+ case OP_META_MOV:
+ case OP_META_EXTC:
+ case OP_META_EXTS:
+ case OP_META_MULT2:
+ case OP_META_MULTU2:
+ if (is_known_zero(v, op->i.rs))
+ op->i.rs = 0;
+ break;
+ case OP_SB:
+ case OP_SH:
+ case OP_SWL:
+ case OP_SW:
+ case OP_SWR:
+ if (is_known_zero(v, op->i.rt))
+ op->i.rt = 0;
+ fallthrough;
+ case OP_LB:
+ case OP_LH:
+ case OP_LWL:
+ case OP_LW:
+ case OP_LBU:
+ case OP_LHU:
+ case OP_LWR:
+ case OP_LWC2:
+ case OP_SWC2:
+ if (is_known(v, op->i.rs)
+ && kunseg(v[op->i.rs].value) == 0)
+ op->i.rs = 0;
+ break;
+ default:
+ break;
+ }
+}
+
+static void lightrec_reset_syncs(struct block *block)
+{
+ struct opcode *op, *list = block->opcode_list;
+ unsigned int i;
+ s32 offset;
+
+ for (i = 0; i < block->nb_ops; i++)
+ list[i].flags &= ~LIGHTREC_SYNC;
+
+ for (i = 0; i < block->nb_ops; i++) {
+ op = &list[i];
+
+ if (op_flag_local_branch(op->flags) && has_delay_slot(op->c)) {
+ offset = i + 1 + (s16)op->i.imm;
+ list[offset].flags |= LIGHTREC_SYNC;
+ }
+ }
+}
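+
+/* Recompute all SYNC flags from scratch: only the targets of local branches
+ * need a sync point, so NOPing a never-taken branch may remove syncs and
+ * unlock further constant propagation. */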
+
static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
- struct opcode *list = block->opcode_list;
- struct opcode *prev, *op = NULL;
- u32 known = BIT(0);
- u32 values[32] = { 0 };
+ struct opcode *op, *list = block->opcode_list;
+ struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
unsigned int i;
+ bool local;
u8 tmp;
for (i = 0; i < block->nb_ops; i++) {
- prev = op;
op = &list[i];
- if (prev)
- known = lightrec_propagate_consts(op, prev, known, values);
+ lightrec_consts_propagate(list, i, v);
+
+ lightrec_patch_known_zero(op, v);
/* Transform all opcodes detected as useless to real NOPs
* (0x0: SLL r0, r0, #0) */
switch (op->i.op) {
case OP_BEQ:
- if (op->i.rs == op->i.rt) {
+ if (op->i.rs == op->i.rt ||
+ (is_known(v, op->i.rs) && is_known(v, op->i.rt) &&
+ v[op->i.rs].value == v[op->i.rt].value)) {
+ if (op->i.rs != op->i.rt)
+ pr_debug("Found always-taken BEQ\n");
+
op->i.rs = 0;
op->i.rt = 0;
+ } else if (v[op->i.rs].known & v[op->i.rt].known &
+ (v[op->i.rs].value ^ v[op->i.rt].value)) {
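+			/* At least one bit is known in both operands and
+			 * differs, so the operands can never be equal. */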
+ pr_debug("Found never-taken BEQ\n");
+
+ local = op_flag_local_branch(op->flags);
+ op->opcode = 0;
+ op->flags = 0;
+
+ if (local)
+ lightrec_reset_syncs(block);
} else if (op->i.rs == 0) {
op->i.rs = op->i.rt;
op->i.rt = 0;
break;
case OP_BNE:
- if (op->i.rs == 0) {
+ if (v[op->i.rs].known & v[op->i.rt].known &
+ (v[op->i.rs].value ^ v[op->i.rt].value)) {
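+			/* A bit known in both operands differs, so rs != rt:
+			 * turn the always-taken BNE into an unconditional
+			 * BEQ r0, r0. */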
+ pr_debug("Found always-taken BNE\n");
+
+ op->i.op = OP_BEQ;
+ op->i.rs = 0;
+ op->i.rt = 0;
+ } else if (is_known(v, op->i.rs) && is_known(v, op->i.rt) &&
+ v[op->i.rs].value == v[op->i.rt].value) {
+ pr_debug("Found never-taken BNE\n");
+
+ local = op_flag_local_branch(op->flags);
+ op->opcode = 0;
+ op->flags = 0;
+
+ if (local)
+ lightrec_reset_syncs(block);
+ } else if (op->i.rs == 0) {
op->i.rs = op->i.rt;
op->i.rt = 0;
}
break;
+ case OP_BLEZ:
+ if (v[op->i.rs].known & BIT(31) &&
+ v[op->i.rs].value & BIT(31)) {
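+			/* The sign bit is known to be set: rs is negative,
+			 * so the branch is always taken. */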
+ pr_debug("Found always-taken BLEZ\n");
+
+ op->i.op = OP_BEQ;
+ op->i.rs = 0;
+ op->i.rt = 0;
+ }
+ break;
+
+ case OP_BGTZ:
+ if (v[op->i.rs].known & BIT(31) &&
+ v[op->i.rs].value & BIT(31)) {
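+			/* The sign bit is known to be set: rs is negative,
+			 * so the branch is never taken. */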
+ pr_debug("Found never-taken BGTZ\n");
+
+ local = op_flag_local_branch(op->flags);
+ op->opcode = 0;
+ op->flags = 0;
+
+ if (local)
+ lightrec_reset_syncs(block);
+ }
+ break;
+
case OP_LUI:
- if (!prev || !has_delay_slot(prev->c))
+ if (i == 0 || !has_delay_slot(list[i - 1].c))
lightrec_modify_lui(block, i);
- lightrec_remove_useless_lui(block, i, known, values);
+ lightrec_remove_useless_lui(block, i, v);
break;
/* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
op->r.rd = op->i.rt;
}
break;
+ case OP_ANDI:
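+		/* If every bit that the mask would clear is already known to
+		 * be zero in rs, the ANDI cannot change the value. */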
+ if (bits_are_known_zero(v, op->i.rs, ~op->i.imm)) {
+ pr_debug("Found useless ANDI 0x%x\n", op->i.imm);
+
+ if (op->i.rs == op->i.rt) {
+ op->opcode = 0;
+ } else {
+ op->i.op = OP_META_MOV;
+ op->r.rd = op->i.rt;
+ }
+ }
+ break;
+ case OP_REGIMM:
+ switch (op->r.rt) {
+ case OP_REGIMM_BLTZ:
+ case OP_REGIMM_BGEZ:
+ if (!(v[op->r.rs].known & BIT(31)))
+ break;
+
+ if (!!(v[op->r.rs].value & BIT(31))
+ ^ (op->r.rt == OP_REGIMM_BGEZ)) {
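+				/* Taken when rs is known negative for BLTZ,
+				 * or known non-negative for BGEZ. */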
+ pr_debug("Found always-taken BLTZ/BGEZ\n");
+ op->i.op = OP_BEQ;
+ op->i.rs = 0;
+ op->i.rt = 0;
+ } else {
+ pr_debug("Found never-taken BLTZ/BGEZ\n");
+
+ local = op_flag_local_branch(op->flags);
+ op->opcode = 0;
+ op->flags = 0;
+
+ if (local)
+ lightrec_reset_syncs(block);
+ }
+ break;
+ case OP_REGIMM_BLTZAL:
+ case OP_REGIMM_BGEZAL:
+ /* TODO: Detect always-taken and replace with JAL */
+ break;
+ }
+ break;
case OP_SPECIAL:
switch (op->r.op) {
+ case OP_SPECIAL_SRAV:
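+			/* When all five low bits of the shift amount are
+			 * known, the variable shift can be converted to an
+			 * immediate shift (SLLV/SRLV are handled the same
+			 * way below). */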
+ if ((v[op->r.rs].known & 0x1f) != 0x1f)
+ break;
+
+ pr_debug("Convert SRAV to SRA\n");
+ op->r.imm = v[op->r.rs].value & 0x1f;
+ op->r.op = OP_SPECIAL_SRA;
+
+ fallthrough;
case OP_SPECIAL_SRA:
if (op->r.imm == 0) {
pr_debug("Convert SRA #0 to MOV\n");
op->r.rs = op->r.rt;
break;
}
-
- lightrec_optimize_sll_sra(block->opcode_list, i);
break;
+
+ case OP_SPECIAL_SLLV:
+ if ((v[op->r.rs].known & 0x1f) != 0x1f)
+ break;
+
+ pr_debug("Convert SLLV to SLL\n");
+ op->r.imm = v[op->r.rs].value & 0x1f;
+ op->r.op = OP_SPECIAL_SLL;
+
+ fallthrough;
case OP_SPECIAL_SLL:
+ if (op->r.imm == 0) {
+ pr_debug("Convert SLL #0 to MOV\n");
+ op->i.op = OP_META_MOV;
+ op->r.rs = op->r.rt;
+ }
+
+ lightrec_optimize_sll_sra(block->opcode_list, i, v);
+ break;
+
+ case OP_SPECIAL_SRLV:
+ if ((v[op->r.rs].known & 0x1f) != 0x1f)
+ break;
+
+ pr_debug("Convert SRLV to SRL\n");
+ op->r.imm = v[op->r.rs].value & 0x1f;
+ op->r.op = OP_SPECIAL_SRL;
+
+ fallthrough;
case OP_SPECIAL_SRL:
if (op->r.imm == 0) {
- pr_debug("Convert SLL/SRL #0 to MOV\n");
+ pr_debug("Convert SRL #0 to MOV\n");
op->i.op = OP_META_MOV;
op->r.rs = op->r.rt;
}
break;
+
case OP_SPECIAL_MULT:
case OP_SPECIAL_MULTU:
- if ((known & BIT(op->r.rs)) &&
- is_power_of_two(values[op->r.rs])) {
+ if (is_known(v, op->r.rs) &&
+ is_power_of_two(v[op->r.rs].value)) {
tmp = op->c.i.rs;
op->c.i.rs = op->c.i.rt;
op->c.i.rt = tmp;
- } else if (!(known & BIT(op->r.rt)) ||
- !is_power_of_two(values[op->r.rt])) {
+ } else if (!is_known(v, op->r.rt) ||
+ !is_power_of_two(v[op->r.rt].value)) {
break;
}
pr_debug("Multiply by power-of-two: %u\n",
- values[op->r.rt]);
+ v[op->r.rt].value);
if (op->r.op == OP_SPECIAL_MULT)
op->i.op = OP_META_MULT2;
else
op->i.op = OP_META_MULTU2;
- op->r.op = ctz32(values[op->r.rt]);
+ op->r.op = ctz32(v[op->r.rt].value);
break;
case OP_SPECIAL_OR:
case OP_SPECIAL_ADD:
op.opcode == 0 || next_op.opcode == 0)
continue;
- if (i && has_delay_slot(block->opcode_list[i - 1].c) &&
- !op_flag_no_ds(block->opcode_list[i - 1].flags))
+ if (is_delay_slot(block->opcode_list, i))
continue;
if (op_flag_sync(next->flags))
continue;
}
- pr_debug("Adding sync at offset 0x%x\n", offset << 2);
-
- block->opcode_list[offset].flags |= LIGHTREC_SYNC;
list->flags |= LIGHTREC_LOCAL_BRANCH;
}
+ lightrec_reset_syncs(block);
+
return 0;
}
}
}
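+/* An opcode sits in a delay slot if the previous opcode has one, unless that
+ * branch was flagged to be compiled without its delay slot. */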
+bool is_delay_slot(const struct opcode *list, unsigned int offset)
+{
+ return offset > 0
+ && !op_flag_no_ds(list[offset - 1].flags)
+ && has_delay_slot(list[offset - 1].c);
+}
+
bool should_emulate(const struct opcode *list)
{
return op_flag_emulate_branch(list->flags) && has_delay_slot(list->c);
static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
{
- struct opcode *prev = NULL, *list = NULL;
+ struct opcode *list;
enum psx_map psx_map;
- u32 known = BIT(0);
- u32 values[32] = { 0 };
+ struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
unsigned int i;
u32 val, kunseg_val;
bool no_mask;
for (i = 0; i < block->nb_ops; i++) {
- prev = list;
list = &block->opcode_list[i];
- if (prev)
- known = lightrec_propagate_consts(list, prev, known, values);
+ lightrec_consts_propagate(block->opcode_list, i, v);
switch (list->i.op) {
case OP_SB:
/* Detect writes whose destination address is inside the
* current block, using constant propagation. When these
* occur, we mark the blocks as not compilable. */
- if ((known & BIT(list->i.rs)) &&
- kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
- kunseg(values[list->i.rs]) < (kunseg(block->pc) +
- block->nb_ops * 4)) {
+ if (is_known(v, list->i.rs) &&
+ kunseg(v[list->i.rs].value) >= kunseg(block->pc) &&
+ kunseg(v[list->i.rs].value) < (kunseg(block->pc) +
+ block->nb_ops * 4)) {
pr_debug("Self-modifying block detected\n");
block_set_flags(block, BLOCK_NEVER_COMPILE);
list->flags |= LIGHTREC_SMC;
case OP_LWL:
case OP_LWR:
case OP_LWC2:
- if (OPT_FLAG_IO && (known & BIT(list->i.rs))) {
- val = values[list->i.rs] + (s16) list->i.imm;
- kunseg_val = kunseg(val);
- psx_map = lightrec_get_map_idx(state, kunseg_val);
+ if (OPT_FLAG_IO &&
+ (v[list->i.rs].known | v[list->i.rs].sign)) {
+ psx_map = lightrec_get_constprop_map(state, v,
+ list->i.rs,
+ (s16) list->i.imm);
+
+ if (psx_map != PSX_MAP_UNKNOWN && !is_known(v, list->i.rs))
+ pr_debug("Detected map thanks to bit-level const propagation!\n");
list->flags &= ~LIGHTREC_IO_MASK;
- no_mask = val == kunseg_val;
+
+ val = v[list->i.rs].value + (s16) list->i.imm;
+ kunseg_val = kunseg(val);
+
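+			/* The kunseg masking can be skipped when the top
+			 * three address bits are known to be zero, i.e. the
+			 * address is already a KUSEG address. */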
+ no_mask = (v[list->i.rs].known & ~v[list->i.rs].value
+ & 0xe0000000) == 0xe0000000;
switch (psx_map) {
case PSX_MAP_KERNEL_USER_RAM:
if (no_mask)
list->flags |= LIGHTREC_NO_MASK;
- break;
+ } else {
+ pr_debug("Flagging opcode %u as I/O access\n",
+ i);
+ list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
}
- fallthrough;
+ break;
default:
- pr_debug("Flagging opcode %u as I/O access\n",
- i);
- list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
break;
}
}
static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
{
- struct opcode *prev, *list = NULL;
+ struct opcode *list = NULL;
+ struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
u8 reg_hi, reg_lo;
unsigned int i;
- u32 known = BIT(0);
- u32 values[32] = { 0 };
for (i = 0; i < block->nb_ops - 1; i++) {
- prev = list;
list = &block->opcode_list[i];
- if (prev)
- known = lightrec_propagate_consts(list, prev, known, values);
+ lightrec_consts_propagate(block->opcode_list, i, v);
switch (list->i.op) {
case OP_SPECIAL:
/* If we are dividing by a non-zero constant, don't
* emit the div-by-zero check. */
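+		/* Bit-level propagation: a single bit known to be set proves
+		 * the divisor non-zero even if its exact value is unknown. */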
if (lightrec_always_skip_div_check() ||
- ((known & BIT(list->c.r.rt)) && values[list->c.r.rt]))
+ (v[list->r.rt].known & v[list->r.rt].value)) {
list->flags |= LIGHTREC_NO_DIV_CHECK;
+ }
fallthrough;
case OP_SPECIAL_MULT:
case OP_SPECIAL_MULTU:
}
/* Don't support opcodes in delay slots */
- if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
+ if (is_delay_slot(block->opcode_list, i) ||
op_flag_no_ds(list->flags)) {
continue;
}
_Bool opcode_reads_register(union code op, u8 reg);
_Bool opcode_writes_register(union code op, u8 reg);
_Bool has_delay_slot(union code op);
+_Bool is_delay_slot(const struct opcode *list, unsigned int offset);
_Bool load_in_delay_slot(union code op);
_Bool opcode_is_io(union code op);
_Bool is_unconditional_jump(union code c);
#include <stdbool.h>
#include <stddef.h>
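+/* Pseudo GPR index used by the register cache to track the emulated PC,
+ * derived from the offset of 'next_pc' within struct lightrec_state. */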
+#define REG_PC (offsetof(struct lightrec_state, next_pc) / sizeof(u32))
+
enum reg_priority {
REG_IS_TEMP,
REG_IS_TEMP_VALUE,
struct native_register {
bool used, output, extend, extended,
zero_extend, zero_extended, locked;
- s8 emulated_register;
+ s16 emulated_register;
intptr_t value;
enum reg_priority prio;
};
return false;
}
-static inline s8 lightrec_get_hardwired_reg(u8 reg)
+static inline s8 lightrec_get_hardwired_reg(u16 reg)
{
#if defined(__mips__) || defined(__alpha__) || defined(__riscv)
if (reg == 0)
for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) {
elm = &cache->lightrec_regs[i - 1];
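+		/* Never pick a locked register for reallocation */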
- if (!elm->used && elm->prio < best) {
+ if (!elm->used && !elm->locked && elm->prio < best) {
nreg = elm;
best = elm->prio;
}
static struct native_register * find_mapped_reg(struct regcache *cache,
- u8 reg, bool out)
+ u16 reg, bool out)
{
unsigned int i;
}
static struct native_register * alloc_in_out(struct regcache *cache,
- u8 reg, bool out)
+ u16 reg, bool out)
{
struct native_register *elm, *nreg = NULL;
enum reg_priority best = REG_NB_PRIORITIES;
for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
elm = &cache->lightrec_regs[i];
- if (!elm->used && elm->prio < best) {
+ if (!elm->used && !elm->locked && elm->prio < best) {
nreg = elm;
best = elm->prio;
lightning_reg_to_lightrec(cache, jit_reg), jit_reg);
}
-/* lightrec_lock_reg: the register will be cleaned if dirty, then locked.
- * A locked register can only be used as input, not as output. */
-void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
-{
- struct native_register *reg;
-
- if (lightrec_reg_is_zero(jit_reg))
- return;
-
- reg = lightning_reg_to_lightrec(cache, jit_reg);
- lightrec_clean_reg(cache, _jit, jit_reg);
-
- reg->locked = true;
-}
-
u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
{
struct native_register *reg;
}
u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit,
- u8 reg, u8 flags)
+ u16 reg, u8 flags)
{
struct native_register *nreg;
u8 jit_reg;
}
u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit,
- u8 reg, u8 flags)
+ u16 reg, u8 flags)
{
struct native_register *nreg;
u8 jit_reg;
return jit_reg;
}
-u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
- u8 reg, u8 jit_reg)
+static bool reg_pc_is_mapped(struct regcache *cache)
{
- struct native_register *nreg;
+ struct native_register *nreg = lightning_reg_to_lightrec(cache, JIT_V0);
+
+ return nreg->prio == REG_IS_LOADED && nreg->emulated_register == REG_PC;
+}
+
+void lightrec_load_imm(struct regcache *cache,
+ jit_state_t *_jit, u8 jit_reg, u32 pc, u32 imm)
+{
+ s32 delta = imm - pc;
+
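+	/* If JIT_V0 is known to hold the current PC and the delta fits a
+	 * signed 16-bit immediate, emit a cheap PC-relative addition instead
+	 * of loading the full 32-bit constant. If jit_reg is JIT_V0 and the
+	 * delta is zero, the value is already in place and no code is
+	 * emitted at all. */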
+ if (!reg_pc_is_mapped(cache) || !can_sign_extend(delta, 16))
+ jit_movi(jit_reg, imm);
+ else if (jit_reg != JIT_V0 || delta)
+ jit_addi(jit_reg, JIT_V0, delta);
+}
+
+void lightrec_load_next_pc_imm(struct regcache *cache,
+ jit_state_t *_jit, u32 pc, u32 imm)
+{
+ struct native_register *nreg = lightning_reg_to_lightrec(cache, JIT_V0);
+
+ if (reg_pc_is_mapped(cache)) {
+ /* JIT_V0 contains next PC - so we can overwrite it */
+ lightrec_load_imm(cache, _jit, JIT_V0, pc, imm);
+ } else {
+ /* JIT_V0 contains something else - invalidate it */
+ lightrec_unload_reg(cache, _jit, JIT_V0);
+
+ jit_movi(JIT_V0, imm);
+ }
+
+ nreg->prio = REG_IS_LOADED;
+ nreg->emulated_register = -1;
+ nreg->locked = true;
+}
+
+void lightrec_load_next_pc(struct regcache *cache, jit_state_t *_jit, u8 reg)
+{
+ struct native_register *nreg_v0, *nreg;
u16 offset;
+ u8 jit_reg;
+
+ /* Invalidate JIT_V0 if it is not mapped to 'reg' */
+ nreg_v0 = lightning_reg_to_lightrec(cache, JIT_V0);
+ if (nreg_v0->prio >= REG_IS_LOADED && nreg_v0->emulated_register != reg)
+ lightrec_unload_nreg(cache, _jit, nreg_v0, JIT_V0);
nreg = find_mapped_reg(cache, reg, false);
- if (nreg) {
- jit_reg = lightrec_reg_to_lightning(cache, nreg);
- nreg->used = true;
- return jit_reg;
- }
+ if (!nreg) {
+ /* Not mapped - load the value from the register cache */
- nreg = lightning_reg_to_lightrec(cache, jit_reg);
- lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
+ offset = offsetof(struct lightrec_state, regs.gpr) + (reg << 2);
+ jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, offset);
- /* Load previous value from register cache */
- offset = offsetof(struct lightrec_state, regs.gpr) + (reg << 2);
- jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
+ nreg_v0->prio = REG_IS_LOADED;
+ nreg_v0->emulated_register = reg;
- nreg->extended = true;
- nreg->zero_extended = false;
- nreg->used = true;
- nreg->emulated_register = reg;
- nreg->prio = REG_IS_LOADED;
+ } else if (nreg == nreg_v0) {
+ /* The target register 'reg' is mapped to JIT_V0 */
- return jit_reg;
+ if (!nreg->zero_extended)
+ jit_extr_ui(JIT_V0, JIT_V0);
+
+ } else {
+ /* The target register 'reg' is mapped elsewhere. In that case,
+ * move the register's value to JIT_V0 and re-map it in the
+ * register cache. We can then safely discard the original
+ * mapped register (even if it was dirty). */
+
+ jit_reg = lightrec_reg_to_lightning(cache, nreg);
+ if (nreg->zero_extended)
+ jit_movr(JIT_V0, jit_reg);
+ else
+ jit_extr_ui(JIT_V0, jit_reg);
+
+ *nreg_v0 = *nreg;
+ lightrec_discard_nreg(nreg);
+ }
+
+ lightrec_clean_reg(cache, _jit, JIT_V0);
+
+ nreg_v0->zero_extended = true;
+ nreg_v0->locked = true;
}
static void free_reg(struct native_register *nreg)
}
void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit,
- u8 reg, bool unload)
+ u16 reg, bool unload)
{
struct native_register *nreg;
u8 jit_reg;
}
}
-void lightrec_discard_reg_if_loaded(struct regcache *cache, u8 reg)
+void lightrec_discard_reg_if_loaded(struct regcache *cache, u16 reg)
{
struct native_register *nreg;
memset(&cache->lightrec_regs, 0, sizeof(cache->lightrec_regs));
}
+void lightrec_preload_pc(struct regcache *cache)
+{
+ struct native_register *nreg;
+
+ /* The block's PC is loaded in JIT_V0 at the start of the block */
+ nreg = lightning_reg_to_lightrec(cache, JIT_V0);
+ nreg->emulated_register = REG_PC;
+ nreg->prio = REG_IS_LOADED;
+ nreg->zero_extended = true;
+}
+
struct regcache * lightrec_regcache_init(struct lightrec_state *state)
{
struct regcache *cache;
u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit);
u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit,
- u8 reg, u8 flags);
+ u16 reg, u8 flags);
u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit,
- u8 reg, u8 flags);
+ u16 reg, u8 flags);
-u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
- u8 reg, u8 jit_reg);
+void lightrec_load_imm(struct regcache *cache,
+ jit_state_t *_jit, u8 jit_reg, u32 pc, u32 imm);
+void lightrec_load_next_pc(struct regcache *cache, jit_state_t *_jit, u8 reg);
+void lightrec_load_next_pc_imm(struct regcache *cache,
+ jit_state_t *_jit, u32 pc, u32 imm);
s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value);
void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value);
void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags);
void lightrec_regcache_reset(struct regcache *cache);
+void lightrec_preload_pc(struct regcache *cache);
-void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
void lightrec_free_reg(struct regcache *cache, u8 jit_reg);
void lightrec_free_regs(struct regcache *cache);
void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
_Bool lightrec_has_dirty_regs(struct regcache *cache);
void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit,
- u8 reg, _Bool unload);
-void lightrec_discard_reg_if_loaded(struct regcache *cache, u8 reg);
+ u16 reg, _Bool unload);
+void lightrec_discard_reg_if_loaded(struct regcache *cache, u16 reg);
u8 lightrec_alloc_reg_in_address(struct regcache *cache,
- jit_state_t *_jit, u8 reg, s16 offset);
+ jit_state_t *_jit, u16 reg, s16 offset);
struct native_register * lightrec_regcache_enter_branch(struct regcache *cache);
void lightrec_regcache_leave_branch(struct regcache *cache,