[subrepo]
remote = https://github.com/pcercuei/lightrec.git
branch = master
- commit = 2cca097e538876d219b8af9663abe0ca74f68bb2
- parent = 5c00ea32a0eab812299b08acd14c25bf6ba4ca7a
+ commit = d90de68429bf9c2d67c5f5051d495d1e3131e636
+ parent = a9725dc07f40b39a5533d546b59e45377d1f9b66
method = merge
- cmdver = 0.4.1
+ cmdver = 0.4.3
cmake_minimum_required(VERSION 3.0)
-project(lightrec LANGUAGES C VERSION 0.3)
+project(lightrec LANGUAGES C VERSION 0.4)
set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries")
if (NOT BUILD_SHARED_LIBS)
add_compile_options(-fvisibility=hidden)
endif()
+set(HAS_DEFAULT_ELM ${CMAKE_COMPILER_IS_GNUCC})
+
list(APPEND LIGHTREC_SOURCES
blockcache.c
- disassembler.c
emitter.c
interpreter.c
lightrec.c
endif (NOT ENABLE_FIRST_PASS)
endif (ENABLE_THREADED_COMPILER)
+option(OPT_REMOVE_DIV_BY_ZERO_SEQ "(optimization) Remove div-by-zero check sequence" ON)
+option(OPT_REPLACE_MEMSET "(optimization) Detect and replace memset with host variant" ON)
+option(OPT_DETECT_IMPOSSIBLE_BRANCHES "(optimization) Detect impossible branches" ON)
+option(OPT_TRANSFORM_OPS "(optimization) Transform opcodes" ON)
+option(OPT_LOCAL_BRANCHES "(optimization) Detect local branches" ON)
+option(OPT_SWITCH_DELAY_SLOTS "(optimization) Switch delay slots" ON)
+option(OPT_FLAG_STORES "(optimization) Flag stores that don't require invalidation" ON)
+option(OPT_FLAG_IO "(optimization) Flag I/O opcodes whose target is known" ON)
+option(OPT_FLAG_MULT_DIV "(optimization) Flag MULT/DIV that only use one of HI/LO" ON)
+option(OPT_EARLY_UNLOAD "(optimization) Unload registers early" ON)
+
include_directories(${CMAKE_CURRENT_BINARY_DIR})
add_library(${PROJECT_NAME} ${LIGHTREC_SOURCES} ${LIGHTREC_HEADERS})
C_EXTENSIONS OFF
)
+if (CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
+ target_compile_options(${PROJECT_NAME} PRIVATE -Wall -Wno-parentheses)
+endif()
+if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
+ target_compile_options(${PROJECT_NAME} PRIVATE -Wno-initializer-overrides)
+endif()
+
option(ENABLE_TINYMM "Enable optional libtinymm dependency" OFF)
if (ENABLE_TINYMM)
find_library(TINYMM_LIBRARIES tinymm REQUIRED)
target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBLIGHTNING})
if (LOG_LEVEL STREQUAL Debug)
- find_library(LIBOPCODES NAMES opcodes-multiarch opcodes)
- find_path(LIBOPCODES_INCLUDE_DIR dis-asm.h)
-
- if (NOT LIBOPCODES OR NOT LIBOPCODES_INCLUDE_DIR)
- message(SEND_ERROR "Debug log level requires libopcodes (from binutils) to be installed.")
- endif ()
-
set(ENABLE_DISASSEMBLER ON)
- include_directories(${LIBOPCODES_INCLUDE_DIR})
- target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBOPCODES})
+ target_sources(${PROJECT_NAME} PRIVATE disassembler.c)
endif()
-configure_file(config.h.cmakein config.h @ONLY)
+configure_file(lightrec-config.h.cmakein lightrec-config.h @ONLY)
include(GNUInstallDirs)
install(TARGETS ${PROJECT_NAME}
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
- * Copyright (C) 2015-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2015-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "blockcache.h"
#include <stdbool.h>
#include <stdlib.h>
+#include <string.h>
/* Must be power of two */
#define LUT_SIZE 0x4000
struct block * lut[LUT_SIZE];
};
+u16 lightrec_get_lut_entry(const struct block *block)
+{
+ return (kunseg(block->pc) >> 2) & (LUT_SIZE - 1);
+}
+
struct block * lightrec_find_block(struct blockcache *cache, u32 pc)
{
struct block *block;
return NULL;
}
-void remove_from_code_lut(struct blockcache *cache, struct block *block)
+struct block * lightrec_find_block_from_lut(struct blockcache *cache,
+ u16 lut_entry, u32 addr_in_block)
{
- struct lightrec_state *state = block->state;
- const struct opcode *op;
- u32 offset = lut_offset(block->pc);
+ struct block *block;
+ u32 pc;
- /* Use state->get_next_block in the code LUT, which basically
- * calls back get_next_block_func(), until the compiler
- * overrides this. This is required, as a NULL value in the code
- * LUT means an outdated block. */
- state->code_lut[offset] = state->get_next_block;
+ addr_in_block = kunseg(addr_in_block);
- for (op = block->opcode_list; op; op = op->next)
- if (op->c.i.op == OP_META_SYNC)
- state->code_lut[offset + op->offset] = NULL;
+ for (block = cache->lut[lut_entry]; block; block = block->next) {
+ pc = kunseg(block->pc);
+ if (addr_in_block >= pc &&
+ addr_in_block < pc + (block->nb_ops << 2))
+ return block;
+ }
+ return NULL;
+}
+
+void remove_from_code_lut(struct blockcache *cache, struct block *block)
+{
+ struct lightrec_state *state = cache->state;
+ u32 offset = lut_offset(block->pc);
+
+ if (block->function) {
+ memset(&state->code_lut[offset], 0,
+ block->nb_ops * sizeof(*state->code_lut));
+ }
}
void lightrec_register_block(struct blockcache *cache, struct block *block)
for (i = 0; i < LUT_SIZE; i++) {
for (block = cache->lut[i]; block; block = next) {
next = block->next;
- lightrec_free_block(block);
+ lightrec_free_block(cache->state, block);
}
}
u32 lightrec_calculate_block_hash(const struct block *block)
{
- const struct lightrec_mem_map *map = block->map;
- u32 pc, hash = 0xffffffff;
- const u32 *code;
+ const u32 *code = block->code;
+ u32 hash = 0xffffffff;
unsigned int i;
- pc = kunseg(block->pc) - map->pc;
-
- while (map->mirror_of)
- map = map->mirror_of;
-
- code = map->address + pc;
-
/* Jenkins one-at-a-time hash algorithm */
for (i = 0; i < block->nb_ops; i++) {
hash += *code++;
return hash;
}
-bool lightrec_block_is_outdated(struct block *block)
+bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block)
{
- void **lut_entry = &block->state->code_lut[lut_offset(block->pc)];
+ void **lut_entry = &state->code_lut[lut_offset(block->pc)];
bool outdated;
if (*lut_entry)
if (block->function)
*lut_entry = block->function;
else
- *lut_entry = block->state->get_next_block;
+ *lut_entry = state->get_next_block;
}
return outdated;
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __BLOCKCACHE_H__
struct blockcache;
struct block * lightrec_find_block(struct blockcache *cache, u32 pc);
+struct block * lightrec_find_block_from_lut(struct blockcache *cache,
+ u16 lut_entry, u32 addr_in_block);
+u16 lightrec_get_lut_entry(const struct block *block);
+
void lightrec_register_block(struct blockcache *cache, struct block *block);
void lightrec_unregister_block(struct blockcache *cache, struct block *block);
void lightrec_free_block_cache(struct blockcache *cache);
u32 lightrec_calculate_block_hash(const struct block *block);
-_Bool lightrec_block_is_outdated(struct block *block);
+_Bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block);
#endif /* __BLOCKCACHE_H__ */
+++ /dev/null
-/*
- * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- */
-
-#ifndef __LIGHTREC_CONFIG_H__
-#define __LIGHTREC_CONFIG_H__
-
-#define ENABLE_THREADED_COMPILER 1
-#define ENABLE_FIRST_PASS 1
-#define ENABLE_DISASSEMBLER 0
-#define ENABLE_TINYMM 0
-
-#endif /* __LIGHTREC_CONFIG_H__ */
+++ /dev/null
-/*
- * Copyright (C) 2019 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- */
-
-#ifndef __LIGHTREC_CONFIG_H__
-#define __LIGHTREC_CONFIG_H__
-
-#cmakedefine01 ENABLE_THREADED_COMPILER
-#cmakedefine01 ENABLE_FIRST_PASS
-#cmakedefine01 ENABLE_DISASSEMBLER
-#cmakedefine01 ENABLE_TINYMM
-
-#endif /* __LIGHTREC_CONFIG_H__ */
-
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef DEBUG_H
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
-#include "config.h"
-
-#if ENABLE_DISASSEMBLER
-#include <dis-asm.h>
-#endif
#include <stdbool.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include "debug.h"
-#include "disassembler.h"
#include "lightrec-private.h"
-#include "memmanager.h"
+#include "regcache.h"
-static bool is_unconditional_jump(const struct opcode *op)
-{
- switch (op->i.op) {
- case OP_SPECIAL:
- return op->r.op == OP_SPECIAL_JR || op->r.op == OP_SPECIAL_JALR;
- case OP_J:
- case OP_JAL:
- return true;
- case OP_BEQ:
- case OP_BLEZ:
- return op->i.rs == op->i.rt;
- case OP_REGIMM:
- return (op->r.rt == OP_REGIMM_BGEZ ||
- op->r.rt == OP_REGIMM_BGEZAL) && op->i.rs == 0;
- default:
- return false;
- }
-}
+static const char *std_opcodes[] = {
+ [OP_J] = "j ",
+ [OP_JAL] = "jal ",
+ [OP_BEQ] = "beq ",
+ [OP_BNE] = "bne ",
+ [OP_BLEZ] = "blez ",
+ [OP_BGTZ] = "bgtz ",
+ [OP_ADDI] = "addi ",
+ [OP_ADDIU] = "addiu ",
+ [OP_SLTI] = "slti ",
+ [OP_SLTIU] = "sltiu ",
+ [OP_ANDI] = "andi ",
+ [OP_ORI] = "ori ",
+ [OP_XORI] = "xori ",
+ [OP_LUI] = "lui ",
+ [OP_LB] = "lb ",
+ [OP_LH] = "lh ",
+ [OP_LWL] = "lwl ",
+ [OP_LW] = "lw ",
+ [OP_LBU] = "lbu ",
+ [OP_LHU] = "lhu ",
+ [OP_LWR] = "lwr ",
+ [OP_SB] = "sb ",
+ [OP_SH] = "sh ",
+ [OP_SWL] = "swl ",
+ [OP_SW] = "sw ",
+ [OP_SWR] = "swr ",
+ [OP_LWC2] = "lwc2 ",
+ [OP_SWC2] = "swc2 ",
+};
-static bool is_syscall(const struct opcode *op)
-{
- return (op->i.op == OP_SPECIAL && (op->r.op == OP_SPECIAL_SYSCALL ||
- op->r.op == OP_SPECIAL_BREAK)) ||
- (op->i.op == OP_CP0 && (op->r.rs == OP_CP0_MTC0 ||
- op->r.rs == OP_CP0_CTC0) &&
- (op->r.rd == 12 || op->r.rd == 13));
-}
+static const char *special_opcodes[] = {
+ [OP_SPECIAL_SLL] = "sll ",
+ [OP_SPECIAL_SRL] = "srl ",
+ [OP_SPECIAL_SRA] = "sra ",
+ [OP_SPECIAL_SLLV] = "sllv ",
+ [OP_SPECIAL_SRLV] = "srlv ",
+ [OP_SPECIAL_SRAV] = "srav ",
+ [OP_SPECIAL_JR] = "jr ",
+ [OP_SPECIAL_JALR] = "jalr ",
+ [OP_SPECIAL_SYSCALL] = "syscall ",
+ [OP_SPECIAL_BREAK] = "break ",
+ [OP_SPECIAL_MFHI] = "mfhi ",
+ [OP_SPECIAL_MTHI] = "mthi ",
+ [OP_SPECIAL_MFLO] = "mflo ",
+ [OP_SPECIAL_MTLO] = "mtlo ",
+ [OP_SPECIAL_MULT] = "mult ",
+ [OP_SPECIAL_MULTU] = "multu ",
+ [OP_SPECIAL_DIV] = "div ",
+ [OP_SPECIAL_DIVU] = "divu ",
+ [OP_SPECIAL_ADD] = "add ",
+ [OP_SPECIAL_ADDU] = "addu ",
+ [OP_SPECIAL_SUB] = "sub ",
+ [OP_SPECIAL_SUBU] = "subu ",
+ [OP_SPECIAL_AND] = "and ",
+ [OP_SPECIAL_OR] = "or ",
+ [OP_SPECIAL_XOR] = "xor ",
+ [OP_SPECIAL_NOR] = "nor ",
+ [OP_SPECIAL_SLT] = "slt ",
+ [OP_SPECIAL_SLTU] = "sltu ",
+};
-void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *list)
-{
- struct opcode *next;
+static const char *regimm_opcodes[] = {
+ [OP_REGIMM_BLTZ] = "bltz ",
+ [OP_REGIMM_BGEZ] = "bgez ",
+ [OP_REGIMM_BLTZAL] = "bltzal ",
+ [OP_REGIMM_BGEZAL] = "bgezal ",
+};
- while (list) {
- next = list->next;
- lightrec_free(state, MEM_FOR_IR, sizeof(*list), list);
- list = next;
- }
-}
+static const char *cp0_opcodes[] = {
+ [OP_CP0_MFC0] = "mfc0 ",
+ [OP_CP0_CFC0] = "cfc0 ",
+ [OP_CP0_MTC0] = "mtc0 ",
+ [OP_CP0_CTC0] = "ctc0 ",
+ [OP_CP0_RFE] = "rfe",
+};
+
+static const char *cp2_opcodes[] = {
+ [OP_CP2_BASIC_MFC2] = "mfc2 ",
+ [OP_CP2_BASIC_CFC2] = "cfc2 ",
+ [OP_CP2_BASIC_MTC2] = "mtc2 ",
+ [OP_CP2_BASIC_CTC2] = "ctc2 ",
+};
+
+static const char *opcode_flags[] = {
+ "switched branch/DS",
+ "unload Rs",
+ "unload Rt",
+ "unload Rd",
+ "sync point",
+};
+
+static const char *opcode_io_flags[] = {
+ "memory I/O",
+ "hardware I/O",
+ "self-modifying code",
+ "no invalidation",
+};
-struct opcode * lightrec_disassemble(struct lightrec_state *state,
- const u32 *src, unsigned int *len)
+static const char *opcode_branch_flags[] = {
+ "emulate branch",
+ "local branch",
+};
+
+static const char *opcode_multdiv_flags[] = {
+ "No LO",
+ "No HI",
+ "No div check",
+};
+
+static int print_flags(char *buf, size_t len, u16 flags,
+ const char **array, size_t array_size)
{
- struct opcode *head = NULL;
- bool stop_next = false;
- struct opcode *curr, *last;
+ const char *flag_name;
unsigned int i;
+ size_t count = 0, bytes;
+ bool first = true;
- for (i = 0, last = NULL; ; i++, last = curr) {
- curr = lightrec_calloc(state, MEM_FOR_IR, sizeof(*curr));
- if (!curr) {
- pr_err("Unable to allocate memory\n");
- lightrec_free_opcode_list(state, head);
- return NULL;
- }
+ for (i = 0; i < array_size + ARRAY_SIZE(opcode_flags); i++) {
+ if (!(flags & BIT(i)))
+ continue;
- if (!last)
- head = curr;
+ if (i < ARRAY_SIZE(opcode_flags))
+ flag_name = opcode_flags[i];
else
- last->next = curr;
-
- /* TODO: Take care of endianness */
- curr->opcode = LE32TOH(*src++);
- curr->offset = i;
-
- /* NOTE: The block disassembly ends after the opcode that
- * follows an unconditional jump (delay slot) */
- if (stop_next || is_syscall(curr))
- break;
- else if (is_unconditional_jump(curr))
- stop_next = true;
+ flag_name = array[i - ARRAY_SIZE(opcode_flags)];
+
+ if (first)
+ bytes = snprintf(buf, len, "(%s", flag_name);
+ else
+ bytes = snprintf(buf, len, ", %s", flag_name);
+
+ first = false;
+ buf += bytes;
+ len -= bytes;
+ count += bytes;
}
- if (len)
- *len = (i + 1) * sizeof(u32);
+ if (!first)
+ count += snprintf(buf, len, ")");
+ else
+ *buf = '\0';
- return head;
+ return count;
}
-unsigned int lightrec_cycles_of_opcode(union code code)
+static int print_op_special(union code c, char *buf, size_t len,
+ const char ***flags_ptr, size_t *nb_flags)
{
- switch (code.i.op) {
- case OP_META_REG_UNLOAD:
- case OP_META_SYNC:
- return 0;
+ switch (c.r.op) {
+ case OP_SPECIAL_SLL:
+ case OP_SPECIAL_SRL:
+ case OP_SPECIAL_SRA:
+ return snprintf(buf, len, "%s%s,%s,%u",
+ special_opcodes[c.r.op],
+ lightrec_reg_name(c.r.rd),
+ lightrec_reg_name(c.r.rt),
+ c.r.imm);
+ case OP_SPECIAL_SLLV:
+ case OP_SPECIAL_SRLV:
+ case OP_SPECIAL_SRAV:
+ case OP_SPECIAL_ADD:
+ case OP_SPECIAL_ADDU:
+ case OP_SPECIAL_SUB:
+ case OP_SPECIAL_SUBU:
+ case OP_SPECIAL_AND:
+ case OP_SPECIAL_OR:
+ case OP_SPECIAL_XOR:
+ case OP_SPECIAL_NOR:
+ case OP_SPECIAL_SLT:
+ case OP_SPECIAL_SLTU:
+ return snprintf(buf, len, "%s%s,%s,%s",
+ special_opcodes[c.r.op],
+ lightrec_reg_name(c.r.rd),
+ lightrec_reg_name(c.r.rt),
+ lightrec_reg_name(c.r.rs));
+ case OP_SPECIAL_JR:
+ case OP_SPECIAL_MTHI:
+ case OP_SPECIAL_MTLO:
+ return snprintf(buf, len, "%s%s",
+ special_opcodes[c.r.op],
+ lightrec_reg_name(c.r.rs));
+ case OP_SPECIAL_JALR:
+ return snprintf(buf, len, "%s%s,%s",
+ special_opcodes[c.r.op],
+ lightrec_reg_name(c.r.rd),
+ lightrec_reg_name(c.r.rt));
+ case OP_SPECIAL_SYSCALL:
+ case OP_SPECIAL_BREAK:
+ return snprintf(buf, len, "%s", special_opcodes[c.r.op]);
+ case OP_SPECIAL_MFHI:
+ case OP_SPECIAL_MFLO:
+ return snprintf(buf, len, "%s%s",
+ special_opcodes[c.r.op],
+ lightrec_reg_name(c.r.rd));
+ case OP_SPECIAL_MULT:
+ case OP_SPECIAL_MULTU:
+ case OP_SPECIAL_DIV:
+ case OP_SPECIAL_DIVU:
+ *flags_ptr = opcode_multdiv_flags;
+ *nb_flags = ARRAY_SIZE(opcode_multdiv_flags);
+ return snprintf(buf, len, "%s%s,%s,%s,%s",
+ special_opcodes[c.r.op],
+ lightrec_reg_name(get_mult_div_hi(c)),
+ lightrec_reg_name(get_mult_div_lo(c)),
+ lightrec_reg_name(c.r.rs),
+ lightrec_reg_name(c.r.rt));
default:
- return 2;
+ return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
}
}
-#if ENABLE_DISASSEMBLER
-void lightrec_print_disassembly(const struct block *block,
- const u32 *code, unsigned int length)
+static int print_op_cp(union code c, char *buf, size_t len, unsigned int cp)
{
- struct disassemble_info info;
+ if (cp == 2) {
+ switch (c.i.rs) {
+ case OP_CP0_MFC0:
+ case OP_CP0_CFC0:
+ case OP_CP0_MTC0:
+ case OP_CP0_CTC0:
+ return snprintf(buf, len, "%s%s,%u",
+ cp2_opcodes[c.i.rs],
+ lightrec_reg_name(c.i.rt),
+ c.r.rd);
+ default:
+ return snprintf(buf, len, "cp2 (0x%08x)", c.opcode);
+ }
+ } else {
+ switch (c.i.rs) {
+ case OP_CP0_MFC0:
+ case OP_CP0_CFC0:
+ case OP_CP0_MTC0:
+ case OP_CP0_CTC0:
+ return snprintf(buf, len, "%s%s,%u",
+ cp0_opcodes[c.i.rs],
+ lightrec_reg_name(c.i.rt),
+ c.r.rd);
+ case OP_CP0_RFE:
+ return snprintf(buf, len, "rfe ");
+ default:
+ return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
+ }
+ }
+}
+
+static int print_op(union code c, u32 pc, char *buf, size_t len,
+ const char ***flags_ptr, size_t *nb_flags)
+{
+ if (c.opcode == 0)
+ return snprintf(buf, len, "nop ");
+
+ switch (c.i.op) {
+ case OP_SPECIAL:
+ return print_op_special(c, buf, len, flags_ptr, nb_flags);
+ case OP_REGIMM:
+ *flags_ptr = opcode_branch_flags;
+ *nb_flags = ARRAY_SIZE(opcode_branch_flags);
+ return snprintf(buf, len, "%s%s,0x%x",
+ regimm_opcodes[c.i.rt],
+ lightrec_reg_name(c.i.rs),
+ pc + 4 + ((s16)c.i.imm << 2));
+ case OP_J:
+ case OP_JAL:
+ return snprintf(buf, len, "%s0x%x",
+ std_opcodes[c.i.op],
+ (pc & 0xf0000000) | (c.j.imm << 2));
+ case OP_BEQ:
+ case OP_BNE:
+ case OP_BLEZ:
+ case OP_BGTZ:
+ *flags_ptr = opcode_branch_flags;
+ *nb_flags = ARRAY_SIZE(opcode_branch_flags);
+ return snprintf(buf, len, "%s%s,%s,0x%x",
+ std_opcodes[c.i.op],
+ lightrec_reg_name(c.i.rs),
+ lightrec_reg_name(c.i.rt),
+ pc + 4 + ((s16)c.i.imm << 2));
+ case OP_ADDI:
+ case OP_ADDIU:
+ case OP_SLTI:
+ case OP_SLTIU:
+ case OP_ANDI:
+ case OP_ORI:
+ case OP_XORI:
+ return snprintf(buf, len, "%s%s,%s,0x%04hx",
+ std_opcodes[c.i.op],
+ lightrec_reg_name(c.i.rt),
+ lightrec_reg_name(c.i.rs),
+ (u16)c.i.imm);
+
+ case OP_LUI:
+ return snprintf(buf, len, "%s%s,0x%04hx",
+ std_opcodes[c.i.op],
+ lightrec_reg_name(c.i.rt),
+ (u16)c.i.imm);
+ case OP_CP0:
+ return print_op_cp(c, buf, len, 0);
+ case OP_CP2:
+ return print_op_cp(c, buf, len, 2);
+ case OP_LB:
+ case OP_LH:
+ case OP_LWL:
+ case OP_LW:
+ case OP_LBU:
+ case OP_LHU:
+ case OP_LWR:
+ case OP_SB:
+ case OP_SH:
+ case OP_SWL:
+ case OP_SW:
+ case OP_SWR:
+ *flags_ptr = opcode_io_flags;
+ *nb_flags = ARRAY_SIZE(opcode_io_flags);
+ return snprintf(buf, len, "%s%s,%hd(%s)",
+ std_opcodes[c.i.op],
+ lightrec_reg_name(c.i.rt),
+ (s16)c.i.imm,
+ lightrec_reg_name(c.i.rs));
+ case OP_LWC2:
+ case OP_SWC2:
+ *flags_ptr = opcode_io_flags;
+ *nb_flags = ARRAY_SIZE(opcode_io_flags);
+ return snprintf(buf, len, "%s%s,%hd(%s)",
+ std_opcodes[c.i.op],
+ lightrec_reg_name(c.i.rt),
+ (s16)c.i.imm,
+ lightrec_reg_name(c.i.rs));
+ case OP_META_MOV:
+ return snprintf(buf, len, "move %s,%s",
+ lightrec_reg_name(c.r.rd),
+ lightrec_reg_name(c.r.rs));
+ case OP_META_EXTC:
+ return snprintf(buf, len, "extc %s,%s",
+ lightrec_reg_name(c.i.rt),
+ lightrec_reg_name(c.i.rs));
+ case OP_META_EXTS:
+ return snprintf(buf, len, "exts %s,%s",
+ lightrec_reg_name(c.i.rt),
+ lightrec_reg_name(c.i.rs));
+ default:
+ return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
+ }
+}
+
+void lightrec_print_disassembly(const struct block *block, const u32 *code)
+{
+ const struct opcode *op;
+ const char **flags_ptr;
+ size_t nb_flags, count, count2;
+ char buf[256], buf2[256], buf3[256];
unsigned int i;
+ u32 pc, branch_pc;
+
+ for (i = 0; i < block->nb_ops; i++) {
+ op = &block->opcode_list[i];
+ branch_pc = get_branch_pc(block, i, 0);
+ pc = block->pc + (i << 2);
+
+ count = print_op((union code)code[i], pc, buf, sizeof(buf),
+ &flags_ptr, &nb_flags);
+
+ flags_ptr = NULL;
+ nb_flags = 0;
+ count2 = print_op(op->c, branch_pc, buf2, sizeof(buf2),
+ &flags_ptr, &nb_flags);
+
+ if (code[i] == op->c.opcode) {
+ *buf2 = '\0';
+ count2 = 0;
+ }
+
+ print_flags(buf3, sizeof(buf3), op->flags, flags_ptr, nb_flags);
- memset(&info, 0, sizeof(info));
- init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
-
- info.buffer = (bfd_byte *) code;
- info.buffer_vma = (bfd_vma)(uintptr_t) code;
- info.buffer_length = length;
- info.flavour = bfd_target_unknown_flavour;
- info.arch = bfd_arch_mips;
- info.mach = bfd_mach_mips3000;
- disassemble_init_for_target(&info);
-
- for (i = 0; i < length; i += 4) {
- void print_insn_little_mips(bfd_vma, struct disassemble_info *);
- putc('\t', stdout);
- print_insn_little_mips((bfd_vma)(uintptr_t) code++, &info);
- putc('\n', stdout);
+ printf("0x%08x (0x%x)\t%s%*c%s%*c%s\n", pc, i << 2,
+ buf, 30 - (int)count, ' ', buf2, 30 - (int)count2, ' ', buf3);
}
}
-#endif
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __DISASSEMBLER_H__
#define __packed __attribute__((packed))
#endif
-#define LIGHTREC_DIRECT_IO (1 << 0)
-#define LIGHTREC_NO_INVALIDATE (1 << 1)
-#define LIGHTREC_NO_DS (1 << 2)
-#define LIGHTREC_SMC (1 << 3)
-#define LIGHTREC_EMULATE_BRANCH (1 << 4)
-#define LIGHTREC_LOCAL_BRANCH (1 << 5)
-#define LIGHTREC_HW_IO (1 << 6)
-#define LIGHTREC_MULT32 (1 << 7)
+#define BIT(x) (1ULL << (x))
+
+/* Flags for all opcodes */
+#define LIGHTREC_NO_DS BIT(0)
+#define LIGHTREC_UNLOAD_RS BIT(1)
+#define LIGHTREC_UNLOAD_RT BIT(2)
+#define LIGHTREC_UNLOAD_RD BIT(3)
+#define LIGHTREC_SYNC BIT(4)
+
+/* Flags for load/store opcodes */
+#define LIGHTREC_DIRECT_IO BIT(5)
+#define LIGHTREC_HW_IO BIT(6)
+#define LIGHTREC_SMC BIT(7)
+#define LIGHTREC_NO_INVALIDATE BIT(8)
+
+/* Flags for branches */
+#define LIGHTREC_EMULATE_BRANCH BIT(5)
+#define LIGHTREC_LOCAL_BRANCH BIT(6)
+
+/* Flags for div/mult opcodes */
+#define LIGHTREC_NO_LO BIT(5)
+#define LIGHTREC_NO_HI BIT(6)
+#define LIGHTREC_NO_DIV_CHECK BIT(7)
struct block;
OP_LWC2 = 0x32,
OP_SWC2 = 0x3a,
- OP_META_REG_UNLOAD = 0x11,
-
- OP_META_BEQZ = 0x14,
- OP_META_BNEZ = 0x15,
-
OP_META_MOV = 0x16,
- OP_META_SYNC = 0x17,
+
+ OP_META_EXTC = 0x17,
+ OP_META_EXTS = 0x18,
};
enum special_opcodes {
struct opcode_j j;
};
u16 flags;
- u16 offset;
- struct opcode *next;
};
-struct opcode * lightrec_disassemble(struct lightrec_state *state,
- const u32 *src, unsigned int *len);
-void lightrec_free_opcode_list(struct lightrec_state *state,
- struct opcode *list);
-
-unsigned int lightrec_cycles_of_opcode(union code code);
-
-void lightrec_print_disassembly(const struct block *block,
- const u32 *code, unsigned int length);
+void lightrec_print_disassembly(const struct block *block, const u32 *code);
#endif /* __DISASSEMBLER_H__ */
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "blockcache.h"
#include "debug.h"
#include "disassembler.h"
#include "emitter.h"
+#include "lightning-wrapper.h"
#include "optimizer.h"
#include "regcache.h"
-#include <lightning.h>
#include <stdbool.h>
#include <stddef.h>
-typedef void (*lightrec_rec_func_t)(const struct block *,
- const struct opcode *, u32);
+typedef void (*lightrec_rec_func_t)(struct lightrec_cstate *, const struct block *, u16);
/* Forward declarations */
-static void rec_SPECIAL(const struct block *block,
- const struct opcode *op, u32 pc);
-static void rec_REGIMM(const struct block *block,
- const struct opcode *op, u32 pc);
-static void rec_CP0(const struct block *block, const struct opcode *op, u32 pc);
-static void rec_CP2(const struct block *block, const struct opcode *op, u32 pc);
+static void rec_SPECIAL(struct lightrec_cstate *state, const struct block *block, u16 offset);
+static void rec_REGIMM(struct lightrec_cstate *state, const struct block *block, u16 offset);
+static void rec_CP0(struct lightrec_cstate *state, const struct block *block, u16 offset);
+static void rec_CP2(struct lightrec_cstate *state, const struct block *block, u16 offset);
-
-static void unknown_opcode(const struct block *block,
- const struct opcode *op, u32 pc)
+static void unknown_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
- pr_warn("Unknown opcode: 0x%08x at PC 0x%08x\n", op->opcode, pc);
+ pr_warn("Unknown opcode: 0x%08x at PC 0x%08x\n",
+ block->opcode_list[offset].c.opcode,
+ block->pc + (offset << 2));
}
-static void lightrec_emit_end_of_block(const struct block *block,
- const struct opcode *op, u32 pc,
+static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
+ const struct block *block, u16 offset,
s8 reg_new_pc, u32 imm, u8 ra_reg,
u32 link, bool update_cycles)
{
- struct lightrec_state *state = block->state;
struct regcache *reg_cache = state->reg_cache;
u32 cycles = state->cycles;
jit_state_t *_jit = block->_jit;
+ const struct opcode *op = &block->opcode_list[offset],
+ *next = &block->opcode_list[offset + 1];
jit_note(__FILE__, __LINE__);
if (link) {
/* Update the $ra register */
- u8 link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg);
+ u8 link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg, 0);
jit_movi(link_reg, link);
lightrec_free_reg(reg_cache, link_reg);
}
if (has_delay_slot(op->c) &&
!(op->flags & (LIGHTREC_NO_DS | LIGHTREC_LOCAL_BRANCH))) {
- cycles += lightrec_cycles_of_opcode(op->next->c);
+ cycles += lightrec_cycles_of_opcode(next->c);
/* Recompile the delay slot */
- if (op->next->c.opcode)
- lightrec_rec_opcode(block, op->next, pc + 4);
+ if (next->c.opcode)
+ lightrec_rec_opcode(state, block, offset + 1);
}
/* Store back remaining registers */
pr_debug("EOB: %u cycles\n", cycles);
}
- if (op->next && ((op->flags & LIGHTREC_NO_DS) || op->next->next))
+ if (offset + !!(op->flags & LIGHTREC_NO_DS) < block->nb_ops - 1)
state->branches[state->nb_branches++] = jit_jmpi();
}
-void lightrec_emit_eob(const struct block *block,
- const struct opcode *op, u32 pc)
+void lightrec_emit_eob(struct lightrec_cstate *state, const struct block *block,
+ u16 offset, bool after_op)
{
- struct lightrec_state *state = block->state;
struct regcache *reg_cache = state->reg_cache;
jit_state_t *_jit = block->_jit;
+ union code c = block->opcode_list[offset].c;
+ u32 cycles = state->cycles;
+
+ if (!after_op)
+ cycles -= lightrec_cycles_of_opcode(c);
lightrec_storeback_regs(reg_cache, _jit);
- jit_movi(JIT_V0, pc);
- jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE,
- state->cycles - lightrec_cycles_of_opcode(op->c));
+ jit_movi(JIT_V0, block->pc + (offset << 2));
+ jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
state->branches[state->nb_branches++] = jit_jmpi();
}
-static void rec_special_JR(const struct block *block,
- const struct opcode *op, u32 pc)
+static u8 get_jr_jalr_reg(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
jit_state_t *_jit = block->_jit;
+ const struct opcode *op = &block->opcode_list[offset],
+ *next = &block->opcode_list[offset + 1];
u8 rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
- _jit_name(block->_jit, __func__);
+ /* If the source register is already mapped to JIT_R0 or JIT_R1, and the
+ * delay slot is a I/O operation, unload the register, since JIT_R0 and
+ * JIT_R1 are explicitely used by the I/O opcode generators. */
+ if ((rs == JIT_R0 || rs == JIT_R1) &&
+ !(op->flags & LIGHTREC_NO_DS) &&
+ opcode_is_io(next->c) &&
+ !(next->flags & (LIGHTREC_NO_INVALIDATE | LIGHTREC_DIRECT_IO))) {
+ lightrec_unload_reg(reg_cache, _jit, rs);
+ lightrec_free_reg(reg_cache, rs);
+
+ rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
+ }
+
lightrec_lock_reg(reg_cache, _jit, rs);
- lightrec_emit_end_of_block(block, op, pc, rs, 0, 31, 0, true);
+
+ return rs;
}
-static void rec_special_JALR(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_JR(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
- struct regcache *reg_cache = block->state->reg_cache;
- jit_state_t *_jit = block->_jit;
- u8 rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
+ u8 rs = get_jr_jalr_reg(state, block, offset);
_jit_name(block->_jit, __func__);
- lightrec_lock_reg(reg_cache, _jit, rs);
- lightrec_emit_end_of_block(block, op, pc, rs, 0, op->r.rd, pc + 8, true);
+ lightrec_emit_end_of_block(state, block, offset, rs, 0, 31, 0, true);
+}
+
+static void rec_special_JALR(struct lightrec_cstate *state, const struct block *block, u16 offset)
+{
+ u8 rs = get_jr_jalr_reg(state, block, offset);
+ union code c = block->opcode_list[offset].c;
+
+ _jit_name(block->_jit, __func__);
+ lightrec_emit_end_of_block(state, block, offset, rs, 0, c.r.rd,
+ get_branch_pc(block, offset, 2), true);
}
-static void rec_J(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_J(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
+ union code c = block->opcode_list[offset].c;
+
_jit_name(block->_jit, __func__);
- lightrec_emit_end_of_block(block, op, pc, -1,
- (pc & 0xf0000000) | (op->j.imm << 2), 31, 0, true);
+ lightrec_emit_end_of_block(state, block, offset, -1,
+ (block->pc & 0xf0000000) | (c.j.imm << 2),
+ 31, 0, true);
}
-static void rec_JAL(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_JAL(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
+ union code c = block->opcode_list[offset].c;
+
_jit_name(block->_jit, __func__);
- lightrec_emit_end_of_block(block, op, pc, -1,
- (pc & 0xf0000000) | (op->j.imm << 2),
- 31, pc + 8, true);
+ lightrec_emit_end_of_block(state, block, offset, -1,
+ (block->pc & 0xf0000000) | (c.j.imm << 2),
+ 31, get_branch_pc(block, offset, 2), true);
}
-static void rec_b(const struct block *block, const struct opcode *op, u32 pc,
+static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset,
jit_code_t code, u32 link, bool unconditional, bool bz)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
struct native_register *regs_backup;
jit_state_t *_jit = block->_jit;
struct lightrec_branch *branch;
+ const struct opcode *op = &block->opcode_list[offset],
+ *next = &block->opcode_list[offset + 1];
jit_node_t *addr;
u8 link_reg;
- u32 offset, cycles = block->state->cycles;
+ u32 target_offset, cycles = state->cycles;
bool is_forward = (s16)op->i.imm >= -1;
+ u32 next_pc;
jit_note(__FILE__, __LINE__);
if (!(op->flags & LIGHTREC_NO_DS))
- cycles += lightrec_cycles_of_opcode(op->next->c);
+ cycles += lightrec_cycles_of_opcode(next->c);
- block->state->cycles = 0;
+ state->cycles = 0;
if (cycles)
jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
if (!unconditional) {
- u8 rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->i.rs),
- rt = bz ? 0 : lightrec_alloc_reg_in_ext(reg_cache,
- _jit, op->i.rt);
+ u8 rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT),
+ rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache,
+ _jit, op->i.rt, REG_EXT);
/* Generate the branch opcode */
addr = jit_new_node_pww(code, NULL, rs, rt);
}
if (op->flags & LIGHTREC_LOCAL_BRANCH) {
- if (op->next && !(op->flags & LIGHTREC_NO_DS)) {
+ if (next && !(op->flags & LIGHTREC_NO_DS)) {
/* Recompile the delay slot */
- if (op->next->opcode)
- lightrec_rec_opcode(block, op->next, pc + 4);
+ if (next->opcode)
+ lightrec_rec_opcode(state, block, offset + 1);
}
if (link) {
/* Update the $ra register */
- link_reg = lightrec_alloc_reg_out(reg_cache, _jit, 31);
+ link_reg = lightrec_alloc_reg_out(reg_cache, _jit, 31, 0);
jit_movi(link_reg, link);
lightrec_free_reg(reg_cache, link_reg);
}
/* Store back remaining registers */
lightrec_storeback_regs(reg_cache, _jit);
- offset = op->offset + 1 + (s16)op->i.imm;
- pr_debug("Adding local branch to offset 0x%x\n", offset << 2);
- branch = &block->state->local_branches[
- block->state->nb_local_branches++];
+ target_offset = offset + 1 + (s16)op->i.imm
+ - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
+ pr_debug("Adding local branch to offset 0x%x\n",
+ target_offset << 2);
+ branch = &state->local_branches[
+ state->nb_local_branches++];
- branch->target = offset;
+ branch->target = target_offset;
if (is_forward)
branch->branch = jit_jmpi();
else
}
if (!(op->flags & LIGHTREC_LOCAL_BRANCH) || !is_forward) {
- lightrec_emit_end_of_block(block, op, pc, -1,
- pc + 4 + ((s16)op->i.imm << 2),
+ next_pc = get_branch_pc(block, offset, 1 + (s16)op->i.imm);
+ lightrec_emit_end_of_block(state, block, offset, -1, next_pc,
31, link, false);
}
if (bz && link) {
/* Update the $ra register */
- link_reg = lightrec_alloc_reg_out_ext(reg_cache,
- _jit, 31);
+ link_reg = lightrec_alloc_reg_out(reg_cache, _jit,
+ 31, REG_EXT);
jit_movi(link_reg, (s32)link);
lightrec_free_reg(reg_cache, link_reg);
}
- if (!(op->flags & LIGHTREC_NO_DS) && op->next->opcode)
- lightrec_rec_opcode(block, op->next, pc + 4);
+ if (!(op->flags & LIGHTREC_NO_DS) && next->opcode)
+ lightrec_rec_opcode(state, block, offset + 1);
}
}
-static void rec_BNE(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_BNE(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
+ union code c = block->opcode_list[offset].c;
+
_jit_name(block->_jit, __func__);
- rec_b(block, op, pc, jit_code_beqr, 0, false, false);
+
+ if (c.i.rt == 0)
+ rec_b(state, block, offset, jit_code_beqi, 0, false, true);
+ else
+ rec_b(state, block, offset, jit_code_beqr, 0, false, false);
}
-static void rec_BEQ(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_BEQ(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
+ union code c = block->opcode_list[offset].c;
+
_jit_name(block->_jit, __func__);
- rec_b(block, op, pc, jit_code_bner, 0,
- op->i.rs == op->i.rt, false);
+
+ if (c.i.rt == 0)
+ rec_b(state, block, offset, jit_code_bnei, 0, c.i.rs == 0, true);
+ else
+ rec_b(state, block, offset, jit_code_bner, 0, c.i.rs == c.i.rt, false);
}
-static void rec_BLEZ(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_BLEZ(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
+ union code c = block->opcode_list[offset].c;
+
_jit_name(block->_jit, __func__);
- rec_b(block, op, pc, jit_code_bgti, 0, op->i.rs == 0, true);
+ rec_b(state, block, offset, jit_code_bgti, 0, c.i.rs == 0, true);
}
-static void rec_BGTZ(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_BGTZ(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_b(block, op, pc, jit_code_blei, 0, false, true);
+ rec_b(state, block, offset, jit_code_blei, 0, false, true);
}
-static void rec_regimm_BLTZ(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_regimm_BLTZ(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_b(block, op, pc, jit_code_bgei, 0, false, true);
+ rec_b(state, block, offset, jit_code_bgei, 0, false, true);
}
-static void rec_regimm_BLTZAL(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_regimm_BLTZAL(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_b(block, op, pc, jit_code_bgei, pc + 8, false, true);
+ rec_b(state, block, offset, jit_code_bgei,
+ get_branch_pc(block, offset, 2), false, true);
}
-static void rec_regimm_BGEZ(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_regimm_BGEZ(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
+ union code c = block->opcode_list[offset].c;
+
_jit_name(block->_jit, __func__);
- rec_b(block, op, pc, jit_code_blti, 0, !op->i.rs, true);
+ rec_b(state, block, offset, jit_code_blti, 0, !c.i.rs, true);
}
-static void rec_regimm_BGEZAL(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_regimm_BGEZAL(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
+ const struct opcode *op = &block->opcode_list[offset];
_jit_name(block->_jit, __func__);
- rec_b(block, op, pc, jit_code_blti, pc + 8, !op->i.rs, true);
+ rec_b(state, block, offset, jit_code_blti,
+ get_branch_pc(block, offset, 2),
+ !op->i.rs, true);
}
-static void rec_alu_imm(const struct block *block, const struct opcode *op,
- jit_code_t code, bool sign_extend)
+static void rec_alu_imm(struct lightrec_cstate *state, const struct block *block,
+ u16 offset, jit_code_t code, bool slti)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
- u8 rs, rt;
+ u8 rs, rt, out_flags = REG_EXT;
+
+ if (slti)
+ out_flags |= REG_ZEXT;
jit_note(__FILE__, __LINE__);
- rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->i.rs);
- rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, REG_EXT);
+ rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, out_flags);
- if (sign_extend)
- jit_new_node_www(code, rt, rs, (s32)(s16) op->i.imm);
- else
- jit_new_node_www(code, rt, rs, (u32)(u16) op->i.imm);
+ jit_new_node_www(code, rt, rs, (s32)(s16) c.i.imm);
lightrec_free_reg(reg_cache, rs);
lightrec_free_reg(reg_cache, rt);
}
-static void rec_alu_special(const struct block *block, const struct opcode *op,
- jit_code_t code, bool out_ext)
+static void rec_alu_special(struct lightrec_cstate *state, const struct block *block,
+ u16 offset, jit_code_t code, bool out_ext)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
u8 rd, rt, rs;
jit_note(__FILE__, __LINE__);
- rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs);
- rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
-
- if (out_ext)
- rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
- else
- rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, REG_EXT);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, REG_EXT);
+ rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd,
+ out_ext ? REG_EXT | REG_ZEXT : 0);
jit_new_node_www(code, rd, rs, rt);
lightrec_free_reg(reg_cache, rd);
}
-static void rec_alu_shiftv(const struct block *block,
- const struct opcode *op, jit_code_t code)
+static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *block,
+ u16 offset, jit_code_t code)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
- u8 rd, rt, rs, temp;
+ u8 rd, rt, rs, temp, flags = 0;
jit_note(__FILE__, __LINE__);
- rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs);
- temp = lightrec_alloc_reg_temp(reg_cache, _jit);
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
- if (code == jit_code_rshr) {
- rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
- rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
- } else {
- rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
- rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
- }
+ if (code == jit_code_rshr)
+ flags = REG_EXT;
+ else if (code == jit_code_rshr_u)
+ flags = REG_ZEXT;
- jit_andi(temp, rs, 0x1f);
-
-#if __WORDSIZE == 64
- if (code == jit_code_rshr_u) {
- jit_extr_ui(rd, rt);
- jit_new_node_www(code, rd, rd, temp);
- }
-#endif
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
+ rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags);
- if (__WORDSIZE == 32 || code != jit_code_rshr_u)
+ if (rs != rd && rt != rd) {
+ jit_andi(rd, rs, 0x1f);
+ jit_new_node_www(code, rd, rt, rd);
+ } else {
+ temp = lightrec_alloc_reg_temp(reg_cache, _jit);
+ jit_andi(temp, rs, 0x1f);
jit_new_node_www(code, rd, rt, temp);
+ lightrec_free_reg(reg_cache, temp);
+ }
lightrec_free_reg(reg_cache, rs);
- lightrec_free_reg(reg_cache, temp);
lightrec_free_reg(reg_cache, rt);
lightrec_free_reg(reg_cache, rd);
}
-static void rec_ADDIU(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_ADDIU(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_imm(block, op, jit_code_addi, true);
+ rec_alu_imm(state, block, offset, jit_code_addi, false);
}
-static void rec_ADDI(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_ADDI(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
/* TODO: Handle the exception? */
_jit_name(block->_jit, __func__);
- rec_alu_imm(block, op, jit_code_addi, true);
+ rec_alu_imm(state, block, offset, jit_code_addi, false);
}
-static void rec_SLTIU(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_SLTIU(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_imm(block, op, jit_code_lti_u, true);
+ rec_alu_imm(state, block, offset, jit_code_lti_u, true);
}
-static void rec_SLTI(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SLTI(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_imm(block, op, jit_code_lti, true);
+ rec_alu_imm(state, block, offset, jit_code_lti, true);
}
-static void rec_ANDI(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_ANDI(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
u8 rs, rt;
_jit_name(block->_jit, __func__);
jit_note(__FILE__, __LINE__);
- rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
- rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
+ rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt,
+ REG_EXT | REG_ZEXT);
/* PSX code uses ANDI 0xff / ANDI 0xffff a lot, which are basically
* casts to uint8_t / uint16_t. */
- if (op->i.imm == 0xff)
+ if (c.i.imm == 0xff)
jit_extr_uc(rt, rs);
- else if (op->i.imm == 0xffff)
+ else if (c.i.imm == 0xffff)
jit_extr_us(rt, rs);
else
- jit_andi(rt, rs, (u32)(u16) op->i.imm);
+ jit_andi(rt, rs, (u32)(u16) c.i.imm);
+
+ lightrec_free_reg(reg_cache, rs);
+ lightrec_free_reg(reg_cache, rt);
+}
+
+static void rec_alu_or_xor(struct lightrec_cstate *state, const struct block *block,
+ u16 offset, jit_code_t code)
+{
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
+ jit_state_t *_jit = block->_jit;
+ u8 rs, rt, flags;
+
+ jit_note(__FILE__, __LINE__);
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
+ rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, 0);
+
+ flags = lightrec_get_reg_in_flags(reg_cache, rs);
+ lightrec_set_reg_out_flags(reg_cache, rt, flags);
+
+ jit_new_node_www(code, rt, rs, (u32)(u16) c.i.imm);
lightrec_free_reg(reg_cache, rs);
lightrec_free_reg(reg_cache, rt);
}
-static void rec_ORI(const struct block *block, const struct opcode *op, u32 pc)
+
+static void rec_ORI(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_imm(block, op, jit_code_ori, false);
+ rec_alu_or_xor(state, block, offset, jit_code_ori);
}
-static void rec_XORI(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_XORI(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_imm(block, op, jit_code_xori, false);
+ rec_alu_or_xor(state, block, offset, jit_code_xori);
}
-static void rec_LUI(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LUI(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
- u8 rt;
+ u8 rt, flags = REG_EXT;
jit_name(__func__);
jit_note(__FILE__, __LINE__);
- rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
- jit_movi(rt, (s32)(op->i.imm << 16));
+ if (!(c.i.imm & BIT(15)))
+ flags |= REG_ZEXT;
+
+ rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
+
+ jit_movi(rt, (s32)(c.i.imm << 16));
lightrec_free_reg(reg_cache, rt);
}
-static void rec_special_ADDU(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_ADDU(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_special(block, op, jit_code_addr, false);
+ rec_alu_special(state, block, offset, jit_code_addr, false);
}
-static void rec_special_ADD(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_ADD(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
/* TODO: Handle the exception? */
_jit_name(block->_jit, __func__);
- rec_alu_special(block, op, jit_code_addr, false);
+ rec_alu_special(state, block, offset, jit_code_addr, false);
}
-static void rec_special_SUBU(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_SUBU(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_special(block, op, jit_code_subr, false);
+ rec_alu_special(state, block, offset, jit_code_subr, false);
}
-static void rec_special_SUB(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_SUB(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
/* TODO: Handle the exception? */
_jit_name(block->_jit, __func__);
- rec_alu_special(block, op, jit_code_subr, false);
+ rec_alu_special(state, block, offset, jit_code_subr, false);
}
-static void rec_special_AND(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_AND(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
+ jit_state_t *_jit = block->_jit;
+ u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
+
_jit_name(block->_jit, __func__);
- rec_alu_special(block, op, jit_code_andr, false);
+ jit_note(__FILE__, __LINE__);
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
+ rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
+
+ flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
+ flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
+
+ /* Z(rd) = Z(rs) | Z(rt) */
+ flags_rd = REG_ZEXT & (flags_rs | flags_rt);
+
+ /* E(rd) = (E(rt) & Z(rt)) | (E(rs) & Z(rs)) | (E(rs) & E(rt)) */
+ if (((flags_rs & REG_EXT) && (flags_rt & REG_ZEXT)) ||
+ ((flags_rt & REG_EXT) && (flags_rs & REG_ZEXT)) ||
+ (REG_EXT & flags_rs & flags_rt))
+ flags_rd |= REG_EXT;
+
+ lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
+
+ jit_andr(rd, rs, rt);
+
+ lightrec_free_reg(reg_cache, rs);
+ lightrec_free_reg(reg_cache, rt);
+ lightrec_free_reg(reg_cache, rd);
}
-static void rec_special_OR(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_or_nor(struct lightrec_cstate *state,
+ const struct block *block, u16 offset, bool nor)
+{
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
+ jit_state_t *_jit = block->_jit;
+ u8 rd, rt, rs, flags_rs, flags_rt, flags_rd = 0;
+
+ jit_note(__FILE__, __LINE__);
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
+ rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
+
+ flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
+ flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
+
+ /* or: Z(rd) = Z(rs) & Z(rt)
+ * nor: Z(rd) = 0 */
+ if (!nor)
+ flags_rd = REG_ZEXT & flags_rs & flags_rt;
+
+ /* E(rd) = (E(rs) & E(rt)) | (E(rt) & !Z(rt)) | (E(rs) & !Z(rs)) */
+ if ((REG_EXT & flags_rs & flags_rt) ||
+	    ((flags_rt & (REG_EXT | REG_ZEXT)) == REG_EXT) ||
+	    ((flags_rs & (REG_EXT | REG_ZEXT)) == REG_EXT))
+ flags_rd |= REG_EXT;
+
+ lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
+
+ jit_orr(rd, rs, rt);
+
+ if (nor)
+ jit_comr(rd, rd);
+
+ lightrec_free_reg(reg_cache, rs);
+ lightrec_free_reg(reg_cache, rt);
+ lightrec_free_reg(reg_cache, rd);
+}
+
+static void rec_special_OR(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_special(block, op, jit_code_orr, false);
+ rec_special_or_nor(state, block, offset, false);
}
-static void rec_special_XOR(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_NOR(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_special(block, op, jit_code_xorr, false);
+ rec_special_or_nor(state, block, offset, true);
}
-static void rec_special_NOR(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_XOR(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
- u8 rd;
+ u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
- jit_name(__func__);
- rec_alu_special(block, op, jit_code_orr, false);
- rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
+ _jit_name(block->_jit, __func__);
+
+ jit_note(__FILE__, __LINE__);
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
+ rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
- jit_comr(rd, rd);
+ flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
+ flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
+ /* Z(rd) = Z(rs) & Z(rt) */
+ flags_rd = REG_ZEXT & flags_rs & flags_rt;
+
+ /* E(rd) = E(rs) & E(rt) */
+ flags_rd |= REG_EXT & flags_rs & flags_rt;
+
+ lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
+
+ jit_xorr(rd, rs, rt);
+
+ lightrec_free_reg(reg_cache, rs);
+ lightrec_free_reg(reg_cache, rt);
lightrec_free_reg(reg_cache, rd);
}
-static void rec_special_SLTU(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_SLTU(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_special(block, op, jit_code_ltr_u, true);
+ rec_alu_special(state, block, offset, jit_code_ltr_u, true);
}
-static void rec_special_SLT(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_SLT(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_special(block, op, jit_code_ltr, true);
+ rec_alu_special(state, block, offset, jit_code_ltr, true);
}
-static void rec_special_SLLV(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_SLLV(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_shiftv(block, op, jit_code_lshr);
+ rec_alu_shiftv(state, block, offset, jit_code_lshr);
}
-static void rec_special_SRLV(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_SRLV(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_shiftv(block, op, jit_code_rshr_u);
+ rec_alu_shiftv(state, block, offset, jit_code_rshr_u);
}
-static void rec_special_SRAV(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_SRAV(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_shiftv(block, op, jit_code_rshr);
+ rec_alu_shiftv(state, block, offset, jit_code_rshr);
}
-static void rec_alu_shift(const struct block *block,
- const struct opcode *op, jit_code_t code)
+static void rec_alu_shift(struct lightrec_cstate *state, const struct block *block,
+ u16 offset, jit_code_t code)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
- u8 rd, rt;
+ u8 rd, rt, flags = 0;
jit_note(__FILE__, __LINE__);
- if (code == jit_code_rshi) {
- rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
- rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
- } else {
- rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
- rd = lightrec_alloc_reg_out(reg_cache, _jit, op->r.rd);
- }
+ if (code == jit_code_rshi)
+ flags = REG_EXT;
+ else if (code == jit_code_rshi_u)
+ flags = REG_ZEXT;
-#if __WORDSIZE == 64
- if (code == jit_code_rshi_u) {
- jit_extr_ui(rd, rt);
- jit_new_node_www(code, rd, rd, op->r.imm);
- }
-#endif
- if (__WORDSIZE == 32 || code != jit_code_rshi_u)
- jit_new_node_www(code, rd, rt, op->r.imm);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
+
+ /* Input reg is zero-extended, if we SRL at least by one bit, we know
+ * the output reg will be both zero-extended and sign-extended. */
+ if (code == jit_code_rshi_u && c.r.imm)
+ flags |= REG_EXT;
+ rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags);
+
+ jit_new_node_www(code, rd, rt, c.r.imm);
lightrec_free_reg(reg_cache, rt);
lightrec_free_reg(reg_cache, rd);
}
-static void rec_special_SLL(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_SLL(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_shift(block, op, jit_code_lshi);
+ rec_alu_shift(state, block, offset, jit_code_lshi);
}
-static void rec_special_SRL(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_SRL(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_shift(block, op, jit_code_rshi_u);
+ rec_alu_shift(state, block, offset, jit_code_rshi_u);
}
-static void rec_special_SRA(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_SRA(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_shift(block, op, jit_code_rshi);
+ rec_alu_shift(state, block, offset, jit_code_rshi);
}
-static void rec_alu_mult(const struct block *block,
- const struct opcode *op, bool is_signed)
+static void rec_alu_mult(struct lightrec_cstate *state,
+ const struct block *block, u16 offset, bool is_signed)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
+ u16 flags = block->opcode_list[offset].flags;
+ u8 reg_lo = get_mult_div_lo(c);
+ u8 reg_hi = get_mult_div_hi(c);
jit_state_t *_jit = block->_jit;
- u8 lo, hi, rs, rt;
+ u8 lo, hi, rs, rt, rflags = 0;
jit_note(__FILE__, __LINE__);
- lo = lightrec_alloc_reg_out(reg_cache, _jit, REG_LO);
- if (!(op->flags & LIGHTREC_MULT32))
- hi = lightrec_alloc_reg_out_ext(reg_cache, _jit, REG_HI);
- else if (__WORDSIZE == 64)
- hi = lightrec_alloc_reg_temp(reg_cache, _jit);
-
- if (__WORDSIZE == 32 || !is_signed) {
- rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs);
- rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
+ if (is_signed)
+ rflags = REG_EXT;
+ else
+ rflags = REG_ZEXT;
+
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
+
+ if (!(flags & LIGHTREC_NO_LO))
+ lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
+ else if (__WORDSIZE == 32)
+ lo = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+ if (!(flags & LIGHTREC_NO_HI))
+ hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT);
+
+ if (__WORDSIZE == 32) {
+		/* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
+		 * operation if the MULT was detected as 32-bit only. */
+ if (!(flags & LIGHTREC_NO_HI)) {
+ if (is_signed)
+ jit_qmulr(lo, hi, rs, rt);
+ else
+ jit_qmulr_u(lo, hi, rs, rt);
+ } else {
+ jit_mulr(lo, rs, rt);
+ }
} else {
- rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs);
- rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
- }
+ /* On 64-bit systems, do a 64*64->64 bit operation. */
+ if (flags & LIGHTREC_NO_LO) {
+ jit_mulr(hi, rs, rt);
+ jit_rshi(hi, hi, 32);
+ } else {
+ jit_mulr(lo, rs, rt);
-#if __WORDSIZE == 32
- /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
- * operation if the MULT was detected a 32-bit only. */
- if (!(op->flags & LIGHTREC_MULT32)) {
- if (is_signed)
- jit_qmulr(lo, hi, rs, rt);
- else
- jit_qmulr_u(lo, hi, rs, rt);
- } else {
- jit_mulr(lo, rs, rt);
- }
-#else
- /* On 64-bit systems, do a 64*64->64 bit operation.
- * The input registers must be 32 bits, so we first sign-extend (if
- * mult) or clear (if multu) the input registers. */
- if (is_signed) {
- jit_mulr(lo, rs, rt);
- } else {
- jit_extr_ui(lo, rt);
- jit_extr_ui(hi, rs);
- jit_mulr(lo, hi, lo);
+ /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
+ if (!(flags & LIGHTREC_NO_HI))
+ jit_rshi(hi, lo, 32);
+ }
}
- /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
- if (!(op->flags & LIGHTREC_MULT32))
- jit_rshi(hi, lo, 32);
-#endif
-
lightrec_free_reg(reg_cache, rs);
lightrec_free_reg(reg_cache, rt);
- lightrec_free_reg(reg_cache, lo);
- if (__WORDSIZE == 64 || !(op->flags & LIGHTREC_MULT32))
+ if (!(flags & LIGHTREC_NO_LO) || __WORDSIZE == 32)
+ lightrec_free_reg(reg_cache, lo);
+ if (!(flags & LIGHTREC_NO_HI))
lightrec_free_reg(reg_cache, hi);
}
-static void rec_alu_div(const struct block *block,
- const struct opcode *op, bool is_signed)
+static void rec_alu_div(struct lightrec_cstate *state,
+ const struct block *block, u16 offset, bool is_signed)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
+ u16 flags = block->opcode_list[offset].flags;
+ bool no_check = flags & LIGHTREC_NO_DIV_CHECK;
+ u8 reg_lo = get_mult_div_lo(c);
+ u8 reg_hi = get_mult_div_hi(c);
jit_state_t *_jit = block->_jit;
jit_node_t *branch, *to_end;
- u8 lo, hi, rs, rt;
+ u8 lo, hi, rs, rt, rflags = 0;
jit_note(__FILE__, __LINE__);
- lo = lightrec_alloc_reg_out(reg_cache, _jit, REG_LO);
- hi = lightrec_alloc_reg_out(reg_cache, _jit, REG_HI);
- if (__WORDSIZE == 32 || !is_signed) {
- rs = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs);
- rt = lightrec_alloc_reg_in(reg_cache, _jit, op->r.rt);
- } else {
- rs = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rs);
- rt = lightrec_alloc_reg_in_ext(reg_cache, _jit, op->r.rt);
- }
+ if (is_signed)
+ rflags = REG_EXT;
+ else
+ rflags = REG_ZEXT;
+
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
+
+ if (!(flags & LIGHTREC_NO_LO))
+ lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
+
+ if (!(flags & LIGHTREC_NO_HI))
+ hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, 0);
/* Jump to special handler if dividing by zero */
- branch = jit_beqi(rt, 0);
+ if (!no_check)
+ branch = jit_beqi(rt, 0);
-#if __WORDSIZE == 32
- if (is_signed)
- jit_qdivr(lo, hi, rs, rt);
- else
- jit_qdivr_u(lo, hi, rs, rt);
-#else
- /* On 64-bit systems, the input registers must be 32 bits, so we first sign-extend
- * (if div) or clear (if divu) the input registers. */
- if (is_signed) {
- jit_qdivr(lo, hi, rs, rt);
+ if (flags & LIGHTREC_NO_LO) {
+ if (is_signed)
+ jit_remr(hi, rs, rt);
+ else
+ jit_remr_u(hi, rs, rt);
+ } else if (flags & LIGHTREC_NO_HI) {
+ if (is_signed)
+ jit_divr(lo, rs, rt);
+ else
+ jit_divr_u(lo, rs, rt);
} else {
- jit_extr_ui(lo, rt);
- jit_extr_ui(hi, rs);
- jit_qdivr_u(lo, hi, hi, lo);
+ if (is_signed)
+ jit_qdivr(lo, hi, rs, rt);
+ else
+ jit_qdivr_u(lo, hi, rs, rt);
}
-#endif
- /* Jump above the div-by-zero handler */
- to_end = jit_jmpi();
+ if (!no_check) {
+ lightrec_regcache_mark_live(reg_cache, _jit);
- jit_patch(branch);
+ /* Jump above the div-by-zero handler */
+ to_end = jit_jmpi();
- if (is_signed) {
- jit_lti(lo, rs, 0);
- jit_lshi(lo, lo, 1);
- jit_subi(lo, lo, 1);
- } else {
- jit_movi(lo, 0xffffffff);
- }
+ jit_patch(branch);
+
+ if (!(flags & LIGHTREC_NO_LO)) {
+ if (is_signed) {
+ jit_lti(lo, rs, 0);
+ jit_lshi(lo, lo, 1);
+ jit_subi(lo, lo, 1);
+ } else {
+ jit_movi(lo, 0xffffffff);
+ }
+ }
- jit_movr(hi, rs);
+ if (!(flags & LIGHTREC_NO_HI))
+ jit_movr(hi, rs);
- jit_patch(to_end);
+ jit_patch(to_end);
+ }
lightrec_free_reg(reg_cache, rs);
lightrec_free_reg(reg_cache, rt);
- lightrec_free_reg(reg_cache, lo);
- lightrec_free_reg(reg_cache, hi);
+
+ if (!(flags & LIGHTREC_NO_LO))
+ lightrec_free_reg(reg_cache, lo);
+
+ if (!(flags & LIGHTREC_NO_HI))
+ lightrec_free_reg(reg_cache, hi);
}
-static void rec_special_MULT(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_MULT(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_mult(block, op, true);
+ rec_alu_mult(state, block, offset, true);
}
-static void rec_special_MULTU(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_MULTU(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_mult(block, op, false);
+ rec_alu_mult(state, block, offset, false);
}
-static void rec_special_DIV(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_DIV(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_div(block, op, true);
+ rec_alu_div(state, block, offset, true);
}
-static void rec_special_DIVU(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_DIVU(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_alu_div(block, op, false);
+ rec_alu_div(state, block, offset, false);
}
-static void rec_alu_mv_lo_hi(const struct block *block, u8 dst, u8 src)
+static void rec_alu_mv_lo_hi(struct lightrec_cstate *state,
+ const struct block *block, u8 dst, u8 src)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
jit_state_t *_jit = block->_jit;
jit_note(__FILE__, __LINE__);
- src = lightrec_alloc_reg_in(reg_cache, _jit, src);
- dst = lightrec_alloc_reg_out_ext(reg_cache, _jit, dst);
+ src = lightrec_alloc_reg_in(reg_cache, _jit, src, 0);
+ dst = lightrec_alloc_reg_out(reg_cache, _jit, dst, REG_EXT);
-#if __WORDSIZE == 32
- jit_movr(dst, src);
-#else
jit_extr_i(dst, src);
-#endif
lightrec_free_reg(reg_cache, src);
lightrec_free_reg(reg_cache, dst);
}
-static void rec_special_MFHI(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_MFHI(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
+ union code c = block->opcode_list[offset].c;
+
_jit_name(block->_jit, __func__);
- rec_alu_mv_lo_hi(block, op->r.rd, REG_HI);
+ rec_alu_mv_lo_hi(state, block, c.r.rd, REG_HI);
}
-static void rec_special_MTHI(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_MTHI(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
+ union code c = block->opcode_list[offset].c;
+
_jit_name(block->_jit, __func__);
- rec_alu_mv_lo_hi(block, REG_HI, op->r.rs);
+ rec_alu_mv_lo_hi(state, block, REG_HI, c.r.rs);
}
-static void rec_special_MFLO(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_MFLO(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
+ union code c = block->opcode_list[offset].c;
+
_jit_name(block->_jit, __func__);
- rec_alu_mv_lo_hi(block, op->r.rd, REG_LO);
+ rec_alu_mv_lo_hi(state, block, c.r.rd, REG_LO);
}
-static void rec_special_MTLO(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_MTLO(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
+ union code c = block->opcode_list[offset].c;
+
_jit_name(block->_jit, __func__);
- rec_alu_mv_lo_hi(block, REG_LO, op->r.rs);
+ rec_alu_mv_lo_hi(state, block, REG_LO, c.r.rs);
}
-static void rec_io(const struct block *block, const struct opcode *op,
- bool load_rt, bool read_rt)
+static void call_to_c_wrapper(struct lightrec_cstate *state, const struct block *block,
+ u32 arg, bool with_arg, enum c_wrappers wrapper)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
jit_state_t *_jit = block->_jit;
- bool is_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
- u32 offset;
u8 tmp, tmp2, tmp3;
- jit_note(__FILE__, __LINE__);
+ if (with_arg)
+ tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);
+ tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
+ tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
- tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
+ jit_ldxi(tmp, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, c_wrapper));
+ jit_ldxi(tmp2, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, c_wrappers[wrapper]));
+ if (with_arg)
+ jit_movi(tmp3, arg);
- if (is_tagged) {
- offset = offsetof(struct lightrec_state, rw_func);
- } else {
- tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);
- offset = offsetof(struct lightrec_state, rw_generic_func);
- }
+ jit_callr(tmp);
- tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
- jit_ldxi(tmp2, LIGHTREC_REG_STATE, offset);
+ lightrec_free_reg(reg_cache, tmp);
+ lightrec_free_reg(reg_cache, tmp2);
+ if (with_arg)
+ lightrec_free_reg(reg_cache, tmp3);
+ lightrec_regcache_mark_live(reg_cache, _jit);
+}
+
+static void rec_io(struct lightrec_cstate *state,
+ const struct block *block, u16 offset,
+ bool load_rt, bool read_rt)
+{
+ struct regcache *reg_cache = state->reg_cache;
+ jit_state_t *_jit = block->_jit;
+ union code c = block->opcode_list[offset].c;
+ u16 flags = block->opcode_list[offset].flags;
+ bool is_tagged = flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
+ u32 lut_entry;
- lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, false);
+ jit_note(__FILE__, __LINE__);
- if (read_rt && likely(op->i.rt))
- lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true);
+ lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
+
+ if (read_rt && likely(c.i.rt))
+ lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
else if (load_rt)
- lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, false);
+ lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
if (is_tagged) {
- jit_movi(tmp, op->opcode);
+ call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_RW);
} else {
- jit_movi(tmp, (uintptr_t)op);
- jit_movi(tmp3, (uintptr_t)block);
+ lut_entry = lightrec_get_lut_entry(block);
+ call_to_c_wrapper(state, block, (lut_entry << 16) | offset,
+ true, C_WRAPPER_RW_GENERIC);
}
-
- jit_callr(tmp2);
-
- lightrec_free_reg(reg_cache, tmp);
- lightrec_free_reg(reg_cache, tmp2);
- if (!is_tagged)
- lightrec_free_reg(reg_cache, tmp3);
- lightrec_regcache_mark_live(reg_cache, _jit);
}
-static void rec_store_direct_no_invalidate(const struct block *block,
- const struct opcode *op,
- jit_code_t code)
+static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
+ const struct block *block,
+ u16 offset, jit_code_t code)
{
- struct lightrec_state *state = block->state;
- struct regcache *reg_cache = state->reg_cache;
+ struct lightrec_state *state = cstate->state;
+ struct regcache *reg_cache = cstate->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
jit_node_t *to_not_ram, *to_end;
u8 tmp, tmp2, rs, rt;
s16 imm;
jit_note(__FILE__, __LINE__);
- rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
- tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+ if (state->offset_ram || state->offset_scratch)
+ tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
/* Convert to KUNSEG and avoid RAM mirrors */
if (state->mirrors_mapped) {
- imm = (s16)op->i.imm;
+ imm = (s16)c.i.imm;
jit_andi(tmp, rs, 0x1f800000 | (4 * RAM_SIZE - 1));
- } else if (op->i.imm) {
+ } else if (c.i.imm) {
imm = 0;
- jit_addi(tmp, rs, (s16)op->i.imm);
+ jit_addi(tmp, rs, (s16)c.i.imm);
jit_andi(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1));
} else {
imm = 0;
if (state->offset_ram != state->offset_scratch) {
to_not_ram = jit_bmsi(tmp, BIT(28));
+ lightrec_regcache_mark_live(reg_cache, _jit);
+
jit_movi(tmp2, state->offset_ram);
to_end = jit_jmpi();
 		jit_movi(tmp2, state->offset_scratch);
}
- if (state->offset_ram || state->offset_scratch)
+ if (state->offset_ram || state->offset_scratch) {
jit_addr(tmp, tmp, tmp2);
+ lightrec_free_reg(reg_cache, tmp2);
+ }
- lightrec_free_reg(reg_cache, tmp2);
-
- rt = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rt);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
jit_new_node_www(code, imm, tmp, rt);
lightrec_free_reg(reg_cache, rt);
lightrec_free_reg(reg_cache, tmp);
}
-static void rec_store_direct(const struct block *block, const struct opcode *op,
- jit_code_t code)
+static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block,
+ u16 offset, jit_code_t code)
{
- struct lightrec_state *state = block->state;
- struct regcache *reg_cache = state->reg_cache;
+ struct lightrec_state *state = cstate->state;
+ u32 ram_size = state->mirrors_mapped ? RAM_SIZE * 4 : RAM_SIZE;
+ struct regcache *reg_cache = cstate->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
- jit_node_t *to_not_ram, *to_end = 0;
+ jit_node_t *to_not_ram, *to_end;
u8 tmp, tmp2, tmp3, rs, rt;
jit_note(__FILE__, __LINE__);
- rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
- tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0);
+ tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
/* Convert to KUNSEG and avoid RAM mirrors */
- if (op->i.imm) {
- jit_addi(tmp2, rs, (s16)op->i.imm);
- jit_andi(tmp2, tmp2, 0x1f800000 | (RAM_SIZE - 1));
+ if (c.i.imm) {
+ jit_addi(tmp2, rs, (s16)c.i.imm);
+ jit_andi(tmp2, tmp2, 0x1f800000 | (ram_size - 1));
} else {
- jit_andi(tmp2, rs, 0x1f800000 | (RAM_SIZE - 1));
+ jit_andi(tmp2, rs, 0x1f800000 | (ram_size - 1));
}
lightrec_free_reg(reg_cache, rs);
tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
- to_not_ram = jit_bgti(tmp2, RAM_SIZE);
+ to_not_ram = jit_bgti(tmp2, ram_size);
+
+ lightrec_regcache_mark_live(reg_cache, _jit);
/* Compute the offset to the code LUT */
jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3);
-#if __WORDSIZE == 64
- jit_lshi(tmp, tmp, 1);
-#endif
+ if (__WORDSIZE == 64)
+ jit_lshi(tmp, tmp, 1);
jit_addr(tmp, LIGHTREC_REG_STATE, tmp);
/* Write NULL to the code LUT to invalidate any block that's there */
lightrec_free_reg(reg_cache, tmp);
lightrec_free_reg(reg_cache, tmp3);
- rt = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rt);
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
jit_new_node_www(code, 0, tmp2, rt);
lightrec_free_reg(reg_cache, rt);
lightrec_free_reg(reg_cache, tmp2);
}
-static void rec_store(const struct block *block, const struct opcode *op,
- jit_code_t code)
+static void rec_store(struct lightrec_cstate *state,
+ const struct block *block, u16 offset, jit_code_t code)
{
- if (op->flags & LIGHTREC_NO_INVALIDATE) {
- rec_store_direct_no_invalidate(block, op, code);
- } else if (op->flags & LIGHTREC_DIRECT_IO) {
- if (block->state->invalidate_from_dma_only)
- rec_store_direct_no_invalidate(block, op, code);
+ u16 flags = block->opcode_list[offset].flags;
+
+ if (flags & LIGHTREC_NO_INVALIDATE) {
+ rec_store_direct_no_invalidate(state, block, offset, code);
+ } else if (flags & LIGHTREC_DIRECT_IO) {
+ if (state->state->invalidate_from_dma_only)
+ rec_store_direct_no_invalidate(state, block, offset, code);
else
- rec_store_direct(block, op, code);
+ rec_store_direct(state, block, offset, code);
} else {
- rec_io(block, op, true, false);
+ rec_io(state, block, offset, true, false);
}
}
-static void rec_SB(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SB(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_store(block, op, jit_code_stxi_c);
+ rec_store(state, block, offset, jit_code_stxi_c);
}
-static void rec_SH(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SH(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_store(block, op, jit_code_stxi_s);
+ rec_store(state, block, offset, jit_code_stxi_s);
}
-static void rec_SW(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SW(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
+
{
_jit_name(block->_jit, __func__);
- rec_store(block, op, jit_code_stxi_i);
+ rec_store(state, block, offset, jit_code_stxi_i);
}
-static void rec_SWL(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SWL(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_io(block, op, true, false);
+ rec_io(state, block, offset, true, false);
}
-static void rec_SWR(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SWR(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_io(block, op, true, false);
+ rec_io(state, block, offset, true, false);
}
-static void rec_SWC2(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_SWC2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_io(block, op, false, false);
+ rec_io(state, block, offset, false, false);
}
-static void rec_load_direct(const struct block *block, const struct opcode *op,
- jit_code_t code)
+static void rec_load_direct(struct lightrec_cstate *cstate, const struct block *block,
+ u16 offset, jit_code_t code, bool is_unsigned)
{
- struct lightrec_state *state = block->state;
- struct regcache *reg_cache = state->reg_cache;
+ struct lightrec_state *state = cstate->state;
+ struct regcache *reg_cache = cstate->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
- jit_node_t *to_not_ram, *to_not_bios = 0, *to_end, *to_end2;
- u8 tmp, rs, rt, addr_reg;
+ jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2;
+ u8 tmp, rs, rt, addr_reg, flags = REG_EXT;
s16 imm;
- if (!op->i.rt)
+ if (!c.i.rt)
return;
+ if (is_unsigned)
+ flags |= REG_ZEXT;
+
jit_note(__FILE__, __LINE__);
- rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs);
- rt = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->i.rt);
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
+ rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
if ((state->offset_ram == state->offset_bios &&
state->offset_ram == state->offset_scratch &&
- state->mirrors_mapped) || !op->i.imm) {
+ state->mirrors_mapped) || !c.i.imm) {
addr_reg = rs;
- imm = (s16)op->i.imm;
+ imm = (s16)c.i.imm;
} else {
- jit_addi(rt, rs, (s16)op->i.imm);
+ jit_addi(rt, rs, (s16)c.i.imm);
addr_reg = rt;
imm = 0;
- if (op->i.rs != op->i.rt)
+ if (c.i.rs != c.i.rt)
lightrec_free_reg(reg_cache, rs);
}
} else {
to_not_ram = jit_bmsi(addr_reg, BIT(28));
+ lightrec_regcache_mark_live(reg_cache, _jit);
+
/* Convert to KUNSEG and avoid RAM mirrors */
jit_andi(rt, addr_reg, RAM_SIZE - 1);
lightrec_free_reg(reg_cache, tmp);
}
-static void rec_load(const struct block *block, const struct opcode *op,
- jit_code_t code)
+static void rec_load(struct lightrec_cstate *state, const struct block *block,
+ u16 offset, jit_code_t code, bool is_unsigned)
{
- if (op->flags & LIGHTREC_DIRECT_IO)
- rec_load_direct(block, op, code);
+ u16 flags = block->opcode_list[offset].flags;
+
+ if (flags & LIGHTREC_DIRECT_IO)
+ rec_load_direct(state, block, offset, code, is_unsigned);
else
- rec_io(block, op, false, true);
+ rec_io(state, block, offset, false, true);
}
-static void rec_LB(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_load(block, op, jit_code_ldxi_c);
+ rec_load(state, block, offset, jit_code_ldxi_c, false);
}
-static void rec_LBU(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_load(block, op, jit_code_ldxi_uc);
+ rec_load(state, block, offset, jit_code_ldxi_uc, true);
}
-static void rec_LH(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_load(block, op, jit_code_ldxi_s);
+ rec_load(state, block, offset, jit_code_ldxi_s, false);
}
-static void rec_LHU(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_load(block, op, jit_code_ldxi_us);
+ rec_load(state, block, offset, jit_code_ldxi_us, true);
}
-static void rec_LWL(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LWL(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_io(block, op, true, true);
+ rec_io(state, block, offset, true, true);
}
-static void rec_LWR(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_io(block, op, true, true);
+ rec_io(state, block, offset, true, true);
}
-static void rec_LW(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_load(block, op, jit_code_ldxi_i);
+ rec_load(state, block, offset, jit_code_ldxi_i, false);
}
-static void rec_LWC2(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_io(block, op, false, false);
+ rec_io(state, block, offset, false, false);
}
-static void rec_break_syscall(const struct block *block,
- const struct opcode *op, u32 pc, bool is_break)
+static void rec_break_syscall(struct lightrec_cstate *state,
+ const struct block *block, u16 offset, bool is_break)
{
- struct regcache *reg_cache = block->state->reg_cache;
- jit_state_t *_jit = block->_jit;
- u32 offset;
- u8 tmp;
-
- jit_note(__FILE__, __LINE__);
+ _jit_note(block->_jit, __FILE__, __LINE__);
if (is_break)
- offset = offsetof(struct lightrec_state, break_func);
+ call_to_c_wrapper(state, block, 0, false, C_WRAPPER_BREAK);
else
- offset = offsetof(struct lightrec_state, syscall_func);
-
- tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
- jit_ldxi(tmp, LIGHTREC_REG_STATE, offset);
- jit_callr(tmp);
- lightrec_free_reg(reg_cache, tmp);
-
- lightrec_regcache_mark_live(reg_cache, _jit);
+ call_to_c_wrapper(state, block, 0, false, C_WRAPPER_SYSCALL);
/* TODO: the return address should be "pc - 4" if we're a delay slot */
- lightrec_emit_end_of_block(block, op, pc, -1, pc, 31, 0, true);
+ lightrec_emit_end_of_block(state, block, offset, -1,
+ get_ds_pc(block, offset, 0),
+ 31, 0, true);
}
-static void rec_special_SYSCALL(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_SYSCALL(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_break_syscall(block, op, pc, false);
+ rec_break_syscall(state, block, offset, false);
}
-static void rec_special_BREAK(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_special_BREAK(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_break_syscall(block, op, pc, true);
+ rec_break_syscall(state, block, offset, true);
}
-static void rec_mfc(const struct block *block, const struct opcode *op)
+static void rec_mfc(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- u8 tmp, tmp2;
- struct lightrec_state *state = block->state;
struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
jit_note(__FILE__, __LINE__);
+ lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
- tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
- tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+ call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MFC);
+}
- jit_ldxi(tmp2, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, mfc_func));
+static void rec_mtc(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
+{
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
+ jit_state_t *_jit = block->_jit;
- lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true);
+ jit_note(__FILE__, __LINE__);
+ lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
+ lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
- jit_movi(tmp, op->opcode);
- jit_callr(tmp2);
- lightrec_free_reg(reg_cache, tmp);
- lightrec_free_reg(reg_cache, tmp2);
+ call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MTC);
- lightrec_regcache_mark_live(reg_cache, _jit);
+ if (c.i.op == OP_CP0 &&
+ !(block->opcode_list[offset].flags & LIGHTREC_NO_DS) &&
+ (c.r.rd == 12 || c.r.rd == 13))
+ lightrec_emit_end_of_block(state, block, offset, -1,
+ get_ds_pc(block, offset, 1),
+ 0, 0, true);
}
-static void rec_mtc(const struct block *block, const struct opcode *op, u32 pc)
+static void
+rec_mfc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
- struct lightrec_state *state = block->state;
struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
- u8 tmp, tmp2;
+ u8 rt;
jit_note(__FILE__, __LINE__);
- tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
- tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
- jit_ldxi(tmp2, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, mtc_func));
+ rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT);
- lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, false);
- lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, false);
+ jit_ldxi_i(rt, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, regs.cp0[c.r.rd]));
- jit_movi(tmp, op->opcode);
- jit_callr(tmp2);
- lightrec_free_reg(reg_cache, tmp);
- lightrec_free_reg(reg_cache, tmp2);
+ lightrec_free_reg(reg_cache, rt);
+}
- lightrec_regcache_mark_live(reg_cache, _jit);
+static bool block_in_bios(const struct lightrec_cstate *state,
+ const struct block *block)
+{
+ const struct lightrec_mem_map *bios = &state->state->maps[PSX_MAP_BIOS];
+ u32 pc = kunseg(block->pc);
- if (op->i.op == OP_CP0 && !(op->flags & LIGHTREC_NO_DS) &&
- (op->r.rd == 12 || op->r.rd == 13))
- lightrec_emit_end_of_block(block, op, pc, -1, pc + 4, 0, 0, true);
+ return pc >= bios->pc && pc < bios->pc + bios->length;
}
-static void rec_cp0_MFC0(const struct block *block,
- const struct opcode *op, u32 pc)
+static void
+rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
- _jit_name(block->_jit, __func__);
- rec_mfc(block, op);
+ struct regcache *reg_cache = state->reg_cache;
+ const union code c = block->opcode_list[offset].c;
+ jit_state_t *_jit = block->_jit;
+ u8 rt, tmp, tmp2, status;
+
+ jit_note(__FILE__, __LINE__);
+
+ switch(c.r.rd) {
+ case 1:
+ case 4:
+ case 8:
+ case 14:
+ case 15:
+ /* Those registers are read-only */
+ return;
+ default:
+ break;
+ }
+
+ if (block_in_bios(state, block) && c.r.rd == 12) {
+ /* If we are running code from the BIOS, handle writes to the
+ * Status register in C. BIOS code may toggle bit 16 which will
+ * map/unmap the RAM, while game code cannot do that. */
+ rec_mtc(state, block, offset);
+ return;
+ }
+
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
+
+ if (c.r.rd != 13) {
+ jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[c.r.rd]),
+ LIGHTREC_REG_STATE, rt);
+ }
+
+ if (c.r.rd == 12 || c.r.rd == 13) {
+ tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+ jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, regs.cp0[13]));
+ }
+
+ if (c.r.rd == 12) {
+ status = rt;
+ } else if (c.r.rd == 13) {
+ tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+ /* Cause = (Cause & ~0x0300) | (value & 0x0300) */
+ jit_andi(tmp2, rt, 0x0300);
+ jit_ori(tmp, tmp, 0x0300);
+ jit_xori(tmp, tmp, 0x0300);
+ jit_orr(tmp, tmp, tmp2);
+ jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, regs.cp0[12]));
+ jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[13]),
+ LIGHTREC_REG_STATE, tmp);
+ status = tmp2;
+ }
+
+ if (c.r.rd == 12 || c.r.rd == 13) {
+ /* Exit dynarec in case there's a software interrupt.
+ * exit_flags = !!(status & tmp & 0x0300) & status; */
+ jit_andr(tmp, tmp, status);
+ jit_andi(tmp, tmp, 0x0300);
+ jit_nei(tmp, tmp, 0);
+ jit_andr(tmp, tmp, status);
+ jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
+ LIGHTREC_REG_STATE, tmp);
+
+ lightrec_free_reg(reg_cache, tmp);
+ }
+
+ if (c.r.rd == 13)
+ lightrec_free_reg(reg_cache, tmp2);
+
+ lightrec_free_reg(reg_cache, rt);
+
+ if (!(block->opcode_list[offset].flags & LIGHTREC_NO_DS) &&
+ (c.r.rd == 12 || c.r.rd == 13))
+ lightrec_emit_eob(state, block, offset + 1, true);
}
-static void rec_cp0_CFC0(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_cp0_MFC0(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_mfc(block, op);
+ rec_mfc0(state, block, offset);
}
-static void rec_cp0_MTC0(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_cp0_CFC0(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_mtc(block, op, pc);
+ rec_mfc0(state, block, offset);
}
-static void rec_cp0_CTC0(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_cp0_MTC0(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_mtc(block, op, pc);
+ rec_mtc0(state, block, offset);
}
-static void rec_cp2_basic_MFC2(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_cp0_CTC0(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_mfc(block, op);
+ rec_mtc0(state, block, offset);
}
-static void rec_cp2_basic_CFC2(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_mfc(block, op);
+ rec_mfc(state, block, offset);
}
-static void rec_cp2_basic_MTC2(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_cp2_basic_CFC2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_mtc(block, op, pc);
+ rec_mfc(state, block, offset);
}
-static void rec_cp2_basic_CTC2(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_mtc(block, op, pc);
+ rec_mtc(state, block, offset);
}
-static void rec_cp0_RFE(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_cp2_basic_CTC2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- struct lightrec_state *state = block->state;
- jit_state_t *_jit = block->_jit;
- u8 tmp;
-
- jit_name(__func__);
- jit_note(__FILE__, __LINE__);
-
- tmp = lightrec_alloc_reg_temp(state->reg_cache, _jit);
- jit_ldxi(tmp, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, rfe_func));
- jit_callr(tmp);
- lightrec_free_reg(state->reg_cache, tmp);
-
- lightrec_regcache_mark_live(state->reg_cache, _jit);
+ _jit_name(block->_jit, __func__);
+ rec_mtc(state, block, offset);
}
-static void rec_CP(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_cp0_RFE(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- struct regcache *reg_cache = block->state->reg_cache;
+ struct regcache *reg_cache = state->reg_cache;
jit_state_t *_jit = block->_jit;
- u8 tmp, tmp2;
+ u8 status, tmp;
jit_name(__func__);
jit_note(__FILE__, __LINE__);
- tmp = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
- tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+ status = lightrec_alloc_reg_temp(reg_cache, _jit);
+ jit_ldxi_i(status, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, regs.cp0[12]));
- jit_ldxi(tmp2, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, cp_func));
+ tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
- jit_movi(tmp, op->opcode);
- jit_callr(tmp2);
+ /* status = ((status >> 2) & 0xf) | status & ~0xf; */
+ jit_rshi(tmp, status, 2);
+ jit_andi(tmp, tmp, 0xf);
+ jit_andi(status, status, ~0xful);
+ jit_orr(status, status, tmp);
+
+ jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, regs.cp0[13]));
+ jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[12]),
+ LIGHTREC_REG_STATE, status);
+
+ /* Exit dynarec in case there's a software interrupt.
+ * exit_flags = !!(status & cause & 0x0300) & status; */
+ jit_andr(tmp, tmp, status);
+ jit_andi(tmp, tmp, 0x0300);
+ jit_nei(tmp, tmp, 0);
+ jit_andr(tmp, tmp, status);
+ jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
+ LIGHTREC_REG_STATE, tmp);
+
+ lightrec_free_reg(reg_cache, status);
lightrec_free_reg(reg_cache, tmp);
- lightrec_free_reg(reg_cache, tmp2);
-
- lightrec_regcache_mark_live(reg_cache, _jit);
}
-static void rec_meta_unload(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_CP(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- struct lightrec_state *state = block->state;
- struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
jit_name(__func__);
jit_note(__FILE__, __LINE__);
- pr_debug("Unloading reg %s\n", lightrec_reg_name(op->i.rs));
- lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, true);
+ call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_CP);
}
-static void rec_meta_BEQZ(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_meta_MOV(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- _jit_name(block->_jit, __func__);
- rec_b(block, op, pc, jit_code_bnei, 0, false, true);
-}
-
-static void rec_meta_BNEZ(const struct block *block,
- const struct opcode *op, u32 pc)
-{
- _jit_name(block->_jit, __func__);
- rec_b(block, op, pc, jit_code_beqi, 0, false, true);
-}
-
-static void rec_meta_MOV(const struct block *block,
- const struct opcode *op, u32 pc)
-{
- struct lightrec_state *state = block->state;
struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
u8 rs, rd;
_jit_name(block->_jit, __func__);
jit_note(__FILE__, __LINE__);
- rs = op->r.rs ? lightrec_alloc_reg_in(reg_cache, _jit, op->r.rs) : 0;
- rd = lightrec_alloc_reg_out_ext(reg_cache, _jit, op->r.rd);
+ if (c.r.rs)
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
+ rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, REG_EXT);
- if (op->r.rs == 0) {
+ if (c.r.rs == 0)
jit_movi(rd, 0);
- } else {
-#if __WORDSIZE == 32
- jit_movr(rd, rs);
-#else
+ else
jit_extr_i(rd, rs);
-#endif
- }
- lightrec_free_reg(state->reg_cache, rs);
- lightrec_free_reg(state->reg_cache, rd);
+ if (c.r.rs)
+ lightrec_free_reg(reg_cache, rs);
+ lightrec_free_reg(reg_cache, rd);
}
-static void rec_meta_sync(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state,
+ const struct block *block,
+ u16 offset)
{
- struct lightrec_state *state = block->state;
- struct lightrec_branch_target *target;
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
jit_state_t *_jit = block->_jit;
+ u8 rs, rt;
- jit_name(__func__);
+ _jit_name(block->_jit, __func__);
jit_note(__FILE__, __LINE__);
- jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
- state->cycles = 0;
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
+ rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT);
- lightrec_storeback_regs(state->reg_cache, _jit);
- lightrec_regcache_reset(state->reg_cache);
+ if (c.i.op == OP_META_EXTC)
+ jit_extr_c(rt, rs);
+ else
+ jit_extr_s(rt, rs);
- pr_debug("Adding branch target at offset 0x%x\n",
- op->offset << 2);
- target = &state->targets[state->nb_targets++];
- target->offset = op->offset;
- target->label = jit_indirect();
+ lightrec_free_reg(reg_cache, rs);
+ lightrec_free_reg(reg_cache, rt);
}
static const lightrec_rec_func_t rec_standard[64] = {
+ SET_DEFAULT_ELM(rec_standard, unknown_opcode),
[OP_SPECIAL] = rec_SPECIAL,
[OP_REGIMM] = rec_REGIMM,
[OP_J] = rec_J,
[OP_LWC2] = rec_LWC2,
[OP_SWC2] = rec_SWC2,
- [OP_META_REG_UNLOAD] = rec_meta_unload,
- [OP_META_BEQZ] = rec_meta_BEQZ,
- [OP_META_BNEZ] = rec_meta_BNEZ,
[OP_META_MOV] = rec_meta_MOV,
- [OP_META_SYNC] = rec_meta_sync,
+ [OP_META_EXTC] = rec_meta_EXTC_EXTS,
+ [OP_META_EXTS] = rec_meta_EXTC_EXTS,
};
static const lightrec_rec_func_t rec_special[64] = {
+ SET_DEFAULT_ELM(rec_special, unknown_opcode),
[OP_SPECIAL_SLL] = rec_special_SLL,
[OP_SPECIAL_SRL] = rec_special_SRL,
[OP_SPECIAL_SRA] = rec_special_SRA,
};
static const lightrec_rec_func_t rec_regimm[64] = {
+ SET_DEFAULT_ELM(rec_regimm, unknown_opcode),
[OP_REGIMM_BLTZ] = rec_regimm_BLTZ,
[OP_REGIMM_BGEZ] = rec_regimm_BGEZ,
[OP_REGIMM_BLTZAL] = rec_regimm_BLTZAL,
};
static const lightrec_rec_func_t rec_cp0[64] = {
+ SET_DEFAULT_ELM(rec_cp0, rec_CP),
[OP_CP0_MFC0] = rec_cp0_MFC0,
[OP_CP0_CFC0] = rec_cp0_CFC0,
[OP_CP0_MTC0] = rec_cp0_MTC0,
};
static const lightrec_rec_func_t rec_cp2_basic[64] = {
+ SET_DEFAULT_ELM(rec_cp2_basic, rec_CP),
[OP_CP2_BASIC_MFC2] = rec_cp2_basic_MFC2,
[OP_CP2_BASIC_CFC2] = rec_cp2_basic_CFC2,
[OP_CP2_BASIC_MTC2] = rec_cp2_basic_MTC2,
[OP_CP2_BASIC_CTC2] = rec_cp2_basic_CTC2,
};
-static void rec_SPECIAL(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_SPECIAL(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- lightrec_rec_func_t f = rec_special[op->r.op];
- if (likely(f))
- (*f)(block, op, pc);
+ union code c = block->opcode_list[offset].c;
+ lightrec_rec_func_t f = rec_special[c.r.op];
+
+ if (!HAS_DEFAULT_ELM && unlikely(!f))
+ unknown_opcode(state, block, offset);
else
- unknown_opcode(block, op, pc);
+ (*f)(state, block, offset);
}
-static void rec_REGIMM(const struct block *block,
- const struct opcode *op, u32 pc)
+static void rec_REGIMM(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- lightrec_rec_func_t f = rec_regimm[op->r.rt];
- if (likely(f))
- (*f)(block, op, pc);
+ union code c = block->opcode_list[offset].c;
+ lightrec_rec_func_t f = rec_regimm[c.r.rt];
+
+ if (!HAS_DEFAULT_ELM && unlikely(!f))
+ unknown_opcode(state, block, offset);
else
- unknown_opcode(block, op, pc);
+ (*f)(state, block, offset);
}
-static void rec_CP0(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_CP0(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- lightrec_rec_func_t f = rec_cp0[op->r.rs];
- if (likely(f))
- (*f)(block, op, pc);
+ union code c = block->opcode_list[offset].c;
+ lightrec_rec_func_t f = rec_cp0[c.r.rs];
+
+ if (!HAS_DEFAULT_ELM && unlikely(!f))
+ rec_CP(state, block, offset);
else
- rec_CP(block, op, pc);
+ (*f)(state, block, offset);
}
-static void rec_CP2(const struct block *block, const struct opcode *op, u32 pc)
+static void rec_CP2(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- if (op->r.op == OP_CP2_BASIC) {
- lightrec_rec_func_t f = rec_cp2_basic[op->r.rs];
- if (likely(f)) {
- (*f)(block, op, pc);
+ union code c = block->opcode_list[offset].c;
+
+ if (c.r.op == OP_CP2_BASIC) {
+ lightrec_rec_func_t f = rec_cp2_basic[c.r.rs];
+
+ if (HAS_DEFAULT_ELM || likely(f)) {
+ (*f)(state, block, offset);
return;
}
}
- rec_CP(block, op, pc);
+ rec_CP(state, block, offset);
}
-void lightrec_rec_opcode(const struct block *block,
- const struct opcode *op, u32 pc)
+void lightrec_rec_opcode(struct lightrec_cstate *state,
+ const struct block *block, u16 offset)
{
- lightrec_rec_func_t f = rec_standard[op->i.op];
- if (likely(f))
- (*f)(block, op, pc);
- else
- unknown_opcode(block, op, pc);
+ struct regcache *reg_cache = state->reg_cache;
+ struct lightrec_branch_target *target;
+ const struct opcode *op = &block->opcode_list[offset];
+ jit_state_t *_jit = block->_jit;
+ lightrec_rec_func_t f;
+
+ if (op->flags & LIGHTREC_SYNC) {
+ jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
+ state->cycles = 0;
+
+ lightrec_storeback_regs(reg_cache, _jit);
+ lightrec_regcache_reset(reg_cache);
+
+ pr_debug("Adding branch target at offset 0x%x\n", offset << 2);
+ target = &state->targets[state->nb_targets++];
+ target->offset = offset;
+ target->label = jit_indirect();
+ }
+
+ if (likely(op->opcode)) {
+ f = rec_standard[op->i.op];
+
+ if (!HAS_DEFAULT_ELM && unlikely(!f))
+ unknown_opcode(state, block, offset);
+ else
+ (*f)(state, block, offset);
+ }
+
+	if (unlikely(op->flags & LIGHTREC_UNLOAD_RD)) {
+		lightrec_clean_reg_if_loaded(reg_cache, _jit, op->r.rd, true);
+		pr_debug("Cleaning RD reg %s\n", lightrec_reg_name(op->r.rd));
+	}
+	if (unlikely(op->flags & LIGHTREC_UNLOAD_RS)) {
+		lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, true);
+		/* Fix copy-paste bug: this message reported RT's name while
+		 * cleaning RS; print the register actually being cleaned. */
+		pr_debug("Cleaning RS reg %s\n", lightrec_reg_name(op->i.rs));
+	}
+	if (unlikely(op->flags & LIGHTREC_UNLOAD_RT)) {
+		lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true);
+		pr_debug("Cleaning RT reg %s\n", lightrec_reg_name(op->i.rt));
+	}
}
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __EMITTER_H__
#include "lightrec.h"
struct block;
+struct lightrec_cstate;
struct opcode;
-void lightrec_rec_opcode(const struct block *block,
- const struct opcode *op, u32 pc);
-void lightrec_emit_eob(const struct block *block,
- const struct opcode *op, u32 pc);
+void lightrec_rec_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset);
+void lightrec_emit_eob(struct lightrec_cstate *state, const struct block *block,
+ u16 offset, _Bool after_op);
#endif /* __EMITTER_H__ */
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "disassembler.h"
struct opcode *op;
u32 cycles;
bool delay_slot;
+ u16 offset;
};
+/* PC of the opcode the interpreter currently points at; used as the
+ * branch origin ("old_pc") by the jump/branch handlers below.
+ * Delegates to get_branch_pc() — NOTE(review): semantics of the third
+ * argument not visible here, confirm against disassembler.h. */
+static u32 int_get_branch_pc(const struct interpreter *inter)
+{
+ return get_branch_pc(inter->block, inter->offset, 0);
+}
+
+/* PC located 'imm' opcodes past the current offset (callers pass 1 for
+ * the delay slot, 0 for the current opcode). Delegates to get_ds_pc(). */
+static inline u32 int_get_ds_pc(const struct interpreter *inter, s16 imm)
+{
+ return get_ds_pc(inter->block, inter->offset, imm);
+}
+
+/* Opcode immediately following the current one in the block's flat
+ * opcode list (replaces the old linked-list op->next pointer). */
+static inline struct opcode *next_op(const struct interpreter *inter)
+{
+ return &inter->block->opcode_list[inter->offset + 1];
+}
+
static inline u32 execute(lightrec_int_func_t func, struct interpreter *inter)
{
return (*func)(inter);
}
+/* Dispatch the interpreter's current opcode through the standard
+ * handler table and return the next PC it reports. */
+static inline u32 lightrec_int_op(struct interpreter *inter)
+{
+ return execute(int_standard[inter->op->i.op], inter);
+}
+
static inline u32 jump_skip(struct interpreter *inter)
{
- inter->op = inter->op->next;
+ inter->op = next_op(inter);
+ inter->offset++;
- return execute(int_standard[inter->op->i.op], inter);
+ if (inter->op->flags & LIGHTREC_SYNC) {
+ inter->state->current_cycle += inter->cycles;
+ inter->cycles = 0;
+ }
+
+ return lightrec_int_op(inter);
}
static inline u32 jump_next(struct interpreter *inter)
if (unlikely(inter->delay_slot))
return 0;
- inter->op = inter->op->next;
+ inter->op = next_op(inter);
+ inter->offset++;
return jump_skip(inter);
}
if (has_delay_slot(inter->op->c) &&
!(inter->op->flags & LIGHTREC_NO_DS))
- cycles += lightrec_cycles_of_opcode(inter->op->next->c);
+ cycles += lightrec_cycles_of_opcode(next_op(inter)->c);
inter->cycles += cycles;
inter->state->current_cycle += inter->cycles;
case OP_JAL:
return true;
case OP_BEQ:
- case OP_META_BEQZ:
return reg_cache[op.r.rs] == reg_cache[op.r.rt];
case OP_BNE:
- case OP_META_BNEZ:
return reg_cache[op.r.rs] != reg_cache[op.r.rt];
case OP_REGIMM:
switch (op.r.rt) {
static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
{
struct lightrec_state *state = inter->state;
- u32 *reg_cache = state->native_reg_cache;
- struct opcode new_op, *op = inter->op->next;
+ u32 *reg_cache = state->regs.gpr;
+ struct opcode new_op, *op = next_op(inter);
union code op_next;
struct interpreter inter2 = {
.state = state,
* but on branch boundaries, we need to adjust the return
* address so that the GTE opcode is effectively executed.
*/
- cause = (*state->ops.cop0_ops.cfc)(state, op->c.opcode, 13);
- epc = (*state->ops.cop0_ops.cfc)(state, op->c.opcode, 14);
+ cause = state->regs.cp0[13];
+ epc = state->regs.cp0[14];
if (!(cause & 0x7c) && epc == pc - 4)
pc -= 4;
} else {
new_op.c = op_next;
new_op.flags = 0;
- new_op.offset = 0;
- new_op.next = NULL;
inter2.op = &new_op;
/* Execute the first opcode of the next block */
- (*int_standard[inter2.op->i.op])(&inter2);
+ lightrec_int_op(&inter2);
if (save_rs) {
new_rs = reg_cache[op->r.rs];
inter->cycles += lightrec_cycles_of_opcode(op_next);
}
} else {
- next_pc = inter->block->pc
- + (inter->op->offset + 2) * sizeof(u32);
+ next_pc = int_get_ds_pc(inter, 2);
}
inter2.block = inter->block;
new_rt = reg_cache[op->r.rt];
/* Execute delay slot opcode */
- ds_next_pc = (*int_standard[inter2.op->i.op])(&inter2);
+ ds_next_pc = lightrec_int_op(&inter2);
if (branch_at_addr) {
if (op_next.i.op == OP_SPECIAL)
new_op.c = op_next;
new_op.flags = 0;
- new_op.offset = sizeof(u32);
- new_op.next = NULL;
inter2.op = &new_op;
inter2.block = NULL;
pr_debug("Running delay slot of branch at target of impossible "
"branch\n");
- (*int_standard[inter2.op->i.op])(&inter2);
+ lightrec_int_op(&inter2);
}
return next_pc;
static u32 int_jump(struct interpreter *inter, bool link)
{
struct lightrec_state *state = inter->state;
- u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+ u32 old_pc = int_get_branch_pc(inter);
u32 pc = (old_pc & 0xf0000000) | (inter->op->j.imm << 2);
if (link)
- state->native_reg_cache[31] = old_pc + 8;
+ state->regs.gpr[31] = old_pc + 8;
if (inter->op->flags & LIGHTREC_NO_DS)
return pc;
static u32 int_jumpr(struct interpreter *inter, u8 link_reg)
{
struct lightrec_state *state = inter->state;
- u32 old_pc, next_pc = state->native_reg_cache[inter->op->r.rs];
+ u32 old_pc, next_pc = state->regs.gpr[inter->op->r.rs];
if (link_reg) {
- old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
- state->native_reg_cache[link_reg] = old_pc + 8;
+ old_pc = int_get_branch_pc(inter);
+ state->regs.gpr[link_reg] = old_pc + 8;
}
if (inter->op->flags & LIGHTREC_NO_DS)
(inter->op->flags & LIGHTREC_LOCAL_BRANCH) &&
(s16)inter->op->c.i.imm >= 0) {
next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2);
- next_pc = lightrec_emulate_block(inter->block, next_pc);
+ next_pc = lightrec_emulate_block(inter->state, inter->block, next_pc);
}
return next_pc;
static u32 int_beq(struct interpreter *inter, bool bne)
{
- u32 rs, rt, old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+ u32 rs, rt, old_pc = int_get_branch_pc(inter);
- rs = inter->state->native_reg_cache[inter->op->i.rs];
- rt = inter->state->native_reg_cache[inter->op->i.rt];
+ rs = inter->state->regs.gpr[inter->op->i.rs];
+ rt = inter->state->regs.gpr[inter->op->i.rt];
return int_branch(inter, old_pc, inter->op->c, (rs == rt) ^ bne);
}
static u32 int_bgez(struct interpreter *inter, bool link, bool lt, bool regimm)
{
- u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+ u32 old_pc = int_get_branch_pc(inter);
s32 rs;
if (link)
- inter->state->native_reg_cache[31] = old_pc + 8;
+ inter->state->regs.gpr[31] = old_pc + 8;
- rs = (s32)inter->state->native_reg_cache[inter->op->i.rs];
+ rs = (s32)inter->state->regs.gpr[inter->op->i.rs];
return int_branch(inter, old_pc, inter->op->c,
((regimm && !rs) || rs > 0) ^ lt);
val = lightrec_mfc(state, op->c);
if (likely(op->r.rt))
- state->native_reg_cache[op->r.rt] = val;
+ state->regs.gpr[op->r.rt] = val;
return jump_next(inter);
}
struct lightrec_state *state = inter->state;
const struct opcode *op = inter->op;
- lightrec_mtc(state, op->c, state->native_reg_cache[op->r.rt]);
+ lightrec_mtc(state, op->c, state->regs.gpr[op->r.rt]);
/* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause),
* return early so that the emulator will be able to check software
* interrupt status. */
if (!(inter->op->flags & LIGHTREC_NO_DS) &&
op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13))
- return inter->block->pc + (op->offset + 1) * sizeof(u32);
+ return int_get_ds_pc(inter, 1);
else
return jump_next(inter);
}
static u32 int_cp0_RFE(struct interpreter *inter)
{
- struct lightrec_state *state = inter->state;
- u32 status;
-
- /* Read CP0 Status register (r12) */
- status = state->ops.cop0_ops.mfc(state, inter->op->c.opcode, 12);
-
- /* Switch the bits */
- status = ((status & 0x3c) >> 2) | (status & ~0xf);
-
- /* Write it back */
- state->ops.cop0_ops.ctc(state, inter->op->c.opcode, 12, status);
+ lightrec_rfe(inter->state);
return jump_next(inter);
}
static u32 int_CP(struct interpreter *inter)
{
- struct lightrec_state *state = inter->state;
- const struct lightrec_cop_ops *ops;
- const struct opcode *op = inter->op;
-
- if ((op->j.imm >> 25) & 1)
- ops = &state->ops.cop2_ops;
- else
- ops = &state->ops.cop0_ops;
-
- (*ops->op)(state, (op->j.imm) & ~(1 << 25));
+ lightrec_cp(inter->state, inter->op->c);
return jump_next(inter);
}
static u32 int_ADDI(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_i *op = &inter->op->i;
if (likely(op->rt))
static u32 int_SLTI(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_i *op = &inter->op->i;
if (likely(op->rt))
static u32 int_SLTIU(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_i *op = &inter->op->i;
if (likely(op->rt))
static u32 int_ANDI(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_i *op = &inter->op->i;
if (likely(op->rt))
static u32 int_ORI(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_i *op = &inter->op->i;
if (likely(op->rt))
static u32 int_XORI(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_i *op = &inter->op->i;
if (likely(op->rt))
{
struct opcode_i *op = &inter->op->i;
- inter->state->native_reg_cache[op->rt] = op->imm << 16;
+ inter->state->regs.gpr[op->rt] = op->imm << 16;
return jump_next(inter);
}
static u32 int_io(struct interpreter *inter, bool is_load)
{
struct opcode_i *op = &inter->op->i;
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
u32 val;
val = lightrec_rw(inter->state, inter->op->c,
reg_cache[op->rs], reg_cache[op->rt],
- &inter->op->flags);
+ &inter->op->flags, inter->block);
if (is_load && op->rt)
reg_cache[op->rt] = val;
return int_io(inter, false);
lightrec_rw(inter->state, inter->op->c,
- inter->state->native_reg_cache[inter->op->i.rs],
- inter->state->native_reg_cache[inter->op->i.rt],
- &inter->op->flags);
+ inter->state->regs.gpr[inter->op->i.rs],
+ inter->state->regs.gpr[inter->op->i.rt],
+ &inter->op->flags, inter->block);
- next_pc = inter->block->pc + (inter->op->offset + 1) * 4;
+ next_pc = int_get_ds_pc(inter, 1);
/* Invalidate next PC, to force the rest of the block to be rebuilt */
lightrec_invalidate(inter->state, next_pc, 4);
u32 rt;
if (op->opcode) { /* Handle NOPs */
- rt = inter->state->native_reg_cache[op->r.rt];
- inter->state->native_reg_cache[op->r.rd] = rt << op->r.imm;
+ rt = inter->state->regs.gpr[op->r.rt];
+ inter->state->regs.gpr[op->r.rd] = rt << op->r.imm;
}
return jump_next(inter);
static u32 int_special_SRL(struct interpreter *inter)
{
struct opcode *op = inter->op;
- u32 rt = inter->state->native_reg_cache[op->r.rt];
+ u32 rt = inter->state->regs.gpr[op->r.rt];
- inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm;
+ inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm;
return jump_next(inter);
}
static u32 int_special_SRA(struct interpreter *inter)
{
struct opcode *op = inter->op;
- s32 rt = inter->state->native_reg_cache[op->r.rt];
+ s32 rt = inter->state->regs.gpr[op->r.rt];
- inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm;
+ inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm;
return jump_next(inter);
}
static u32 int_special_SLLV(struct interpreter *inter)
{
struct opcode *op = inter->op;
- u32 rs = inter->state->native_reg_cache[op->r.rs];
- u32 rt = inter->state->native_reg_cache[op->r.rt];
+ u32 rs = inter->state->regs.gpr[op->r.rs];
+ u32 rt = inter->state->regs.gpr[op->r.rt];
- inter->state->native_reg_cache[op->r.rd] = rt << (rs & 0x1f);
+ inter->state->regs.gpr[op->r.rd] = rt << (rs & 0x1f);
return jump_next(inter);
}
static u32 int_special_SRLV(struct interpreter *inter)
{
struct opcode *op = inter->op;
- u32 rs = inter->state->native_reg_cache[op->r.rs];
- u32 rt = inter->state->native_reg_cache[op->r.rt];
+ u32 rs = inter->state->regs.gpr[op->r.rs];
+ u32 rt = inter->state->regs.gpr[op->r.rt];
- inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f);
+ inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f);
return jump_next(inter);
}
static u32 int_special_SRAV(struct interpreter *inter)
{
struct opcode *op = inter->op;
- u32 rs = inter->state->native_reg_cache[op->r.rs];
- s32 rt = inter->state->native_reg_cache[op->r.rt];
+ u32 rs = inter->state->regs.gpr[op->r.rs];
+ s32 rt = inter->state->regs.gpr[op->r.rt];
- inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f);
+ inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f);
return jump_next(inter);
}
else
inter->state->exit_flags |= LIGHTREC_EXIT_SYSCALL;
- return inter->block->pc + inter->op->offset * sizeof(u32);
+ return int_get_ds_pc(inter, 0);
}
static u32 int_special_MFHI(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_r *op = &inter->op->r;
if (likely(op->rd))
static u32 int_special_MTHI(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
reg_cache[REG_HI] = reg_cache[inter->op->r.rs];
static u32 int_special_MFLO(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_r *op = &inter->op->r;
if (likely(op->rd))
static u32 int_special_MTLO(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
reg_cache[REG_LO] = reg_cache[inter->op->r.rs];
static u32 int_special_MULT(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
s32 rs = reg_cache[inter->op->r.rs];
s32 rt = reg_cache[inter->op->r.rt];
+ u8 reg_lo = get_mult_div_lo(inter->op->c);
+ u8 reg_hi = get_mult_div_hi(inter->op->c);
u64 res = (s64)rs * (s64)rt;
- if (!(inter->op->flags & LIGHTREC_MULT32))
- reg_cache[REG_HI] = res >> 32;
- reg_cache[REG_LO] = res;
+ if (!(inter->op->flags & LIGHTREC_NO_HI))
+ reg_cache[reg_hi] = res >> 32;
+ if (!(inter->op->flags & LIGHTREC_NO_LO))
+ reg_cache[reg_lo] = res;
return jump_next(inter);
}
static u32 int_special_MULTU(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
u32 rs = reg_cache[inter->op->r.rs];
u32 rt = reg_cache[inter->op->r.rt];
+ u8 reg_lo = get_mult_div_lo(inter->op->c);
+ u8 reg_hi = get_mult_div_hi(inter->op->c);
u64 res = (u64)rs * (u64)rt;
- if (!(inter->op->flags & LIGHTREC_MULT32))
- reg_cache[REG_HI] = res >> 32;
- reg_cache[REG_LO] = res;
+ if (!(inter->op->flags & LIGHTREC_NO_HI))
+ reg_cache[reg_hi] = res >> 32;
+ if (!(inter->op->flags & LIGHTREC_NO_LO))
+ reg_cache[reg_lo] = res;
return jump_next(inter);
}
static u32 int_special_DIV(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
s32 rs = reg_cache[inter->op->r.rs];
s32 rt = reg_cache[inter->op->r.rt];
+ u8 reg_lo = get_mult_div_lo(inter->op->c);
+ u8 reg_hi = get_mult_div_hi(inter->op->c);
u32 lo, hi;
if (rt == 0) {
hi = rs;
lo = (rs < 0) * 2 - 1;
+/* Keep this guard: evaluating INT_MIN / -1 in C is signed-overflow
+ * undefined behaviour and traps (SIGFPE) on x86, whereas the MIPS DIV
+ * instruction defines the result as LO = 0x80000000, HI = 0. */
} else if ((rs == 0x80000000) && (rt == 0xFFFFFFFF)) {
lo = rs;
hi = 0;
} else {
lo = rs / rt;
hi = rs % rt;
}
- reg_cache[REG_HI] = hi;
- reg_cache[REG_LO] = lo;
+ if (!(inter->op->flags & LIGHTREC_NO_HI))
+ reg_cache[reg_hi] = hi;
+ if (!(inter->op->flags & LIGHTREC_NO_LO))
+ reg_cache[reg_lo] = lo;
return jump_next(inter);
}
static u32 int_special_DIVU(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
u32 rs = reg_cache[inter->op->r.rs];
u32 rt = reg_cache[inter->op->r.rt];
+ u8 reg_lo = get_mult_div_lo(inter->op->c);
+ u8 reg_hi = get_mult_div_hi(inter->op->c);
u32 lo, hi;
if (rt == 0) {
hi = rs % rt;
}
- reg_cache[REG_HI] = hi;
- reg_cache[REG_LO] = lo;
+ if (!(inter->op->flags & LIGHTREC_NO_HI))
+ reg_cache[reg_hi] = hi;
+ if (!(inter->op->flags & LIGHTREC_NO_LO))
+ reg_cache[reg_lo] = lo;
return jump_next(inter);
}
static u32 int_special_ADD(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_r *op = &inter->op->r;
s32 rs = reg_cache[op->rs];
s32 rt = reg_cache[op->rt];
static u32 int_special_SUB(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_r *op = &inter->op->r;
u32 rs = reg_cache[op->rs];
u32 rt = reg_cache[op->rt];
static u32 int_special_AND(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_r *op = &inter->op->r;
u32 rs = reg_cache[op->rs];
u32 rt = reg_cache[op->rt];
static u32 int_special_OR(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_r *op = &inter->op->r;
u32 rs = reg_cache[op->rs];
u32 rt = reg_cache[op->rt];
static u32 int_special_XOR(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_r *op = &inter->op->r;
u32 rs = reg_cache[op->rs];
u32 rt = reg_cache[op->rt];
static u32 int_special_NOR(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_r *op = &inter->op->r;
u32 rs = reg_cache[op->rs];
u32 rt = reg_cache[op->rt];
static u32 int_special_SLT(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_r *op = &inter->op->r;
s32 rs = reg_cache[op->rs];
s32 rt = reg_cache[op->rt];
static u32 int_special_SLTU(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_r *op = &inter->op->r;
u32 rs = reg_cache[op->rs];
u32 rt = reg_cache[op->rt];
return jump_next(inter);
}
-static u32 int_META_SKIP(struct interpreter *inter)
-{
- return jump_skip(inter);
-}
-
static u32 int_META_MOV(struct interpreter *inter)
{
- u32 *reg_cache = inter->state->native_reg_cache;
+ u32 *reg_cache = inter->state->regs.gpr;
struct opcode_r *op = &inter->op->r;
if (likely(op->rd))
return jump_next(inter);
}
-static u32 int_META_SYNC(struct interpreter *inter)
+static u32 int_META_EXTC(struct interpreter *inter)
{
- inter->state->current_cycle += inter->cycles;
- inter->cycles = 0;
+ u32 *reg_cache = inter->state->regs.gpr;
+ struct opcode_i *op = &inter->op->i;
- return jump_skip(inter);
+ if (likely(op->rt))
+ reg_cache[op->rt] = (u32)(s32)(s8)reg_cache[op->rs];
+
+ return jump_next(inter);
+}
+
+static u32 int_META_EXTS(struct interpreter *inter)
+{
+ u32 *reg_cache = inter->state->regs.gpr;
+ struct opcode_i *op = &inter->op->i;
+
+ if (likely(op->rt))
+ reg_cache[op->rt] = (u32)(s32)(s16)reg_cache[op->rs];
+
+ return jump_next(inter);
}
static const lightrec_int_func_t int_standard[64] = {
+ SET_DEFAULT_ELM(int_standard, int_unimplemented),
[OP_SPECIAL] = int_SPECIAL,
[OP_REGIMM] = int_REGIMM,
[OP_J] = int_J,
[OP_LWC2] = int_LWC2,
[OP_SWC2] = int_store,
- [OP_META_REG_UNLOAD] = int_META_SKIP,
- [OP_META_BEQZ] = int_BEQ,
- [OP_META_BNEZ] = int_BNE,
[OP_META_MOV] = int_META_MOV,
- [OP_META_SYNC] = int_META_SYNC,
+ [OP_META_EXTC] = int_META_EXTC,
+ [OP_META_EXTS] = int_META_EXTS,
};
static const lightrec_int_func_t int_special[64] = {
+ SET_DEFAULT_ELM(int_special, int_unimplemented),
[OP_SPECIAL_SLL] = int_special_SLL,
[OP_SPECIAL_SRL] = int_special_SRL,
[OP_SPECIAL_SRA] = int_special_SRA,
};
static const lightrec_int_func_t int_regimm[64] = {
+ SET_DEFAULT_ELM(int_regimm, int_unimplemented),
[OP_REGIMM_BLTZ] = int_regimm_BLTZ,
[OP_REGIMM_BGEZ] = int_regimm_BGEZ,
[OP_REGIMM_BLTZAL] = int_regimm_BLTZAL,
};
static const lightrec_int_func_t int_cp0[64] = {
+ SET_DEFAULT_ELM(int_cp0, int_CP),
[OP_CP0_MFC0] = int_cfc,
[OP_CP0_CFC0] = int_cfc,
[OP_CP0_MTC0] = int_ctc,
};
static const lightrec_int_func_t int_cp2_basic[64] = {
+ SET_DEFAULT_ELM(int_cp2_basic, int_CP),
[OP_CP2_BASIC_MFC2] = int_cfc,
[OP_CP2_BASIC_CFC2] = int_cfc,
[OP_CP2_BASIC_MTC2] = int_ctc,
static u32 int_SPECIAL(struct interpreter *inter)
{
lightrec_int_func_t f = int_special[inter->op->r.op];
- if (likely(f))
- return execute(f, inter);
- else
+
+ if (!HAS_DEFAULT_ELM && unlikely(!f))
return int_unimplemented(inter);
+
+ return execute(f, inter);
}
static u32 int_REGIMM(struct interpreter *inter)
{
lightrec_int_func_t f = int_regimm[inter->op->r.rt];
- if (likely(f))
- return execute(f, inter);
- else
+
+ if (!HAS_DEFAULT_ELM && unlikely(!f))
return int_unimplemented(inter);
+
+ return execute(f, inter);
}
static u32 int_CP0(struct interpreter *inter)
{
lightrec_int_func_t f = int_cp0[inter->op->r.rs];
- if (likely(f))
- return execute(f, inter);
- else
+
+ if (!HAS_DEFAULT_ELM && unlikely(!f))
return int_CP(inter);
+
+ return execute(f, inter);
}
static u32 int_CP2(struct interpreter *inter)
{
if (inter->op->r.op == OP_CP2_BASIC) {
lightrec_int_func_t f = int_cp2_basic[inter->op->r.rs];
- if (likely(f))
+ if (HAS_DEFAULT_ELM || likely(f))
return execute(f, inter);
}
return int_CP(inter);
}
-static u32 lightrec_int_op(struct interpreter *inter)
-{
- return execute(int_standard[inter->op->i.op], inter);
-}
-
-static u32 lightrec_emulate_block_list(struct block *block, struct opcode *op)
+static u32 lightrec_emulate_block_list(struct lightrec_state *state,
+ struct block *block, u32 offset)
{
struct interpreter inter;
u32 pc;
inter.block = block;
- inter.state = block->state;
- inter.op = op;
+ inter.state = state;
+ inter.offset = offset;
+ inter.op = &block->opcode_list[offset];
inter.cycles = 0;
inter.delay_slot = false;
/* Add the cycles of the last branch */
inter.cycles += lightrec_cycles_of_opcode(inter.op->c);
- block->state->current_cycle += inter.cycles;
+ state->current_cycle += inter.cycles;
return pc;
}
-u32 lightrec_emulate_block(struct block *block, u32 pc)
+u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u32 pc)
{
u32 offset = (kunseg(pc) - kunseg(block->pc)) >> 2;
- struct opcode *op;
- for (op = block->opcode_list;
- op && (op->offset < offset); op = op->next);
- if (op)
- return lightrec_emulate_block_list(block, op);
+ if (offset < block->nb_ops)
+ return lightrec_emulate_block_list(state, block, offset);
pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc);
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_INTERPRETER_H__
struct block;
-u32 lightrec_emulate_block(struct block *block, u32 pc);
+u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u32 pc);
#endif /* __LIGHTREC_INTERPRETER_H__ */
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2022 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#ifndef __LIGHTNING_WRAPPER_H__
+#define __LIGHTNING_WRAPPER_H__
+
+#include <lightning.h>
+
+#if __WORDSIZE == 32
+
+/* On 32-bit hosts, alias the "unsigned int" helpers to their 32-bit
+ * counterparts: a GPR is already 32 bits wide, so a 32-bit sign/zero
+ * extension is just a register move, and the _ui load/store/arg
+ * variants collapse onto the _i ones. Presumably GNU Lightning does
+ * not provide these names on 32-bit targets — confirm against
+ * lightning.h. */
+#define jit_ldxi_ui(u,v,w) jit_ldxi_i(u,v,w)
+#define jit_stxi_ui(u,v,w) jit_stxi_i(u,v,w)
+#define jit_extr_i(u,v) jit_movr(u,v)
+#define jit_extr_ui(u,v) jit_movr(u,v)
+#define jit_retval_ui(u) jit_retval(u)
+#define jit_getarg_ui(u,v) jit_getarg_i(u,v)
+
+#endif
+
+#endif /* __LIGHTNING_WRAPPER_H__ */
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#ifndef __LIGHTREC_CONFIG_H__
+#define __LIGHTREC_CONFIG_H__
+
+/* Build-configuration template processed by CMake's configure_file().
+ * Each #cmakedefine01 expands to "#define NAME 0" or "#define NAME 1",
+ * so every macro below is always defined and safe to test with plain
+ * #if (as done with HAS_DEFAULT_ELM in lightrec-private.h). */
+
+#cmakedefine01 ENABLE_THREADED_COMPILER
+#cmakedefine01 ENABLE_FIRST_PASS
+#cmakedefine01 ENABLE_DISASSEMBLER
+#cmakedefine01 ENABLE_TINYMM
+
+/* Non-zero when the compiler supports GNU ranged designated
+ * initializers ([0 ... N] = x); mirrors HAS_DEFAULT_ELM in CMake. */
+#cmakedefine01 HAS_DEFAULT_ELM
+
+/* One switch per code-analysis/optimization pass; see the matching
+ * option() declarations in CMakeLists.txt for descriptions. */
+#cmakedefine01 OPT_REMOVE_DIV_BY_ZERO_SEQ
+#cmakedefine01 OPT_REPLACE_MEMSET
+#cmakedefine01 OPT_DETECT_IMPOSSIBLE_BRANCHES
+#cmakedefine01 OPT_TRANSFORM_OPS
+#cmakedefine01 OPT_LOCAL_BRANCHES
+#cmakedefine01 OPT_SWITCH_DELAY_SLOTS
+#cmakedefine01 OPT_FLAG_STORES
+#cmakedefine01 OPT_FLAG_IO
+#cmakedefine01 OPT_FLAG_MULT_DIV
+#cmakedefine01 OPT_EARLY_UNLOAD
+
+#endif /* __LIGHTREC_CONFIG_H__ */
+
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2016-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2016-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_PRIVATE_H__
#define __LIGHTREC_PRIVATE_H__
-#include "config.h"
+#include "lightrec-config.h"
#include "disassembler.h"
#include "lightrec.h"
#endif
#define ARRAY_SIZE(x) (sizeof(x) ? sizeof(x) / sizeof((x)[0]) : 0)
-#define BIT(x) (1 << (x))
#ifdef __GNUC__
# define likely(x) __builtin_expect(!!(x),1)
# define HTOLE16(x) (x)
#endif
+#if HAS_DEFAULT_ELM
+#define SET_DEFAULT_ELM(table, value) [0 ... ARRAY_SIZE(table) - 1] = value
+#else
+#define SET_DEFAULT_ELM(table, value) [0] = NULL
+#endif
+
/* Flags for (struct block *)->flags */
#define BLOCK_NEVER_COMPILE BIT(0)
#define BLOCK_SHOULD_RECOMPILE BIT(1)
#define BLOCK_FULLY_TAGGED BIT(2)
#define BLOCK_IS_DEAD BIT(3)
+#define BLOCK_IS_MEMSET BIT(4)
#define RAM_SIZE 0x200000
#define BIOS_SIZE 0x80000
#define CODE_LUT_SIZE ((RAM_SIZE + BIOS_SIZE) >> 2)
+#define REG_LO 32
+#define REG_HI 33
+
/* Definition of jit_state_t (avoids inclusion of <lightning.h>) */
struct jit_node;
struct jit_state;
struct block {
jit_state_t *_jit;
- struct lightrec_state *state;
struct opcode *opcode_list;
void (*function)(void);
+ const u32 *code;
+ struct block *next;
u32 pc;
u32 hash;
+ unsigned int code_size;
+ u16 nb_ops;
+ u8 flags;
#if ENABLE_THREADED_COMPILER
atomic_flag op_list_freed;
#endif
- unsigned int code_size;
- u16 flags;
- u16 nb_ops;
- const struct lightrec_mem_map *map;
- struct block *next;
};
struct lightrec_branch {
u32 offset;
};
-struct lightrec_state {
- u32 native_reg_cache[34];
- u32 next_pc;
- u32 current_cycle;
- u32 target_cycle;
- u32 exit_flags;
- struct block *dispatcher, *rw_wrapper, *rw_generic_wrapper,
- *mfc_wrapper, *mtc_wrapper, *rfe_wrapper, *cp_wrapper,
- *syscall_wrapper, *break_wrapper;
- void *rw_func, *rw_generic_func, *mfc_func, *mtc_func, *rfe_func,
- *cp_func, *syscall_func, *break_func;
+enum c_wrappers {
+ C_WRAPPER_RW,
+ C_WRAPPER_RW_GENERIC,
+ C_WRAPPER_MFC,
+ C_WRAPPER_MTC,
+ C_WRAPPER_CP,
+ C_WRAPPER_SYSCALL,
+ C_WRAPPER_BREAK,
+ C_WRAPPERS_COUNT,
+};
+
+struct lightrec_cstate {
+ struct lightrec_state *state;
+
struct jit_node *branches[512];
struct lightrec_branch local_branches[512];
struct lightrec_branch_target targets[512];
unsigned int nb_branches;
unsigned int nb_local_branches;
unsigned int nb_targets;
+ unsigned int cycles;
+
+ struct regcache *reg_cache;
+};
+
+struct lightrec_state {
+ struct lightrec_registers regs;
+ u32 next_pc;
+ u32 current_cycle;
+ u32 target_cycle;
+ u32 exit_flags;
+ u32 old_cycle_counter;
+ struct block *dispatcher, *c_wrapper_block;
+ void *c_wrapper, *c_wrappers[C_WRAPPERS_COUNT];
struct tinymm *tinymm;
struct blockcache *block_cache;
- struct regcache *reg_cache;
struct recompiler *rec;
+ struct lightrec_cstate *cstate;
struct reaper *reaper;
void (*eob_wrapper_func)(void);
+ void (*memset_func)(void);
void (*get_next_block)(void);
struct lightrec_ops ops;
unsigned int nb_precompile;
- unsigned int cycles;
unsigned int nb_maps;
const struct lightrec_mem_map *maps;
uintptr_t offset_ram, offset_bios, offset_scratch;
};
u32 lightrec_rw(struct lightrec_state *state, union code op,
- u32 addr, u32 data, u16 *flags);
+ u32 addr, u32 data, u16 *flags,
+ struct block *block);
-void lightrec_free_block(struct block *block);
+void lightrec_free_block(struct lightrec_state *state, struct block *block);
void remove_from_code_lut(struct blockcache *cache, struct block *block);
+const struct lightrec_mem_map *
+lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr);
+
static inline u32 kunseg(u32 addr)
{
if (unlikely(addr >= 0xa0000000))
return (pc & (RAM_SIZE - 1)) >> 2; // RAM
}
+/*
+ * Guest PC of the opcode at word index "offset" + "imm" within "block".
+ * If the opcode is flagged LIGHTREC_NO_DS and delay-slot switching is
+ * enabled, the index is bumped by one — presumably to compensate for the
+ * switched delay slot; verify against the optimizer pass.
+ *
+ * NOTE: "+" binds tighter than "<<", so (offset + imm << 2) parses as
+ * ((offset + imm) << 2), i.e. word index scaled to a byte offset (the
+ * build passes -Wno-parentheses for this idiom).
+ */
+static inline u32 get_ds_pc(const struct block *block, u16 offset, s16 imm)
+{
+	u16 flags = block->opcode_list[offset].flags;
+
+	offset += !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS));
+
+	return block->pc + (offset + imm << 2);
+}
+
+/*
+ * Guest PC of the branch opcode at word index "offset" + "imm" within
+ * "block".  Mirror of get_ds_pc(): when the opcode was flagged
+ * LIGHTREC_NO_DS under delay-slot switching, the index is decremented by
+ * one — presumably undoing the slot switch; verify against the optimizer.
+ *
+ * NOTE: (offset + imm << 2) parses as ((offset + imm) << 2) since "+"
+ * binds tighter than "<<" (words to bytes; -Wno-parentheses is set).
+ */
+static inline u32 get_branch_pc(const struct block *block, u16 offset, s16 imm)
+{
+	u16 flags = block->opcode_list[offset].flags;
+
+	offset -= !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS));
+
+	return block->pc + (offset + imm << 2);
+}
+
void lightrec_mtc(struct lightrec_state *state, union code op, u32 data);
u32 lightrec_mfc(struct lightrec_state *state, union code op);
+void lightrec_rfe(struct lightrec_state *state);
+void lightrec_cp(struct lightrec_state *state, union code op);
+
+struct lightrec_cstate * lightrec_create_cstate(struct lightrec_state *state);
+void lightrec_free_cstate(struct lightrec_cstate *cstate);
union code lightrec_read_opcode(struct lightrec_state *state, u32 pc);
struct block * lightrec_get_block(struct lightrec_state *state, u32 pc);
-int lightrec_compile_block(struct block *block);
+int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block);
+void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block);
+
+unsigned int lightrec_cycles_of_opcode(union code code);
+
+/*
+ * Register index that receives the LO result of a MULT/DIV opcode.
+ * With OPT_FLAG_MULT_DIV the optimizer may encode a target GPR in the
+ * rd field (0 meaning unused); otherwise fall back to the dedicated LO
+ * slot (REG_LO == 32 in the emulated register file).
+ */
+static inline u8 get_mult_div_lo(union code c)
+{
+	return (OPT_FLAG_MULT_DIV && c.r.rd) ? c.r.rd : REG_LO;
+}
+
+/*
+ * Register index that receives the HI result of a MULT/DIV opcode.
+ * Counterpart of get_mult_div_lo(): the optimizer may encode a target
+ * GPR in the imm field of the R-type encoding (0 meaning unused);
+ * otherwise fall back to the dedicated HI slot (REG_HI == 33).
+ */
+static inline u8 get_mult_div_hi(union code c)
+{
+	return (OPT_FLAG_MULT_DIV && c.r.imm) ? c.r.imm : REG_HI;
+}
#endif /* __LIGHTREC_PRIVATE_H__ */
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "blockcache.h"
-#include "config.h"
#include "debug.h"
#include "disassembler.h"
#include "emitter.h"
#include "interpreter.h"
+#include "lightrec-config.h"
+#include "lightning-wrapper.h"
#include "lightrec.h"
#include "memmanager.h"
#include "reaper.h"
#include "optimizer.h"
#include <errno.h>
-#include <lightning.h>
+#include <inttypes.h>
#include <limits.h>
#if ENABLE_THREADED_COMPILER
#include <stdatomic.h>
static struct block * lightrec_precompile_block(struct lightrec_state *state,
u32 pc);
+static bool lightrec_block_is_fully_tagged(const struct block *block);
+
+static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data);
+static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg);
static void lightrec_default_sb(struct lightrec_state *state, u32 opcode,
void *host, u32 addr, u8 data)
.lw = lightrec_default_lw,
};
-static void __segfault_cb(struct lightrec_state *state, u32 addr)
+static void __segfault_cb(struct lightrec_state *state, u32 addr,
+ const struct block *block)
{
lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
pr_err("Segmentation fault in recompiled code: invalid "
"load/store at address 0x%08x\n", addr);
+ if (block)
+ pr_err("Was executing block PC 0x%08x\n", block->pc);
}
static void lightrec_swl(struct lightrec_state *state,
const struct lightrec_mem_map_ops *ops,
void *host, u32 addr)
{
- u32 data = state->ops.cop2_ops.mfc(state, op.opcode, op.i.rt);
+ u32 data = lightrec_mfc2(state, op.i.rt);
ops->sw(state, op.opcode, host, addr, data);
}
{
u32 data = ops->lw(state, op.opcode, host, addr);
- state->ops.cop2_ops.mtc(state, op.opcode, op.i.rt, data);
+ lightrec_mtc2(state, op.i.rt, data);
}
static void lightrec_invalidate_map(struct lightrec_state *state,
- const struct lightrec_mem_map *map, u32 addr)
+ const struct lightrec_mem_map *map, u32 addr, u32 len)
{
- if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM])
- state->code_lut[lut_offset(addr)] = NULL;
+ if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) {
+ memset(&state->code_lut[lut_offset(addr)], 0,
+ ((len + 3) / 4) * sizeof(void *));
+ }
}
-static const struct lightrec_mem_map *
-lightrec_get_map(struct lightrec_state *state, u32 kaddr)
+const struct lightrec_mem_map *
+lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr)
{
+ const struct lightrec_mem_map *map;
unsigned int i;
+ u32 addr;
for (i = 0; i < state->nb_maps; i++) {
- const struct lightrec_mem_map *map = &state->maps[i];
+ const struct lightrec_mem_map *mapi = &state->maps[i];
- if (kaddr >= map->pc && kaddr < map->pc + map->length)
- return map;
+ if (kaddr >= mapi->pc && kaddr < mapi->pc + mapi->length) {
+ map = mapi;
+ break;
+ }
}
- return NULL;
+ if (i == state->nb_maps)
+ return NULL;
+
+ addr = kaddr - map->pc;
+
+ while (map->mirror_of)
+ map = map->mirror_of;
+
+ if (host)
+ *host = map->address + addr;
+
+ return map;
}
u32 lightrec_rw(struct lightrec_state *state, union code op,
- u32 addr, u32 data, u16 *flags)
+ u32 addr, u32 data, u16 *flags, struct block *block)
{
const struct lightrec_mem_map *map;
const struct lightrec_mem_map_ops *ops;
- u32 kaddr, pc, opcode = op.opcode;
+ u32 opcode = op.opcode;
void *host;
addr += (s16) op.i.imm;
- kaddr = kunseg(addr);
- map = lightrec_get_map(state, kaddr);
+ map = lightrec_get_map(state, &host, kunseg(addr));
if (!map) {
- __segfault_cb(state, addr);
+ __segfault_cb(state, addr, block);
return 0;
}
- pc = map->pc;
-
- while (map->mirror_of)
- map = map->mirror_of;
-
- host = (void *)((uintptr_t)map->address + kaddr - pc);
-
if (unlikely(map->ops)) {
if (flags)
*flags |= LIGHTREC_HW_IO;
}
static void lightrec_rw_helper(struct lightrec_state *state,
- union code op, u16 *flags)
+ union code op, u16 *flags,
+ struct block *block)
{
- u32 ret = lightrec_rw(state, op,
- state->native_reg_cache[op.i.rs],
- state->native_reg_cache[op.i.rt], flags);
+ u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs],
+ state->regs.gpr[op.i.rt], flags, block);
switch (op.i.op) {
case OP_LB:
case OP_LWR:
case OP_LW:
if (op.i.rt)
- state->native_reg_cache[op.i.rt] = ret;
+ state->regs.gpr[op.i.rt] = ret;
default: /* fall-through */
break;
}
static void lightrec_rw_cb(struct lightrec_state *state, union code op)
{
- lightrec_rw_helper(state, op, NULL);
+ lightrec_rw_helper(state, op, NULL, NULL);
}
-static void lightrec_rw_generic_cb(struct lightrec_state *state,
- struct opcode *op, struct block *block)
+static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg)
{
- bool was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
+ struct block *block;
+ struct opcode *op;
+ bool was_tagged;
+ u16 offset = (u16)arg;
+
+ block = lightrec_find_block_from_lut(state->block_cache,
+ arg >> 16, state->next_pc);
+ if (unlikely(!block)) {
+ pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n",
+ state->next_pc, offset);
+ return;
+ }
+
+ op = &block->opcode_list[offset];
+ was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
- lightrec_rw_helper(state, op->c, &op->flags);
+ lightrec_rw_helper(state, op->c, &op->flags, block);
if (!was_tagged) {
- pr_debug("Opcode of block at PC 0x%08x offset 0x%x has been "
- "tagged - flag for recompilation\n",
- block->pc, op->offset << 2);
+ pr_debug("Opcode of block at PC 0x%08x has been tagged - flag "
+ "for recompilation\n", block->pc);
block->flags |= BLOCK_SHOULD_RECOMPILE;
}
}
-u32 lightrec_mfc(struct lightrec_state *state, union code op)
+static u32 clamp_s32(s32 val, s32 min, s32 max)
{
- bool is_cfc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CFC0) ||
- (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CFC2);
- u32 (*func)(struct lightrec_state *, u32, u8);
- const struct lightrec_cop_ops *ops;
+ return val < min ? min : val > max ? max : val;
+}
- if (op.i.op == OP_CP0)
- ops = &state->ops.cop0_ops;
- else
- ops = &state->ops.cop2_ops;
+/*
+ * Emulate an MFC2 read of a GTE (COP2) data register, applying the
+ * per-register width rules.  Register indices and widths follow the PSX
+ * GTE layout — NOTE(review): register names (IR1-IR3 for cp2d[9..11])
+ * inferred from the access pattern; confirm against GTE documentation.
+ */
+static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg)
+{
+	s16 gteir1, gteir2, gteir3;
+
+	switch (reg) {
+	case 1:
+	case 3:
+	case 5:
+	case 8:
+	case 9:
+	case 10:
+	case 11:
+		/* 16-bit signed registers: sign-extend */
+		return (s32)(s16) state->regs.cp2d[reg];
+	case 7:
+	case 16:
+	case 17:
+	case 18:
+	case 19:
+		/* 16-bit unsigned registers: zero-extend */
+		return (u16) state->regs.cp2d[reg];
+	case 28:
+	case 29:
+		/* Recomputed from cp2d[9..11]: three saturated 5-bit fields */
+		gteir1 = (s16) state->regs.cp2d[9];
+		gteir2 = (s16) state->regs.cp2d[10];
+		gteir3 = (s16) state->regs.cp2d[11];
+
+		return clamp_s32(gteir1 >> 7, 0, 0x1f) << 0 |
+			clamp_s32(gteir2 >> 7, 0, 0x1f) << 5 |
+			clamp_s32(gteir3 >> 7, 0, 0x1f) << 10;
+	case 15:
+		/* Register 15 reads as register 14 */
+		reg = 14;
+	default: /* fall-through */
+		return state->regs.cp2d[reg];
+	}
+}
- if (is_cfc)
- func = ops->cfc;
+u32 lightrec_mfc(struct lightrec_state *state, union code op)
+{
+ if (op.i.op == OP_CP0)
+ return state->regs.cp0[op.r.rd];
+ else if (op.r.rs == OP_CP2_BASIC_MFC2)
+ return lightrec_mfc2(state, op.r.rd);
else
- func = ops->mfc;
-
- return (*func)(state, op.opcode, op.r.rd);
+ return state->regs.cp2c[op.r.rd];
}
static void lightrec_mfc_cb(struct lightrec_state *state, union code op)
u32 rt = lightrec_mfc(state, op);
if (op.r.rt)
- state->native_reg_cache[op.r.rt] = rt;
+ state->regs.gpr[op.r.rt] = rt;
}
-void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
+static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
{
- bool is_ctc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CTC0) ||
- (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CTC2);
- void (*func)(struct lightrec_state *, u32, u8, u32);
- const struct lightrec_cop_ops *ops;
+ u32 status, cause;
+
+ switch (reg) {
+ case 1:
+ case 4:
+ case 8:
+ case 14:
+ case 15:
+ /* Those registers are read-only */
+ return;
+ default: /* fall-through */
+ break;
+ }
- if (op.i.op == OP_CP0)
- ops = &state->ops.cop0_ops;
- else
- ops = &state->ops.cop2_ops;
+ if (reg == 12) {
+ status = state->regs.cp0[12];
- if (is_ctc)
- func = ops->ctc;
- else
- func = ops->mtc;
+ if (status & ~data & BIT(16)) {
+ state->ops.enable_ram(state, true);
+ lightrec_invalidate_all(state);
+ } else if (~status & data & BIT(16)) {
+ state->ops.enable_ram(state, false);
+ }
+ }
+
+ state->regs.cp0[reg] = data;
+
+ if (reg == 12 || reg == 13) {
+ cause = state->regs.cp0[13];
+ status = state->regs.cp0[12];
+
+ if (!!(status & cause & 0x300) & status)
+ lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT);
+ }
+}
+
+/*
+ * Count the leading bits of "data" that are equal to its sign bit,
+ * including the sign bit itself (1 + the number of redundant sign bits;
+ * 32 for data == 0 or -1).  Stored into cp2d[31] on writes to GTE data
+ * register 30 — NOTE(review): presumably the LZCS/LZCR pair, confirm.
+ *
+ * Fix: the previous single-directive check
+ *     #if defined(__has_builtin) && __has_builtin(__builtin_clrsb)
+ * is not portable — a preprocessor that does not provide __has_builtin
+ * replaces the identifier with 0 but still has to parse the trailing
+ * "(__builtin_clrsb)", yielding a "missing binary operator" error.
+ * The two checks must be nested instead.
+ */
+static u32 count_leading_bits(s32 data)
+{
+	u32 cnt = 33;
+
+#ifdef __has_builtin
+#if __has_builtin(__builtin_clrsb)
+	return 1 + __builtin_clrsb(data);
+#endif
+#endif
+
+	/* Generic fallback: XOR with the broadcast sign bit so leading
+	 * sign-bit copies become leading zeros, shift one copy out, then
+	 * count the remaining significant bits down from 33. */
+	data = (data ^ (data >> 31)) << 1;
+
+	do {
+		cnt -= 1;
+		data >>= 1;
+	} while (data);
+
+	return cnt;
+}
+
+/*
+ * Emulate an MTC2 write to a GTE (COP2) data register, handling the
+ * special registers below; any other register stores the value verbatim.
+ */
+static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data)
+{
+	switch (reg) {
+	case 15:
+		/* Writing reg 15 shifts the cp2d[12..14] FIFO
+		 * (the SXY queue on the PSX GTE — verify naming) */
+		state->regs.cp2d[12] = state->regs.cp2d[13];
+		state->regs.cp2d[13] = state->regs.cp2d[14];
+		state->regs.cp2d[14] = data;
+		break;
+	case 28:
+		/* Expand three packed 5-bit fields into cp2d[9..11] */
+		state->regs.cp2d[9] = (data << 7) & 0xf80;
+		state->regs.cp2d[10] = (data << 2) & 0xf80;
+		state->regs.cp2d[11] = (data >> 3) & 0xf80;
+		break;
+	case 31:
+		/* Register 31 is read-only */
+		return;
+	case 30:
+		/* Also update cp2d[31] with the leading-bit count, then
+		 * fall through to store the raw value in cp2d[30]. */
+		state->regs.cp2d[31] = count_leading_bits((s32) data);
+	default: /* fall-through */
+		state->regs.cp2d[reg] = data;
+		break;
+	}
+}
- (*func)(state, op.opcode, op.r.rd, data);
+/*
+ * Emulate a CTC2 write to a GTE (COP2) control register.
+ * Registers 4, 12, 20, 26, 27, 29 and 30 are sign-extended from 16 bits
+ * before the store.  For register 31 only bits 12..30 are writable and
+ * bit 31 is recomputed as the OR of the bits selected by 0x7f87e000 —
+ * NOTE(review): presumably the FLAG error-summary bit, confirm.
+ */
+static void lightrec_ctc2(struct lightrec_state *state, u8 reg, u32 data)
+{
+	switch (reg) {
+	case 4:
+	case 12:
+	case 20:
+	case 26:
+	case 27:
+	case 29:
+	case 30:
+		/* 16-bit signed registers: sign-extend */
+		data = (s32)(s16) data;
+		break;
+	case 31:
+		data = (data & 0x7ffff000) | !!(data & 0x7f87e000) << 31;
+	default: /* fall-through */
+		break;
+	}
+
+	state->regs.cp2c[reg] = data;
+}
+
+/*
+ * Dispatch a coprocessor register write: COP0 writes go through
+ * lightrec_mtc0().  For COP2, an rs field of OP_CP2_BASIC_CTC2 selects
+ * the control registers (lightrec_ctc2()); any other encoding is treated
+ * as a data-register write (lightrec_mtc2()).
+ */
+void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
+{
+	if (op.i.op == OP_CP0)
+		lightrec_mtc0(state, op.r.rd, data);
+	else if (op.r.rs == OP_CP2_BASIC_CTC2)
+		lightrec_ctc2(state, op.r.rd, data);
+	else
+		lightrec_mtc2(state, op.r.rd, data);
+}
static void lightrec_mtc_cb(struct lightrec_state *state, union code op)
{
- lightrec_mtc(state, op, state->native_reg_cache[op.r.rt]);
+ lightrec_mtc(state, op, state->regs.gpr[op.r.rt]);
}
-static void lightrec_rfe_cb(struct lightrec_state *state, union code op)
+void lightrec_rfe(struct lightrec_state *state)
{
u32 status;
/* Read CP0 Status register (r12) */
- status = state->ops.cop0_ops.mfc(state, op.opcode, 12);
+ status = state->regs.cp0[12];
/* Switch the bits */
status = ((status & 0x3c) >> 2) | (status & ~0xf);
/* Write it back */
- state->ops.cop0_ops.ctc(state, op.opcode, 12, status);
+ lightrec_mtc0(state, 12, status);
}
-static void lightrec_cp_cb(struct lightrec_state *state, union code op)
+void lightrec_cp(struct lightrec_state *state, union code op)
{
- void (*func)(struct lightrec_state *, u32);
-
- if ((op.opcode >> 25) & 1)
- func = state->ops.cop2_ops.op;
- else
- func = state->ops.cop0_ops.op;
+ if (op.i.op == OP_CP0) {
+ pr_err("Invalid CP opcode to coprocessor #0\n");
+ return;
+ }
- (*func)(state, op.opcode);
+ (*state->ops.cop2_op)(state, op.opcode);
}
static void lightrec_syscall_cb(struct lightrec_state *state, union code op)
{
struct block *block = lightrec_find_block(state->block_cache, pc);
- if (block && lightrec_block_is_outdated(block)) {
+ if (block && lightrec_block_is_outdated(state, block)) {
pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);
/* Make sure the recompiler isn't processing the block we'll
lightrec_unregister_block(state->block_cache, block);
remove_from_code_lut(state->block_cache, block);
- lightrec_free_block(block);
+ lightrec_free_block(state, block);
block = NULL;
}
for (;;) {
func = state->code_lut[lut_offset(pc)];
if (func && func != state->get_next_block)
- return func;
+ break;
block = lightrec_get_block(state, pc);
if (unlikely(!block))
- return NULL;
+ break;
+
+ if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) {
+ func = state->memset_func;
+ break;
+ }
should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE &&
!(block->flags & BLOCK_IS_DEAD);
if (ENABLE_THREADED_COMPILER)
lightrec_recompiler_add(state->rec, block);
else
- lightrec_compile_block(block);
+ lightrec_compile_block(state->cstate, block);
}
if (ENABLE_THREADED_COMPILER && likely(!should_recompile))
- func = lightrec_recompiler_run_first_pass(block, &pc);
+ func = lightrec_recompiler_run_first_pass(state, block, &pc);
else
func = block->function;
if (likely(func))
- return func;
+ break;
- /* Block wasn't compiled yet - run the interpreter */
- if (!ENABLE_THREADED_COMPILER &&
- ((ENABLE_FIRST_PASS && likely(!should_recompile)) ||
- unlikely(block->flags & BLOCK_NEVER_COMPILE)))
- pc = lightrec_emulate_block(block, pc);
+ if (unlikely(block->flags & BLOCK_NEVER_COMPILE)) {
+ pc = lightrec_emulate_block(state, block, pc);
+
+ } else if (!ENABLE_THREADED_COMPILER) {
+ /* Block wasn't compiled yet - run the interpreter */
+ if (block->flags & BLOCK_FULLY_TAGGED)
+ pr_debug("Block fully tagged, skipping first pass\n");
+ else if (ENABLE_FIRST_PASS && likely(!should_recompile))
+ pc = lightrec_emulate_block(state, block, pc);
- if (likely(!(block->flags & BLOCK_NEVER_COMPILE))) {
/* Then compile it using the profiled data */
- if (ENABLE_THREADED_COMPILER)
- lightrec_recompiler_add(state->rec, block);
- else
- lightrec_compile_block(block);
+ lightrec_compile_block(state->cstate, block);
+ } else {
+ lightrec_recompiler_add(state->rec, block);
}
if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
- state->current_cycle >= state->target_cycle) {
- state->next_pc = pc;
- return NULL;
- }
+ state->current_cycle >= state->target_cycle)
+ break;
}
-}
-static s32 c_generic_function_wrapper(struct lightrec_state *state,
- s32 cycles_delta,
- void (*f)(struct lightrec_state *,
- struct opcode *,
- struct block *),
- struct opcode *op, struct block *block)
-{
- state->current_cycle = state->target_cycle - cycles_delta;
-
- (*f)(state, op, block);
-
- return state->target_cycle - state->current_cycle;
+ state->next_pc = pc;
+ return func;
}
static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
- void (*f)(struct lightrec_state *, union code),
- union code op)
+ void (*f)(struct lightrec_state *, u32 d),
+ u32 d)
{
state->current_cycle = state->target_cycle - cycles_delta;
- (*f)(state, op);
+ (*f)(state, d);
return state->target_cycle - state->current_cycle;
}
-static struct block * generate_wrapper(struct lightrec_state *state,
- void *f, bool generic)
+static struct block * generate_wrapper(struct lightrec_state *state)
{
struct block *block;
jit_state_t *_jit;
jit_prepare();
jit_pushargr(LIGHTREC_REG_STATE);
jit_pushargr(LIGHTREC_REG_CYCLE);
- jit_pushargi((uintptr_t)f);
jit_pushargr(JIT_R0);
- if (generic) {
- jit_pushargr(JIT_R1);
- jit_finishi(c_generic_function_wrapper);
- } else {
- jit_finishi(c_function_wrapper);
- }
-
-#if __WORDSIZE == 64
+ jit_pushargr(JIT_R1);
+ jit_finishi(c_function_wrapper);
jit_retval_i(LIGHTREC_REG_CYCLE);
-#else
- jit_retval(LIGHTREC_REG_CYCLE);
-#endif
jit_patch_at(jit_jmpi(), to_fn_epilog);
jit_epilog();
- block->state = state;
block->_jit = _jit;
block->function = jit_emit();
block->opcode_list = NULL;
return NULL;
}
+/*
+ * Host-side replacement for a guest memset loop, used when the
+ * OPT_REPLACE_MEMSET pass flagged a block as BLOCK_IS_MEMSET.
+ * Zeroes gpr[5] words (MIPS $a1) starting at the address held in gpr[4]
+ * ($a0), invalidates the covered code-LUT entries unless invalidation is
+ * restricted to DMA, and returns a rough estimate of the cycles the
+ * original guest loop would have consumed.  Returns 0 (and logs an
+ * error) when the target address maps to no memory region.
+ */
+static u32 lightrec_memset(struct lightrec_state *state)
+{
+	u32 kunseg_pc = kunseg(state->regs.gpr[4]);
+	void *host;
+	const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg_pc);
+	u32 length = state->regs.gpr[5] * 4;
+
+	if (!map) {
+		pr_err("Unable to find memory map for memset target address "
+		       "0x%x\n", kunseg_pc);
+		return 0;
+	}
+
+	pr_debug("Calling host memset, PC 0x%x (host address 0x%" PRIxPTR ") for %u bytes\n",
+		 kunseg_pc, (uintptr_t)host, length);
+	memset(host, 0, length);
+
+	if (!state->invalidate_from_dma_only)
+		lightrec_invalidate_map(state, map, kunseg_pc, length);
+
+	/* Rough estimation of the number of cycles consumed.
+	 * Fix: "length + 3 / 4" parsed as "length + 0"; round the byte
+	 * count up to whole words instead, matching the ((len + 3) / 4)
+	 * idiom used by lightrec_invalidate_map(). */
+	return 8 + 5 * ((length + 3) / 4);
+}
+
static struct block * generate_dispatcher(struct lightrec_state *state)
{
struct block *block;
jit_state_t *_jit;
- jit_node_t *to_end, *to_end2, *to_c, *loop, *addr, *addr2;
+ jit_node_t *to_end, *to_c, *loop, *addr, *addr2, *addr3;
unsigned int i;
u32 offset, ram_len;
jit_word_t code_size;
jit_frame(256);
jit_getarg(JIT_R0, jit_arg());
-#if __WORDSIZE == 64
jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg());
-#else
- jit_getarg(LIGHTREC_REG_CYCLE, jit_arg());
-#endif
/* Force all callee-saved registers to be pushed on the stack */
for (i = 0; i < NUM_REGS; i++)
/* Call the block's code */
jit_jmpr(JIT_R0);
+ if (OPT_REPLACE_MEMSET) {
+ /* Blocks will jump here when they need to call
+ * lightrec_memset() */
+ addr3 = jit_indirect();
+
+ jit_prepare();
+ jit_pushargr(LIGHTREC_REG_STATE);
+ jit_finishi(lightrec_memset);
+
+ jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, regs.gpr[31]));
+
+ jit_retval(JIT_R0);
+ jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0);
+ }
+
/* The block will jump here, with the number of cycles remaining in
* LIGHTREC_REG_CYCLE */
addr2 = jit_indirect();
+ /* Store back the next_pc to the lightrec_state structure */
+ offset = offsetof(struct lightrec_state, next_pc);
+ jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
+
/* Jump to end if state->target_cycle < state->current_cycle */
to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
to_c = jit_bgei(JIT_R0, ram_len);
/* Fast path: code is running from RAM, use the code LUT */
-#if __WORDSIZE == 64
- jit_lshi(JIT_R0, JIT_R0, 1);
-#endif
+ if (__WORDSIZE == 64)
+ jit_lshi(JIT_R0, JIT_R0, 1);
jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE);
jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut));
/* Slow path: call C function get_next_block_func() */
jit_patch(to_c);
- if (ENABLE_FIRST_PASS) {
+ if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
/* We may call the interpreter - update state->current_cycle */
jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
offsetof(struct lightrec_state, target_cycle));
jit_finishi(&get_next_block_func);
jit_retval(JIT_R0);
- if (ENABLE_FIRST_PASS) {
+ if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
/* The interpreter may have updated state->current_cycle and
* state->target_cycle - recalc the delta */
jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE,
/* If we get non-NULL, loop */
jit_patch_at(jit_bnei(JIT_R0, 0), loop);
- to_end2 = jit_jmpi();
-
/* When exiting, the recompiled code will jump to that address */
jit_note(__FILE__, __LINE__);
jit_patch(to_end);
- /* Store back the next_pc to the lightrec_state structure */
- offset = offsetof(struct lightrec_state, next_pc);
- jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
-
- jit_patch(to_end2);
-
jit_retr(LIGHTREC_REG_CYCLE);
jit_epilog();
- block->state = state;
block->_jit = _jit;
block->function = jit_emit();
block->opcode_list = NULL;
block->code_size = code_size;
state->eob_wrapper_func = jit_address(addr2);
+ if (OPT_REPLACE_MEMSET)
+ state->memset_func = jit_address(addr3);
state->get_next_block = jit_address(addr);
if (ENABLE_DISASSEMBLER) {
union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
{
- u32 addr, kunseg_pc = kunseg(pc);
- const u32 *code;
- const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
+ void *host;
- addr = kunseg_pc - map->pc;
+ lightrec_get_map(state, &host, kunseg(pc));
- while (map->mirror_of)
- map = map->mirror_of;
+ const u32 *code = (u32 *)host;
+ return (union code) *code;
+}
- code = map->address + addr;
+unsigned int lightrec_cycles_of_opcode(union code code)
+{
+ return 2;
+}
- return (union code) *code;
+/* Return the block's IR opcode list (nb_ops entries) to the MEM_FOR_IR
+ * allocator. */
+void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block)
+{
+	lightrec_free(state, MEM_FOR_IR,
+		      sizeof(*block->opcode_list) * block->nb_ops,
+		      block->opcode_list);
+}
+
+/*
+ * Count the opcodes of the guest block starting at "src".  The words are
+ * read as little-endian guest code; the block ends at the first syscall
+ * (included), or at the first unconditional jump — included together
+ * with its delay slot, hence the "i + 1".
+ */
+static unsigned int lightrec_get_mips_block_len(const u32 *src)
+{
+	unsigned int i;
+	union code c;
+
+	for (i = 1; ; i++) {
+		c.opcode = LE32TOH(*src++);
+
+		if (is_syscall(c))
+			return i;
+
+		if (is_unconditional_jump(c))
+			return i + 1;
+	}
+}
+
+/*
+ * Build the IR opcode list for the guest code at "src": measure the
+ * block, allocate one struct opcode per instruction from MEM_FOR_IR,
+ * byte-swap each little-endian word and clear its flags.  On success
+ * *len receives the block length in bytes; returns NULL (after logging
+ * an error) if the allocation fails.
+ */
+static struct opcode * lightrec_disassemble(struct lightrec_state *state,
+					    const u32 *src, unsigned int *len)
+{
+	struct opcode *list;
+	unsigned int i, length;
+
+	length = lightrec_get_mips_block_len(src);
+
+	list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * length);
+	if (!list) {
+		pr_err("Unable to allocate memory\n");
+		return NULL;
+	}
+
+	for (i = 0; i < length; i++) {
+		list[i].opcode = LE32TOH(src[i]);
+		list[i].flags = 0;
+	}
+
+	*len = length * sizeof(u32);
+
+	return list;
+}
static struct block * lightrec_precompile_block(struct lightrec_state *state,
{
struct opcode *list;
struct block *block;
- const u32 *code;
- u32 addr, kunseg_pc = kunseg(pc);
- const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
+ void *host;
+ const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc));
+ const u32 *code = (u32 *) host;
unsigned int length;
+ bool fully_tagged;
if (!map)
return NULL;
- addr = kunseg_pc - map->pc;
-
- while (map->mirror_of)
- map = map->mirror_of;
-
- code = map->address + addr;
-
block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
if (!block) {
pr_err("Unable to recompile block: Out of memory\n");
}
block->pc = pc;
- block->state = state;
block->_jit = NULL;
block->function = NULL;
block->opcode_list = list;
- block->map = map;
+ block->code = code;
block->next = NULL;
block->flags = 0;
block->code_size = 0;
#endif
block->nb_ops = length / sizeof(u32);
- lightrec_optimize(block);
+ lightrec_optimize(state, block);
length = block->nb_ops * sizeof(u32);
lightrec_register(MEM_FOR_MIPS_CODE, length);
if (ENABLE_DISASSEMBLER) {
- pr_debug("Disassembled block at PC: 0x%x\n", block->pc);
- lightrec_print_disassembly(block, code, length);
+ pr_debug("Disassembled block at PC: 0x%08x\n", block->pc);
+ lightrec_print_disassembly(block, code);
}
- pr_debug("Block size: %lu opcodes\n", block->nb_ops);
+ pr_debug("Block size: %hu opcodes\n", block->nb_ops);
/* If the first opcode is an 'impossible' branch, never compile the
* block */
- if (list->flags & LIGHTREC_EMULATE_BRANCH)
+ if (should_emulate(block->opcode_list))
block->flags |= BLOCK_NEVER_COMPILE;
+ fully_tagged = lightrec_block_is_fully_tagged(block);
+ if (fully_tagged)
+ block->flags |= BLOCK_FULLY_TAGGED;
+
+ if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET))
+ state->code_lut[lut_offset(pc)] = state->memset_func;
+
block->hash = lightrec_calculate_block_hash(block);
pr_debug("Recompile count: %u\n", state->nb_precompile++);
return block;
}
-static bool lightrec_block_is_fully_tagged(struct block *block)
+static bool lightrec_block_is_fully_tagged(const struct block *block)
{
- struct opcode *op;
+ const struct opcode *op;
+ unsigned int i;
+
+ for (i = 0; i < block->nb_ops; i++) {
+ op = &block->opcode_list[i];
- for (op = block->opcode_list; op; op = op->next) {
/* Verify that all load/stores of the opcode list
* Check all loads/stores of the opcode list and mark the
* block as fully compiled if they all have been tagged. */
return true;
}
-static void lightrec_reap_block(void *data)
+static void lightrec_reap_block(struct lightrec_state *state, void *data)
{
struct block *block = data;
pr_debug("Reap dead block at PC 0x%08x\n", block->pc);
- lightrec_free_block(block);
+ lightrec_unregister_block(state->block_cache, block);
+ lightrec_free_block(state, block);
}
-static void lightrec_reap_jit(void *data)
+static void lightrec_reap_jit(struct lightrec_state *state, void *data)
{
_jit_destroy_state(data);
}
-int lightrec_compile_block(struct block *block)
+int lightrec_compile_block(struct lightrec_cstate *cstate,
+ struct block *block)
{
- struct lightrec_state *state = block->state;
+ struct lightrec_state *state = cstate->state;
struct lightrec_branch_target *target;
bool op_list_freed = false, fully_tagged = false;
struct block *block2;
bool skip_next = false;
jit_word_t code_size;
unsigned int i, j;
- u32 next_pc, offset;
+ u32 offset;
fully_tagged = lightrec_block_is_fully_tagged(block);
if (fully_tagged)
oldjit = block->_jit;
block->_jit = _jit;
- lightrec_regcache_reset(state->reg_cache);
- state->cycles = 0;
- state->nb_branches = 0;
- state->nb_local_branches = 0;
- state->nb_targets = 0;
+ lightrec_regcache_reset(cstate->reg_cache);
+ cstate->cycles = 0;
+ cstate->nb_branches = 0;
+ cstate->nb_local_branches = 0;
+ cstate->nb_targets = 0;
jit_prolog();
jit_tramp(256);
start_of_block = jit_label();
- for (elm = block->opcode_list; elm; elm = elm->next) {
- next_pc = block->pc + elm->offset * sizeof(u32);
+ for (i = 0; i < block->nb_ops; i++) {
+ elm = &block->opcode_list[i];
if (skip_next) {
skip_next = false;
continue;
}
- state->cycles += lightrec_cycles_of_opcode(elm->c);
+ cstate->cycles += lightrec_cycles_of_opcode(elm->c);
- if (elm->flags & LIGHTREC_EMULATE_BRANCH) {
+ if (should_emulate(elm)) {
pr_debug("Branch at offset 0x%x will be emulated\n",
- elm->offset << 2);
- lightrec_emit_eob(block, elm, next_pc);
+ i << 2);
+
+ lightrec_emit_eob(cstate, block, i, false);
skip_next = !(elm->flags & LIGHTREC_NO_DS);
- } else if (elm->opcode) {
- lightrec_rec_opcode(block, elm, next_pc);
+ } else {
+ lightrec_rec_opcode(cstate, block, i);
skip_next = has_delay_slot(elm->c) &&
!(elm->flags & LIGHTREC_NO_DS);
#if _WIN32
* mapped registers as temporaries. Until the actual bug
* is found and fixed, unconditionally mark our
* registers as live here. */
- lightrec_regcache_mark_live(state->reg_cache, _jit);
+ lightrec_regcache_mark_live(cstate->reg_cache, _jit);
#endif
}
}
- for (i = 0; i < state->nb_branches; i++)
- jit_patch(state->branches[i]);
+ for (i = 0; i < cstate->nb_branches; i++)
+ jit_patch(cstate->branches[i]);
- for (i = 0; i < state->nb_local_branches; i++) {
- struct lightrec_branch *branch = &state->local_branches[i];
+ for (i = 0; i < cstate->nb_local_branches; i++) {
+ struct lightrec_branch *branch = &cstate->local_branches[i];
pr_debug("Patch local branch to offset 0x%x\n",
branch->target << 2);
continue;
}
- for (j = 0; j < state->nb_targets; j++) {
- if (state->targets[j].offset == branch->target) {
+ for (j = 0; j < cstate->nb_targets; j++) {
+ if (cstate->targets[j].offset == branch->target) {
jit_patch_at(branch->branch,
- state->targets[j].label);
+ cstate->targets[j].label);
break;
}
}
- if (j == state->nb_targets)
+ if (j == cstate->nb_targets)
pr_err("Unable to find branch target\n");
}
/* Add compiled function to the LUT */
state->code_lut[lut_offset(block->pc)] = block->function;
- /* Fill code LUT with the block's entry points */
- for (i = 0; i < state->nb_targets; i++) {
- target = &state->targets[i];
-
- if (target->offset) {
- offset = lut_offset(block->pc) + target->offset;
- state->code_lut[offset] = jit_address(target->label);
- }
+ if (ENABLE_THREADED_COMPILER) {
+ /* Since we might try to reap the same block multiple times,
+ * we need the reaper to wait until everything has been
+ * submitted, so that the duplicate entries can be dropped. */
+ lightrec_reaper_pause(state->reaper);
}
/* Detect old blocks that have been covered by the new one */
- for (i = 0; i < state->nb_targets; i++) {
- target = &state->targets[i];
+ for (i = 0; i < cstate->nb_targets; i++) {
+ target = &cstate->targets[i];
if (!target->offset)
continue;
/* No need to check if block2 is compilable - it must
* be, otherwise block wouldn't be compilable either */
+ /* Set the "block dead" flag to prevent the dynarec from
+ * recompiling this block */
block2->flags |= BLOCK_IS_DEAD;
+ /* If block2 was pending for compilation, cancel it.
+ * If it's being compiled right now, wait until it
+ * finishes. */
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_recompiler_remove(state->rec, block2);
+
+ /* We know from now on that block2 isn't going to be
+ * compiled. We can override the LUT entry with our
+ * new block's entry point. */
+ offset = lut_offset(block->pc) + target->offset;
+ state->code_lut[offset] = jit_address(target->label);
+
pr_debug("Reap block 0x%08x as it's covered by block "
"0x%08x\n", block2->pc, block->pc);
- lightrec_unregister_block(state->block_cache, block2);
-
+ /* Finally, reap the block. */
if (ENABLE_THREADED_COMPILER) {
- lightrec_recompiler_remove(state->rec, block2);
lightrec_reaper_add(state->reaper,
lightrec_reap_block,
block2);
} else {
- lightrec_free_block(block2);
+ lightrec_unregister_block(state->block_cache, block2);
+ lightrec_free_block(state, block2);
}
}
}
+	if (ENABLE_THREADED_COMPILER)
+		lightrec_reaper_continue(state->reaper);
+
jit_get_code(&code_size);
lightrec_register(MEM_FOR_CODE, code_size);
block->code_size = code_size;
if (ENABLE_DISASSEMBLER) {
- pr_debug("Compiling block at PC: 0x%x\n", block->pc);
+ pr_debug("Compiling block at PC: 0x%08x\n", block->pc);
jit_disassemble();
}
if (fully_tagged && !op_list_freed) {
pr_debug("Block PC 0x%08x is fully tagged"
" - free opcode list\n", block->pc);
- lightrec_free_opcode_list(state, block->opcode_list);
+ lightrec_free_opcode_list(state, block);
block->opcode_list = NULL;
}
return 0;
}
+static void lightrec_print_info(struct lightrec_state *state)
+{
+ if ((state->current_cycle & ~0xfffffff) != state->old_cycle_counter) {
+ pr_info("Lightrec RAM usage: IR %u KiB, CODE %u KiB, "
+ "MIPS %u KiB, TOTAL %u KiB, avg. IPI %f\n",
+ lightrec_get_mem_usage(MEM_FOR_IR) / 1024,
+ lightrec_get_mem_usage(MEM_FOR_CODE) / 1024,
+ lightrec_get_mem_usage(MEM_FOR_MIPS_CODE) / 1024,
+ lightrec_get_total_mem_usage() / 1024,
+ lightrec_get_average_ipi());
+ state->old_cycle_counter = state->current_cycle & ~0xfffffff;
+ }
+}
+
u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
{
s32 (*func)(void *, s32) = (void *)state->dispatcher->function;
target_cycle = UINT_MAX;
state->target_cycle = target_cycle;
+ state->next_pc = pc;
block_trace = get_next_block_func(state, pc);
if (block_trace) {
if (ENABLE_THREADED_COMPILER)
lightrec_reaper_reap(state->reaper);
+ if (LOG_LEVEL >= INFO_L)
+ lightrec_print_info(state);
+
return state->next_pc;
}
state->exit_flags = LIGHTREC_EXIT_NORMAL;
- return lightrec_emulate_block(block, pc);
+ pc = lightrec_emulate_block(state, block, pc);
+
+ if (LOG_LEVEL >= INFO_L)
+ lightrec_print_info(state);
+
+ return pc;
}
-void lightrec_free_block(struct block *block)
+void lightrec_free_block(struct lightrec_state *state, struct block *block)
{
lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
if (block->opcode_list)
- lightrec_free_opcode_list(block->state, block->opcode_list);
+ lightrec_free_opcode_list(state, block);
if (block->_jit)
_jit_destroy_state(block->_jit);
lightrec_unregister(MEM_FOR_CODE, block->code_size);
- lightrec_free(block->state, MEM_FOR_IR, sizeof(*block), block);
+ lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
+}
+
+struct lightrec_cstate * lightrec_create_cstate(struct lightrec_state *state)
+{
+ struct lightrec_cstate *cstate;
+
+ cstate = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*cstate));
+ if (!cstate)
+ return NULL;
+
+ cstate->reg_cache = lightrec_regcache_init(state);
+ if (!cstate->reg_cache) {
+ lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*cstate), cstate);
+ return NULL;
+ }
+
+ cstate->state = state;
+
+ return cstate;
+}
+
+void lightrec_free_cstate(struct lightrec_cstate *cstate)
+{
+ lightrec_free_regcache(cstate->reg_cache);
+ lightrec_free(cstate->state, MEM_FOR_LIGHTREC, sizeof(*cstate), cstate);
}
struct lightrec_state * lightrec_init(char *argv0,
struct lightrec_state *state;
/* Sanity-check ops */
- if (!ops ||
- !ops->cop0_ops.mfc || !ops->cop0_ops.cfc || !ops->cop0_ops.mtc ||
- !ops->cop0_ops.ctc || !ops->cop0_ops.op ||
- !ops->cop2_ops.mfc || !ops->cop2_ops.cfc || !ops->cop2_ops.mtc ||
- !ops->cop2_ops.ctc || !ops->cop2_ops.op) {
+ if (!ops || !ops->cop2_op || !ops->enable_ram) {
pr_err("Missing callbacks in lightrec_ops structure\n");
return NULL;
}
if (!state->block_cache)
goto err_free_tinymm;
- state->reg_cache = lightrec_regcache_init(state);
- if (!state->reg_cache)
- goto err_free_block_cache;
-
if (ENABLE_THREADED_COMPILER) {
state->rec = lightrec_recompiler_init(state);
if (!state->rec)
- goto err_free_reg_cache;
+ goto err_free_block_cache;
state->reaper = lightrec_reaper_init(state);
if (!state->reaper)
goto err_free_recompiler;
+ } else {
+ state->cstate = lightrec_create_cstate(state);
+ if (!state->cstate)
+ goto err_free_block_cache;
}
state->nb_maps = nb;
if (!state->dispatcher)
goto err_free_reaper;
- state->rw_generic_wrapper = generate_wrapper(state,
- lightrec_rw_generic_cb,
- true);
- if (!state->rw_generic_wrapper)
+ state->c_wrapper_block = generate_wrapper(state);
+ if (!state->c_wrapper_block)
goto err_free_dispatcher;
- state->rw_wrapper = generate_wrapper(state, lightrec_rw_cb, false);
- if (!state->rw_wrapper)
- goto err_free_generic_rw_wrapper;
-
- state->mfc_wrapper = generate_wrapper(state, lightrec_mfc_cb, false);
- if (!state->mfc_wrapper)
- goto err_free_rw_wrapper;
-
- state->mtc_wrapper = generate_wrapper(state, lightrec_mtc_cb, false);
- if (!state->mtc_wrapper)
- goto err_free_mfc_wrapper;
-
- state->rfe_wrapper = generate_wrapper(state, lightrec_rfe_cb, false);
- if (!state->rfe_wrapper)
- goto err_free_mtc_wrapper;
-
- state->cp_wrapper = generate_wrapper(state, lightrec_cp_cb, false);
- if (!state->cp_wrapper)
- goto err_free_rfe_wrapper;
-
- state->syscall_wrapper = generate_wrapper(state, lightrec_syscall_cb,
- false);
- if (!state->syscall_wrapper)
- goto err_free_cp_wrapper;
-
- state->break_wrapper = generate_wrapper(state, lightrec_break_cb,
- false);
- if (!state->break_wrapper)
- goto err_free_syscall_wrapper;
-
- state->rw_generic_func = state->rw_generic_wrapper->function;
- state->rw_func = state->rw_wrapper->function;
- state->mfc_func = state->mfc_wrapper->function;
- state->mtc_func = state->mtc_wrapper->function;
- state->rfe_func = state->rfe_wrapper->function;
- state->cp_func = state->cp_wrapper->function;
- state->syscall_func = state->syscall_wrapper->function;
- state->break_func = state->break_wrapper->function;
+ state->c_wrapper = state->c_wrapper_block->function;
+
+ state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb;
+ state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb;
+ state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb;
+ state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb;
+ state->c_wrappers[C_WRAPPER_CP] = lightrec_cp;
+ state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb;
+ state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb;
map = &state->maps[PSX_MAP_BIOS];
state->offset_bios = (uintptr_t)map->address - map->pc;
state->maps[PSX_MAP_MIRROR3].address == map->address + 0x600000)
state->mirrors_mapped = true;
+ if (state->offset_bios == 0 &&
+ state->offset_scratch == 0 &&
+ state->offset_ram == 0 &&
+ state->mirrors_mapped) {
+ pr_info("Memory map is perfect. Emitted code will be best.\n");
+ } else {
+ pr_info("Memory map is sub-par. Emitted code will be slow.\n");
+ }
+
return state;
-err_free_syscall_wrapper:
- lightrec_free_block(state->syscall_wrapper);
-err_free_cp_wrapper:
- lightrec_free_block(state->cp_wrapper);
-err_free_rfe_wrapper:
- lightrec_free_block(state->rfe_wrapper);
-err_free_mtc_wrapper:
- lightrec_free_block(state->mtc_wrapper);
-err_free_mfc_wrapper:
- lightrec_free_block(state->mfc_wrapper);
-err_free_rw_wrapper:
- lightrec_free_block(state->rw_wrapper);
-err_free_generic_rw_wrapper:
- lightrec_free_block(state->rw_generic_wrapper);
err_free_dispatcher:
- lightrec_free_block(state->dispatcher);
+ lightrec_free_block(state, state->dispatcher);
err_free_reaper:
if (ENABLE_THREADED_COMPILER)
lightrec_reaper_destroy(state->reaper);
err_free_recompiler:
if (ENABLE_THREADED_COMPILER)
lightrec_free_recompiler(state->rec);
-err_free_reg_cache:
- lightrec_free_regcache(state->reg_cache);
+ else
+ lightrec_free_cstate(state->cstate);
err_free_block_cache:
lightrec_free_block_cache(state->block_cache);
err_free_tinymm:
void lightrec_destroy(struct lightrec_state *state)
{
+	/* Force a print info on destroy */
+ state->current_cycle = ~state->current_cycle;
+ lightrec_print_info(state);
+
if (ENABLE_THREADED_COMPILER) {
lightrec_free_recompiler(state->rec);
lightrec_reaper_destroy(state->reaper);
+ } else {
+ lightrec_free_cstate(state->cstate);
}
- lightrec_free_regcache(state->reg_cache);
lightrec_free_block_cache(state->block_cache);
- lightrec_free_block(state->dispatcher);
- lightrec_free_block(state->rw_generic_wrapper);
- lightrec_free_block(state->rw_wrapper);
- lightrec_free_block(state->mfc_wrapper);
- lightrec_free_block(state->mtc_wrapper);
- lightrec_free_block(state->rfe_wrapper);
- lightrec_free_block(state->cp_wrapper);
- lightrec_free_block(state->syscall_wrapper);
- lightrec_free_block(state->break_wrapper);
+ lightrec_free_block(state, state->dispatcher);
+ lightrec_free_block(state, state->c_wrapper_block);
finish_jit();
#if ENABLE_TINYMM
void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len)
{
u32 kaddr = kunseg(addr & ~0x3);
- const struct lightrec_mem_map *map = lightrec_get_map(state, kaddr);
+ const struct lightrec_mem_map *map = lightrec_get_map(state, NULL, kaddr);
if (map) {
- while (map->mirror_of)
- map = map->mirror_of;
-
if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM])
return;
/* Handle mirrors */
kaddr &= (state->maps[PSX_MAP_KERNEL_USER_RAM].length - 1);
- for (; len > 4; len -= 4, kaddr += 4)
- lightrec_invalidate_map(state, map, kaddr);
-
- lightrec_invalidate_map(state, map, kaddr);
+ lightrec_invalidate_map(state, map, kaddr, len);
}
}
return state->exit_flags;
}
-void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34])
-{
- memcpy(regs, state->native_reg_cache, sizeof(state->native_reg_cache));
-}
-
-void lightrec_restore_registers(struct lightrec_state *state, u32 regs[34])
-{
- memcpy(state->native_reg_cache, regs, sizeof(state->native_reg_cache));
-}
-
u32 lightrec_current_cycle_count(const struct lightrec_state *state)
{
return state->current_cycle;
state->target_cycle = cycles;
}
}
+
+struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state)
+{
+ return &state->regs;
+}
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2016-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2016-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_H__
/* Exit flags */
#define LIGHTREC_EXIT_NORMAL (0)
-#define LIGHTREC_EXIT_SYSCALL (1 << 0)
+#define LIGHTREC_EXIT_CHECK_INTERRUPT (1 << 0)
#define LIGHTREC_EXIT_BREAK (1 << 1)
-#define LIGHTREC_EXIT_CHECK_INTERRUPT (1 << 2)
+#define LIGHTREC_EXIT_SYSCALL (1 << 2)
#define LIGHTREC_EXIT_SEGFAULT (1 << 3)
enum psx_map {
PSX_MAP_MIRROR3,
};
-enum mem_type {
- MEM_FOR_CODE,
- MEM_FOR_MIPS_CODE,
- MEM_FOR_IR,
- MEM_FOR_LIGHTREC,
- MEM_TYPE_END,
-};
-
struct lightrec_mem_map_ops {
void (*sb)(struct lightrec_state *, u32 opcode,
void *host, u32 addr, u8 data);
const struct lightrec_mem_map *mirror_of;
};
-struct lightrec_cop_ops {
- u32 (*mfc)(struct lightrec_state *state, u32 op, u8 reg);
- u32 (*cfc)(struct lightrec_state *state, u32 op, u8 reg);
- void (*mtc)(struct lightrec_state *state, u32 op, u8 reg, u32 value);
- void (*ctc)(struct lightrec_state *state, u32 op, u8 reg, u32 value);
- void (*op)(struct lightrec_state *state, u32 op);
+struct lightrec_ops {
+ void (*cop2_op)(struct lightrec_state *state, u32 op);
+ void (*enable_ram)(struct lightrec_state *state, _Bool enable);
};
-struct lightrec_ops {
- struct lightrec_cop_ops cop0_ops;
- struct lightrec_cop_ops cop2_ops;
+struct lightrec_registers {
+ u32 gpr[34];
+ u32 cp0[32];
+ u32 cp2d[32];
+ u32 cp2c[32];
};
__api struct lightrec_state *lightrec_init(char *argv0,
__api void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags);
__api u32 lightrec_exit_flags(struct lightrec_state *state);
-__api void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34]);
-__api void lightrec_restore_registers(struct lightrec_state *state,
- u32 regs[34]);
+__api struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state);
__api u32 lightrec_current_cycle_count(const struct lightrec_state *state);
__api void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles);
__api void lightrec_set_target_cycle_count(struct lightrec_state *state,
u32 cycles);
-__api unsigned int lightrec_get_mem_usage(enum mem_type type);
-__api unsigned int lightrec_get_total_mem_usage(void);
-__api float lightrec_get_average_ipi(void);
-
#ifdef __cplusplus
};
#endif
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
-#include "config.h"
+#include "lightrec-config.h"
#include "lightrec-private.h"
#include "memmanager.h"
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __MEMMANAGER_H__
#include "lightrec.h"
+enum mem_type {
+ MEM_FOR_CODE,
+ MEM_FOR_MIPS_CODE,
+ MEM_FOR_IR,
+ MEM_FOR_LIGHTREC,
+ MEM_TYPE_END,
+};
+
void * lightrec_malloc(struct lightrec_state *state,
enum mem_type type, unsigned int len);
void * lightrec_calloc(struct lightrec_state *state,
void lightrec_register(enum mem_type type, unsigned int len);
void lightrec_unregister(enum mem_type type, unsigned int len);
+unsigned int lightrec_get_mem_usage(enum mem_type type);
+unsigned int lightrec_get_total_mem_usage(void);
+float lightrec_get_average_ipi(void);
+
#endif /* __MEMMANAGER_H__ */
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
+#include "lightrec-config.h"
#include "disassembler.h"
#include "lightrec.h"
#include "memmanager.h"
#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
+#include <string.h>
+
+#define IF_OPT(opt, ptr) ((opt) ? (ptr) : NULL)
struct optimizer_list {
void (**optimizers)(struct opcode *);
unsigned int nb_optimizers;
};
-bool opcode_reads_register(union code op, u8 reg)
+static bool is_nop(union code op);
+
+bool is_unconditional_jump(union code c)
+{
+ switch (c.i.op) {
+ case OP_SPECIAL:
+ return c.r.op == OP_SPECIAL_JR || c.r.op == OP_SPECIAL_JALR;
+ case OP_J:
+ case OP_JAL:
+ return true;
+ case OP_BEQ:
+ case OP_BLEZ:
+ return c.i.rs == c.i.rt;
+ case OP_REGIMM:
+ return (c.r.rt == OP_REGIMM_BGEZ ||
+ c.r.rt == OP_REGIMM_BGEZAL) && c.i.rs == 0;
+ default:
+ return false;
+ }
+}
+
+bool is_syscall(union code c)
+{
+ return (c.i.op == OP_SPECIAL && c.r.op == OP_SPECIAL_SYSCALL) ||
+ (c.i.op == OP_CP0 && (c.r.rs == OP_CP0_MTC0 ||
+ c.r.rs == OP_CP0_CTC0) &&
+ (c.r.rd == 12 || c.r.rd == 13));
+}
+
+static u64 opcode_read_mask(union code op)
{
switch (op.i.op) {
case OP_SPECIAL:
switch (op.r.op) {
case OP_SPECIAL_SYSCALL:
case OP_SPECIAL_BREAK:
- return false;
+ return 0;
case OP_SPECIAL_JR:
case OP_SPECIAL_JALR:
case OP_SPECIAL_MTHI:
case OP_SPECIAL_MTLO:
- return op.r.rs == reg;
+ return BIT(op.r.rs);
case OP_SPECIAL_MFHI:
- return reg == REG_HI;
+ return BIT(REG_HI);
case OP_SPECIAL_MFLO:
- return reg == REG_LO;
+ return BIT(REG_LO);
case OP_SPECIAL_SLL:
case OP_SPECIAL_SRL:
case OP_SPECIAL_SRA:
- return op.r.rt == reg;
+ return BIT(op.r.rt);
default:
- return op.r.rs == reg || op.r.rt == reg;
+ return BIT(op.r.rs) | BIT(op.r.rt);
}
case OP_CP0:
switch (op.r.rs) {
case OP_CP0_MTC0:
case OP_CP0_CTC0:
- return op.r.rt == reg;
+ return BIT(op.r.rt);
default:
- return false;
+ return 0;
}
case OP_CP2:
if (op.r.op == OP_CP2_BASIC) {
switch (op.r.rs) {
case OP_CP2_BASIC_MTC2:
case OP_CP2_BASIC_CTC2:
- return op.r.rt == reg;
+ return BIT(op.r.rt);
default:
- return false;
+ break;
}
- } else {
- return false;
}
+ return 0;
case OP_J:
case OP_JAL:
case OP_LUI:
- return false;
+ return 0;
case OP_BEQ:
case OP_BNE:
case OP_LWL:
case OP_SWL:
case OP_SW:
case OP_SWR:
- return op.i.rs == reg || op.i.rt == reg;
+ return BIT(op.i.rs) | BIT(op.i.rt);
default:
- return op.i.rs == reg;
+ return BIT(op.i.rs);
}
}
-bool opcode_writes_register(union code op, u8 reg)
+static u64 opcode_write_mask(union code op)
{
+ u64 flags;
+
switch (op.i.op) {
case OP_SPECIAL:
switch (op.r.op) {
case OP_SPECIAL_JR:
- case OP_SPECIAL_JALR:
case OP_SPECIAL_SYSCALL:
case OP_SPECIAL_BREAK:
- return false;
+ return 0;
case OP_SPECIAL_MULT:
case OP_SPECIAL_MULTU:
case OP_SPECIAL_DIV:
case OP_SPECIAL_DIVU:
- return reg == REG_LO || reg == REG_HI;
+ if (!OPT_FLAG_MULT_DIV)
+ return BIT(REG_LO) | BIT(REG_HI);
+
+ if (op.r.rd)
+ flags = BIT(op.r.rd);
+ else
+ flags = BIT(REG_LO);
+ if (op.r.imm)
+ flags |= BIT(op.r.imm);
+ else
+ flags |= BIT(REG_HI);
+ return flags;
case OP_SPECIAL_MTHI:
- return reg == REG_HI;
+ return BIT(REG_HI);
case OP_SPECIAL_MTLO:
- return reg == REG_LO;
+ return BIT(REG_LO);
default:
- return op.r.rd == reg;
+ return BIT(op.r.rd);
}
case OP_ADDI:
case OP_ADDIU:
case OP_LBU:
case OP_LHU:
case OP_LWR:
- return op.i.rt == reg;
+ return BIT(op.i.rt);
+ case OP_JAL:
+ return BIT(31);
case OP_CP0:
switch (op.r.rs) {
case OP_CP0_MFC0:
case OP_CP0_CFC0:
- return op.i.rt == reg;
+ return BIT(op.i.rt);
default:
- return false;
+ return 0;
}
case OP_CP2:
if (op.r.op == OP_CP2_BASIC) {
switch (op.r.rs) {
case OP_CP2_BASIC_MFC2:
case OP_CP2_BASIC_CFC2:
- return op.i.rt == reg;
+ return BIT(op.i.rt);
default:
- return false;
+ break;
}
- } else {
- return false;
+ }
+ return 0;
+ case OP_REGIMM:
+ switch (op.r.rt) {
+ case OP_REGIMM_BLTZAL:
+ case OP_REGIMM_BGEZAL:
+ return BIT(31);
+ default:
+ return 0;
}
case OP_META_MOV:
- return op.r.rd == reg;
+ return BIT(op.r.rd);
default:
+ return 0;
+ }
+}
+
+bool opcode_reads_register(union code op, u8 reg)
+{
+ return opcode_read_mask(op) & BIT(reg);
+}
+
+bool opcode_writes_register(union code op, u8 reg)
+{
+ return opcode_write_mask(op) & BIT(reg);
+}
+
+static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 reg)
+{
+ union code c;
+ unsigned int i;
+
+ if (list[offset].flags & LIGHTREC_SYNC)
+ return -1;
+
+ for (i = offset; i > 0; i--) {
+ c = list[i - 1].c;
+
+ if (opcode_writes_register(c, reg)) {
+ if (i > 1 && has_delay_slot(list[i - 2].c))
+ break;
+
+ return i - 1;
+ }
+
+ if ((list[i - 1].flags & LIGHTREC_SYNC) ||
+ has_delay_slot(c) ||
+ opcode_reads_register(c, reg))
+ break;
+ }
+
+ return -1;
+}
+
+static int find_next_reader(const struct opcode *list, unsigned int offset, u8 reg)
+{
+ unsigned int i;
+ union code c;
+
+ if (list[offset].flags & LIGHTREC_SYNC)
+ return -1;
+
+ for (i = offset; ; i++) {
+ c = list[i].c;
+
+ if (opcode_reads_register(c, reg)) {
+ if (i > 0 && has_delay_slot(list[i - 1].c))
+ break;
+
+ return i;
+ }
+
+ if ((list[i].flags & LIGHTREC_SYNC) ||
+ has_delay_slot(c) || opcode_writes_register(c, reg))
+ break;
+ }
+
+ return -1;
+}
+
+static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg)
+{
+ unsigned int i;
+
+ if (list[offset].flags & LIGHTREC_SYNC)
return false;
+
+ for (i = offset + 1; ; i++) {
+ if (opcode_reads_register(list[i].c, reg))
+ return false;
+
+ if (opcode_writes_register(list[i].c, reg))
+ return true;
+
+ if (has_delay_slot(list[i].c)) {
+ if (list[i].flags & LIGHTREC_NO_DS)
+ return false;
+
+ return opcode_writes_register(list[i + 1].c, reg);
+ }
}
}
+static bool reg_is_read(const struct opcode *list,
+ unsigned int a, unsigned int b, u8 reg)
+{
+ /* Return true if reg is read in one of the opcodes of the interval
+ * [a, b[ */
+ for (; a < b; a++) {
+ if (!is_nop(list[a].c) && opcode_reads_register(list[a].c, reg))
+ return true;
+ }
+
+ return false;
+}
+
+static bool reg_is_written(const struct opcode *list,
+ unsigned int a, unsigned int b, u8 reg)
+{
+ /* Return true if reg is written in one of the opcodes of the interval
+ * [a, b[ */
+
+ for (; a < b; a++) {
+ if (!is_nop(list[a].c) && opcode_writes_register(list[a].c, reg))
+ return true;
+ }
+
+ return false;
+}
+
+static bool reg_is_read_or_written(const struct opcode *list,
+ unsigned int a, unsigned int b, u8 reg)
+{
+ return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
+}
+
+static bool opcode_is_load(union code op)
+{
+ switch (op.i.op) {
+ case OP_LB:
+ case OP_LH:
+ case OP_LWL:
+ case OP_LW:
+ case OP_LBU:
+ case OP_LHU:
+ case OP_LWR:
+ case OP_LWC2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool opcode_is_store(union code op)
+{
+ switch (op.i.op) {
+ case OP_SB:
+ case OP_SH:
+ case OP_SW:
+ case OP_SWL:
+ case OP_SWR:
+ case OP_SWC2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool opcode_is_io(union code op)
+{
+ return opcode_is_load(op) || opcode_is_store(op);
+}
+
/* TODO: Complete */
static bool is_nop(union code op)
{
case OP_SPECIAL_SRA:
case OP_SPECIAL_SRL:
return op.r.rd == op.r.rt && op.r.imm == 0;
+ case OP_SPECIAL_MFHI:
+ case OP_SPECIAL_MFLO:
+ return op.r.rd == 0;
default:
return false;
}
return false;
}
-static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v)
+static u32 lightrec_propagate_consts(const struct opcode *op, u32 known, u32 *v)
{
+ union code c = op->c;
+
+ if (op->flags & LIGHTREC_SYNC)
+ return 0;
+
switch (c.i.op) {
case OP_SPECIAL:
switch (c.r.op) {
return known;
}
-static int lightrec_add_meta(struct block *block,
- struct opcode *op, union code code)
+static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset)
{
- struct opcode *meta;
+ struct opcode *prev, *prev2 = NULL, *curr = &list[offset];
+ struct opcode *to_change, *to_nop;
+ int idx, idx2;
- meta = lightrec_malloc(block->state, MEM_FOR_IR, sizeof(*meta));
- if (!meta)
- return -ENOMEM;
+ if (curr->r.imm != 24 && curr->r.imm != 16)
+ return;
+
+ idx = find_prev_writer(list, offset, curr->r.rt);
+ if (idx < 0)
+ return;
+
+ prev = &list[idx];
+
+ if (prev->i.op != OP_SPECIAL || prev->r.op != OP_SPECIAL_SLL ||
+ prev->r.imm != curr->r.imm || prev->r.rd != curr->r.rt)
+ return;
- meta->c = code;
- meta->flags = 0;
+ if (prev->r.rd != prev->r.rt && curr->r.rd != curr->r.rt) {
+ /* sll rY, rX, 16
+ * ...
+ * srl rZ, rY, 16 */
- if (op) {
- meta->offset = op->offset;
- meta->next = op->next;
- op->next = meta;
+ if (!reg_is_dead(list, offset, curr->r.rt) ||
+ reg_is_read_or_written(list, idx, offset, curr->r.rd))
+ return;
+
+ /* If rY is dead after the SRL, and rZ is not used after the SLL,
+ * we can change rY to rZ */
+
+ pr_debug("Detected SLL/SRA with middle temp register\n");
+ prev->r.rd = curr->r.rd;
+ curr->r.rt = prev->r.rd;
+ }
+
+ /* We got a SLL/SRA combo. If imm #16, that's a cast to u16.
+ * If imm #24 that's a cast to u8.
+ *
+ * First of all, make sure that the target register of the SLL is not
+ * read before the SRA. */
+
+ if (prev->r.rd == prev->r.rt) {
+ /* sll rX, rX, 16
+ * ...
+ * srl rY, rX, 16 */
+ to_change = curr;
+ to_nop = prev;
+
+ /* rX is used after the SRA - we cannot convert it. */
+ if (prev->r.rd != curr->r.rd && !reg_is_dead(list, offset, prev->r.rd))
+ return;
} else {
- meta->offset = 0;
- meta->next = block->opcode_list;
- block->opcode_list = meta;
+ /* sll rY, rX, 16
+ * ...
+ * srl rY, rY, 16 */
+ to_change = prev;
+ to_nop = curr;
}
- return 0;
-}
+ idx2 = find_prev_writer(list, idx, prev->r.rt);
+ if (idx2 >= 0) {
+ /* Note that PSX games sometimes do casts after
+ * a LHU or LBU; in this case we can change the
+ * load opcode to a LH or LB, and the cast can
+ * be changed to a MOV or a simple NOP. */
+
+ prev2 = &list[idx2];
+
+ if (curr->r.rd != prev2->i.rt &&
+ !reg_is_dead(list, offset, prev2->i.rt))
+ prev2 = NULL;
+ else if (curr->r.imm == 16 && prev2->i.op == OP_LHU)
+ prev2->i.op = OP_LH;
+ else if (curr->r.imm == 24 && prev2->i.op == OP_LBU)
+ prev2->i.op = OP_LB;
+ else
+ prev2 = NULL;
+
+ if (prev2) {
+ if (curr->r.rd == prev2->i.rt) {
+ to_change->opcode = 0;
+ } else if (reg_is_dead(list, offset, prev2->i.rt) &&
+ !reg_is_read_or_written(list, idx2 + 1, offset, curr->r.rd)) {
+ /* The target register of the SRA is dead after the
+ * LBU/LHU; we can change the target register of the
+ * LBU/LHU to the one of the SRA. */
+ prev2->i.rt = curr->r.rd;
+ to_change->opcode = 0;
+ } else {
+ to_change->i.op = OP_META_MOV;
+ to_change->r.rd = curr->r.rd;
+ to_change->r.rs = prev2->i.rt;
+ }
-static int lightrec_add_sync(struct block *block, struct opcode *prev)
-{
- return lightrec_add_meta(block, prev, (union code){
- .j.op = OP_META_SYNC,
- });
+ if (to_nop->r.imm == 24)
+ pr_debug("Convert LBU+SLL+SRA to LB\n");
+ else
+ pr_debug("Convert LHU+SLL+SRA to LH\n");
+ }
+ }
+
+ if (!prev2) {
+ pr_debug("Convert SLL/SRA #%u to EXT%c\n",
+ prev->r.imm,
+ prev->r.imm == 24 ? 'C' : 'S');
+
+ if (to_change == prev) {
+ to_change->i.rs = prev->r.rt;
+ to_change->i.rt = curr->r.rd;
+ } else {
+ to_change->i.rt = curr->r.rd;
+ to_change->i.rs = prev->r.rt;
+ }
+
+ if (to_nop->r.imm == 24)
+ to_change->i.op = OP_META_EXTC;
+ else
+ to_change->i.op = OP_META_EXTS;
+ }
+
+ to_nop->opcode = 0;
}
-static int lightrec_transform_ops(struct block *block)
+static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
struct opcode *list = block->opcode_list;
+ struct opcode *op;
+ u32 known = BIT(0);
+ u32 values[32] = { 0 };
+ unsigned int i;
+ int reader;
- for (; list; list = list->next) {
+ for (i = 0; i < block->nb_ops; i++) {
+ op = &list[i];
/* Transform all opcodes detected as useless to real NOPs
* (0x0: SLL r0, r0, #0) */
- if (list->opcode != 0 && is_nop(list->c)) {
+ if (op->opcode != 0 && is_nop(op->c)) {
pr_debug("Converting useless opcode 0x%08x to NOP\n",
- list->opcode);
- list->opcode = 0x0;
+ op->opcode);
+ op->opcode = 0x0;
}
- if (!list->opcode)
+ if (!op->opcode)
continue;
- switch (list->i.op) {
- /* Transform BEQ / BNE to BEQZ / BNEZ meta-opcodes if one of the
- * two registers is zero. */
+ /* Register $zero is always, well, zero */
+ known |= BIT(0);
+ values[0] = 0;
+
+ switch (op->i.op) {
case OP_BEQ:
- if ((list->i.rs == 0) ^ (list->i.rt == 0)) {
- list->i.op = OP_META_BEQZ;
- if (list->i.rs == 0) {
- list->i.rs = list->i.rt;
- list->i.rt = 0;
- }
- } else if (list->i.rs == list->i.rt) {
- list->i.rs = 0;
- list->i.rt = 0;
+ if (op->i.rs == op->i.rt) {
+ op->i.rs = 0;
+ op->i.rt = 0;
+ } else if (op->i.rs == 0) {
+ op->i.rs = op->i.rt;
+ op->i.rt = 0;
}
break;
+
case OP_BNE:
- if (list->i.rs == 0) {
- list->i.op = OP_META_BNEZ;
- list->i.rs = list->i.rt;
- list->i.rt = 0;
- } else if (list->i.rt == 0) {
- list->i.op = OP_META_BNEZ;
+ if (op->i.rs == 0) {
+ op->i.rs = op->i.rt;
+ op->i.rt = 0;
+ }
+ break;
+
+ case OP_LUI:
+ if (!(op->flags & LIGHTREC_SYNC) &&
+ (known & BIT(op->i.rt)) &&
+ values[op->i.rt] == op->i.imm << 16) {
+ pr_debug("Converting duplicated LUI to NOP\n");
+ op->opcode = 0x0;
+ }
+
+ if (op->i.imm != 0 || op->i.rt == 0)
+ break;
+
+ reader = find_next_reader(list, i + 1, op->i.rt);
+ if (reader > 0 &&
+ (opcode_writes_register(list[reader].c, op->i.rt) ||
+ reg_is_dead(list, reader, op->i.rt))) {
+
+ pr_debug("Removing useless LUI 0x0\n");
+
+ if (list[reader].i.rs == op->i.rt)
+ list[reader].i.rs = 0;
+ if (list[reader].i.op == OP_SPECIAL &&
+ list[reader].i.rt == op->i.rt)
+ list[reader].i.rt = 0;
+ op->opcode = 0x0;
}
break;
case OP_ORI:
case OP_ADDI:
case OP_ADDIU:
- if (list->i.imm == 0) {
+ if (op->i.imm == 0) {
pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
- list->i.op = OP_META_MOV;
- list->r.rd = list->i.rt;
+ op->i.op = OP_META_MOV;
+ op->r.rd = op->i.rt;
}
break;
case OP_SPECIAL:
- switch (list->r.op) {
- case OP_SPECIAL_SLL:
+ switch (op->r.op) {
case OP_SPECIAL_SRA:
+ if (op->r.imm == 0) {
+ pr_debug("Convert SRA #0 to MOV\n");
+ op->i.op = OP_META_MOV;
+ op->r.rs = op->r.rt;
+ break;
+ }
+
+ lightrec_optimize_sll_sra(block->opcode_list, i);
+ break;
+ case OP_SPECIAL_SLL:
case OP_SPECIAL_SRL:
- if (list->r.imm == 0) {
- pr_debug("Convert SLL/SRL/SRA #0 to MOV\n");
- list->i.op = OP_META_MOV;
- list->r.rs = list->r.rt;
+ if (op->r.imm == 0) {
+ pr_debug("Convert SLL/SRL #0 to MOV\n");
+ op->i.op = OP_META_MOV;
+ op->r.rs = op->r.rt;
}
break;
case OP_SPECIAL_OR:
case OP_SPECIAL_ADD:
case OP_SPECIAL_ADDU:
- if (list->r.rs == 0) {
+ if (op->r.rs == 0) {
pr_debug("Convert OR/ADD $zero to MOV\n");
- list->i.op = OP_META_MOV;
- list->r.rs = list->r.rt;
+ op->i.op = OP_META_MOV;
+ op->r.rs = op->r.rt;
}
case OP_SPECIAL_SUB: /* fall-through */
case OP_SPECIAL_SUBU:
- if (list->r.rt == 0) {
+ if (op->r.rt == 0) {
pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
- list->i.op = OP_META_MOV;
+ op->i.op = OP_META_MOV;
}
default: /* fall-through */
break;
default: /* fall-through */
break;
}
+
+ known = lightrec_propagate_consts(op, known, values);
}
return 0;
}
-static int lightrec_switch_delay_slots(struct block *block)
+static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
{
- struct opcode *list, *prev;
+ struct opcode *list, *next = &block->opcode_list[0];
+ unsigned int i;
+ union code op, next_op;
u8 flags;
- for (list = block->opcode_list, prev = NULL; list->next;
- prev = list, list = list->next) {
- union code op = list->c;
- union code next_op = list->next->c;
+ for (i = 0; i < block->nb_ops - 1; i++) {
+ list = next;
+ next = &block->opcode_list[i + 1];
+ next_op = next->c;
+ op = list->c;
if (!has_delay_slot(op) ||
list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) ||
- op.opcode == 0)
+ op.opcode == 0 || next_op.opcode == 0)
+ continue;
+
+ if (i && has_delay_slot(block->opcode_list[i - 1].c) &&
+ !(block->opcode_list[i - 1].flags & LIGHTREC_NO_DS))
continue;
- if (prev && has_delay_slot(prev->c))
+ if ((list->flags & LIGHTREC_SYNC) ||
+ (next->flags & LIGHTREC_SYNC))
continue;
switch (list->i.op) {
continue;
case OP_BLEZ: /* fall-through */
case OP_BGTZ:
- case OP_META_BEQZ:
- case OP_META_BNEZ:
if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
continue;
break;
}
pr_debug("Swap branch and delay slot opcodes "
- "at offsets 0x%x / 0x%x\n", list->offset << 2,
- list->next->offset << 2);
+ "at offsets 0x%x / 0x%x\n",
+ i << 2, (i + 1) << 2);
- flags = list->next->flags;
+ flags = next->flags;
list->c = next_op;
- list->next->c = op;
- list->next->flags = list->flags | LIGHTREC_NO_DS;
+ next->c = op;
+ next->flags = list->flags | LIGHTREC_NO_DS;
list->flags = flags | LIGHTREC_NO_DS;
- list->offset++;
- list->next->offset--;
}
return 0;
}
-static int lightrec_detect_impossible_branches(struct block *block)
+/* shrink_opcode_list(): reallocate block->opcode_list down to new_size
+ * opcodes, dropping the tail of the list.
+ *
+ * The first new_size opcodes are copied into a fresh MEM_FOR_IR buffer,
+ * the old list is released, and block->nb_ops is updated to match.
+ * Returns 0 on success, -EINVAL if new_size would not actually shrink
+ * the list, or -ENOMEM if the allocation fails (the old list is left
+ * untouched in both error cases). */
+static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
+{
+ struct opcode *list;
+
+ if (new_size >= block->nb_ops) {
+ pr_err("Invalid shrink size (%u vs %u)\n",
+ new_size, block->nb_ops);
+ return -EINVAL;
+ }
+
+
+ list = lightrec_malloc(state, MEM_FOR_IR,
+ sizeof(*list) * new_size);
+ if (!list) {
+ pr_err("Unable to allocate memory\n");
+ return -ENOMEM;
+ }
+
+ /* Keep only the first new_size opcodes, then swap the buffers. */
+ memcpy(list, block->opcode_list, sizeof(*list) * new_size);
+
+ lightrec_free_opcode_list(state, block);
+ block->opcode_list = list;
+ block->nb_ops = new_size;
+
+ pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
+ block->pc, new_size);
+
+ return 0;
+}
+
+static int lightrec_detect_impossible_branches(struct lightrec_state *state,
+ struct block *block)
{
- struct opcode *op, *next;
+ struct opcode *op, *next = &block->opcode_list[0];
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < block->nb_ops - 1; i++) {
+ op = next;
+ next = &block->opcode_list[i + 1];
- for (op = block->opcode_list, next = op->next; next;
- op = next, next = op->next) {
if (!has_delay_slot(op->c) ||
(!load_in_delay_slot(next->c) &&
!has_delay_slot(next->c) &&
continue;
}
+ op->flags |= LIGHTREC_EMULATE_BRANCH;
+
if (op == block->opcode_list) {
+ pr_debug("First opcode of block PC 0x%08x is an impossible branch\n",
+ block->pc);
+
/* If the first opcode is an 'impossible' branch, we
* only keep the first two opcodes of the block (the
* branch itself + its delay slot) */
- lightrec_free_opcode_list(block->state, next->next);
- next->next = NULL;
- block->nb_ops = 2;
+ if (block->nb_ops > 2)
+ ret = shrink_opcode_list(state, block, 2);
+ break;
}
-
- op->flags |= LIGHTREC_EMULATE_BRANCH;
}
- return 0;
+ return ret;
}
-static int lightrec_local_branches(struct block *block)
+static int lightrec_local_branches(struct lightrec_state *state, struct block *block)
{
- struct opcode *list, *target, *prev;
+ struct opcode *list;
+ unsigned int i;
s32 offset;
- int ret;
- for (list = block->opcode_list; list; list = list->next) {
- if (list->flags & LIGHTREC_EMULATE_BRANCH)
+ for (i = 0; i < block->nb_ops; i++) {
+ list = &block->opcode_list[i];
+
+ if (should_emulate(list))
continue;
switch (list->i.op) {
case OP_BLEZ:
case OP_BGTZ:
case OP_REGIMM:
- case OP_META_BEQZ:
- case OP_META_BNEZ:
- offset = list->offset + 1 + (s16)list->i.imm;
+ offset = i + 1 + (s16)list->i.imm;
if (offset >= 0 && offset < block->nb_ops)
break;
default: /* fall-through */
pr_debug("Found local branch to offset 0x%x\n", offset << 2);
- for (target = block->opcode_list, prev = NULL;
- target; prev = target, target = target->next) {
- if (target->offset != offset ||
- target->j.op == OP_META_SYNC)
- continue;
-
- if (target->flags & LIGHTREC_EMULATE_BRANCH) {
- pr_debug("Branch target must be emulated"
- " - skip\n");
- break;
- }
-
- if (prev && has_delay_slot(prev->c)) {
- pr_debug("Branch target is a delay slot"
- " - skip\n");
- break;
- }
+ if (should_emulate(&block->opcode_list[offset])) {
+ pr_debug("Branch target must be emulated - skip\n");
+ continue;
+ }
- if (prev && prev->j.op != OP_META_SYNC) {
- pr_debug("Adding sync before offset "
- "0x%x\n", offset << 2);
- ret = lightrec_add_sync(block, prev);
- if (ret)
- return ret;
+ if (offset && has_delay_slot(block->opcode_list[offset - 1].c)) {
+ pr_debug("Branch target is a delay slot - skip\n");
+ continue;
+ }
- prev->next->offset = target->offset;
- }
+ pr_debug("Adding sync at offset 0x%x\n", offset << 2);
- list->flags |= LIGHTREC_LOCAL_BRANCH;
- break;
- }
+ block->opcode_list[offset].flags |= LIGHTREC_SYNC;
+ list->flags |= LIGHTREC_LOCAL_BRANCH;
}
return 0;
case OP_BLEZ:
case OP_BGTZ:
case OP_REGIMM:
- case OP_META_BEQZ:
- case OP_META_BNEZ:
return true;
default:
return false;
}
}
-static int lightrec_add_unload(struct block *block, struct opcode *op, u8 reg)
+/* should_emulate(): true when this opcode is a branch/jump (i.e. it has
+ * a delay slot) that was flagged LIGHTREC_EMULATE_BRANCH, meaning it
+ * cannot be compiled and must be handled by the branch emulation path. */
+bool should_emulate(const struct opcode *list)
{
- return lightrec_add_meta(block, op, (union code){
- .i.op = OP_META_REG_UNLOAD,
- .i.rs = reg,
- });
+ return has_delay_slot(list->c) &&
+ (list->flags & LIGHTREC_EMULATE_BRANCH);
}
-static int lightrec_early_unload(struct block *block)
+/* lightrec_add_unload(): mark opcode 'op' as the point where register
+ * 'reg' can be dropped from the register cache. The flag set depends on
+ * which operand field of the opcode references 'reg' (rd only for
+ * SPECIAL-format opcodes, rs/rt otherwise). */
+static void lightrec_add_unload(struct opcode *op, u8 reg)
{
- struct opcode *list = block->opcode_list;
- u8 i;
+ if (op->i.op == OP_SPECIAL && reg == op->r.rd)
+ op->flags |= LIGHTREC_UNLOAD_RD;
- for (i = 1; i < 34; i++) {
- struct opcode *op, *last_r = NULL, *last_w = NULL;
- unsigned int last_r_id = 0, last_w_id = 0, id = 0;
- int ret;
+ if (op->i.rs == reg)
+ op->flags |= LIGHTREC_UNLOAD_RS;
+ if (op->i.rt == reg)
+ op->flags |= LIGHTREC_UNLOAD_RT;
+}
- for (op = list; op->next; op = op->next, id++) {
- if (opcode_reads_register(op->c, i)) {
- last_r = op;
- last_r_id = id;
- }
+static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
+{
+ unsigned int i, offset;
+ struct opcode *op;
+ u8 reg;
- if (opcode_writes_register(op->c, i)) {
- last_w = op;
- last_w_id = id;
- }
+ for (reg = 1; reg < 34; reg++) {
+ int last_r_id = -1, last_w_id = -1;
+
+ for (i = 0; i < block->nb_ops; i++) {
+ union code c = block->opcode_list[i].c;
+
+ if (opcode_reads_register(c, reg))
+ last_r_id = i;
+ if (opcode_writes_register(c, reg))
+ last_w_id = i;
}
- if (last_w_id > last_r_id) {
- if (has_delay_slot(last_w->c) &&
- !(last_w->flags & LIGHTREC_NO_DS))
- last_w = last_w->next;
+ if (last_w_id > last_r_id)
+ offset = (unsigned int)last_w_id;
+ else if (last_r_id >= 0)
+ offset = (unsigned int)last_r_id;
+ else
+ continue;
- if (last_w->next) {
- ret = lightrec_add_unload(block, last_w, i);
- if (ret)
- return ret;
- }
- } else if (last_r) {
- if (has_delay_slot(last_r->c) &&
- !(last_r->flags & LIGHTREC_NO_DS))
- last_r = last_r->next;
+ op = &block->opcode_list[offset];
- if (last_r->next) {
- ret = lightrec_add_unload(block, last_r, i);
- if (ret)
- return ret;
- }
- }
+ if (has_delay_slot(op->c) && (op->flags & LIGHTREC_NO_DS))
+ offset++;
+
+ if (offset == block->nb_ops)
+ continue;
+
+ lightrec_add_unload(&block->opcode_list[offset], reg);
}
return 0;
}
-static int lightrec_flag_stores(struct block *block)
+static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
{
+ const struct lightrec_mem_map *map;
struct opcode *list;
u32 known = BIT(0);
u32 values[32] = { 0 };
+ unsigned int i;
+ u32 val;
+
+ for (i = 0; i < block->nb_ops; i++) {
+ list = &block->opcode_list[i];
- for (list = block->opcode_list; list; list = list->next) {
/* Register $zero is always, well, zero */
known |= BIT(0);
values[0] = 0;
case OP_SB:
case OP_SH:
case OP_SW:
- /* Mark all store operations that target $sp or $gp
- * as not requiring code invalidation. This is based
- * on the heuristic that stores using one of these
- * registers as address will never hit a code page. */
- if (list->i.rs >= 28 && list->i.rs <= 29 &&
- !block->state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
- pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n",
- list->opcode);
- list->flags |= LIGHTREC_NO_INVALIDATE;
- }
-
- /* Detect writes whose destination address is inside the
- * current block, using constant propagation. When these
- * occur, we mark the blocks as not compilable. */
- if ((known & BIT(list->i.rs)) &&
- kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
- kunseg(values[list->i.rs]) < (kunseg(block->pc) +
- block->nb_ops * 4)) {
- pr_debug("Self-modifying block detected\n");
- block->flags |= BLOCK_NEVER_COMPILE;
- list->flags |= LIGHTREC_SMC;
+ if (OPT_FLAG_STORES) {
+ /* Mark all store operations that target $sp or $gp
+ * as not requiring code invalidation. This is based
+ * on the heuristic that stores using one of these
+ * registers as address will never hit a code page. */
+ if (list->i.rs >= 28 && list->i.rs <= 29 &&
+ !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
+ pr_debug("Flaging opcode 0x%08x as not "
+ "requiring invalidation\n",
+ list->opcode);
+ list->flags |= LIGHTREC_NO_INVALIDATE;
+ }
+
+ /* Detect writes whose destination address is inside the
+ * current block, using constant propagation. When these
+ * occur, we mark the blocks as not compilable. */
+ if ((known & BIT(list->i.rs)) &&
+ kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
+ kunseg(values[list->i.rs]) < (kunseg(block->pc) +
+ block->nb_ops * 4)) {
+ pr_debug("Self-modifying block detected\n");
+ block->flags |= BLOCK_NEVER_COMPILE;
+ list->flags |= LIGHTREC_SMC;
+ }
+ }
+ case OP_SWL: /* fall-through */
+ case OP_SWR:
+ case OP_SWC2:
+ case OP_LB:
+ case OP_LBU:
+ case OP_LH:
+ case OP_LHU:
+ case OP_LW:
+ case OP_LWL:
+ case OP_LWR:
+ case OP_LWC2:
+ if (OPT_FLAG_IO && (known & BIT(list->i.rs))) {
+ val = kunseg(values[list->i.rs] + (s16) list->i.imm);
+ map = lightrec_get_map(state, NULL, val);
+
+ if (!map || map->ops ||
+ map == &state->maps[PSX_MAP_PARALLEL_PORT]) {
+ pr_debug("Flagging opcode %u as accessing I/O registers\n",
+ i);
+ list->flags |= LIGHTREC_HW_IO;
+ } else {
+ pr_debug("Flaging opcode %u as direct memory access\n", i);
+ list->flags |= LIGHTREC_DIRECT_IO;
+ }
}
default: /* fall-through */
break;
}
- known = lightrec_propagate_consts(list->c, known, values);
+ known = lightrec_propagate_consts(list, known, values);
}
return 0;
}
-static bool is_mult32(const struct block *block, const struct opcode *op)
+static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
+ const struct opcode *last,
+ u32 mask, bool sync, bool mflo, bool another)
{
- const struct opcode *next, *last = NULL;
- u32 offset;
+ const struct opcode *op, *next = &block->opcode_list[offset];
+ u32 old_mask;
+ u8 reg2, reg = mflo ? REG_LO : REG_HI;
+ u16 branch_offset;
+ unsigned int i;
+
+ for (i = offset; i < block->nb_ops; i++) {
+ op = next;
+ next = &block->opcode_list[i + 1];
+ old_mask = mask;
+
+ /* If any other opcode writes or reads to the register
+ * we'd use, then we cannot use it anymore. */
+ mask |= opcode_read_mask(op->c);
+ mask |= opcode_write_mask(op->c);
+
+ if (op->flags & LIGHTREC_SYNC)
+ sync = true;
- for (op = op->next; op != last; op = op->next) {
switch (op->i.op) {
case OP_BEQ:
case OP_BNE:
case OP_BLEZ:
case OP_BGTZ:
case OP_REGIMM:
- case OP_META_BEQZ:
- case OP_META_BNEZ:
/* TODO: handle backwards branches too */
- if ((op->flags & LIGHTREC_LOCAL_BRANCH) &&
+ if (!last &&
+ (op->flags & LIGHTREC_LOCAL_BRANCH) &&
(s16)op->c.i.imm >= 0) {
- offset = op->offset + 1 + (s16)op->c.i.imm;
-
- for (next = op; next->offset != offset;
- next = next->next);
-
- if (!is_mult32(block, next))
- return false;
-
- last = next;
- continue;
- } else {
- return false;
+ branch_offset = i + 1 + (s16)op->c.i.imm
+ - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
+
+ reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
+ mask, sync, mflo, false);
+ reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
+ mask, sync, mflo, false);
+ if (reg > 0 && reg == reg2)
+ return reg;
+ if (!reg && !reg2)
+ return 0;
}
+
+ return mflo ? REG_LO : REG_HI;
case OP_SPECIAL:
switch (op->r.op) {
case OP_SPECIAL_MULT:
case OP_SPECIAL_MULTU:
case OP_SPECIAL_DIV:
case OP_SPECIAL_DIVU:
+ return 0;
case OP_SPECIAL_MTHI:
- return true;
+ if (!mflo)
+ return 0;
+ continue;
+ case OP_SPECIAL_MTLO:
+ if (mflo)
+ return 0;
+ continue;
case OP_SPECIAL_JR:
- return op->r.rs == 31 &&
- ((op->flags & LIGHTREC_NO_DS) ||
- !(op->next->i.op == OP_SPECIAL &&
- op->next->r.op == OP_SPECIAL_MFHI));
+ if (op->r.rs != 31)
+ return reg;
+
+ if (!sync &&
+ !(op->flags & LIGHTREC_NO_DS) &&
+ (next->i.op == OP_SPECIAL) &&
+ ((!mflo && next->r.op == OP_SPECIAL_MFHI) ||
+ (mflo && next->r.op == OP_SPECIAL_MFLO)))
+ return next->r.rd;
+
+ return 0;
case OP_SPECIAL_JALR:
+ return reg;
case OP_SPECIAL_MFHI:
- return false;
- default:
+ if (!mflo) {
+ if (another)
+ return op->r.rd;
+ /* Must use REG_HI if there is another MFHI target*/
+ reg2 = get_mfhi_mflo_reg(block, i + 1, next,
+ 0, sync, mflo, true);
+ if (reg2 > 0 && reg2 != REG_HI)
+ return REG_HI;
+
+ if (!sync && !(old_mask & BIT(op->r.rd)))
+ return op->r.rd;
+ else
+ return REG_HI;
+ }
+ continue;
+ case OP_SPECIAL_MFLO:
+ if (mflo) {
+ if (another)
+ return op->r.rd;
+ /* Must use REG_LO if there is another MFLO target*/
+ reg2 = get_mfhi_mflo_reg(block, i + 1, next,
+ 0, sync, mflo, true);
+ if (reg2 > 0 && reg2 != REG_LO)
+ return REG_LO;
+
+ if (!sync && !(old_mask & BIT(op->r.rd)))
+ return op->r.rd;
+ else
+ return REG_LO;
+ }
continue;
+ default:
+ break;
}
+
+ /* fall-through */
default:
continue;
}
}
- return last != NULL;
+ return reg;
+}
+
+/* lightrec_replace_lo_hi(): NOP out the first MFLO (if 'lo') or MFHI
+ * (if !'lo') found in the opcode range [offset, last), recursing into
+ * both the taken and not-taken paths of forward local branches.
+ * Precondition (see note in the body): get_mfhi_mflo_reg() must have
+ * returned a non-zero register, so the MULT/DIV result was redirected
+ * to a general-purpose register and the move is now redundant. */
+static void lightrec_replace_lo_hi(struct block *block, u16 offset,
+ u16 last, bool lo)
+{
+ unsigned int i;
+ u32 branch_offset;
+
+ /* This function will remove the following MFLO/MFHI. It must be called
+ * only if get_mfhi_mflo_reg() returned a non-zero value. */
+
+ for (i = offset; i < last; i++) {
+ struct opcode *op = &block->opcode_list[i];
+
+ switch (op->i.op) {
+ case OP_BEQ:
+ case OP_BNE:
+ case OP_BLEZ:
+ case OP_BGTZ:
+ case OP_REGIMM:
+ /* TODO: handle backwards branches too */
+ if ((op->flags & LIGHTREC_LOCAL_BRANCH) &&
+ (s16)op->c.i.imm >= 0) {
+ /* Same target computation as get_mfhi_mflo_reg():
+ * account for a swapped delay slot when present. */
+ branch_offset = i + 1 + (s16)op->c.i.imm
+ - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
+
+ lightrec_replace_lo_hi(block, branch_offset, last, lo);
+ lightrec_replace_lo_hi(block, i + 1, branch_offset, lo);
+ }
+ break;
+
+ case OP_SPECIAL:
+ if (lo && op->r.op == OP_SPECIAL_MFLO) {
+ pr_debug("Removing MFLO opcode at offset 0x%x\n",
+ i << 2);
+ op->opcode = 0;
+ return;
+ } else if (!lo && op->r.op == OP_SPECIAL_MFHI) {
+ pr_debug("Removing MFHI opcode at offset 0x%x\n",
+ i << 2);
+ op->opcode = 0;
+ return;
+ }
+
+ /* fall-through */
+ default:
+ break;
+ }
+ }
+}
-static int lightrec_flag_mults(struct block *block)
+static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
{
- struct opcode *list, *prev;
+ struct opcode *list;
+ u8 reg_hi, reg_lo;
+ unsigned int i;
+
+ for (i = 0; i < block->nb_ops - 1; i++) {
+ list = &block->opcode_list[i];
- for (list = block->opcode_list, prev = NULL; list;
- prev = list, list = list->next) {
if (list->i.op != OP_SPECIAL)
continue;
switch (list->r.op) {
case OP_SPECIAL_MULT:
case OP_SPECIAL_MULTU:
+ case OP_SPECIAL_DIV:
+ case OP_SPECIAL_DIVU:
break;
default:
continue;
}
- /* Don't support MULT(U) opcodes in delay slots */
- if (prev && has_delay_slot(prev->c))
+ /* Don't support opcodes in delay slots */
+ if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
+ (list->flags & LIGHTREC_NO_DS))
continue;
- if (is_mult32(block, list)) {
- pr_debug("Mark MULT(U) opcode at offset 0x%x as"
- " 32-bit\n", list->offset << 2);
- list->flags |= LIGHTREC_MULT32;
+ reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
+ if (reg_lo == 0) {
+ pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
+ " not writing LO\n", i << 2);
+ list->flags |= LIGHTREC_NO_LO;
+ }
+
+ reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false);
+ if (reg_hi == 0) {
+ pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
+ " not writing HI\n", i << 2);
+ list->flags |= LIGHTREC_NO_HI;
+ }
+
+ if (!reg_lo && !reg_hi) {
+ pr_debug("Both LO/HI unused in this block, they will "
+ "probably be used in parent block - removing "
+ "flags.\n");
+ list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI);
+ }
+
+ if (reg_lo > 0 && reg_lo != REG_LO) {
+ pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n",
+ lightrec_reg_name(reg_lo), list->r.rs, list->r.rt);
+
+ lightrec_replace_lo_hi(block, i + 1, block->nb_ops, true);
+ list->r.rd = reg_lo;
+ } else {
+ list->r.rd = 0;
+ }
+
+ if (reg_hi > 0 && reg_hi != REG_HI) {
+ pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n",
+ lightrec_reg_name(reg_hi), list->r.rs, list->r.rt);
+
+ lightrec_replace_lo_hi(block, i + 1, block->nb_ops, false);
+ list->r.imm = reg_hi;
+ } else {
+ list->r.imm = 0;
+ }
+ }
+
+ return 0;
+}
+
+/* remove_div_sequence(): starting at 'offset' (just past a DIV/DIVU),
+ * match the GCC-generated divide-by-zero / overflow check sequence and
+ * NOP it out. Returns true when a sequence was matched and removed:
+ * 3 opcodes for the DIVU form, 9 for the full signed DIV form. */
+static bool remove_div_sequence(struct block *block, unsigned int offset)
+{
+ struct opcode *op;
+ unsigned int i, found = 0;
+
+ /*
+ * Scan for the zero-checking sequence that GCC automatically introduced
+ * after most DIV/DIVU opcodes. This sequence checks the value of the
+ * divisor, and if zero, executes a BREAK opcode, causing the BIOS
+ * handler to crash the PS1.
+ *
+ * For DIV opcodes, this sequence additionally checks that the signed
+ * operation does not overflow.
+ *
+ * With the assumption that the games never crashed the PS1, we can
+ * therefore assume that the games never divided by zero or overflowed,
+ * and these sequences can be removed.
+ */
+
+ for (i = offset; i < block->nb_ops; i++) {
+ op = &block->opcode_list[i];
+
+ if (!found) {
+ /* Stop scanning at the next DIV/DIVU: its own check
+ * sequence will be handled on a later iteration. */
+ if (op->i.op == OP_SPECIAL &&
+ (op->r.op == OP_SPECIAL_DIV || op->r.op == OP_SPECIAL_DIVU))
+ break;
+
+ if ((op->opcode & 0xfc1fffff) == 0x14000002) {
+ /* BNE ???, zero, +8 */
+ found++;
+ } else {
+ /* Not the sequence start yet - keep 'offset'
+ * pointing at the first matched opcode. */
+ offset++;
+ }
+ } else if (found == 1 && !op->opcode) {
+ /* NOP */
+ found++;
+ } else if (found == 2 && op->opcode == 0x0007000d) {
+ /* BREAK 0x1c00 */
+ found++;
+ } else if (found == 3 && op->opcode == 0x2401ffff) {
+ /* LI at, -1 */
+ found++;
+ } else if (found == 4 && (op->opcode & 0xfc1fffff) == 0x14010004) {
+ /* BNE ???, at, +16 */
+ found++;
+ } else if (found == 5 && op->opcode == 0x3c018000) {
+ /* LUI at, 0x8000 */
+ found++;
+ } else if (found == 6 && (op->opcode & 0x141fffff) == 0x14010002) {
+ /* BNE ???, at, +8 (imm is 2)
+ * NOTE(review): this mask is 0x141fffff, unlike the
+ * 0xfc1fffff used by the other BNE checks above -
+ * confirm the looser mask is intentional. */
+ found++;
+ } else if (found == 7 && !op->opcode) {
+ /* NOP */
+ found++;
+ } else if (found == 8 && op->opcode == 0x0006000d) {
+ /* BREAK 0x1800 */
+ found++;
+ break;
+ } else {
+ break;
+ }
+ }
+
+ if (found >= 3) {
+ /* A partial match past the DIVU form still removes only the
+ * three-opcode zero check. */
+ if (found != 9)
+ found = 3;
+
+ pr_debug("Removing DIV%s sequence at offset 0x%x\n",
+ found == 9 ? "" : "U", offset << 2);
+
+ for (i = 0; i < found; i++)
+ block->opcode_list[offset + i].opcode = 0;
+
+ return true;
+ }
+
+ return false;
+}
+
+/* lightrec_remove_div_by_zero_check_sequence(): optimizer pass. For each
+ * DIV/DIVU in the block whose trailing zero-check sequence could be
+ * removed by remove_div_sequence(), set LIGHTREC_NO_DIV_CHECK on the
+ * divide opcode (presumably consumed by the emitter to skip its own
+ * zero-divisor handling - not visible from this file). Always returns 0. */
+static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state,
+ struct block *block)
+{
+ struct opcode *op;
+ unsigned int i;
+
+ for (i = 0; i < block->nb_ops; i++) {
+ op = &block->opcode_list[i];
+
+ if (op->i.op == OP_SPECIAL &&
+ (op->r.op == OP_SPECIAL_DIVU || op->r.op == OP_SPECIAL_DIV) &&
+ remove_div_sequence(block, i + 1))
+ op->flags |= LIGHTREC_NO_DIV_CHECK;
+ }
+
+ return 0;
+}
+
+/* Exact opcode image of the word-store memset loop commonly found in
+ * PS1 game code; lightrec_replace_memset() below compares blocks
+ * against it word for word. */
+static const u32 memset_code[] = {
+ 0x10a00006, // beqz a1, 2f
+ 0x24a2ffff, // addiu v0,a1,-1
+ 0x2403ffff, // li v1,-1
+ 0xac800000, // 1: sw zero,0(a0)
+ 0x2442ffff, // addiu v0,v0,-1
+ 0x1443fffd, // bne v0,v1, 1b
+ 0x24840004, // addiu a0,a0,4
+ 0x03e00008, // 2: jr ra
+ 0x00000000, // nop
+};
+
+/* lightrec_replace_memset(): optimizer pass. Detect blocks that are an
+ * exact opcode-for-opcode match of memset_code[]. On a match, flag the
+ * block BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE and return non-zero so
+ * the remaining optimizer passes are skipped; returns 0 on the first
+ * mismatch. Blocks shorter than the pattern fall out of the loop and
+ * also return 0. */
+static int lightrec_replace_memset(struct lightrec_state *state, struct block *block)
+{
+ unsigned int i;
+ union code c;
+
+ for (i = 0; i < block->nb_ops; i++) {
+ c = block->opcode_list[i].c;
+
+ if (c.opcode != memset_code[i])
+ return 0;
+
+ if (i == ARRAY_SIZE(memset_code) - 1) {
+ /* success! */
+ pr_debug("Block at PC 0x%x is a memset\n", block->pc);
+ block->flags |= BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE;
+
+ /* Return non-zero to skip other optimizers. */
+ return 1;
}
}
return 0;
}
-static int (*lightrec_optimizers[])(struct block *) = {
- &lightrec_detect_impossible_branches,
- &lightrec_transform_ops,
- &lightrec_local_branches,
- &lightrec_switch_delay_slots,
- &lightrec_flag_stores,
- &lightrec_flag_mults,
- &lightrec_early_unload,
+static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
+ IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
+ IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
+ IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
+ IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches),
+ IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops),
+ IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots),
+ IF_OPT(OPT_FLAG_IO || OPT_FLAG_STORES, &lightrec_flag_io),
+ IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
+ IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
};
-int lightrec_optimize(struct block *block)
+int lightrec_optimize(struct lightrec_state *state, struct block *block)
{
unsigned int i;
+ int ret;
for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
- int ret = lightrec_optimizers[i](block);
-
- if (ret)
- return ret;
+ if (lightrec_optimizers[i]) {
+ ret = (*lightrec_optimizers[i])(state, block);
+ if (ret)
+ return ret;
+ }
}
return 0;
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __OPTIMIZER_H__
#include "disassembler.h"
struct block;
+struct opcode;
_Bool opcode_reads_register(union code op, u8 reg);
_Bool opcode_writes_register(union code op, u8 reg);
_Bool has_delay_slot(union code op);
_Bool load_in_delay_slot(union code op);
+_Bool opcode_is_io(union code op);
+_Bool is_unconditional_jump(union code c);
+_Bool is_syscall(union code c);
-int lightrec_optimize(struct block *block);
+_Bool should_emulate(const struct opcode *op);
+
+int lightrec_optimize(struct lightrec_state *state, struct block *block);
#endif /* __OPTIMIZER_H__ */
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
- * Copyright (C) 2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2020-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "blockcache.h"
#include <errno.h>
#include <pthread.h>
+#include <stdatomic.h>
#include <stdbool.h>
struct reaper_elm {
struct lightrec_state *state;
pthread_mutex_t mutex;
struct slist_elm reap_list;
+
+ atomic_uint sem;
};
struct reaper *lightrec_reaper_init(struct lightrec_state *state)
}
reaper->state = state;
+ reaper->sem = 0;
slist_init(&reaper->reap_list);
ret = pthread_mutex_init(&reaper->mutex, NULL);
return ret;
}
+/* lightrec_reaper_can_reap(): true while no pause request is pending,
+ * i.e. 'sem' counts lightrec_reaper_pause() calls not yet matched by
+ * lightrec_reaper_continue(). */
+static bool lightrec_reaper_can_reap(struct reaper *reaper)
+{
+ return !atomic_load_explicit(&reaper->sem, memory_order_relaxed);
+}
+
void lightrec_reaper_reap(struct reaper *reaper)
{
struct reaper_elm *reaper_elm;
pthread_mutex_lock(&reaper->mutex);
- while (!!(elm = slist_first(&reaper->reap_list))) {
+ while (lightrec_reaper_can_reap(reaper) &&
+ !!(elm = slist_first(&reaper->reap_list))) {
slist_remove(&reaper->reap_list, elm);
pthread_mutex_unlock(&reaper->mutex);
reaper_elm = container_of(elm, struct reaper_elm, slist);
- (*reaper_elm->func)(reaper_elm->data);
+ (*reaper_elm->func)(reaper->state, reaper_elm->data);
lightrec_free(reaper->state, MEM_FOR_LIGHTREC,
sizeof(*reaper_elm), reaper_elm);
pthread_mutex_unlock(&reaper->mutex);
}
+
+/* lightrec_reaper_pause(): prevent lightrec_reaper_reap() from starting
+ * new reap work; calls nest and must be balanced with
+ * lightrec_reaper_continue().
+ * NOTE(review): relaxed ordering means a reap already dequeued (between
+ * the mutex unlock and the callback) is not stopped by this call -
+ * confirm callers only rely on blocking *future* reaps. */
+void lightrec_reaper_pause(struct reaper *reaper)
+{
+ atomic_fetch_add_explicit(&reaper->sem, 1, memory_order_relaxed);
+}
+
+/* lightrec_reaper_continue(): undo one lightrec_reaper_pause(); reaping
+ * resumes once the pause counter drops back to zero. */
+void lightrec_reaper_continue(struct reaper *reaper)
+{
+ atomic_fetch_sub_explicit(&reaper->sem, 1, memory_order_relaxed);
+}
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2020-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_REAPER_H__
struct lightrec_state;
struct reaper;
-typedef void (*reap_func_t)(void *);
+typedef void (*reap_func_t)(struct lightrec_state *state, void *);
struct reaper *lightrec_reaper_init(struct lightrec_state *state);
void lightrec_reaper_destroy(struct reaper *reaper);
int lightrec_reaper_add(struct reaper *reaper, reap_func_t f, void *data);
void lightrec_reaper_reap(struct reaper *reaper);
+void lightrec_reaper_pause(struct reaper *reaper);
+void lightrec_reaper_continue(struct reaper *reaper);
+
#endif /* __LIGHTREC_REAPER_H__ */
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "debug.h"
#include <stdbool.h>
#include <stdlib.h>
#include <pthread.h>
+#ifdef __linux__
+#include <unistd.h>
+#endif
struct block_rec {
struct block *block;
struct slist_elm slist;
+ bool compiling;
+};
+
+struct recompiler_thd {
+ struct lightrec_cstate *cstate;
+ unsigned int tid;
+ pthread_t thd;
};
struct recompiler {
	struct lightrec_state *state;
	pthread_cond_t cond;
+	/* Secondary condition variable, signalled by a worker each time a
+	 * block has been compiled and removed from the queue (see
+	 * lightrec_compile_list / lightrec_recompiler_remove). */
+	pthread_cond_t cond2;
	pthread_mutex_t mutex;
	bool stop;
-	struct block *current_block;
	struct slist_elm slist;
+
+	/* Number of entries in the flexible array member below */
+	unsigned int nb_recs;
+	struct recompiler_thd thds[];
};
-static void lightrec_compile_list(struct recompiler *rec)
+/*
+ * Return the number of online CPUs, used to size the pool of worker
+ * threads. Always returns at least 1.
+ */
+static unsigned int get_processors_count(void)
+{
+	/* Default to 1 so that platforms matching none of the cases below
+	 * (or a failing probe) still get a valid count. A signed type is
+	 * required: sysconf() returns -1 on error, which would wrap to a
+	 * huge value in an unsigned variable and defeat the clamp below. */
+	int nb = 1;
+
+#if defined(PTW32_VERSION)
+	nb = pthread_num_processors_np();
+#elif defined(__APPLE__) || defined(__FreeBSD__)
+	int count;
+	size_t size = sizeof(count);
+
+	nb = sysctlbyname("hw.ncpu", &count, &size, NULL, 0) ? 1 : count;
+#elif defined(__linux__)
+	nb = sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+
+	return nb < 1 ? 1 : nb;
+}
+
+/* Return the first queued entry that no worker thread is currently
+ * compiling, or NULL if every queued entry is already being processed.
+ * Must be called with the recompiler mutex held. */
+static struct slist_elm * lightrec_get_first_elm(struct slist_elm *head)
+{
+	struct slist_elm *elm = slist_first(head);
+	struct block_rec *block_rec;
+
+	while (elm) {
+		block_rec = container_of(elm, struct block_rec, slist);
+
+		if (!block_rec->compiling)
+			return elm;
+
+		elm = elm->next;
+	}
+
+	return NULL;
+}
+
+/* Compile queued blocks until no free entry remains in the queue.
+ * Entered with rec->mutex held; the lock is dropped around the actual
+ * compilation and re-taken afterwards. */
+static void lightrec_compile_list(struct recompiler *rec,
+				  struct recompiler_thd *thd)
{
	struct block_rec *block_rec;
	struct slist_elm *next;
	struct block *block;
	int ret;
-	while (!!(next = slist_first(&rec->slist))) {
+	while (!!(next = lightrec_get_first_elm(&rec->slist))) {
		block_rec = container_of(next, struct block_rec, slist);
+		/* Mark the entry so other workers skip it while the lock
+		 * is released below */
+		block_rec->compiling = true;
		block = block_rec->block;
-		rec->current_block = block;
		pthread_mutex_unlock(&rec->mutex);
-		ret = lightrec_compile_block(block);
-		if (ret) {
-			pr_err("Unable to compile block at PC 0x%x: %d\n",
-			       block->pc, ret);
+		/* Skip blocks that were flagged dead while they sat in
+		 * the queue */
+		if (likely(!(block->flags & BLOCK_IS_DEAD))) {
+			ret = lightrec_compile_block(thd->cstate, block);
+			if (ret) {
+				pr_err("Unable to compile block at PC 0x%x: %d\n",
+				       block->pc, ret);
+			}
		}
		pthread_mutex_lock(&rec->mutex);
		slist_remove(&rec->slist, next);
		lightrec_free(rec->state, MEM_FOR_LIGHTREC,
			      sizeof(*block_rec), block_rec);
+		/* Wake anyone waiting in lightrec_recompiler_remove() for
+		 * this block to finish compiling */
+		pthread_cond_signal(&rec->cond2);
	}
-
-	rec->current_block = NULL;
}
static void * lightrec_recompiler_thd(void *d)
{
- struct recompiler *rec = d;
+ struct recompiler_thd *thd = d;
+ struct recompiler *rec = container_of(thd, struct recompiler, thds[thd->tid]);
pthread_mutex_lock(&rec->mutex);
} while (slist_empty(&rec->slist));
- lightrec_compile_list(rec);
+ lightrec_compile_list(rec, thd);
}
out_unlock:
struct recompiler *lightrec_recompiler_init(struct lightrec_state *state)
{
	struct recompiler *rec;
+	unsigned int i, nb_recs, nb_cpus;
	int ret;
-	rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec));
+	/* Keep one CPU free for the main emulator thread */
+	nb_cpus = get_processors_count();
+	nb_recs = nb_cpus < 2 ? 1 : nb_cpus - 1;
+
+	rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec)
+			      + nb_recs * sizeof(*rec->thds));
	if (!rec) {
		pr_err("Cannot create recompiler: Out of memory\n");
		return NULL;
	}
+	/* Pre-set every cstate pointer to NULL so that the error path
+	 * below can safely free only the ones actually created */
+	for (i = 0; i < nb_recs; i++) {
+		rec->thds[i].tid = i;
+		rec->thds[i].cstate = NULL;
+	}
+
+	for (i = 0; i < nb_recs; i++) {
+		rec->thds[i].cstate = lightrec_create_cstate(state);
+		/* BUGFIX: the original checked rec->state, which is not
+		 * initialized yet at this point - check the cstate that
+		 * was just allocated instead */
+		if (!rec->thds[i].cstate) {
+			pr_err("Cannot create recompiler: Out of memory\n");
+			goto err_free_cstates;
+		}
+	}
+
	rec->state = state;
	rec->stop = false;
-	rec->current_block = NULL;
+	rec->nb_recs = nb_recs;
	slist_init(&rec->slist);
	ret = pthread_cond_init(&rec->cond, NULL);
	if (ret) {
		pr_err("Cannot init cond variable: %d\n", ret);
-		goto err_free_rec;
+		goto err_free_cstates;
	}
-	ret = pthread_mutex_init(&rec->mutex, NULL);
+	ret = pthread_cond_init(&rec->cond2, NULL);
	if (ret) {
-		pr_err("Cannot init mutex variable: %d\n", ret);
+		pr_err("Cannot init cond variable: %d\n", ret);
		goto err_cnd_destroy;
	}
-	ret = pthread_create(&rec->thd, NULL, lightrec_recompiler_thd, rec);
+	ret = pthread_mutex_init(&rec->mutex, NULL);
	if (ret) {
-		pr_err("Cannot create recompiler thread: %d\n", ret);
-		goto err_mtx_destroy;
+		pr_err("Cannot init mutex variable: %d\n", ret);
+		goto err_cnd2_destroy;
	}
+	for (i = 0; i < nb_recs; i++) {
+		ret = pthread_create(&rec->thds[i].thd, NULL,
+				     lightrec_recompiler_thd, &rec->thds[i]);
+		if (ret) {
+			pr_err("Cannot create recompiler thread: %d\n", ret);
+			/* TODO: Handle cleanup properly */
+			goto err_mtx_destroy;
+		}
+	}
+
+	pr_info("Threaded recompiler started with %u workers.\n", nb_recs);
+
	return rec;
err_mtx_destroy:
	pthread_mutex_destroy(&rec->mutex);
+err_cnd2_destroy:
+	pthread_cond_destroy(&rec->cond2);
err_cnd_destroy:
	pthread_cond_destroy(&rec->cond);
-err_free_rec:
+err_free_cstates:
+	for (i = 0; i < nb_recs; i++) {
+		if (rec->thds[i].cstate)
+			lightrec_free_cstate(rec->thds[i].cstate);
+	}
-	lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
+	/* Free with the same size that was allocated above, including the
+	 * flexible array member, so memory accounting stays balanced */
+	lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec)
+		      + nb_recs * sizeof(*rec->thds), rec);
	return NULL;
}
void lightrec_free_recompiler(struct recompiler *rec)
{
+	unsigned int i;
+
	rec->stop = true;
	/* Stop the thread */
	pthread_mutex_lock(&rec->mutex);
-	pthread_cond_signal(&rec->cond);
+	/* Broadcast so that every worker, not just one, sees the stop flag */
+	pthread_cond_broadcast(&rec->cond);
	pthread_mutex_unlock(&rec->mutex);
-	pthread_join(rec->thd, NULL);
+
+	for (i = 0; i < rec->nb_recs; i++)
+		pthread_join(rec->thds[i].thd, NULL);
+
+	for (i = 0; i < rec->nb_recs; i++)
+		lightrec_free_cstate(rec->thds[i].cstate);
	pthread_mutex_destroy(&rec->mutex);
	pthread_cond_destroy(&rec->cond);
	pthread_cond_destroy(&rec->cond2);
-	lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
+	/* The recompiler was allocated with room for nb_recs worker slots
+	 * past the struct; free with the matching size so the memory
+	 * accounting in the manager stays balanced */
+	lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec)
+		      + rec->nb_recs * sizeof(*rec->thds), rec);
}
/* The block to compile is already in the queue - bump
* it to the top of the list, unless the block is being
* recompiled. */
- if (prev && !(block->flags & BLOCK_SHOULD_RECOMPILE)) {
+ if (prev && !block_rec->compiling &&
+ !(block->flags & BLOCK_SHOULD_RECOMPILE)) {
slist_remove_next(prev);
slist_append(&rec->slist, elm);
}
pr_debug("Adding block PC 0x%x to recompiler\n", block->pc);
block_rec->block = block;
+ block_rec->compiling = false;
elm = &rec->slist;
out_unlock:
pthread_mutex_unlock(&rec->mutex);
+
return ret;
}
pthread_mutex_lock(&rec->mutex);
- for (elm = slist_first(&rec->slist); elm; elm = elm->next) {
- block_rec = container_of(elm, struct block_rec, slist);
+ while (true) {
+ for (elm = slist_first(&rec->slist); elm; elm = elm->next) {
+ block_rec = container_of(elm, struct block_rec, slist);
- if (block_rec->block == block) {
- if (block == rec->current_block) {
+ if (block_rec->block != block)
+ continue;
+
+ if (block_rec->compiling) {
/* Block is being recompiled - wait for
* completion */
- do {
- pthread_cond_wait(&rec->cond,
- &rec->mutex);
- } while (block == rec->current_block);
+ pthread_cond_wait(&rec->cond2, &rec->mutex);
+
+ /* We can't guarantee the signal was for us.
+ * Since block_rec may have been removed while
+ * we were waiting on the condition, we cannot
+ * check block_rec->compiling again. The best
+ * thing is just to restart the function. */
+ break;
} else {
/* Block is not yet being processed - remove it
* from the list */
slist_remove(&rec->slist, elm);
lightrec_free(rec->state, MEM_FOR_LIGHTREC,
sizeof(*block_rec), block_rec);
+
+ goto out_unlock;
}
+ }
+ if (!elm)
break;
- }
}
+out_unlock:
pthread_mutex_unlock(&rec->mutex);
}
-void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc)
+void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
+					  struct block *block, u32 *pc)
{
	bool freed;
+	/* There's no point in running the first pass if the block will never
+	 * be compiled. Let the main loop run the interpreter instead. */
+	if (block->flags & BLOCK_NEVER_COMPILE)
+		return NULL;
+
+	/* If the block is already fully tagged, there is no point in running
+	 * the first pass. Request a recompilation of the block, and maybe the
+	 * interpreter will run the block in the meantime. */
+	if (block->flags & BLOCK_FULLY_TAGGED)
+		lightrec_recompiler_add(state->rec, block);
+
	if (likely(block->function)) {
		if (block->flags & BLOCK_FULLY_TAGGED) {
			freed = atomic_flag_test_and_set(&block->op_list_freed);
			/* The block was already compiled but the opcode list
			 * didn't get freed yet - do it now */
-			lightrec_free_opcode_list(block->state,
-					block->opcode_list);
+			lightrec_free_opcode_list(state, block);
			block->opcode_list = NULL;
		}
	}
+	/* NOTE(review): op_list_freed appears to act as a one-shot lock on
+	 * the opcode list; confirm against the code that clears it. */
	freed = atomic_flag_test_and_set(&block->op_list_freed);
	/* Block wasn't compiled yet - run the interpreter */
-	*pc = lightrec_emulate_block(block, *pc);
+	*pc = lightrec_emulate_block(state, block, *pc);
	if (!freed)
		atomic_flag_clear(&block->op_list_freed);
	pr_debug("Block PC 0x%08x is fully tagged"
		 " - free opcode list\n", block->pc);
-	lightrec_free_opcode_list(block->state, block->opcode_list);
+	lightrec_free_opcode_list(state, block);
	block->opcode_list = NULL;
}
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_RECOMPILER_H__
int lightrec_recompiler_add(struct recompiler *rec, struct block *block);
void lightrec_recompiler_remove(struct recompiler *rec, struct block *block);
-void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc);
+void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
+ struct block *block, u32 *pc);
#endif /* __LIGHTREC_RECOMPILER_H__ */
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#include "debug.h"
#include "memmanager.h"
+#include "lightning-wrapper.h"
#include "regcache.h"
-#include <lightning.h>
#include <stdbool.h>
#include <stddef.h>
struct native_register {
-	bool used, loaded, dirty, output, extend, extended, locked;
+	/* 'extend'/'zero_extend' request the extension mode an output
+	 * register should get; 'extended'/'zero_extended' track its current
+	 * state (free_reg latches extend -> extended when the register was
+	 * used as an output). */
+	bool used, loaded, dirty, output, extend, extended,
+		zero_extend, zero_extended, locked;
	s8 emulated_register;
};
return mips_regs[reg];
}
+/* Check whether the given host register is the hardwired zero register
+ * of the target architecture. */
+static inline bool lightrec_reg_is_zero(u8 jit_reg)
+{
+#if defined(__mips__) || defined(__alpha__) || defined(__riscv)
+	return jit_reg == _ZERO;
+#else
+	return false;
+#endif
+}
+
+/* Return the hardwired host register backing the given emulated register,
+ * or -1 if there is none. Only register 0 maps to a hardwired (zero)
+ * register, and only on architectures that have one. */
+static inline s8 lightrec_get_hardwired_reg(u8 reg)
+{
+#if defined(__mips__) || defined(__alpha__) || defined(__riscv)
+	return reg == 0 ? _ZERO : -1;
+#else
+	return -1;
+#endif
+}
+
static inline u8 lightrec_reg_number(const struct regcache *cache,
const struct native_register *nreg)
{
}
}
+/* Return the REG_EXT / REG_ZEXT flags currently known to hold for the
+ * given host register. */
+u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg)
+{
+	struct native_register *reg;
+	u8 flags = 0;
+
+	/* A hardwired zero register is both sign- and zero-extended */
+	if (lightrec_reg_is_zero(jit_reg))
+		return REG_EXT | REG_ZEXT;
+
+	reg = lightning_reg_to_lightrec(cache, jit_reg);
+	if (reg->extended)
+		flags |= REG_EXT;
+	if (reg->zero_extended)
+		flags |= REG_ZEXT;
+
+	return flags;
+}
+
+/* Record the requested extension mode (REG_EXT / REG_ZEXT) for a host
+ * register used as an output; free_reg will later promote these to the
+ * 'extended' state flags. */
+void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags)
+{
+	struct native_register *reg;
+
+	/* The hardwired zero register has no cached state to update */
+	if (!lightrec_reg_is_zero(jit_reg)) {
+		reg = lightning_reg_to_lightrec(cache, jit_reg);
+		reg->extend = flags & REG_EXT;
+		reg->zero_extend = flags & REG_ZEXT;
+	}
+}
+
static struct native_register * alloc_temp(struct regcache *cache)
{
unsigned int i;
static void lightrec_discard_nreg(struct native_register *nreg)
{
nreg->extended = false;
+ nreg->zero_extended = false;
nreg->loaded = false;
nreg->output = false;
nreg->dirty = false;
{
/* If we get a dirty register, store back the old value */
if (nreg->dirty) {
- s16 offset = offsetof(struct lightrec_state, native_reg_cache)
+ s16 offset = offsetof(struct lightrec_state, regs.gpr)
+ (nreg->emulated_register << 2);
jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg);
void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
{
+	/* The hardwired zero register is not tracked in the cache */
+	if (lightrec_reg_is_zero(jit_reg))
+		return;
+
	lightrec_unload_nreg(cache, _jit,
			lightning_reg_to_lightrec(cache, jit_reg), jit_reg);
}
* A locked register cannot only be used as input, not output. */
void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
{
- struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg);
+ struct native_register *reg;
+
+ if (lightrec_reg_is_zero(jit_reg))
+ return;
+ reg = lightning_reg_to_lightrec(cache, jit_reg);
lightrec_clean_reg(cache, _jit, jit_reg);
reg->locked = true;
u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
{
-	struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg);
+	struct native_register *reg;
+	/* The hardwired zero register needs no allocation bookkeeping */
+	if (lightrec_reg_is_zero(jit_reg))
+		return jit_reg;
+
+	reg = lightning_reg_to_lightrec(cache, jit_reg);
	lightrec_unload_nreg(cache, _jit, reg, jit_reg);
	reg->used = true;
	return jit_reg;
}
-u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg)
+u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit,
+ u8 reg, u8 flags)
{
+ struct native_register *nreg;
u8 jit_reg;
- struct native_register *nreg = alloc_in_out(cache, reg, true);
+ s8 hw_reg;
+
+ hw_reg = lightrec_get_hardwired_reg(reg);
+ if (hw_reg >= 0)
+ return (u8) hw_reg;
+
+ nreg = alloc_in_out(cache, reg, true);
if (!nreg) {
/* No free register, no dirty register to free. */
pr_err("No more registers! Abandon ship!\n");
if (nreg->emulated_register != reg)
lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
- nreg->extend = false;
nreg->used = true;
nreg->output = true;
nreg->emulated_register = reg;
+ nreg->extend = flags & REG_EXT;
+ nreg->zero_extend = flags & REG_ZEXT;
return jit_reg;
}
-u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg)
+u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit,
+ u8 reg, u8 flags)
{
+ struct native_register *nreg;
u8 jit_reg;
bool reg_changed;
- struct native_register *nreg = alloc_in_out(cache, reg, false);
+ s8 hw_reg;
+
+ hw_reg = lightrec_get_hardwired_reg(reg);
+ if (hw_reg >= 0)
+ return (u8) hw_reg;
+
+ nreg = alloc_in_out(cache, reg, false);
if (!nreg) {
/* No free register, no dirty register to free. */
pr_err("No more registers! Abandon ship!\n");
lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
if (!nreg->loaded && !nreg->dirty && reg != 0) {
- s16 offset = offsetof(struct lightrec_state, native_reg_cache)
+ s16 offset = offsetof(struct lightrec_state, regs.gpr)
+ (reg << 2);
+ nreg->zero_extended = flags & REG_ZEXT;
+ nreg->extended = !nreg->zero_extended;
+
/* Load previous value from register cache */
- jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
+ if (nreg->zero_extended)
+ jit_ldxi_ui(jit_reg, LIGHTREC_REG_STATE, offset);
+ else
+ jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
+
nreg->loaded = true;
- nreg->extended = true;
}
/* Clear register r0 before use */
if (reg == 0 && (!nreg->loaded || nreg->dirty)) {
jit_movi(jit_reg, 0);
nreg->extended = true;
+ nreg->zero_extended = true;
nreg->loaded = true;
}
nreg->used = true;
nreg->output = false;
nreg->emulated_register = reg;
- return jit_reg;
-}
-
-u8 lightrec_alloc_reg_out_ext(struct regcache *cache, jit_state_t *_jit, u8 reg)
-{
- struct native_register *nreg;
- u8 jit_reg;
-
- jit_reg = lightrec_alloc_reg_out(cache, _jit, reg);
- nreg = lightning_reg_to_lightrec(cache, jit_reg);
-
- nreg->extend = true;
- return jit_reg;
-}
-
-u8 lightrec_alloc_reg_in_ext(struct regcache *cache, jit_state_t *_jit, u8 reg)
-{
- struct native_register *nreg;
- u8 jit_reg;
-
- jit_reg = lightrec_alloc_reg_in(cache, _jit, reg);
- nreg = lightning_reg_to_lightrec(cache, jit_reg);
-
-#if __WORDSIZE == 64
- if (!nreg->extended) {
+ if ((flags & REG_EXT) && !nreg->extended &&
+ (!nreg->zero_extended || !(flags & REG_ZEXT))) {
nreg->extended = true;
+ nreg->zero_extended = false;
jit_extr_i(jit_reg, jit_reg);
+ } else if (!(flags & REG_EXT) && (flags & REG_ZEXT) &&
+ !nreg->zero_extended) {
+ nreg->zero_extended = true;
+ nreg->extended = false;
+ jit_extr_ui(jit_reg, jit_reg);
}
-#endif
return jit_reg;
}
lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
/* Load previous value from register cache */
- offset = offsetof(struct lightrec_state, native_reg_cache) + (reg << 2);
+ offset = offsetof(struct lightrec_state, regs.gpr) + (reg << 2);
jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
nreg->extended = true;
+ nreg->zero_extended = false;
nreg->used = true;
nreg->loaded = true;
nreg->emulated_register = reg;
/* Set output registers as dirty */
if (nreg->used && nreg->output && nreg->emulated_register > 0)
nreg->dirty = true;
- if (nreg->output)
+ if (nreg->output) {
nreg->extended = nreg->extend;
+ nreg->zero_extended = nreg->zero_extend;
+ }
nreg->used = false;
}
void lightrec_free_reg(struct regcache *cache, u8 jit_reg)
{
-	free_reg(lightning_reg_to_lightrec(cache, jit_reg));
+	/* The hardwired zero register is never tracked in the cache */
+	if (lightrec_reg_is_zero(jit_reg))
+		return;
+
+	free_reg(lightning_reg_to_lightrec(cache, jit_reg));
}
void lightrec_free_regs(struct regcache *cache)
struct native_register *nreg, u8 jit_reg, bool clean)
{
if (nreg->dirty) {
- s16 offset = offsetof(struct lightrec_state, native_reg_cache)
+ s16 offset = offsetof(struct lightrec_state, regs.gpr)
+ (nreg->emulated_register << 2);
jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg);
void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
{
-	struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg);
-	clean_reg(_jit, reg, jit_reg, true);
+	/* The hardwired zero register never needs to be written back */
+	if (lightrec_reg_is_zero(jit_reg))
+		return;
+
+	clean_reg(_jit, lightning_reg_to_lightrec(cache, jit_reg),
+		  jit_reg, true);
}
void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit,
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __REGCACHE_H__
#define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
#define LIGHTREC_REG_CYCLE (JIT_V(JIT_V_NUM - 2))
-#define REG_LO 32
-#define REG_HI 33
+/* Flags for lightrec_alloc_reg_in / lightrec_alloc_reg_out. */
+#define REG_EXT BIT(0) /* register is sign-extended */
+#define REG_ZEXT BIT(1) /* register is zero-extended */
struct register_value {
_Bool known;
u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit);
-u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg);
-u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg);
-u8 lightrec_alloc_reg_out_ext(struct regcache *cache,
- jit_state_t *_jit, u8 reg);
-u8 lightrec_alloc_reg_in_ext(struct regcache *cache, jit_state_t *_jit, u8 reg);
+u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit,
+ u8 reg, u8 flags);
+u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit,
+ u8 reg, u8 flags);
u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
u8 reg, u8 jit_reg);
+u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg);
+void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags);
+
void lightrec_regcache_reset(struct regcache *cache);
void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
- * Copyright (C) 2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2020-2021 Paul Cercueil <paul@crapouillou.net>
*/
#ifndef __LIGHTREC_SLIST_H__