From 02487de7ff9fcbb6d7d692a6b3ae6e6539708abc Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 30 May 2022 18:00:15 +0100 Subject: [PATCH] git subrepo pull --force deps/lightrec subrepo: subdir: "deps/lightrec" merged: "49ef275a" upstream: origin: "https://github.com/pcercuei/lightrec.git" branch: "master" commit: "49ef275a" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "2f68596" --- deps/lightrec/.gitrepo | 4 +- deps/lightrec/CMakeLists.txt | 10 + deps/lightrec/blockcache.c | 15 +- deps/lightrec/disassembler.c | 9 +- deps/lightrec/emitter.c | 305 ++++-- deps/lightrec/lightrec-config.h.cmakein | 1 + deps/lightrec/lightrec-private.h | 50 + deps/lightrec/lightrec.c | 214 +++- deps/lightrec/lightrec.h | 3 + deps/lightrec/optimizer.c | 148 +-- deps/lightrec/tlsf/.gitrepo | 12 + deps/lightrec/tlsf/README.md | 92 ++ deps/lightrec/tlsf/tlsf.c | 1264 +++++++++++++++++++++++ deps/lightrec/tlsf/tlsf.h | 90 ++ 14 files changed, 1994 insertions(+), 223 deletions(-) create mode 100644 deps/lightrec/tlsf/.gitrepo create mode 100644 deps/lightrec/tlsf/README.md create mode 100644 deps/lightrec/tlsf/tlsf.c create mode 100644 deps/lightrec/tlsf/tlsf.h diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo index ae2fc77c..c9d423a7 100644 --- a/deps/lightrec/.gitrepo +++ b/deps/lightrec/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/lightrec.git branch = master - commit = de06670b257004d98d30e8585a4e6530e77d3acd - parent = e24732050e902bd5402b2b7da7c391d2ca8fa799 + commit = 49ef275a66aad8540ab73b09b0dd2128ebe4d6dc + parent = a0467ff492a25521867fcfb7d66b9c617017151a method = merge cmdver = 0.4.3 diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt index 9ff58d62..7b285185 100644 --- a/deps/lightrec/CMakeLists.txt +++ b/deps/lightrec/CMakeLists.txt @@ -108,6 +108,16 @@ if (ENABLE_THREADED_COMPILER) target_link_libraries(${PROJECT_NAME} PRIVATE ${PTHREAD_LIBRARIES}) endif (ENABLE_THREADED_COMPILER) +option(ENABLE_CODE_BUFFER "Enable external code buffer" OFF) +if (ENABLE_CODE_BUFFER) + target_sources(${PROJECT_NAME} PRIVATE tlsf/tlsf.c) + target_include_directories(${PROJECT_NAME} PRIVATE tlsf) +endif (ENABLE_CODE_BUFFER) + +if (ENABLE_CODE_BUFFER AND ENABLE_THREADED_COMPILER) + message(SEND_ERROR "External code buffer cannot be used along with the threaded compiler") +endif (ENABLE_CODE_BUFFER AND ENABLE_THREADED_COMPILER) + find_library(LIBLIGHTNING lightning REQUIRED) find_path(LIBLIGHTNING_INCLUDE_DIR lightning.h REQUIRED) diff --git a/deps/lightrec/blockcache.c b/deps/lightrec/blockcache.c index 4512392d..2182f298 100644 --- a/deps/lightrec/blockcache.c +++ b/deps/lightrec/blockcache.c @@ -63,8 +63,8 @@ void remove_from_code_lut(struct blockcache *cache, struct block *block) u32 offset = lut_offset(block->pc); if (block->function) { - memset(&state->code_lut[offset], 0, - block->nb_ops * sizeof(*state->code_lut)); + memset(lut_address(state, offset), 0, + block->nb_ops * lut_elm_size(state)); } } @@ -152,10 +152,11 @@ u32 lightrec_calculate_block_hash(const struct block *block) bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block) { - void **lut_entry = &state->code_lut[lut_offset(block->pc)]; + u32 offset = lut_offset(block->pc); bool outdated; + void *addr; - if (*lut_entry) + if (lut_read(state, offset)) return false; outdated = block->hash != lightrec_calculate_block_hash(block); @@ -163,9 +164,11 @@ bool lightrec_block_is_outdated(struct lightrec_state *state, struct 
block *bloc /* The block was marked as outdated, but the content is still * the same */ if (block->function) - *lut_entry = block->function; + addr = block->function; else - *lut_entry = state->get_next_block; + addr = state->get_next_block; + + lut_write(state, offset, addr); } return outdated; diff --git a/deps/lightrec/disassembler.c b/deps/lightrec/disassembler.c index 43ac6772..cb332c67 100644 --- a/deps/lightrec/disassembler.c +++ b/deps/lightrec/disassembler.c @@ -382,22 +382,23 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, } } -void lightrec_print_disassembly(const struct block *block, const u32 *code) +void lightrec_print_disassembly(const struct block *block, const u32 *code_ptr) { const struct opcode *op; const char **flags_ptr; size_t nb_flags, count, count2; char buf[256], buf2[256], buf3[256]; unsigned int i; - u32 pc, branch_pc; + u32 pc, branch_pc, code; bool is_io; for (i = 0; i < block->nb_ops; i++) { op = &block->opcode_list[i]; branch_pc = get_branch_pc(block, i, 0); pc = block->pc + (i << 2); + code = LE32TOH(code_ptr[i]); - count = print_op((union code)code[i], pc, buf, sizeof(buf), + count = print_op((union code)code, pc, buf, sizeof(buf), &flags_ptr, &nb_flags, &is_io); flags_ptr = NULL; @@ -406,7 +407,7 @@ void lightrec_print_disassembly(const struct block *block, const u32 *code) count2 = print_op(op->c, branch_pc, buf2, sizeof(buf2), &flags_ptr, &nb_flags, &is_io); - if (code[i] == op->c.opcode) { + if (code == op->c.opcode) { *buf2 = '\0'; count2 = 0; } diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c index fa74cc09..fd289356 100644 --- a/deps/lightrec/emitter.c +++ b/deps/lightrec/emitter.c @@ -392,11 +392,34 @@ static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *bl lightrec_free_reg(reg_cache, rd); } +static void rec_movi(struct lightrec_cstate *state, + const struct block *block, u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u16 flags = REG_EXT; + u8 rt; + + if (!(c.i.imm & 0x8000)) + flags |= REG_ZEXT; + + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags); + + jit_movi(rt, (s32)(s16) c.i.imm); + + lightrec_free_reg(reg_cache, rt); +} + static void rec_ADDIU(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_alu_imm(state, block, offset, jit_code_addi, false); + + if (block->opcode_list[offset].c.i.rs) + rec_alu_imm(state, block, offset, jit_code_addi, false); + else + rec_movi(state, block, offset); } static void rec_ADDI(struct lightrec_cstate *state, @@ -404,7 +427,7 @@ static void rec_ADDI(struct lightrec_cstate *state, { /* TODO: Handle the exception? 
*/ _jit_name(block->_jit, __func__); - rec_alu_imm(state, block, offset, jit_code_addi, false); + rec_ADDIU(state, block, offset); } static void rec_SLTIU(struct lightrec_cstate *state, @@ -1022,22 +1045,31 @@ static void rec_io(struct lightrec_cstate *state, } } +static u32 rec_ram_mask(struct lightrec_state *state) +{ + return (RAM_SIZE << (state->mirrors_mapped * 2)) - 1; +} + static void rec_store_memory(struct lightrec_cstate *cstate, const struct block *block, u16 offset, jit_code_t code, + jit_code_t swap_code, uintptr_t addr_offset, u32 addr_mask, bool invalidate) { + const struct lightrec_state *state = cstate->state; struct regcache *reg_cache = cstate->reg_cache; struct opcode *op = &block->opcode_list[offset]; jit_state_t *_jit = block->_jit; union code c = op->c; u8 rs, rt, tmp, tmp2, tmp3, addr_reg, addr_reg2; s16 imm = (s16)c.i.imm; - s32 simm = (s32)imm << (__WORDSIZE / 32 - 1); + s32 simm = (s32)imm << (1 - lut_is_32bit(state)); s32 lut_offt = offsetof(struct lightrec_state, code_lut); bool no_mask = op->flags & LIGHTREC_NO_MASK; - bool add_imm = c.i.imm && invalidate && simm + lut_offt != (s16)(simm + lut_offt); + bool add_imm = c.i.imm && + ((!state->mirrors_mapped && !no_mask) || (invalidate && + ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt)))); bool need_tmp = !no_mask || addr_offset || add_imm; bool need_tmp2 = addr_offset || invalidate; @@ -1071,7 +1103,17 @@ static void rec_store_memory(struct lightrec_cstate *cstate, addr_reg2 = addr_reg; } - jit_new_node_www(code, imm, addr_reg2, rt); + if (is_big_endian() && swap_code && c.i.rt) { + tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp3, rt); + jit_new_node_www(code, imm, addr_reg2, tmp3); + + lightrec_free_reg(reg_cache, tmp3); + } else { + jit_new_node_www(code, imm, addr_reg2, rt); + } + lightrec_free_reg(reg_cache, rt); if (invalidate) { @@ -1082,17 +1124,22 @@ static void rec_store_memory(struct lightrec_cstate *cstate, addr_reg = tmp2; } - if (__WORDSIZE == 64) { + if (!lut_is_32bit(state)) { jit_lshi(tmp2, addr_reg, 1); addr_reg = tmp2; } - if (__WORDSIZE == 64 || addr_reg != rs || c.i.rs != 0) { + if (addr_reg == rs && c.i.rs == 0) { + addr_reg = LIGHTREC_REG_STATE; + } else { jit_addr(tmp2, addr_reg, LIGHTREC_REG_STATE); addr_reg = tmp2; } - jit_stxi(lut_offt, addr_reg, tmp3); + if (lut_is_32bit(state)) + jit_stxi_i(lut_offt, addr_reg, tmp3); + else + jit_stxi(lut_offt, addr_reg, tmp3); lightrec_free_reg(reg_cache, tmp3); } @@ -1107,29 +1154,32 @@ static void rec_store_memory(struct lightrec_cstate *cstate, static void rec_store_ram(struct lightrec_cstate *cstate, const struct block *block, u16 offset, jit_code_t code, - bool invalidate) + jit_code_t swap_code, bool invalidate) { + struct lightrec_state *state = cstate->state; + _jit_note(block->_jit, __FILE__, __LINE__); - return rec_store_memory(cstate, block, offset, code, - cstate->state->offset_ram, - RAM_SIZE - 1, invalidate); + return rec_store_memory(cstate, block, offset, code, swap_code, + state->offset_ram, rec_ram_mask(state), + invalidate); } static void rec_store_scratch(struct lightrec_cstate *cstate, - const struct block *block, - u16 offset, jit_code_t code) + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code) { _jit_note(block->_jit, __FILE__, __LINE__); - return rec_store_memory(cstate, block, offset, code, + return rec_store_memory(cstate, block, offset, code, swap_code, cstate->state->offset_scratch, 0x1fffffff, false); } static void 
rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, const struct block *block, - u16 offset, jit_code_t code) + u16 offset, jit_code_t code, + jit_code_t swap_code) { struct lightrec_state *state = cstate->state; struct regcache *reg_cache = cstate->reg_cache; @@ -1181,14 +1231,24 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, } rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); - jit_new_node_www(code, imm, tmp, rt); + + if (is_big_endian() && swap_code && c.i.rt) { + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp2, rt); + jit_new_node_www(code, imm, tmp, tmp2); + + lightrec_free_reg(reg_cache, tmp2); + } else { + jit_new_node_www(code, imm, tmp, rt); + } lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp); } static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block, - u16 offset, jit_code_t code) + u16 offset, jit_code_t code, jit_code_t swap_code) { struct lightrec_state *state = cstate->state; u32 ram_size = state->mirrors_mapped ? RAM_SIZE * 4 : RAM_SIZE; @@ -1219,12 +1279,15 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block /* Compute the offset to the code LUT */ jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3); - if (__WORDSIZE == 64) + if (!lut_is_32bit(state)) jit_lshi(tmp, tmp, 1); jit_addr(tmp, LIGHTREC_REG_STATE, tmp); /* Write NULL to the code LUT to invalidate any block that's there */ - jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3); + if (lut_is_32bit(state)) + jit_stxi_i(offsetof(struct lightrec_state, code_lut), tmp, tmp3); + else + jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3); if (state->offset_ram != state->offset_scratch) { jit_movi(tmp, state->offset_ram); @@ -1247,14 +1310,25 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block lightrec_free_reg(reg_cache, tmp3); rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); - jit_new_node_www(code, 0, tmp2, rt); + + if (is_big_endian() && swap_code && c.i.rt) { + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp, rt); + jit_new_node_www(code, 0, tmp2, tmp); + + lightrec_free_reg(reg_cache, tmp); + } else { + jit_new_node_www(code, 0, tmp2, rt); + } lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp2); } static void rec_store(struct lightrec_cstate *state, - const struct block *block, u16 offset, jit_code_t code) + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code) { u16 flags = block->opcode_list[offset].flags; bool no_invalidate = (flags & LIGHTREC_NO_INVALIDATE) || @@ -1262,16 +1336,19 @@ static void rec_store(struct lightrec_cstate *state, switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) { case LIGHTREC_IO_RAM: - rec_store_ram(state, block, offset, code, !no_invalidate); + rec_store_ram(state, block, offset, code, + swap_code, !no_invalidate); break; case LIGHTREC_IO_SCRATCH: - rec_store_scratch(state, block, offset, code); + rec_store_scratch(state, block, offset, code, swap_code); break; case LIGHTREC_IO_DIRECT: - if (no_invalidate) - rec_store_direct_no_invalidate(state, block, offset, code); - else - rec_store_direct(state, block, offset, code); + if (no_invalidate) { + rec_store_direct_no_invalidate(state, block, offset, + code, swap_code); + } else { + rec_store_direct(state, block, offset, code, swap_code); + } break; default: rec_io(state, block, offset, true, false); @@ -1283,14 +1360,15 @@ static void rec_SB(struct 
lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_c); + rec_store(state, block, offset, jit_code_stxi_c, 0); } static void rec_SH(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_s); + rec_store(state, block, offset, + jit_code_stxi_s, jit_code_bswapr_us); } static void rec_SW(struct lightrec_cstate *state, @@ -1298,7 +1376,8 @@ static void rec_SW(struct lightrec_cstate *state, { _jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_i); + rec_store(state, block, offset, + jit_code_stxi_i, jit_code_bswapr_ui); } static void rec_SWL(struct lightrec_cstate *state, @@ -1323,15 +1402,17 @@ static void rec_SWC2(struct lightrec_cstate *state, } static void rec_load_memory(struct lightrec_cstate *cstate, - const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned, uintptr_t addr_offset, u32 addr_mask) { struct regcache *reg_cache = cstate->reg_cache; struct opcode *op = &block->opcode_list[offset]; jit_state_t *_jit = block->_jit; u8 rs, rt, addr_reg, flags = REG_EXT; + bool no_mask = op->flags & LIGHTREC_NO_MASK; union code c = op->c; + s16 imm; if (!c.i.rt) return; @@ -1342,11 +1423,18 @@ static void rec_load_memory(struct lightrec_cstate *cstate, rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags); - if (!(op->flags & LIGHTREC_NO_MASK)) { - jit_andi(rt, rs, addr_mask); + if (!cstate->state->mirrors_mapped && c.i.imm && !no_mask) { + jit_addi(rt, rs, (s16)c.i.imm); addr_reg = rt; + imm = 0; } else { addr_reg = rs; + imm = (s16)c.i.imm; + } + + if (!no_mask) { + jit_andi(rt, addr_reg, addr_mask); + addr_reg = rt; } if (addr_offset) { @@ -1354,44 +1442,55 @@ static void rec_load_memory(struct lightrec_cstate *cstate, addr_reg = rt; } - jit_new_node_www(code, rt, addr_reg, (s16)c.i.imm); + jit_new_node_www(code, rt, addr_reg, imm); + + if (is_big_endian() && swap_code) { + jit_new_node_ww(swap_code, rt, rt); + + if (c.i.op == OP_LH) + jit_extr_s(rt, rt); + else if (c.i.op == OP_LW && __WORDSIZE == 64) + jit_extr_i(rt, rt); + } lightrec_free_reg(reg_cache, rs); lightrec_free_reg(reg_cache, rt); } static void rec_load_ram(struct lightrec_cstate *cstate, - const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) { _jit_note(block->_jit, __FILE__, __LINE__); - rec_load_memory(cstate, block, offset, code, is_unsigned, - cstate->state->offset_ram, RAM_SIZE - 1); + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, + cstate->state->offset_ram, rec_ram_mask(cstate->state)); } static void rec_load_bios(struct lightrec_cstate *cstate, - const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) { _jit_note(block->_jit, __FILE__, __LINE__); - rec_load_memory(cstate, block, offset, code, is_unsigned, + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, cstate->state->offset_bios, 0x1fffffff); } static void rec_load_scratch(struct lightrec_cstate *cstate, - const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) + const 
struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) { _jit_note(block->_jit, __FILE__, __LINE__); - rec_load_memory(cstate, block, offset, code, is_unsigned, + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, cstate->state->offset_scratch, 0x1fffffff); } -static void rec_load_direct(struct lightrec_cstate *cstate, const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) +static void rec_load_direct(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, + bool is_unsigned) { struct lightrec_state *state = cstate->state; struct regcache *reg_cache = cstate->reg_cache; @@ -1483,28 +1582,38 @@ static void rec_load_direct(struct lightrec_cstate *cstate, const struct block * jit_new_node_www(code, rt, rt, imm); + if (is_big_endian() && swap_code) { + jit_new_node_ww(swap_code, rt, rt); + + if (c.i.op == OP_LH) + jit_extr_s(rt, rt); + else if (c.i.op == OP_LW && __WORDSIZE == 64) + jit_extr_i(rt, rt); + } + lightrec_free_reg(reg_cache, addr_reg); lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp); } static void rec_load(struct lightrec_cstate *state, const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) + u16 offset, jit_code_t code, jit_code_t swap_code, + bool is_unsigned) { u16 flags = block->opcode_list[offset].flags; switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) { case LIGHTREC_IO_RAM: - rec_load_ram(state, block, offset, code, is_unsigned); + rec_load_ram(state, block, offset, code, swap_code, is_unsigned); break; case LIGHTREC_IO_BIOS: - rec_load_bios(state, block, offset, code, is_unsigned); + rec_load_bios(state, block, offset, code, swap_code, is_unsigned); break; case LIGHTREC_IO_SCRATCH: - rec_load_scratch(state, block, offset, code, is_unsigned); + rec_load_scratch(state, block, offset, code, swap_code, is_unsigned); break; case LIGHTREC_IO_DIRECT: - rec_load_direct(state, block, offset, code, is_unsigned); + rec_load_direct(state, block, offset, code, swap_code, is_unsigned); break; default: rec_io(state, block, offset, false, true); @@ -1515,25 +1624,25 @@ static void rec_load(struct lightrec_cstate *state, const struct block *block, static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_c, false); + rec_load(state, block, offset, jit_code_ldxi_c, 0, false); } static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_uc, true); + rec_load(state, block, offset, jit_code_ldxi_uc, 0, true); } static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_s, false); + rec_load(state, block, offset, jit_code_ldxi_s, jit_code_bswapr_us, false); } static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_us, true); + rec_load(state, block, offset, jit_code_ldxi_us, jit_code_bswapr_us, true); } static void rec_LWL(struct lightrec_cstate *state, const struct block *block, u16 offset) @@ -1551,7 +1660,7 @@ static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u1 static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset) 
{ _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_i, false); + rec_load(state, block, offset, jit_code_ldxi_i, jit_code_bswapr_ui, false); } static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset) @@ -1759,6 +1868,26 @@ static void rec_cp0_CTC0(struct lightrec_cstate *state, rec_mtc0(state, block, offset); } +static unsigned int cp2d_i_offset(u8 reg) +{ + return offsetof(struct lightrec_state, regs.cp2d[reg]); +} + +static unsigned int cp2d_s_offset(u8 reg) +{ + return cp2d_i_offset(reg) + is_big_endian() * 2; +} + +static unsigned int cp2c_i_offset(u8 reg) +{ + return offsetof(struct lightrec_state, regs.cp2c[reg]); +} + +static unsigned int cp2c_s_offset(u8 reg) +{ + return cp2c_i_offset(reg) + is_big_endian() * 2; +} + static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { @@ -1783,16 +1912,14 @@ static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, case 9: case 10: case 11: - jit_ldxi_s(rt, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[reg])); + jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg)); break; case 7: case 16: case 17: case 18: case 19: - jit_ldxi_us(rt, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[reg])); + jit_ldxi_us(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg)); break; case 28: case 29: @@ -1803,8 +1930,7 @@ static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, for (i = 0; i < 3; i++) { out = i == 0 ? rt : tmp; - jit_ldxi_s(tmp, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[9 + i])); + jit_ldxi_s(tmp, LIGHTREC_REG_STATE, cp2d_s_offset(9 + i)); jit_movi(tmp2, 0x1f); jit_rshi(out, tmp, 7); @@ -1826,8 +1952,7 @@ static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, lightrec_free_reg(reg_cache, tmp3); break; default: - jit_ldxi_i(rt, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[reg])); + jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2d_i_offset(reg)); break; } @@ -1853,13 +1978,11 @@ static void rec_cp2_basic_CFC2(struct lightrec_cstate *state, case 29: case 30: rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT); - jit_ldxi_s(rt, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2c[c.r.rd])); + jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2c_s_offset(c.r.rd)); break; default: rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT); - jit_ldxi_i(rt, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2c[c.r.rd])); + jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2c_i_offset(c.r.rd)); break; } @@ -1888,19 +2011,14 @@ static void rec_cp2_basic_MTC2(struct lightrec_cstate *state, switch (c.r.rd) { case 15: tmp = lightrec_alloc_reg_temp(reg_cache, _jit); - jit_ldxi_i(tmp, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[13])); + jit_ldxi_i(tmp, LIGHTREC_REG_STATE, cp2d_i_offset(13)); tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); - jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.cp2d[14])); + jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, cp2d_i_offset(14)); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[12]), - LIGHTREC_REG_STATE, tmp); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[13]), - LIGHTREC_REG_STATE, tmp2); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[14]), - LIGHTREC_REG_STATE, rt); + jit_stxi_i(cp2d_i_offset(12), LIGHTREC_REG_STATE, tmp); + jit_stxi_i(cp2d_i_offset(13), LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(cp2d_i_offset(14), LIGHTREC_REG_STATE, rt); lightrec_free_reg(reg_cache, 
tmp); lightrec_free_reg(reg_cache, tmp2); @@ -1910,18 +2028,15 @@ static void rec_cp2_basic_MTC2(struct lightrec_cstate *state, jit_lshi(tmp, rt, 7); jit_andi(tmp, tmp, 0xf80); - jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[9]), - LIGHTREC_REG_STATE, tmp); + jit_stxi_s(cp2d_s_offset(9), LIGHTREC_REG_STATE, tmp); jit_lshi(tmp, rt, 2); jit_andi(tmp, tmp, 0xf80); - jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[10]), - LIGHTREC_REG_STATE, tmp); + jit_stxi_s(cp2d_s_offset(10), LIGHTREC_REG_STATE, tmp); jit_rshi(tmp, rt, 3); jit_andi(tmp, tmp, 0xf80); - jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[11]), - LIGHTREC_REG_STATE, tmp); + jit_stxi_s(cp2d_s_offset(11), LIGHTREC_REG_STATE, tmp); lightrec_free_reg(reg_cache, tmp); break; @@ -1945,17 +2060,14 @@ static void rec_cp2_basic_MTC2(struct lightrec_cstate *state, jit_patch_at(to_loop, loop); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[31]), - LIGHTREC_REG_STATE, tmp2); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[30]), - LIGHTREC_REG_STATE, rt); + jit_stxi_i(cp2d_i_offset(31), LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(cp2d_i_offset(30), LIGHTREC_REG_STATE, rt); lightrec_free_reg(reg_cache, tmp); lightrec_free_reg(reg_cache, tmp2); break; default: - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[c.r.rd]), - LIGHTREC_REG_STATE, rt); + jit_stxi_i(cp2d_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt); break; } @@ -1982,8 +2094,7 @@ static void rec_cp2_basic_CTC2(struct lightrec_cstate *state, case 27: case 29: case 30: - jit_stxi_s(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]), - LIGHTREC_REG_STATE, rt); + jit_stxi_s(cp2c_s_offset(c.r.rd), LIGHTREC_REG_STATE, rt); break; case 31: tmp = lightrec_alloc_reg_temp(reg_cache, _jit); @@ -1996,16 +2107,14 @@ static void rec_cp2_basic_CTC2(struct lightrec_cstate *state, jit_andi(tmp2, rt, 0x7ffff000); jit_orr(tmp, tmp2, tmp); - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[31]), - LIGHTREC_REG_STATE, tmp); + jit_stxi_i(cp2c_i_offset(31), LIGHTREC_REG_STATE, tmp); lightrec_free_reg(reg_cache, tmp); lightrec_free_reg(reg_cache, tmp2); break; default: - jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]), - LIGHTREC_REG_STATE, rt); + jit_stxi_i(cp2c_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt); } lightrec_free_reg(reg_cache, rt); diff --git a/deps/lightrec/lightrec-config.h.cmakein b/deps/lightrec/lightrec-config.h.cmakein index 3cef2b8c..f5524e9d 100644 --- a/deps/lightrec/lightrec-config.h.cmakein +++ b/deps/lightrec/lightrec-config.h.cmakein @@ -10,6 +10,7 @@ #cmakedefine01 ENABLE_FIRST_PASS #cmakedefine01 ENABLE_DISASSEMBLER #cmakedefine01 ENABLE_TINYMM +#cmakedefine01 ENABLE_CODE_BUFFER #cmakedefine01 HAS_DEFAULT_ELM diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h index 75940b3f..87565a63 100644 --- a/deps/lightrec/lightrec-private.h +++ b/deps/lightrec/lightrec-private.h @@ -6,6 +6,7 @@ #ifndef __LIGHTREC_PRIVATE_H__ #define __LIGHTREC_PRIVATE_H__ +#include "lightning-wrapper.h" #include "lightrec-config.h" #include "disassembler.h" #include "lightrec.h" @@ -135,6 +136,7 @@ struct lightrec_state { struct recompiler *rec; struct lightrec_cstate *cstate; struct reaper *reaper; + void *tlsf; void (*eob_wrapper_func)(void); void (*memset_func)(void); void (*get_next_block)(void); @@ -143,6 +145,7 @@ struct lightrec_state { unsigned int nb_maps; const struct lightrec_mem_map *maps; uintptr_t offset_ram, offset_bios, offset_scratch; + _Bool with_32bit_lut; _Bool mirrors_mapped; _Bool invalidate_from_dma_only; void 
*code_lut[];
@@ -156,6 +159,9 @@ void lightrec_free_block(struct lightrec_state *state, struct block *block);
 
 void remove_from_code_lut(struct blockcache *cache, struct block *block);
 
+enum psx_map
+lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr);
+
 const struct lightrec_mem_map *
 lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr);
 
@@ -175,6 +181,50 @@ static inline u32 lut_offset(u32 pc)
 	return (pc & (RAM_SIZE - 1)) >> 2; // RAM
 }
 
+static inline _Bool is_big_endian(void)
+{
+	return __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__;
+}
+
+static inline _Bool lut_is_32bit(const struct lightrec_state *state)
+{
+	return __WORDSIZE == 32 ||
+		(ENABLE_CODE_BUFFER && state->with_32bit_lut);
+}
+
+static inline size_t lut_elm_size(const struct lightrec_state *state)
+{
+	return lut_is_32bit(state) ? 4 : sizeof(void *);
+}
+
+static inline void ** lut_address(struct lightrec_state *state, u32 offset)
+{
+	if (lut_is_32bit(state))
+		return (void **) ((uintptr_t) state->code_lut + offset * 4);
+	else
+		return &state->code_lut[offset];
+}
+
+static inline void * lut_read(struct lightrec_state *state, u32 offset)
+{
+	void **lut_entry = lut_address(state, lut_offset(offset));
+
+	if (lut_is_32bit(state))
+		return (void *)(uintptr_t) *(u32 *) lut_entry;
+	else
+		return *lut_entry;
+}
+
+static inline void lut_write(struct lightrec_state *state, u32 offset, void *ptr)
+{
+	void **lut_entry = lut_address(state, offset);
+
+	if (lut_is_32bit(state))
+		*(u32 *) lut_entry = (u32)(uintptr_t) ptr;
+	else
+		*lut_entry = ptr;
+}
+
 static inline u32 get_ds_pc(const struct block *block, u16 offset, s16 imm)
 {
 	u16 flags = block->opcode_list[offset].flags;
diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c
index 9889272a..d172a30a 100644
--- a/deps/lightrec/lightrec.c
+++ b/deps/lightrec/lightrec.c
@@ -16,6 +16,7 @@
 #include "recompiler.h"
 #include "regcache.h"
 #include "optimizer.h"
+#include "tlsf/tlsf.h"
 
 #include <errno.h>
 #include <inttypes.h>
@@ -198,30 +199,39 @@ static void lightrec_invalidate_map(struct lightrec_state *state,
 		const struct lightrec_mem_map *map, u32 addr, u32 len)
 {
 	if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) {
-		memset(&state->code_lut[lut_offset(addr)], 0,
-		       ((len + 3) / 4) * sizeof(void *));
+		memset(lut_address(state, lut_offset(addr)), 0,
+		       ((len + 3) / 4) * lut_elm_size(state));
 	}
 }
 
-const struct lightrec_mem_map *
-lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr)
+enum psx_map
+lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr)
 {
 	const struct lightrec_mem_map *map;
 	unsigned int i;
-	u32 addr;
 
 	for (i = 0; i < state->nb_maps; i++) {
-		const struct lightrec_mem_map *mapi = &state->maps[i];
+		map = &state->maps[i];
 
-		if (kaddr >= mapi->pc && kaddr < mapi->pc + mapi->length) {
-			map = mapi;
-			break;
-		}
+		if (kaddr >= map->pc && kaddr < map->pc + map->length)
+			return (enum psx_map) i;
 	}
 
-	if (i == state->nb_maps)
+	return PSX_MAP_UNKNOWN;
+}
+
+const struct lightrec_mem_map *
+lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr)
+{
+	const struct lightrec_mem_map *map;
+	enum psx_map idx;
+	u32 addr;
+
+	idx = lightrec_get_map_idx(state, kaddr);
+	if (idx == PSX_MAP_UNKNOWN)
 		return NULL;
 
+	map = &state->maps[idx];
 	addr = kaddr - map->pc;
 
 	while (map->mirror_of)
@@ -615,7 +625,7 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
 	void *func;
 
 	for (;;) {
-		func = state->code_lut[lut_offset(pc)];
+		func = lut_read(state, pc);
 		if (func && func != state->get_next_block)
 			break;
 
@@ -686,13 +696,51 @@ 
static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta, return state->target_cycle - state->current_cycle; } +static void * lightrec_emit_code(struct lightrec_state *state, + jit_state_t *_jit, unsigned int *size) +{ + bool has_code_buffer = ENABLE_CODE_BUFFER && state->tlsf; + jit_word_t code_size, new_code_size; + void *code; + + jit_realize(); + + if (!ENABLE_DISASSEMBLER) + jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE); + + if (has_code_buffer) { + jit_get_code(&code_size); + code = tlsf_malloc(state->tlsf, (size_t) code_size); + if (!code) + return NULL; + + jit_set_code(code, code_size); + } + + code = jit_emit(); + + jit_get_code(&new_code_size); + lightrec_register(MEM_FOR_CODE, new_code_size); + + if (has_code_buffer) { + tlsf_realloc(state->tlsf, code, new_code_size); + + pr_debug("Creating code block at address 0x%" PRIxPTR ", " + "code size: %" PRIuPTR " new: %" PRIuPTR "\n", + (uintptr_t) code, code_size, new_code_size); + } + + *size = (unsigned int) new_code_size; + + return code; +} + static struct block * generate_wrapper(struct lightrec_state *state) { struct block *block; jit_state_t *_jit; unsigned int i; int stack_ptr; - jit_word_t code_size; jit_node_t *to_tramp, *to_fn_epilog; jit_node_t *addr[C_WRAPPERS_COUNT - 1]; @@ -767,21 +815,20 @@ static struct block * generate_wrapper(struct lightrec_state *state) jit_epilog(); block->_jit = _jit; - block->function = jit_emit(); block->opcode_list = NULL; block->flags = 0; block->nb_ops = 0; + block->function = lightrec_emit_code(state, _jit, + &block->code_size); + if (!block->function) + goto err_free_block; + state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function; for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) state->wrappers_eps[i] = jit_address(addr[i]); - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; - if (ENABLE_DISASSEMBLER) { pr_debug("Wrapper block:\n"); jit_disassemble(); @@ -825,10 +872,9 @@ static struct block * generate_dispatcher(struct lightrec_state *state) { struct block *block; jit_state_t *_jit; - jit_node_t *to_end, *to_c, *loop, *addr, *addr2, *addr3; + jit_node_t *to_end, *loop, *addr, *addr2, *addr3; unsigned int i; - u32 offset, ram_len; - jit_word_t code_size; + u32 offset; block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -888,21 +934,27 @@ static struct block * generate_dispatcher(struct lightrec_state *state) to_end = jit_blei(LIGHTREC_REG_CYCLE, 0); /* Convert next PC to KUNSEG and avoid mirrors */ - ram_len = state->maps[PSX_MAP_KERNEL_USER_RAM].length; - jit_andi(JIT_R0, JIT_V0, 0x10000000 | (ram_len - 1)); - to_c = jit_bgei(JIT_R0, ram_len); - - /* Fast path: code is running from RAM, use the code LUT */ - if (__WORDSIZE == 64) + jit_andi(JIT_R0, JIT_V0, 0x10000000 | (RAM_SIZE - 1)); + jit_rshi_u(JIT_R1, JIT_R0, 28); + jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1); + jit_addi(JIT_R2, JIT_R2, RAM_SIZE); + jit_movnr(JIT_R0, JIT_R2, JIT_R1); + + /* If possible, use the code LUT */ + if (!lut_is_32bit(state)) jit_lshi(JIT_R0, JIT_R0, 1); jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE); - jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut)); + + offset = offsetof(struct lightrec_state, code_lut); + if (lut_is_32bit(state)) + jit_ldxi_ui(JIT_R0, JIT_R0, offset); + else + jit_ldxi(JIT_R0, JIT_R0, offset); /* If we get non-NULL, loop */ jit_patch_at(jit_bnei(JIT_R0, 0), loop); /* Slow path: call C function get_next_block_func() */ - jit_patch(to_c); if (ENABLE_FIRST_PASS || 
OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* We may call the interpreter - update state->current_cycle */ @@ -946,15 +998,14 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_epilog(); block->_jit = _jit; - block->function = jit_emit(); block->opcode_list = NULL; block->flags = 0; block->nb_ops = 0; - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; + block->function = lightrec_emit_code(state, _jit, + &block->code_size); + if (!block->function) + goto err_free_block; state->eob_wrapper_func = jit_address(addr2); if (OPT_REPLACE_MEMSET) @@ -984,7 +1035,7 @@ union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) lightrec_get_map(state, &host, kunseg(pc)); const u32 *code = (u32 *)host; - return (union code) *code; + return (union code) LE32TOH(*code); } unsigned int lightrec_cycles_of_opcode(union code code) @@ -1101,7 +1152,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, block->flags |= BLOCK_FULLY_TAGGED; if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) - state->code_lut[lut_offset(pc)] = state->memset_func; + lut_write(state, lut_offset(pc), state->memset_func); block->hash = lightrec_calculate_block_hash(block); @@ -1160,6 +1211,19 @@ static void lightrec_reap_jit(struct lightrec_state *state, void *data) _jit_destroy_state(data); } +static void lightrec_free_function(struct lightrec_state *state, void *fn) +{ + if (ENABLE_CODE_BUFFER && state->tlsf) { + pr_debug("Freeing code block at 0x%" PRIxPTR "\n", (uintptr_t) fn); + tlsf_free(state->tlsf, fn); + } +} + +static void lightrec_reap_function(struct lightrec_state *state, void *data) +{ + lightrec_free_function(state, data); +} + int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block) { @@ -1171,7 +1235,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, jit_state_t *_jit, *oldjit; jit_node_t *start_of_block; bool skip_next = false; - jit_word_t code_size; + void *old_fn; unsigned int i, j; u32 offset; @@ -1184,6 +1248,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, return -ENOMEM; oldjit = block->_jit; + old_fn = block->function; block->_jit = _jit; lightrec_regcache_reset(cstate->reg_cache); @@ -1261,11 +1326,16 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, jit_ret(); jit_epilog(); - block->function = jit_emit(); + block->function = lightrec_emit_code(state, _jit, + &block->code_size); + if (!block->function) { + pr_err("Unable to compile block!\n"); + } + block->flags &= ~BLOCK_SHOULD_RECOMPILE; /* Add compiled function to the LUT */ - state->code_lut[lut_offset(block->pc)] = block->function; + lut_write(state, lut_offset(block->pc), block->function); if (ENABLE_THREADED_COMPILER) { /* Since we might try to reap the same block multiple times, @@ -1302,7 +1372,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, * be compiled. We can override the LUT entry with our new * block's entry point. 
*/ offset = lut_offset(block->pc) + target->offset; - state->code_lut[offset] = jit_address(target->label); + lut_write(state, offset, jit_address(target->label)); if (block2) { pr_debug("Reap block 0x%08x as it's covered by block " @@ -1323,11 +1393,6 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, if (ENABLE_THREADED_COMPILER) lightrec_reaper_continue(state->reaper); - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; - if (ENABLE_DISASSEMBLER) { pr_debug("Compiling block at PC: 0x%08x\n", block->pc); jit_disassemble(); @@ -1350,11 +1415,15 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, pr_debug("Block 0x%08x recompiled, reaping old jit context.\n", block->pc); - if (ENABLE_THREADED_COMPILER) + if (ENABLE_THREADED_COMPILER) { lightrec_reaper_add(state->reaper, lightrec_reap_jit, oldjit); - else + lightrec_reaper_add(state->reaper, + lightrec_reap_function, old_fn); + } else { _jit_destroy_state(oldjit); + lightrec_free_function(state, old_fn); + } } return 0; @@ -1435,6 +1504,7 @@ void lightrec_free_block(struct lightrec_state *state, struct block *block) lightrec_free_opcode_list(state, block); if (block->_jit) _jit_destroy_state(block->_jit); + lightrec_free_function(state, block->function); lightrec_unregister(MEM_FOR_CODE, block->code_size); lightrec_free(state, MEM_FOR_IR, sizeof(*block), block); } @@ -1469,7 +1539,12 @@ struct lightrec_state * lightrec_init(char *argv0, size_t nb, const struct lightrec_ops *ops) { + const struct lightrec_mem_map *codebuf_map; struct lightrec_state *state; + uintptr_t addr; + void *tlsf = NULL; + bool with_32bit_lut = false; + size_t lut_size; /* Sanity-check ops */ if (!ops || !ops->cop2_op || !ops->enable_ram) { @@ -1477,15 +1552,37 @@ struct lightrec_state * lightrec_init(char *argv0, return NULL; } + if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER) { + codebuf_map = &map[PSX_MAP_CODE_BUFFER]; + + tlsf = tlsf_create_with_pool(codebuf_map->address, + codebuf_map->length); + if (!tlsf) { + pr_err("Unable to initialize code buffer\n"); + return NULL; + } + + if (__WORDSIZE == 64) { + addr = (uintptr_t) codebuf_map->address + codebuf_map->length - 1; + with_32bit_lut = addr == (u32) addr; + } + } + + if (with_32bit_lut) + lut_size = CODE_LUT_SIZE * 4; + else + lut_size = CODE_LUT_SIZE * sizeof(void *); + init_jit(argv0); - state = calloc(1, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + state = calloc(1, sizeof(*state) + lut_size); if (!state) goto err_finish_jit; - lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + lut_size); + + state->tlsf = tlsf; + state->with_32bit_lut = with_32bit_lut; #if ENABLE_TINYMM state->tinymm = tinymm_init(malloc, free, 4096); @@ -1554,6 +1651,9 @@ struct lightrec_state * lightrec_init(char *argv0, pr_info("Memory map is sub-par. 
Emitted code will be slow.\n"); } + if (state->with_32bit_lut) + pr_info("Using 32-bit LUT\n"); + return state; err_free_dispatcher: @@ -1574,10 +1674,12 @@ err_free_tinymm: err_free_state: #endif lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lut_elm_size(state) * CODE_LUT_SIZE); free(state); err_finish_jit: finish_jit(); + if (ENABLE_CODE_BUFFER && tlsf) + tlsf_destroy(tlsf); return NULL; } @@ -1598,12 +1700,14 @@ void lightrec_destroy(struct lightrec_state *state) lightrec_free_block(state, state->dispatcher); lightrec_free_block(state, state->c_wrapper_block); finish_jit(); + if (ENABLE_CODE_BUFFER && state->tlsf) + tlsf_destroy(state->tlsf); #if ENABLE_TINYMM tinymm_shutdown(state->tinymm); #endif lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lut_elm_size(state) * CODE_LUT_SIZE); free(state); } @@ -1625,7 +1729,7 @@ void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len) void lightrec_invalidate_all(struct lightrec_state *state) { - memset(state->code_lut, 0, sizeof(*state->code_lut) * CODE_LUT_SIZE); + memset(state->code_lut, 0, lut_elm_size(state) * CODE_LUT_SIZE); } void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only) diff --git a/deps/lightrec/lightrec.h b/deps/lightrec/lightrec.h index e418c706..4f51e1f6 100644 --- a/deps/lightrec/lightrec.h +++ b/deps/lightrec/lightrec.h @@ -58,6 +58,9 @@ enum psx_map { PSX_MAP_MIRROR1, PSX_MAP_MIRROR2, PSX_MAP_MIRROR3, + PSX_MAP_CODE_BUFFER, + + PSX_MAP_UNKNOWN, }; struct lightrec_mem_map_ops { diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index 7304abca..562f7e00 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -817,6 +817,66 @@ static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset) to_nop->opcode = 0; } +static void lightrec_remove_useless_lui(struct block *block, unsigned int offset, + u32 known, u32 *values) +{ + struct opcode *list = block->opcode_list, + *op = &block->opcode_list[offset]; + int reader; + + if (!(op->flags & LIGHTREC_SYNC) && (known & BIT(op->i.rt)) && + values[op->i.rt] == op->i.imm << 16) { + pr_debug("Converting duplicated LUI to NOP\n"); + op->opcode = 0x0; + return; + } + + if (op->i.imm != 0 || op->i.rt == 0) + return; + + reader = find_next_reader(list, offset + 1, op->i.rt); + if (reader <= 0) + return; + + if (opcode_writes_register(list[reader].c, op->i.rt) || + reg_is_dead(list, reader, op->i.rt)) { + pr_debug("Removing useless LUI 0x0\n"); + + if (list[reader].i.rs == op->i.rt) + list[reader].i.rs = 0; + if (list[reader].i.op == OP_SPECIAL && + list[reader].i.rt == op->i.rt) + list[reader].i.rt = 0; + op->opcode = 0x0; + } +} + +static void lightrec_modify_lui(struct block *block, unsigned int offset) +{ + union code c, *lui = &block->opcode_list[offset].c; + bool stop = false, stop_next = false; + unsigned int i; + + for (i = offset + 1; !stop && i < block->nb_ops; i++) { + c = block->opcode_list[i].c; + stop = stop_next; + + if ((opcode_is_store(c) && c.i.rt == lui->i.rt) + || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt))) + break; + + if (opcode_writes_register(c, lui->i.rt)) { + pr_debug("Convert LUI at offset 0x%x to kuseg\n", + i - 1 << 2); + lui->i.imm = kunseg(lui->i.imm << 16) >> 16; + break; + } + + if (has_delay_slot(c)) + stop_next = true; + } +} + static int lightrec_transform_ops(struct lightrec_state *state, struct block *block) { struct opcode *list = 
block->opcode_list; @@ -824,7 +884,6 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl u32 known = BIT(0); u32 values[32] = { 0 }; unsigned int i; - int reader; for (i = 0; i < block->nb_ops; i++) { prev = op; @@ -863,30 +922,8 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl break; case OP_LUI: - if (!(op->flags & LIGHTREC_SYNC) && - (known & BIT(op->i.rt)) && - values[op->i.rt] == op->i.imm << 16) { - pr_debug("Converting duplicated LUI to NOP\n"); - op->opcode = 0x0; - } - - if (op->i.imm != 0 || op->i.rt == 0) - break; - - reader = find_next_reader(list, i + 1, op->i.rt); - if (reader > 0 && - (opcode_writes_register(list[reader].c, op->i.rt) || - reg_is_dead(list, reader, op->i.rt))) { - - pr_debug("Removing useless LUI 0x0\n"); - - if (list[reader].i.rs == op->i.rt) - list[reader].i.rs = 0; - if (list[reader].i.op == OP_SPECIAL && - list[reader].i.rt == op->i.rt) - list[reader].i.rt = 0; - op->opcode = 0x0; - } + lightrec_modify_lui(block, i); + lightrec_remove_useless_lui(block, i, known, values); break; /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU @@ -1233,15 +1270,14 @@ static int lightrec_early_unload(struct lightrec_state *state, struct block *blo static int lightrec_flag_io(struct lightrec_state *state, struct block *block) { - const struct lightrec_mem_map *map; - struct opcode *prev2, *prev = NULL, *list = NULL; + struct opcode *prev = NULL, *list = NULL; + enum psx_map psx_map; u32 known = BIT(0); u32 values[32] = { 0 }; unsigned int i; - u32 val; + u32 val, kunseg_val; for (i = 0; i < block->nb_ops; i++) { - prev2 = prev; prev = list; list = &block->opcode_list[i]; @@ -1289,42 +1325,38 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) case OP_LWR: case OP_LWC2: if (OPT_FLAG_IO && (known & BIT(list->i.rs))) { - if (prev && prev->i.op == OP_LUI && - !(prev2 && has_delay_slot(prev2->c)) && - prev->i.rt == list->i.rs && - list->i.rt == list->i.rs && - prev->i.imm & 0x8000) { - pr_debug("Convert LUI at offset 0x%x to kuseg\n", - i - 1 << 2); - - val = kunseg(prev->i.imm << 16); - prev->i.imm = val >> 16; - values[list->i.rs] = val; - } - val = values[list->i.rs] + (s16) list->i.imm; - map = lightrec_get_map(state, NULL, kunseg(val)); - - if (!map || map->ops || - map == &state->maps[PSX_MAP_PARALLEL_PORT]) { - pr_debug("Flagging opcode %u as I/O access\n", - i); - list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); - break; - } - - if (val - map->pc < map->length) - list->flags |= LIGHTREC_NO_MASK; - - if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) { + kunseg_val = kunseg(val); + psx_map = lightrec_get_map_idx(state, kunseg_val); + + switch (psx_map) { + case PSX_MAP_KERNEL_USER_RAM: + if (val == kunseg_val) + list->flags |= LIGHTREC_NO_MASK; + /* fall-through */ + case PSX_MAP_MIRROR1: + case PSX_MAP_MIRROR2: + case PSX_MAP_MIRROR3: pr_debug("Flaging opcode %u as RAM access\n", i); list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM); - } else if (map == &state->maps[PSX_MAP_BIOS]) { + break; + case PSX_MAP_BIOS: pr_debug("Flaging opcode %u as BIOS access\n", i); list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS); - } else if (map == &state->maps[PSX_MAP_SCRATCH_PAD]) { + break; + case PSX_MAP_SCRATCH_PAD: pr_debug("Flaging opcode %u as scratchpad access\n", i); list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH); + + /* Consider that we're never going to run code from + * the scratchpad. 
*/ + list->flags |= LIGHTREC_NO_INVALIDATE; + break; + default: + pr_debug("Flagging opcode %u as I/O access\n", + i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + break; } } default: /* fall-through */ diff --git a/deps/lightrec/tlsf/.gitrepo b/deps/lightrec/tlsf/.gitrepo new file mode 100644 index 00000000..692e5425 --- /dev/null +++ b/deps/lightrec/tlsf/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme +; +[subrepo] + remote = https://github.com/mattconte/tlsf + branch = master + commit = deff9ab509341f264addbd3c8ada533678591905 + parent = 1dc0344052e7379e16753e4a285c30fd158bf78d + method = merge + cmdver = 0.4.3 diff --git a/deps/lightrec/tlsf/README.md b/deps/lightrec/tlsf/README.md new file mode 100644 index 00000000..982919fc --- /dev/null +++ b/deps/lightrec/tlsf/README.md @@ -0,0 +1,92 @@ +# tlsf +Two-Level Segregated Fit memory allocator implementation. +Written by Matthew Conte (matt@baisoku.org). +Released under the BSD license. + +Features +-------- + * O(1) cost for malloc, free, realloc, memalign + * Extremely low overhead per allocation (4 bytes) + * Low overhead per TLSF management of pools (~3kB) + * Low fragmentation + * Compiles to only a few kB of code and data + * Support for adding and removing memory pool regions on the fly + +Caveats +------- + * Currently, assumes architecture can make 4-byte aligned accesses + * Not designed to be thread safe; the user must provide this + +Notes +----- +This code was based on the TLSF 1.4 spec and documentation found at: + + http://www.gii.upv.es/tlsf/main/docs + +It also leverages the TLSF 2.0 improvement to shrink the per-block overhead from 8 to 4 bytes. 
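The "Two-Level Segregated Fit" of the name is the size-to-bin mapping: the first level indexes the power-of-two range of a request, the second level linearly subdivides that range. A minimal standalone sketch of that mapping, mirroring mapping_insert() from the bundled tlsf.c below; fls32() is a hypothetical stand-in for the architecture-specific tlsf_fls() helpers, and the constants assume the 32-bit (ALIGN_SIZE_LOG2 = 2) build:

/* Sketch: map a request size to its (fl, sl) free-list bin, as
 * mapping_insert() in the bundled tlsf.c does.  fls32() stands in for
 * the architecture-specific tlsf_fls(); constants match a 32-bit build. */
#include <stdio.h>

#define SL_INDEX_COUNT_LOG2 5
#define ALIGN_SIZE_LOG2     2
#define FL_INDEX_SHIFT      (SL_INDEX_COUNT_LOG2 + ALIGN_SIZE_LOG2)
#define SMALL_BLOCK_SIZE    (1 << FL_INDEX_SHIFT)
#define SL_INDEX_COUNT      (1 << SL_INDEX_COUNT_LOG2)

static int fls32(unsigned int word)
{
	return word ? 31 - __builtin_clz(word) : -1;
}

static void mapping(unsigned int size, int *fli, int *sli)
{
	if (size < SMALL_BLOCK_SIZE) {
		/* Small blocks all live in first-level list 0. */
		*fli = 0;
		*sli = size / (SMALL_BLOCK_SIZE / SL_INDEX_COUNT);
	} else {
		int fl = fls32(size);

		/* Second level: the SL_INDEX_COUNT_LOG2 bits below the MSB. */
		*sli = (size >> (fl - SL_INDEX_COUNT_LOG2)) ^ SL_INDEX_COUNT;
		*fli = fl - (FL_INDEX_SHIFT - 1);
	}
}

int main(void)
{
	int fl, sl;

	mapping(460, &fl, &sl);
	printf("size 460 -> fl %d, sl %d\n", fl, sl); /* prints fl 2, sl 25 */
	return 0;
}

Because both levels are indexed through bitmaps, finding a non-empty list that fits is a pair of ffs/fls operations, which is where the O(1) claim above comes from.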
+
+History
+-------
+2016/04/10 - v3.1
+ * Code moved to github
+ * tlsfbits.h rolled into tlsf.c
+ * License changed to BSD
+
+2014/02/08 - v3.0
+ * This version is based on improvements from 3DInteractive GmbH
+ * Interface changed to allow more than one memory pool
+ * Separated pool handling from control structure (adding, removing, debugging)
+ * Control structure and pools can still be constructed in the same memory block
+ * Memory blocks for control structure and pools are checked for alignment
+ * Added functions to retrieve control structure size, alignment size, min and max block size, overhead of pool structure, and overhead of a single allocation
+ * Minimal Pool size is tlsf_block_size_min() + tlsf_pool_overhead()
+ * Pool must be empty when it is removed, in order to allow O(1) removal
+
+2011/10/20 - v2.0
+ * 64-bit support
+ * More compiler intrinsics for ffs/fls
+ * ffs/fls verification during TLSF creation in debug builds
+
+2008/04/04 - v1.9
+ * Add tlsf_heap_check, a heap integrity check
+ * Support a predefined tlsf_assert macro
+ * Fix realloc case where block should shrink; if adjacent block is in use, execution would go down the slow path
+
+2007/02/08 - v1.8
+ * Fix for unnecessary reallocation in tlsf_realloc
+
+2007/02/03 - v1.7
+ * tlsf_heap_walk takes a callback
+ * tlsf_realloc now returns NULL on failure
+ * tlsf_memalign optimization for 4-byte alignment
+ * Usage of size_t where appropriate
+
+2006/11/21 - v1.6
+ * ffs/fls broken out into tlsfbits.h
+ * tlsf_overhead queries per-pool overhead
+
+2006/11/07 - v1.5
+ * Smart realloc implementation
+ * Smart memalign implementation
+
+2006/10/11 - v1.4
+ * Add some ffs/fls implementations
+ * Minor code footprint reduction
+
+2006/09/14 - v1.3
+ * Profiling indicates heavy use of blocks of size 1-128, so implement small block handling
+ * Reduce pool overhead by about 1kb
+ * Reduce minimum block size from 32 to 12 bytes
+ * Realloc bug fix
+
+2006/09/09 - v1.2
+ * Add tlsf_block_size
+ * Static assertion mechanism for invariants
+ * Minor bugfixes
+
+2006/09/01 - v1.1
+ * Add tlsf_realloc
+ * Add tlsf_walk_heap
+
+2006/08/25 - v1.0
+ * First release
diff --git a/deps/lightrec/tlsf/tlsf.c b/deps/lightrec/tlsf/tlsf.c
new file mode 100644
index 00000000..af575737
--- /dev/null
+++ b/deps/lightrec/tlsf/tlsf.c
@@ -0,0 +1,1264 @@
+#include <assert.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "tlsf.h"
+
+#if defined(__cplusplus)
+#define tlsf_decl inline
+#else
+#define tlsf_decl static
+#endif
+
+/*
+** Architecture-specific bit manipulation routines.
+**
+** TLSF achieves O(1) cost for malloc and free operations by limiting
+** the search for a free block to a free list of guaranteed size
+** adequate to fulfill the request, combined with efficient free list
+** queries using bitmasks and architecture-specific bit-manipulation
+** routines.
+**
+** Most modern processors provide instructions to count leading zeroes
+** in a word, find the lowest and highest set bit, etc. These
+** specific implementations will be used when available, falling back
+** to a reasonably efficient generic implementation.
+**
+** NOTE: TLSF spec relies on ffs/fls returning value 0..31.
+** ffs/fls return 1-32 by default, returning 0 for error.
+*/
+
+/*
+** Detect whether or not we are building for a 32- or 64-bit (LP/LLP)
+** architecture. There is no reliable portable method at compile-time.
+*/ +#if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) \ + || defined (_WIN64) || defined (__LP64__) || defined (__LLP64__) +#define TLSF_64BIT +#endif + +/* +** gcc 3.4 and above have builtin support, specialized for architecture. +** Some compilers masquerade as gcc; patchlevel test filters them out. +*/ +#if defined (__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \ + && defined (__GNUC_PATCHLEVEL__) + +#if defined (__SNC__) +/* SNC for Playstation 3. */ + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __builtin_clz(reverse); + return bit - 1; +} + +#else + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + return __builtin_ffs(word) - 1; +} + +#endif + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __builtin_clz(word) : 0; + return bit - 1; +} + +#elif defined (_MSC_VER) && (_MSC_VER >= 1400) && (defined (_M_IX86) || defined (_M_X64)) +/* Microsoft Visual C++ support on x86/X64 architectures. */ + +#include + +#pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) + +tlsf_decl int tlsf_fls(unsigned int word) +{ + unsigned long index; + return _BitScanReverse(&index, word) ? index : -1; +} + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + unsigned long index; + return _BitScanForward(&index, word) ? index : -1; +} + +#elif defined (_MSC_VER) && defined (_M_PPC) +/* Microsoft Visual C++ support on PowerPC architectures. */ + +#include + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = 32 - _CountLeadingZeros(word); + return bit - 1; +} + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - _CountLeadingZeros(reverse); + return bit - 1; +} + +#elif defined (__ARMCC_VERSION) +/* RealView Compilation Tools for ARM */ + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __clz(reverse); + return bit - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __clz(word) : 0; + return bit - 1; +} + +#elif defined (__ghs__) +/* Green Hills support for PowerPC */ + +#include + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __CLZ32(reverse); + return bit - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __CLZ32(word) : 0; + return bit - 1; +} + +#else +/* Fall back to generic implementation. */ + +tlsf_decl int tlsf_fls_generic(unsigned int word) +{ + int bit = 32; + + if (!word) bit -= 1; + if (!(word & 0xffff0000)) { word <<= 16; bit -= 16; } + if (!(word & 0xff000000)) { word <<= 8; bit -= 8; } + if (!(word & 0xf0000000)) { word <<= 4; bit -= 4; } + if (!(word & 0xc0000000)) { word <<= 2; bit -= 2; } + if (!(word & 0x80000000)) { word <<= 1; bit -= 1; } + + return bit; +} + +/* Implement ffs in terms of fls. */ +tlsf_decl int tlsf_ffs(unsigned int word) +{ + return tlsf_fls_generic(word & (~word + 1)) - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + return tlsf_fls_generic(word) - 1; +} + +#endif + +/* Possibly 64-bit version of tlsf_fls. 
*/ +#if defined (TLSF_64BIT) +tlsf_decl int tlsf_fls_sizet(size_t size) +{ + int high = (int)(size >> 32); + int bits = 0; + if (high) + { + bits = 32 + tlsf_fls(high); + } + else + { + bits = tlsf_fls((int)size & 0xffffffff); + + } + return bits; +} +#else +#define tlsf_fls_sizet tlsf_fls +#endif + +#undef tlsf_decl + +/* +** Constants. +*/ + +/* Public constants: may be modified. */ +enum tlsf_public +{ + /* log2 of number of linear subdivisions of block sizes. Larger + ** values require more memory in the control structure. Values of + ** 4 or 5 are typical. + */ + SL_INDEX_COUNT_LOG2 = 5, +}; + +/* Private constants: do not modify. */ +enum tlsf_private +{ +#if defined (TLSF_64BIT) + /* All allocation sizes and addresses are aligned to 8 bytes. */ + ALIGN_SIZE_LOG2 = 3, +#else + /* All allocation sizes and addresses are aligned to 4 bytes. */ + ALIGN_SIZE_LOG2 = 2, +#endif + ALIGN_SIZE = (1 << ALIGN_SIZE_LOG2), + + /* + ** We support allocations of sizes up to (1 << FL_INDEX_MAX) bits. + ** However, because we linearly subdivide the second-level lists, and + ** our minimum size granularity is 4 bytes, it doesn't make sense to + ** create first-level lists for sizes smaller than SL_INDEX_COUNT * 4, + ** or (1 << (SL_INDEX_COUNT_LOG2 + 2)) bytes, as there we will be + ** trying to split size ranges into more slots than we have available. + ** Instead, we calculate the minimum threshold size, and place all + ** blocks below that size into the 0th first-level list. + */ + +#if defined (TLSF_64BIT) + /* + ** TODO: We can increase this to support larger sizes, at the expense + ** of more overhead in the TLSF structure. + */ + FL_INDEX_MAX = 32, +#else + FL_INDEX_MAX = 30, +#endif + SL_INDEX_COUNT = (1 << SL_INDEX_COUNT_LOG2), + FL_INDEX_SHIFT = (SL_INDEX_COUNT_LOG2 + ALIGN_SIZE_LOG2), + FL_INDEX_COUNT = (FL_INDEX_MAX - FL_INDEX_SHIFT + 1), + + SMALL_BLOCK_SIZE = (1 << FL_INDEX_SHIFT), +}; + +/* +** Cast and min/max macros. +*/ + +#define tlsf_cast(t, exp) ((t) (exp)) +#define tlsf_min(a, b) ((a) < (b) ? (a) : (b)) +#define tlsf_max(a, b) ((a) > (b) ? (a) : (b)) + +/* +** Set assert macro, if it has not been provided by the user. +*/ +#if !defined (tlsf_assert) +#define tlsf_assert assert +#endif + +/* +** Static assertion mechanism. +*/ + +#define _tlsf_glue2(x, y) x ## y +#define _tlsf_glue(x, y) _tlsf_glue2(x, y) +#define tlsf_static_assert(exp) \ + typedef char _tlsf_glue(static_assert, __LINE__) [(exp) ? 1 : -1] + +/* This code has been tested on 32- and 64-bit (LP/LLP) architectures. */ +tlsf_static_assert(sizeof(int) * CHAR_BIT == 32); +tlsf_static_assert(sizeof(size_t) * CHAR_BIT >= 32); +tlsf_static_assert(sizeof(size_t) * CHAR_BIT <= 64); + +/* SL_INDEX_COUNT must be <= number of bits in sl_bitmap's storage type. */ +tlsf_static_assert(sizeof(unsigned int) * CHAR_BIT >= SL_INDEX_COUNT); + +/* Ensure we've properly tuned our sizes. */ +tlsf_static_assert(ALIGN_SIZE == SMALL_BLOCK_SIZE / SL_INDEX_COUNT); + +/* +** Data structures and associated constants. +*/ + +/* +** Block header structure. +** +** There are several implementation subtleties involved: +** - The prev_phys_block field is only valid if the previous block is free. +** - The prev_phys_block field is actually stored at the end of the +** previous block. It appears at the beginning of this structure only to +** simplify the implementation. +** - The next_free / prev_free fields are only valid if the block is free. +*/ +typedef struct block_header_t +{ + /* Points to the previous physical block. 
*/ + struct block_header_t* prev_phys_block; + + /* The size of this block, excluding the block header. */ + size_t size; + + /* Next and previous free blocks. */ + struct block_header_t* next_free; + struct block_header_t* prev_free; +} block_header_t; + +/* +** Since block sizes are always at least a multiple of 4, the two least +** significant bits of the size field are used to store the block status: +** - bit 0: whether block is busy or free +** - bit 1: whether previous block is busy or free +*/ +static const size_t block_header_free_bit = 1 << 0; +static const size_t block_header_prev_free_bit = 1 << 1; + +/* +** The size of the block header exposed to used blocks is the size field. +** The prev_phys_block field is stored *inside* the previous free block. +*/ +static const size_t block_header_overhead = sizeof(size_t); + +/* User data starts directly after the size field in a used block. */ +static const size_t block_start_offset = + offsetof(block_header_t, size) + sizeof(size_t); + +/* +** A free block must be large enough to store its header minus the size of +** the prev_phys_block field, and no larger than the number of addressable +** bits for FL_INDEX. +*/ +static const size_t block_size_min = + sizeof(block_header_t) - sizeof(block_header_t*); +static const size_t block_size_max = tlsf_cast(size_t, 1) << FL_INDEX_MAX; + + +/* The TLSF control structure. */ +typedef struct control_t +{ + /* Empty lists point at this block to indicate they are free. */ + block_header_t block_null; + + /* Bitmaps for free lists. */ + unsigned int fl_bitmap; + unsigned int sl_bitmap[FL_INDEX_COUNT]; + + /* Head of free lists. */ + block_header_t* blocks[FL_INDEX_COUNT][SL_INDEX_COUNT]; +} control_t; + +/* A type used for casting when doing pointer arithmetic. */ +typedef ptrdiff_t tlsfptr_t; + +/* +** block_header_t member functions. +*/ + +static size_t block_size(const block_header_t* block) +{ + return block->size & ~(block_header_free_bit | block_header_prev_free_bit); +} + +static void block_set_size(block_header_t* block, size_t size) +{ + const size_t oldsize = block->size; + block->size = size | (oldsize & (block_header_free_bit | block_header_prev_free_bit)); +} + +static int block_is_last(const block_header_t* block) +{ + return block_size(block) == 0; +} + +static int block_is_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_free_bit); +} + +static void block_set_free(block_header_t* block) +{ + block->size |= block_header_free_bit; +} + +static void block_set_used(block_header_t* block) +{ + block->size &= ~block_header_free_bit; +} + +static int block_is_prev_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_prev_free_bit); +} + +static void block_set_prev_free(block_header_t* block) +{ + block->size |= block_header_prev_free_bit; +} + +static void block_set_prev_used(block_header_t* block) +{ + block->size &= ~block_header_prev_free_bit; +} + +static block_header_t* block_from_ptr(const void* ptr) +{ + return tlsf_cast(block_header_t*, + tlsf_cast(unsigned char*, ptr) - block_start_offset); +} + +static void* block_to_ptr(const block_header_t* block) +{ + return tlsf_cast(void*, + tlsf_cast(unsigned char*, block) + block_start_offset); +} + +/* Return location of next block after block of given size. */ +static block_header_t* offset_to_block(const void* ptr, size_t size) +{ + return tlsf_cast(block_header_t*, tlsf_cast(tlsfptr_t, ptr) + size); +} + +/* Return location of previous block. 
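+** For illustration: on a 32-bit build, a used 32-byte block whose header
+** starts at address A hands out user memory at A + 8 (block_start_offset),
+** and block_next() finds the following header at A + 4 + 32.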
*/ +static block_header_t* block_prev(const block_header_t* block) +{ + tlsf_assert(block_is_prev_free(block) && "previous block must be free"); + return block->prev_phys_block; +} + +/* Return location of next existing block. */ +static block_header_t* block_next(const block_header_t* block) +{ + block_header_t* next = offset_to_block(block_to_ptr(block), + block_size(block) - block_header_overhead); + tlsf_assert(!block_is_last(block)); + return next; +} + +/* Link a new block with its physical neighbor, return the neighbor. */ +static block_header_t* block_link_next(block_header_t* block) +{ + block_header_t* next = block_next(block); + next->prev_phys_block = block; + return next; +} + +static void block_mark_as_free(block_header_t* block) +{ + /* Link the block to the next block, first. */ + block_header_t* next = block_link_next(block); + block_set_prev_free(next); + block_set_free(block); +} + +static void block_mark_as_used(block_header_t* block) +{ + block_header_t* next = block_next(block); + block_set_prev_used(next); + block_set_used(block); +} + +static size_t align_up(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return (x + (align - 1)) & ~(align - 1); +} + +static size_t align_down(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return x - (x & (align - 1)); +} + +static void* align_ptr(const void* ptr, size_t align) +{ + const tlsfptr_t aligned = + (tlsf_cast(tlsfptr_t, ptr) + (align - 1)) & ~(align - 1); + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return tlsf_cast(void*, aligned); +} + +/* +** Adjust an allocation size to be aligned to word size, and no smaller +** than internal minimum. +*/ +static size_t adjust_request_size(size_t size, size_t align) +{ + size_t adjust = 0; + if (size) + { + const size_t aligned = align_up(size, align); + + /* aligned sized must not exceed block_size_max or we'll go out of bounds on sl_bitmap */ + if (aligned < block_size_max) + { + adjust = tlsf_max(aligned, block_size_min); + } + } + return adjust; +} + +/* +** TLSF utility functions. In most cases, these are direct translations of +** the documentation found in the white paper. +*/ + +static void mapping_insert(size_t size, int* fli, int* sli) +{ + int fl, sl; + if (size < SMALL_BLOCK_SIZE) + { + /* Store small blocks in first list. */ + fl = 0; + sl = tlsf_cast(int, size) / (SMALL_BLOCK_SIZE / SL_INDEX_COUNT); + } + else + { + fl = tlsf_fls_sizet(size); + sl = tlsf_cast(int, size >> (fl - SL_INDEX_COUNT_LOG2)) ^ (1 << SL_INDEX_COUNT_LOG2); + fl -= (FL_INDEX_SHIFT - 1); + } + *fli = fl; + *sli = sl; +} + +/* This version rounds up to the next block size (for allocations) */ +static void mapping_search(size_t size, int* fli, int* sli) +{ + if (size >= SMALL_BLOCK_SIZE) + { + const size_t round = (1 << (tlsf_fls_sizet(size) - SL_INDEX_COUNT_LOG2)) - 1; + size += round; + } + mapping_insert(size, fli, sli); +} + +static block_header_t* search_suitable_block(control_t* control, int* fli, int* sli) +{ + int fl = *fli; + int sl = *sli; + + /* + ** First, search for a block in the list associated with the given + ** fl/sl index. + */ + unsigned int sl_map = control->sl_bitmap[fl] & (~0U << sl); + if (!sl_map) + { + /* No block exists. Search in the next largest first-level list. */ + const unsigned int fl_map = control->fl_bitmap & (~0U << (fl + 1)); + if (!fl_map) + { + /* No free blocks available, memory has been exhausted. 
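+			** To illustrate the search above: for fl == 5, sl == 12,
+			** sl_map keeps only bits >= 12 of sl_bitmap[5]; when that
+			** is empty, fl_map = fl_bitmap & (~0U << 6) exposes the
+			** next populated first level, and tlsf_ffs then selects
+			** its lowest set bit.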
*/ + return 0; + } + + fl = tlsf_ffs(fl_map); + *fli = fl; + sl_map = control->sl_bitmap[fl]; + } + tlsf_assert(sl_map && "internal error - second level bitmap is null"); + sl = tlsf_ffs(sl_map); + *sli = sl; + + /* Return the first block in the free list. */ + return control->blocks[fl][sl]; +} + +/* Remove a free block from the free list.*/ +static void remove_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* prev = block->prev_free; + block_header_t* next = block->next_free; + tlsf_assert(prev && "prev_free field can not be null"); + tlsf_assert(next && "next_free field can not be null"); + next->prev_free = prev; + prev->next_free = next; + + /* If this block is the head of the free list, set new head. */ + if (control->blocks[fl][sl] == block) + { + control->blocks[fl][sl] = next; + + /* If the new head is null, clear the bitmap. */ + if (next == &control->block_null) + { + control->sl_bitmap[fl] &= ~(1U << sl); + + /* If the second bitmap is now empty, clear the fl bitmap. */ + if (!control->sl_bitmap[fl]) + { + control->fl_bitmap &= ~(1U << fl); + } + } + } +} + +/* Insert a free block into the free block list. */ +static void insert_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* current = control->blocks[fl][sl]; + tlsf_assert(current && "free list cannot have a null entry"); + tlsf_assert(block && "cannot insert a null entry into the free list"); + block->next_free = current; + block->prev_free = &control->block_null; + current->prev_free = block; + + tlsf_assert(block_to_ptr(block) == align_ptr(block_to_ptr(block), ALIGN_SIZE) + && "block not aligned properly"); + /* + ** Insert the new block at the head of the list, and mark the first- + ** and second-level bitmaps appropriately. + */ + control->blocks[fl][sl] = block; + control->fl_bitmap |= (1U << fl); + control->sl_bitmap[fl] |= (1U << sl); +} + +/* Remove a given block from the free list. */ +static void block_remove(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} + +/* Insert a given block into the free list. */ +static void block_insert(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + insert_free_block(control, block, fl, sl); +} + +static int block_can_split(block_header_t* block, size_t size) +{ + return block_size(block) >= sizeof(block_header_t) + size; +} + +/* Split a block into two, the second of which is free. */ +static block_header_t* block_split(block_header_t* block, size_t size) +{ + /* Calculate the amount of space left in the remaining block. */ + block_header_t* remaining = + offset_to_block(block_to_ptr(block), size - block_header_overhead); + + const size_t remain_size = block_size(block) - (size + block_header_overhead); + + tlsf_assert(block_to_ptr(remaining) == align_ptr(block_to_ptr(remaining), ALIGN_SIZE) + && "remaining block not aligned properly"); + + tlsf_assert(block_size(block) == remain_size + size + block_header_overhead); + block_set_size(remaining, remain_size); + tlsf_assert(block_size(remaining) >= block_size_min && "block split with invalid size"); + + block_set_size(block, size); + block_mark_as_free(remaining); + + return remaining; +} + +/* Absorb a free block's storage into an adjacent previous free block. 
*/ +static block_header_t* block_absorb(block_header_t* prev, block_header_t* block) +{ + tlsf_assert(!block_is_last(prev) && "previous block can't be last"); + /* Note: Leaves flags untouched. */ + prev->size += block_size(block) + block_header_overhead; + block_link_next(prev); + return prev; +} + +/* Merge a just-freed block with an adjacent previous free block. */ +static block_header_t* block_merge_prev(control_t* control, block_header_t* block) +{ + if (block_is_prev_free(block)) + { + block_header_t* prev = block_prev(block); + tlsf_assert(prev && "prev physical block can't be null"); + tlsf_assert(block_is_free(prev) && "prev block is not free though marked as such"); + block_remove(control, prev); + block = block_absorb(prev, block); + } + + return block; +} + +/* Merge a just-freed block with an adjacent free block. */ +static block_header_t* block_merge_next(control_t* control, block_header_t* block) +{ + block_header_t* next = block_next(block); + tlsf_assert(next && "next physical block can't be null"); + + if (block_is_free(next)) + { + tlsf_assert(!block_is_last(block) && "previous block can't be last"); + block_remove(control, next); + block = block_absorb(block, next); + } + + return block; +} + +/* Trim any trailing block space off the end of a block, return to pool. */ +static void block_trim_free(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(block_is_free(block) && "block must be free"); + if (block_can_split(block, size)) + { + block_header_t* remaining_block = block_split(block, size); + block_link_next(block); + block_set_prev_free(remaining_block); + block_insert(control, remaining_block); + } +} + +/* Trim any trailing block space off the end of a used block, return to pool. */ +static void block_trim_used(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(!block_is_free(block) && "block must be used"); + if (block_can_split(block, size)) + { + /* If the next block is free, we must coalesce. */ + block_header_t* remaining_block = block_split(block, size); + block_set_prev_used(remaining_block); + + remaining_block = block_merge_next(control, remaining_block); + block_insert(control, remaining_block); + } +} + +static block_header_t* block_trim_free_leading(control_t* control, block_header_t* block, size_t size) +{ + block_header_t* remaining_block = block; + if (block_can_split(block, size)) + { + /* We want the 2nd block. */ + remaining_block = block_split(block, size - block_header_overhead); + block_set_prev_free(remaining_block); + + block_link_next(block); + block_insert(control, block); + } + + return remaining_block; +} + +static block_header_t* block_locate_free(control_t* control, size_t size) +{ + int fl = 0, sl = 0; + block_header_t* block = 0; + + if (size) + { + mapping_search(size, &fl, &sl); + + /* + ** mapping_search can futz with the size, so for excessively large sizes it can sometimes wind up + ** with indices that are off the end of the block array. + ** So, we protect against that here, since this is the only callsite of mapping_search. + ** Note that we don't need to check sl, since it comes from a modulo operation that guarantees it's always in range. 
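+		** Concretely, on a 32-bit build (FL_INDEX_MAX == 30, hence
+		** FL_INDEX_COUNT == 24), a request close to block_size_max rounds
+		** up until mapping_insert computes fl == 24, one past the last
+		** valid list, which the bounds check below rejects.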
+ */ + if (fl < FL_INDEX_COUNT) + { + block = search_suitable_block(control, &fl, &sl); + } + } + + if (block) + { + tlsf_assert(block_size(block) >= size); + remove_free_block(control, block, fl, sl); + } + + return block; +} + +static void* block_prepare_used(control_t* control, block_header_t* block, size_t size) +{ + void* p = 0; + if (block) + { + tlsf_assert(size && "size must be non-zero"); + block_trim_free(control, block, size); + block_mark_as_used(block); + p = block_to_ptr(block); + } + return p; +} + +/* Clear structure and point all empty lists at the null block. */ +static void control_construct(control_t* control) +{ + int i, j; + + control->block_null.next_free = &control->block_null; + control->block_null.prev_free = &control->block_null; + + control->fl_bitmap = 0; + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + control->sl_bitmap[i] = 0; + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + control->blocks[i][j] = &control->block_null; + } + } +} + +/* +** Debugging utilities. +*/ + +typedef struct integrity_t +{ + int prev_status; + int status; +} integrity_t; + +#define tlsf_insist(x) { tlsf_assert(x); if (!(x)) { status--; } } + +static void integrity_walker(void* ptr, size_t size, int used, void* user) +{ + block_header_t* block = block_from_ptr(ptr); + integrity_t* integ = tlsf_cast(integrity_t*, user); + const int this_prev_status = block_is_prev_free(block) ? 1 : 0; + const int this_status = block_is_free(block) ? 1 : 0; + const size_t this_block_size = block_size(block); + + int status = 0; + (void)used; + tlsf_insist(integ->prev_status == this_prev_status && "prev status incorrect"); + tlsf_insist(size == this_block_size && "block size incorrect"); + + integ->prev_status = this_status; + integ->status += status; +} + +int tlsf_check(tlsf_t tlsf) +{ + int i, j; + + control_t* control = tlsf_cast(control_t*, tlsf); + int status = 0; + + /* Check that the free lists and bitmaps are accurate. */ + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + const int fl_map = control->fl_bitmap & (1U << i); + const int sl_list = control->sl_bitmap[i]; + const int sl_map = sl_list & (1U << j); + const block_header_t* block = control->blocks[i][j]; + + /* Check that first- and second-level lists agree. */ + if (!fl_map) + { + tlsf_insist(!sl_map && "second-level map must be null"); + } + + if (!sl_map) + { + tlsf_insist(block == &control->block_null && "block list must be null"); + continue; + } + + /* Check that there is at least one free block. */ + tlsf_insist(sl_list && "no free blocks in second-level map"); + tlsf_insist(block != &control->block_null && "block should not be null"); + + while (block != &control->block_null) + { + int fli, sli; + tlsf_insist(block_is_free(block) && "block should be free"); + tlsf_insist(!block_is_prev_free(block) && "blocks should have coalesced"); + tlsf_insist(!block_is_free(block_next(block)) && "blocks should have coalesced"); + tlsf_insist(block_is_prev_free(block_next(block)) && "block should be free"); + tlsf_insist(block_size(block) >= block_size_min && "block not minimum size"); + + mapping_insert(block_size(block), &fli, &sli); + tlsf_insist(fli == i && sli == j && "block size indexed in wrong list"); + block = block->next_free; + } + } + } + + return status; +} + +#undef tlsf_insist + +static void default_walker(void* ptr, size_t size, int used, void* user) +{ + (void)user; + printf("\t%p %s size: %x (%p)\n", ptr, used ? 
"used" : "free", (unsigned int)size, block_from_ptr(ptr)); +} + +void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user) +{ + tlsf_walker pool_walker = walker ? walker : default_walker; + block_header_t* block = + offset_to_block(pool, -(int)block_header_overhead); + + while (block && !block_is_last(block)) + { + pool_walker( + block_to_ptr(block), + block_size(block), + !block_is_free(block), + user); + block = block_next(block); + } +} + +size_t tlsf_block_size(void* ptr) +{ + size_t size = 0; + if (ptr) + { + const block_header_t* block = block_from_ptr(ptr); + size = block_size(block); + } + return size; +} + +int tlsf_check_pool(pool_t pool) +{ + /* Check that the blocks are physically correct. */ + integrity_t integ = { 0, 0 }; + tlsf_walk_pool(pool, integrity_walker, &integ); + + return integ.status; +} + +/* +** Size of the TLSF structures in a given memory block passed to +** tlsf_create, equal to the size of a control_t +*/ +size_t tlsf_size(void) +{ + return sizeof(control_t); +} + +size_t tlsf_align_size(void) +{ + return ALIGN_SIZE; +} + +size_t tlsf_block_size_min(void) +{ + return block_size_min; +} + +size_t tlsf_block_size_max(void) +{ + return block_size_max; +} + +/* +** Overhead of the TLSF structures in a given memory block passed to +** tlsf_add_pool, equal to the overhead of a free block and the +** sentinel block. +*/ +size_t tlsf_pool_overhead(void) +{ + return 2 * block_header_overhead; +} + +size_t tlsf_alloc_overhead(void) +{ + return block_header_overhead; +} + +pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes) +{ + block_header_t* block; + block_header_t* next; + + const size_t pool_overhead = tlsf_pool_overhead(); + const size_t pool_bytes = align_down(bytes - pool_overhead, ALIGN_SIZE); + + if (((ptrdiff_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_add_pool: Memory must be aligned by %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + if (pool_bytes < block_size_min || pool_bytes > block_size_max) + { +#if defined (TLSF_64BIT) + printf("tlsf_add_pool: Memory size must be between 0x%x and 0x%x00 bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)((pool_overhead + block_size_max) / 256)); +#else + printf("tlsf_add_pool: Memory size must be between %u and %u bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)(pool_overhead + block_size_max)); +#endif + return 0; + } + + /* + ** Create the main free block. Offset the start of the block slightly + ** so that the prev_phys_block field falls outside of the pool - + ** it will never be used. + */ + block = offset_to_block(mem, -(tlsfptr_t)block_header_overhead); + block_set_size(block, pool_bytes); + block_set_free(block); + block_set_prev_used(block); + block_insert(tlsf_cast(control_t*, tlsf), block); + + /* Split the block to create a zero-size sentinel block. 
*/ + next = block_link_next(block); + block_set_size(next, 0); + block_set_used(next); + block_set_prev_free(next); + + return mem; +} + +void tlsf_remove_pool(tlsf_t tlsf, pool_t pool) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = offset_to_block(pool, -(int)block_header_overhead); + + int fl = 0, sl = 0; + + tlsf_assert(block_is_free(block) && "block should be free"); + tlsf_assert(!block_is_free(block_next(block)) && "next block should not be free"); + tlsf_assert(block_size(block_next(block)) == 0 && "next block size should be zero"); + + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} + +/* +** TLSF main interface. +*/ + +#if _DEBUG +int test_ffs_fls() +{ + /* Verify ffs/fls work properly. */ + int rv = 0; + rv += (tlsf_ffs(0) == -1) ? 0 : 0x1; + rv += (tlsf_fls(0) == -1) ? 0 : 0x2; + rv += (tlsf_ffs(1) == 0) ? 0 : 0x4; + rv += (tlsf_fls(1) == 0) ? 0 : 0x8; + rv += (tlsf_ffs(0x80000000) == 31) ? 0 : 0x10; + rv += (tlsf_ffs(0x80008000) == 15) ? 0 : 0x20; + rv += (tlsf_fls(0x80000008) == 31) ? 0 : 0x40; + rv += (tlsf_fls(0x7FFFFFFF) == 30) ? 0 : 0x80; + +#if defined (TLSF_64BIT) + rv += (tlsf_fls_sizet(0x80000000) == 31) ? 0 : 0x100; + rv += (tlsf_fls_sizet(0x100000000) == 32) ? 0 : 0x200; + rv += (tlsf_fls_sizet(0xffffffffffffffff) == 63) ? 0 : 0x400; +#endif + + if (rv) + { + printf("test_ffs_fls: %x ffs/fls tests failed.\n", rv); + } + return rv; +} +#endif + +tlsf_t tlsf_create(void* mem) +{ +#if _DEBUG + if (test_ffs_fls()) + { + return 0; + } +#endif + + if (((tlsfptr_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_create: Memory must be aligned to %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + control_construct(tlsf_cast(control_t*, mem)); + + return tlsf_cast(tlsf_t, mem); +} + +tlsf_t tlsf_create_with_pool(void* mem, size_t bytes) +{ + tlsf_t tlsf = tlsf_create(mem); + tlsf_add_pool(tlsf, (char*)mem + tlsf_size(), bytes - tlsf_size()); + return tlsf; +} + +void tlsf_destroy(tlsf_t tlsf) +{ + /* Nothing to do. */ + (void)tlsf; +} + +pool_t tlsf_get_pool(tlsf_t tlsf) +{ + return tlsf_cast(pool_t, (char*)tlsf + tlsf_size()); +} + +void* tlsf_malloc(tlsf_t tlsf, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + block_header_t* block = block_locate_free(control, adjust); + return block_prepare_used(control, block, adjust); +} + +void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + + /* + ** We must allocate an additional minimum block size bytes so that if + ** our free block will leave an alignment gap which is smaller, we can + ** trim a leading free block and release it back to the pool. We must + ** do this because the previous physical block is in use, therefore + ** the prev_phys_block field is not valid, and we can't simply adjust + ** the size of that block. + */ + const size_t gap_minimum = sizeof(block_header_t); + const size_t size_with_gap = adjust_request_size(adjust + align + gap_minimum, align); + + /* + ** If alignment is less than or equals base alignment, we're done. + ** If we requested 0 bytes, return null, as tlsf_malloc(0) does. + */ + const size_t aligned_size = (adjust && align > ALIGN_SIZE) ? size_with_gap : adjust; + + block_header_t* block = block_locate_free(control, aligned_size); + + /* This can't be a static assert. 
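+	** (block_size_min is a static const object rather than an integer
+	** constant expression, so the tlsf_static_assert mechanism above
+	** cannot evaluate it at compile time.)
+	** Illustration of the gap handling below: tlsf_memalign(t, 64, 100)
+	** computes adjust == 100 and size_with_gap == 192 on a 32-bit build,
+	** and any leading gap is padded to at least sizeof(block_header_t)
+	** bytes before being split off and returned to the pool.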
*/ + tlsf_assert(sizeof(block_header_t) == block_size_min + block_header_overhead); + + if (block) + { + void* ptr = block_to_ptr(block); + void* aligned = align_ptr(ptr, align); + size_t gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + + /* If gap size is too small, offset to next aligned boundary. */ + if (gap && gap < gap_minimum) + { + const size_t gap_remain = gap_minimum - gap; + const size_t offset = tlsf_max(gap_remain, align); + const void* next_aligned = tlsf_cast(void*, + tlsf_cast(tlsfptr_t, aligned) + offset); + + aligned = align_ptr(next_aligned, align); + gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + } + + if (gap) + { + tlsf_assert(gap >= gap_minimum && "gap size too small"); + block = block_trim_free_leading(control, block, gap); + } + } + + return block_prepare_used(control, block, adjust); +} + +void tlsf_free(tlsf_t tlsf, void* ptr) +{ + /* Don't attempt to free a NULL pointer. */ + if (ptr) + { + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = block_from_ptr(ptr); + tlsf_assert(!block_is_free(block) && "block already marked as free"); + block_mark_as_free(block); + block = block_merge_prev(control, block); + block = block_merge_next(control, block); + block_insert(control, block); + } +} + +/* +** The TLSF block information provides us with enough information to +** provide a reasonably intelligent implementation of realloc, growing or +** shrinking the currently allocated block as required. +** +** This routine handles the somewhat esoteric edge cases of realloc: +** - a non-zero size with a null pointer will behave like malloc +** - a zero size with a non-null pointer will behave like free +** - a request that cannot be satisfied will leave the original buffer +** untouched +** - an extended buffer size will leave the newly-allocated area with +** contents undefined +*/ +void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + void* p = 0; + + /* Zero-size requests are treated as free. */ + if (ptr && size == 0) + { + tlsf_free(tlsf, ptr); + } + /* Requests with NULL pointers are treated as malloc. */ + else if (!ptr) + { + p = tlsf_malloc(tlsf, size); + } + else + { + block_header_t* block = block_from_ptr(ptr); + block_header_t* next = block_next(block); + + const size_t cursize = block_size(block); + const size_t combined = cursize + block_size(next) + block_header_overhead; + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + + tlsf_assert(!block_is_free(block) && "block already marked as free"); + + /* + ** If the next block is used, or when combined with the current + ** block, does not offer enough space, we must reallocate and copy. + */ + if (adjust > cursize && (!block_is_free(next) || adjust > combined)) + { + p = tlsf_malloc(tlsf, size); + if (p) + { + const size_t minsize = tlsf_min(cursize, size); + memcpy(p, ptr, minsize); + tlsf_free(tlsf, ptr); + } + } + else + { + /* Do we need to expand to the next block? */ + if (adjust > cursize) + { + block_merge_next(control, block); + block_mark_as_used(block); + } + + /* Trim the resulting block and return the original pointer. 
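+			** Example (illustration only): growing a used 32-byte
+			** block to 40 bytes with a free 24-byte neighbour
+			** following: combined == 32 + 24 + 4 == 60 >= 40, so the
+			** blocks merge in place above and no memcpy is required.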
*/
+			block_trim_used(control, block, adjust);
+			p = ptr;
+		}
+	}
+
+	return p;
+}
diff --git a/deps/lightrec/tlsf/tlsf.h b/deps/lightrec/tlsf/tlsf.h
new file mode 100644
index 00000000..e9b5a91c
--- /dev/null
+++ b/deps/lightrec/tlsf/tlsf.h
@@ -0,0 +1,90 @@
+#ifndef INCLUDED_tlsf
+#define INCLUDED_tlsf
+
+/*
+** Two Level Segregated Fit memory allocator, version 3.1.
+** Written by Matthew Conte
+** http://tlsf.baisoku.org
+**
+** Based on the original documentation by Miguel Masmano:
+** http://www.gii.upv.es/tlsf/main/docs
+**
+** This implementation was written to the specification
+** of the document, therefore no GPL restrictions apply.
+**
+** Copyright (c) 2006-2016, Matthew Conte
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without
+** modification, are permitted provided that the following conditions are met:
+**     * Redistributions of source code must retain the above copyright
+**       notice, this list of conditions and the following disclaimer.
+**     * Redistributions in binary form must reproduce the above copyright
+**       notice, this list of conditions and the following disclaimer in the
+**       documentation and/or other materials provided with the distribution.
+**     * Neither the name of the copyright holder nor the
+**       names of its contributors may be used to endorse or promote products
+**       derived from this software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+** DISCLAIMED. IN NO EVENT SHALL MATTHEW CONTE BE LIABLE FOR ANY
+** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stddef.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* tlsf_t: a TLSF structure. Can contain 1 to N pools. */
+/* pool_t: a block of memory that TLSF can manage. */
+typedef void* tlsf_t;
+typedef void* pool_t;
+
+/* Create/destroy a memory pool. */
+tlsf_t tlsf_create(void* mem);
+tlsf_t tlsf_create_with_pool(void* mem, size_t bytes);
+void tlsf_destroy(tlsf_t tlsf);
+pool_t tlsf_get_pool(tlsf_t tlsf);
+
+/* Add/remove memory pools. */
+pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes);
+void tlsf_remove_pool(tlsf_t tlsf, pool_t pool);
+
+/* malloc/memalign/realloc/free replacements. */
+void* tlsf_malloc(tlsf_t tlsf, size_t bytes);
+void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t bytes);
+void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size);
+void tlsf_free(tlsf_t tlsf, void* ptr);
+
+/* Returns internal block size, not original request size */
+size_t tlsf_block_size(void* ptr);
+
+/* Overheads/limits of internal structures. */
+size_t tlsf_size(void);
+size_t tlsf_align_size(void);
+size_t tlsf_block_size_min(void);
+size_t tlsf_block_size_max(void);
+size_t tlsf_pool_overhead(void);
+size_t tlsf_alloc_overhead(void);
+
+/* Debugging.
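+** A minimal usage sketch (illustration only; `arena' is a hypothetical
+** caller-supplied buffer that must be tlsf_align_size()-aligned):
+**
+**   static char arena[1 << 20];
+**   tlsf_t t = tlsf_create_with_pool(arena, sizeof(arena));
+**   void* p = tlsf_malloc(t, 256);
+**   tlsf_walk_pool(tlsf_get_pool(t), 0, 0);  -- dumps blocks via the default walker
+**   tlsf_free(t, p);
+**   tlsf_destroy(t);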
*/ +typedef void (*tlsf_walker)(void* ptr, size_t size, int used, void* user); +void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user); +/* Returns nonzero if any internal consistency check fails. */ +int tlsf_check(tlsf_t tlsf); +int tlsf_check_pool(pool_t pool); + +#if defined(__cplusplus) +}; +#endif + +#endif -- 2.39.5