git subrepo pull --force deps/lightrec
author: Paul Cercueil <paul@crapouillou.net>
Mon, 30 May 2022 17:00:15 +0000 (18:00 +0100)
committer: Paul Cercueil <paul@crapouillou.net>
Mon, 30 May 2022 17:26:11 +0000 (18:26 +0100)
subrepo:
  subdir:   "deps/lightrec"
  merged:   "49ef275a"
upstream:
  origin:   "https://github.com/pcercuei/lightrec.git"
  branch:   "master"
  commit:   "49ef275a"
git-subrepo:
  version:  "0.4.3"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "2f68596"

14 files changed:
deps/lightrec/.gitrepo
deps/lightrec/CMakeLists.txt
deps/lightrec/blockcache.c
deps/lightrec/disassembler.c
deps/lightrec/emitter.c
deps/lightrec/lightrec-config.h.cmakein
deps/lightrec/lightrec-private.h
deps/lightrec/lightrec.c
deps/lightrec/lightrec.h
deps/lightrec/optimizer.c
deps/lightrec/tlsf/.gitrepo [new file with mode: 0644]
deps/lightrec/tlsf/README.md [new file with mode: 0644]
deps/lightrec/tlsf/tlsf.c [new file with mode: 0644]
deps/lightrec/tlsf/tlsf.h [new file with mode: 0644]

index ae2fc77..c9d423a 100644 (file)
@@ -6,7 +6,7 @@
 [subrepo]
        remote = https://github.com/pcercuei/lightrec.git
        branch = master
-       commit = de06670b257004d98d30e8585a4e6530e77d3acd
-       parent = e24732050e902bd5402b2b7da7c391d2ca8fa799
+       commit = 49ef275a66aad8540ab73b09b0dd2128ebe4d6dc
+       parent = a0467ff492a25521867fcfb7d66b9c617017151a
        method = merge
        cmdver = 0.4.3
index 9ff58d6..7b28518 100644 (file)
@@ -108,6 +108,16 @@ if (ENABLE_THREADED_COMPILER)
        target_link_libraries(${PROJECT_NAME} PRIVATE ${PTHREAD_LIBRARIES})
 endif (ENABLE_THREADED_COMPILER)
 
+option(ENABLE_CODE_BUFFER "Enable external code buffer" OFF)
+if (ENABLE_CODE_BUFFER)
+       target_sources(${PROJECT_NAME} PRIVATE tlsf/tlsf.c)
+       target_include_directories(${PROJECT_NAME} PRIVATE tlsf)
+endif (ENABLE_CODE_BUFFER)
+
+if (ENABLE_CODE_BUFFER AND ENABLE_THREADED_COMPILER)
+       message(SEND_ERROR "External code buffer cannot be used along with the threaded compiler")
+endif (ENABLE_CODE_BUFFER AND ENABLE_THREADED_COMPILER)
+
 find_library(LIBLIGHTNING lightning REQUIRED)
 find_path(LIBLIGHTNING_INCLUDE_DIR lightning.h REQUIRED)
 
index 4512392..2182f29 100644 (file)
@@ -63,8 +63,8 @@ void remove_from_code_lut(struct blockcache *cache, struct block *block)
        u32 offset = lut_offset(block->pc);
 
        if (block->function) {
-               memset(&state->code_lut[offset], 0,
-                      block->nb_ops * sizeof(*state->code_lut));
+               memset(lut_address(state, offset), 0,
+                      block->nb_ops * lut_elm_size(state));
        }
 }
 
@@ -152,10 +152,11 @@ u32 lightrec_calculate_block_hash(const struct block *block)
 
 bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block)
 {
-       void **lut_entry = &state->code_lut[lut_offset(block->pc)];
+       u32 offset = lut_offset(block->pc);
        bool outdated;
+       void *addr;
 
-       if (*lut_entry)
+       if (lut_read(state, offset))
                return false;
 
        outdated = block->hash != lightrec_calculate_block_hash(block);
@@ -163,9 +164,11 @@ bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *bloc
                /* The block was marked as outdated, but the content is still
                 * the same */
                if (block->function)
-                       *lut_entry = block->function;
+                       addr = block->function;
                else
-                       *lut_entry = state->get_next_block;
+                       addr = state->get_next_block;
+
+               lut_write(state, offset, addr);
        }
 
        return outdated;
index 43ac677..cb332c6 100644 (file)
@@ -382,22 +382,23 @@ static int print_op(union code c, u32 pc, char *buf, size_t len,
        }
 }
 
-void lightrec_print_disassembly(const struct block *block, const u32 *code)
+void lightrec_print_disassembly(const struct block *block, const u32 *code_ptr)
 {
        const struct opcode *op;
        const char **flags_ptr;
        size_t nb_flags, count, count2;
        char buf[256], buf2[256], buf3[256];
        unsigned int i;
-       u32 pc, branch_pc;
+       u32 pc, branch_pc, code;
        bool is_io;
 
        for (i = 0; i < block->nb_ops; i++) {
                op = &block->opcode_list[i];
                branch_pc = get_branch_pc(block, i, 0);
                pc = block->pc + (i << 2);
+               code = LE32TOH(code_ptr[i]);
 
-               count = print_op((union code)code[i], pc, buf, sizeof(buf),
+               count = print_op((union code)code, pc, buf, sizeof(buf),
                                 &flags_ptr, &nb_flags, &is_io);
 
                flags_ptr = NULL;
@@ -406,7 +407,7 @@ void lightrec_print_disassembly(const struct block *block, const u32 *code)
                count2 = print_op(op->c, branch_pc, buf2, sizeof(buf2),
                                  &flags_ptr, &nb_flags, &is_io);
 
-               if (code[i] == op->c.opcode) {
+               if (code == op->c.opcode) {
                        *buf2 = '\0';
                        count2 = 0;
                }
index fa74cc0..fd28935 100644 (file)
@@ -392,11 +392,34 @@ static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *bl
        lightrec_free_reg(reg_cache, rd);
 }
 
+static void rec_movi(struct lightrec_cstate *state,
+                    const struct block *block, u16 offset)
+{
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       u16 flags = REG_EXT;
+       u8 rt;
+
+       if (!(c.i.imm & 0x8000))
+               flags |= REG_ZEXT;
+
+       rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
+
+       jit_movi(rt, (s32)(s16) c.i.imm);
+
+       lightrec_free_reg(reg_cache, rt);
+}
+
 static void rec_ADDIU(struct lightrec_cstate *state,
                      const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_alu_imm(state, block, offset, jit_code_addi, false);
+
+       if (block->opcode_list[offset].c.i.rs)
+               rec_alu_imm(state, block, offset, jit_code_addi, false);
+       else
+               rec_movi(state, block, offset);
 }
 
 static void rec_ADDI(struct lightrec_cstate *state,
@@ -404,7 +427,7 @@ static void rec_ADDI(struct lightrec_cstate *state,
 {
        /* TODO: Handle the exception? */
        _jit_name(block->_jit, __func__);
-       rec_alu_imm(state, block, offset, jit_code_addi, false);
+       rec_ADDIU(state, block, offset);
 }
 
 static void rec_SLTIU(struct lightrec_cstate *state,
@@ -1022,22 +1045,31 @@ static void rec_io(struct lightrec_cstate *state,
        }
 }
 
+static u32 rec_ram_mask(struct lightrec_state *state)
+{
+       return (RAM_SIZE << (state->mirrors_mapped * 2)) - 1;
+}
+
 static void rec_store_memory(struct lightrec_cstate *cstate,
                             const struct block *block,
                             u16 offset, jit_code_t code,
+                            jit_code_t swap_code,
                             uintptr_t addr_offset, u32 addr_mask,
                             bool invalidate)
 {
+       const struct lightrec_state *state = cstate->state;
        struct regcache *reg_cache = cstate->reg_cache;
        struct opcode *op = &block->opcode_list[offset];
        jit_state_t *_jit = block->_jit;
        union code c = op->c;
        u8 rs, rt, tmp, tmp2, tmp3, addr_reg, addr_reg2;
        s16 imm = (s16)c.i.imm;
-       s32 simm = (s32)imm << (__WORDSIZE / 32 - 1);
+       s32 simm = (s32)imm << (1 - lut_is_32bit(state));
        s32 lut_offt = offsetof(struct lightrec_state, code_lut);
        bool no_mask = op->flags & LIGHTREC_NO_MASK;
-       bool add_imm = c.i.imm && invalidate && simm + lut_offt != (s16)(simm + lut_offt);
+       bool add_imm = c.i.imm &&
+               ((!state->mirrors_mapped && !no_mask) || (invalidate &&
+               ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt))));
        bool need_tmp = !no_mask || addr_offset || add_imm;
        bool need_tmp2 = addr_offset || invalidate;
 
@@ -1071,7 +1103,17 @@ static void rec_store_memory(struct lightrec_cstate *cstate,
                addr_reg2 = addr_reg;
        }
 
-       jit_new_node_www(code, imm, addr_reg2, rt);
+       if (is_big_endian() && swap_code && c.i.rt) {
+               tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               jit_new_node_ww(swap_code, tmp3, rt);
+               jit_new_node_www(code, imm, addr_reg2, tmp3);
+
+               lightrec_free_reg(reg_cache, tmp3);
+       } else {
+               jit_new_node_www(code, imm, addr_reg2, rt);
+       }
+
        lightrec_free_reg(reg_cache, rt);
 
        if (invalidate) {
@@ -1082,17 +1124,22 @@ static void rec_store_memory(struct lightrec_cstate *cstate,
                        addr_reg = tmp2;
                }
 
-               if (__WORDSIZE == 64) {
+               if (!lut_is_32bit(state)) {
                        jit_lshi(tmp2, addr_reg, 1);
                        addr_reg = tmp2;
                }
 
-               if (__WORDSIZE == 64 || addr_reg != rs || c.i.rs != 0) {
+               if (addr_reg == rs && c.i.rs == 0) {
+                       addr_reg = LIGHTREC_REG_STATE;
+               } else {
                        jit_addr(tmp2, addr_reg, LIGHTREC_REG_STATE);
                        addr_reg = tmp2;
                }
 
-               jit_stxi(lut_offt, addr_reg, tmp3);
+               if (lut_is_32bit(state))
+                       jit_stxi_i(lut_offt, addr_reg, tmp3);
+               else
+                       jit_stxi(lut_offt, addr_reg, tmp3);
 
                lightrec_free_reg(reg_cache, tmp3);
        }
@@ -1107,29 +1154,32 @@ static void rec_store_memory(struct lightrec_cstate *cstate,
 static void rec_store_ram(struct lightrec_cstate *cstate,
                          const struct block *block,
                          u16 offset, jit_code_t code,
-                         bool invalidate)
+                         jit_code_t swap_code, bool invalidate)
 {
+       struct lightrec_state *state = cstate->state;
+
        _jit_note(block->_jit, __FILE__, __LINE__);
 
-       return rec_store_memory(cstate, block, offset, code,
-                               cstate->state->offset_ram,
-                               RAM_SIZE - 1, invalidate);
+       return rec_store_memory(cstate, block, offset, code, swap_code,
+                               state->offset_ram, rec_ram_mask(state),
+                               invalidate);
 }
 
 static void rec_store_scratch(struct lightrec_cstate *cstate,
-                             const struct block *block,
-                             u16 offset, jit_code_t code)
+                             const struct block *block, u16 offset,
+                             jit_code_t code, jit_code_t swap_code)
 {
        _jit_note(block->_jit, __FILE__, __LINE__);
 
-       return rec_store_memory(cstate, block, offset, code,
+       return rec_store_memory(cstate, block, offset, code, swap_code,
                                cstate->state->offset_scratch,
                                0x1fffffff, false);
 }
 
 static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
                                           const struct block *block,
-                                          u16 offset, jit_code_t code)
+                                          u16 offset, jit_code_t code,
+                                          jit_code_t swap_code)
 {
        struct lightrec_state *state = cstate->state;
        struct regcache *reg_cache = cstate->reg_cache;
@@ -1181,14 +1231,24 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
        }
 
        rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
-       jit_new_node_www(code, imm, tmp, rt);
+
+       if (is_big_endian() && swap_code && c.i.rt) {
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               jit_new_node_ww(swap_code, tmp2, rt);
+               jit_new_node_www(code, imm, tmp, tmp2);
+
+               lightrec_free_reg(reg_cache, tmp2);
+       } else {
+               jit_new_node_www(code, imm, tmp, rt);
+       }
 
        lightrec_free_reg(reg_cache, rt);
        lightrec_free_reg(reg_cache, tmp);
 }
 
 static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block,
-                            u16 offset, jit_code_t code)
+                            u16 offset, jit_code_t code, jit_code_t swap_code)
 {
        struct lightrec_state *state = cstate->state;
        u32 ram_size = state->mirrors_mapped ? RAM_SIZE * 4 : RAM_SIZE;
@@ -1219,12 +1279,15 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block
 
        /* Compute the offset to the code LUT */
        jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3);
-       if (__WORDSIZE == 64)
+       if (!lut_is_32bit(state))
                jit_lshi(tmp, tmp, 1);
        jit_addr(tmp, LIGHTREC_REG_STATE, tmp);
 
        /* Write NULL to the code LUT to invalidate any block that's there */
-       jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
+       if (lut_is_32bit(state))
+               jit_stxi_i(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
+       else
+               jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
 
        if (state->offset_ram != state->offset_scratch) {
                jit_movi(tmp, state->offset_ram);
@@ -1247,14 +1310,25 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block
        lightrec_free_reg(reg_cache, tmp3);
 
        rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
-       jit_new_node_www(code, 0, tmp2, rt);
+
+       if (is_big_endian() && swap_code && c.i.rt) {
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               jit_new_node_ww(swap_code, tmp, rt);
+               jit_new_node_www(code, 0, tmp2, tmp);
+
+               lightrec_free_reg(reg_cache, tmp);
+       } else {
+               jit_new_node_www(code, 0, tmp2, rt);
+       }
 
        lightrec_free_reg(reg_cache, rt);
        lightrec_free_reg(reg_cache, tmp2);
 }
 
 static void rec_store(struct lightrec_cstate *state,
-                     const struct block *block, u16 offset, jit_code_t code)
+                     const struct block *block, u16 offset,
+                     jit_code_t code, jit_code_t swap_code)
 {
        u16 flags = block->opcode_list[offset].flags;
        bool no_invalidate = (flags & LIGHTREC_NO_INVALIDATE) ||
@@ -1262,16 +1336,19 @@ static void rec_store(struct lightrec_cstate *state,
 
        switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
        case LIGHTREC_IO_RAM:
-               rec_store_ram(state, block, offset, code, !no_invalidate);
+               rec_store_ram(state, block, offset, code,
+                             swap_code, !no_invalidate);
                break;
        case LIGHTREC_IO_SCRATCH:
-               rec_store_scratch(state, block, offset, code);
+               rec_store_scratch(state, block, offset, code, swap_code);
                break;
        case LIGHTREC_IO_DIRECT:
-               if (no_invalidate)
-                       rec_store_direct_no_invalidate(state, block, offset, code);
-               else
-                       rec_store_direct(state, block, offset, code);
+               if (no_invalidate) {
+                       rec_store_direct_no_invalidate(state, block, offset,
+                                                      code, swap_code);
+               } else {
+                       rec_store_direct(state, block, offset, code, swap_code);
+               }
                break;
        default:
                rec_io(state, block, offset, true, false);
@@ -1283,14 +1360,15 @@ static void rec_SB(struct lightrec_cstate *state,
                   const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_store(state, block, offset, jit_code_stxi_c);
+       rec_store(state, block, offset, jit_code_stxi_c, 0);
 }
 
 static void rec_SH(struct lightrec_cstate *state,
                   const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_store(state, block, offset, jit_code_stxi_s);
+       rec_store(state, block, offset,
+                 jit_code_stxi_s, jit_code_bswapr_us);
 }
 
 static void rec_SW(struct lightrec_cstate *state,
@@ -1298,7 +1376,8 @@ static void rec_SW(struct lightrec_cstate *state,
 
 {
        _jit_name(block->_jit, __func__);
-       rec_store(state, block, offset, jit_code_stxi_i);
+       rec_store(state, block, offset,
+                 jit_code_stxi_i, jit_code_bswapr_ui);
 }
 
 static void rec_SWL(struct lightrec_cstate *state,
@@ -1323,15 +1402,17 @@ static void rec_SWC2(struct lightrec_cstate *state,
 }
 
 static void rec_load_memory(struct lightrec_cstate *cstate,
-                           const struct block *block,
-                           u16 offset, jit_code_t code, bool is_unsigned,
+                           const struct block *block, u16 offset,
+                           jit_code_t code, jit_code_t swap_code, bool is_unsigned,
                            uintptr_t addr_offset, u32 addr_mask)
 {
        struct regcache *reg_cache = cstate->reg_cache;
        struct opcode *op = &block->opcode_list[offset];
        jit_state_t *_jit = block->_jit;
        u8 rs, rt, addr_reg, flags = REG_EXT;
+       bool no_mask = op->flags & LIGHTREC_NO_MASK;
        union code c = op->c;
+       s16 imm;
 
        if (!c.i.rt)
                return;
@@ -1342,11 +1423,18 @@ static void rec_load_memory(struct lightrec_cstate *cstate,
        rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
        rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
 
-       if (!(op->flags & LIGHTREC_NO_MASK)) {
-               jit_andi(rt, rs, addr_mask);
+       if (!cstate->state->mirrors_mapped && c.i.imm && !no_mask) {
+               jit_addi(rt, rs, (s16)c.i.imm);
                addr_reg = rt;
+               imm = 0;
        } else {
                addr_reg = rs;
+               imm = (s16)c.i.imm;
+       }
+
+       if (!no_mask) {
+               jit_andi(rt, addr_reg, addr_mask);
+               addr_reg = rt;
        }
 
        if (addr_offset) {
@@ -1354,44 +1442,55 @@ static void rec_load_memory(struct lightrec_cstate *cstate,
                addr_reg = rt;
        }
 
-       jit_new_node_www(code, rt, addr_reg, (s16)c.i.imm);
+       jit_new_node_www(code, rt, addr_reg, imm);
+
+       if (is_big_endian() && swap_code) {
+               jit_new_node_ww(swap_code, rt, rt);
+
+               if (c.i.op == OP_LH)
+                       jit_extr_s(rt, rt);
+               else if (c.i.op == OP_LW && __WORDSIZE == 64)
+                       jit_extr_i(rt, rt);
+       }
 
        lightrec_free_reg(reg_cache, rs);
        lightrec_free_reg(reg_cache, rt);
 }
 
 static void rec_load_ram(struct lightrec_cstate *cstate,
-                        const struct block *block,
-                        u16 offset, jit_code_t code, bool is_unsigned)
+                        const struct block *block, u16 offset,
+                        jit_code_t code, jit_code_t swap_code, bool is_unsigned)
 {
        _jit_note(block->_jit, __FILE__, __LINE__);
 
-       rec_load_memory(cstate, block, offset, code, is_unsigned,
-                       cstate->state->offset_ram, RAM_SIZE - 1);
+       rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
+                       cstate->state->offset_ram, rec_ram_mask(cstate->state));
 }
 
 static void rec_load_bios(struct lightrec_cstate *cstate,
-                         const struct block *block,
-                         u16 offset, jit_code_t code, bool is_unsigned)
+                         const struct block *block, u16 offset,
+                         jit_code_t code, jit_code_t swap_code, bool is_unsigned)
 {
        _jit_note(block->_jit, __FILE__, __LINE__);
 
-       rec_load_memory(cstate, block, offset, code, is_unsigned,
+       rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
                        cstate->state->offset_bios, 0x1fffffff);
 }
 
 static void rec_load_scratch(struct lightrec_cstate *cstate,
-                            const struct block *block,
-                            u16 offset, jit_code_t code, bool is_unsigned)
+                            const struct block *block, u16 offset,
+                            jit_code_t code, jit_code_t swap_code, bool is_unsigned)
 {
        _jit_note(block->_jit, __FILE__, __LINE__);
 
-       rec_load_memory(cstate, block, offset, code, is_unsigned,
+       rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
                        cstate->state->offset_scratch, 0x1fffffff);
 }
 
-static void rec_load_direct(struct lightrec_cstate *cstate, const struct block *block,
-                           u16 offset, jit_code_t code, bool is_unsigned)
+static void rec_load_direct(struct lightrec_cstate *cstate,
+                           const struct block *block, u16 offset,
+                           jit_code_t code, jit_code_t swap_code,
+                           bool is_unsigned)
 {
        struct lightrec_state *state = cstate->state;
        struct regcache *reg_cache = cstate->reg_cache;
@@ -1483,28 +1582,38 @@ static void rec_load_direct(struct lightrec_cstate *cstate, const struct block *
 
        jit_new_node_www(code, rt, rt, imm);
 
+       if (is_big_endian() && swap_code) {
+               jit_new_node_ww(swap_code, rt, rt);
+
+               if (c.i.op == OP_LH)
+                       jit_extr_s(rt, rt);
+               else if (c.i.op == OP_LW && __WORDSIZE == 64)
+                       jit_extr_i(rt, rt);
+       }
+
        lightrec_free_reg(reg_cache, addr_reg);
        lightrec_free_reg(reg_cache, rt);
        lightrec_free_reg(reg_cache, tmp);
 }
 
 static void rec_load(struct lightrec_cstate *state, const struct block *block,
-                    u16 offset, jit_code_t code, bool is_unsigned)
+                    u16 offset, jit_code_t code, jit_code_t swap_code,
+                    bool is_unsigned)
 {
        u16 flags = block->opcode_list[offset].flags;
 
        switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
        case LIGHTREC_IO_RAM:
-               rec_load_ram(state, block, offset, code, is_unsigned);
+               rec_load_ram(state, block, offset, code, swap_code, is_unsigned);
                break;
        case LIGHTREC_IO_BIOS:
-               rec_load_bios(state, block, offset, code, is_unsigned);
+               rec_load_bios(state, block, offset, code, swap_code, is_unsigned);
                break;
        case LIGHTREC_IO_SCRATCH:
-               rec_load_scratch(state, block, offset, code, is_unsigned);
+               rec_load_scratch(state, block, offset, code, swap_code, is_unsigned);
                break;
        case LIGHTREC_IO_DIRECT:
-               rec_load_direct(state, block, offset, code, is_unsigned);
+               rec_load_direct(state, block, offset, code, swap_code, is_unsigned);
                break;
        default:
                rec_io(state, block, offset, false, true);
@@ -1515,25 +1624,25 @@ static void rec_load(struct lightrec_cstate *state, const struct block *block,
 static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_load(state, block, offset, jit_code_ldxi_c, false);
+       rec_load(state, block, offset, jit_code_ldxi_c, 0, false);
 }
 
 static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_load(state, block, offset, jit_code_ldxi_uc, true);
+       rec_load(state, block, offset, jit_code_ldxi_uc, 0, true);
 }
 
 static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_load(state, block, offset, jit_code_ldxi_s, false);
+       rec_load(state, block, offset, jit_code_ldxi_s, jit_code_bswapr_us, false);
 }
 
 static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_load(state, block, offset, jit_code_ldxi_us, true);
+       rec_load(state, block, offset, jit_code_ldxi_us, jit_code_bswapr_us, true);
 }
 
 static void rec_LWL(struct lightrec_cstate *state, const struct block *block, u16 offset)
@@ -1551,7 +1660,7 @@ static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u1
 static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_load(state, block, offset, jit_code_ldxi_i, false);
+       rec_load(state, block, offset, jit_code_ldxi_i, jit_code_bswapr_ui, false);
 }
 
 static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset)
@@ -1759,6 +1868,26 @@ static void rec_cp0_CTC0(struct lightrec_cstate *state,
        rec_mtc0(state, block, offset);
 }
 
+static unsigned int cp2d_i_offset(u8 reg)
+{
+       return offsetof(struct lightrec_state, regs.cp2d[reg]);
+}
+
+static unsigned int cp2d_s_offset(u8 reg)
+{
+       return cp2d_i_offset(reg) + is_big_endian() * 2;
+}
+
+static unsigned int cp2c_i_offset(u8 reg)
+{
+       return offsetof(struct lightrec_state, regs.cp2c[reg]);
+}
+
+static unsigned int cp2c_s_offset(u8 reg)
+{
+       return cp2c_i_offset(reg) + is_big_endian() * 2;
+}
+
 static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
                               const struct block *block, u16 offset)
 {
@@ -1783,16 +1912,14 @@ static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
        case 9:
        case 10:
        case 11:
-               jit_ldxi_s(rt, LIGHTREC_REG_STATE,
-                          offsetof(struct lightrec_state, regs.cp2d[reg]));
+               jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg));
                break;
        case 7:
        case 16:
        case 17:
        case 18:
        case 19:
-               jit_ldxi_us(rt, LIGHTREC_REG_STATE,
-                          offsetof(struct lightrec_state, regs.cp2d[reg]));
+               jit_ldxi_us(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg));
                break;
        case 28:
        case 29:
@@ -1803,8 +1930,7 @@ static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
                for (i = 0; i < 3; i++) {
                        out = i == 0 ? rt : tmp;
 
-                       jit_ldxi_s(tmp, LIGHTREC_REG_STATE,
-                                  offsetof(struct lightrec_state, regs.cp2d[9 + i]));
+                       jit_ldxi_s(tmp, LIGHTREC_REG_STATE, cp2d_s_offset(9 + i));
                        jit_movi(tmp2, 0x1f);
                        jit_rshi(out, tmp, 7);
 
@@ -1826,8 +1952,7 @@ static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
                lightrec_free_reg(reg_cache, tmp3);
                break;
        default:
-               jit_ldxi_i(rt, LIGHTREC_REG_STATE,
-                          offsetof(struct lightrec_state, regs.cp2d[reg]));
+               jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2d_i_offset(reg));
                break;
        }
 
@@ -1853,13 +1978,11 @@ static void rec_cp2_basic_CFC2(struct lightrec_cstate *state,
        case 29:
        case 30:
                rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT);
-               jit_ldxi_s(rt, LIGHTREC_REG_STATE,
-                          offsetof(struct lightrec_state, regs.cp2c[c.r.rd]));
+               jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2c_s_offset(c.r.rd));
                break;
        default:
                rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT);
-               jit_ldxi_i(rt, LIGHTREC_REG_STATE,
-                          offsetof(struct lightrec_state, regs.cp2c[c.r.rd]));
+               jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2c_i_offset(c.r.rd));
                break;
        }
 
@@ -1888,19 +2011,14 @@ static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
        switch (c.r.rd) {
        case 15:
                tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
-               jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
-                          offsetof(struct lightrec_state, regs.cp2d[13]));
+               jit_ldxi_i(tmp, LIGHTREC_REG_STATE, cp2d_i_offset(13));
 
                tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
-               jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
-                          offsetof(struct lightrec_state, regs.cp2d[14]));
+               jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, cp2d_i_offset(14));
 
-               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[12]),
-                          LIGHTREC_REG_STATE, tmp);
-               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[13]),
-                          LIGHTREC_REG_STATE, tmp2);
-               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[14]),
-                          LIGHTREC_REG_STATE, rt);
+               jit_stxi_i(cp2d_i_offset(12), LIGHTREC_REG_STATE, tmp);
+               jit_stxi_i(cp2d_i_offset(13), LIGHTREC_REG_STATE, tmp2);
+               jit_stxi_i(cp2d_i_offset(14), LIGHTREC_REG_STATE, rt);
 
                lightrec_free_reg(reg_cache, tmp);
                lightrec_free_reg(reg_cache, tmp2);
@@ -1910,18 +2028,15 @@ static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
 
                jit_lshi(tmp, rt, 7);
                jit_andi(tmp, tmp, 0xf80);
-               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[9]),
-                          LIGHTREC_REG_STATE, tmp);
+               jit_stxi_s(cp2d_s_offset(9), LIGHTREC_REG_STATE, tmp);
 
                jit_lshi(tmp, rt, 2);
                jit_andi(tmp, tmp, 0xf80);
-               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[10]),
-                          LIGHTREC_REG_STATE, tmp);
+               jit_stxi_s(cp2d_s_offset(10), LIGHTREC_REG_STATE, tmp);
 
                jit_rshi(tmp, rt, 3);
                jit_andi(tmp, tmp, 0xf80);
-               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[11]),
-                          LIGHTREC_REG_STATE, tmp);
+               jit_stxi_s(cp2d_s_offset(11), LIGHTREC_REG_STATE, tmp);
 
                lightrec_free_reg(reg_cache, tmp);
                break;
@@ -1945,17 +2060,14 @@ static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
 
                jit_patch_at(to_loop, loop);
 
-               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[31]),
-                          LIGHTREC_REG_STATE, tmp2);
-               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[30]),
-                          LIGHTREC_REG_STATE, rt);
+               jit_stxi_i(cp2d_i_offset(31), LIGHTREC_REG_STATE, tmp2);
+               jit_stxi_i(cp2d_i_offset(30), LIGHTREC_REG_STATE, rt);
 
                lightrec_free_reg(reg_cache, tmp);
                lightrec_free_reg(reg_cache, tmp2);
                break;
        default:
-               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[c.r.rd]),
-                          LIGHTREC_REG_STATE, rt);
+               jit_stxi_i(cp2d_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
                break;
        }
 
@@ -1982,8 +2094,7 @@ static void rec_cp2_basic_CTC2(struct lightrec_cstate *state,
        case 27:
        case 29:
        case 30:
-               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]),
-                          LIGHTREC_REG_STATE, rt);
+               jit_stxi_s(cp2c_s_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
                break;
        case 31:
                tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
@@ -1996,16 +2107,14 @@ static void rec_cp2_basic_CTC2(struct lightrec_cstate *state,
                jit_andi(tmp2, rt, 0x7ffff000);
                jit_orr(tmp, tmp2, tmp);
 
-               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[31]),
-                          LIGHTREC_REG_STATE, tmp);
+               jit_stxi_i(cp2c_i_offset(31), LIGHTREC_REG_STATE, tmp);
 
                lightrec_free_reg(reg_cache, tmp);
                lightrec_free_reg(reg_cache, tmp2);
                break;
 
        default:
-               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]),
-                          LIGHTREC_REG_STATE, rt);
+               jit_stxi_i(cp2c_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
        }
 
        lightrec_free_reg(reg_cache, rt);
index 3cef2b8..f5524e9 100644 (file)
@@ -10,6 +10,7 @@
 #cmakedefine01 ENABLE_FIRST_PASS
 #cmakedefine01 ENABLE_DISASSEMBLER
 #cmakedefine01 ENABLE_TINYMM
+#cmakedefine01 ENABLE_CODE_BUFFER
 
 #cmakedefine01 HAS_DEFAULT_ELM
 
index 75940b3..87565a6 100644 (file)
@@ -6,6 +6,7 @@
 #ifndef __LIGHTREC_PRIVATE_H__
 #define __LIGHTREC_PRIVATE_H__
 
+#include "lightning-wrapper.h"
 #include "lightrec-config.h"
 #include "disassembler.h"
 #include "lightrec.h"
@@ -135,6 +136,7 @@ struct lightrec_state {
        struct recompiler *rec;
        struct lightrec_cstate *cstate;
        struct reaper *reaper;
+       void *tlsf;
        void (*eob_wrapper_func)(void);
        void (*memset_func)(void);
        void (*get_next_block)(void);
@@ -143,6 +145,7 @@ struct lightrec_state {
        unsigned int nb_maps;
        const struct lightrec_mem_map *maps;
        uintptr_t offset_ram, offset_bios, offset_scratch;
+       _Bool with_32bit_lut;
        _Bool mirrors_mapped;
        _Bool invalidate_from_dma_only;
        void *code_lut[];
@@ -156,6 +159,9 @@ void lightrec_free_block(struct lightrec_state *state, struct block *block);
 
 void remove_from_code_lut(struct blockcache *cache, struct block *block);
 
+enum psx_map
+lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr);
+
 const struct lightrec_mem_map *
 lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr);
 
@@ -175,6 +181,50 @@ static inline u32 lut_offset(u32 pc)
                return (pc & (RAM_SIZE - 1)) >> 2; // RAM
 }
 
+static inline _Bool is_big_endian(void)
+{
+       return __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__;
+}
+
+static inline _Bool lut_is_32bit(const struct lightrec_state *state)
+{
+       return __WORDSIZE == 32 ||
+               (ENABLE_CODE_BUFFER && state->with_32bit_lut);
+}
+
+static inline size_t lut_elm_size(const struct lightrec_state *state)
+{
+       return lut_is_32bit(state) ? 4 : sizeof(void *);
+}
+
+static inline void ** lut_address(struct lightrec_state *state, u32 offset)
+{
+       if (lut_is_32bit(state))
+               return (void **) ((uintptr_t) state->code_lut + offset * 4);
+       else
+               return &state->code_lut[offset];
+}
+
+static inline void * lut_read(struct lightrec_state *state, u32 offset)
+{
+       void **lut_entry = lut_address(state, lut_offset(offset));
+
+       if (lut_is_32bit(state))
+               return (void *)(uintptr_t) *(u32 *) lut_entry;
+       else
+               return *lut_entry;
+}
+
+static inline void lut_write(struct lightrec_state *state, u32 offset, void *ptr)
+{
+       void **lut_entry = lut_address(state, offset);
+
+       if (lut_is_32bit(state))
+               *(u32 *) lut_entry = (u32)(uintptr_t) ptr;
+       else
+               *lut_entry = ptr;
+}
+
 static inline u32 get_ds_pc(const struct block *block, u16 offset, s16 imm)
 {
        u16 flags = block->opcode_list[offset].flags;
index 9889272..d172a30 100644 (file)
@@ -16,6 +16,7 @@
 #include "recompiler.h"
 #include "regcache.h"
 #include "optimizer.h"
+#include "tlsf/tlsf.h"
 
 #include <errno.h>
 #include <inttypes.h>
@@ -198,30 +199,39 @@ static void lightrec_invalidate_map(struct lightrec_state *state,
                const struct lightrec_mem_map *map, u32 addr, u32 len)
 {
        if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) {
-               memset(&state->code_lut[lut_offset(addr)], 0,
-                      ((len + 3) / 4) * sizeof(void *));
+               memset(lut_address(state, lut_offset(addr)), 0,
+                      ((len + 3) / 4) * lut_elm_size(state));
        }
 }
 
-const struct lightrec_mem_map *
-lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr)
+enum psx_map
+lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr)
 {
        const struct lightrec_mem_map *map;
        unsigned int i;
-       u32 addr;
 
        for (i = 0; i < state->nb_maps; i++) {
-               const struct lightrec_mem_map *mapi = &state->maps[i];
+               map = &state->maps[i];
 
-               if (kaddr >= mapi->pc && kaddr < mapi->pc + mapi->length) {
-                       map = mapi;
-                       break;
-               }
+               if (kaddr >= map->pc && kaddr < map->pc + map->length)
+                       return (enum psx_map) i;
        }
 
-       if (i == state->nb_maps)
+       return PSX_MAP_UNKNOWN;
+}
+
+const struct lightrec_mem_map *
+lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr)
+{
+       const struct lightrec_mem_map *map;
+       enum psx_map idx;
+       u32 addr;
+
+       idx = lightrec_get_map_idx(state, kaddr);
+       if (idx == PSX_MAP_UNKNOWN)
                return NULL;
 
+       map = &state->maps[idx];
        addr = kaddr - map->pc;
 
        while (map->mirror_of)
@@ -615,7 +625,7 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
        void *func;
 
        for (;;) {
-               func = state->code_lut[lut_offset(pc)];
+               func = lut_read(state, pc);
                if (func && func != state->get_next_block)
                        break;
 
@@ -686,13 +696,51 @@ static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
        return state->target_cycle - state->current_cycle;
 }
 
+static void * lightrec_emit_code(struct lightrec_state *state,
+                                jit_state_t *_jit, unsigned int *size)
+{
+       bool has_code_buffer = ENABLE_CODE_BUFFER && state->tlsf;
+       jit_word_t code_size, new_code_size;
+       void *code;
+
+       jit_realize();
+
+       if (!ENABLE_DISASSEMBLER)
+               jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE);
+
+       if (has_code_buffer) {
+               jit_get_code(&code_size);
+               code = tlsf_malloc(state->tlsf, (size_t) code_size);
+               if (!code)
+                       return NULL;
+
+               jit_set_code(code, code_size);
+       }
+
+       code = jit_emit();
+
+       jit_get_code(&new_code_size);
+       lightrec_register(MEM_FOR_CODE, new_code_size);
+
+       if (has_code_buffer) {
+               tlsf_realloc(state->tlsf, code, new_code_size);
+
+               pr_debug("Creating code block at address 0x%" PRIxPTR ", "
+                        "code size: %" PRIuPTR " new: %" PRIuPTR "\n",
+                        (uintptr_t) code, code_size, new_code_size);
+       }
+
+       *size = (unsigned int) new_code_size;
+
+       return code;
+}
+
 static struct block * generate_wrapper(struct lightrec_state *state)
 {
        struct block *block;
        jit_state_t *_jit;
        unsigned int i;
        int stack_ptr;
-       jit_word_t code_size;
        jit_node_t *to_tramp, *to_fn_epilog;
        jit_node_t *addr[C_WRAPPERS_COUNT - 1];
 
@@ -767,21 +815,20 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        jit_epilog();
 
        block->_jit = _jit;
-       block->function = jit_emit();
        block->opcode_list = NULL;
        block->flags = 0;
        block->nb_ops = 0;
 
+       block->function = lightrec_emit_code(state, _jit,
+                                            &block->code_size);
+       if (!block->function)
+               goto err_free_block;
+
        state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function;
 
        for (i = 0; i < C_WRAPPERS_COUNT - 1; i++)
                state->wrappers_eps[i] = jit_address(addr[i]);
 
-       jit_get_code(&code_size);
-       lightrec_register(MEM_FOR_CODE, code_size);
-
-       block->code_size = code_size;
-
        if (ENABLE_DISASSEMBLER) {
                pr_debug("Wrapper block:\n");
                jit_disassemble();
@@ -825,10 +872,9 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
 {
        struct block *block;
        jit_state_t *_jit;
-       jit_node_t *to_end, *to_c, *loop, *addr, *addr2, *addr3;
+       jit_node_t *to_end, *loop, *addr, *addr2, *addr3;
        unsigned int i;
-       u32 offset, ram_len;
-       jit_word_t code_size;
+       u32 offset;
 
        block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
        if (!block)
@@ -888,21 +934,27 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
 
        /* Convert next PC to KUNSEG and avoid mirrors */
-       ram_len = state->maps[PSX_MAP_KERNEL_USER_RAM].length;
-       jit_andi(JIT_R0, JIT_V0, 0x10000000 | (ram_len - 1));
-       to_c = jit_bgei(JIT_R0, ram_len);
-
-       /* Fast path: code is running from RAM, use the code LUT */
-       if (__WORDSIZE == 64)
+       jit_andi(JIT_R0, JIT_V0, 0x10000000 | (RAM_SIZE - 1));
+       jit_rshi_u(JIT_R1, JIT_R0, 28);
+       jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1);
+       jit_addi(JIT_R2, JIT_R2, RAM_SIZE);
+       jit_movnr(JIT_R0, JIT_R2, JIT_R1);
+
+       /* If possible, use the code LUT */
+       if (!lut_is_32bit(state))
                jit_lshi(JIT_R0, JIT_R0, 1);
        jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE);
-       jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut));
+
+       offset = offsetof(struct lightrec_state, code_lut);
+       if (lut_is_32bit(state))
+               jit_ldxi_ui(JIT_R0, JIT_R0, offset);
+       else
+               jit_ldxi(JIT_R0, JIT_R0, offset);
 
        /* If we get non-NULL, loop */
        jit_patch_at(jit_bnei(JIT_R0, 0), loop);
 
        /* Slow path: call C function get_next_block_func() */
-       jit_patch(to_c);
 
        if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
                /* We may call the interpreter - update state->current_cycle */
@@ -946,15 +998,14 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        jit_epilog();
 
        block->_jit = _jit;
-       block->function = jit_emit();
        block->opcode_list = NULL;
        block->flags = 0;
        block->nb_ops = 0;
 
-       jit_get_code(&code_size);
-       lightrec_register(MEM_FOR_CODE, code_size);
-
-       block->code_size = code_size;
+       block->function = lightrec_emit_code(state, _jit,
+                                            &block->code_size);
+       if (!block->function)
+               goto err_free_block;
 
        state->eob_wrapper_func = jit_address(addr2);
        if (OPT_REPLACE_MEMSET)
@@ -984,7 +1035,7 @@ union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
        lightrec_get_map(state, &host, kunseg(pc));
 
        const u32 *code = (u32 *)host;
-       return (union code) *code;
+       return (union code) LE32TOH(*code);
 }
 
 unsigned int lightrec_cycles_of_opcode(union code code)
@@ -1101,7 +1152,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
                block->flags |= BLOCK_FULLY_TAGGED;
 
        if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET))
-               state->code_lut[lut_offset(pc)] = state->memset_func;
+               lut_write(state, lut_offset(pc), state->memset_func);
 
        block->hash = lightrec_calculate_block_hash(block);
 
@@ -1160,6 +1211,19 @@ static void lightrec_reap_jit(struct lightrec_state *state, void *data)
        _jit_destroy_state(data);
 }
 
+static void lightrec_free_function(struct lightrec_state *state, void *fn)
+{
+       if (ENABLE_CODE_BUFFER && state->tlsf) {
+               pr_debug("Freeing code block at 0x%" PRIxPTR "\n", (uintptr_t) fn);
+               tlsf_free(state->tlsf, fn);
+       }
+}
+
+static void lightrec_reap_function(struct lightrec_state *state, void *data)
+{
+       lightrec_free_function(state, data);
+}
+
 int lightrec_compile_block(struct lightrec_cstate *cstate,
                           struct block *block)
 {
@@ -1171,7 +1235,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
        jit_state_t *_jit, *oldjit;
        jit_node_t *start_of_block;
        bool skip_next = false;
-       jit_word_t code_size;
+       void *old_fn;
        unsigned int i, j;
        u32 offset;
 
@@ -1184,6 +1248,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
                return -ENOMEM;
 
        oldjit = block->_jit;
+       old_fn = block->function;
        block->_jit = _jit;
 
        lightrec_regcache_reset(cstate->reg_cache);
@@ -1261,11 +1326,16 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
        jit_ret();
        jit_epilog();
 
-       block->function = jit_emit();
+       block->function = lightrec_emit_code(state, _jit,
+                                            &block->code_size);
+       if (!block->function) {
+               pr_err("Unable to compile block!\n");
+       }
+
        block->flags &= ~BLOCK_SHOULD_RECOMPILE;
 
        /* Add compiled function to the LUT */
-       state->code_lut[lut_offset(block->pc)] = block->function;
+       lut_write(state, lut_offset(block->pc), block->function);
 
        if (ENABLE_THREADED_COMPILER) {
                /* Since we might try to reap the same block multiple times,
@@ -1302,7 +1372,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
                 * be compiled. We can override the LUT entry with our new
                 * block's entry point. */
                offset = lut_offset(block->pc) + target->offset;
-               state->code_lut[offset] = jit_address(target->label);
+               lut_write(state, offset, jit_address(target->label));
 
                if (block2) {
                        pr_debug("Reap block 0x%08x as it's covered by block "
@@ -1323,11 +1393,6 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
        if (ENABLE_THREADED_COMPILER)
                lightrec_reaper_continue(state->reaper);
 
-       jit_get_code(&code_size);
-       lightrec_register(MEM_FOR_CODE, code_size);
-
-       block->code_size = code_size;
-
        if (ENABLE_DISASSEMBLER) {
                pr_debug("Compiling block at PC: 0x%08x\n", block->pc);
                jit_disassemble();
@@ -1350,11 +1415,15 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
                pr_debug("Block 0x%08x recompiled, reaping old jit context.\n",
                         block->pc);
 
-               if (ENABLE_THREADED_COMPILER)
+               if (ENABLE_THREADED_COMPILER) {
                        lightrec_reaper_add(state->reaper,
                                            lightrec_reap_jit, oldjit);
-               else
+                       lightrec_reaper_add(state->reaper,
+                                           lightrec_reap_function, old_fn);
+               } else {
                        _jit_destroy_state(oldjit);
+                       lightrec_free_function(state, old_fn);
+               }
        }
 
        return 0;
@@ -1435,6 +1504,7 @@ void lightrec_free_block(struct lightrec_state *state, struct block *block)
                lightrec_free_opcode_list(state, block);
        if (block->_jit)
                _jit_destroy_state(block->_jit);
+       lightrec_free_function(state, block->function);
        lightrec_unregister(MEM_FOR_CODE, block->code_size);
        lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
 }
@@ -1469,7 +1539,12 @@ struct lightrec_state * lightrec_init(char *argv0,
                                      size_t nb,
                                      const struct lightrec_ops *ops)
 {
+       const struct lightrec_mem_map *codebuf_map;
        struct lightrec_state *state;
+       uintptr_t addr;
+       void *tlsf = NULL;
+       bool with_32bit_lut = false;
+       size_t lut_size;
 
        /* Sanity-check ops */
        if (!ops || !ops->cop2_op || !ops->enable_ram) {
@@ -1477,15 +1552,37 @@ struct lightrec_state * lightrec_init(char *argv0,
                return NULL;
        }
 
+       if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER) {
+               codebuf_map = &map[PSX_MAP_CODE_BUFFER];
+
+               tlsf = tlsf_create_with_pool(codebuf_map->address,
+                                            codebuf_map->length);
+               if (!tlsf) {
+                       pr_err("Unable to initialize code buffer\n");
+                       return NULL;
+               }
+
+               if (__WORDSIZE == 64) {
+                       addr = (uintptr_t) codebuf_map->address + codebuf_map->length - 1;
+                       with_32bit_lut = addr == (u32) addr;
+               }
+       }
+
+       if (with_32bit_lut)
+               lut_size = CODE_LUT_SIZE * 4;
+       else
+               lut_size = CODE_LUT_SIZE * sizeof(void *);
+
        init_jit(argv0);
 
-       state = calloc(1, sizeof(*state) +
-                      sizeof(*state->code_lut) * CODE_LUT_SIZE);
+       state = calloc(1, sizeof(*state) + lut_size);
        if (!state)
                goto err_finish_jit;
 
-       lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) +
-                         sizeof(*state->code_lut) * CODE_LUT_SIZE);
+       lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + lut_size);
+
+       state->tlsf = tlsf;
+       state->with_32bit_lut = with_32bit_lut;
 
 #if ENABLE_TINYMM
        state->tinymm = tinymm_init(malloc, free, 4096);
@@ -1554,6 +1651,9 @@ struct lightrec_state * lightrec_init(char *argv0,
                pr_info("Memory map is sub-par. Emitted code will be slow.\n");
        }
 
+       if (state->with_32bit_lut)
+               pr_info("Using 32-bit LUT\n");
+
        return state;
 
 err_free_dispatcher:
@@ -1574,10 +1674,12 @@ err_free_tinymm:
 err_free_state:
 #endif
        lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
-                           sizeof(*state->code_lut) * CODE_LUT_SIZE);
+                           lut_elm_size(state) * CODE_LUT_SIZE);
        free(state);
 err_finish_jit:
        finish_jit();
+       if (ENABLE_CODE_BUFFER && tlsf)
+               tlsf_destroy(tlsf);
        return NULL;
 }
 
@@ -1598,12 +1700,14 @@ void lightrec_destroy(struct lightrec_state *state)
        lightrec_free_block(state, state->dispatcher);
        lightrec_free_block(state, state->c_wrapper_block);
        finish_jit();
+       if (ENABLE_CODE_BUFFER && state->tlsf)
+               tlsf_destroy(state->tlsf);
 
 #if ENABLE_TINYMM
        tinymm_shutdown(state->tinymm);
 #endif
        lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
-                           sizeof(*state->code_lut) * CODE_LUT_SIZE);
+                           lut_elm_size(state) * CODE_LUT_SIZE);
        free(state);
 }
 
@@ -1625,7 +1729,7 @@ void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len)
 
 void lightrec_invalidate_all(struct lightrec_state *state)
 {
-       memset(state->code_lut, 0, sizeof(*state->code_lut) * CODE_LUT_SIZE);
+       memset(state->code_lut, 0, lut_elm_size(state) * CODE_LUT_SIZE);
 }
 
 void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only)
index e418c70..4f51e1f 100644 (file)
@@ -58,6 +58,9 @@ enum psx_map {
        PSX_MAP_MIRROR1,
        PSX_MAP_MIRROR2,
        PSX_MAP_MIRROR3,
+       PSX_MAP_CODE_BUFFER,
+
+       PSX_MAP_UNKNOWN,
 };
 
 struct lightrec_mem_map_ops {
index 7304abc..562f7e0 100644 (file)
@@ -817,6 +817,66 @@ static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset)
        to_nop->opcode = 0;
 }
 
+static void lightrec_remove_useless_lui(struct block *block, unsigned int offset,
+                                       u32 known, u32 *values)
+{
+       struct opcode *list = block->opcode_list,
+                     *op = &block->opcode_list[offset];
+       int reader;
+
+       if (!(op->flags & LIGHTREC_SYNC) && (known & BIT(op->i.rt)) &&
+           values[op->i.rt] == op->i.imm << 16) {
+               pr_debug("Converting duplicated LUI to NOP\n");
+               op->opcode = 0x0;
+               return;
+       }
+
+       if (op->i.imm != 0 || op->i.rt == 0)
+               return;
+
+       reader = find_next_reader(list, offset + 1, op->i.rt);
+       if (reader <= 0)
+               return;
+
+       if (opcode_writes_register(list[reader].c, op->i.rt) ||
+           reg_is_dead(list, reader, op->i.rt)) {
+               pr_debug("Removing useless LUI 0x0\n");
+
+               if (list[reader].i.rs == op->i.rt)
+                       list[reader].i.rs = 0;
+               if (list[reader].i.op == OP_SPECIAL &&
+                   list[reader].i.rt == op->i.rt)
+                       list[reader].i.rt = 0;
+               op->opcode = 0x0;
+       }
+}
+
+static void lightrec_modify_lui(struct block *block, unsigned int offset)
+{
+       union code c, *lui = &block->opcode_list[offset].c;
+       bool stop = false, stop_next = false;
+       unsigned int i;
+
+       for (i = offset + 1; !stop && i < block->nb_ops; i++) {
+               c = block->opcode_list[i].c;
+               stop = stop_next;
+
+               if ((opcode_is_store(c) && c.i.rt == lui->i.rt)
+                   || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt)))
+                       break;
+
+               if (opcode_writes_register(c, lui->i.rt)) {
+                       pr_debug("Convert LUI at offset 0x%x to kuseg\n",
+                                i - 1 << 2);
+                       lui->i.imm = kunseg(lui->i.imm << 16) >> 16;
+                       break;
+               }
+
+               if (has_delay_slot(c))
+                       stop_next = true;
+       }
+}
+
 static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
 {
        struct opcode *list = block->opcode_list;
@@ -824,7 +884,6 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
        u32 known = BIT(0);
        u32 values[32] = { 0 };
        unsigned int i;
-       int reader;
 
        for (i = 0; i < block->nb_ops; i++) {
                prev = op;
@@ -863,30 +922,8 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                        break;
 
                case OP_LUI:
-                       if (!(op->flags & LIGHTREC_SYNC) &&
-                           (known & BIT(op->i.rt)) &&
-                           values[op->i.rt] == op->i.imm << 16) {
-                               pr_debug("Converting duplicated LUI to NOP\n");
-                               op->opcode = 0x0;
-                       }
-
-                       if (op->i.imm != 0 || op->i.rt == 0)
-                               break;
-
-                       reader = find_next_reader(list, i + 1, op->i.rt);
-                       if (reader > 0 &&
-                           (opcode_writes_register(list[reader].c, op->i.rt) ||
-                            reg_is_dead(list, reader, op->i.rt))) {
-
-                               pr_debug("Removing useless LUI 0x0\n");
-
-                               if (list[reader].i.rs == op->i.rt)
-                                       list[reader].i.rs = 0;
-                               if (list[reader].i.op == OP_SPECIAL &&
-                                   list[reader].i.rt == op->i.rt)
-                                       list[reader].i.rt = 0;
-                               op->opcode = 0x0;
-                       }
+                       lightrec_modify_lui(block, i);
+                       lightrec_remove_useless_lui(block, i, known, values);
                        break;
 
                /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
@@ -1233,15 +1270,14 @@ static int lightrec_early_unload(struct lightrec_state *state, struct block *blo
 
 static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
 {
-       const struct lightrec_mem_map *map;
-       struct opcode *prev2, *prev = NULL, *list = NULL;
+       struct opcode *prev = NULL, *list = NULL;
+       enum psx_map psx_map;
        u32 known = BIT(0);
        u32 values[32] = { 0 };
        unsigned int i;
-       u32 val;
+       u32 val, kunseg_val;
 
        for (i = 0; i < block->nb_ops; i++) {
-               prev2 = prev;
                prev = list;
                list = &block->opcode_list[i];
 
@@ -1289,42 +1325,38 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
                case OP_LWR:
                case OP_LWC2:
                        if (OPT_FLAG_IO && (known & BIT(list->i.rs))) {
-                               if (prev && prev->i.op == OP_LUI &&
-                                   !(prev2 && has_delay_slot(prev2->c)) &&
-                                   prev->i.rt == list->i.rs &&
-                                   list->i.rt == list->i.rs &&
-                                   prev->i.imm & 0x8000) {
-                                       pr_debug("Convert LUI at offset 0x%x to kuseg\n",
-                                                i - 1 << 2);
-
-                                       val = kunseg(prev->i.imm << 16);
-                                       prev->i.imm = val >> 16;
-                                       values[list->i.rs] = val;
-                               }
-
                                val = values[list->i.rs] + (s16) list->i.imm;
-                               map = lightrec_get_map(state, NULL, kunseg(val));
-
-                               if (!map || map->ops ||
-                                   map == &state->maps[PSX_MAP_PARALLEL_PORT]) {
-                                       pr_debug("Flagging opcode %u as I/O access\n",
-                                                i);
-                                       list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
-                                       break;
-                               }
-
-                               if (val - map->pc < map->length)
-                                       list->flags |= LIGHTREC_NO_MASK;
-
-                               if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) {
+                               kunseg_val = kunseg(val);
+                               psx_map = lightrec_get_map_idx(state, kunseg_val);
+
+                               switch (psx_map) {
+                               case PSX_MAP_KERNEL_USER_RAM:
+                                       if (val == kunseg_val)
+                                               list->flags |= LIGHTREC_NO_MASK;
+                                       /* fall-through */
+                               case PSX_MAP_MIRROR1:
+                               case PSX_MAP_MIRROR2:
+                               case PSX_MAP_MIRROR3:
                                        pr_debug("Flaging opcode %u as RAM access\n", i);
                                        list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
-                               } else if (map == &state->maps[PSX_MAP_BIOS]) {
+                                       break;
+                               case PSX_MAP_BIOS:
                                        pr_debug("Flaging opcode %u as BIOS access\n", i);
                                        list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS);
-                               } else if (map == &state->maps[PSX_MAP_SCRATCH_PAD]) {
+                                       break;
+                               case PSX_MAP_SCRATCH_PAD:
                                        pr_debug("Flaging opcode %u as scratchpad access\n", i);
                                        list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH);
+
+                                       /* Consider that we're never going to run code from
+                                        * the scratchpad. */
+                                       list->flags |= LIGHTREC_NO_INVALIDATE;
+                                       break;
+                               default:
+                                       pr_debug("Flagging opcode %u as I/O access\n",
+                                                i);
+                                       list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
+                                       break;
                                }
                        }
                default: /* fall-through */
diff --git a/deps/lightrec/tlsf/.gitrepo b/deps/lightrec/tlsf/.gitrepo
new file mode 100644 (file)
index 0000000..692e542
--- /dev/null
@@ -0,0 +1,12 @@
+; DO NOT EDIT (unless you know what you are doing)
+;
+; This subdirectory is a git "subrepo", and this file is maintained by the
+; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme
+;
+[subrepo]
+       remote = https://github.com/mattconte/tlsf
+       branch = master
+       commit = deff9ab509341f264addbd3c8ada533678591905
+       parent = 1dc0344052e7379e16753e4a285c30fd158bf78d
+       method = merge
+       cmdver = 0.4.3
diff --git a/deps/lightrec/tlsf/README.md b/deps/lightrec/tlsf/README.md
new file mode 100644 (file)
index 0000000..982919f
--- /dev/null
@@ -0,0 +1,92 @@
+# tlsf
+Two-Level Segregated Fit memory allocator implementation.
+Written by Matthew Conte (matt@baisoku.org).
+Released under the BSD license.
+
+Features
+--------
+  * O(1) cost for malloc, free, realloc, memalign
+  * Extremely low overhead per allocation (4 bytes)
+  * Low overhead per TLSF management of pools (~3kB)
+  * Low fragmentation
+  * Compiles to only a few kB of code and data
+  * Support for adding and removing memory pool regions on the fly
+
+Caveats
+-------
+  * Currently, assumes architecture can make 4-byte aligned accesses
+  * Not designed to be thread safe; the user must provide this
+
+Notes
+-----
+This code was based on the TLSF 1.4 spec and documentation found at:
+
+       http://www.gii.upv.es/tlsf/main/docs
+
+It also leverages the TLSF 2.0 improvement to shrink the per-block overhead from 8 to 4 bytes.
+
+History
+-------
+2016/04/10 - v3.1
+  * Code moved to github
+  * tlsfbits.h rolled into tlsf.c
+  * License changed to BSD
+
+2014/02/08 - v3.0
+  * This version is based on improvements from 3DInteractive GmbH
+  * Interface changed to allow more than one memory pool
+  * Separated pool handling from control structure (adding, removing, debugging)
+  * Control structure and pools can still be constructed in the same memory block
+  * Memory blocks for control structure and pools are checked for alignment
+  * Added functions to retrieve control structure size, alignment size, min and max block size, overhead of pool structure, and overhead of a single allocation
+  * Minimal Pool size is tlsf_block_size_min() + tlsf_pool_overhead()
+  * Pool must be empty when it is removed, in order to allow O(1) removal
+
+2011/10/20 - v2.0
+  * 64-bit support
+  * More compiler intrinsics for ffs/fls
+  * ffs/fls verification during TLSF creation in debug builds
+
+2008/04/04 - v1.9
+  * Add tlsf_heap_check, a heap integrity check
+  * Support a predefined tlsf_assert macro
+  * Fix realloc case where block should shrink; if adjacent block is in use, execution would go down the slow path
+
+2007/02/08 - v1.8
+  * Fix for unnecessary reallocation in tlsf_realloc
+
+2007/02/03 - v1.7
+  * tlsf_heap_walk takes a callback
+  * tlsf_realloc now returns NULL on failure
+  * tlsf_memalign optimization for 4-byte alignment
+  * Usage of size_t where appropriate
+
+2006/11/21 - v1.6
+  * ffs/fls broken out into tlsfbits.h
+  * tlsf_overhead queries per-pool overhead
+
+2006/11/07 - v1.5
+  * Smart realloc implementation
+  * Smart memalign implementation
+
+2006/10/11 - v1.4
+  * Add some ffs/fls implementations
+  * Minor code footprint reduction
+
+2006/09/14 - v1.3
+  * Profiling indicates heavy use of blocks of size 1-128, so implement small block handling
+  * Reduce pool overhead by about 1kb
+  * Reduce minimum block size from 32 to 12 bytes
+  * Realloc bug fix
+
+2006/09/09 - v1.2
+  * Add tlsf_block_size
+  * Static assertion mechanism for invariants
+  * Minor bugfixes 
+
+2006/09/01 - v1.1
+  * Add tlsf_realloc
+  * Add tlsf_walk_heap
+
+2006/08/25 - v1.0
+  * First release
diff --git a/deps/lightrec/tlsf/tlsf.c b/deps/lightrec/tlsf/tlsf.c
new file mode 100644 (file)
index 0000000..af57573
--- /dev/null
@@ -0,0 +1,1264 @@
+#include <assert.h>\r
+#include <limits.h>\r
+#include <stddef.h>\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <string.h>\r
+\r
+#include "tlsf.h"\r
+\r
+#if defined(__cplusplus)\r
+#define tlsf_decl inline\r
+#else\r
+#define tlsf_decl static\r
+#endif\r
+\r
+/*\r
+** Architecture-specific bit manipulation routines.\r
+**\r
+** TLSF achieves O(1) cost for malloc and free operations by limiting\r
+** the search for a free block to a free list of guaranteed size\r
+** adequate to fulfill the request, combined with efficient free list\r
+** queries using bitmasks and architecture-specific bit-manipulation\r
+** routines.\r
+**\r
+** Most modern processors provide instructions to count leading zeroes\r
+** in a word, find the lowest and highest set bit, etc. These\r
+** specific implementations will be used when available, falling back\r
+** to a reasonably efficient generic implementation.\r
+**\r
+** NOTE: TLSF spec relies on ffs/fls returning value 0..31.\r
+** ffs/fls return 1-32 by default, returning 0 for error.\r
+*/\r
+\r
+/*\r
+** Detect whether or not we are building for a 32- or 64-bit (LP/LLP)\r
+** architecture. There is no reliable portable method at compile-time.\r
+*/\r
+#if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) \\r
+       || defined (_WIN64) || defined (__LP64__) || defined (__LLP64__)\r
+#define TLSF_64BIT\r
+#endif\r
+\r
+/*\r
+** gcc 3.4 and above have builtin support, specialized for architecture.\r
+** Some compilers masquerade as gcc; patchlevel test filters them out.\r
+*/\r
+#if defined (__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \\r
+       && defined (__GNUC_PATCHLEVEL__)\r
+\r
+#if defined (__SNC__)\r
+/* SNC for Playstation 3. */\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+       const unsigned int reverse = word & (~word + 1);\r
+       const int bit = 32 - __builtin_clz(reverse);\r
+       return bit - 1;\r
+}\r
+\r
+#else\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+       return __builtin_ffs(word) - 1;\r
+}\r
+\r
+#endif\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+       const int bit = word ? 32 - __builtin_clz(word) : 0;\r
+       return bit - 1;\r
+}\r
+\r
+#elif defined (_MSC_VER) && (_MSC_VER >= 1400) && (defined (_M_IX86) || defined (_M_X64))\r
+/* Microsoft Visual C++ support on x86/X64 architectures. */\r
+\r
+#include <intrin.h>\r
+\r
+#pragma intrinsic(_BitScanReverse)\r
+#pragma intrinsic(_BitScanForward)\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+       unsigned long index;\r
+       return _BitScanReverse(&index, word) ? index : -1;\r
+}\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+       unsigned long index;\r
+       return _BitScanForward(&index, word) ? index : -1;\r
+}\r
+\r
+#elif defined (_MSC_VER) && defined (_M_PPC)\r
+/* Microsoft Visual C++ support on PowerPC architectures. */\r
+\r
+#include <ppcintrinsics.h>\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+       const int bit = 32 - _CountLeadingZeros(word);\r
+       return bit - 1;\r
+}\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+       const unsigned int reverse = word & (~word + 1);\r
+       const int bit = 32 - _CountLeadingZeros(reverse);\r
+       return bit - 1;\r
+}\r
+\r
+#elif defined (__ARMCC_VERSION)\r
+/* RealView Compilation Tools for ARM */\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+       const unsigned int reverse = word & (~word + 1);\r
+       const int bit = 32 - __clz(reverse);\r
+       return bit - 1;\r
+}\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+       const int bit = word ? 32 - __clz(word) : 0;\r
+       return bit - 1;\r
+}\r
+\r
+#elif defined (__ghs__)\r
+/* Green Hills support for PowerPC */\r
+\r
+#include <ppc_ghs.h>\r
+\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+       const unsigned int reverse = word & (~word + 1);\r
+       const int bit = 32 - __CLZ32(reverse);\r
+       return bit - 1;\r
+}\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+       const int bit = word ? 32 - __CLZ32(word) : 0;\r
+       return bit - 1;\r
+}\r
+\r
+#else\r
+/* Fall back to generic implementation. */\r
+\r
+tlsf_decl int tlsf_fls_generic(unsigned int word)\r
+{\r
+       int bit = 32;\r
+\r
+       if (!word) bit -= 1;\r
+       if (!(word & 0xffff0000)) { word <<= 16; bit -= 16; }\r
+       if (!(word & 0xff000000)) { word <<= 8; bit -= 8; }\r
+       if (!(word & 0xf0000000)) { word <<= 4; bit -= 4; }\r
+       if (!(word & 0xc0000000)) { word <<= 2; bit -= 2; }\r
+       if (!(word & 0x80000000)) { word <<= 1; bit -= 1; }\r
+\r
+       return bit;\r
+}\r
+\r
+/* Implement ffs in terms of fls. */\r
+tlsf_decl int tlsf_ffs(unsigned int word)\r
+{\r
+       return tlsf_fls_generic(word & (~word + 1)) - 1;\r
+}\r
+\r
+tlsf_decl int tlsf_fls(unsigned int word)\r
+{\r
+       return tlsf_fls_generic(word) - 1;\r
+}\r
+\r
+#endif\r
+\r
+/* Possibly 64-bit version of tlsf_fls. */\r
+#if defined (TLSF_64BIT)\r
+tlsf_decl int tlsf_fls_sizet(size_t size)\r
+{\r
+       int high = (int)(size >> 32);\r
+       int bits = 0;\r
+       if (high)\r
+       {\r
+               bits = 32 + tlsf_fls(high);\r
+       }\r
+       else\r
+       {\r
+               bits = tlsf_fls((int)size & 0xffffffff);\r
+\r
+       }\r
+       return bits;\r
+}\r
+#else\r
+#define tlsf_fls_sizet tlsf_fls\r
+#endif\r
+\r
+#undef tlsf_decl\r
+\r
+/*\r
+** Constants.\r
+*/\r
+\r
+/* Public constants: may be modified. */\r
+enum tlsf_public\r
+{\r
+       /* log2 of number of linear subdivisions of block sizes. Larger\r
+       ** values require more memory in the control structure. Values of\r
+       ** 4 or 5 are typical.\r
+       */\r
+       SL_INDEX_COUNT_LOG2 = 5,\r
+};\r
+\r
+/* Private constants: do not modify. */\r
+enum tlsf_private\r
+{\r
+#if defined (TLSF_64BIT)\r
+       /* All allocation sizes and addresses are aligned to 8 bytes. */\r
+       ALIGN_SIZE_LOG2 = 3,\r
+#else\r
+       /* All allocation sizes and addresses are aligned to 4 bytes. */\r
+       ALIGN_SIZE_LOG2 = 2,\r
+#endif\r
+       ALIGN_SIZE = (1 << ALIGN_SIZE_LOG2),\r
+\r
+       /*\r
+       ** We support allocations of sizes up to (1 << FL_INDEX_MAX) bits.\r
+       ** However, because we linearly subdivide the second-level lists, and\r
+       ** our minimum size granularity is 4 bytes, it doesn't make sense to\r
+       ** create first-level lists for sizes smaller than SL_INDEX_COUNT * 4,\r
+       ** or (1 << (SL_INDEX_COUNT_LOG2 + 2)) bytes, as there we will be\r
+       ** trying to split size ranges into more slots than we have available.\r
+       ** Instead, we calculate the minimum threshold size, and place all\r
+       ** blocks below that size into the 0th first-level list.\r
+       */\r
+\r
+#if defined (TLSF_64BIT)\r
+       /*\r
+       ** TODO: We can increase this to support larger sizes, at the expense\r
+       ** of more overhead in the TLSF structure.\r
+       */\r
+       FL_INDEX_MAX = 32,\r
+#else\r
+       FL_INDEX_MAX = 30,\r
+#endif\r
+       SL_INDEX_COUNT = (1 << SL_INDEX_COUNT_LOG2),\r
+       FL_INDEX_SHIFT = (SL_INDEX_COUNT_LOG2 + ALIGN_SIZE_LOG2),\r
+       FL_INDEX_COUNT = (FL_INDEX_MAX - FL_INDEX_SHIFT + 1),\r
+\r
+       SMALL_BLOCK_SIZE = (1 << FL_INDEX_SHIFT),\r
+};\r
+\r
+/*\r
+** Cast and min/max macros.\r
+*/\r
+\r
+#define tlsf_cast(t, exp)      ((t) (exp))\r
+#define tlsf_min(a, b)         ((a) < (b) ? (a) : (b))\r
+#define tlsf_max(a, b)         ((a) > (b) ? (a) : (b))\r
+\r
+/*\r
+** Set assert macro, if it has not been provided by the user.\r
+*/\r
+#if !defined (tlsf_assert)\r
+#define tlsf_assert assert\r
+#endif\r
+\r
+/*\r
+** Static assertion mechanism.\r
+*/\r
+\r
+#define _tlsf_glue2(x, y) x ## y\r
+#define _tlsf_glue(x, y) _tlsf_glue2(x, y)\r
+#define tlsf_static_assert(exp) \\r
+       typedef char _tlsf_glue(static_assert, __LINE__) [(exp) ? 1 : -1]\r
+\r
+/* This code has been tested on 32- and 64-bit (LP/LLP) architectures. */\r
+tlsf_static_assert(sizeof(int) * CHAR_BIT == 32);\r
+tlsf_static_assert(sizeof(size_t) * CHAR_BIT >= 32);\r
+tlsf_static_assert(sizeof(size_t) * CHAR_BIT <= 64);\r
+\r
+/* SL_INDEX_COUNT must be <= number of bits in sl_bitmap's storage type. */\r
+tlsf_static_assert(sizeof(unsigned int) * CHAR_BIT >= SL_INDEX_COUNT);\r
+\r
+/* Ensure we've properly tuned our sizes. */\r
+tlsf_static_assert(ALIGN_SIZE == SMALL_BLOCK_SIZE / SL_INDEX_COUNT);\r
+\r
+/*\r
+** Data structures and associated constants.\r
+*/\r
+\r
+/*\r
+** Block header structure.\r
+**\r
+** There are several implementation subtleties involved:\r
+** - The prev_phys_block field is only valid if the previous block is free.\r
+** - The prev_phys_block field is actually stored at the end of the\r
+**   previous block. It appears at the beginning of this structure only to\r
+**   simplify the implementation.\r
+** - The next_free / prev_free fields are only valid if the block is free.\r
+*/\r
+typedef struct block_header_t\r
+{\r
+       /* Points to the previous physical block. */\r
+       struct block_header_t* prev_phys_block;\r
+\r
+       /* The size of this block, excluding the block header. */\r
+       size_t size;\r
+\r
+       /* Next and previous free blocks. */\r
+       struct block_header_t* next_free;\r
+       struct block_header_t* prev_free;\r
+} block_header_t;\r
+\r
+/*\r
+** Since block sizes are always at least a multiple of 4, the two least\r
+** significant bits of the size field are used to store the block status:\r
+** - bit 0: whether block is busy or free\r
+** - bit 1: whether previous block is busy or free\r
+*/\r
+static const size_t block_header_free_bit = 1 << 0;\r
+static const size_t block_header_prev_free_bit = 1 << 1;\r
+\r
+/*\r
+** The size of the block header exposed to used blocks is the size field.\r
+** The prev_phys_block field is stored *inside* the previous free block.\r
+*/\r
+static const size_t block_header_overhead = sizeof(size_t);\r
+\r
+/* User data starts directly after the size field in a used block. */\r
+static const size_t block_start_offset =\r
+       offsetof(block_header_t, size) + sizeof(size_t);\r
+\r
+/*\r
+** A free block must be large enough to store its header minus the size of\r
+** the prev_phys_block field, and no larger than the number of addressable\r
+** bits for FL_INDEX.\r
+*/\r
+static const size_t block_size_min = \r
+       sizeof(block_header_t) - sizeof(block_header_t*);\r
+static const size_t block_size_max = tlsf_cast(size_t, 1) << FL_INDEX_MAX;\r
+\r
+\r
+/* The TLSF control structure. */\r
+typedef struct control_t\r
+{\r
+       /* Empty lists point at this block to indicate they are free. */\r
+       block_header_t block_null;\r
+\r
+       /* Bitmaps for free lists. */\r
+       unsigned int fl_bitmap;\r
+       unsigned int sl_bitmap[FL_INDEX_COUNT];\r
+\r
+       /* Head of free lists. */\r
+       block_header_t* blocks[FL_INDEX_COUNT][SL_INDEX_COUNT];\r
+} control_t;\r
+\r
+/* A type used for casting when doing pointer arithmetic. */\r
+typedef ptrdiff_t tlsfptr_t;\r
+\r
+/*\r
+** block_header_t member functions.\r
+*/\r
+\r
+static size_t block_size(const block_header_t* block)\r
+{\r
+       return block->size & ~(block_header_free_bit | block_header_prev_free_bit);\r
+}\r
+\r
+static void block_set_size(block_header_t* block, size_t size)\r
+{\r
+       const size_t oldsize = block->size;\r
+       block->size = size | (oldsize & (block_header_free_bit | block_header_prev_free_bit));\r
+}\r
+\r
+static int block_is_last(const block_header_t* block)\r
+{\r
+       return block_size(block) == 0;\r
+}\r
+\r
+static int block_is_free(const block_header_t* block)\r
+{\r
+       return tlsf_cast(int, block->size & block_header_free_bit);\r
+}\r
+\r
+static void block_set_free(block_header_t* block)\r
+{\r
+       block->size |= block_header_free_bit;\r
+}\r
+\r
+static void block_set_used(block_header_t* block)\r
+{\r
+       block->size &= ~block_header_free_bit;\r
+}\r
+\r
+static int block_is_prev_free(const block_header_t* block)\r
+{\r
+       return tlsf_cast(int, block->size & block_header_prev_free_bit);\r
+}\r
+\r
+static void block_set_prev_free(block_header_t* block)\r
+{\r
+       block->size |= block_header_prev_free_bit;\r
+}\r
+\r
+static void block_set_prev_used(block_header_t* block)\r
+{\r
+       block->size &= ~block_header_prev_free_bit;\r
+}\r
+\r
+static block_header_t* block_from_ptr(const void* ptr)\r
+{\r
+       return tlsf_cast(block_header_t*,\r
+               tlsf_cast(unsigned char*, ptr) - block_start_offset);\r
+}\r
+\r
+static void* block_to_ptr(const block_header_t* block)\r
+{\r
+       return tlsf_cast(void*,\r
+               tlsf_cast(unsigned char*, block) + block_start_offset);\r
+}\r
+\r
+/* Return location of next block after block of given size. */\r
+static block_header_t* offset_to_block(const void* ptr, size_t size)\r
+{\r
+       return tlsf_cast(block_header_t*, tlsf_cast(tlsfptr_t, ptr) + size);\r
+}\r
+\r
+/* Return location of previous block. */\r
+static block_header_t* block_prev(const block_header_t* block)\r
+{\r
+       tlsf_assert(block_is_prev_free(block) && "previous block must be free");\r
+       return block->prev_phys_block;\r
+}\r
+\r
+/* Return location of next existing block. */\r
+static block_header_t* block_next(const block_header_t* block)\r
+{\r
+       block_header_t* next = offset_to_block(block_to_ptr(block),\r
+               block_size(block) - block_header_overhead);\r
+       tlsf_assert(!block_is_last(block));\r
+       return next;\r
+}\r
+\r
+/* Link a new block with its physical neighbor, return the neighbor. */\r
+static block_header_t* block_link_next(block_header_t* block)\r
+{\r
+       block_header_t* next = block_next(block);\r
+       next->prev_phys_block = block;\r
+       return next;\r
+}\r
+\r
+static void block_mark_as_free(block_header_t* block)\r
+{\r
+       /* Link the block to the next block, first. */\r
+       block_header_t* next = block_link_next(block);\r
+       block_set_prev_free(next);\r
+       block_set_free(block);\r
+}\r
+\r
+static void block_mark_as_used(block_header_t* block)\r
+{\r
+       block_header_t* next = block_next(block);\r
+       block_set_prev_used(next);\r
+       block_set_used(block);\r
+}\r
+\r
+static size_t align_up(size_t x, size_t align)\r
+{\r
+       tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");\r
+       return (x + (align - 1)) & ~(align - 1);\r
+}\r
+\r
+static size_t align_down(size_t x, size_t align)\r
+{\r
+       tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");\r
+       return x - (x & (align - 1));\r
+}\r
+\r
+static void* align_ptr(const void* ptr, size_t align)\r
+{\r
+       const tlsfptr_t aligned =\r
+               (tlsf_cast(tlsfptr_t, ptr) + (align - 1)) & ~(align - 1);\r
+       tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two");\r
+       return tlsf_cast(void*, aligned);\r
+}\r
+\r
+/*\r
+** Adjust an allocation size to be aligned to word size, and no smaller\r
+** than internal minimum.\r
+*/\r
+static size_t adjust_request_size(size_t size, size_t align)\r
+{\r
+       size_t adjust = 0;\r
+       if (size)\r
+       {\r
+               const size_t aligned = align_up(size, align);\r
+\r
+               /* aligned sized must not exceed block_size_max or we'll go out of bounds on sl_bitmap */\r
+               if (aligned < block_size_max) \r
+               {\r
+                       adjust = tlsf_max(aligned, block_size_min);\r
+               }\r
+       }\r
+       return adjust;\r
+}\r
+\r
+/*\r
+** TLSF utility functions. In most cases, these are direct translations of\r
+** the documentation found in the white paper.\r
+*/\r
+\r
+static void mapping_insert(size_t size, int* fli, int* sli)\r
+{\r
+       int fl, sl;\r
+       if (size < SMALL_BLOCK_SIZE)\r
+       {\r
+               /* Store small blocks in first list. */\r
+               fl = 0;\r
+               sl = tlsf_cast(int, size) / (SMALL_BLOCK_SIZE / SL_INDEX_COUNT);\r
+       }\r
+       else\r
+       {\r
+               fl = tlsf_fls_sizet(size);\r
+               sl = tlsf_cast(int, size >> (fl - SL_INDEX_COUNT_LOG2)) ^ (1 << SL_INDEX_COUNT_LOG2);\r
+               fl -= (FL_INDEX_SHIFT - 1);\r
+       }\r
+       *fli = fl;\r
+       *sli = sl;\r
+}\r
+\r
+/* This version rounds up to the next block size (for allocations) */\r
+static void mapping_search(size_t size, int* fli, int* sli)\r
+{\r
+       if (size >= SMALL_BLOCK_SIZE)\r
+       {\r
+               const size_t round = (1 << (tlsf_fls_sizet(size) - SL_INDEX_COUNT_LOG2)) - 1;\r
+               size += round;\r
+       }\r
+       mapping_insert(size, fli, sli);\r
+}\r
+\r
+static block_header_t* search_suitable_block(control_t* control, int* fli, int* sli)\r
+{\r
+       int fl = *fli;\r
+       int sl = *sli;\r
+\r
+       /*\r
+       ** First, search for a block in the list associated with the given\r
+       ** fl/sl index.\r
+       */\r
+       unsigned int sl_map = control->sl_bitmap[fl] & (~0U << sl);\r
+       if (!sl_map)\r
+       {\r
+               /* No block exists. Search in the next largest first-level list. */\r
+               const unsigned int fl_map = control->fl_bitmap & (~0U << (fl + 1));\r
+               if (!fl_map)\r
+               {\r
+                       /* No free blocks available, memory has been exhausted. */\r
+                       return 0;\r
+               }\r
+\r
+               fl = tlsf_ffs(fl_map);\r
+               *fli = fl;\r
+               sl_map = control->sl_bitmap[fl];\r
+       }\r
+       tlsf_assert(sl_map && "internal error - second level bitmap is null");\r
+       sl = tlsf_ffs(sl_map);\r
+       *sli = sl;\r
+\r
+       /* Return the first block in the free list. */\r
+       return control->blocks[fl][sl];\r
+}\r
+\r
+/* Remove a free block from the free list.*/\r
+static void remove_free_block(control_t* control, block_header_t* block, int fl, int sl)\r
+{\r
+       block_header_t* prev = block->prev_free;\r
+       block_header_t* next = block->next_free;\r
+       tlsf_assert(prev && "prev_free field can not be null");\r
+       tlsf_assert(next && "next_free field can not be null");\r
+       next->prev_free = prev;\r
+       prev->next_free = next;\r
+\r
+       /* If this block is the head of the free list, set new head. */\r
+       if (control->blocks[fl][sl] == block)\r
+       {\r
+               control->blocks[fl][sl] = next;\r
+\r
+               /* If the new head is null, clear the bitmap. */\r
+               if (next == &control->block_null)\r
+               {\r
+                       control->sl_bitmap[fl] &= ~(1U << sl);\r
+\r
+                       /* If the second bitmap is now empty, clear the fl bitmap. */\r
+                       if (!control->sl_bitmap[fl])\r
+                       {\r
+                               control->fl_bitmap &= ~(1U << fl);\r
+                       }\r
+               }\r
+       }\r
+}\r
+\r
+/* Insert a free block into the free block list. */\r
+static void insert_free_block(control_t* control, block_header_t* block, int fl, int sl)\r
+{\r
+       block_header_t* current = control->blocks[fl][sl];\r
+       tlsf_assert(current && "free list cannot have a null entry");\r
+       tlsf_assert(block && "cannot insert a null entry into the free list");\r
+       block->next_free = current;\r
+       block->prev_free = &control->block_null;\r
+       current->prev_free = block;\r
+\r
+       tlsf_assert(block_to_ptr(block) == align_ptr(block_to_ptr(block), ALIGN_SIZE)\r
+               && "block not aligned properly");\r
+       /*\r
+       ** Insert the new block at the head of the list, and mark the first-\r
+       ** and second-level bitmaps appropriately.\r
+       */\r
+       control->blocks[fl][sl] = block;\r
+       control->fl_bitmap |= (1U << fl);\r
+       control->sl_bitmap[fl] |= (1U << sl);\r
+}\r
+\r
+/* Remove a given block from the free list. */\r
+static void block_remove(control_t* control, block_header_t* block)\r
+{\r
+       int fl, sl;\r
+       mapping_insert(block_size(block), &fl, &sl);\r
+       remove_free_block(control, block, fl, sl);\r
+}\r
+\r
+/* Insert a given block into the free list. */\r
+static void block_insert(control_t* control, block_header_t* block)\r
+{\r
+       int fl, sl;\r
+       mapping_insert(block_size(block), &fl, &sl);\r
+       insert_free_block(control, block, fl, sl);\r
+}\r
+\r
+static int block_can_split(block_header_t* block, size_t size)\r
+{\r
+       return block_size(block) >= sizeof(block_header_t) + size;\r
+}\r
+\r
+/* Split a block into two, the second of which is free. */\r
+static block_header_t* block_split(block_header_t* block, size_t size)\r
+{\r
+       /* Calculate the amount of space left in the remaining block. */\r
+       block_header_t* remaining =\r
+               offset_to_block(block_to_ptr(block), size - block_header_overhead);\r
+\r
+       const size_t remain_size = block_size(block) - (size + block_header_overhead);\r
+\r
+       tlsf_assert(block_to_ptr(remaining) == align_ptr(block_to_ptr(remaining), ALIGN_SIZE)\r
+               && "remaining block not aligned properly");\r
+\r
+       tlsf_assert(block_size(block) == remain_size + size + block_header_overhead);\r
+       block_set_size(remaining, remain_size);\r
+       tlsf_assert(block_size(remaining) >= block_size_min && "block split with invalid size");\r
+\r
+       block_set_size(block, size);\r
+       block_mark_as_free(remaining);\r
+\r
+       return remaining;\r
+}\r
+\r
+/* Absorb a free block's storage into an adjacent previous free block. */\r
+static block_header_t* block_absorb(block_header_t* prev, block_header_t* block)\r
+{\r
+       tlsf_assert(!block_is_last(prev) && "previous block can't be last");\r
+       /* Note: Leaves flags untouched. */\r
+       prev->size += block_size(block) + block_header_overhead;\r
+       block_link_next(prev);\r
+       return prev;\r
+}\r
+\r
+/* Merge a just-freed block with an adjacent previous free block. */\r
+static block_header_t* block_merge_prev(control_t* control, block_header_t* block)\r
+{\r
+       if (block_is_prev_free(block))\r
+       {\r
+               block_header_t* prev = block_prev(block);\r
+               tlsf_assert(prev && "prev physical block can't be null");\r
+               tlsf_assert(block_is_free(prev) && "prev block is not free though marked as such");\r
+               block_remove(control, prev);\r
+               block = block_absorb(prev, block);\r
+       }\r
+\r
+       return block;\r
+}\r
+\r
+/* Merge a just-freed block with an adjacent free block. */\r
+static block_header_t* block_merge_next(control_t* control, block_header_t* block)\r
+{\r
+       block_header_t* next = block_next(block);\r
+       tlsf_assert(next && "next physical block can't be null");\r
+\r
+       if (block_is_free(next))\r
+       {\r
+               tlsf_assert(!block_is_last(block) && "previous block can't be last");\r
+               block_remove(control, next);\r
+               block = block_absorb(block, next);\r
+       }\r
+\r
+       return block;\r
+}\r
+\r
+/* Trim any trailing block space off the end of a block, return to pool. */\r
+static void block_trim_free(control_t* control, block_header_t* block, size_t size)\r
+{\r
+       tlsf_assert(block_is_free(block) && "block must be free");\r
+       if (block_can_split(block, size))\r
+       {\r
+               block_header_t* remaining_block = block_split(block, size);\r
+               block_link_next(block);\r
+               block_set_prev_free(remaining_block);\r
+               block_insert(control, remaining_block);\r
+       }\r
+}\r
+\r
+/* Trim any trailing block space off the end of a used block, return to pool. */\r
+static void block_trim_used(control_t* control, block_header_t* block, size_t size)\r
+{\r
+       tlsf_assert(!block_is_free(block) && "block must be used");\r
+       if (block_can_split(block, size))\r
+       {\r
+               /* If the next block is free, we must coalesce. */\r
+               block_header_t* remaining_block = block_split(block, size);\r
+               block_set_prev_used(remaining_block);\r
+\r
+               remaining_block = block_merge_next(control, remaining_block);\r
+               block_insert(control, remaining_block);\r
+       }\r
+}\r
+\r
+static block_header_t* block_trim_free_leading(control_t* control, block_header_t* block, size_t size)\r
+{\r
+       block_header_t* remaining_block = block;\r
+       if (block_can_split(block, size))\r
+       {\r
+               /* We want the 2nd block. */\r
+               remaining_block = block_split(block, size - block_header_overhead);\r
+               block_set_prev_free(remaining_block);\r
+\r
+               block_link_next(block);\r
+               block_insert(control, block);\r
+       }\r
+\r
+       return remaining_block;\r
+}\r
+\r
+static block_header_t* block_locate_free(control_t* control, size_t size)\r
+{\r
+       int fl = 0, sl = 0;\r
+       block_header_t* block = 0;\r
+\r
+       if (size)\r
+       {\r
+               mapping_search(size, &fl, &sl);\r
+               \r
+               /*\r
+               ** mapping_search can futz with the size, so for excessively large sizes it can sometimes wind up \r
+               ** with indices that are off the end of the block array.\r
+               ** So, we protect against that here, since this is the only callsite of mapping_search.\r
+               ** Note that we don't need to check sl, since it comes from a modulo operation that guarantees it's always in range.\r
+               */\r
+               if (fl < FL_INDEX_COUNT)\r
+               {\r
+                       block = search_suitable_block(control, &fl, &sl);\r
+               }\r
+       }\r
+\r
+       if (block)\r
+       {\r
+               tlsf_assert(block_size(block) >= size);\r
+               remove_free_block(control, block, fl, sl);\r
+       }\r
+\r
+       return block;\r
+}\r
+\r
+static void* block_prepare_used(control_t* control, block_header_t* block, size_t size)\r
+{\r
+       void* p = 0;\r
+       if (block)\r
+       {\r
+               tlsf_assert(size && "size must be non-zero");\r
+               block_trim_free(control, block, size);\r
+               block_mark_as_used(block);\r
+               p = block_to_ptr(block);\r
+       }\r
+       return p;\r
+}\r
+\r
+/* Clear structure and point all empty lists at the null block. */\r
+static void control_construct(control_t* control)\r
+{\r
+       int i, j;\r
+\r
+       control->block_null.next_free = &control->block_null;\r
+       control->block_null.prev_free = &control->block_null;\r
+\r
+       control->fl_bitmap = 0;\r
+       for (i = 0; i < FL_INDEX_COUNT; ++i)\r
+       {\r
+               control->sl_bitmap[i] = 0;\r
+               for (j = 0; j < SL_INDEX_COUNT; ++j)\r
+               {\r
+                       control->blocks[i][j] = &control->block_null;\r
+               }\r
+       }\r
+}\r
+\r
+/*\r
+** Debugging utilities.\r
+*/\r
+\r
+typedef struct integrity_t\r
+{\r
+       int prev_status;\r
+       int status;\r
+} integrity_t;\r
+\r
+#define tlsf_insist(x) { tlsf_assert(x); if (!(x)) { status--; } }\r
+\r
+static void integrity_walker(void* ptr, size_t size, int used, void* user)\r
+{\r
+       block_header_t* block = block_from_ptr(ptr);\r
+       integrity_t* integ = tlsf_cast(integrity_t*, user);\r
+       const int this_prev_status = block_is_prev_free(block) ? 1 : 0;\r
+       const int this_status = block_is_free(block) ? 1 : 0;\r
+       const size_t this_block_size = block_size(block);\r
+\r
+       int status = 0;\r
+       (void)used;\r
+       tlsf_insist(integ->prev_status == this_prev_status && "prev status incorrect");\r
+       tlsf_insist(size == this_block_size && "block size incorrect");\r
+\r
+       integ->prev_status = this_status;\r
+       integ->status += status;\r
+}\r
+\r
+int tlsf_check(tlsf_t tlsf)\r
+{\r
+       int i, j;\r
+\r
+       control_t* control = tlsf_cast(control_t*, tlsf);\r
+       int status = 0;\r
+\r
+       /* Check that the free lists and bitmaps are accurate. */\r
+       for (i = 0; i < FL_INDEX_COUNT; ++i)\r
+       {\r
+               for (j = 0; j < SL_INDEX_COUNT; ++j)\r
+               {\r
+                       const int fl_map = control->fl_bitmap & (1U << i);\r
+                       const int sl_list = control->sl_bitmap[i];\r
+                       const int sl_map = sl_list & (1U << j);\r
+                       const block_header_t* block = control->blocks[i][j];\r
+\r
+                       /* Check that first- and second-level lists agree. */\r
+                       if (!fl_map)\r
+                       {\r
+                               tlsf_insist(!sl_map && "second-level map must be null");\r
+                       }\r
+\r
+                       if (!sl_map)\r
+                       {\r
+                               tlsf_insist(block == &control->block_null && "block list must be null");\r
+                               continue;\r
+                       }\r
+\r
+                       /* Check that there is at least one free block. */\r
+                       tlsf_insist(sl_list && "no free blocks in second-level map");\r
+                       tlsf_insist(block != &control->block_null && "block should not be null");\r
+\r
+                       while (block != &control->block_null)\r
+                       {\r
+                               int fli, sli;\r
+                               tlsf_insist(block_is_free(block) && "block should be free");\r
+                               tlsf_insist(!block_is_prev_free(block) && "blocks should have coalesced");\r
+                               tlsf_insist(!block_is_free(block_next(block)) && "blocks should have coalesced");\r
+                               tlsf_insist(block_is_prev_free(block_next(block)) && "block should be free");\r
+                               tlsf_insist(block_size(block) >= block_size_min && "block not minimum size");\r
+\r
+                               mapping_insert(block_size(block), &fli, &sli);\r
+                               tlsf_insist(fli == i && sli == j && "block size indexed in wrong list");\r
+                               block = block->next_free;\r
+                       }\r
+               }\r
+       }\r
+\r
+       return status;\r
+}\r
+\r
+#undef tlsf_insist\r
+\r
+static void default_walker(void* ptr, size_t size, int used, void* user)\r
+{\r
+       (void)user;\r
+       printf("\t%p %s size: %x (%p)\n", ptr, used ? "used" : "free", (unsigned int)size, block_from_ptr(ptr));\r
+}\r
+\r
+void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user)\r
+{\r
+       tlsf_walker pool_walker = walker ? walker : default_walker;\r
+       block_header_t* block =\r
+               offset_to_block(pool, -(int)block_header_overhead);\r
+\r
+       while (block && !block_is_last(block))\r
+       {\r
+               pool_walker(\r
+                       block_to_ptr(block),\r
+                       block_size(block),\r
+                       !block_is_free(block),\r
+                       user);\r
+               block = block_next(block);\r
+       }\r
+}\r
+\r
+size_t tlsf_block_size(void* ptr)\r
+{\r
+       size_t size = 0;\r
+       if (ptr)\r
+       {\r
+               const block_header_t* block = block_from_ptr(ptr);\r
+               size = block_size(block);\r
+       }\r
+       return size;\r
+}\r
+\r
+int tlsf_check_pool(pool_t pool)\r
+{\r
+       /* Check that the blocks are physically correct. */\r
+       integrity_t integ = { 0, 0 };\r
+       tlsf_walk_pool(pool, integrity_walker, &integ);\r
+\r
+       return integ.status;\r
+}\r
+\r
+/*
+** Size of the TLSF structures in a given memory block passed to
+** tlsf_create, equal to the size of a control_t.
+** Callers must reserve this many bytes at the start of the memory
+** handed to tlsf_create / tlsf_create_with_pool.
+*/
+size_t tlsf_size(void)
+{
+	return sizeof(control_t);
+}
+\r
+/* Base alignment (ALIGN_SIZE) that pool memory and requests are rounded to. */
+size_t tlsf_align_size(void)
+{
+	return ALIGN_SIZE;
+}
+\r
+/* Smallest internal block size the allocator will create. */
+size_t tlsf_block_size_min(void)
+{
+	return block_size_min;
+}
+\r
+/* Largest internal block size the allocator can manage. */
+size_t tlsf_block_size_max(void)
+{
+	return block_size_max;
+}
+\r
+/*
+** Overhead of the TLSF structures in a given memory block passed to
+** tlsf_add_pool, equal to the overhead of a free block and the
+** sentinel block. Subtract this from a pool's byte size to get the
+** largest possible single allocation from that pool.
+*/
+size_t tlsf_pool_overhead(void)
+{
+	return 2 * block_header_overhead;
+}
+\r
+/* Per-allocation overhead: one block header precedes each user pointer. */
+size_t tlsf_alloc_overhead(void)
+{
+	return block_header_overhead;
+}
+\r
+/*
+** Add a region of memory ('mem', 'bytes' long) as a pool managed by
+** 'tlsf'. The region must be ALIGN_SIZE-aligned and large enough to
+** hold one free block plus the sentinel (see tlsf_pool_overhead).
+** Returns 'mem' on success, 0 on misalignment or bad size.
+*/
+pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes)
+{
+	block_header_t* block;
+	block_header_t* next;
+
+	const size_t pool_overhead = tlsf_pool_overhead();
+	/* NOTE(review): if bytes < pool_overhead this subtraction wraps
+	** (size_t is unsigned); the wrapped value is only rejected below
+	** because it exceeds block_size_max — verify that invariant holds
+	** for all configurations. */
+	const size_t pool_bytes = align_down(bytes - pool_overhead, ALIGN_SIZE);
+
+	/* NOTE(review): tlsf_create performs the same check with a tlsfptr_t
+	** cast; ptrdiff_t here is inconsistent (harmless on common targets). */
+	if (((ptrdiff_t)mem % ALIGN_SIZE) != 0)
+	{
+		printf("tlsf_add_pool: Memory must be aligned by %u bytes.\n",
+			(unsigned int)ALIGN_SIZE);
+		return 0;
+	}
+
+	if (pool_bytes < block_size_min || pool_bytes > block_size_max)
+	{
+#if defined (TLSF_64BIT)
+		printf("tlsf_add_pool: Memory size must be between 0x%x and 0x%x00 bytes.\n", 
+			(unsigned int)(pool_overhead + block_size_min),
+			(unsigned int)((pool_overhead + block_size_max) / 256));
+#else
+		printf("tlsf_add_pool: Memory size must be between %u and %u bytes.\n", 
+			(unsigned int)(pool_overhead + block_size_min),
+			(unsigned int)(pool_overhead + block_size_max));
+#endif
+		return 0;
+	}
+
+	/*
+	** Create the main free block. Offset the start of the block slightly
+	** so that the prev_phys_block field falls outside of the pool -
+	** it will never be used.
+	*/
+	block = offset_to_block(mem, -(tlsfptr_t)block_header_overhead);
+	block_set_size(block, pool_bytes);
+	block_set_free(block);
+	block_set_prev_used(block);
+	block_insert(tlsf_cast(control_t*, tlsf), block);
+
+	/* Split the block to create a zero-size sentinel block. */
+	next = block_link_next(block);
+	block_set_size(next, 0);
+	block_set_used(next);
+	block_set_prev_free(next);
+
+	return mem;
+}
+\r
+/*
+** Detach a pool from the allocator. The pool must be completely free:
+** one free block followed directly by the zero-size sentinel (asserted
+** below). The memory itself is not touched and remains owned by the
+** caller.
+*/
+void tlsf_remove_pool(tlsf_t tlsf, pool_t pool)
+{
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	block_header_t* block = offset_to_block(pool, -(int)block_header_overhead);
+
+	int fl = 0, sl = 0;
+
+	tlsf_assert(block_is_free(block) && "block should be free");
+	tlsf_assert(!block_is_free(block_next(block)) && "next block should not be free");
+	tlsf_assert(block_size(block_next(block)) == 0 && "next block size should be zero");
+
+	/* Unlink the pool's single free block from the segregated lists. */
+	mapping_insert(block_size(block), &fl, &sl);
+	remove_free_block(control, block, fl, sl);
+}
+\r
+/*
+** TLSF main interface.
+*/
+
+/* NOTE(review): _DEBUG is an MSVC convention and a reserved-style
+** identifier; on other toolchains this self-test compiles out unless
+** _DEBUG is defined explicitly. The declarator should also be
+** `test_ffs_fls(void)` in C — `()` means "unspecified parameters". */
+#if _DEBUG
+/*
+** Self-test of the find-first-set / find-last-set bit helpers.
+** Returns 0 on success; otherwise a bitmask identifying which of the
+** individual checks failed (also printed to stdout).
+*/
+int test_ffs_fls()
+{
+	/* Verify ffs/fls work properly. */
+	int rv = 0;
+	rv += (tlsf_ffs(0) == -1) ? 0 : 0x1;
+	rv += (tlsf_fls(0) == -1) ? 0 : 0x2;
+	rv += (tlsf_ffs(1) == 0) ? 0 : 0x4;
+	rv += (tlsf_fls(1) == 0) ? 0 : 0x8;
+	rv += (tlsf_ffs(0x80000000) == 31) ? 0 : 0x10;
+	rv += (tlsf_ffs(0x80008000) == 15) ? 0 : 0x20;
+	rv += (tlsf_fls(0x80000008) == 31) ? 0 : 0x40;
+	rv += (tlsf_fls(0x7FFFFFFF) == 30) ? 0 : 0x80;
+
+#if defined (TLSF_64BIT)
+	rv += (tlsf_fls_sizet(0x80000000) == 31) ? 0 : 0x100;
+	rv += (tlsf_fls_sizet(0x100000000) == 32) ? 0 : 0x200;
+	rv += (tlsf_fls_sizet(0xffffffffffffffff) == 63) ? 0 : 0x400;
+#endif
+
+	if (rv)
+	{
+		printf("test_ffs_fls: %x ffs/fls tests failed.\n", rv);
+	}
+	return rv;
+}
+#endif
+\r
+/*
+** Initialize a TLSF control structure in 'mem' (which must provide at
+** least tlsf_size() bytes, ALIGN_SIZE-aligned). Returns the allocator
+** handle, or 0 on misalignment (or bit-helper self-test failure in
+** debug builds). No pool is attached yet; see tlsf_add_pool.
+*/
+tlsf_t tlsf_create(void* mem)
+{
+#if _DEBUG
+	if (test_ffs_fls())
+	{
+		return 0;
+	}
+#endif
+
+	if (((tlsfptr_t)mem % ALIGN_SIZE) != 0)
+	{
+		printf("tlsf_create: Memory must be aligned to %u bytes.\n",
+			(unsigned int)ALIGN_SIZE);
+		return 0;
+	}
+
+	control_construct(tlsf_cast(control_t*, mem));
+
+	return tlsf_cast(tlsf_t, mem);
+}
+\r
+/*
+** Convenience constructor: place the control structure at the start of
+** 'mem' and use the remainder ('bytes' - tlsf_size()) as the first pool.
+** NOTE(review): the tlsf_create result is not checked before being
+** passed to tlsf_add_pool — a misaligned 'mem' returns 0 and the
+** subsequent add dereferences it; confirm callers guarantee alignment.
+*/
+tlsf_t tlsf_create_with_pool(void* mem, size_t bytes)
+{
+	tlsf_t tlsf = tlsf_create(mem);
+	tlsf_add_pool(tlsf, (char*)mem + tlsf_size(), bytes - tlsf_size());
+	return tlsf;
+}
+\r
+/*
+** Destroy an allocator. TLSF allocates nothing itself (all state lives
+** in caller-provided memory), so there is nothing to release.
+*/
+void tlsf_destroy(tlsf_t tlsf)
+{
+	/* Nothing to do. */
+	(void)tlsf;
+}
+\r
+/*
+** Return the pool created by tlsf_create_with_pool: it starts
+** immediately after the control structure.
+*/
+pool_t tlsf_get_pool(tlsf_t tlsf)
+{
+	return tlsf_cast(pool_t, (char*)tlsf + tlsf_size());
+}
+\r
+/*
+** Allocate 'size' bytes, rounded up to ALIGN_SIZE and the minimum
+** block size by adjust_request_size. Returns NULL when no suitable
+** free block exists (block_prepare_used handles a NULL block).
+*/
+void* tlsf_malloc(tlsf_t tlsf, size_t size)
+{
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	const size_t adjust = adjust_request_size(size, ALIGN_SIZE);
+	block_header_t* block = block_locate_free(control, adjust);
+	return block_prepare_used(control, block, adjust);
+}
+\r
+/*
+** Allocate 'size' bytes whose user pointer is aligned to 'align'.
+** For align <= ALIGN_SIZE this degenerates to a plain tlsf_malloc.
+** Otherwise a larger block is located and any leading alignment gap
+** is trimmed off and returned to the free lists. Returns NULL on
+** failure or when 'size' is 0.
+*/
+void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t size)
+{
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	const size_t adjust = adjust_request_size(size, ALIGN_SIZE);
+
+	/*
+	** We must allocate an additional minimum block size bytes so that if
+	** our free block will leave an alignment gap which is smaller, we can
+	** trim a leading free block and release it back to the pool. We must
+	** do this because the previous physical block is in use, therefore
+	** the prev_phys_block field is not valid, and we can't simply adjust
+	** the size of that block.
+	*/
+	const size_t gap_minimum = sizeof(block_header_t);
+	const size_t size_with_gap = adjust_request_size(adjust + align + gap_minimum, align);
+
+	/*
+	** If alignment is less than or equals base alignment, we're done.
+	** If we requested 0 bytes, return null, as tlsf_malloc(0) does.
+	*/
+	const size_t aligned_size = (adjust && align > ALIGN_SIZE) ? size_with_gap : adjust;
+
+	block_header_t* block = block_locate_free(control, aligned_size);
+
+	/* This can't be a static assert. */
+	tlsf_assert(sizeof(block_header_t) == block_size_min + block_header_overhead);
+
+	if (block)
+	{
+		void* ptr = block_to_ptr(block);
+		void* aligned = align_ptr(ptr, align);
+		/* Bytes between the block's natural pointer and the aligned one. */
+		size_t gap = tlsf_cast(size_t,
+			tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr));
+
+		/* If gap size is too small, offset to next aligned boundary. */
+		if (gap && gap < gap_minimum)
+		{
+			const size_t gap_remain = gap_minimum - gap;
+			const size_t offset = tlsf_max(gap_remain, align);
+			const void* next_aligned = tlsf_cast(void*,
+				tlsf_cast(tlsfptr_t, aligned) + offset);
+
+			aligned = align_ptr(next_aligned, align);
+			gap = tlsf_cast(size_t,
+				tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr));
+		}
+
+		if (gap)
+		{
+			tlsf_assert(gap >= gap_minimum && "gap size too small");
+			/* Split off the leading gap as its own free block. */
+			block = block_trim_free_leading(control, block, gap);
+		}
+	}
+
+	return block_prepare_used(control, block, adjust);
+}
+\r
+/*
+** Release an allocation back to its pool. Freeing NULL is a no-op.
+** The freed block is coalesced with free physical neighbours on both
+** sides before being reinserted into the segregated free lists.
+** Double-frees are caught only by the debug assert below.
+*/
+void tlsf_free(tlsf_t tlsf, void* ptr)
+{
+	/* Don't attempt to free a NULL pointer. */
+	if (ptr)
+	{
+		control_t* control = tlsf_cast(control_t*, tlsf);
+		block_header_t* block = block_from_ptr(ptr);
+		tlsf_assert(!block_is_free(block) && "block already marked as free");
+		block_mark_as_free(block);
+		block = block_merge_prev(control, block);
+		block = block_merge_next(control, block);
+		block_insert(control, block);
+	}
+}
+\r
+/*
+** The TLSF block information provides us with enough information to
+** provide a reasonably intelligent implementation of realloc, growing or
+** shrinking the currently allocated block as required.
+**
+** This routine handles the somewhat esoteric edge cases of realloc:
+** - a non-zero size with a null pointer will behave like malloc
+** - a zero size with a non-null pointer will behave like free
+** - a request that cannot be satisfied will leave the original buffer
+**   untouched
+** - an extended buffer size will leave the newly-allocated area with
+**   contents undefined
+*/
+void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size)
+{
+	control_t* control = tlsf_cast(control_t*, tlsf);
+	void* p = 0;
+
+	/* Zero-size requests are treated as free. */
+	if (ptr && size == 0)
+	{
+		tlsf_free(tlsf, ptr);
+	}
+	/* Requests with NULL pointers are treated as malloc. */
+	else if (!ptr)
+	{
+		p = tlsf_malloc(tlsf, size);
+	}
+	else
+	{
+		block_header_t* block = block_from_ptr(ptr);
+		block_header_t* next = block_next(block);
+
+		const size_t cursize = block_size(block);
+		/* Merging with a free neighbour also reclaims its header. */
+		const size_t combined = cursize + block_size(next) + block_header_overhead;
+		const size_t adjust = adjust_request_size(size, ALIGN_SIZE);
+
+		tlsf_assert(!block_is_free(block) && "block already marked as free");
+
+		/*
+		** If the next block is used, or when combined with the current
+		** block, does not offer enough space, we must reallocate and copy.
+		*/
+		if (adjust > cursize && (!block_is_free(next) || adjust > combined))
+		{
+			p = tlsf_malloc(tlsf, size);
+			if (p)
+			{
+				const size_t minsize = tlsf_min(cursize, size);
+				memcpy(p, ptr, minsize);
+				tlsf_free(tlsf, ptr);
+			}
+		}
+		else
+		{
+			/* Do we need to expand to the next block? */
+			if (adjust > cursize)
+			{
+				block_merge_next(control, block);
+				block_mark_as_used(block);
+			}
+
+			/* Trim the resulting block and return the original pointer. */
+			block_trim_used(control, block, adjust);
+			p = ptr;
+		}
+	}
+
+	return p;
+}
diff --git a/deps/lightrec/tlsf/tlsf.h b/deps/lightrec/tlsf/tlsf.h
new file mode 100644 (file)
index 0000000..e9b5a91
--- /dev/null
@@ -0,0 +1,90 @@
+#ifndef INCLUDED_tlsf\r
+#define INCLUDED_tlsf\r
+\r
+/*\r
+** Two Level Segregated Fit memory allocator, version 3.1.\r
+** Written by Matthew Conte\r
+**     http://tlsf.baisoku.org\r
+**\r
+** Based on the original documentation by Miguel Masmano:\r
+**     http://www.gii.upv.es/tlsf/main/docs\r
+**\r
+** This implementation was written to the specification\r
+** of the document, therefore no GPL restrictions apply.\r
+** \r
+** Copyright (c) 2006-2016, Matthew Conte\r
+** All rights reserved.\r
+** \r
+** Redistribution and use in source and binary forms, with or without\r
+** modification, are permitted provided that the following conditions are met:\r
+**     * Redistributions of source code must retain the above copyright\r
+**       notice, this list of conditions and the following disclaimer.\r
+**     * Redistributions in binary form must reproduce the above copyright\r
+**       notice, this list of conditions and the following disclaimer in the\r
+**       documentation and/or other materials provided with the distribution.\r
+**     * Neither the name of the copyright holder nor the\r
+**       names of its contributors may be used to endorse or promote products\r
+**       derived from this software without specific prior written permission.\r
+** \r
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND\r
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\r
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r
+** DISCLAIMED. IN NO EVENT SHALL MATTHEW CONTE BE LIABLE FOR ANY\r
+** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\r
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\r
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\r
+** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+*/\r
+\r
+#include <stddef.h>\r
+\r
+#if defined(__cplusplus)\r
+extern "C" {\r
+#endif\r
+\r
+/* tlsf_t: a TLSF structure. Can contain 1 to N pools. */\r
+/* pool_t: a block of memory that TLSF can manage. */\r
+typedef void* tlsf_t;\r
+typedef void* pool_t;\r
+\r
+/* Create/destroy a memory pool. */\r
+tlsf_t tlsf_create(void* mem);\r
+tlsf_t tlsf_create_with_pool(void* mem, size_t bytes);\r
+void tlsf_destroy(tlsf_t tlsf);\r
+pool_t tlsf_get_pool(tlsf_t tlsf);\r
+\r
+/* Add/remove memory pools. */\r
+pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes);\r
+void tlsf_remove_pool(tlsf_t tlsf, pool_t pool);\r
+\r
+/* malloc/memalign/realloc/free replacements. */\r
+void* tlsf_malloc(tlsf_t tlsf, size_t bytes);\r
+void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t bytes);\r
+void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size);\r
+void tlsf_free(tlsf_t tlsf, void* ptr);\r
+\r
+/* Returns internal block size, not original request size */\r
+size_t tlsf_block_size(void* ptr);\r
+\r
+/* Overheads/limits of internal structures. */\r
+size_t tlsf_size(void);\r
+size_t tlsf_align_size(void);\r
+size_t tlsf_block_size_min(void);\r
+size_t tlsf_block_size_max(void);\r
+size_t tlsf_pool_overhead(void);\r
+size_t tlsf_alloc_overhead(void);\r
+\r
+/* Debugging. */\r
+typedef void (*tlsf_walker)(void* ptr, size_t size, int used, void* user);\r
+void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user);\r
+/* Returns nonzero if any internal consistency check fails. */\r
+int tlsf_check(tlsf_t tlsf);\r
+int tlsf_check_pool(pool_t pool);\r
+\r
+#if defined(__cplusplus)\r
+};\r
+#endif\r
+\r
+#endif\r