From: Paul Cercueil
Date: Thu, 14 Dec 2023 18:10:11 +0000 (+0100)
Subject: git subrepo pull --force deps/lightrec
X-Git-Tag: r24l~18
X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a5a6f7b82ed88f1ac3178c32c9bda22eb612814b;p=pcsx_rearmed.git

git subrepo pull --force deps/lightrec

subrepo:
  subdir:   "deps/lightrec"
  merged:   "d88760e40c"
upstream:
  origin:   "https://github.com/pcercuei/lightrec.git"
  branch:   "master"
  commit:   "d88760e40c"
git-subrepo:
  version:  "0.4.6"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "110b9eb"
---

diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo
index 868112fb..1f5764a3 100644
--- a/deps/lightrec/.gitrepo
+++ b/deps/lightrec/.gitrepo
@@ -6,7 +6,7 @@
 [subrepo]
 	remote = https://github.com/pcercuei/lightrec.git
 	branch = master
-	commit = b8ce1f3dab45d7c665aa406a0ff183ae6565205d
-	parent = 305945333b4ec7d6910a077278c850b1cd887057
+	commit = d88760e40c1d2a5698c7b6f6a53cce31fda799f0
+	parent = 963f41620dce6ddb2527b7e3dced09564031f783
 	method = merge
 	cmdver = 0.4.6
diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt
index dfe35a02..9ca058ec 100644
--- a/deps/lightrec/CMakeLists.txt
+++ b/deps/lightrec/CMakeLists.txt
@@ -54,7 +54,7 @@ list(APPEND LIGHTREC_HEADERS
 
 option(ENABLE_FIRST_PASS "Run the interpreter as first-pass optimization" ON)
 
-option(ENABLE_THREADED_COMPILER "Enable threaded compiler" OFF)
+option(ENABLE_THREADED_COMPILER "Enable threaded compiler" ON)
 
 if (ENABLE_THREADED_COMPILER)
 	list(APPEND LIGHTREC_SOURCES recompiler.c reaper.c)
diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c
index b8025aac..3875e58e 100644
--- a/deps/lightrec/emitter.c
+++ b/deps/lightrec/emitter.c
@@ -107,7 +107,7 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
 	}
 
 	if (has_ds && op_flag_load_delay(ds->flags)
-	    && opcode_is_load(ds->c) && !state->no_load_delay) {
+	    && opcode_has_load_delay(ds->c) && !state->no_load_delay) {
 		/* If the delay slot is a load opcode, its target register
 		 * will be written after the first opcode of the target is
 		 * executed. Handle this by jumping to a special section of
diff --git a/deps/lightrec/interpreter.c b/deps/lightrec/interpreter.c
index 74cbca50..4267bec2 100644
--- a/deps/lightrec/interpreter.c
+++ b/deps/lightrec/interpreter.c
@@ -189,7 +189,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
 	 * interpreter in that case.
 	 * Same goes for when we have a branch in a delay slot of another
 	 * branch. */
-	load_in_ds = opcode_is_load(op->c) || opcode_is_mfc(op->c);
+	load_in_ds = opcode_has_load_delay(op->c);
 	branch_in_ds = has_delay_slot(op->c);
 
 	if (branch) {
diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c
index 1cf755b6..1cfc4274 100644
--- a/deps/lightrec/lightrec.c
+++ b/deps/lightrec/lightrec.c
@@ -35,6 +35,8 @@ static bool lightrec_block_is_fully_tagged(const struct block *block);
 static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data);
 static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg);
 
+static void lightrec_reap_block(struct lightrec_state *state, void *data);
+
 static void lightrec_default_sb(struct lightrec_state *state, u32 opcode,
 				void *host, u32 addr, u32 data)
 {
@@ -703,9 +705,15 @@ static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
 			if (ENABLE_THREADED_COMPILER)
 				lightrec_recompiler_remove(state->rec, block);
 
-			lightrec_unregister_block(state->block_cache, block);
 			remove_from_code_lut(state->block_cache, block);
-			lightrec_free_block(state, block);
+
+			if (ENABLE_THREADED_COMPILER) {
+				lightrec_reaper_add(state->reaper,
+						    lightrec_reap_block, block);
+			} else {
+				lightrec_unregister_block(state->block_cache, block);
+				lightrec_free_block(state, block);
+			}
 		}
 
 		block = NULL;
@@ -1559,6 +1567,7 @@ static void lightrec_reap_opcode_list(struct lightrec_state *state, void *data)
 int lightrec_compile_block(struct lightrec_cstate *cstate,
 			   struct block *block)
 {
+	struct block *dead_blocks[ARRAY_SIZE(cstate->targets)];
 	u32 was_dead[ARRAY_SIZE(cstate->targets) / 8];
 	struct lightrec_state *state = cstate->state;
 	struct lightrec_branch_target *target;
@@ -1702,6 +1711,8 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
 				was_dead[i / 32] &= ~BIT(i % 32);
 		}
 
+		dead_blocks[i] = block2;
+
 		/* If block2 was pending for compilation, cancel it.
 		 * If it's being compiled right now, wait until it finishes. */
 		if (block2)
@@ -1720,8 +1731,12 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
 		offset = lut_offset(block->pc) + target->offset;
 		lut_write(state, offset, jit_address(target->label));
 
-		offset = block->pc + target->offset * sizeof(u32);
-		block2 = lightrec_find_block(state->block_cache, offset);
+		if (ENABLE_THREADED_COMPILER) {
+			block2 = dead_blocks[i];
+		} else {
+			offset = block->pc + target->offset * sizeof(u32);
+			block2 = lightrec_find_block(state->block_cache, offset);
+		}
 		if (block2) {
 			pr_debug("Reap block 0x%08x as it's covered by block "
 				 "0x%08x\n", block2->pc, block->pc);
diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c
index 792f35c8..199ca40b 100644
--- a/deps/lightrec/optimizer.c
+++ b/deps/lightrec/optimizer.c
@@ -345,7 +345,7 @@ static bool reg_is_read_or_written(const struct opcode *list,
 	return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
 }
 
-bool opcode_is_mfc(union code op)
+static bool opcode_is_mfc(union code op)
 {
 	switch (op.i.op) {
 	case OP_CP0:
@@ -377,7 +377,7 @@ bool opcode_is_mfc(union code op)
 	return false;
 }
 
-bool opcode_is_load(union code op)
+static bool opcode_is_load(union code op)
 {
 	switch (op.i.op) {
 	case OP_LB:
@@ -411,6 +411,12 @@ static bool opcode_is_store(union code op)
 	}
 }
 
+bool opcode_has_load_delay(union code op)
+{
+	return (opcode_is_load(op) && op.i.rt && op.i.op != OP_LWC2)
+		|| opcode_is_mfc(op);
+}
+
 static u8 opcode_get_io_size(union code op)
 {
 	switch (op.i.op) {
@@ -1385,7 +1391,7 @@ static int lightrec_handle_load_delays(struct lightrec_state *state,
 	for (i = 0; i < block->nb_ops; i++) {
 		op = &list[i];
 
-		if (!opcode_is_load(op->c) || !op->c.i.rt || op->c.i.op == OP_LWC2)
+		if (!opcode_has_load_delay(op->c))
 			continue;
 
 		if (!is_delay_slot(list, i)) {
diff --git a/deps/lightrec/optimizer.h b/deps/lightrec/optimizer.h
index f2b1f30f..26666520 100644
--- a/deps/lightrec/optimizer.h
+++ b/deps/lightrec/optimizer.h
@@ -16,8 +16,7 @@ __cnst _Bool opcode_writes_register(union code op, u8 reg);
 __cnst u64 opcode_write_mask(union code op);
 __cnst _Bool has_delay_slot(union code op);
 _Bool is_delay_slot(const struct opcode *list, unsigned int offset);
-__cnst _Bool opcode_is_mfc(union code op);
-__cnst _Bool opcode_is_load(union code op);
+__cnst _Bool opcode_has_load_delay(union code op);
 __cnst _Bool opcode_is_io(union code op);
 __cnst _Bool is_unconditional_jump(union code c);
 __cnst _Bool is_syscall(union code c);
diff --git a/deps/lightrec/recompiler.c b/deps/lightrec/recompiler.c
index c764f119..a6d2f322 100644
--- a/deps/lightrec/recompiler.c
+++ b/deps/lightrec/recompiler.c
@@ -23,6 +23,7 @@
 struct block_rec {
 	struct block *block;
 	struct slist_elm slist;
+	unsigned int requests;
 	bool compiling;
 };
 
@@ -64,19 +65,20 @@ static unsigned int get_processors_count(void)
 	return nb < 1 ? 1 : nb;
 }
 
-static struct slist_elm * lightrec_get_first_elm(struct slist_elm *head)
+static struct block_rec * lightrec_get_best_elm(struct slist_elm *head)
 {
-	struct block_rec *block_rec;
+	struct block_rec *block_rec, *best = NULL;
 	struct slist_elm *elm;
 
 	for (elm = slist_first(head); elm; elm = elm->next) {
 		block_rec = container_of(elm, struct block_rec, slist);
 
-		if (!block_rec->compiling)
-			return elm;
+		if (!block_rec->compiling
+		    && (!best || block_rec->requests > best->requests))
+			best = block_rec;
 	}
 
-	return NULL;
+	return best;
 }
 
 static bool lightrec_cancel_block_rec(struct recompiler *rec,
@@ -126,12 +128,10 @@ static void lightrec_compile_list(struct recompiler *rec,
 				  struct recompiler_thd *thd)
 {
 	struct block_rec *block_rec;
-	struct slist_elm *next;
 	struct block *block;
 	int ret;
 
-	while (!!(next = lightrec_get_first_elm(&rec->slist))) {
-		block_rec = container_of(next, struct block_rec, slist);
+	while (!!(block_rec = lightrec_get_best_elm(&rec->slist))) {
 		block_rec->compiling = true;
 		block = block_rec->block;
 
@@ -166,7 +166,7 @@ static void lightrec_compile_list(struct recompiler *rec,
 
 		pthread_mutex_lock(&rec->mutex);
 
-		slist_remove(&rec->slist, next);
+		slist_remove(&rec->slist, &block_rec->slist);
 		lightrec_free(rec->state, MEM_FOR_LIGHTREC,
 			      sizeof(*block_rec), block_rec);
 		pthread_cond_broadcast(&rec->cond2);
@@ -314,8 +314,9 @@ void lightrec_free_recompiler(struct recompiler *rec)
 
 int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
 {
-	struct slist_elm *elm, *prev;
+	struct slist_elm *elm;
 	struct block_rec *block_rec;
+	u32 pc1, pc2;
 	int ret = 0;
 
 	pthread_mutex_lock(&rec->mutex);
@@ -331,20 +332,23 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
 	if (block_has_flag(block, BLOCK_IS_DEAD))
 		goto out_unlock;
 
-	for (elm = slist_first(&rec->slist), prev = NULL; elm;
-	     prev = elm, elm = elm->next) {
+	for (elm = slist_first(&rec->slist); elm; elm = elm->next) {
 		block_rec = container_of(elm, struct block_rec, slist);
 
 		if (block_rec->block == block) {
-			/* The block to compile is already in the queue - bump
-			 * it to the top of the list, unless the block is being
-			 * recompiled. */
-			if (prev && !block_rec->compiling &&
-			    !block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) {
-				slist_remove_next(prev);
-				slist_append(&rec->slist, elm);
-			}
+			/* The block to compile is already in the queue -
+			 * increment its counter to increase its priority */
+			block_rec->requests++;
+			goto out_unlock;
+		}
 
+		pc1 = kunseg(block_rec->block->pc);
+		pc2 = kunseg(block->pc);
+		if (pc2 >= pc1 && pc2 < pc1 + block_rec->block->nb_ops * 4) {
+			/* The block we want to compile is already covered by
+			 * another one in the queue - increment its counter to
+			 * increase its priority */
+			block_rec->requests++;
 			goto out_unlock;
 		}
 	}
@@ -365,14 +369,11 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
 
 	block_rec->block = block;
 	block_rec->compiling = false;
+	block_rec->requests = 1;
 	elm = &rec->slist;
 
-	/* If the block is being recompiled, push it to the end of the queue;
-	 * otherwise push it to the front of the queue. */
-	if (block_has_flag(block, BLOCK_SHOULD_RECOMPILE))
-		for (; elm->next; elm = elm->next);
-
+	/* Push the new entry to the front of the queue */
 	slist_append(elm, &block_rec->slist);
 
 	/* Signal the thread */
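
Note on the opcode_has_load_delay() helper introduced above: it folds the former opcode_is_load() / opcode_is_mfc() checks into one predicate, and it only reports a load delay when the instruction actually writes a GPR (rt != 0, with LWC2 excluded since it targets a COP2 register). The standalone sketch below mirrors that logic outside of lightrec; the enum values and the bit-field union are simplified stand-ins for lightrec's real disasm.h definitions and assume a little-endian bit-field layout, so treat it as an illustration of the predicate, not the library's actual code.

/* Standalone sketch of the load-delay predicate added in optimizer.c.
 * The enum/union below are simplified stand-ins for lightrec's disasm.h
 * (little-endian bit-field layout assumed); only the decision logic is
 * meant to mirror the patch. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum standard_opcodes {
	OP_CP0 = 0x10, OP_CP2 = 0x12,
	OP_LB  = 0x20, OP_LH  = 0x21, OP_LWL = 0x22, OP_LW   = 0x23,
	OP_LBU = 0x24, OP_LHU = 0x25, OP_LWR = 0x26, OP_LWC2 = 0x32,
};

union code {
	uint32_t opcode;
	struct {
		uint32_t imm :16;	/* bits 0-15 */
		uint32_t rt  :5;	/* bits 16-20: target GPR for loads */
		uint32_t rs  :5;	/* bits 21-25 */
		uint32_t op  :6;	/* bits 26-31: major opcode */
	} i;
};

static bool opcode_is_load(union code op)
{
	switch (op.i.op) {
	case OP_LB: case OP_LBU: case OP_LH: case OP_LHU:
	case OP_LWL: case OP_LW: case OP_LWR: case OP_LWC2:
		return true;
	default:
		return false;
	}
}

static bool opcode_is_mfc(union code op)
{
	/* Simplified: treat any COP0/COP2 opcode as a potential MFC/CFC.
	 * The real helper also decodes the rs sub-field. */
	return op.i.op == OP_CP0 || op.i.op == OP_CP2;
}

/* Same shape as the new helper: a load delay only matters when the
 * instruction writes a GPR (rt != 0, and LWC2 writes a COP2 register). */
static bool opcode_has_load_delay(union code op)
{
	return (opcode_is_load(op) && op.i.rt && op.i.op != OP_LWC2)
		|| opcode_is_mfc(op);
}

int main(void)
{
	union code lw_t0   = { .opcode = 0x8c880000 };	/* lw $t0, 0($a0)   */
	union code lw_zero = { .opcode = 0x8c800000 };	/* lw $zero, 0($a0) */
	union code lwc2    = { .opcode = 0xc8800000 };	/* lwc2 $0, 0($a0)  */

	printf("lw $t0   -> %d\n", opcode_has_load_delay(lw_t0));	/* 1 */
	printf("lw $zero -> %d\n", opcode_has_load_delay(lw_zero));	/* 0 */
	printf("lwc2     -> %d\n", opcode_has_load_delay(lwc2));	/* 0 */
	return 0;
}

Built with any C99 compiler, the sketch prints 1 only for the lw-to-$t0 case, matching the check that emitter.c and interpreter.c now share.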