X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightrec%2Frecompiler.c;h=a6d2f322a71c03f89204399047abdecca9b8c0c4;hb=a5a6f7b82ed88f1ac3178c32c9bda22eb612814b;hp=634d3d015bb7ff64fe241b10b93285a6646dddd4;hpb=3918505613cb814f8f5e0e8e0471f7b2a2cd8464;p=pcsx_rearmed.git diff --git a/deps/lightrec/recompiler.c b/deps/lightrec/recompiler.c index 634d3d01..a6d2f322 100644 --- a/deps/lightrec/recompiler.c +++ b/deps/lightrec/recompiler.c @@ -1,21 +1,14 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later /* - * Copyright (C) 2019-2020 Paul Cercueil - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. + * Copyright (C) 2019-2021 Paul Cercueil */ +#include "blockcache.h" #include "debug.h" #include "interpreter.h" #include "lightrec-private.h" #include "memmanager.h" +#include "reaper.h" #include "slist.h" #include @@ -23,56 +16,167 @@ #include #include #include +#ifdef __linux__ +#include +#endif struct block_rec { struct block *block; struct slist_elm slist; + unsigned int requests; + bool compiling; +}; + +struct recompiler_thd { + struct lightrec_cstate *cstate; + unsigned int tid; + pthread_t thd; }; struct recompiler { struct lightrec_state *state; - pthread_t thd; pthread_cond_t cond; + pthread_cond_t cond2; pthread_mutex_t mutex; - bool stop; - struct block *current_block; + bool stop, must_flush; struct slist_elm slist; + + pthread_mutex_t alloc_mutex; + + unsigned int nb_recs; + struct recompiler_thd thds[]; }; -static void lightrec_compile_list(struct recompiler *rec) +static unsigned int get_processors_count(void) +{ + unsigned int nb = 1; + +#if defined(PTW32_VERSION) + nb = pthread_num_processors_np(); +#elif defined(__APPLE__) || defined(__FreeBSD__) + int count; + size_t size = sizeof(count); + + nb = sysctlbyname("hw.ncpu", &count, &size, NULL, 0) ? 1 : count; +#elif defined(_SC_NPROCESSORS_ONLN) + nb = sysconf(_SC_NPROCESSORS_ONLN); +#endif + + return nb < 1 ? 1 : nb; +} + +static struct block_rec * lightrec_get_best_elm(struct slist_elm *head) +{ + struct block_rec *block_rec, *best = NULL; + struct slist_elm *elm; + + for (elm = slist_first(head); elm; elm = elm->next) { + block_rec = container_of(elm, struct block_rec, slist); + + if (!block_rec->compiling + && (!best || block_rec->requests > best->requests)) + best = block_rec; + } + + return best; +} + +static bool lightrec_cancel_block_rec(struct recompiler *rec, + struct block_rec *block_rec) +{ + if (block_rec->compiling) { + /* Block is being recompiled - wait for + * completion */ + pthread_cond_wait(&rec->cond2, &rec->mutex); + + /* We can't guarantee the signal was for us. + * Since block_rec may have been removed while + * we were waiting on the condition, we cannot + * check block_rec->compiling again. The best + * thing is just to restart the function. */ + return false; + } + + /* Block is not yet being processed - remove it from the list */ + slist_remove(&rec->slist, &block_rec->slist); + lightrec_free(rec->state, MEM_FOR_LIGHTREC, + sizeof(*block_rec), block_rec); + + return true; +} + +static void lightrec_cancel_list(struct recompiler *rec) +{ + struct block_rec *block_rec; + struct slist_elm *elm, *head = &rec->slist; + + for (elm = slist_first(head); elm; elm = slist_first(head)) { + block_rec = container_of(elm, struct block_rec, slist); + lightrec_cancel_block_rec(rec, block_rec); + } +} + +static void lightrec_flush_code_buffer(struct lightrec_state *state, void *d) +{ + struct recompiler *rec = d; + + lightrec_remove_outdated_blocks(state->block_cache, NULL); + rec->must_flush = false; +} + +static void lightrec_compile_list(struct recompiler *rec, + struct recompiler_thd *thd) { struct block_rec *block_rec; - struct slist_elm *next; struct block *block; int ret; - while (!!(next = slist_first(&rec->slist))) { - block_rec = container_of(next, struct block_rec, slist); + while (!!(block_rec = lightrec_get_best_elm(&rec->slist))) { + block_rec->compiling = true; block = block_rec->block; - rec->current_block = block; pthread_mutex_unlock(&rec->mutex); - ret = lightrec_compile_block(block); - if (ret) { - pr_err("Unable to compile block at PC 0x%x: %d\n", - block->pc, ret); + if (likely(!block_has_flag(block, BLOCK_IS_DEAD))) { + ret = lightrec_compile_block(thd->cstate, block); + if (ret == -ENOMEM) { + /* Code buffer is full. Request the reaper to + * flush it. */ + + pthread_mutex_lock(&rec->mutex); + block_rec->compiling = false; + pthread_cond_broadcast(&rec->cond2); + + if (!rec->must_flush) { + rec->must_flush = true; + lightrec_cancel_list(rec); + + lightrec_reaper_add(rec->state->reaper, + lightrec_flush_code_buffer, + rec); + } + return; + } + + if (ret) { + pr_err("Unable to compile block at "PC_FMT": %d\n", + block->pc, ret); + } } pthread_mutex_lock(&rec->mutex); - slist_remove(&rec->slist, next); + slist_remove(&rec->slist, &block_rec->slist); lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*block_rec), block_rec); - pthread_cond_signal(&rec->cond); + pthread_cond_broadcast(&rec->cond2); } - - rec->current_block = NULL; } static void * lightrec_recompiler_thd(void *d) { - struct recompiler *rec = d; + struct recompiler_thd *thd = d; + struct recompiler *rec = container_of(thd, struct recompiler, thds[thd->tid]); pthread_mutex_lock(&rec->mutex); @@ -85,7 +189,7 @@ static void * lightrec_recompiler_thd(void *d) } while (slist_empty(&rec->slist)); - lightrec_compile_list(rec); + lightrec_compile_list(rec, thd); } out_unlock: @@ -96,96 +200,162 @@ out_unlock: struct recompiler *lightrec_recompiler_init(struct lightrec_state *state) { struct recompiler *rec; + unsigned int i, nb_recs, nb_cpus; int ret; - rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec)); + nb_cpus = get_processors_count(); + nb_recs = nb_cpus < 2 ? 1 : nb_cpus - 1; + + rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec) + + nb_recs * sizeof(*rec->thds)); if (!rec) { pr_err("Cannot create recompiler: Out of memory\n"); return NULL; } + for (i = 0; i < nb_recs; i++) { + rec->thds[i].tid = i; + rec->thds[i].cstate = NULL; + } + + for (i = 0; i < nb_recs; i++) { + rec->thds[i].cstate = lightrec_create_cstate(state); + if (!rec->thds[i].cstate) { + pr_err("Cannot create recompiler: Out of memory\n"); + goto err_free_cstates; + } + } + rec->state = state; rec->stop = false; - rec->current_block = NULL; + rec->must_flush = false; + rec->nb_recs = nb_recs; slist_init(&rec->slist); ret = pthread_cond_init(&rec->cond, NULL); if (ret) { pr_err("Cannot init cond variable: %d\n", ret); - goto err_free_rec; + goto err_free_cstates; } - ret = pthread_mutex_init(&rec->mutex, NULL); + ret = pthread_cond_init(&rec->cond2, NULL); if (ret) { - pr_err("Cannot init mutex variable: %d\n", ret); + pr_err("Cannot init cond variable: %d\n", ret); goto err_cnd_destroy; } - ret = pthread_create(&rec->thd, NULL, lightrec_recompiler_thd, rec); + ret = pthread_mutex_init(&rec->alloc_mutex, NULL); if (ret) { - pr_err("Cannot create recompiler thread: %d\n", ret); - goto err_mtx_destroy; + pr_err("Cannot init alloc mutex variable: %d\n", ret); + goto err_cnd2_destroy; } + ret = pthread_mutex_init(&rec->mutex, NULL); + if (ret) { + pr_err("Cannot init mutex variable: %d\n", ret); + goto err_alloc_mtx_destroy; + } + + for (i = 0; i < nb_recs; i++) { + ret = pthread_create(&rec->thds[i].thd, NULL, + lightrec_recompiler_thd, &rec->thds[i]); + if (ret) { + pr_err("Cannot create recompiler thread: %d\n", ret); + /* TODO: Handle cleanup properly */ + goto err_mtx_destroy; + } + } + + pr_info("Threaded recompiler started with %u workers.\n", nb_recs); + return rec; err_mtx_destroy: pthread_mutex_destroy(&rec->mutex); +err_alloc_mtx_destroy: + pthread_mutex_destroy(&rec->alloc_mutex); +err_cnd2_destroy: + pthread_cond_destroy(&rec->cond2); err_cnd_destroy: pthread_cond_destroy(&rec->cond); -err_free_rec: +err_free_cstates: + for (i = 0; i < nb_recs; i++) { + if (rec->thds[i].cstate) + lightrec_free_cstate(rec->thds[i].cstate); + } lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec), rec); return NULL; } void lightrec_free_recompiler(struct recompiler *rec) { + unsigned int i; + rec->stop = true; /* Stop the thread */ pthread_mutex_lock(&rec->mutex); - pthread_cond_signal(&rec->cond); + pthread_cond_broadcast(&rec->cond); + lightrec_cancel_list(rec); pthread_mutex_unlock(&rec->mutex); - pthread_join(rec->thd, NULL); + + for (i = 0; i < rec->nb_recs; i++) + pthread_join(rec->thds[i].thd, NULL); + + for (i = 0; i < rec->nb_recs; i++) + lightrec_free_cstate(rec->thds[i].cstate); pthread_mutex_destroy(&rec->mutex); + pthread_mutex_destroy(&rec->alloc_mutex); pthread_cond_destroy(&rec->cond); + pthread_cond_destroy(&rec->cond2); lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec), rec); } int lightrec_recompiler_add(struct recompiler *rec, struct block *block) { - struct slist_elm *elm, *prev; + struct slist_elm *elm; struct block_rec *block_rec; + u32 pc1, pc2; int ret = 0; pthread_mutex_lock(&rec->mutex); + /* If the recompiler must flush the code cache, we can't add the new + * job. It will be re-added next time the block's address is jumped to + * again. */ + if (rec->must_flush) + goto out_unlock; + /* If the block is marked as dead, don't compile it, it will be removed * as soon as it's safe. */ - if (block->flags & BLOCK_IS_DEAD) + if (block_has_flag(block, BLOCK_IS_DEAD)) goto out_unlock; - for (elm = slist_first(&rec->slist), prev = NULL; elm; - prev = elm, elm = elm->next) { + for (elm = slist_first(&rec->slist); elm; elm = elm->next) { block_rec = container_of(elm, struct block_rec, slist); if (block_rec->block == block) { - /* The block to compile is already in the queue - bump - * it to the top of the list, unless the block is being - * recompiled. */ - if (prev && !(block->flags & BLOCK_SHOULD_RECOMPILE)) { - slist_remove_next(prev); - slist_append(&rec->slist, elm); - } + /* The block to compile is already in the queue - + * increment its counter to increase its priority */ + block_rec->requests++; + goto out_unlock; + } + pc1 = kunseg(block_rec->block->pc); + pc2 = kunseg(block->pc); + if (pc2 >= pc1 && pc2 < pc1 + block_rec->block->nb_ops * 4) { + /* The block we want to compile is already covered by + * another one in the queue - increment its counter to + * increase its priority */ + block_rec->requests++; goto out_unlock; } } /* By the time this function was called, the block has been recompiled * and ins't in the wait list anymore. Just return here. */ - if (block->function && !(block->flags & BLOCK_SHOULD_RECOMPILE)) + if (block->function && !block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) goto out_unlock; block_rec = lightrec_malloc(rec->state, MEM_FOR_LIGHTREC, @@ -195,17 +365,15 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) goto out_unlock; } - pr_debug("Adding block PC 0x%x to recompiler\n", block->pc); + pr_debug("Adding block "PC_FMT" to recompiler\n", block->pc); block_rec->block = block; + block_rec->compiling = false; + block_rec->requests = 1; elm = &rec->slist; - /* If the block is being recompiled, push it to the end of the queue; - * otherwise push it to the front of the queue. */ - if (block->flags & BLOCK_SHOULD_RECOMPILE) - for (; elm->next; elm = elm->next); - + /* Push the new entry to the front of the queue */ slist_append(elm, &block_rec->slist); /* Signal the thread */ @@ -213,6 +381,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) out_unlock: pthread_mutex_unlock(&rec->mutex); + return ret; } @@ -223,49 +392,59 @@ void lightrec_recompiler_remove(struct recompiler *rec, struct block *block) pthread_mutex_lock(&rec->mutex); - for (elm = slist_first(&rec->slist); elm; elm = elm->next) { - block_rec = container_of(elm, struct block_rec, slist); + while (true) { + for (elm = slist_first(&rec->slist); elm; elm = elm->next) { + block_rec = container_of(elm, struct block_rec, slist); - if (block_rec->block == block) { - if (block == rec->current_block) { - /* Block is being recompiled - wait for - * completion */ - do { - pthread_cond_wait(&rec->cond, - &rec->mutex); - } while (block == rec->current_block); - } else { - /* Block is not yet being processed - remove it - * from the list */ - slist_remove(&rec->slist, elm); - lightrec_free(rec->state, MEM_FOR_LIGHTREC, - sizeof(*block_rec), block_rec); + if (block_rec->block == block) { + if (lightrec_cancel_block_rec(rec, block_rec)) + goto out_unlock; + + break; } + } + if (!elm) break; - } } +out_unlock: pthread_mutex_unlock(&rec->mutex); } -void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc) +void * lightrec_recompiler_run_first_pass(struct lightrec_state *state, + struct block *block, u32 *pc) { - bool freed; + u8 old_flags; + + /* There's no point in running the first pass if the block will never + * be compiled. Let the main loop run the interpreter instead. */ + if (block_has_flag(block, BLOCK_NEVER_COMPILE)) + return NULL; + + /* The block is marked as dead, and will be removed the next time the + * reaper is run. In the meantime, the old function can still be + * executed. */ + if (block_has_flag(block, BLOCK_IS_DEAD)) + return block->function; + + /* If the block is already fully tagged, there is no point in running + * the first pass. Request a recompilation of the block, and maybe the + * interpreter will run the block in the meantime. */ + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) + lightrec_recompiler_add(state->rec, block); if (likely(block->function)) { - if (block->flags & BLOCK_FULLY_TAGGED) { - freed = atomic_flag_test_and_set(&block->op_list_freed); + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); - if (!freed) { - pr_debug("Block PC 0x%08x is fully tagged" + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) { + pr_debug("Block "PC_FMT" is fully tagged" " - free opcode list\n", block->pc); /* The block was already compiled but the opcode list * didn't get freed yet - do it now */ - lightrec_free_opcode_list(block->state, - block->opcode_list); - block->opcode_list = NULL; + lightrec_free_opcode_list(state, block->opcode_list); } } @@ -274,24 +453,36 @@ void * lightrec_recompiler_run_first_pass(struct block *block, u32 *pc) /* Mark the opcode list as freed, so that the threaded compiler won't * free it while we're using it in the interpreter. */ - freed = atomic_flag_test_and_set(&block->op_list_freed); + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); /* Block wasn't compiled yet - run the interpreter */ - *pc = lightrec_emulate_block(block, *pc); + *pc = lightrec_emulate_block(state, block, *pc); - if (!freed) - atomic_flag_clear(&block->op_list_freed); + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) + block_clear_flags(block, BLOCK_NO_OPCODE_LIST); /* The block got compiled while the interpreter was running. * We can free the opcode list now. */ - if (block->function && (block->flags & BLOCK_FULLY_TAGGED) && - !atomic_flag_test_and_set(&block->op_list_freed)) { - pr_debug("Block PC 0x%08x is fully tagged" - " - free opcode list\n", block->pc); + if (block->function && block_has_flag(block, BLOCK_FULLY_TAGGED)) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); + + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) { + pr_debug("Block "PC_FMT" is fully tagged" + " - free opcode list\n", block->pc); - lightrec_free_opcode_list(block->state, block->opcode_list); - block->opcode_list = NULL; + lightrec_free_opcode_list(state, block->opcode_list); + } } return NULL; } + +void lightrec_code_alloc_lock(struct lightrec_state *state) +{ + pthread_mutex_lock(&state->rec->alloc_mutex); +} + +void lightrec_code_alloc_unlock(struct lightrec_state *state) +{ + pthread_mutex_unlock(&state->rec->alloc_mutex); +}