Merge pull request #836 from pcercuei/update-lightrec-20240611
[pcsx_rearmed.git] / deps/lightrec/recompiler.c
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
 * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
 */

#include "blockcache.h"
#include "debug.h"
#include "interpreter.h"
#include "lightrec-private.h"
#include "memmanager.h"
#include "reaper.h"
#include "slist.h"

#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>
#include <pthread.h>
#ifdef __linux__
#include <unistd.h>
#endif
#if defined(__APPLE__) || defined(__FreeBSD__)
/* Assumed missing from the original: sysctlbyname(), used below in
 * get_processors_count(), is declared in <sys/sysctl.h>. */
#include <sys/types.h>
#include <sys/sysctl.h>
#endif

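/* A pending compilation request queued for the worker threads. The
 * 'requests' counter acts as the job's priority. */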
struct block_rec {
        struct block *block;
        struct slist_elm slist;
        unsigned int requests;
        bool compiling;
};

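/* Per-worker state: one compiler state and one thread handle per worker. */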
struct recompiler_thd {
        struct lightrec_cstate *cstate;
        unsigned int tid;
        pthread_t thd;
};

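/* Shared recompiler context: the job queue ('slist'), the lock protecting
 * it, and the workers. 'cond' wakes idle workers when a job is queued;
 * 'cond2' is broadcast whenever an in-flight compilation finishes. */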
struct recompiler {
        struct lightrec_state *state;
        pthread_cond_t cond;
        pthread_cond_t cond2;
        pthread_mutex_t mutex;
        bool stop, pause, must_flush;
        struct slist_elm slist;

        pthread_mutex_t alloc_mutex;

        unsigned int nb_recs;
        struct recompiler_thd thds[];
};

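/* Best-effort detection of the number of online CPUs; falls back to 1 when
 * it cannot be determined. */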
static unsigned int get_processors_count(void)
{
        int nb = 1;

#if defined(PTW32_VERSION)
        nb = pthread_num_processors_np();
#elif defined(__APPLE__) || defined(__FreeBSD__)
        int count;
        size_t size = sizeof(count);

        nb = sysctlbyname("hw.ncpu", &count, &size, NULL, 0) ? 1 : count;
#elif defined(_SC_NPROCESSORS_ONLN)
        nb = (int)sysconf(_SC_NPROCESSORS_ONLN);
#endif

        return nb < 1 ? 1 : nb;
}

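/* Pick the unclaimed job with the highest request count.
 * Must be called with rec->mutex held. */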
static struct block_rec * lightrec_get_best_elm(struct slist_elm *head)
{
        struct block_rec *block_rec, *best = NULL;
        struct slist_elm *elm;

        for (elm = slist_first(head); elm; elm = elm->next) {
                block_rec = container_of(elm, struct block_rec, slist);

                if (!block_rec->compiling
                    && (!best || block_rec->requests > best->requests))
                        best = block_rec;
        }

        return best;
}

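/* Cancel one queued job. Must be called with rec->mutex held. Returns true
 * if the job was removed, or false if the caller must rescan the list
 * because the mutex was dropped while waiting for an in-flight
 * compilation to finish. */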
static bool lightrec_cancel_block_rec(struct recompiler *rec,
                                      struct block_rec *block_rec)
{
        if (block_rec->compiling) {
                /* Block is being recompiled - wait for completion */
                pthread_cond_wait(&rec->cond2, &rec->mutex);

                /* We can't guarantee the signal was for us.
                 * Since block_rec may have been removed while
                 * we were waiting on the condition, we cannot
                 * check block_rec->compiling again. The best
                 * thing is just to restart the function. */
                return false;
        }

        /* Block is not yet being processed - remove it from the list */
        slist_remove(&rec->slist, &block_rec->slist);
        lightrec_free(rec->state, MEM_FOR_LIGHTREC,
                      sizeof(*block_rec), block_rec);

        return true;
}

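/* Drain the whole job queue. Must be called with rec->mutex held. */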
static void lightrec_cancel_list(struct recompiler *rec)
{
        struct block_rec *block_rec;
        struct slist_elm *elm, *head = &rec->slist;

        for (elm = slist_first(head); elm; elm = slist_first(head)) {
                block_rec = container_of(elm, struct block_rec, slist);
                lightrec_cancel_block_rec(rec, block_rec);
        }
}

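/* Reaper callback: drop outdated blocks so that the code buffer can be
 * reused, then clear the must_flush flag set by the workers. */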
static void lightrec_flush_code_buffer(struct lightrec_state *state, void *d)
{
        struct recompiler *rec = d;

        lightrec_remove_outdated_blocks(state->block_cache, NULL);
        rec->must_flush = false;
}

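/* Worker loop body: repeatedly claim the highest-priority job, compile it
 * with rec->mutex dropped, then remove it from the queue. On -ENOMEM the
 * code buffer is full: all pending jobs are cancelled and the reaper is
 * asked to flush the buffer. Called (and returns) with rec->mutex held. */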
static void lightrec_compile_list(struct recompiler *rec,
                                  struct recompiler_thd *thd)
{
        struct block_rec *block_rec;
        struct block *block;
        int ret;

        while (!rec->pause &&
               !!(block_rec = lightrec_get_best_elm(&rec->slist))) {
                block_rec->compiling = true;
                block = block_rec->block;

                pthread_mutex_unlock(&rec->mutex);

                if (likely(!block_has_flag(block, BLOCK_IS_DEAD))) {
                        ret = lightrec_compile_block(thd->cstate, block);
                        if (ret == -ENOMEM) {
                                /* Code buffer is full. Request the reaper to
                                 * flush it. */

                                pthread_mutex_lock(&rec->mutex);
                                block_rec->compiling = false;
                                pthread_cond_broadcast(&rec->cond2);

                                if (!rec->must_flush) {
                                        rec->must_flush = true;
                                        lightrec_cancel_list(rec);

                                        lightrec_reaper_add(rec->state->reaper,
                                                            lightrec_flush_code_buffer,
                                                            rec);
                                }
                                return;
                        }

                        if (ret) {
                                pr_err("Unable to compile block at "PC_FMT": %d\n",
                                       block->pc, ret);
                        }
                }

                pthread_mutex_lock(&rec->mutex);

                slist_remove(&rec->slist, &block_rec->slist);
                lightrec_free(rec->state, MEM_FOR_LIGHTREC,
                              sizeof(*block_rec), block_rec);
                pthread_cond_broadcast(&rec->cond2);
        }
}

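/* Entry point of each worker thread: sleep on 'cond' until there is work
 * to do (or until asked to stop), then process the job list. */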
static void * lightrec_recompiler_thd(void *d)
{
        struct recompiler_thd *thd = d;
        struct recompiler *rec = container_of(thd, struct recompiler, thds[thd->tid]);

        pthread_mutex_lock(&rec->mutex);

        while (!rec->stop) {
                do {
                        pthread_cond_wait(&rec->cond, &rec->mutex);

                        if (rec->stop)
                                goto out_unlock;

                } while (rec->pause || slist_empty(&rec->slist));

                lightrec_compile_list(rec, thd);
        }

out_unlock:
        pthread_mutex_unlock(&rec->mutex);
        return NULL;
}

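/* Create the recompiler and spawn one worker per CPU minus one, leaving
 * one CPU free for the main emulation thread. */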
struct recompiler *lightrec_recompiler_init(struct lightrec_state *state)
{
        struct recompiler *rec;
        unsigned int i, nb_recs, nb_cpus;
        int ret;

        nb_cpus = get_processors_count();
        nb_recs = nb_cpus < 2 ? 1 : nb_cpus - 1;

        rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec)
                              + nb_recs * sizeof(*rec->thds));
        if (!rec) {
                pr_err("Cannot create recompiler: Out of memory\n");
                return NULL;
        }

        for (i = 0; i < nb_recs; i++) {
                rec->thds[i].tid = i;
                rec->thds[i].cstate = NULL;
        }

        for (i = 0; i < nb_recs; i++) {
                rec->thds[i].cstate = lightrec_create_cstate(state);
                if (!rec->thds[i].cstate) {
                        pr_err("Cannot create recompiler: Out of memory\n");
                        goto err_free_cstates;
                }
        }

        rec->state = state;
        rec->stop = false;
        rec->pause = false;
        rec->must_flush = false;
        rec->nb_recs = nb_recs;
        slist_init(&rec->slist);

        ret = pthread_cond_init(&rec->cond, NULL);
        if (ret) {
                pr_err("Cannot init cond variable: %d\n", ret);
                goto err_free_cstates;
        }

        ret = pthread_cond_init(&rec->cond2, NULL);
        if (ret) {
                pr_err("Cannot init cond variable: %d\n", ret);
                goto err_cnd_destroy;
        }

        ret = pthread_mutex_init(&rec->alloc_mutex, NULL);
        if (ret) {
                pr_err("Cannot init alloc mutex variable: %d\n", ret);
                goto err_cnd2_destroy;
        }

        ret = pthread_mutex_init(&rec->mutex, NULL);
        if (ret) {
                pr_err("Cannot init mutex variable: %d\n", ret);
                goto err_alloc_mtx_destroy;
        }

        for (i = 0; i < nb_recs; i++) {
                ret = pthread_create(&rec->thds[i].thd, NULL,
                                     lightrec_recompiler_thd, &rec->thds[i]);
                if (ret) {
                        pr_err("Cannot create recompiler thread: %d\n", ret);
                        /* TODO: Handle cleanup properly */
                        goto err_mtx_destroy;
                }
        }

        pr_info("Threaded recompiler started with %u workers.\n", nb_recs);

        return rec;

err_mtx_destroy:
        pthread_mutex_destroy(&rec->mutex);
err_alloc_mtx_destroy:
        pthread_mutex_destroy(&rec->alloc_mutex);
err_cnd2_destroy:
        pthread_cond_destroy(&rec->cond2);
err_cnd_destroy:
        pthread_cond_destroy(&rec->cond);
err_free_cstates:
        for (i = 0; i < nb_recs; i++) {
                if (rec->thds[i].cstate)
                        lightrec_free_cstate(rec->thds[i].cstate);
        }
        lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
        return NULL;
}

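/* Tear down the recompiler: wake and join all workers, then release the
 * synchronization objects and per-worker compiler states. */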
void lightrec_free_recompiler(struct recompiler *rec)
{
        unsigned int i;

        rec->stop = true;

        /* Stop the threads */
        pthread_mutex_lock(&rec->mutex);
        pthread_cond_broadcast(&rec->cond);
        lightrec_cancel_list(rec);
        pthread_mutex_unlock(&rec->mutex);

        for (i = 0; i < rec->nb_recs; i++)
                pthread_join(rec->thds[i].thd, NULL);

        for (i = 0; i < rec->nb_recs; i++)
                lightrec_free_cstate(rec->thds[i].cstate);

        pthread_mutex_destroy(&rec->mutex);
        pthread_mutex_destroy(&rec->alloc_mutex);
        pthread_cond_destroy(&rec->cond);
        pthread_cond_destroy(&rec->cond2);
        lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
}

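/* Queue a block for background compilation, or bump the priority of the
 * existing job if the block (or another block covering the same address
 * range) is already queued. Returns 0 on success or -ENOMEM. */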
int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
{
        struct slist_elm *elm;
        struct block_rec *block_rec;
        u32 pc1, pc2;
        int ret = 0;

        pthread_mutex_lock(&rec->mutex);

        /* If the recompiler must flush the code cache, we can't add the new
         * job. It will be re-added next time the block's address is jumped to
         * again. */
        if (rec->must_flush)
                goto out_unlock;

        /* If the block is marked as dead, don't compile it, it will be removed
         * as soon as it's safe. */
        if (block_has_flag(block, BLOCK_IS_DEAD))
                goto out_unlock;

        for (elm = slist_first(&rec->slist); elm; elm = elm->next) {
                block_rec = container_of(elm, struct block_rec, slist);

                if (block_rec->block == block) {
                        /* The block to compile is already in the queue -
                         * increment its counter to increase its priority */
                        block_rec->requests++;
                        goto out_unlock;
                }

                pc1 = kunseg(block_rec->block->pc);
                pc2 = kunseg(block->pc);
                if (pc2 >= pc1 && pc2 < pc1 + block_rec->block->nb_ops * 4) {
                        /* The block we want to compile is already covered by
                         * another one in the queue - increment its counter to
                         * increase its priority */
                        block_rec->requests++;
                        goto out_unlock;
                }
        }

        /* By the time this function is called, the block may have already
         * been compiled and removed from the wait list. Just return here. */
        if (block->function && !block_has_flag(block, BLOCK_SHOULD_RECOMPILE))
                goto out_unlock;

        block_rec = lightrec_malloc(rec->state, MEM_FOR_LIGHTREC,
                                    sizeof(*block_rec));
        if (!block_rec) {
                ret = -ENOMEM;
                goto out_unlock;
        }

        pr_debug("Adding block "PC_FMT" to recompiler\n", block->pc);

        block_rec->block = block;
        block_rec->compiling = false;
        block_rec->requests = 1;

        elm = &rec->slist;

        /* Push the new entry to the front of the queue */
        slist_append(elm, &block_rec->slist);

        /* Signal one of the worker threads */
        pthread_cond_signal(&rec->cond);

out_unlock:
        pthread_mutex_unlock(&rec->mutex);

        return ret;
}

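/* Remove a block from the compilation queue, waiting for any in-flight
 * compilation of that block to complete first. */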
void lightrec_recompiler_remove(struct recompiler *rec, struct block *block)
{
        struct block_rec *block_rec;
        struct slist_elm *elm;

        pthread_mutex_lock(&rec->mutex);

        while (true) {
                for (elm = slist_first(&rec->slist); elm; elm = elm->next) {
                        block_rec = container_of(elm, struct block_rec, slist);

                        if (block_rec->block == block) {
                                if (lightrec_cancel_block_rec(rec, block_rec))
                                        goto out_unlock;

                                break;
                        }
                }

                if (!elm)
                        break;
        }

out_unlock:
        pthread_mutex_unlock(&rec->mutex);
}

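/* Called when a block is about to be executed. Returns a pointer to native
 * code to run, or NULL after the block has been interpreted. Also takes
 * care of freeing the opcode list once a fully-tagged block has been
 * compiled and the list is no longer needed. */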
void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
                                          struct block *block, u32 *pc)
{
        u8 old_flags;

        /* There's no point in running the first pass if the block will never
         * be compiled. Let the main loop run the interpreter instead. */
        if (block_has_flag(block, BLOCK_NEVER_COMPILE))
                return NULL;

        /* The block is marked as dead, and will be removed the next time the
         * reaper is run. In the meantime, the old function can still be
         * executed. */
        if (block_has_flag(block, BLOCK_IS_DEAD))
                return block->function;

        /* If the block is already fully tagged, there is no point in running
         * the first pass. Request a recompilation of the block, and maybe the
         * interpreter will run the block in the meantime. */
        if (block_has_flag(block, BLOCK_FULLY_TAGGED))
                lightrec_recompiler_add(state->rec, block);

        if (likely(block->function)) {
                if (block_has_flag(block, BLOCK_FULLY_TAGGED)) {
                        old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);

                        if (!(old_flags & BLOCK_NO_OPCODE_LIST)) {
                                pr_debug("Block "PC_FMT" is fully tagged"
                                         " - free opcode list\n", block->pc);

                                /* The block was already compiled but the opcode list
                                 * didn't get freed yet - do it now */
                                lightrec_free_opcode_list(state, block->opcode_list);
                        }
                }

                return block->function;
        }

        /* Mark the opcode list as freed, so that the threaded compiler won't
         * free it while we're using it in the interpreter. */
        old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);

        /* Block wasn't compiled yet - run the interpreter */
        *pc = lightrec_emulate_block(state, block, *pc);

        if (!(old_flags & BLOCK_NO_OPCODE_LIST))
                block_clear_flags(block, BLOCK_NO_OPCODE_LIST);

        /* If the block got compiled while the interpreter was running,
         * we can free the opcode list now. */
        if (block->function && block_has_flag(block, BLOCK_FULLY_TAGGED)) {
                old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);

                if (!(old_flags & BLOCK_NO_OPCODE_LIST)) {
                        pr_debug("Block "PC_FMT" is fully tagged"
                                 " - free opcode list\n", block->pc);

                        lightrec_free_opcode_list(state, block->opcode_list);
                }
        }

        return NULL;
}

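/* Serialize access to the code buffer allocator across worker threads. */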
void lightrec_code_alloc_lock(struct lightrec_state *state)
{
        pthread_mutex_lock(&state->rec->alloc_mutex);
}

void lightrec_code_alloc_unlock(struct lightrec_state *state)
{
        pthread_mutex_unlock(&state->rec->alloc_mutex);
}

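/* Pause the recompiler: cancel all pending jobs and keep the workers from
 * picking up new ones until lightrec_recompiler_unpause() is called. */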
void lightrec_recompiler_pause(struct recompiler *rec)
{
        rec->pause = true;

        pthread_mutex_lock(&rec->mutex);
        pthread_cond_broadcast(&rec->cond);
        lightrec_cancel_list(rec);
        pthread_mutex_unlock(&rec->mutex);
}

void lightrec_recompiler_unpause(struct recompiler *rec)
{
        rec->pause = false;
}