deps/lightrec/recompiler.c
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
 * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
 */

#include "blockcache.h"
#include "debug.h"
#include "interpreter.h"
#include "lightrec-private.h"
#include "memmanager.h"
#include "reaper.h"
#include "slist.h"

#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>
#include <pthread.h>
#ifdef __linux__
#include <unistd.h>
#endif
#if defined(__APPLE__) || defined(__FreeBSD__)
/* For sysctlbyname(), used by get_processors_count() below */
#include <sys/sysctl.h>
#endif

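/* A single entry in the recompiler's work queue. 'compiling' is set while a
 * worker thread is busy compiling the block, so that other threads skip it. */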
struct block_rec {
        struct block *block;
        struct slist_elm slist;
        bool compiling;
};

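/* Per-worker state: the worker's compilation state, its index in the 'thds'
 * array, and its thread handle. */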
struct recompiler_thd {
        struct lightrec_cstate *cstate;
        unsigned int tid;
        pthread_t thd;
};

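/* Shared state of the threaded recompiler. 'mutex' protects the work queue
 * ('slist') and the flags; 'cond' wakes sleeping workers when a job is added
 * or a stop is requested, and 'cond2' is broadcast whenever a job finishes or
 * is cancelled. 'alloc_mutex' backs the code-allocation lock helpers at the
 * bottom of this file. 'thds' is a flexible array with one entry per worker. */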
struct recompiler {
        struct lightrec_state *state;
        pthread_cond_t cond;
        pthread_cond_t cond2;
        pthread_mutex_t mutex;
        bool stop, must_flush;
        struct slist_elm slist;

        pthread_mutex_t alloc_mutex;

        unsigned int nb_recs;
        struct recompiler_thd thds[];
};

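/* Best-effort detection of the number of online CPUs; falls back to 1 when no
 * platform-specific method is available or the query fails. */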
static unsigned int get_processors_count(void)
{
        long nb = 1;

#if defined(PTW32_VERSION)
        nb = pthread_num_processors_np();
#elif defined(__APPLE__) || defined(__FreeBSD__)
        int count;
        size_t size = sizeof(count);

        nb = sysctlbyname("hw.ncpu", &count, &size, NULL, 0) ? 1 : count;
#elif defined(_SC_NPROCESSORS_ONLN)
        /* sysconf() returns -1 on error; 'nb' is signed so that the check
         * below maps failures to 1 */
        nb = sysconf(_SC_NPROCESSORS_ONLN);
#endif

        return nb < 1 ? 1 : (unsigned int)nb;
}

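/* Return the first queued job that is not already being compiled by another
 * worker, or NULL if every pending job is taken. */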
static struct slist_elm * lightrec_get_first_elm(struct slist_elm *head)
{
        struct block_rec *block_rec;
        struct slist_elm *elm;

        for (elm = slist_first(head); elm; elm = elm->next) {
                block_rec = container_of(elm, struct block_rec, slist);

                if (!block_rec->compiling)
                        return elm;
        }

        return NULL;
}

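/* Cancel a single job. Returns true if the job was removed from the queue,
 * false if it was being compiled, in which case the caller must re-scan the
 * list once the wait on 'cond2' returns. Must be called with 'mutex' held. */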
static bool lightrec_cancel_block_rec(struct recompiler *rec,
                                      struct block_rec *block_rec)
{
        if (block_rec->compiling) {
                /* Block is being recompiled - wait for completion */
                pthread_cond_wait(&rec->cond2, &rec->mutex);

                /* We can't guarantee the signal was for us.
                 * Since block_rec may have been removed while
                 * we were waiting on the condition, we cannot
                 * check block_rec->compiling again. The best
                 * thing is just to restart the function. */
                return false;
        }

        /* Block is not yet being processed - remove it from the list */
        slist_remove(&rec->slist, &block_rec->slist);
        lightrec_free(rec->state, MEM_FOR_LIGHTREC,
                      sizeof(*block_rec), block_rec);

        return true;
}

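/* Cancel every pending job, waiting for in-flight ones to complete.
 * Must be called with 'mutex' held. */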
static void lightrec_cancel_list(struct recompiler *rec)
{
        struct block_rec *block_rec;
        struct slist_elm *elm, *head = &rec->slist;

        for (elm = slist_first(head); elm; elm = slist_first(head)) {
                block_rec = container_of(elm, struct block_rec, slist);
                lightrec_cancel_block_rec(rec, block_rec);
        }
}

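/* Reaper callback: drop outdated blocks so that their space in the code
 * buffer can be reused, then clear the 'must_flush' flag. */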
static void lightrec_flush_code_buffer(struct lightrec_state *state, void *d)
{
        struct recompiler *rec = d;

        lightrec_remove_outdated_blocks(state->block_cache, NULL);
        rec->must_flush = false;
}

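/* Main work loop of a worker thread. Called with 'mutex' held; the lock is
 * dropped while a block is compiled and re-taken afterwards. When the code
 * buffer fills up (-ENOMEM), all pending jobs are cancelled and a flush is
 * requested from the reaper. */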
static void lightrec_compile_list(struct recompiler *rec,
                                  struct recompiler_thd *thd)
{
        struct block_rec *block_rec;
        struct slist_elm *next;
        struct block *block;
        int ret;

        while (!!(next = lightrec_get_first_elm(&rec->slist))) {
                block_rec = container_of(next, struct block_rec, slist);
                block_rec->compiling = true;
                block = block_rec->block;

                pthread_mutex_unlock(&rec->mutex);

                if (likely(!block_has_flag(block, BLOCK_IS_DEAD))) {
                        ret = lightrec_compile_block(thd->cstate, block);
                        if (ret == -ENOMEM) {
                                /* Code buffer is full. Request the reaper to
                                 * flush it. */

                                pthread_mutex_lock(&rec->mutex);
                                block_rec->compiling = false;
                                pthread_cond_broadcast(&rec->cond2);

                                if (!rec->must_flush) {
                                        rec->must_flush = true;
                                        lightrec_cancel_list(rec);

                                        lightrec_reaper_add(rec->state->reaper,
                                                            lightrec_flush_code_buffer,
                                                            rec);
                                }
                                return;
                        }

                        if (ret) {
                                pr_err("Unable to compile block at PC 0x%x: %d\n",
                                       block->pc, ret);
                        }
                }

                pthread_mutex_lock(&rec->mutex);

                slist_remove(&rec->slist, next);
                lightrec_free(rec->state, MEM_FOR_LIGHTREC,
                              sizeof(*block_rec), block_rec);
                pthread_cond_broadcast(&rec->cond2);
        }
}

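/* Entry point of each worker thread: sleep on 'cond' until there is work or a
 * stop request, then drain the queue. */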
static void * lightrec_recompiler_thd(void *d)
{
        struct recompiler_thd *thd = d;
        struct recompiler *rec = container_of(thd, struct recompiler, thds[thd->tid]);

        pthread_mutex_lock(&rec->mutex);

        while (!rec->stop) {
                do {
                        pthread_cond_wait(&rec->cond, &rec->mutex);

                        if (rec->stop)
                                goto out_unlock;

                } while (slist_empty(&rec->slist));

                lightrec_compile_list(rec, thd);
        }

out_unlock:
        pthread_mutex_unlock(&rec->mutex);
        return NULL;
}

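/* Create the threaded recompiler, with one worker per CPU core minus one
 * (presumably leaving a core free for the main emulation thread). */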
struct recompiler *lightrec_recompiler_init(struct lightrec_state *state)
{
        struct recompiler *rec;
        unsigned int i, nb_recs, nb_cpus;
        int ret;

        nb_cpus = get_processors_count();
        nb_recs = nb_cpus < 2 ? 1 : nb_cpus - 1;

        rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec)
                              + nb_recs * sizeof(*rec->thds));
        if (!rec) {
                pr_err("Cannot create recompiler: Out of memory\n");
                return NULL;
        }

        for (i = 0; i < nb_recs; i++) {
                rec->thds[i].tid = i;
                rec->thds[i].cstate = NULL;
        }

        for (i = 0; i < nb_recs; i++) {
                rec->thds[i].cstate = lightrec_create_cstate(state);
                if (!rec->thds[i].cstate) {
                        pr_err("Cannot create recompiler: Out of memory\n");
                        goto err_free_cstates;
                }
        }

        rec->state = state;
        rec->stop = false;
        rec->must_flush = false;
        rec->nb_recs = nb_recs;
        slist_init(&rec->slist);

        ret = pthread_cond_init(&rec->cond, NULL);
        if (ret) {
                pr_err("Cannot init cond variable: %d\n", ret);
                goto err_free_cstates;
        }

        ret = pthread_cond_init(&rec->cond2, NULL);
        if (ret) {
                pr_err("Cannot init cond variable: %d\n", ret);
                goto err_cnd_destroy;
        }

        ret = pthread_mutex_init(&rec->alloc_mutex, NULL);
        if (ret) {
                pr_err("Cannot init alloc mutex: %d\n", ret);
                goto err_cnd2_destroy;
        }

        ret = pthread_mutex_init(&rec->mutex, NULL);
        if (ret) {
                pr_err("Cannot init mutex: %d\n", ret);
                goto err_alloc_mtx_destroy;
        }

        for (i = 0; i < nb_recs; i++) {
                ret = pthread_create(&rec->thds[i].thd, NULL,
                                     lightrec_recompiler_thd, &rec->thds[i]);
                if (ret) {
                        pr_err("Cannot create recompiler thread: %d\n", ret);
                        goto err_stop_thds;
                }
        }

        pr_info("Threaded recompiler started with %u workers.\n", nb_recs);

        return rec;

err_stop_thds:
        /* Stop and join the workers that were already started */
        pthread_mutex_lock(&rec->mutex);
        rec->stop = true;
        pthread_cond_broadcast(&rec->cond);
        pthread_mutex_unlock(&rec->mutex);

        while (i-- > 0)
                pthread_join(rec->thds[i].thd, NULL);

        pthread_mutex_destroy(&rec->mutex);
err_alloc_mtx_destroy:
        pthread_mutex_destroy(&rec->alloc_mutex);
err_cnd2_destroy:
        pthread_cond_destroy(&rec->cond2);
err_cnd_destroy:
        pthread_cond_destroy(&rec->cond);
err_free_cstates:
        for (i = 0; i < nb_recs; i++) {
                if (rec->thds[i].cstate)
                        lightrec_free_cstate(rec->thds[i].cstate);
        }
        lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
        return NULL;
}

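/* Tear the recompiler down: wake all workers with the stop flag set, cancel
 * pending jobs, join the threads, then release every resource. */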
void lightrec_free_recompiler(struct recompiler *rec)
{
        unsigned int i;

        /* Stop the threads. The stop flag is set under the mutex so that
         * workers sleeping on the condition variable cannot miss the wake-up. */
        pthread_mutex_lock(&rec->mutex);
        rec->stop = true;
        pthread_cond_broadcast(&rec->cond);
        lightrec_cancel_list(rec);
        pthread_mutex_unlock(&rec->mutex);

        for (i = 0; i < rec->nb_recs; i++)
                pthread_join(rec->thds[i].thd, NULL);

        for (i = 0; i < rec->nb_recs; i++)
                lightrec_free_cstate(rec->thds[i].cstate);

        pthread_mutex_destroy(&rec->mutex);
        pthread_mutex_destroy(&rec->alloc_mutex);
        pthread_cond_destroy(&rec->cond);
        pthread_cond_destroy(&rec->cond2);
        lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
}

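/* Queue a block for background compilation. Fresh blocks are pushed to the
 * front of the queue and re-compilation requests to the back; a block that is
 * already queued is bumped back to the front, unless it is currently being
 * compiled or flagged for re-compilation. Returns 0 on success or -ENOMEM if
 * the job descriptor cannot be allocated. */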
int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
{
        struct slist_elm *elm, *prev;
        struct block_rec *block_rec;
        int ret = 0;

        pthread_mutex_lock(&rec->mutex);

        /* If the recompiler must flush the code cache, we can't add the new
         * job. It will be re-added next time the block's address is jumped to
         * again. */
        if (rec->must_flush)
                goto out_unlock;

        /* If the block is marked as dead, don't compile it; it will be removed
         * as soon as it's safe. */
        if (block_has_flag(block, BLOCK_IS_DEAD))
                goto out_unlock;

        for (elm = slist_first(&rec->slist), prev = NULL; elm;
             prev = elm, elm = elm->next) {
                block_rec = container_of(elm, struct block_rec, slist);

                if (block_rec->block == block) {
                        /* The block to compile is already in the queue - bump
                         * it to the top of the list, unless the block is being
                         * recompiled. */
                        if (prev && !block_rec->compiling &&
                            !block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) {
                                slist_remove_next(prev);
                                slist_append(&rec->slist, elm);
                        }

                        goto out_unlock;
                }
        }

        /* By the time this function was called, the block has been recompiled
         * and isn't in the wait list anymore. Just return here. */
        if (block->function && !block_has_flag(block, BLOCK_SHOULD_RECOMPILE))
                goto out_unlock;

        block_rec = lightrec_malloc(rec->state, MEM_FOR_LIGHTREC,
                                    sizeof(*block_rec));
        if (!block_rec) {
                ret = -ENOMEM;
                goto out_unlock;
        }

        pr_debug("Adding block PC 0x%x to recompiler\n", block->pc);

        block_rec->block = block;
        block_rec->compiling = false;

        elm = &rec->slist;

        /* If the block is flagged for re-compilation, push it to the back of
         * the queue; otherwise push it to the front. */
        if (block_has_flag(block, BLOCK_SHOULD_RECOMPILE))
                for (; elm->next; elm = elm->next);

        slist_append(elm, &block_rec->slist);

        /* Wake up one of the worker threads */
        pthread_cond_signal(&rec->cond);

out_unlock:
        pthread_mutex_unlock(&rec->mutex);

        return ret;
}

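/* Remove a block from the work queue, waiting for its compilation to finish
 * if a worker already picked it up. */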
void lightrec_recompiler_remove(struct recompiler *rec, struct block *block)
{
        struct block_rec *block_rec;
        struct slist_elm *elm;

        pthread_mutex_lock(&rec->mutex);

        while (true) {
                for (elm = slist_first(&rec->slist); elm; elm = elm->next) {
                        block_rec = container_of(elm, struct block_rec, slist);

                        if (block_rec->block == block) {
                                if (lightrec_cancel_block_rec(rec, block_rec))
                                        goto out_unlock;

                                break;
                        }
                }

                if (!elm)
                        break;
        }

out_unlock:
        pthread_mutex_unlock(&rec->mutex);
}

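/* Called when the emulator jumps to a block that may not be compiled yet.
 * Returns the native code to execute when it is available; otherwise runs the
 * block through the interpreter (updating *pc) and returns NULL. */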
void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
                                          struct block *block, u32 *pc)
{
        u8 old_flags;

        /* There's no point in running the first pass if the block will never
         * be compiled. Let the main loop run the interpreter instead. */
        if (block_has_flag(block, BLOCK_NEVER_COMPILE))
                return NULL;

        /* The block is marked as dead, and will be removed the next time the
         * reaper is run. In the meantime, the old function can still be
         * executed. */
        if (block_has_flag(block, BLOCK_IS_DEAD))
                return block->function;

        /* If the block is already fully tagged, there is no point in running
         * the first pass. Request a compilation of the block; the interpreter
         * below may run it in the meantime. */
        if (block_has_flag(block, BLOCK_FULLY_TAGGED))
                lightrec_recompiler_add(state->rec, block);

        if (likely(block->function)) {
                if (block_has_flag(block, BLOCK_FULLY_TAGGED)) {
                        old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);

                        if (!(old_flags & BLOCK_NO_OPCODE_LIST)) {
                                pr_debug("Block PC 0x%08x is fully tagged"
                                         " - free opcode list\n", block->pc);

                                /* The block was already compiled but the opcode list
                                 * didn't get freed yet - do it now */
                                lightrec_free_opcode_list(state, block->opcode_list);
                        }
                }

                return block->function;
        }

        /* Mark the opcode list as freed, so that the threaded compiler won't
         * free it while we're using it in the interpreter. */
        old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);

        /* Block wasn't compiled yet - run the interpreter */
        *pc = lightrec_emulate_block(state, block, *pc);

        if (!(old_flags & BLOCK_NO_OPCODE_LIST))
                block_clear_flags(block, BLOCK_NO_OPCODE_LIST);

        /* If the block got compiled while the interpreter was running,
         * we can free the opcode list now. */
        if (block->function && block_has_flag(block, BLOCK_FULLY_TAGGED)) {
                old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);

                if (!(old_flags & BLOCK_NO_OPCODE_LIST)) {
                        pr_debug("Block PC 0x%08x is fully tagged"
                                 " - free opcode list\n", block->pc);

                        lightrec_free_opcode_list(state, block->opcode_list);
                }
        }

        return NULL;
}

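/* The two helpers below serialize allocations from the shared code buffer
 * across worker threads. */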
void lightrec_code_alloc_lock(struct lightrec_state *state)
{
        pthread_mutex_lock(&state->rec->alloc_mutex);
}

void lightrec_code_alloc_unlock(struct lightrec_state *state)
{
        pthread_mutex_unlock(&state->rec->alloc_mutex);
}