gpu_neon: don't include vector_ops.h in the main header
[pcsx_rearmed.git] / deps / lightrec / recompiler.c
1 // SPDX-License-Identifier: LGPL-2.1-or-later
2 /*
3  * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
4  */
5
6 #include "blockcache.h"
7 #include "debug.h"
8 #include "interpreter.h"
9 #include "lightrec-private.h"
10 #include "memmanager.h"
11 #include "reaper.h"
12 #include "slist.h"
13
14 #include <errno.h>
15 #include <stdatomic.h>
16 #include <stdbool.h>
17 #include <stdlib.h>
18 #include <pthread.h>
19 #ifdef __linux__
20 #include <unistd.h>
21 #endif
22
23 struct block_rec {
24         struct block *block;
25         struct slist_elm slist;
26         bool compiling;
27 };
28
/*
 * Per-worker data of the threaded recompiler.
 */
struct recompiler_thd {
        /* Compiler state handed to lightrec_compile_block() by this worker. */
        struct lightrec_cstate *cstate;

        /* Index of this entry inside the owning recompiler's thds[] array;
         * used to get back to the enclosing struct recompiler. */
        unsigned int tid;

        /* Handle of the worker thread. */
        pthread_t thd;
};
34
35 struct recompiler {
36         struct lightrec_state *state;
37         pthread_cond_t cond;
38         pthread_cond_t cond2;
39         pthread_mutex_t mutex;
40         bool stop, must_flush;
41         struct slist_elm slist;
42
43         pthread_mutex_t alloc_mutex;
44
45         unsigned int nb_recs;
46         struct recompiler_thd thds[];
47 };
48
/*
 * Ask the platform how many processors are available.
 *
 * Returns the reported count, or 1 when the platform offers no way to query
 * it, when the query fails, or when it reports a value lower than 1.
 */
static unsigned int get_processors_count(void)
{
        /* Kept signed on purpose: sysconf() reports failure as -1, which an
         * unsigned variable would turn into a huge processor count that the
         * range check below could never catch. */
        long nb = 1;

#if defined(PTW32_VERSION)
        nb = pthread_num_processors_np();
#elif defined(__APPLE__) || defined(__FreeBSD__)
        int count;
        size_t size = sizeof(count);

        nb = sysctlbyname("hw.ncpu", &count, &size, NULL, 0) ? 1 : count;
#elif defined(_SC_NPROCESSORS_ONLN)
        nb = sysconf(_SC_NPROCESSORS_ONLN);
#endif

        return nb < 1 ? 1 : (unsigned int)nb;
}
66
67 static struct slist_elm * lightrec_get_first_elm(struct slist_elm *head)
68 {
69         struct block_rec *block_rec;
70         struct slist_elm *elm;
71
72         for (elm = slist_first(head); elm; elm = elm->next) {
73                 block_rec = container_of(elm, struct block_rec, slist);
74
75                 if (!block_rec->compiling)
76                         return elm;
77         }
78
79         return NULL;
80 }
81
82 static bool lightrec_cancel_block_rec(struct recompiler *rec,
83                                       struct block_rec *block_rec)
84 {
85         if (block_rec->compiling) {
86                 /* Block is being recompiled - wait for
87                  * completion */
88                 pthread_cond_wait(&rec->cond2, &rec->mutex);
89
90                 /* We can't guarantee the signal was for us.
91                  * Since block_rec may have been removed while
92                  * we were waiting on the condition, we cannot
93                  * check block_rec->compiling again. The best
94                  * thing is just to restart the function. */
95                 return false;
96         }
97
98         /* Block is not yet being processed - remove it from the list */
99         slist_remove(&rec->slist, &block_rec->slist);
100         lightrec_free(rec->state, MEM_FOR_LIGHTREC,
101                       sizeof(*block_rec), block_rec);
102
103         return true;
104 }
105
106 static void lightrec_cancel_list(struct recompiler *rec)
107 {
108         struct block_rec *block_rec;
109         struct slist_elm *next;
110
111         while (!!(next = lightrec_get_first_elm(&rec->slist))) {
112                 block_rec = container_of(next, struct block_rec, slist);
113
114                 lightrec_cancel_block_rec(rec, block_rec);
115         }
116
117         pthread_cond_broadcast(&rec->cond2);
118 }
119
120 static void lightrec_flush_code_buffer(struct lightrec_state *state, void *d)
121 {
122         struct recompiler *rec = d;
123
124         pthread_mutex_lock(&rec->mutex);
125
126         if (rec->must_flush) {
127                 lightrec_remove_outdated_blocks(state->block_cache, NULL);
128                 rec->must_flush = false;
129         }
130
131         pthread_mutex_unlock(&rec->mutex);
132 }
133
134 static void lightrec_compile_list(struct recompiler *rec,
135                                   struct recompiler_thd *thd)
136 {
137         struct block_rec *block_rec;
138         struct slist_elm *next;
139         struct block *block;
140         int ret;
141
142         while (!!(next = lightrec_get_first_elm(&rec->slist))) {
143                 block_rec = container_of(next, struct block_rec, slist);
144                 block_rec->compiling = true;
145                 block = block_rec->block;
146
147                 pthread_mutex_unlock(&rec->mutex);
148
149                 if (likely(!(block->flags & BLOCK_IS_DEAD))) {
150                         ret = lightrec_compile_block(thd->cstate, block);
151                         if (ret == -ENOMEM) {
152                                 /* Code buffer is full. Request the reaper to
153                                  * flush it. */
154
155                                 pthread_mutex_lock(&rec->mutex);
156                                 if (!rec->must_flush) {
157                                         lightrec_reaper_add(rec->state->reaper,
158                                                             lightrec_flush_code_buffer,
159                                                             rec);
160                                         lightrec_cancel_list(rec);
161                                         rec->must_flush = true;
162                                 }
163                                 return;
164                         }
165
166                         if (ret) {
167                                 pr_err("Unable to compile block at PC 0x%x: %d\n",
168                                        block->pc, ret);
169                         }
170                 }
171
172                 pthread_mutex_lock(&rec->mutex);
173
174                 slist_remove(&rec->slist, next);
175                 lightrec_free(rec->state, MEM_FOR_LIGHTREC,
176                               sizeof(*block_rec), block_rec);
177                 pthread_cond_signal(&rec->cond2);
178         }
179 }
180
181 static void * lightrec_recompiler_thd(void *d)
182 {
183         struct recompiler_thd *thd = d;
184         struct recompiler *rec = container_of(thd, struct recompiler, thds[thd->tid]);
185
186         pthread_mutex_lock(&rec->mutex);
187
188         while (!rec->stop) {
189                 do {
190                         pthread_cond_wait(&rec->cond, &rec->mutex);
191
192                         if (rec->stop)
193                                 goto out_unlock;
194
195                 } while (slist_empty(&rec->slist));
196
197                 lightrec_compile_list(rec, thd);
198         }
199
200 out_unlock:
201         pthread_mutex_unlock(&rec->mutex);
202         return NULL;
203 }
204
205 struct recompiler *lightrec_recompiler_init(struct lightrec_state *state)
206 {
207         struct recompiler *rec;
208         unsigned int i, nb_recs, nb_cpus;
209         int ret;
210
211         nb_cpus = get_processors_count();
212         nb_recs = nb_cpus < 2 ? 1 : nb_cpus - 1;
213
214         rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec)
215                               + nb_recs * sizeof(*rec->thds));
216         if (!rec) {
217                 pr_err("Cannot create recompiler: Out of memory\n");
218                 return NULL;
219         }
220
221         for (i = 0; i < nb_recs; i++) {
222                 rec->thds[i].tid = i;
223                 rec->thds[i].cstate = NULL;
224         }
225
226         for (i = 0; i < nb_recs; i++) {
227                 rec->thds[i].cstate = lightrec_create_cstate(state);
228                 if (!rec->thds[i].cstate) {
229                         pr_err("Cannot create recompiler: Out of memory\n");
230                         goto err_free_cstates;
231                 }
232         }
233
234         rec->state = state;
235         rec->stop = false;
236         rec->must_flush = false;
237         rec->nb_recs = nb_recs;
238         slist_init(&rec->slist);
239
240         ret = pthread_cond_init(&rec->cond, NULL);
241         if (ret) {
242                 pr_err("Cannot init cond variable: %d\n", ret);
243                 goto err_free_cstates;
244         }
245
246         ret = pthread_cond_init(&rec->cond2, NULL);
247         if (ret) {
248                 pr_err("Cannot init cond variable: %d\n", ret);
249                 goto err_cnd_destroy;
250         }
251
252         ret = pthread_mutex_init(&rec->alloc_mutex, NULL);
253         if (ret) {
254                 pr_err("Cannot init alloc mutex variable: %d\n", ret);
255                 goto err_cnd2_destroy;
256         }
257
258         ret = pthread_mutex_init(&rec->mutex, NULL);
259         if (ret) {
260                 pr_err("Cannot init mutex variable: %d\n", ret);
261                 goto err_alloc_mtx_destroy;
262         }
263
264         for (i = 0; i < nb_recs; i++) {
265                 ret = pthread_create(&rec->thds[i].thd, NULL,
266                                      lightrec_recompiler_thd, &rec->thds[i]);
267                 if (ret) {
268                         pr_err("Cannot create recompiler thread: %d\n", ret);
269                         /* TODO: Handle cleanup properly */
270                         goto err_mtx_destroy;
271                 }
272         }
273
274         pr_info("Threaded recompiler started with %u workers.\n", nb_recs);
275
276         return rec;
277
278 err_mtx_destroy:
279         pthread_mutex_destroy(&rec->mutex);
280 err_alloc_mtx_destroy:
281         pthread_mutex_destroy(&rec->alloc_mutex);
282 err_cnd2_destroy:
283         pthread_cond_destroy(&rec->cond2);
284 err_cnd_destroy:
285         pthread_cond_destroy(&rec->cond);
286 err_free_cstates:
287         for (i = 0; i < nb_recs; i++) {
288                 if (rec->thds[i].cstate)
289                         lightrec_free_cstate(rec->thds[i].cstate);
290         }
291         lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
292         return NULL;
293 }
294
295 void lightrec_free_recompiler(struct recompiler *rec)
296 {
297         unsigned int i;
298
299         rec->stop = true;
300
301         /* Stop the thread */
302         pthread_mutex_lock(&rec->mutex);
303         pthread_cond_broadcast(&rec->cond);
304         lightrec_cancel_list(rec);
305         pthread_mutex_unlock(&rec->mutex);
306
307         for (i = 0; i < rec->nb_recs; i++)
308                 pthread_join(rec->thds[i].thd, NULL);
309
310         for (i = 0; i < rec->nb_recs; i++)
311                 lightrec_free_cstate(rec->thds[i].cstate);
312
313         pthread_mutex_destroy(&rec->mutex);
314         pthread_mutex_destroy(&rec->alloc_mutex);
315         pthread_cond_destroy(&rec->cond);
316         pthread_cond_destroy(&rec->cond2);
317         lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec), rec);
318 }
319
320 int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
321 {
322         struct slist_elm *elm, *prev;
323         struct block_rec *block_rec;
324         int ret = 0;
325
326         pthread_mutex_lock(&rec->mutex);
327
328         /* If the recompiler must flush the code cache, we can't add the new
329          * job. It will be re-added next time the block's address is jumped to
330          * again. */
331         if (rec->must_flush)
332                 goto out_unlock;
333
334         /* If the block is marked as dead, don't compile it, it will be removed
335          * as soon as it's safe. */
336         if (block->flags & BLOCK_IS_DEAD)
337                 goto out_unlock;
338
339         for (elm = slist_first(&rec->slist), prev = NULL; elm;
340              prev = elm, elm = elm->next) {
341                 block_rec = container_of(elm, struct block_rec, slist);
342
343                 if (block_rec->block == block) {
344                         /* The block to compile is already in the queue - bump
345                          * it to the top of the list, unless the block is being
346                          * recompiled. */
347                         if (prev && !block_rec->compiling &&
348                             !(block->flags & BLOCK_SHOULD_RECOMPILE)) {
349                                 slist_remove_next(prev);
350                                 slist_append(&rec->slist, elm);
351                         }
352
353                         goto out_unlock;
354                 }
355         }
356
357         /* By the time this function was called, the block has been recompiled
358          * and ins't in the wait list anymore. Just return here. */
359         if (block->function && !(block->flags & BLOCK_SHOULD_RECOMPILE))
360                 goto out_unlock;
361
362         block_rec = lightrec_malloc(rec->state, MEM_FOR_LIGHTREC,
363                                     sizeof(*block_rec));
364         if (!block_rec) {
365                 ret = -ENOMEM;
366                 goto out_unlock;
367         }
368
369         pr_debug("Adding block PC 0x%x to recompiler\n", block->pc);
370
371         block_rec->block = block;
372         block_rec->compiling = false;
373
374         elm = &rec->slist;
375
376         /* If the block is being recompiled, push it to the end of the queue;
377          * otherwise push it to the front of the queue. */
378         if (block->flags & BLOCK_SHOULD_RECOMPILE)
379                 for (; elm->next; elm = elm->next);
380
381         slist_append(elm, &block_rec->slist);
382
383         /* Signal the thread */
384         pthread_cond_signal(&rec->cond);
385
386 out_unlock:
387         pthread_mutex_unlock(&rec->mutex);
388
389         return ret;
390 }
391
392 void lightrec_recompiler_remove(struct recompiler *rec, struct block *block)
393 {
394         struct block_rec *block_rec;
395         struct slist_elm *elm;
396
397         pthread_mutex_lock(&rec->mutex);
398
399         while (true) {
400                 for (elm = slist_first(&rec->slist); elm; elm = elm->next) {
401                         block_rec = container_of(elm, struct block_rec, slist);
402
403                         if (block_rec->block == block) {
404                                 if (lightrec_cancel_block_rec(rec, block_rec))
405                                         goto out_unlock;
406
407                                 break;
408                         }
409                 }
410
411                 if (!elm)
412                         break;
413         }
414
415 out_unlock:
416         pthread_mutex_unlock(&rec->mutex);
417 }
418
419 void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
420                                           struct block *block, u32 *pc)
421 {
422         bool freed;
423
424         /* There's no point in running the first pass if the block will never
425          * be compiled. Let the main loop run the interpreter instead. */
426         if (block->flags & BLOCK_NEVER_COMPILE)
427                 return NULL;
428
429         /* If the block is already fully tagged, there is no point in running
430          * the first pass. Request a recompilation of the block, and maybe the
431          * interpreter will run the block in the meantime. */
432         if (block->flags & BLOCK_FULLY_TAGGED)
433                 lightrec_recompiler_add(state->rec, block);
434
435         if (likely(block->function)) {
436                 if (block->flags & BLOCK_FULLY_TAGGED) {
437                         freed = atomic_flag_test_and_set(&block->op_list_freed);
438
439                         if (!freed) {
440                                 pr_debug("Block PC 0x%08x is fully tagged"
441                                          " - free opcode list\n", block->pc);
442
443                                 /* The block was already compiled but the opcode list
444                                  * didn't get freed yet - do it now */
445                                 lightrec_free_opcode_list(state, block);
446                                 block->opcode_list = NULL;
447                         }
448                 }
449
450                 return block->function;
451         }
452
453         /* Mark the opcode list as freed, so that the threaded compiler won't
454          * free it while we're using it in the interpreter. */
455         freed = atomic_flag_test_and_set(&block->op_list_freed);
456
457         /* Block wasn't compiled yet - run the interpreter */
458         *pc = lightrec_emulate_block(state, block, *pc);
459
460         if (!freed)
461                 atomic_flag_clear(&block->op_list_freed);
462
463         /* The block got compiled while the interpreter was running.
464          * We can free the opcode list now. */
465         if (block->function && (block->flags & BLOCK_FULLY_TAGGED) &&
466             !atomic_flag_test_and_set(&block->op_list_freed)) {
467                 pr_debug("Block PC 0x%08x is fully tagged"
468                          " - free opcode list\n", block->pc);
469
470                 lightrec_free_opcode_list(state, block);
471                 block->opcode_list = NULL;
472         }
473
474         return NULL;
475 }
476
477 void lightrec_code_alloc_lock(struct lightrec_state *state)
478 {
479         pthread_mutex_lock(&state->rec->alloc_mutex);
480 }
481
482 void lightrec_code_alloc_unlock(struct lightrec_state *state)
483 {
484         pthread_mutex_unlock(&state->rec->alloc_mutex);
485 }