3b3fd09037bd033fef73bdd996a48962ff978485
[pcsx_rearmed.git] / deps / lightrec / lightrec.c
1 // SPDX-License-Identifier: LGPL-2.1-or-later
2 /*
3  * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
4  */
5
6 #include "blockcache.h"
7 #include "debug.h"
8 #include "disassembler.h"
9 #include "emitter.h"
10 #include "interpreter.h"
11 #include "lightrec-config.h"
12 #include "lightning-wrapper.h"
13 #include "lightrec.h"
14 #include "memmanager.h"
15 #include "reaper.h"
16 #include "recompiler.h"
17 #include "regcache.h"
18 #include "optimizer.h"
19 #include "tlsf/tlsf.h"
20
21 #include <errno.h>
22 #include <inttypes.h>
23 #include <limits.h>
24 #if ENABLE_THREADED_COMPILER
25 #include <stdatomic.h>
26 #endif
27 #include <stdbool.h>
28 #include <stddef.h>
29 #include <string.h>
30
/* Build a mask with bits l..h (inclusive) set.
 * Both shifts are done on uintptr_t values, so 'l' must stay strictly below
 * the bit width of uintptr_t and 'h' must not exceed __WORDSIZE - 1;
 * otherwise the shift count is out of range. */
#define GENMASK(h, l) \
        (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h))))
33
34 static struct block * lightrec_precompile_block(struct lightrec_state *state,
35                                                 u32 pc);
36 static bool lightrec_block_is_fully_tagged(const struct block *block);
37
38 static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data);
39 static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg);
40
41 static void lightrec_default_sb(struct lightrec_state *state, u32 opcode,
42                                 void *host, u32 addr, u8 data)
43 {
44         *(u8 *)host = data;
45
46         if (!state->invalidate_from_dma_only)
47                 lightrec_invalidate(state, addr, 1);
48 }
49
50 static void lightrec_default_sh(struct lightrec_state *state, u32 opcode,
51                                 void *host, u32 addr, u16 data)
52 {
53         *(u16 *)host = HTOLE16(data);
54
55         if (!state->invalidate_from_dma_only)
56                 lightrec_invalidate(state, addr, 2);
57 }
58
59 static void lightrec_default_sw(struct lightrec_state *state, u32 opcode,
60                                 void *host, u32 addr, u32 data)
61 {
62         *(u32 *)host = HTOLE32(data);
63
64         if (!state->invalidate_from_dma_only)
65                 lightrec_invalidate(state, addr, 4);
66 }
67
68 static u8 lightrec_default_lb(struct lightrec_state *state,
69                               u32 opcode, void *host, u32 addr)
70 {
71         return *(u8 *)host;
72 }
73
74 static u16 lightrec_default_lh(struct lightrec_state *state,
75                                u32 opcode, void *host, u32 addr)
76 {
77         return LE16TOH(*(u16 *)host);
78 }
79
80 static u32 lightrec_default_lw(struct lightrec_state *state,
81                                u32 opcode, void *host, u32 addr)
82 {
83         return LE32TOH(*(u32 *)host);
84 }
85
86 static const struct lightrec_mem_map_ops lightrec_default_ops = {
87         .sb = lightrec_default_sb,
88         .sh = lightrec_default_sh,
89         .sw = lightrec_default_sw,
90         .lb = lightrec_default_lb,
91         .lh = lightrec_default_lh,
92         .lw = lightrec_default_lw,
93 };
94
95 static void __segfault_cb(struct lightrec_state *state, u32 addr,
96                           const struct block *block)
97 {
98         lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
99         pr_err("Segmentation fault in recompiled code: invalid "
100                "load/store at address 0x%08x\n", addr);
101         if (block)
102                 pr_err("Was executing block PC 0x%08x\n", block->pc);
103 }
104
105 static void lightrec_swl(struct lightrec_state *state,
106                          const struct lightrec_mem_map_ops *ops,
107                          u32 opcode, void *host, u32 addr, u32 data)
108 {
109         unsigned int shift = addr & 0x3;
110         unsigned int mask = GENMASK(31, (shift + 1) * 8);
111         u32 old_data;
112
113         /* Align to 32 bits */
114         addr &= ~3;
115         host = (void *)((uintptr_t)host & ~3);
116
117         old_data = ops->lw(state, opcode, host, addr);
118
119         data = (data >> ((3 - shift) * 8)) | (old_data & mask);
120
121         ops->sw(state, opcode, host, addr, data);
122 }
123
124 static void lightrec_swr(struct lightrec_state *state,
125                          const struct lightrec_mem_map_ops *ops,
126                          u32 opcode, void *host, u32 addr, u32 data)
127 {
128         unsigned int shift = addr & 0x3;
129         unsigned int mask = (1 << (shift * 8)) - 1;
130         u32 old_data;
131
132         /* Align to 32 bits */
133         addr &= ~3;
134         host = (void *)((uintptr_t)host & ~3);
135
136         old_data = ops->lw(state, opcode, host, addr);
137
138         data = (data << (shift * 8)) | (old_data & mask);
139
140         ops->sw(state, opcode, host, addr, data);
141 }
142
143 static void lightrec_swc2(struct lightrec_state *state, union code op,
144                           const struct lightrec_mem_map_ops *ops,
145                           void *host, u32 addr)
146 {
147         u32 data = lightrec_mfc2(state, op.i.rt);
148
149         ops->sw(state, op.opcode, host, addr, data);
150 }
151
152 static u32 lightrec_lwl(struct lightrec_state *state,
153                         const struct lightrec_mem_map_ops *ops,
154                         u32 opcode, void *host, u32 addr, u32 data)
155 {
156         unsigned int shift = addr & 0x3;
157         unsigned int mask = (1 << (24 - shift * 8)) - 1;
158         u32 old_data;
159
160         /* Align to 32 bits */
161         addr &= ~3;
162         host = (void *)((uintptr_t)host & ~3);
163
164         old_data = ops->lw(state, opcode, host, addr);
165
166         return (data & mask) | (old_data << (24 - shift * 8));
167 }
168
169 static u32 lightrec_lwr(struct lightrec_state *state,
170                         const struct lightrec_mem_map_ops *ops,
171                         u32 opcode, void *host, u32 addr, u32 data)
172 {
173         unsigned int shift = addr & 0x3;
174         unsigned int mask = GENMASK(31, 32 - shift * 8);
175         u32 old_data;
176
177         /* Align to 32 bits */
178         addr &= ~3;
179         host = (void *)((uintptr_t)host & ~3);
180
181         old_data = ops->lw(state, opcode, host, addr);
182
183         return (data & mask) | (old_data >> (shift * 8));
184 }
185
186 static void lightrec_lwc2(struct lightrec_state *state, union code op,
187                           const struct lightrec_mem_map_ops *ops,
188                           void *host, u32 addr)
189 {
190         u32 data = ops->lw(state, op.opcode, host, addr);
191
192         lightrec_mtc2(state, op.i.rt, data);
193 }
194
195 static void lightrec_invalidate_map(struct lightrec_state *state,
196                 const struct lightrec_mem_map *map, u32 addr, u32 len)
197 {
198         if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) {
199                 memset(lut_address(state, lut_offset(addr)), 0,
200                        ((len + 3) / 4) * lut_elm_size(state));
201         }
202 }
203
204 enum psx_map
205 lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr)
206 {
207         const struct lightrec_mem_map *map;
208         unsigned int i;
209
210         for (i = 0; i < state->nb_maps; i++) {
211                 map = &state->maps[i];
212
213                 if (kaddr >= map->pc && kaddr < map->pc + map->length)
214                         return (enum psx_map) i;
215         }
216
217         return PSX_MAP_UNKNOWN;
218 }
219
220 const struct lightrec_mem_map *
221 lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr)
222 {
223         const struct lightrec_mem_map *map;
224         enum psx_map idx;
225         u32 addr;
226
227         idx = lightrec_get_map_idx(state, kaddr);
228         if (idx == PSX_MAP_UNKNOWN)
229                 return NULL;
230
231         map = &state->maps[idx];
232         addr = kaddr - map->pc;
233
234         while (map->mirror_of)
235                 map = map->mirror_of;
236
237         if (host)
238                 *host = map->address + addr;
239
240         return map;
241 }
242
243 u32 lightrec_rw(struct lightrec_state *state, union code op,
244                 u32 addr, u32 data, u16 *flags, struct block *block)
245 {
246         const struct lightrec_mem_map *map;
247         const struct lightrec_mem_map_ops *ops;
248         u32 opcode = op.opcode;
249         void *host;
250
251         addr += (s16) op.i.imm;
252
253         map = lightrec_get_map(state, &host, kunseg(addr));
254         if (!map) {
255                 __segfault_cb(state, addr, block);
256                 return 0;
257         }
258
259         if (unlikely(map->ops)) {
260                 if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags))
261                         *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
262
263                 ops = map->ops;
264         } else {
265                 if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags))
266                         *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
267
268                 ops = &lightrec_default_ops;
269         }
270
271         switch (op.i.op) {
272         case OP_SB:
273                 ops->sb(state, opcode, host, addr, (u8) data);
274                 return 0;
275         case OP_SH:
276                 ops->sh(state, opcode, host, addr, (u16) data);
277                 return 0;
278         case OP_SWL:
279                 lightrec_swl(state, ops, opcode, host, addr, data);
280                 return 0;
281         case OP_SWR:
282                 lightrec_swr(state, ops, opcode, host, addr, data);
283                 return 0;
284         case OP_SW:
285                 ops->sw(state, opcode, host, addr, data);
286                 return 0;
287         case OP_SWC2:
288                 lightrec_swc2(state, op, ops, host, addr);
289                 return 0;
290         case OP_LB:
291                 return (s32) (s8) ops->lb(state, opcode, host, addr);
292         case OP_LBU:
293                 return ops->lb(state, opcode, host, addr);
294         case OP_LH:
295                 return (s32) (s16) ops->lh(state, opcode, host, addr);
296         case OP_LHU:
297                 return ops->lh(state, opcode, host, addr);
298         case OP_LWC2:
299                 lightrec_lwc2(state, op, ops, host, addr);
300                 return 0;
301         case OP_LWL:
302                 return lightrec_lwl(state, ops, opcode, host, addr, data);
303         case OP_LWR:
304                 return lightrec_lwr(state, ops, opcode, host, addr, data);
305         case OP_LW:
306         default:
307                 return ops->lw(state, opcode, host, addr);
308         }
309 }
310
311 static void lightrec_rw_helper(struct lightrec_state *state,
312                                union code op, u16 *flags,
313                                struct block *block)
314 {
315         u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs],
316                               state->regs.gpr[op.i.rt], flags, block);
317
318         switch (op.i.op) {
319         case OP_LB:
320         case OP_LBU:
321         case OP_LH:
322         case OP_LHU:
323         case OP_LWL:
324         case OP_LWR:
325         case OP_LW:
326                 if (op.i.rt)
327                         state->regs.gpr[op.i.rt] = ret;
328                 fallthrough;
329         default:
330                 break;
331         }
332 }
333
334 static void lightrec_rw_cb(struct lightrec_state *state)
335 {
336         lightrec_rw_helper(state, (union code)state->c_wrapper_arg, NULL, NULL);
337 }
338
339 static void lightrec_rw_generic_cb(struct lightrec_state *state)
340 {
341         struct block *block;
342         struct opcode *op;
343         bool was_tagged;
344         u32 arg = state->c_wrapper_arg;
345         u16 offset = (u16)arg;
346
347         block = lightrec_find_block_from_lut(state->block_cache,
348                                              arg >> 16, state->next_pc);
349         if (unlikely(!block)) {
350                 pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n",
351                          state->next_pc, offset);
352                 return;
353         }
354
355         op = &block->opcode_list[offset];
356         was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(op->flags);
357
358         lightrec_rw_helper(state, op->c, &op->flags, block);
359
360         if (!was_tagged) {
361                 pr_debug("Opcode of block at PC 0x%08x has been tagged - flag "
362                          "for recompilation\n", block->pc);
363
364                 block->flags |= BLOCK_SHOULD_RECOMPILE;
365         }
366 }
367
368 static u32 clamp_s32(s32 val, s32 min, s32 max)
369 {
370         return val < min ? min : val > max ? max : val;
371 }
372
373 static u16 load_u16(u32 *ptr)
374 {
375         return ((struct u16x2 *) ptr)->l;
376 }
377
378 static void store_u16(u32 *ptr, u16 value)
379 {
380         ((struct u16x2 *) ptr)->l = value;
381 }
382
383 static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg)
384 {
385         s16 gteir1, gteir2, gteir3;
386
387         switch (reg) {
388         case 1:
389         case 3:
390         case 5:
391         case 8:
392         case 9:
393         case 10:
394         case 11:
395                 return (s32)(s16) load_u16(&state->regs.cp2d[reg]);
396         case 7:
397         case 16:
398         case 17:
399         case 18:
400         case 19:
401                 return load_u16(&state->regs.cp2d[reg]);
402         case 28:
403         case 29:
404                 gteir1 = (s16) load_u16(&state->regs.cp2d[9]);
405                 gteir2 = (s16) load_u16(&state->regs.cp2d[10]);
406                 gteir3 = (s16) load_u16(&state->regs.cp2d[11]);
407
408                 return clamp_s32(gteir1 >> 7, 0, 0x1f) << 0 |
409                         clamp_s32(gteir2 >> 7, 0, 0x1f) << 5 |
410                         clamp_s32(gteir3 >> 7, 0, 0x1f) << 10;
411         case 15:
412                 reg = 14;
413                 fallthrough;
414         default:
415                 return state->regs.cp2d[reg];
416         }
417 }
418
419 u32 lightrec_mfc(struct lightrec_state *state, union code op)
420 {
421         if (op.i.op == OP_CP0)
422                 return state->regs.cp0[op.r.rd];
423         else if (op.r.rs == OP_CP2_BASIC_MFC2)
424                 return lightrec_mfc2(state, op.r.rd);
425         else
426                 return state->regs.cp2c[op.r.rd];
427 }
428
429 static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
430 {
431         u32 status, oldstatus, cause;
432
433         switch (reg) {
434         case 1:
435         case 4:
436         case 8:
437         case 14:
438         case 15:
439                 /* Those registers are read-only */
440                 return;
441         default:
442                 break;
443         }
444
445         if (reg == 12) {
446                 status = state->regs.cp0[12];
447                 oldstatus = status;
448
449                 if (status & ~data & BIT(16)) {
450                         state->ops.enable_ram(state, true);
451                         lightrec_invalidate_all(state);
452                 } else if (~status & data & BIT(16)) {
453                         state->ops.enable_ram(state, false);
454                 }
455         }
456
457         if (reg == 13) {
458                 state->regs.cp0[13] &= ~0x300;
459                 state->regs.cp0[13] |= data & 0x300;
460         } else {
461                 state->regs.cp0[reg] = data;
462         }
463
464         if (reg == 12 || reg == 13) {
465                 cause = state->regs.cp0[13];
466                 status = state->regs.cp0[12];
467
468                 /* Handle software interrupts */
469                 if (!!(status & cause & 0x300) & status)
470                         lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT);
471
472                 /* Handle hardware interrupts */
473                 if (reg == 12 && !(~status & 0x401) && (~oldstatus & 0x401))
474                         lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT);
475         }
476 }
477
478 static u32 count_leading_bits(s32 data)
479 {
480         u32 cnt = 33;
481
482 #ifdef __has_builtin
483 #if __has_builtin(__builtin_clrsb)
484         return 1 + __builtin_clrsb(data);
485 #endif
486 #endif
487
488         data = (data ^ (data >> 31)) << 1;
489
490         do {
491                 cnt -= 1;
492                 data >>= 1;
493         } while (data);
494
495         return cnt;
496 }
497
498 static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data)
499 {
500         switch (reg) {
501         case 15:
502                 state->regs.cp2d[12] = state->regs.cp2d[13];
503                 state->regs.cp2d[13] = state->regs.cp2d[14];
504                 state->regs.cp2d[14] = data;
505                 break;
506         case 28:
507                 state->regs.cp2d[9] = (data << 7) & 0xf80;
508                 state->regs.cp2d[10] = (data << 2) & 0xf80;
509                 state->regs.cp2d[11] = (data >> 3) & 0xf80;
510                 break;
511         case 31:
512                 return;
513         case 30:
514                 state->regs.cp2d[31] = count_leading_bits((s32) data);
515                 fallthrough;
516         default:
517                 state->regs.cp2d[reg] = data;
518                 break;
519         }
520 }
521
522 static void lightrec_ctc2(struct lightrec_state *state, u8 reg, u32 data)
523 {
524         switch (reg) {
525         case 4:
526         case 12:
527         case 20:
528         case 26:
529         case 27:
530         case 29:
531         case 30:
532                 store_u16(&state->regs.cp2c[reg], data);
533                 break;
534         case 31:
535                 data = (data & 0x7ffff000) | !!(data & 0x7f87e000) << 31;
536                 fallthrough;
537         default:
538                 state->regs.cp2c[reg] = data;
539                 break;
540         }
541 }
542
543 void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
544 {
545         if (op.i.op == OP_CP0)
546                 lightrec_mtc0(state, op.r.rd, data);
547         else if (op.r.rs == OP_CP2_BASIC_CTC2)
548                 lightrec_ctc2(state, op.r.rd, data);
549         else
550                 lightrec_mtc2(state, op.r.rd, data);
551 }
552
553 static void lightrec_mtc_cb(struct lightrec_state *state)
554 {
555         union code op = (union code) state->c_wrapper_arg;
556
557         lightrec_mtc(state, op, state->regs.gpr[op.r.rt]);
558 }
559
560 void lightrec_rfe(struct lightrec_state *state)
561 {
562         u32 status;
563
564         /* Read CP0 Status register (r12) */
565         status = state->regs.cp0[12];
566
567         /* Switch the bits */
568         status = ((status & 0x3c) >> 2) | (status & ~0xf);
569
570         /* Write it back */
571         lightrec_mtc0(state, 12, status);
572 }
573
574 void lightrec_cp(struct lightrec_state *state, union code op)
575 {
576         if (op.i.op == OP_CP0) {
577                 pr_err("Invalid CP opcode to coprocessor #0\n");
578                 return;
579         }
580
581         (*state->ops.cop2_op)(state, op.opcode);
582 }
583
584 static void lightrec_cp_cb(struct lightrec_state *state)
585 {
586         lightrec_cp(state, (union code) state->c_wrapper_arg);
587 }
588
589 static void lightrec_syscall_cb(struct lightrec_state *state)
590 {
591         lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL);
592 }
593
594 static void lightrec_break_cb(struct lightrec_state *state)
595 {
596         lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK);
597 }
598
599 struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
600 {
601         struct block *block = lightrec_find_block(state->block_cache, pc);
602
603         if (block && lightrec_block_is_outdated(state, block)) {
604                 pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);
605
606                 /* Make sure the recompiler isn't processing the block we'll
607                  * destroy */
608                 if (ENABLE_THREADED_COMPILER)
609                         lightrec_recompiler_remove(state->rec, block);
610
611                 lightrec_unregister_block(state->block_cache, block);
612                 remove_from_code_lut(state->block_cache, block);
613                 lightrec_free_block(state, block);
614                 block = NULL;
615         }
616
617         if (!block) {
618                 block = lightrec_precompile_block(state, pc);
619                 if (!block) {
620                         pr_err("Unable to recompile block at PC 0x%x\n", pc);
621                         lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
622                         return NULL;
623                 }
624
625                 lightrec_register_block(state->block_cache, block);
626         }
627
628         return block;
629 }
630
/* Find the host function to execute for the guest code at 'pc'.
 *
 * The loop first checks the code LUT, then falls back to fetching (and if
 * needed compiling or interpreting) the block. Blocks that cannot be run
 * natively yet are interpreted, which advances 'pc', and the loop continues
 * until a function is found, an exit flag is set or the target cycle count
 * is reached.
 *
 * On the regular exit path, state->next_pc is set to the current 'pc' and
 * the last value of 'func' is returned. If lightrec_compile_block() fails,
 * state->exit_flags is set to LIGHTREC_EXIT_NOMEM and NULL is returned
 * without updating state->next_pc. */
static void * get_next_block_func(struct lightrec_state *state, u32 pc)
{
        struct block *block;
        bool should_recompile;
        void *func;
        int err;

        for (;;) {
                /* Fast path: the LUT already holds a function for this PC
                 * that is not the get_next_block entry itself */
                func = lut_read(state, lut_offset(pc));
                if (func && func != state->get_next_block)
                        break;

                block = lightrec_get_block(state, pc);

                /* lightrec_get_block() already raised an exit flag */
                if (unlikely(!block))
                        break;

                if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) {
                        func = state->memset_func;
                        break;
                }

                should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE &&
                        !(block->flags & BLOCK_IS_DEAD);

                if (unlikely(should_recompile)) {
                        pr_debug("Block at PC 0x%08x should recompile\n", pc);

                        /* The current code size is dropped from the
                         * MEM_FOR_CODE accounting before recompiling */
                        lightrec_unregister(MEM_FOR_CODE, block->code_size);

                        if (ENABLE_THREADED_COMPILER) {
                                lightrec_recompiler_add(state->rec, block);
                        } else {
                                err = lightrec_compile_block(state->cstate, block);
                                if (err) {
                                        state->exit_flags = LIGHTREC_EXIT_NOMEM;
                                        return NULL;
                                }
                        }
                }

                if (ENABLE_THREADED_COMPILER && likely(!should_recompile))
                        func = lightrec_recompiler_run_first_pass(state, block, &pc);
                else
                        func = block->function;

                if (likely(func))
                        break;

                /* No native code available for this block at this point */
                if (unlikely(block->flags & BLOCK_NEVER_COMPILE)) {
                        pc = lightrec_emulate_block(state, block, pc);

                } else if (!ENABLE_THREADED_COMPILER) {
                        /* Block wasn't compiled yet - run the interpreter */
                        if (block->flags & BLOCK_FULLY_TAGGED)
                                pr_debug("Block fully tagged, skipping first pass\n");
                        else if (ENABLE_FIRST_PASS && likely(!should_recompile))
                                pc = lightrec_emulate_block(state, block, pc);

                        /* Then compile it using the profiled data */
                        err = lightrec_compile_block(state->cstate, block);
                        if (err) {
                                state->exit_flags = LIGHTREC_EXIT_NOMEM;
                                return NULL;
                        }
                } else {
                        lightrec_recompiler_add(state->rec, block);
                }

                /* Stop looping once an exit was requested or the cycle
                 * budget is exhausted */
                if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
                    state->current_cycle >= state->target_cycle)
                        break;
        }

        state->next_pc = pc;
        return func;
}
708
709 static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
710                               void (*f)(struct lightrec_state *))
711 {
712         state->current_cycle = state->target_cycle - cycles_delta;
713
714         (*f)(state);
715
716         return state->target_cycle - state->current_cycle;
717 }
718
719 static void * lightrec_alloc_code(struct lightrec_state *state, size_t size)
720 {
721         void *code;
722
723         if (ENABLE_THREADED_COMPILER)
724                 lightrec_code_alloc_lock(state);
725
726         code = tlsf_malloc(state->tlsf, size);
727
728         if (ENABLE_THREADED_COMPILER)
729                 lightrec_code_alloc_unlock(state);
730
731         return code;
732 }
733
734 static void lightrec_realloc_code(struct lightrec_state *state,
735                                   void *ptr, size_t size)
736 {
737         /* NOTE: 'size' MUST be smaller than the size specified during
738          * the allocation. */
739
740         if (ENABLE_THREADED_COMPILER)
741                 lightrec_code_alloc_lock(state);
742
743         tlsf_realloc(state->tlsf, ptr, size);
744
745         if (ENABLE_THREADED_COMPILER)
746                 lightrec_code_alloc_unlock(state);
747 }
748
749 static void lightrec_free_code(struct lightrec_state *state, void *ptr)
750 {
751         if (ENABLE_THREADED_COMPILER)
752                 lightrec_code_alloc_lock(state);
753
754         tlsf_free(state->tlsf, ptr);
755
756         if (ENABLE_THREADED_COMPILER)
757                 lightrec_code_alloc_unlock(state);
758 }
759
760 static void * lightrec_emit_code(struct lightrec_state *state,
761                                  const struct block *block,
762                                  jit_state_t *_jit, unsigned int *size)
763 {
764         bool has_code_buffer = ENABLE_CODE_BUFFER && state->tlsf;
765         jit_word_t code_size, new_code_size;
766         void *code;
767
768         jit_realize();
769
770         if (!ENABLE_DISASSEMBLER)
771                 jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE);
772
773         if (has_code_buffer) {
774                 jit_get_code(&code_size);
775                 code = lightrec_alloc_code(state, (size_t) code_size);
776
777                 if (!code) {
778                         if (ENABLE_THREADED_COMPILER) {
779                                 /* If we're using the threaded compiler, return
780                                  * an allocation error here. The threaded
781                                  * compiler will then empty its job queue and
782                                  * request a code flush using the reaper. */
783                                 return NULL;
784                         }
785
786                         /* Remove outdated blocks, and try again */
787                         lightrec_remove_outdated_blocks(state->block_cache, block);
788
789                         pr_debug("Re-try to alloc %zu bytes...\n", code_size);
790
791                         code = lightrec_alloc_code(state, code_size);
792                         if (!code) {
793                                 pr_err("Could not alloc even after removing old blocks!\n");
794                                 return NULL;
795                         }
796                 }
797
798                 jit_set_code(code, code_size);
799         }
800
801         code = jit_emit();
802
803         jit_get_code(&new_code_size);
804         lightrec_register(MEM_FOR_CODE, new_code_size);
805
806         if (has_code_buffer) {
807                 lightrec_realloc_code(state, code, (size_t) new_code_size);
808
809                 pr_debug("Creating code block at address 0x%" PRIxPTR ", "
810                          "code size: %" PRIuPTR " new: %" PRIuPTR "\n",
811                          (uintptr_t) code, code_size, new_code_size);
812         }
813
814         *size = (unsigned int) new_code_size;
815
816         return code;
817 }
818
/* Generate the native wrapper that lets recompiled code call C functions.
 *
 * The generated code exposes C_WRAPPERS_COUNT entry points, stored in
 * state->wrappers_eps. The entry points are separated by instructions that
 * increment LIGHTREC_REG_STATE by one word each, so that the offset added to
 * the register selects which state->c_wrappers entry gets loaded in the
 * trampoline. The selected function is then called through
 * c_function_wrapper(), whose return value ends up in LIGHTREC_REG_CYCLE.
 *
 * Returns the wrapper block, or NULL on failure.
 *
 * NOTE(review): when lightrec_emit_code() fails, only 'block' is freed; the
 * JIT state created by jit_new_state() does not appear to be released on
 * that path - confirm whether it should be. */
static struct block * generate_wrapper(struct lightrec_state *state)
{
        struct block *block;
        jit_state_t *_jit;
        unsigned int i;
        int stack_ptr;
        jit_node_t *to_tramp, *to_fn_epilog;
        jit_node_t *addr[C_WRAPPERS_COUNT - 1];

        block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
        if (!block)
                goto err_no_mem;

        _jit = jit_new_state();
        if (!_jit)
                goto err_free_block;

        jit_name("RW wrapper");
        jit_note(__FILE__, __LINE__);

        /* Wrapper entry point */
        jit_prolog();
        jit_tramp(256);

        /* Add entry points; separate them by opcodes that increment
         * LIGHTREC_REG_STATE (since we cannot touch other registers).
         * The difference will then tell us which C function to call. */
        for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) {
                jit_addi(LIGHTREC_REG_STATE, LIGHTREC_REG_STATE, __WORDSIZE / 8);
                addr[i - 1] = jit_indirect();
        }

        jit_epilog();
        jit_prolog();

        /* Reserve one stack slot per temporary register */
        stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS);

        /* Save all temporaries on stack */
        for (i = 0; i < NUM_TEMPS; i++)
                jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i));

        /* Jump to the trampoline */
        to_tramp = jit_jmpi();

        /* The trampoline will jump back here */
        to_fn_epilog = jit_label();

        /* Restore temporaries from stack */
        for (i = 0; i < NUM_TEMPS; i++)
                jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t));

        jit_ret();
        jit_epilog();

        /* Trampoline entry point.
         * The sole purpose of the trampoline is to cheese Lightning not to
         * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we
         * do want to return to the caller with this register modified. */
        jit_prolog();
        jit_tramp(256);
        jit_patch(to_tramp);

        /* Retrieve the wrapper function */
        jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
                 offsetof(struct lightrec_state, c_wrappers));

        /* Restore LIGHTREC_REG_STATE to its correct value */
        jit_movi(LIGHTREC_REG_STATE, (uintptr_t) state);

        /* Call c_function_wrapper(state, cycles, function) and keep its
         * return value as the new cycle counter */
        jit_prepare();
        jit_pushargr(LIGHTREC_REG_STATE);
        jit_pushargr(LIGHTREC_REG_CYCLE);
        jit_pushargr(JIT_R0);
        jit_finishi(c_function_wrapper);
        jit_retval_i(LIGHTREC_REG_CYCLE);

        /* Jump back to the code that restores the temporaries */
        jit_patch_at(jit_jmpi(), to_fn_epilog);
        jit_epilog();

        block->_jit = _jit;
        block->opcode_list = NULL;
        block->flags = 0;
        block->nb_ops = 0;

        block->function = lightrec_emit_code(state, block, _jit,
                                             &block->code_size);
        if (!block->function)
                goto err_free_block;

        /* The last entry point is the start of the generated code */
        state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function;

        /* The other ones are the addresses recorded while emitting */
        for (i = 0; i < C_WRAPPERS_COUNT - 1; i++)
                state->wrappers_eps[i] = jit_address(addr[i]);

        if (ENABLE_DISASSEMBLER) {
                pr_debug("Wrapper block:\n");
                jit_disassemble();
        }

        jit_clear_state();
        return block;

err_free_block:
        lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
err_no_mem:
        pr_err("Unable to compile wrapper: Out of memory\n");
        return NULL;
}
927
928 static u32 lightrec_memset(struct lightrec_state *state)
929 {
930         u32 kunseg_pc = kunseg(state->regs.gpr[4]);
931         void *host;
932         const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg_pc);
933         u32 length = state->regs.gpr[5] * 4;
934
935         if (!map) {
936                 pr_err("Unable to find memory map for memset target address "
937                        "0x%x\n", kunseg_pc);
938                 return 0;
939         }
940
941         pr_debug("Calling host memset, PC 0x%x (host address 0x%" PRIxPTR ") for %u bytes\n",
942                  kunseg_pc, (uintptr_t)host, length);
943         memset(host, 0, length);
944
945         if (!state->invalidate_from_dma_only)
946                 lightrec_invalidate_map(state, map, kunseg_pc, length);
947
948         /* Rough estimation of the number of cycles consumed */
949         return 8 + 5 * (length  + 3 / 4);
950 }
951
952 static struct block * generate_dispatcher(struct lightrec_state *state)
953 {
954         struct block *block;
955         jit_state_t *_jit;
956         jit_node_t *to_end, *loop, *addr, *addr2, *addr3;
957         unsigned int i;
958         u32 offset;
959
960         block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
961         if (!block)
962                 goto err_no_mem;
963
964         _jit = jit_new_state();
965         if (!_jit)
966                 goto err_free_block;
967
968         jit_name("dispatcher");
969         jit_note(__FILE__, __LINE__);
970
971         jit_prolog();
972         jit_frame(256);
973
974         jit_getarg(JIT_R0, jit_arg());
975         jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg());
976
977         /* Force all callee-saved registers to be pushed on the stack */
978         for (i = 0; i < NUM_REGS; i++)
979                 jit_movr(JIT_V(i), JIT_V(i));
980
981         /* Pass lightrec_state structure to blocks, using the last callee-saved
982          * register that Lightning provides */
983         jit_movi(LIGHTREC_REG_STATE, (intptr_t) state);
984
985         loop = jit_label();
986
987         /* Call the block's code */
988         jit_jmpr(JIT_R0);
989
990         if (OPT_REPLACE_MEMSET) {
991                 /* Blocks will jump here when they need to call
992                  * lightrec_memset() */
993                 addr3 = jit_indirect();
994
995                 jit_prepare();
996                 jit_pushargr(LIGHTREC_REG_STATE);
997                 jit_finishi(lightrec_memset);
998
999                 jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
1000                             offsetof(struct lightrec_state, regs.gpr[31]));
1001
1002                 jit_retval(JIT_R0);
1003                 jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0);
1004         }
1005
1006         /* The block will jump here, with the number of cycles remaining in
1007          * LIGHTREC_REG_CYCLE */
1008         addr2 = jit_indirect();
1009
1010         /* Store back the next_pc to the lightrec_state structure */
1011         offset = offsetof(struct lightrec_state, next_pc);
1012         jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
1013
1014         /* Jump to end if state->target_cycle < state->current_cycle */
1015         to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
1016
1017         /* Convert next PC to KUNSEG and avoid mirrors */
1018         jit_andi(JIT_R0, JIT_V0, 0x10000000 | (RAM_SIZE - 1));
1019         jit_rshi_u(JIT_R1, JIT_R0, 28);
1020         jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1);
1021         jit_addi(JIT_R2, JIT_R2, RAM_SIZE);
1022         jit_movnr(JIT_R0, JIT_R2, JIT_R1);
1023
1024         /* If possible, use the code LUT */
1025         if (!lut_is_32bit(state))
1026                 jit_lshi(JIT_R0, JIT_R0, 1);
1027         jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE);
1028
1029         offset = offsetof(struct lightrec_state, code_lut);
1030         if (lut_is_32bit(state))
1031                 jit_ldxi_ui(JIT_R0, JIT_R0, offset);
1032         else
1033                 jit_ldxi(JIT_R0, JIT_R0, offset);
1034
1035         /* If we get non-NULL, loop */
1036         jit_patch_at(jit_bnei(JIT_R0, 0), loop);
1037
1038         /* Slow path: call C function get_next_block_func() */
1039
1040         if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
1041                 /* We may call the interpreter - update state->current_cycle */
1042                 jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
1043                            offsetof(struct lightrec_state, target_cycle));
1044                 jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE);
1045                 jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
1046                            LIGHTREC_REG_STATE, JIT_R1);
1047         }
1048
1049         /* The code LUT will be set to this address when the block at the target
1050          * PC has been preprocessed but not yet compiled by the threaded
1051          * recompiler */
1052         addr = jit_indirect();
1053
1054         /* Get the next block */
1055         jit_prepare();
1056         jit_pushargr(LIGHTREC_REG_STATE);
1057         jit_pushargr(JIT_V0);
1058         jit_finishi(&get_next_block_func);
1059         jit_retval(JIT_R0);
1060
1061         if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
1062                 /* The interpreter may have updated state->current_cycle and
1063                  * state->target_cycle - recalc the delta */
1064                 jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE,
1065                            offsetof(struct lightrec_state, current_cycle));
1066                 jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
1067                            offsetof(struct lightrec_state, target_cycle));
1068                 jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1);
1069         }
1070
1071         /* If we get non-NULL, loop */
1072         jit_patch_at(jit_bnei(JIT_R0, 0), loop);
1073
1074         /* When exiting, the recompiled code will jump to that address */
1075         jit_note(__FILE__, __LINE__);
1076         jit_patch(to_end);
1077
1078         jit_retr(LIGHTREC_REG_CYCLE);
1079         jit_epilog();
1080
1081         block->_jit = _jit;
1082         block->opcode_list = NULL;
1083         block->flags = 0;
1084         block->nb_ops = 0;
1085
1086         block->function = lightrec_emit_code(state, block, _jit,
1087                                              &block->code_size);
1088         if (!block->function)
1089                 goto err_free_block;
1090
1091         state->eob_wrapper_func = jit_address(addr2);
1092         if (OPT_REPLACE_MEMSET)
1093                 state->memset_func = jit_address(addr3);
1094         state->get_next_block = jit_address(addr);
1095
1096         if (ENABLE_DISASSEMBLER) {
1097                 pr_debug("Dispatcher block:\n");
1098                 jit_disassemble();
1099         }
1100
1101         /* We're done! */
1102         jit_clear_state();
1103         return block;
1104
1105 err_free_block:
1106         lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
1107 err_no_mem:
1108         pr_err("Unable to compile dispatcher: Out of memory\n");
1109         return NULL;
1110 }
1111
1112 union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
1113 {
1114         void *host = NULL;
1115
1116         lightrec_get_map(state, &host, kunseg(pc));
1117
1118         const u32 *code = (u32 *)host;
1119         return (union code) LE32TOH(*code);
1120 }
1121
1122 unsigned int lightrec_cycles_of_opcode(union code code)
1123 {
1124         return 2;
1125 }
1126
1127 void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block)
1128 {
1129         lightrec_free(state, MEM_FOR_IR,
1130                       sizeof(*block->opcode_list) * block->nb_ops,
1131                       block->opcode_list);
1132 }
1133
1134 static unsigned int lightrec_get_mips_block_len(const u32 *src)
1135 {
1136         unsigned int i;
1137         union code c;
1138
1139         for (i = 1; ; i++) {
1140                 c.opcode = LE32TOH(*src++);
1141
1142                 if (is_syscall(c))
1143                         return i;
1144
1145                 if (is_unconditional_jump(c))
1146                         return i + 1;
1147         }
1148 }
1149
1150 static struct opcode * lightrec_disassemble(struct lightrec_state *state,
1151                                             const u32 *src, unsigned int *len)
1152 {
1153         struct opcode *list;
1154         unsigned int i, length;
1155
1156         length = lightrec_get_mips_block_len(src);
1157
1158         list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * length);
1159         if (!list) {
1160                 pr_err("Unable to allocate memory\n");
1161                 return NULL;
1162         }
1163
1164         for (i = 0; i < length; i++) {
1165                 list[i].opcode = LE32TOH(src[i]);
1166                 list[i].flags = 0;
1167         }
1168
1169         *len = length * sizeof(u32);
1170
1171         return list;
1172 }
1173
1174 static struct block * lightrec_precompile_block(struct lightrec_state *state,
1175                                                 u32 pc)
1176 {
1177         struct opcode *list;
1178         struct block *block;
1179         void *host;
1180         const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc));
1181         const u32 *code = (u32 *) host;
1182         unsigned int length;
1183         bool fully_tagged;
1184
1185         if (!map)
1186                 return NULL;
1187
1188         block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
1189         if (!block) {
1190                 pr_err("Unable to recompile block: Out of memory\n");
1191                 return NULL;
1192         }
1193
1194         list = lightrec_disassemble(state, code, &length);
1195         if (!list) {
1196                 lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
1197                 return NULL;
1198         }
1199
1200         block->pc = pc;
1201         block->_jit = NULL;
1202         block->function = NULL;
1203         block->opcode_list = list;
1204         block->code = code;
1205         block->next = NULL;
1206         block->flags = 0;
1207         block->code_size = 0;
1208         block->precompile_date = state->current_cycle;
1209 #if ENABLE_THREADED_COMPILER
1210         block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT;
1211 #endif
1212         block->nb_ops = length / sizeof(u32);
1213
1214         lightrec_optimize(state, block);
1215
1216         length = block->nb_ops * sizeof(u32);
1217
1218         lightrec_register(MEM_FOR_MIPS_CODE, length);
1219
1220         if (ENABLE_DISASSEMBLER) {
1221                 pr_debug("Disassembled block at PC: 0x%08x\n", block->pc);
1222                 lightrec_print_disassembly(block, code);
1223         }
1224
1225         pr_debug("Block size: %hu opcodes\n", block->nb_ops);
1226
1227         /* If the first opcode is an 'impossible' branch, never compile the
1228          * block */
1229         if (should_emulate(block->opcode_list))
1230                 block->flags |= BLOCK_NEVER_COMPILE;
1231
1232         fully_tagged = lightrec_block_is_fully_tagged(block);
1233         if (fully_tagged)
1234                 block->flags |= BLOCK_FULLY_TAGGED;
1235
1236         if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET))
1237                 lut_write(state, lut_offset(pc), state->memset_func);
1238
1239         block->hash = lightrec_calculate_block_hash(block);
1240
1241         pr_debug("Recompile count: %u\n", state->nb_precompile++);
1242
1243         return block;
1244 }
1245
1246 static bool lightrec_block_is_fully_tagged(const struct block *block)
1247 {
1248         const struct opcode *op;
1249         unsigned int i;
1250
1251         for (i = 0; i < block->nb_ops; i++) {
1252                 op = &block->opcode_list[i];
1253
1254                 /* Verify that all load/stores of the opcode list
1255                  * Check all loads/stores of the opcode list and mark the
1256                  * block as fully compiled if they all have been tagged. */
1257                 switch (op->c.i.op) {
1258                 case OP_LB:
1259                 case OP_LH:
1260                 case OP_LWL:
1261                 case OP_LW:
1262                 case OP_LBU:
1263                 case OP_LHU:
1264                 case OP_LWR:
1265                 case OP_SB:
1266                 case OP_SH:
1267                 case OP_SWL:
1268                 case OP_SW:
1269                 case OP_SWR:
1270                 case OP_LWC2:
1271                 case OP_SWC2:
1272                         if (!LIGHTREC_FLAGS_GET_IO_MODE(op->flags))
1273                                 return false;
1274                         fallthrough;
1275                 default:
1276                         continue;
1277                 }
1278         }
1279
1280         return true;
1281 }
1282
1283 static void lightrec_reap_block(struct lightrec_state *state, void *data)
1284 {
1285         struct block *block = data;
1286
1287         pr_debug("Reap dead block at PC 0x%08x\n", block->pc);
1288         lightrec_unregister_block(state->block_cache, block);
1289         lightrec_free_block(state, block);
1290 }
1291
1292 static void lightrec_reap_jit(struct lightrec_state *state, void *data)
1293 {
1294         _jit_destroy_state(data);
1295 }
1296
1297 static void lightrec_free_function(struct lightrec_state *state, void *fn)
1298 {
1299         if (ENABLE_CODE_BUFFER && state->tlsf) {
1300                 pr_debug("Freeing code block at 0x%" PRIxPTR "\n", (uintptr_t) fn);
1301                 lightrec_free_code(state, fn);
1302         }
1303 }
1304
/*
 * Reaper callback: free the native code pointed to by 'data'.
 */
static void lightrec_reap_function(struct lightrec_state *state, void *data)
{
        void *fn = data;

        lightrec_free_function(state, fn);
}
1309
1310 int lightrec_compile_block(struct lightrec_cstate *cstate,
1311                            struct block *block)
1312 {
1313         struct lightrec_state *state = cstate->state;
1314         struct lightrec_branch_target *target;
1315         bool op_list_freed = false, fully_tagged = false;
1316         struct block *block2;
1317         struct opcode *elm;
1318         jit_state_t *_jit, *oldjit;
1319         jit_node_t *start_of_block;
1320         bool skip_next = false;
1321         void *old_fn, *new_fn;
1322         unsigned int i, j;
1323         u32 offset;
1324
1325         fully_tagged = lightrec_block_is_fully_tagged(block);
1326         if (fully_tagged)
1327                 block->flags |= BLOCK_FULLY_TAGGED;
1328
1329         _jit = jit_new_state();
1330         if (!_jit)
1331                 return -ENOMEM;
1332
1333         oldjit = block->_jit;
1334         old_fn = block->function;
1335         block->_jit = _jit;
1336
1337         lightrec_regcache_reset(cstate->reg_cache);
1338         cstate->cycles = 0;
1339         cstate->nb_branches = 0;
1340         cstate->nb_local_branches = 0;
1341         cstate->nb_targets = 0;
1342
1343         jit_prolog();
1344         jit_tramp(256);
1345
1346         start_of_block = jit_label();
1347
1348         for (i = 0; i < block->nb_ops; i++) {
1349                 elm = &block->opcode_list[i];
1350
1351                 if (skip_next) {
1352                         skip_next = false;
1353                         continue;
1354                 }
1355
1356                 cstate->cycles += lightrec_cycles_of_opcode(elm->c);
1357
1358                 if (should_emulate(elm)) {
1359                         pr_debug("Branch at offset 0x%x will be emulated\n",
1360                                  i << 2);
1361
1362                         lightrec_emit_eob(cstate, block, i, false);
1363                         skip_next = !(elm->flags & LIGHTREC_NO_DS);
1364                 } else {
1365                         lightrec_rec_opcode(cstate, block, i);
1366                         skip_next = has_delay_slot(elm->c) &&
1367                                 !(elm->flags & LIGHTREC_NO_DS);
1368 #if _WIN32
1369                         /* FIXME: GNU Lightning on Windows seems to use our
1370                          * mapped registers as temporaries. Until the actual bug
1371                          * is found and fixed, unconditionally mark our
1372                          * registers as live here. */
1373                         lightrec_regcache_mark_live(cstate->reg_cache, _jit);
1374 #endif
1375                 }
1376         }
1377
1378         for (i = 0; i < cstate->nb_branches; i++)
1379                 jit_patch(cstate->branches[i]);
1380
1381         for (i = 0; i < cstate->nb_local_branches; i++) {
1382                 struct lightrec_branch *branch = &cstate->local_branches[i];
1383
1384                 pr_debug("Patch local branch to offset 0x%x\n",
1385                          branch->target << 2);
1386
1387                 if (branch->target == 0) {
1388                         jit_patch_at(branch->branch, start_of_block);
1389                         continue;
1390                 }
1391
1392                 for (j = 0; j < cstate->nb_targets; j++) {
1393                         if (cstate->targets[j].offset == branch->target) {
1394                                 jit_patch_at(branch->branch,
1395                                              cstate->targets[j].label);
1396                                 break;
1397                         }
1398                 }
1399
1400                 if (j == cstate->nb_targets)
1401                         pr_err("Unable to find branch target\n");
1402         }
1403
1404         jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
1405                  offsetof(struct lightrec_state, eob_wrapper_func));
1406
1407         jit_jmpr(JIT_R0);
1408
1409         jit_ret();
1410         jit_epilog();
1411
1412         new_fn = lightrec_emit_code(state, block, _jit, &block->code_size);
1413         if (!new_fn) {
1414                 if (!ENABLE_THREADED_COMPILER)
1415                         pr_err("Unable to compile block!\n");
1416                 block->_jit = oldjit;
1417                 _jit_destroy_state(_jit);
1418                 return -ENOMEM;
1419         }
1420
1421         block->function = new_fn;
1422         block->flags &= ~BLOCK_SHOULD_RECOMPILE;
1423
1424         /* Add compiled function to the LUT */
1425         lut_write(state, lut_offset(block->pc), block->function);
1426
1427         if (ENABLE_THREADED_COMPILER) {
1428                 /* Since we might try to reap the same block multiple times,
1429                  * we need the reaper to wait until everything has been
1430                  * submitted, so that the duplicate entries can be dropped. */
1431                 lightrec_reaper_pause(state->reaper);
1432         }
1433
1434         /* Detect old blocks that have been covered by the new one */
1435         for (i = 0; i < cstate->nb_targets; i++) {
1436                 target = &cstate->targets[i];
1437
1438                 if (!target->offset)
1439                         continue;
1440
1441                 offset = block->pc + target->offset * sizeof(u32);
1442                 block2 = lightrec_find_block(state->block_cache, offset);
1443                 if (block2) {
1444                         /* No need to check if block2 is compilable - it must
1445                          * be, otherwise block wouldn't be compilable either */
1446
1447                         /* Set the "block dead" flag to prevent the dynarec from
1448                          * recompiling this block */
1449                         block2->flags |= BLOCK_IS_DEAD;
1450
1451                         /* If block2 was pending for compilation, cancel it.
1452                          * If it's being compiled right now, wait until it
1453                          * finishes. */
1454                         if (ENABLE_THREADED_COMPILER)
1455                                 lightrec_recompiler_remove(state->rec, block2);
1456                 }
1457
1458                 /* We know from now on that block2 (if present) isn't going to
1459                  * be compiled. We can override the LUT entry with our new
1460                  * block's entry point. */
1461                 offset = lut_offset(block->pc) + target->offset;
1462                 lut_write(state, offset, jit_address(target->label));
1463
1464                 if (block2) {
1465                         pr_debug("Reap block 0x%08x as it's covered by block "
1466                                  "0x%08x\n", block2->pc, block->pc);
1467
1468                         /* Finally, reap the block. */
1469                         if (ENABLE_THREADED_COMPILER) {
1470                                 lightrec_reaper_add(state->reaper,
1471                                                     lightrec_reap_block,
1472                                                     block2);
1473                         } else {
1474                                 lightrec_unregister_block(state->block_cache, block2);
1475                                 lightrec_free_block(state, block2);
1476                         }
1477                 }
1478         }
1479
1480         if (ENABLE_THREADED_COMPILER)
1481                 lightrec_reaper_continue(state->reaper);
1482
1483         if (ENABLE_DISASSEMBLER) {
1484                 pr_debug("Compiling block at PC: 0x%08x\n", block->pc);
1485                 jit_disassemble();
1486         }
1487
1488         jit_clear_state();
1489
1490 #if ENABLE_THREADED_COMPILER
1491         if (fully_tagged)
1492                 op_list_freed = atomic_flag_test_and_set(&block->op_list_freed);
1493 #endif
1494         if (fully_tagged && !op_list_freed) {
1495                 pr_debug("Block PC 0x%08x is fully tagged"
1496                          " - free opcode list\n", block->pc);
1497                 lightrec_free_opcode_list(state, block);
1498                 block->opcode_list = NULL;
1499         }
1500
1501         if (oldjit) {
1502                 pr_debug("Block 0x%08x recompiled, reaping old jit context.\n",
1503                          block->pc);
1504
1505                 if (ENABLE_THREADED_COMPILER) {
1506                         lightrec_reaper_add(state->reaper,
1507                                             lightrec_reap_jit, oldjit);
1508                         lightrec_reaper_add(state->reaper,
1509                                             lightrec_reap_function, old_fn);
1510                 } else {
1511                         _jit_destroy_state(oldjit);
1512                         lightrec_free_function(state, old_fn);
1513                 }
1514         }
1515
1516         return 0;
1517 }
1518
1519 static void lightrec_print_info(struct lightrec_state *state)
1520 {
1521         if ((state->current_cycle & ~0xfffffff) != state->old_cycle_counter) {
1522                 pr_info("Lightrec RAM usage: IR %u KiB, CODE %u KiB, "
1523                         "MIPS %u KiB, TOTAL %u KiB, avg. IPI %f\n",
1524                         lightrec_get_mem_usage(MEM_FOR_IR) / 1024,
1525                         lightrec_get_mem_usage(MEM_FOR_CODE) / 1024,
1526                         lightrec_get_mem_usage(MEM_FOR_MIPS_CODE) / 1024,
1527                         lightrec_get_total_mem_usage() / 1024,
1528                        lightrec_get_average_ipi());
1529                 state->old_cycle_counter = state->current_cycle & ~0xfffffff;
1530         }
1531 }
1532
1533 u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
1534 {
1535         s32 (*func)(void *, s32) = (void *)state->dispatcher->function;
1536         void *block_trace;
1537         s32 cycles_delta;
1538
1539         state->exit_flags = LIGHTREC_EXIT_NORMAL;
1540
1541         /* Handle the cycle counter overflowing */
1542         if (unlikely(target_cycle < state->current_cycle))
1543                 target_cycle = UINT_MAX;
1544
1545         state->target_cycle = target_cycle;
1546         state->next_pc = pc;
1547
1548         block_trace = get_next_block_func(state, pc);
1549         if (block_trace) {
1550                 cycles_delta = state->target_cycle - state->current_cycle;
1551
1552                 cycles_delta = (*func)(block_trace, cycles_delta);
1553
1554                 state->current_cycle = state->target_cycle - cycles_delta;
1555         }
1556
1557         if (ENABLE_THREADED_COMPILER)
1558                 lightrec_reaper_reap(state->reaper);
1559
1560         if (LOG_LEVEL >= INFO_L)
1561                 lightrec_print_info(state);
1562
1563         return state->next_pc;
1564 }
1565
1566 u32 lightrec_execute_one(struct lightrec_state *state, u32 pc)
1567 {
1568         return lightrec_execute(state, pc, state->current_cycle);
1569 }
1570
1571 u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
1572 {
1573         struct block *block = lightrec_get_block(state, pc);
1574         if (!block)
1575                 return 0;
1576
1577         state->exit_flags = LIGHTREC_EXIT_NORMAL;
1578
1579         pc = lightrec_emulate_block(state, block, pc);
1580
1581         if (LOG_LEVEL >= INFO_L)
1582                 lightrec_print_info(state);
1583
1584         return pc;
1585 }
1586
1587 void lightrec_free_block(struct lightrec_state *state, struct block *block)
1588 {
1589         lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
1590         if (block->opcode_list)
1591                 lightrec_free_opcode_list(state, block);
1592         if (block->_jit)
1593                 _jit_destroy_state(block->_jit);
1594         if (block->function) {
1595                 lightrec_free_function(state, block->function);
1596                 lightrec_unregister(MEM_FOR_CODE, block->code_size);
1597         }
1598         lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
1599 }
1600
1601 struct lightrec_cstate * lightrec_create_cstate(struct lightrec_state *state)
1602 {
1603         struct lightrec_cstate *cstate;
1604
1605         cstate = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*cstate));
1606         if (!cstate)
1607                 return NULL;
1608
1609         cstate->reg_cache = lightrec_regcache_init(state);
1610         if (!cstate->reg_cache) {
1611                 lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*cstate), cstate);
1612                 return NULL;
1613         }
1614
1615         cstate->state = state;
1616
1617         return cstate;
1618 }
1619
1620 void lightrec_free_cstate(struct lightrec_cstate *cstate)
1621 {
1622         lightrec_free_regcache(cstate->reg_cache);
1623         lightrec_free(cstate->state, MEM_FOR_LIGHTREC, sizeof(*cstate), cstate);
1624 }
1625
1626 struct lightrec_state * lightrec_init(char *argv0,
1627                                       const struct lightrec_mem_map *map,
1628                                       size_t nb,
1629                                       const struct lightrec_ops *ops)
1630 {
1631         const struct lightrec_mem_map *codebuf_map = &map[PSX_MAP_CODE_BUFFER];
1632         struct lightrec_state *state;
1633         uintptr_t addr;
1634         void *tlsf = NULL;
1635         bool with_32bit_lut = false;
1636         size_t lut_size;
1637
1638         /* Sanity-check ops */
1639         if (!ops || !ops->cop2_op || !ops->enable_ram) {
1640                 pr_err("Missing callbacks in lightrec_ops structure\n");
1641                 return NULL;
1642         }
1643
1644         if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER
1645             && codebuf_map->address) {
1646                 tlsf = tlsf_create_with_pool(codebuf_map->address,
1647                                              codebuf_map->length);
1648                 if (!tlsf) {
1649                         pr_err("Unable to initialize code buffer\n");
1650                         return NULL;
1651                 }
1652
1653                 if (__WORDSIZE == 64) {
1654                         addr = (uintptr_t) codebuf_map->address + codebuf_map->length - 1;
1655                         with_32bit_lut = addr == (u32) addr;
1656                 }
1657         }
1658
1659         if (with_32bit_lut)
1660                 lut_size = CODE_LUT_SIZE * 4;
1661         else
1662                 lut_size = CODE_LUT_SIZE * sizeof(void *);
1663
1664         init_jit(argv0);
1665
1666         state = calloc(1, sizeof(*state) + lut_size);
1667         if (!state)
1668                 goto err_finish_jit;
1669
1670         lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + lut_size);
1671
1672         state->tlsf = tlsf;
1673         state->with_32bit_lut = with_32bit_lut;
1674
1675         state->block_cache = lightrec_blockcache_init(state);
1676         if (!state->block_cache)
1677                 goto err_free_state;
1678
1679         if (ENABLE_THREADED_COMPILER) {
1680                 state->rec = lightrec_recompiler_init(state);
1681                 if (!state->rec)
1682                         goto err_free_block_cache;
1683
1684                 state->reaper = lightrec_reaper_init(state);
1685                 if (!state->reaper)
1686                         goto err_free_recompiler;
1687         } else {
1688                 state->cstate = lightrec_create_cstate(state);
1689                 if (!state->cstate)
1690                         goto err_free_block_cache;
1691         }
1692
1693         state->nb_maps = nb;
1694         state->maps = map;
1695
1696         memcpy(&state->ops, ops, sizeof(*ops));
1697
1698         state->dispatcher = generate_dispatcher(state);
1699         if (!state->dispatcher)
1700                 goto err_free_reaper;
1701
1702         state->c_wrapper_block = generate_wrapper(state);
1703         if (!state->c_wrapper_block)
1704                 goto err_free_dispatcher;
1705
1706         state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb;
1707         state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb;
1708         state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb;
1709         state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb;
1710         state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb;
1711         state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb;
1712
1713         map = &state->maps[PSX_MAP_BIOS];
1714         state->offset_bios = (uintptr_t)map->address - map->pc;
1715
1716         map = &state->maps[PSX_MAP_SCRATCH_PAD];
1717         state->offset_scratch = (uintptr_t)map->address - map->pc;
1718
1719         map = &state->maps[PSX_MAP_KERNEL_USER_RAM];
1720         state->offset_ram = (uintptr_t)map->address - map->pc;
1721
1722         if (state->maps[PSX_MAP_MIRROR1].address == map->address + 0x200000 &&
1723             state->maps[PSX_MAP_MIRROR2].address == map->address + 0x400000 &&
1724             state->maps[PSX_MAP_MIRROR3].address == map->address + 0x600000)
1725                 state->mirrors_mapped = true;
1726
1727         if (state->offset_bios == 0 &&
1728             state->offset_scratch == 0 &&
1729             state->offset_ram == 0 &&
1730             state->mirrors_mapped) {
1731                 pr_info("Memory map is perfect. Emitted code will be best.\n");
1732         } else {
1733                 pr_info("Memory map is sub-par. Emitted code will be slow.\n");
1734         }
1735
1736         if (state->with_32bit_lut)
1737                 pr_info("Using 32-bit LUT\n");
1738
1739         return state;
1740
1741 err_free_dispatcher:
1742         lightrec_free_block(state, state->dispatcher);
1743 err_free_reaper:
1744         if (ENABLE_THREADED_COMPILER)
1745                 lightrec_reaper_destroy(state->reaper);
1746 err_free_recompiler:
1747         if (ENABLE_THREADED_COMPILER)
1748                 lightrec_free_recompiler(state->rec);
1749         else
1750                 lightrec_free_cstate(state->cstate);
1751 err_free_block_cache:
1752         lightrec_free_block_cache(state->block_cache);
1753 err_free_state:
1754         lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
1755                             lut_elm_size(state) * CODE_LUT_SIZE);
1756         free(state);
1757 err_finish_jit:
1758         finish_jit();
1759         if (ENABLE_CODE_BUFFER && tlsf)
1760                 tlsf_destroy(tlsf);
1761         return NULL;
1762 }
1763
1764 void lightrec_destroy(struct lightrec_state *state)
1765 {
1766         /* Force a print info on destroy*/
1767         state->current_cycle = ~state->current_cycle;
1768         lightrec_print_info(state);
1769
1770         if (ENABLE_THREADED_COMPILER) {
1771                 lightrec_free_recompiler(state->rec);
1772                 lightrec_reaper_destroy(state->reaper);
1773         } else {
1774                 lightrec_free_cstate(state->cstate);
1775         }
1776
1777         lightrec_free_block_cache(state->block_cache);
1778         lightrec_free_block(state, state->dispatcher);
1779         lightrec_free_block(state, state->c_wrapper_block);
1780         finish_jit();
1781         if (ENABLE_CODE_BUFFER && state->tlsf)
1782                 tlsf_destroy(state->tlsf);
1783
1784         lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
1785                             lut_elm_size(state) * CODE_LUT_SIZE);
1786         free(state);
1787 }
1788
1789 void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len)
1790 {
1791         u32 kaddr = kunseg(addr & ~0x3);
1792         enum psx_map idx = lightrec_get_map_idx(state, kaddr);
1793
1794         switch (idx) {
1795         case PSX_MAP_MIRROR1:
1796         case PSX_MAP_MIRROR2:
1797         case PSX_MAP_MIRROR3:
1798                 /* Handle mirrors */
1799                 kaddr &= RAM_SIZE - 1;
1800                 fallthrough;
1801         case PSX_MAP_KERNEL_USER_RAM:
1802                 break;
1803         default:
1804                 return;
1805         }
1806
1807         memset(lut_address(state, lut_offset(kaddr)), 0,
1808                ((len + 3) / 4) * lut_elm_size(state));
1809 }
1810
1811 void lightrec_invalidate_all(struct lightrec_state *state)
1812 {
1813         memset(state->code_lut, 0, lut_elm_size(state) * CODE_LUT_SIZE);
1814 }
1815
1816 void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only)
1817 {
1818         if (state->invalidate_from_dma_only != dma_only)
1819                 lightrec_invalidate_all(state);
1820
1821         state->invalidate_from_dma_only = dma_only;
1822 }
1823
1824 void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags)
1825 {
1826         if (flags != LIGHTREC_EXIT_NORMAL) {
1827                 state->exit_flags |= flags;
1828                 state->target_cycle = state->current_cycle;
1829         }
1830 }
1831
1832 u32 lightrec_exit_flags(struct lightrec_state *state)
1833 {
1834         return state->exit_flags;
1835 }
1836
1837 u32 lightrec_current_cycle_count(const struct lightrec_state *state)
1838 {
1839         return state->current_cycle;
1840 }
1841
1842 void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles)
1843 {
1844         state->current_cycle = cycles;
1845
1846         if (state->target_cycle < cycles)
1847                 state->target_cycle = cycles;
1848 }
1849
1850 void lightrec_set_target_cycle_count(struct lightrec_state *state, u32 cycles)
1851 {
1852         if (state->exit_flags == LIGHTREC_EXIT_NORMAL) {
1853                 if (cycles < state->current_cycle)
1854                         cycles = state->current_cycle;
1855
1856                 state->target_cycle = cycles;
1857         }
1858 }
1859
1860 struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state)
1861 {
1862         return &state->regs;
1863 }