git subrepo clone https://github.com/pcercuei/lightrec.git deps/lightrec
[pcsx_rearmed.git] / deps / lightrec / lightrec.c
1 /*
2  * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  */
14
15 #include "blockcache.h"
16 #include "config.h"
17 #include "debug.h"
18 #include "disassembler.h"
19 #include "emitter.h"
20 #include "interpreter.h"
21 #include "lightrec.h"
22 #include "memmanager.h"
23 #include "recompiler.h"
24 #include "regcache.h"
25 #include "optimizer.h"
26
27 #include <errno.h>
28 #include <lightning.h>
29 #include <limits.h>
30 #if ENABLE_THREADED_COMPILER
31 #include <stdatomic.h>
32 #endif
33 #include <stdbool.h>
34 #include <stddef.h>
35 #include <string.h>
36 #if ENABLE_TINYMM
37 #include <tinymm.h>
38 #endif
39
/*
 * GENMASK(h, l): word-sized mask with bits l through h (inclusive) set.
 *
 * Evaluates to 0 when l is at or beyond the word width. Without that guard
 * the left shift would be by the full width of uintptr_t, which is undefined
 * behaviour in C; this file requests GENMASK(31, 32), which hits that case on
 * 32-bit hosts. Note that 'l' is evaluated twice.
 */
#define GENMASK(h, l) \
        ((l) >= __WORDSIZE ? (uintptr_t)0 : \
         (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h)))))
42
43 static struct block * lightrec_precompile_block(struct lightrec_state *state,
44                                                 u32 pc);
45
46 static void __segfault_cb(struct lightrec_state *state, u32 addr)
47 {
48         lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
49         pr_err("Segmentation fault in recompiled code: invalid "
50                "load/store at address 0x%08x\n", addr);
51 }
52
53 static u32 lightrec_rw_ops(struct lightrec_state *state, union code op,
54                 const struct lightrec_mem_map_ops *ops, u32 addr, u32 data)
55 {
56         switch (op.i.op) {
57         case OP_SB:
58                 ops->sb(state, addr, (u8) data);
59                 return 0;
60         case OP_SH:
61                 ops->sh(state, addr, (u16) data);
62                 return 0;
63         case OP_SWL:
64         case OP_SWR:
65         case OP_SW:
66                 ops->sw(state, addr, data);
67                 return 0;
68         case OP_LB:
69                 return (s32) (s8) ops->lb(state, addr);
70         case OP_LBU:
71                 return ops->lb(state, addr);
72         case OP_LH:
73                 return (s32) (s16) ops->lh(state, addr);
74         case OP_LHU:
75                 return ops->lh(state, addr);
76         case OP_LW:
77         default:
78                 return ops->lw(state, addr);
79         }
80 }
81
82 static void lightrec_invalidate_map(struct lightrec_state *state,
83                 const struct lightrec_mem_map *map, u32 addr)
84 {
85         if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM])
86                 state->code_lut[lut_offset(addr)] = NULL;
87 }
88
89 static const struct lightrec_mem_map *
90 lightrec_get_map(struct lightrec_state *state, u32 kaddr)
91 {
92         unsigned int i;
93
94         for (i = 0; i < state->nb_maps; i++) {
95                 const struct lightrec_mem_map *map = &state->maps[i];
96
97                 if (kaddr >= map->pc && kaddr < map->pc + map->length)
98                         return map;
99         }
100
101         return NULL;
102 }
103
104 u32 lightrec_rw(struct lightrec_state *state, union code op,
105                 u32 addr, u32 data, u16 *flags)
106 {
107         const struct lightrec_mem_map *map;
108         u32 shift, mem_data, mask, pc;
109         uintptr_t new_addr;
110         u32 kaddr;
111
112         addr += (s16) op.i.imm;
113         kaddr = kunseg(addr);
114
115         map = lightrec_get_map(state, kaddr);
116         if (!map) {
117                 __segfault_cb(state, addr);
118                 return 0;
119         }
120
121         pc = map->pc;
122
123         if (unlikely(map->ops)) {
124                 if (flags)
125                         *flags |= LIGHTREC_HW_IO;
126
127                 return lightrec_rw_ops(state, op, map->ops, addr, data);
128         }
129
130         while (map->mirror_of)
131                 map = map->mirror_of;
132
133         if (flags)
134                 *flags |= LIGHTREC_DIRECT_IO;
135
136         kaddr -= pc;
137         new_addr = (uintptr_t) map->address + kaddr;
138
139         switch (op.i.op) {
140         case OP_SB:
141                 *(u8 *) new_addr = (u8) data;
142                 if (!state->invalidate_from_dma_only)
143                         lightrec_invalidate_map(state, map, kaddr);
144                 return 0;
145         case OP_SH:
146                 *(u16 *) new_addr = HTOLE16((u16) data);
147                 if (!state->invalidate_from_dma_only)
148                         lightrec_invalidate_map(state, map, kaddr);
149                 return 0;
150         case OP_SWL:
151                 shift = kaddr & 3;
152                 mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
153                 mask = GENMASK(31, (shift + 1) * 8);
154
155                 *(u32 *)(new_addr & ~3) = HTOLE32((data >> ((3 - shift) * 8))
156                                                   | (mem_data & mask));
157                 if (!state->invalidate_from_dma_only)
158                         lightrec_invalidate_map(state, map, kaddr & ~0x3);
159                 return 0;
160         case OP_SWR:
161                 shift = kaddr & 3;
162                 mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
163                 mask = (1 << (shift * 8)) - 1;
164
165                 *(u32 *)(new_addr & ~3) = HTOLE32((data << (shift * 8))
166                                                   | (mem_data & mask));
167                 if (!state->invalidate_from_dma_only)
168                         lightrec_invalidate_map(state, map, kaddr & ~0x3);
169                 return 0;
170         case OP_SW:
171                 *(u32 *) new_addr = HTOLE32(data);
172                 if (!state->invalidate_from_dma_only)
173                         lightrec_invalidate_map(state, map, kaddr);
174                 return 0;
175         case OP_SWC2:
176                 *(u32 *) new_addr = HTOLE32(state->ops.cop2_ops.mfc(state,
177                                                                     op.i.rt));
178                 if (!state->invalidate_from_dma_only)
179                         lightrec_invalidate_map(state, map, kaddr);
180                 return 0;
181         case OP_LB:
182                 return (s32) *(s8 *) new_addr;
183         case OP_LBU:
184                 return *(u8 *) new_addr;
185         case OP_LH:
186                 return (s32)(s16) LE16TOH(*(u16 *) new_addr);
187         case OP_LHU:
188                 return LE16TOH(*(u16 *) new_addr);
189         case OP_LWL:
190                 shift = kaddr & 3;
191                 mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
192                 mask = (1 << (24 - shift * 8)) - 1;
193
194                 return (data & mask) | (mem_data << (24 - shift * 8));
195         case OP_LWR:
196                 shift = kaddr & 3;
197                 mem_data = LE32TOH(*(u32 *)(new_addr & ~3));
198                 mask = GENMASK(31, 32 - shift * 8);
199
200                 return (data & mask) | (mem_data >> (shift * 8));
201         case OP_LWC2:
202                 state->ops.cop2_ops.mtc(state, op.i.rt,
203                                         LE32TOH(*(u32 *) new_addr));
204                 return 0;
205         case OP_LW:
206         default:
207                 return LE32TOH(*(u32 *) new_addr);
208         }
209 }
210
211 static void lightrec_rw_helper(struct lightrec_state *state,
212                                union code op, u16 *flags)
213 {
214         u32 ret = lightrec_rw(state, op,
215                           state->native_reg_cache[op.i.rs],
216                           state->native_reg_cache[op.i.rt], flags);
217
218         switch (op.i.op) {
219         case OP_LB:
220         case OP_LBU:
221         case OP_LH:
222         case OP_LHU:
223         case OP_LWL:
224         case OP_LWR:
225         case OP_LW:
226                 if (op.i.rt)
227                         state->native_reg_cache[op.i.rt] = ret;
228         default: /* fall-through */
229                 break;
230         }
231 }
232
233 static void lightrec_rw_cb(struct lightrec_state *state, union code op)
234 {
235         lightrec_rw_helper(state, op, NULL);
236 }
237
238 static void lightrec_rw_generic_cb(struct lightrec_state *state,
239                                    struct opcode *op, struct block *block)
240 {
241         bool was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO);
242
243         lightrec_rw_helper(state, op->c, &op->flags);
244
245         if (!was_tagged) {
246                 pr_debug("Opcode of block at PC 0x%08x offset 0x%x has been "
247                          "tagged - flag for recompilation\n",
248                          block->pc, op->offset << 2);
249
250                 lightrec_mark_for_recompilation(state->block_cache, block);
251         }
252 }
253
254 u32 lightrec_mfc(struct lightrec_state *state, union code op)
255 {
256         bool is_cfc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CFC0) ||
257                       (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CFC2);
258         u32 (*func)(struct lightrec_state *, u8);
259         const struct lightrec_cop_ops *ops;
260
261         if (op.i.op == OP_CP0)
262                 ops = &state->ops.cop0_ops;
263         else
264                 ops = &state->ops.cop2_ops;
265
266         if (is_cfc)
267                 func = ops->cfc;
268         else
269                 func = ops->mfc;
270
271         return (*func)(state, op.r.rd);
272 }
273
274 static void lightrec_mfc_cb(struct lightrec_state *state, union code op)
275 {
276         u32 rt = lightrec_mfc(state, op);
277
278         if (op.r.rt)
279                 state->native_reg_cache[op.r.rt] = rt;
280 }
281
282 void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
283 {
284         bool is_ctc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CTC0) ||
285                       (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CTC2);
286         void (*func)(struct lightrec_state *, u8, u32);
287         const struct lightrec_cop_ops *ops;
288
289         if (op.i.op == OP_CP0)
290                 ops = &state->ops.cop0_ops;
291         else
292                 ops = &state->ops.cop2_ops;
293
294         if (is_ctc)
295                 func = ops->ctc;
296         else
297                 func = ops->mtc;
298
299         (*func)(state, op.r.rd, data);
300 }
301
302 static void lightrec_mtc_cb(struct lightrec_state *state, union code op)
303 {
304         lightrec_mtc(state, op, state->native_reg_cache[op.r.rt]);
305 }
306
307 static void lightrec_rfe_cb(struct lightrec_state *state, union code op)
308 {
309         u32 status;
310
311         /* Read CP0 Status register (r12) */
312         status = state->ops.cop0_ops.mfc(state, 12);
313
314         /* Switch the bits */
315         status = ((status & 0x3c) >> 2) | (status & ~0xf);
316
317         /* Write it back */
318         state->ops.cop0_ops.ctc(state, 12, status);
319 }
320
321 static void lightrec_cp_cb(struct lightrec_state *state, union code op)
322 {
323         void (*func)(struct lightrec_state *, u32);
324
325         if ((op.opcode >> 25) & 1)
326                 func = state->ops.cop2_ops.op;
327         else
328                 func = state->ops.cop0_ops.op;
329
330         (*func)(state, op.opcode);
331 }
332
333 static void lightrec_syscall_cb(struct lightrec_state *state, union code op)
334 {
335         lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL);
336 }
337
338 static void lightrec_break_cb(struct lightrec_state *state, union code op)
339 {
340         lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK);
341 }
342
343 struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
344 {
345         struct block *block = lightrec_find_block(state->block_cache, pc);
346
347         if (block && lightrec_block_is_outdated(block)) {
348                 pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);
349
350                 /* Make sure the recompiler isn't processing the block we'll
351                  * destroy */
352                 if (ENABLE_THREADED_COMPILER)
353                         lightrec_recompiler_remove(state->rec, block);
354
355                 lightrec_unregister_block(state->block_cache, block);
356                 lightrec_free_block(block);
357                 block = NULL;
358         }
359
360         if (!block) {
361                 block = lightrec_precompile_block(state, pc);
362                 if (!block) {
363                         pr_err("Unable to recompile block at PC 0x%x\n", pc);
364                         lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
365                         return NULL;
366                 }
367
368                 lightrec_register_block(state->block_cache, block);
369         }
370
371         return block;
372 }
373
374 static void * get_next_block_func(struct lightrec_state *state, u32 pc)
375 {
376         struct block *block;
377         bool should_recompile;
378         void *func;
379
380         for (;;) {
381                 func = state->code_lut[lut_offset(pc)];
382                 if (func && func != state->get_next_block)
383                         return func;
384
385                 block = lightrec_get_block(state, pc);
386
387                 if (unlikely(!block))
388                         return NULL;
389
390                 should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE;
391
392                 if (unlikely(should_recompile)) {
393                         pr_debug("Block at PC 0x%08x should recompile"
394                                  " - freeing old code\n", pc);
395
396                         if (ENABLE_THREADED_COMPILER)
397                                 lightrec_recompiler_remove(state->rec, block);
398
399                         remove_from_code_lut(state->block_cache, block);
400                         lightrec_unregister(MEM_FOR_CODE, block->code_size);
401                         if (block->_jit)
402                                 _jit_destroy_state(block->_jit);
403                         block->_jit = NULL;
404                         block->function = NULL;
405                         block->flags &= ~BLOCK_SHOULD_RECOMPILE;
406                 }
407
408                 if (ENABLE_THREADED_COMPILER && likely(!should_recompile))
409                         func = lightrec_recompiler_run_first_pass(block, &pc);
410                 else
411                         func = block->function;
412
413                 if (likely(func))
414                         return func;
415
416                 /* Block wasn't compiled yet - run the interpreter */
417                 if (!ENABLE_THREADED_COMPILER &&
418                     ((ENABLE_FIRST_PASS && likely(!should_recompile)) ||
419                      unlikely(block->flags & BLOCK_NEVER_COMPILE)))
420                         pc = lightrec_emulate_block(block, pc);
421
422                 if (likely(!(block->flags & BLOCK_NEVER_COMPILE))) {
423                         /* Then compile it using the profiled data */
424                         if (ENABLE_THREADED_COMPILER)
425                                 lightrec_recompiler_add(state->rec, block);
426                         else
427                                 lightrec_compile_block(block);
428                 }
429
430                 if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
431                     state->current_cycle >= state->target_cycle) {
432                         state->next_pc = pc;
433                         return NULL;
434                 }
435         }
436 }
437
438 static s32 c_generic_function_wrapper(struct lightrec_state *state,
439                                       s32 cycles_delta,
440                                       void (*f)(struct lightrec_state *,
441                                                 struct opcode *,
442                                                 struct block *),
443                                       struct opcode *op, struct block *block)
444 {
445         state->current_cycle = state->target_cycle - cycles_delta;
446
447         (*f)(state, op, block);
448
449         return state->target_cycle - state->current_cycle;
450 }
451
452 static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
453                               void (*f)(struct lightrec_state *, union code),
454                               union code op)
455 {
456         state->current_cycle = state->target_cycle - cycles_delta;
457
458         (*f)(state, op);
459
460         return state->target_cycle - state->current_cycle;
461 }
462
463 static struct block * generate_wrapper(struct lightrec_state *state,
464                                        void *f, bool generic)
465 {
466         struct block *block;
467         jit_state_t *_jit;
468         unsigned int i;
469         int stack_ptr;
470         jit_word_t code_size;
471         jit_node_t *to_tramp, *to_fn_epilog;
472
473         block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
474         if (!block)
475                 goto err_no_mem;
476
477         _jit = jit_new_state();
478         if (!_jit)
479                 goto err_free_block;
480
481         jit_name("RW wrapper");
482         jit_note(__FILE__, __LINE__);
483
484         /* Wrapper entry point */
485         jit_prolog();
486
487         stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS);
488
489         for (i = 0; i < NUM_TEMPS; i++)
490                 jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i));
491
492         /* Jump to the trampoline */
493         to_tramp = jit_jmpi();
494
495         /* The trampoline will jump back here */
496         to_fn_epilog = jit_label();
497
498         for (i = 0; i < NUM_TEMPS; i++)
499                 jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t));
500
501         jit_ret();
502         jit_epilog();
503
504         /* Trampoline entry point.
505          * The sole purpose of the trampoline is to cheese Lightning not to
506          * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we
507          * do want to return to the caller with this register modified. */
508         jit_prolog();
509         jit_tramp(256);
510         jit_patch(to_tramp);
511
512         jit_prepare();
513         jit_pushargr(LIGHTREC_REG_STATE);
514         jit_pushargr(LIGHTREC_REG_CYCLE);
515         jit_pushargi((uintptr_t)f);
516         jit_pushargr(JIT_R0);
517         if (generic) {
518                 jit_pushargr(JIT_R1);
519                 jit_finishi(c_generic_function_wrapper);
520         } else {
521                 jit_finishi(c_function_wrapper);
522         }
523
524 #if __WORDSIZE == 64
525         jit_retval_i(LIGHTREC_REG_CYCLE);
526 #else
527         jit_retval(LIGHTREC_REG_CYCLE);
528 #endif
529
530         jit_patch_at(jit_jmpi(), to_fn_epilog);
531         jit_epilog();
532
533         block->state = state;
534         block->_jit = _jit;
535         block->function = jit_emit();
536         block->opcode_list = NULL;
537         block->flags = 0;
538         block->nb_ops = 0;
539
540         jit_get_code(&code_size);
541         lightrec_register(MEM_FOR_CODE, code_size);
542
543         block->code_size = code_size;
544
545         if (ENABLE_DISASSEMBLER) {
546                 pr_debug("Wrapper block:\n");
547                 jit_disassemble();
548         }
549
550         jit_clear_state();
551         return block;
552
553 err_free_block:
554         lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
555 err_no_mem:
556         pr_err("Unable to compile wrapper: Out of memory\n");
557         return NULL;
558 }
559
560 static struct block * generate_dispatcher(struct lightrec_state *state)
561 {
562         struct block *block;
563         jit_state_t *_jit;
564         jit_node_t *to_end, *to_end2, *to_c, *loop, *addr, *addr2;
565         unsigned int i;
566         u32 offset, ram_len;
567         jit_word_t code_size;
568
569         block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
570         if (!block)
571                 goto err_no_mem;
572
573         _jit = jit_new_state();
574         if (!_jit)
575                 goto err_free_block;
576
577         jit_name("dispatcher");
578         jit_note(__FILE__, __LINE__);
579
580         jit_prolog();
581         jit_frame(256);
582
583         jit_getarg(JIT_R0, jit_arg());
584 #if __WORDSIZE == 64
585         jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg());
586 #else
587         jit_getarg(LIGHTREC_REG_CYCLE, jit_arg());
588 #endif
589
590         /* Force all callee-saved registers to be pushed on the stack */
591         for (i = 0; i < NUM_REGS; i++)
592                 jit_movr(JIT_V(i), JIT_V(i));
593
594         /* Pass lightrec_state structure to blocks, using the last callee-saved
595          * register that Lightning provides */
596         jit_movi(LIGHTREC_REG_STATE, (intptr_t) state);
597
598         loop = jit_label();
599
600         /* Call the block's code */
601         jit_jmpr(JIT_R0);
602
603         /* The block will jump here, with the number of cycles remaining in
604          * LIGHTREC_REG_CYCLE */
605         addr2 = jit_indirect();
606
607         /* Jump to end if state->target_cycle < state->current_cycle */
608         to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
609
610         /* Convert next PC to KUNSEG and avoid mirrors */
611         ram_len = state->maps[PSX_MAP_KERNEL_USER_RAM].length;
612         jit_andi(JIT_R0, JIT_V0, 0x10000000 | (ram_len - 1));
613         to_c = jit_bgei(JIT_R0, ram_len);
614
615         /* Fast path: code is running from RAM, use the code LUT */
616 #if __WORDSIZE == 64
617         jit_lshi(JIT_R0, JIT_R0, 1);
618 #endif
619         jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE);
620         jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut));
621
622         /* If we get non-NULL, loop */
623         jit_patch_at(jit_bnei(JIT_R0, 0), loop);
624
625         /* Slow path: call C function get_next_block_func() */
626         jit_patch(to_c);
627
628         if (ENABLE_FIRST_PASS) {
629                 /* We may call the interpreter - update state->current_cycle */
630                 jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
631                            offsetof(struct lightrec_state, target_cycle));
632                 jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE);
633                 jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
634                            LIGHTREC_REG_STATE, JIT_R1);
635         }
636
637         /* The code LUT will be set to this address when the block at the target
638          * PC has been preprocessed but not yet compiled by the threaded
639          * recompiler */
640         addr = jit_indirect();
641
642         /* Get the next block */
643         jit_prepare();
644         jit_pushargr(LIGHTREC_REG_STATE);
645         jit_pushargr(JIT_V0);
646         jit_finishi(&get_next_block_func);
647         jit_retval(JIT_R0);
648
649         if (ENABLE_FIRST_PASS) {
650                 /* The interpreter may have updated state->current_cycle and
651                  * state->target_cycle - recalc the delta */
652                 jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE,
653                            offsetof(struct lightrec_state, current_cycle));
654                 jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
655                            offsetof(struct lightrec_state, target_cycle));
656                 jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1);
657         }
658
659         /* If we get non-NULL, loop */
660         jit_patch_at(jit_bnei(JIT_R0, 0), loop);
661
662         to_end2 = jit_jmpi();
663
664         /* When exiting, the recompiled code will jump to that address */
665         jit_note(__FILE__, __LINE__);
666         jit_patch(to_end);
667
668         /* Store back the next_pc to the lightrec_state structure */
669         offset = offsetof(struct lightrec_state, next_pc);
670         jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
671
672         jit_patch(to_end2);
673
674         jit_retr(LIGHTREC_REG_CYCLE);
675         jit_epilog();
676
677         block->state = state;
678         block->_jit = _jit;
679         block->function = jit_emit();
680         block->opcode_list = NULL;
681         block->flags = 0;
682         block->nb_ops = 0;
683
684         jit_get_code(&code_size);
685         lightrec_register(MEM_FOR_CODE, code_size);
686
687         block->code_size = code_size;
688
689         state->eob_wrapper_func = jit_address(addr2);
690         state->get_next_block = jit_address(addr);
691
692         if (ENABLE_DISASSEMBLER) {
693                 pr_debug("Dispatcher block:\n");
694                 jit_disassemble();
695         }
696
697         /* We're done! */
698         jit_clear_state();
699         return block;
700
701 err_free_block:
702         lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
703 err_no_mem:
704         pr_err("Unable to compile dispatcher: Out of memory\n");
705         return NULL;
706 }
707
708 union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
709 {
710         u32 addr, kunseg_pc = kunseg(pc);
711         const u32 *code;
712         const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
713
714         addr = kunseg_pc - map->pc;
715
716         while (map->mirror_of)
717                 map = map->mirror_of;
718
719         code = map->address + addr;
720
721         return (union code) *code;
722 }
723
724 static struct block * lightrec_precompile_block(struct lightrec_state *state,
725                                                 u32 pc)
726 {
727         struct opcode *list;
728         struct block *block;
729         const u32 *code;
730         u32 addr, kunseg_pc = kunseg(pc);
731         const struct lightrec_mem_map *map = lightrec_get_map(state, kunseg_pc);
732         unsigned int length;
733
734         if (!map)
735                 return NULL;
736
737         addr = kunseg_pc - map->pc;
738
739         while (map->mirror_of)
740                 map = map->mirror_of;
741
742         code = map->address + addr;
743
744         block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
745         if (!block) {
746                 pr_err("Unable to recompile block: Out of memory\n");
747                 return NULL;
748         }
749
750         list = lightrec_disassemble(state, code, &length);
751         if (!list) {
752                 lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
753                 return NULL;
754         }
755
756         block->pc = pc;
757         block->state = state;
758         block->_jit = NULL;
759         block->function = NULL;
760         block->opcode_list = list;
761         block->map = map;
762         block->next = NULL;
763         block->flags = 0;
764         block->code_size = 0;
765 #if ENABLE_THREADED_COMPILER
766         block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT;
767 #endif
768         block->nb_ops = length / sizeof(u32);
769
770         lightrec_optimize(block);
771
772         length = block->nb_ops * sizeof(u32);
773
774         lightrec_register(MEM_FOR_MIPS_CODE, length);
775
776         if (ENABLE_DISASSEMBLER) {
777                 pr_debug("Disassembled block at PC: 0x%x\n", block->pc);
778                 lightrec_print_disassembly(block, code, length);
779         }
780
781         pr_debug("Block size: %lu opcodes\n", block->nb_ops);
782
783         /* If the first opcode is an 'impossible' branch, never compile the
784          * block */
785         if (list->flags & LIGHTREC_EMULATE_BRANCH)
786                 block->flags |= BLOCK_NEVER_COMPILE;
787
788         block->hash = lightrec_calculate_block_hash(block);
789
790         return block;
791 }
792
793 static bool lightrec_block_is_fully_tagged(struct block *block)
794 {
795         struct opcode *op;
796
797         for (op = block->opcode_list; op; op = op->next) {
798                 /* Verify that all load/stores of the opcode list
799                  * Check all loads/stores of the opcode list and mark the
800                  * block as fully compiled if they all have been tagged. */
801                 switch (op->c.i.op) {
802                 case OP_LB:
803                 case OP_LH:
804                 case OP_LWL:
805                 case OP_LW:
806                 case OP_LBU:
807                 case OP_LHU:
808                 case OP_LWR:
809                 case OP_SB:
810                 case OP_SH:
811                 case OP_SWL:
812                 case OP_SW:
813                 case OP_SWR:
814                 case OP_LWC2:
815                 case OP_SWC2:
816                         if (!(op->flags & (LIGHTREC_DIRECT_IO |
817                                            LIGHTREC_HW_IO)))
818                                 return false;
819                 default: /* fall-through */
820                         continue;
821                 }
822         }
823
824         return true;
825 }
826
827 int lightrec_compile_block(struct block *block)
828 {
829         struct lightrec_state *state = block->state;
830         bool op_list_freed = false, fully_tagged = false;
831         struct opcode *elm;
832         jit_state_t *_jit;
833         jit_node_t *start_of_block;
834         bool skip_next = false;
835         jit_word_t code_size;
836         unsigned int i, j;
837         u32 next_pc;
838
839         fully_tagged = lightrec_block_is_fully_tagged(block);
840         if (fully_tagged)
841                 block->flags |= BLOCK_FULLY_TAGGED;
842
843         _jit = jit_new_state();
844         if (!_jit)
845                 return -ENOMEM;
846
847         block->_jit = _jit;
848
849         lightrec_regcache_reset(state->reg_cache);
850         state->cycles = 0;
851         state->nb_branches = 0;
852         state->nb_local_branches = 0;
853         state->nb_targets = 0;
854
855         jit_prolog();
856         jit_tramp(256);
857
858         start_of_block = jit_label();
859
860         for (elm = block->opcode_list; elm; elm = elm->next) {
861                 next_pc = block->pc + elm->offset * sizeof(u32);
862
863                 if (skip_next) {
864                         skip_next = false;
865                         continue;
866                 }
867
868                 state->cycles += lightrec_cycles_of_opcode(elm->c);
869
870                 if (elm->flags & LIGHTREC_EMULATE_BRANCH) {
871                         pr_debug("Branch at offset 0x%x will be emulated\n",
872                                  elm->offset << 2);
873                         lightrec_emit_eob(block, elm, next_pc);
874                         skip_next = !(elm->flags & LIGHTREC_NO_DS);
875                 } else if (elm->opcode) {
876                         lightrec_rec_opcode(block, elm, next_pc);
877                         skip_next = has_delay_slot(elm->c) &&
878                                 !(elm->flags & LIGHTREC_NO_DS);
879 #if _WIN32
880                         /* FIXME: GNU Lightning on Windows seems to use our
881                          * mapped registers as temporaries. Until the actual bug
882                          * is found and fixed, unconditionally mark our
883                          * registers as live here. */
884                         lightrec_regcache_mark_live(state->reg_cache, _jit);
885 #endif
886                 }
887         }
888
889         for (i = 0; i < state->nb_branches; i++)
890                 jit_patch(state->branches[i]);
891
892         for (i = 0; i < state->nb_local_branches; i++) {
893                 struct lightrec_branch *branch = &state->local_branches[i];
894
895                 pr_debug("Patch local branch to offset 0x%x\n",
896                          branch->target << 2);
897
898                 if (branch->target == 0) {
899                         jit_patch_at(branch->branch, start_of_block);
900                         continue;
901                 }
902
903                 for (j = 0; j < state->nb_targets; j++) {
904                         if (state->targets[j].offset == branch->target) {
905                                 jit_patch_at(branch->branch,
906                                              state->targets[j].label);
907                                 break;
908                         }
909                 }
910
911                 if (j == state->nb_targets)
912                         pr_err("Unable to find branch target\n");
913         }
914
915         jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
916                  offsetof(struct lightrec_state, eob_wrapper_func));
917
918         jit_jmpr(JIT_R0);
919
920         jit_ret();
921         jit_epilog();
922
923         block->function = jit_emit();
924
925         /* Add compiled function to the LUT */
926         state->code_lut[lut_offset(block->pc)] = block->function;
927
928         jit_get_code(&code_size);
929         lightrec_register(MEM_FOR_CODE, code_size);
930
931         block->code_size = code_size;
932
933         if (ENABLE_DISASSEMBLER) {
934                 pr_debug("Compiling block at PC: 0x%x\n", block->pc);
935                 jit_disassemble();
936         }
937
938         jit_clear_state();
939
940 #if ENABLE_THREADED_COMPILER
941         if (fully_tagged)
942                 op_list_freed = atomic_flag_test_and_set(&block->op_list_freed);
943 #endif
944         if (fully_tagged && !op_list_freed) {
945                 pr_debug("Block PC 0x%08x is fully tagged"
946                          " - free opcode list\n", block->pc);
947                 lightrec_free_opcode_list(state, block->opcode_list);
948                 block->opcode_list = NULL;
949         }
950
951         return 0;
952 }
953
954 u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
955 {
956         s32 (*func)(void *, s32) = (void *)state->dispatcher->function;
957         void *block_trace;
958         s32 cycles_delta;
959
960         state->exit_flags = LIGHTREC_EXIT_NORMAL;
961
962         /* Handle the cycle counter overflowing */
963         if (unlikely(target_cycle < state->current_cycle))
964                 target_cycle = UINT_MAX;
965
966         state->target_cycle = target_cycle;
967
968         block_trace = get_next_block_func(state, pc);
969         if (block_trace) {
970                 cycles_delta = state->target_cycle - state->current_cycle;
971
972                 cycles_delta = (*func)(block_trace, cycles_delta);
973
974                 state->current_cycle = state->target_cycle - cycles_delta;
975         }
976
977         return state->next_pc;
978 }
979
980 u32 lightrec_execute_one(struct lightrec_state *state, u32 pc)
981 {
982         return lightrec_execute(state, pc, state->current_cycle);
983 }
984
985 u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
986 {
987         struct block *block = lightrec_get_block(state, pc);
988         if (!block)
989                 return 0;
990
991         state->exit_flags = LIGHTREC_EXIT_NORMAL;
992
993         return lightrec_emulate_block(block, pc);
994 }
995
996 void lightrec_free_block(struct block *block)
997 {
998         lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
999         if (block->opcode_list)
1000                 lightrec_free_opcode_list(block->state, block->opcode_list);
1001         if (block->_jit)
1002                 _jit_destroy_state(block->_jit);
1003         lightrec_unregister(MEM_FOR_CODE, block->code_size);
1004         lightrec_free(block->state, MEM_FOR_IR, sizeof(*block), block);
1005 }
1006
1007 struct lightrec_state * lightrec_init(char *argv0,
1008                                       const struct lightrec_mem_map *map,
1009                                       size_t nb,
1010                                       const struct lightrec_ops *ops)
1011 {
1012         struct lightrec_state *state;
1013
1014         /* Sanity-check ops */
1015         if (!ops ||
1016             !ops->cop0_ops.mfc || !ops->cop0_ops.cfc || !ops->cop0_ops.mtc ||
1017             !ops->cop0_ops.ctc || !ops->cop0_ops.op ||
1018             !ops->cop2_ops.mfc || !ops->cop2_ops.cfc || !ops->cop2_ops.mtc ||
1019             !ops->cop2_ops.ctc || !ops->cop2_ops.op) {
1020                 pr_err("Missing callbacks in lightrec_ops structure\n");
1021                 return NULL;
1022         }
1023
1024         init_jit(argv0);
1025
1026         state = calloc(1, sizeof(*state) +
1027                        sizeof(*state->code_lut) * CODE_LUT_SIZE);
1028         if (!state)
1029                 goto err_finish_jit;
1030
1031         lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) +
1032                           sizeof(*state->code_lut) * CODE_LUT_SIZE);
1033
1034 #if ENABLE_TINYMM
1035         state->tinymm = tinymm_init(malloc, free, 4096);
1036         if (!state->tinymm)
1037                 goto err_free_state;
1038 #endif
1039
1040         state->block_cache = lightrec_blockcache_init(state);
1041         if (!state->block_cache)
1042                 goto err_free_tinymm;
1043
1044         state->reg_cache = lightrec_regcache_init(state);
1045         if (!state->reg_cache)
1046                 goto err_free_block_cache;
1047
1048         if (ENABLE_THREADED_COMPILER) {
1049                 state->rec = lightrec_recompiler_init(state);
1050                 if (!state->rec)
1051                         goto err_free_reg_cache;
1052         }
1053
1054         state->nb_maps = nb;
1055         state->maps = map;
1056
1057         memcpy(&state->ops, ops, sizeof(*ops));
1058
1059         state->dispatcher = generate_dispatcher(state);
1060         if (!state->dispatcher)
1061                 goto err_free_recompiler;
1062
1063         state->rw_generic_wrapper = generate_wrapper(state,
1064                                                      lightrec_rw_generic_cb,
1065                                                      true);
1066         if (!state->rw_generic_wrapper)
1067                 goto err_free_dispatcher;
1068
1069         state->rw_wrapper = generate_wrapper(state, lightrec_rw_cb, false);
1070         if (!state->rw_wrapper)
1071                 goto err_free_generic_rw_wrapper;
1072
1073         state->mfc_wrapper = generate_wrapper(state, lightrec_mfc_cb, false);
1074         if (!state->mfc_wrapper)
1075                 goto err_free_rw_wrapper;
1076
1077         state->mtc_wrapper = generate_wrapper(state, lightrec_mtc_cb, false);
1078         if (!state->mtc_wrapper)
1079                 goto err_free_mfc_wrapper;
1080
1081         state->rfe_wrapper = generate_wrapper(state, lightrec_rfe_cb, false);
1082         if (!state->rfe_wrapper)
1083                 goto err_free_mtc_wrapper;
1084
1085         state->cp_wrapper = generate_wrapper(state, lightrec_cp_cb, false);
1086         if (!state->cp_wrapper)
1087                 goto err_free_rfe_wrapper;
1088
1089         state->syscall_wrapper = generate_wrapper(state, lightrec_syscall_cb,
1090                                                   false);
1091         if (!state->syscall_wrapper)
1092                 goto err_free_cp_wrapper;
1093
1094         state->break_wrapper = generate_wrapper(state, lightrec_break_cb,
1095                                                 false);
1096         if (!state->break_wrapper)
1097                 goto err_free_syscall_wrapper;
1098
1099         state->rw_generic_func = state->rw_generic_wrapper->function;
1100         state->rw_func = state->rw_wrapper->function;
1101         state->mfc_func = state->mfc_wrapper->function;
1102         state->mtc_func = state->mtc_wrapper->function;
1103         state->rfe_func = state->rfe_wrapper->function;
1104         state->cp_func = state->cp_wrapper->function;
1105         state->syscall_func = state->syscall_wrapper->function;
1106         state->break_func = state->break_wrapper->function;
1107
1108         map = &state->maps[PSX_MAP_BIOS];
1109         state->offset_bios = (uintptr_t)map->address - map->pc;
1110
1111         map = &state->maps[PSX_MAP_SCRATCH_PAD];
1112         state->offset_scratch = (uintptr_t)map->address - map->pc;
1113
1114         map = &state->maps[PSX_MAP_KERNEL_USER_RAM];
1115         state->offset_ram = (uintptr_t)map->address - map->pc;
1116
1117         if (state->maps[PSX_MAP_MIRROR1].address == map->address + 0x200000 &&
1118             state->maps[PSX_MAP_MIRROR2].address == map->address + 0x400000 &&
1119             state->maps[PSX_MAP_MIRROR3].address == map->address + 0x600000)
1120                 state->mirrors_mapped = true;
1121
1122         return state;
1123
1124 err_free_syscall_wrapper:
1125         lightrec_free_block(state->syscall_wrapper);
1126 err_free_cp_wrapper:
1127         lightrec_free_block(state->cp_wrapper);
1128 err_free_rfe_wrapper:
1129         lightrec_free_block(state->rfe_wrapper);
1130 err_free_mtc_wrapper:
1131         lightrec_free_block(state->mtc_wrapper);
1132 err_free_mfc_wrapper:
1133         lightrec_free_block(state->mfc_wrapper);
1134 err_free_rw_wrapper:
1135         lightrec_free_block(state->rw_wrapper);
1136 err_free_generic_rw_wrapper:
1137         lightrec_free_block(state->rw_generic_wrapper);
1138 err_free_dispatcher:
1139         lightrec_free_block(state->dispatcher);
1140 err_free_recompiler:
1141         if (ENABLE_THREADED_COMPILER)
1142                 lightrec_free_recompiler(state->rec);
1143 err_free_reg_cache:
1144         lightrec_free_regcache(state->reg_cache);
1145 err_free_block_cache:
1146         lightrec_free_block_cache(state->block_cache);
1147 err_free_tinymm:
1148 #if ENABLE_TINYMM
1149         tinymm_shutdown(state->tinymm);
1150 err_free_state:
1151 #endif
1152         lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
1153                             sizeof(*state->code_lut) * CODE_LUT_SIZE);
1154         free(state);
1155 err_finish_jit:
1156         finish_jit();
1157         return NULL;
1158 }
1159
1160 void lightrec_destroy(struct lightrec_state *state)
1161 {
1162         if (ENABLE_THREADED_COMPILER)
1163                 lightrec_free_recompiler(state->rec);
1164
1165         lightrec_free_regcache(state->reg_cache);
1166         lightrec_free_block_cache(state->block_cache);
1167         lightrec_free_block(state->dispatcher);
1168         lightrec_free_block(state->rw_generic_wrapper);
1169         lightrec_free_block(state->rw_wrapper);
1170         lightrec_free_block(state->mfc_wrapper);
1171         lightrec_free_block(state->mtc_wrapper);
1172         lightrec_free_block(state->rfe_wrapper);
1173         lightrec_free_block(state->cp_wrapper);
1174         lightrec_free_block(state->syscall_wrapper);
1175         lightrec_free_block(state->break_wrapper);
1176         finish_jit();
1177
1178 #if ENABLE_TINYMM
1179         tinymm_shutdown(state->tinymm);
1180 #endif
1181         lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
1182                             sizeof(*state->code_lut) * CODE_LUT_SIZE);
1183         free(state);
1184 }
1185
1186 void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len)
1187 {
1188         u32 kaddr = kunseg(addr & ~0x3);
1189         const struct lightrec_mem_map *map = lightrec_get_map(state, kaddr);
1190
1191         if (map) {
1192                 while (map->mirror_of)
1193                         map = map->mirror_of;
1194
1195                 if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM])
1196                         return;
1197
1198                 /* Handle mirrors */
1199                 kaddr &= (state->maps[PSX_MAP_KERNEL_USER_RAM].length - 1);
1200
1201                 for (; len > 4; len -= 4, kaddr += 4)
1202                         lightrec_invalidate_map(state, map, kaddr);
1203
1204                 lightrec_invalidate_map(state, map, kaddr);
1205         }
1206 }
1207
1208 void lightrec_invalidate_all(struct lightrec_state *state)
1209 {
1210         memset(state->code_lut, 0, sizeof(*state->code_lut) * CODE_LUT_SIZE);
1211 }
1212
1213 void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only)
1214 {
1215         if (state->invalidate_from_dma_only != dma_only)
1216                 lightrec_invalidate_all(state);
1217
1218         state->invalidate_from_dma_only = dma_only;
1219 }
1220
1221 void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags)
1222 {
1223         if (flags != LIGHTREC_EXIT_NORMAL) {
1224                 state->exit_flags |= flags;
1225                 state->target_cycle = state->current_cycle;
1226         }
1227 }
1228
1229 u32 lightrec_exit_flags(struct lightrec_state *state)
1230 {
1231         return state->exit_flags;
1232 }
1233
1234 void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34])
1235 {
1236         memcpy(regs, state->native_reg_cache, sizeof(state->native_reg_cache));
1237 }
1238
1239 void lightrec_restore_registers(struct lightrec_state *state, u32 regs[34])
1240 {
1241         memcpy(state->native_reg_cache, regs, sizeof(state->native_reg_cache));
1242 }
1243
1244 u32 lightrec_current_cycle_count(const struct lightrec_state *state)
1245 {
1246         return state->current_cycle;
1247 }
1248
1249 void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles)
1250 {
1251         state->current_cycle = cycles;
1252
1253         if (state->target_cycle < cycles)
1254                 state->target_cycle = cycles;
1255 }
1256
1257 void lightrec_set_target_cycle_count(struct lightrec_state *state, u32 cycles)
1258 {
1259         if (state->exit_flags == LIGHTREC_EXIT_NORMAL) {
1260                 if (cycles < state->current_cycle)
1261                         cycles = state->current_cycle;
1262
1263                 state->target_cycle = cycles;
1264         }
1265 }