psxcounters: try to support a dynarec with a very long timeslice
[pcsx_rearmed.git] / deps / lightrec / lightrec.c
CommitLineData
98fa08a5 1// SPDX-License-Identifier: LGPL-2.1-or-later
d16005f8 2/*
98fa08a5 3 * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
d16005f8
PC
4 */
5
6#include "blockcache.h"
d16005f8
PC
7#include "debug.h"
8#include "disassembler.h"
9#include "emitter.h"
10#include "interpreter.h"
98fa08a5
PC
11#include "lightrec-config.h"
12#include "lightning-wrapper.h"
d16005f8
PC
13#include "lightrec.h"
14#include "memmanager.h"
a59e5536 15#include "reaper.h"
d16005f8
PC
16#include "recompiler.h"
17#include "regcache.h"
18#include "optimizer.h"
02487de7 19#include "tlsf/tlsf.h"
d16005f8
PC
20
21#include <errno.h>
98fa08a5 22#include <inttypes.h>
d16005f8
PC
23#include <limits.h>
24#if ENABLE_THREADED_COMPILER
25#include <stdatomic.h>
26#endif
27#include <stdbool.h>
28#include <stddef.h>
29#include <string.h>
30#if ENABLE_TINYMM
31#include <tinymm.h>
32#endif
33
/* Build a uintptr_t mask from two shifted all-ones values: everything from
 * bit (l) upwards, AND-ed with everything from bit (h) downwards.
 * NOTE(review): assumes __WORDSIZE equals the bit width of uintptr_t, and
 * that (l) is smaller than that width (a full-width shift is undefined). */
#define GENMASK(h, l) \
	(((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h))))
36
37static struct block * lightrec_precompile_block(struct lightrec_state *state,
38 u32 pc);
98fa08a5
PC
39static bool lightrec_block_is_fully_tagged(const struct block *block);
40
41static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data);
42static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg);
d16005f8 43
a59e5536 44static void lightrec_default_sb(struct lightrec_state *state, u32 opcode,
45 void *host, u32 addr, u8 data)
46{
47 *(u8 *)host = data;
48
49 if (!state->invalidate_from_dma_only)
50 lightrec_invalidate(state, addr, 1);
51}
52
53static void lightrec_default_sh(struct lightrec_state *state, u32 opcode,
54 void *host, u32 addr, u16 data)
55{
56 *(u16 *)host = HTOLE16(data);
57
58 if (!state->invalidate_from_dma_only)
59 lightrec_invalidate(state, addr, 2);
60}
61
62static void lightrec_default_sw(struct lightrec_state *state, u32 opcode,
63 void *host, u32 addr, u32 data)
64{
65 *(u32 *)host = HTOLE32(data);
66
67 if (!state->invalidate_from_dma_only)
68 lightrec_invalidate(state, addr, 4);
69}
70
71static u8 lightrec_default_lb(struct lightrec_state *state,
72 u32 opcode, void *host, u32 addr)
73{
74 return *(u8 *)host;
75}
76
77static u16 lightrec_default_lh(struct lightrec_state *state,
78 u32 opcode, void *host, u32 addr)
79{
80 return LE16TOH(*(u16 *)host);
81}
82
83static u32 lightrec_default_lw(struct lightrec_state *state,
84 u32 opcode, void *host, u32 addr)
85{
86 return LE32TOH(*(u32 *)host);
87}
88
89static const struct lightrec_mem_map_ops lightrec_default_ops = {
90 .sb = lightrec_default_sb,
91 .sh = lightrec_default_sh,
92 .sw = lightrec_default_sw,
93 .lb = lightrec_default_lb,
94 .lh = lightrec_default_lh,
95 .lw = lightrec_default_lw,
96};
97
98fa08a5
PC
98static void __segfault_cb(struct lightrec_state *state, u32 addr,
99 const struct block *block)
d16005f8
PC
100{
101 lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
102 pr_err("Segmentation fault in recompiled code: invalid "
103 "load/store at address 0x%08x\n", addr);
98fa08a5
PC
104 if (block)
105 pr_err("Was executing block PC 0x%08x\n", block->pc);
d16005f8
PC
106}
107
a59e5536 108static void lightrec_swl(struct lightrec_state *state,
109 const struct lightrec_mem_map_ops *ops,
110 u32 opcode, void *host, u32 addr, u32 data)
d16005f8 111{
a59e5536 112 unsigned int shift = addr & 0x3;
113 unsigned int mask = GENMASK(31, (shift + 1) * 8);
114 u32 old_data;
115
116 /* Align to 32 bits */
117 addr &= ~3;
118 host = (void *)((uintptr_t)host & ~3);
119
120 old_data = ops->lw(state, opcode, host, addr);
121
122 data = (data >> ((3 - shift) * 8)) | (old_data & mask);
123
124 ops->sw(state, opcode, host, addr, data);
125}
126
127static void lightrec_swr(struct lightrec_state *state,
128 const struct lightrec_mem_map_ops *ops,
129 u32 opcode, void *host, u32 addr, u32 data)
130{
131 unsigned int shift = addr & 0x3;
132 unsigned int mask = (1 << (shift * 8)) - 1;
133 u32 old_data;
134
135 /* Align to 32 bits */
136 addr &= ~3;
137 host = (void *)((uintptr_t)host & ~3);
138
139 old_data = ops->lw(state, opcode, host, addr);
140
141 data = (data << (shift * 8)) | (old_data & mask);
142
143 ops->sw(state, opcode, host, addr, data);
144}
145
146static void lightrec_swc2(struct lightrec_state *state, union code op,
147 const struct lightrec_mem_map_ops *ops,
148 void *host, u32 addr)
149{
98fa08a5 150 u32 data = lightrec_mfc2(state, op.i.rt);
a59e5536 151
152 ops->sw(state, op.opcode, host, addr, data);
153}
154
155static u32 lightrec_lwl(struct lightrec_state *state,
156 const struct lightrec_mem_map_ops *ops,
157 u32 opcode, void *host, u32 addr, u32 data)
158{
159 unsigned int shift = addr & 0x3;
160 unsigned int mask = (1 << (24 - shift * 8)) - 1;
161 u32 old_data;
162
163 /* Align to 32 bits */
164 addr &= ~3;
165 host = (void *)((uintptr_t)host & ~3);
166
167 old_data = ops->lw(state, opcode, host, addr);
168
169 return (data & mask) | (old_data << (24 - shift * 8));
170}
171
172static u32 lightrec_lwr(struct lightrec_state *state,
173 const struct lightrec_mem_map_ops *ops,
174 u32 opcode, void *host, u32 addr, u32 data)
175{
176 unsigned int shift = addr & 0x3;
177 unsigned int mask = GENMASK(31, 32 - shift * 8);
178 u32 old_data;
179
180 /* Align to 32 bits */
181 addr &= ~3;
182 host = (void *)((uintptr_t)host & ~3);
183
184 old_data = ops->lw(state, opcode, host, addr);
185
186 return (data & mask) | (old_data >> (shift * 8));
187}
188
189static void lightrec_lwc2(struct lightrec_state *state, union code op,
190 const struct lightrec_mem_map_ops *ops,
191 void *host, u32 addr)
192{
193 u32 data = ops->lw(state, op.opcode, host, addr);
194
98fa08a5 195 lightrec_mtc2(state, op.i.rt, data);
d16005f8
PC
196}
197
198static void lightrec_invalidate_map(struct lightrec_state *state,
98fa08a5 199 const struct lightrec_mem_map *map, u32 addr, u32 len)
d16005f8 200{
98fa08a5 201 if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) {
02487de7
PC
202 memset(lut_address(state, lut_offset(addr)), 0,
203 ((len + 3) / 4) * lut_elm_size(state));
98fa08a5 204 }
d16005f8
PC
205}
206
02487de7
PC
207enum psx_map
208lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr)
d16005f8 209{
98fa08a5 210 const struct lightrec_mem_map *map;
d16005f8
PC
211 unsigned int i;
212
213 for (i = 0; i < state->nb_maps; i++) {
02487de7 214 map = &state->maps[i];
d16005f8 215
02487de7
PC
216 if (kaddr >= map->pc && kaddr < map->pc + map->length)
217 return (enum psx_map) i;
d16005f8
PC
218 }
219
02487de7
PC
220 return PSX_MAP_UNKNOWN;
221}
222
223const struct lightrec_mem_map *
224lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr)
225{
226 const struct lightrec_mem_map *map;
227 enum psx_map idx;
228 u32 addr;
229
230 idx = lightrec_get_map_idx(state, kaddr);
231 if (idx == PSX_MAP_UNKNOWN)
98fa08a5
PC
232 return NULL;
233
02487de7 234 map = &state->maps[idx];
98fa08a5
PC
235 addr = kaddr - map->pc;
236
237 while (map->mirror_of)
238 map = map->mirror_of;
239
240 if (host)
241 *host = map->address + addr;
242
243 return map;
d16005f8
PC
244}
245
246u32 lightrec_rw(struct lightrec_state *state, union code op,
98fa08a5 247 u32 addr, u32 data, u16 *flags, struct block *block)
d16005f8
PC
248{
249 const struct lightrec_mem_map *map;
a59e5536 250 const struct lightrec_mem_map_ops *ops;
98fa08a5 251 u32 opcode = op.opcode;
a59e5536 252 void *host;
d16005f8
PC
253
254 addr += (s16) op.i.imm;
d16005f8 255
98fa08a5 256 map = lightrec_get_map(state, &host, kunseg(addr));
d16005f8 257 if (!map) {
98fa08a5 258 __segfault_cb(state, addr, block);
d16005f8
PC
259 return 0;
260 }
261
d16005f8 262 if (unlikely(map->ops)) {
22eee2ac
PC
263 if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags))
264 *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
d16005f8 265
a59e5536 266 ops = map->ops;
267 } else {
22eee2ac
PC
268 if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags))
269 *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
d16005f8 270
a59e5536 271 ops = &lightrec_default_ops;
272 }
d16005f8
PC
273
274 switch (op.i.op) {
275 case OP_SB:
a59e5536 276 ops->sb(state, opcode, host, addr, (u8) data);
d16005f8
PC
277 return 0;
278 case OP_SH:
a59e5536 279 ops->sh(state, opcode, host, addr, (u16) data);
d16005f8
PC
280 return 0;
281 case OP_SWL:
a59e5536 282 lightrec_swl(state, ops, opcode, host, addr, data);
d16005f8
PC
283 return 0;
284 case OP_SWR:
a59e5536 285 lightrec_swr(state, ops, opcode, host, addr, data);
d16005f8
PC
286 return 0;
287 case OP_SW:
a59e5536 288 ops->sw(state, opcode, host, addr, data);
d16005f8
PC
289 return 0;
290 case OP_SWC2:
a59e5536 291 lightrec_swc2(state, op, ops, host, addr);
d16005f8
PC
292 return 0;
293 case OP_LB:
a59e5536 294 return (s32) (s8) ops->lb(state, opcode, host, addr);
d16005f8 295 case OP_LBU:
a59e5536 296 return ops->lb(state, opcode, host, addr);
d16005f8 297 case OP_LH:
a59e5536 298 return (s32) (s16) ops->lh(state, opcode, host, addr);
d16005f8 299 case OP_LHU:
a59e5536 300 return ops->lh(state, opcode, host, addr);
d16005f8 301 case OP_LWC2:
a59e5536 302 lightrec_lwc2(state, op, ops, host, addr);
d16005f8 303 return 0;
a59e5536 304 case OP_LWL:
305 return lightrec_lwl(state, ops, opcode, host, addr, data);
306 case OP_LWR:
307 return lightrec_lwr(state, ops, opcode, host, addr, data);
d16005f8
PC
308 case OP_LW:
309 default:
a59e5536 310 return ops->lw(state, opcode, host, addr);
d16005f8
PC
311 }
312}
313
314static void lightrec_rw_helper(struct lightrec_state *state,
98fa08a5
PC
315 union code op, u16 *flags,
316 struct block *block)
d16005f8 317{
98fa08a5
PC
318 u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs],
319 state->regs.gpr[op.i.rt], flags, block);
d16005f8
PC
320
321 switch (op.i.op) {
322 case OP_LB:
323 case OP_LBU:
324 case OP_LH:
325 case OP_LHU:
326 case OP_LWL:
327 case OP_LWR:
328 case OP_LW:
329 if (op.i.rt)
98fa08a5 330 state->regs.gpr[op.i.rt] = ret;
d16005f8
PC
331 default: /* fall-through */
332 break;
333 }
334}
335
22eee2ac 336static void lightrec_rw_cb(struct lightrec_state *state)
d16005f8 337{
22eee2ac 338 lightrec_rw_helper(state, (union code)state->c_wrapper_arg, NULL, NULL);
d16005f8
PC
339}
340
22eee2ac 341static void lightrec_rw_generic_cb(struct lightrec_state *state)
d16005f8 342{
98fa08a5
PC
343 struct block *block;
344 struct opcode *op;
345 bool was_tagged;
22eee2ac 346 u32 arg = state->c_wrapper_arg;
98fa08a5
PC
347 u16 offset = (u16)arg;
348
349 block = lightrec_find_block_from_lut(state->block_cache,
350 arg >> 16, state->next_pc);
351 if (unlikely(!block)) {
352 pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n",
353 state->next_pc, offset);
354 return;
355 }
356
357 op = &block->opcode_list[offset];
22eee2ac 358 was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(op->flags);
d16005f8 359
98fa08a5 360 lightrec_rw_helper(state, op->c, &op->flags, block);
d16005f8
PC
361
362 if (!was_tagged) {
98fa08a5
PC
363 pr_debug("Opcode of block at PC 0x%08x has been tagged - flag "
364 "for recompilation\n", block->pc);
d16005f8 365
a59e5536 366 block->flags |= BLOCK_SHOULD_RECOMPILE;
d16005f8
PC
367 }
368}
369
/* Clamp 'val' to the [min, max] range. The lower bound is checked first. */
static u32 clamp_s32(s32 val, s32 min, s32 max)
{
	if (val < min)
		return min;

	if (val > max)
		return max;

	return val;
}
d16005f8 374
98fa08a5
PC
375static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg)
376{
377 s16 gteir1, gteir2, gteir3;
378
379 switch (reg) {
380 case 1:
381 case 3:
382 case 5:
383 case 8:
384 case 9:
385 case 10:
386 case 11:
387 return (s32)(s16) state->regs.cp2d[reg];
388 case 7:
389 case 16:
390 case 17:
391 case 18:
392 case 19:
393 return (u16) state->regs.cp2d[reg];
394 case 28:
395 case 29:
396 gteir1 = (s16) state->regs.cp2d[9];
397 gteir2 = (s16) state->regs.cp2d[10];
398 gteir3 = (s16) state->regs.cp2d[11];
399
400 return clamp_s32(gteir1 >> 7, 0, 0x1f) << 0 |
401 clamp_s32(gteir2 >> 7, 0, 0x1f) << 5 |
402 clamp_s32(gteir3 >> 7, 0, 0x1f) << 10;
403 case 15:
404 reg = 14;
405 default: /* fall-through */
406 return state->regs.cp2d[reg];
407 }
408}
d16005f8 409
98fa08a5
PC
410u32 lightrec_mfc(struct lightrec_state *state, union code op)
411{
412 if (op.i.op == OP_CP0)
413 return state->regs.cp0[op.r.rd];
414 else if (op.r.rs == OP_CP2_BASIC_MFC2)
415 return lightrec_mfc2(state, op.r.rd);
d16005f8 416 else
98fa08a5 417 return state->regs.cp2c[op.r.rd];
d16005f8
PC
418}
419
98fa08a5 420static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
d16005f8 421{
fd58fa32 422 u32 status, oldstatus, cause;
98fa08a5
PC
423
424 switch (reg) {
425 case 1:
426 case 4:
427 case 8:
428 case 14:
429 case 15:
430 /* Those registers are read-only */
431 return;
fd58fa32 432 default:
98fa08a5
PC
433 break;
434 }
d16005f8 435
98fa08a5
PC
436 if (reg == 12) {
437 status = state->regs.cp0[12];
fd58fa32 438 oldstatus = status;
d16005f8 439
98fa08a5
PC
440 if (status & ~data & BIT(16)) {
441 state->ops.enable_ram(state, true);
442 lightrec_invalidate_all(state);
443 } else if (~status & data & BIT(16)) {
444 state->ops.enable_ram(state, false);
445 }
446 }
447
fd58fa32
PC
448 if (reg == 13) {
449 state->regs.cp0[13] &= ~0x300;
450 state->regs.cp0[13] |= data & 0x300;
451 } else {
452 state->regs.cp0[reg] = data;
453 }
98fa08a5
PC
454
455 if (reg == 12 || reg == 13) {
456 cause = state->regs.cp0[13];
457 status = state->regs.cp0[12];
458
fd58fa32 459 /* Handle software interrupts */
98fa08a5
PC
460 if (!!(status & cause & 0x300) & status)
461 lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT);
fd58fa32
PC
462
463 /* Handle hardware interrupts */
464 if (reg == 12 && !(~status & 0x401) && (~oldstatus & 0x401))
465 lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT);
98fa08a5
PC
466 }
467}
468
/* Count how many leading bits of 'data' are equal to its sign bit, the sign
 * bit included. The result is in the [1, 32] range: 0 and -1 both give 32.
 *
 * Uses __builtin_clrsb() when the compiler advertises it. The fallback works
 * on an unsigned copy, so that the left shift cannot overflow and the right
 * shifts always bring the value down to zero (a signed value with its top
 * bit set would never reach zero with an arithmetic shift). */
static u32 count_leading_bits(s32 data)
{
	u32 bits, cnt = 33;

#ifdef __has_builtin
#if __has_builtin(__builtin_clrsb)
	return 1 + __builtin_clrsb(data);
#endif
#endif

	/* Turn the leading sign bits into leading zeros */
	bits = (u32) data;
	if (data < 0)
		bits = ~bits;

	/* Bit 31 is now always clear; shifting left makes the loop below run
	 * once per non-leading bit, plus once */
	bits <<= 1;

	do {
		cnt -= 1;
		bits >>= 1;
	} while (bits);

	return cnt;
}
488
489static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data)
490{
491 switch (reg) {
492 case 15:
493 state->regs.cp2d[12] = state->regs.cp2d[13];
494 state->regs.cp2d[13] = state->regs.cp2d[14];
495 state->regs.cp2d[14] = data;
496 break;
497 case 28:
498 state->regs.cp2d[9] = (data << 7) & 0xf80;
499 state->regs.cp2d[10] = (data << 2) & 0xf80;
500 state->regs.cp2d[11] = (data >> 3) & 0xf80;
501 break;
502 case 31:
503 return;
504 case 30:
505 state->regs.cp2d[31] = count_leading_bits((s32) data);
506 default: /* fall-through */
507 state->regs.cp2d[reg] = data;
508 break;
509 }
510}
d16005f8 511
98fa08a5
PC
512static void lightrec_ctc2(struct lightrec_state *state, u8 reg, u32 data)
513{
514 switch (reg) {
515 case 4:
516 case 12:
517 case 20:
518 case 26:
519 case 27:
520 case 29:
521 case 30:
522 data = (s32)(s16) data;
523 break;
524 case 31:
525 data = (data & 0x7ffff000) | !!(data & 0x7f87e000) << 31;
526 default: /* fall-through */
527 break;
528 }
529
530 state->regs.cp2c[reg] = data;
531}
532
533void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
534{
535 if (op.i.op == OP_CP0)
536 lightrec_mtc0(state, op.r.rd, data);
537 else if (op.r.rs == OP_CP2_BASIC_CTC2)
538 lightrec_ctc2(state, op.r.rd, data);
539 else
540 lightrec_mtc2(state, op.r.rd, data);
d16005f8
PC
541}
542
22eee2ac 543static void lightrec_mtc_cb(struct lightrec_state *state)
d16005f8 544{
22eee2ac
PC
545 union code op = (union code) state->c_wrapper_arg;
546
98fa08a5 547 lightrec_mtc(state, op, state->regs.gpr[op.r.rt]);
d16005f8
PC
548}
549
98fa08a5 550void lightrec_rfe(struct lightrec_state *state)
d16005f8
PC
551{
552 u32 status;
553
554 /* Read CP0 Status register (r12) */
98fa08a5 555 status = state->regs.cp0[12];
d16005f8
PC
556
557 /* Switch the bits */
558 status = ((status & 0x3c) >> 2) | (status & ~0xf);
559
560 /* Write it back */
98fa08a5 561 lightrec_mtc0(state, 12, status);
d16005f8
PC
562}
563
98fa08a5 564void lightrec_cp(struct lightrec_state *state, union code op)
d16005f8 565{
98fa08a5
PC
566 if (op.i.op == OP_CP0) {
567 pr_err("Invalid CP opcode to coprocessor #0\n");
568 return;
569 }
d16005f8 570
98fa08a5 571 (*state->ops.cop2_op)(state, op.opcode);
d16005f8
PC
572}
573
22eee2ac
PC
574static void lightrec_cp_cb(struct lightrec_state *state)
575{
576 lightrec_cp(state, (union code) state->c_wrapper_arg);
577}
578
579static void lightrec_syscall_cb(struct lightrec_state *state)
d16005f8
PC
580{
581 lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL);
582}
583
22eee2ac 584static void lightrec_break_cb(struct lightrec_state *state)
d16005f8
PC
585{
586 lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK);
587}
588
589struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
590{
591 struct block *block = lightrec_find_block(state->block_cache, pc);
592
98fa08a5 593 if (block && lightrec_block_is_outdated(state, block)) {
d16005f8
PC
594 pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);
595
596 /* Make sure the recompiler isn't processing the block we'll
597 * destroy */
598 if (ENABLE_THREADED_COMPILER)
599 lightrec_recompiler_remove(state->rec, block);
600
601 lightrec_unregister_block(state->block_cache, block);
a59e5536 602 remove_from_code_lut(state->block_cache, block);
98fa08a5 603 lightrec_free_block(state, block);
d16005f8
PC
604 block = NULL;
605 }
606
607 if (!block) {
608 block = lightrec_precompile_block(state, pc);
609 if (!block) {
610 pr_err("Unable to recompile block at PC 0x%x\n", pc);
611 lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
612 return NULL;
613 }
614
615 lightrec_register_block(state->block_cache, block);
616 }
617
618 return block;
619}
620
621static void * get_next_block_func(struct lightrec_state *state, u32 pc)
622{
623 struct block *block;
624 bool should_recompile;
625 void *func;
626
627 for (;;) {
02487de7 628 func = lut_read(state, pc);
d16005f8 629 if (func && func != state->get_next_block)
98fa08a5 630 break;
d16005f8
PC
631
632 block = lightrec_get_block(state, pc);
633
634 if (unlikely(!block))
98fa08a5
PC
635 break;
636
637 if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) {
638 func = state->memset_func;
639 break;
640 }
d16005f8 641
a59e5536 642 should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE &&
643 !(block->flags & BLOCK_IS_DEAD);
d16005f8
PC
644
645 if (unlikely(should_recompile)) {
a59e5536 646 pr_debug("Block at PC 0x%08x should recompile\n", pc);
d16005f8 647
d16005f8 648 lightrec_unregister(MEM_FOR_CODE, block->code_size);
a59e5536 649
650 if (ENABLE_THREADED_COMPILER)
651 lightrec_recompiler_add(state->rec, block);
652 else
98fa08a5 653 lightrec_compile_block(state->cstate, block);
d16005f8
PC
654 }
655
656 if (ENABLE_THREADED_COMPILER && likely(!should_recompile))
98fa08a5 657 func = lightrec_recompiler_run_first_pass(state, block, &pc);
d16005f8
PC
658 else
659 func = block->function;
660
661 if (likely(func))
98fa08a5 662 break;
d16005f8 663
98fa08a5
PC
664 if (unlikely(block->flags & BLOCK_NEVER_COMPILE)) {
665 pc = lightrec_emulate_block(state, block, pc);
666
667 } else if (!ENABLE_THREADED_COMPILER) {
668 /* Block wasn't compiled yet - run the interpreter */
669 if (block->flags & BLOCK_FULLY_TAGGED)
670 pr_debug("Block fully tagged, skipping first pass\n");
671 else if (ENABLE_FIRST_PASS && likely(!should_recompile))
672 pc = lightrec_emulate_block(state, block, pc);
d16005f8 673
d16005f8 674 /* Then compile it using the profiled data */
98fa08a5
PC
675 lightrec_compile_block(state->cstate, block);
676 } else {
677 lightrec_recompiler_add(state->rec, block);
d16005f8
PC
678 }
679
680 if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
98fa08a5
PC
681 state->current_cycle >= state->target_cycle)
682 break;
d16005f8 683 }
d16005f8 684
98fa08a5
PC
685 state->next_pc = pc;
686 return func;
d16005f8
PC
687}
688
689static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
22eee2ac 690 void (*f)(struct lightrec_state *))
d16005f8
PC
691{
692 state->current_cycle = state->target_cycle - cycles_delta;
693
22eee2ac 694 (*f)(state);
d16005f8
PC
695
696 return state->target_cycle - state->current_cycle;
697}
698
02487de7
PC
699static void * lightrec_emit_code(struct lightrec_state *state,
700 jit_state_t *_jit, unsigned int *size)
701{
702 bool has_code_buffer = ENABLE_CODE_BUFFER && state->tlsf;
703 jit_word_t code_size, new_code_size;
704 void *code;
705
706 jit_realize();
707
708 if (!ENABLE_DISASSEMBLER)
709 jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE);
710
711 if (has_code_buffer) {
712 jit_get_code(&code_size);
713 code = tlsf_malloc(state->tlsf, (size_t) code_size);
714 if (!code)
715 return NULL;
716
717 jit_set_code(code, code_size);
718 }
719
720 code = jit_emit();
721
722 jit_get_code(&new_code_size);
723 lightrec_register(MEM_FOR_CODE, new_code_size);
724
725 if (has_code_buffer) {
726 tlsf_realloc(state->tlsf, code, new_code_size);
727
728 pr_debug("Creating code block at address 0x%" PRIxPTR ", "
729 "code size: %" PRIuPTR " new: %" PRIuPTR "\n",
730 (uintptr_t) code, code_size, new_code_size);
731 }
732
733 *size = (unsigned int) new_code_size;
734
735 return code;
736}
737
98fa08a5 738static struct block * generate_wrapper(struct lightrec_state *state)
d16005f8
PC
739{
740 struct block *block;
741 jit_state_t *_jit;
742 unsigned int i;
743 int stack_ptr;
d16005f8 744 jit_node_t *to_tramp, *to_fn_epilog;
fd58fa32 745 jit_node_t *addr[C_WRAPPERS_COUNT - 1];
d16005f8
PC
746
747 block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
748 if (!block)
749 goto err_no_mem;
750
751 _jit = jit_new_state();
752 if (!_jit)
753 goto err_free_block;
754
755 jit_name("RW wrapper");
756 jit_note(__FILE__, __LINE__);
757
758 /* Wrapper entry point */
759 jit_prolog();
fd58fa32
PC
760 jit_tramp(256);
761
762 /* Add entry points; separate them by opcodes that increment
763 * LIGHTREC_REG_STATE (since we cannot touch other registers).
764 * The difference will then tell us which C function to call. */
765 for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) {
766 jit_addi(LIGHTREC_REG_STATE, LIGHTREC_REG_STATE, __WORDSIZE / 8);
767 addr[i - 1] = jit_indirect();
768 }
769
770 jit_epilog();
771 jit_prolog();
d16005f8
PC
772
773 stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS);
774
fd58fa32 775 /* Save all temporaries on stack */
d16005f8
PC
776 for (i = 0; i < NUM_TEMPS; i++)
777 jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i));
778
779 /* Jump to the trampoline */
780 to_tramp = jit_jmpi();
781
782 /* The trampoline will jump back here */
783 to_fn_epilog = jit_label();
784
fd58fa32 785 /* Restore temporaries from stack */
d16005f8
PC
786 for (i = 0; i < NUM_TEMPS; i++)
787 jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t));
788
789 jit_ret();
790 jit_epilog();
791
792 /* Trampoline entry point.
793 * The sole purpose of the trampoline is to cheese Lightning not to
794 * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we
795 * do want to return to the caller with this register modified. */
796 jit_prolog();
797 jit_tramp(256);
798 jit_patch(to_tramp);
799
fd58fa32
PC
800 /* Retrieve the wrapper function */
801 jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
802 offsetof(struct lightrec_state, c_wrappers));
803
804 /* Restore LIGHTREC_REG_STATE to its correct value */
805 jit_movi(LIGHTREC_REG_STATE, (uintptr_t) state);
806
d16005f8
PC
807 jit_prepare();
808 jit_pushargr(LIGHTREC_REG_STATE);
809 jit_pushargr(LIGHTREC_REG_CYCLE);
d16005f8 810 jit_pushargr(JIT_R0);
98fa08a5 811 jit_finishi(c_function_wrapper);
d16005f8 812 jit_retval_i(LIGHTREC_REG_CYCLE);
d16005f8
PC
813
814 jit_patch_at(jit_jmpi(), to_fn_epilog);
815 jit_epilog();
816
d16005f8 817 block->_jit = _jit;
d16005f8
PC
818 block->opcode_list = NULL;
819 block->flags = 0;
820 block->nb_ops = 0;
821
02487de7
PC
822 block->function = lightrec_emit_code(state, _jit,
823 &block->code_size);
824 if (!block->function)
825 goto err_free_block;
826
fd58fa32
PC
827 state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function;
828
829 for (i = 0; i < C_WRAPPERS_COUNT - 1; i++)
830 state->wrappers_eps[i] = jit_address(addr[i]);
831
d16005f8
PC
832 if (ENABLE_DISASSEMBLER) {
833 pr_debug("Wrapper block:\n");
834 jit_disassemble();
835 }
836
837 jit_clear_state();
838 return block;
839
840err_free_block:
841 lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
842err_no_mem:
843 pr_err("Unable to compile wrapper: Out of memory\n");
844 return NULL;
845}
846
98fa08a5
PC
847static u32 lightrec_memset(struct lightrec_state *state)
848{
849 u32 kunseg_pc = kunseg(state->regs.gpr[4]);
850 void *host;
851 const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg_pc);
852 u32 length = state->regs.gpr[5] * 4;
853
854 if (!map) {
855 pr_err("Unable to find memory map for memset target address "
856 "0x%x\n", kunseg_pc);
857 return 0;
858 }
859
860 pr_debug("Calling host memset, PC 0x%x (host address 0x%" PRIxPTR ") for %u bytes\n",
861 kunseg_pc, (uintptr_t)host, length);
862 memset(host, 0, length);
863
864 if (!state->invalidate_from_dma_only)
865 lightrec_invalidate_map(state, map, kunseg_pc, length);
866
867 /* Rough estimation of the number of cycles consumed */
868 return 8 + 5 * (length + 3 / 4);
869}
870
d16005f8
PC
871static struct block * generate_dispatcher(struct lightrec_state *state)
872{
873 struct block *block;
874 jit_state_t *_jit;
02487de7 875 jit_node_t *to_end, *loop, *addr, *addr2, *addr3;
d16005f8 876 unsigned int i;
02487de7 877 u32 offset;
d16005f8
PC
878
879 block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
880 if (!block)
881 goto err_no_mem;
882
883 _jit = jit_new_state();
884 if (!_jit)
885 goto err_free_block;
886
887 jit_name("dispatcher");
888 jit_note(__FILE__, __LINE__);
889
890 jit_prolog();
891 jit_frame(256);
892
893 jit_getarg(JIT_R0, jit_arg());
d16005f8 894 jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg());
d16005f8
PC
895
896 /* Force all callee-saved registers to be pushed on the stack */
897 for (i = 0; i < NUM_REGS; i++)
898 jit_movr(JIT_V(i), JIT_V(i));
899
900 /* Pass lightrec_state structure to blocks, using the last callee-saved
901 * register that Lightning provides */
902 jit_movi(LIGHTREC_REG_STATE, (intptr_t) state);
903
904 loop = jit_label();
905
906 /* Call the block's code */
907 jit_jmpr(JIT_R0);
908
98fa08a5
PC
909 if (OPT_REPLACE_MEMSET) {
910 /* Blocks will jump here when they need to call
911 * lightrec_memset() */
912 addr3 = jit_indirect();
913
914 jit_prepare();
915 jit_pushargr(LIGHTREC_REG_STATE);
916 jit_finishi(lightrec_memset);
917
918 jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
919 offsetof(struct lightrec_state, regs.gpr[31]));
920
921 jit_retval(JIT_R0);
922 jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0);
923 }
924
d16005f8
PC
925 /* The block will jump here, with the number of cycles remaining in
926 * LIGHTREC_REG_CYCLE */
927 addr2 = jit_indirect();
928
98fa08a5
PC
929 /* Store back the next_pc to the lightrec_state structure */
930 offset = offsetof(struct lightrec_state, next_pc);
931 jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
932
d16005f8
PC
933 /* Jump to end if state->target_cycle < state->current_cycle */
934 to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
935
936 /* Convert next PC to KUNSEG and avoid mirrors */
02487de7
PC
937 jit_andi(JIT_R0, JIT_V0, 0x10000000 | (RAM_SIZE - 1));
938 jit_rshi_u(JIT_R1, JIT_R0, 28);
939 jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1);
940 jit_addi(JIT_R2, JIT_R2, RAM_SIZE);
941 jit_movnr(JIT_R0, JIT_R2, JIT_R1);
942
943 /* If possible, use the code LUT */
944 if (!lut_is_32bit(state))
98fa08a5 945 jit_lshi(JIT_R0, JIT_R0, 1);
d16005f8 946 jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE);
02487de7
PC
947
948 offset = offsetof(struct lightrec_state, code_lut);
949 if (lut_is_32bit(state))
950 jit_ldxi_ui(JIT_R0, JIT_R0, offset);
951 else
952 jit_ldxi(JIT_R0, JIT_R0, offset);
d16005f8
PC
953
954 /* If we get non-NULL, loop */
955 jit_patch_at(jit_bnei(JIT_R0, 0), loop);
956
957 /* Slow path: call C function get_next_block_func() */
d16005f8 958
98fa08a5 959 if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
d16005f8
PC
960 /* We may call the interpreter - update state->current_cycle */
961 jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
962 offsetof(struct lightrec_state, target_cycle));
963 jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE);
964 jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
965 LIGHTREC_REG_STATE, JIT_R1);
966 }
967
968 /* The code LUT will be set to this address when the block at the target
969 * PC has been preprocessed but not yet compiled by the threaded
970 * recompiler */
971 addr = jit_indirect();
972
973 /* Get the next block */
974 jit_prepare();
975 jit_pushargr(LIGHTREC_REG_STATE);
976 jit_pushargr(JIT_V0);
977 jit_finishi(&get_next_block_func);
978 jit_retval(JIT_R0);
979
98fa08a5 980 if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
d16005f8
PC
981 /* The interpreter may have updated state->current_cycle and
982 * state->target_cycle - recalc the delta */
983 jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE,
984 offsetof(struct lightrec_state, current_cycle));
985 jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
986 offsetof(struct lightrec_state, target_cycle));
987 jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1);
988 }
989
990 /* If we get non-NULL, loop */
991 jit_patch_at(jit_bnei(JIT_R0, 0), loop);
992
d16005f8
PC
993 /* When exiting, the recompiled code will jump to that address */
994 jit_note(__FILE__, __LINE__);
995 jit_patch(to_end);
996
d16005f8
PC
997 jit_retr(LIGHTREC_REG_CYCLE);
998 jit_epilog();
999
d16005f8 1000 block->_jit = _jit;
d16005f8
PC
1001 block->opcode_list = NULL;
1002 block->flags = 0;
1003 block->nb_ops = 0;
1004
02487de7
PC
1005 block->function = lightrec_emit_code(state, _jit,
1006 &block->code_size);
1007 if (!block->function)
1008 goto err_free_block;
d16005f8
PC
1009
1010 state->eob_wrapper_func = jit_address(addr2);
98fa08a5
PC
1011 if (OPT_REPLACE_MEMSET)
1012 state->memset_func = jit_address(addr3);
d16005f8
PC
1013 state->get_next_block = jit_address(addr);
1014
1015 if (ENABLE_DISASSEMBLER) {
1016 pr_debug("Dispatcher block:\n");
1017 jit_disassemble();
1018 }
1019
1020 /* We're done! */
1021 jit_clear_state();
1022 return block;
1023
1024err_free_block:
1025 lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
1026err_no_mem:
1027 pr_err("Unable to compile dispatcher: Out of memory\n");
1028 return NULL;
1029}
1030
1031union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
1032{
fd58fa32 1033 void *host = NULL;
d16005f8 1034
98fa08a5 1035 lightrec_get_map(state, &host, kunseg(pc));
d16005f8 1036
98fa08a5 1037 const u32 *code = (u32 *)host;
02487de7 1038 return (union code) LE32TOH(*code);
98fa08a5 1039}
d16005f8 1040
98fa08a5
PC
1041unsigned int lightrec_cycles_of_opcode(union code code)
1042{
1043 return 2;
1044}
d16005f8 1045
98fa08a5
PC
1046void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block)
1047{
1048 lightrec_free(state, MEM_FOR_IR,
1049 sizeof(*block->opcode_list) * block->nb_ops,
1050 block->opcode_list);
1051}
1052
1053static unsigned int lightrec_get_mips_block_len(const u32 *src)
1054{
1055 unsigned int i;
1056 union code c;
1057
1058 for (i = 1; ; i++) {
1059 c.opcode = LE32TOH(*src++);
1060
1061 if (is_syscall(c))
1062 return i;
1063
1064 if (is_unconditional_jump(c))
1065 return i + 1;
1066 }
1067}
1068
1069static struct opcode * lightrec_disassemble(struct lightrec_state *state,
1070 const u32 *src, unsigned int *len)
1071{
1072 struct opcode *list;
1073 unsigned int i, length;
1074
1075 length = lightrec_get_mips_block_len(src);
1076
1077 list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * length);
1078 if (!list) {
1079 pr_err("Unable to allocate memory\n");
1080 return NULL;
1081 }
1082
1083 for (i = 0; i < length; i++) {
1084 list[i].opcode = LE32TOH(src[i]);
1085 list[i].flags = 0;
1086 }
1087
1088 *len = length * sizeof(u32);
1089
1090 return list;
d16005f8
PC
1091}
1092
1093static struct block * lightrec_precompile_block(struct lightrec_state *state,
1094 u32 pc)
1095{
1096 struct opcode *list;
1097 struct block *block;
98fa08a5
PC
1098 void *host;
1099 const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc));
1100 const u32 *code = (u32 *) host;
d16005f8 1101 unsigned int length;
98fa08a5 1102 bool fully_tagged;
d16005f8
PC
1103
1104 if (!map)
1105 return NULL;
1106
d16005f8
PC
1107 block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
1108 if (!block) {
1109 pr_err("Unable to recompile block: Out of memory\n");
1110 return NULL;
1111 }
1112
1113 list = lightrec_disassemble(state, code, &length);
1114 if (!list) {
1115 lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
1116 return NULL;
1117 }
1118
1119 block->pc = pc;
d16005f8
PC
1120 block->_jit = NULL;
1121 block->function = NULL;
1122 block->opcode_list = list;
98fa08a5 1123 block->code = code;
d16005f8
PC
1124 block->next = NULL;
1125 block->flags = 0;
1126 block->code_size = 0;
1127#if ENABLE_THREADED_COMPILER
1128 block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT;
1129#endif
1130 block->nb_ops = length / sizeof(u32);
1131
98fa08a5 1132 lightrec_optimize(state, block);
d16005f8
PC
1133
1134 length = block->nb_ops * sizeof(u32);
1135
1136 lightrec_register(MEM_FOR_MIPS_CODE, length);
1137
1138 if (ENABLE_DISASSEMBLER) {
98fa08a5
PC
1139 pr_debug("Disassembled block at PC: 0x%08x\n", block->pc);
1140 lightrec_print_disassembly(block, code);
d16005f8
PC
1141 }
1142
98fa08a5 1143 pr_debug("Block size: %hu opcodes\n", block->nb_ops);
d16005f8
PC
1144
1145 /* If the first opcode is an 'impossible' branch, never compile the
1146 * block */
98fa08a5 1147 if (should_emulate(block->opcode_list))
d16005f8
PC
1148 block->flags |= BLOCK_NEVER_COMPILE;
1149
98fa08a5
PC
1150 fully_tagged = lightrec_block_is_fully_tagged(block);
1151 if (fully_tagged)
1152 block->flags |= BLOCK_FULLY_TAGGED;
1153
1154 if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET))
02487de7 1155 lut_write(state, lut_offset(pc), state->memset_func);
98fa08a5 1156
d16005f8
PC
1157 block->hash = lightrec_calculate_block_hash(block);
1158
a59e5536 1159 pr_debug("Recompile count: %u\n", state->nb_precompile++);
1160
d16005f8
PC
1161 return block;
1162}
1163
98fa08a5 1164static bool lightrec_block_is_fully_tagged(const struct block *block)
d16005f8 1165{
98fa08a5
PC
1166 const struct opcode *op;
1167 unsigned int i;
1168
1169 for (i = 0; i < block->nb_ops; i++) {
1170 op = &block->opcode_list[i];
d16005f8 1171
d16005f8
PC
1172 /* Verify that all load/stores of the opcode list
1173 * Check all loads/stores of the opcode list and mark the
1174 * block as fully compiled if they all have been tagged. */
1175 switch (op->c.i.op) {
1176 case OP_LB:
1177 case OP_LH:
1178 case OP_LWL:
1179 case OP_LW:
1180 case OP_LBU:
1181 case OP_LHU:
1182 case OP_LWR:
1183 case OP_SB:
1184 case OP_SH:
1185 case OP_SWL:
1186 case OP_SW:
1187 case OP_SWR:
1188 case OP_LWC2:
1189 case OP_SWC2:
22eee2ac 1190 if (!LIGHTREC_FLAGS_GET_IO_MODE(op->flags))
d16005f8
PC
1191 return false;
1192 default: /* fall-through */
1193 continue;
1194 }
1195 }
1196
1197 return true;
1198}
1199
98fa08a5 1200static void lightrec_reap_block(struct lightrec_state *state, void *data)
a59e5536 1201{
1202 struct block *block = data;
1203
1204 pr_debug("Reap dead block at PC 0x%08x\n", block->pc);
98fa08a5
PC
1205 lightrec_unregister_block(state->block_cache, block);
1206 lightrec_free_block(state, block);
a59e5536 1207}
1208
/* Reaper callback: destroy the JIT state passed as 'data'. */
static void lightrec_reap_jit(struct lightrec_state *state, void *data)
{
	(void)state;

	_jit_destroy_state(data);
}
1213
02487de7
PC
1214static void lightrec_free_function(struct lightrec_state *state, void *fn)
1215{
1216 if (ENABLE_CODE_BUFFER && state->tlsf) {
1217 pr_debug("Freeing code block at 0x%" PRIxPTR "\n", (uintptr_t) fn);
1218 tlsf_free(state->tlsf, fn);
1219 }
1220}
1221
/* Reaper callback: free the native code pointed to by 'data'. */
static void lightrec_reap_function(struct lightrec_state *state, void *data)
{
	void *fn = data;

	lightrec_free_function(state, fn);
}
1226
98fa08a5
PC
1227int lightrec_compile_block(struct lightrec_cstate *cstate,
1228 struct block *block)
d16005f8 1229{
98fa08a5 1230 struct lightrec_state *state = cstate->state;
a59e5536 1231 struct lightrec_branch_target *target;
d16005f8 1232 bool op_list_freed = false, fully_tagged = false;
a59e5536 1233 struct block *block2;
d16005f8 1234 struct opcode *elm;
a59e5536 1235 jit_state_t *_jit, *oldjit;
d16005f8
PC
1236 jit_node_t *start_of_block;
1237 bool skip_next = false;
02487de7 1238 void *old_fn;
d16005f8 1239 unsigned int i, j;
98fa08a5 1240 u32 offset;
d16005f8
PC
1241
1242 fully_tagged = lightrec_block_is_fully_tagged(block);
1243 if (fully_tagged)
1244 block->flags |= BLOCK_FULLY_TAGGED;
1245
1246 _jit = jit_new_state();
1247 if (!_jit)
1248 return -ENOMEM;
1249
a59e5536 1250 oldjit = block->_jit;
02487de7 1251 old_fn = block->function;
d16005f8
PC
1252 block->_jit = _jit;
1253
98fa08a5
PC
1254 lightrec_regcache_reset(cstate->reg_cache);
1255 cstate->cycles = 0;
1256 cstate->nb_branches = 0;
1257 cstate->nb_local_branches = 0;
1258 cstate->nb_targets = 0;
d16005f8
PC
1259
1260 jit_prolog();
1261 jit_tramp(256);
1262
1263 start_of_block = jit_label();
1264
98fa08a5
PC
1265 for (i = 0; i < block->nb_ops; i++) {
1266 elm = &block->opcode_list[i];
d16005f8
PC
1267
1268 if (skip_next) {
1269 skip_next = false;
1270 continue;
1271 }
1272
98fa08a5 1273 cstate->cycles += lightrec_cycles_of_opcode(elm->c);
d16005f8 1274
98fa08a5 1275 if (should_emulate(elm)) {
d16005f8 1276 pr_debug("Branch at offset 0x%x will be emulated\n",
98fa08a5
PC
1277 i << 2);
1278
1279 lightrec_emit_eob(cstate, block, i, false);
d16005f8 1280 skip_next = !(elm->flags & LIGHTREC_NO_DS);
98fa08a5
PC
1281 } else {
1282 lightrec_rec_opcode(cstate, block, i);
d16005f8
PC
1283 skip_next = has_delay_slot(elm->c) &&
1284 !(elm->flags & LIGHTREC_NO_DS);
1285#if _WIN32
1286 /* FIXME: GNU Lightning on Windows seems to use our
1287 * mapped registers as temporaries. Until the actual bug
1288 * is found and fixed, unconditionally mark our
1289 * registers as live here. */
98fa08a5 1290 lightrec_regcache_mark_live(cstate->reg_cache, _jit);
d16005f8
PC
1291#endif
1292 }
1293 }
1294
98fa08a5
PC
1295 for (i = 0; i < cstate->nb_branches; i++)
1296 jit_patch(cstate->branches[i]);
d16005f8 1297
98fa08a5
PC
1298 for (i = 0; i < cstate->nb_local_branches; i++) {
1299 struct lightrec_branch *branch = &cstate->local_branches[i];
d16005f8
PC
1300
1301 pr_debug("Patch local branch to offset 0x%x\n",
1302 branch->target << 2);
1303
1304 if (branch->target == 0) {
1305 jit_patch_at(branch->branch, start_of_block);
1306 continue;
1307 }
1308
98fa08a5
PC
1309 for (j = 0; j < cstate->nb_targets; j++) {
1310 if (cstate->targets[j].offset == branch->target) {
d16005f8 1311 jit_patch_at(branch->branch,
98fa08a5 1312 cstate->targets[j].label);
d16005f8
PC
1313 break;
1314 }
1315 }
1316
98fa08a5 1317 if (j == cstate->nb_targets)
d16005f8
PC
1318 pr_err("Unable to find branch target\n");
1319 }
1320
1321 jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
1322 offsetof(struct lightrec_state, eob_wrapper_func));
1323
1324 jit_jmpr(JIT_R0);
1325
1326 jit_ret();
1327 jit_epilog();
1328
02487de7
PC
1329 block->function = lightrec_emit_code(state, _jit,
1330 &block->code_size);
1331 if (!block->function) {
1332 pr_err("Unable to compile block!\n");
1333 }
1334
a59e5536 1335 block->flags &= ~BLOCK_SHOULD_RECOMPILE;
d16005f8
PC
1336
1337 /* Add compiled function to the LUT */
02487de7 1338 lut_write(state, lut_offset(block->pc), block->function);
d16005f8 1339
98fa08a5
PC
1340 if (ENABLE_THREADED_COMPILER) {
1341 /* Since we might try to reap the same block multiple times,
1342 * we need the reaper to wait until everything has been
1343 * submitted, so that the duplicate entries can be dropped. */
1344 lightrec_reaper_pause(state->reaper);
a59e5536 1345 }
1346
1347 /* Detect old blocks that have been covered by the new one */
98fa08a5
PC
1348 for (i = 0; i < cstate->nb_targets; i++) {
1349 target = &cstate->targets[i];
a59e5536 1350
1351 if (!target->offset)
1352 continue;
1353
1354 offset = block->pc + target->offset * sizeof(u32);
1355 block2 = lightrec_find_block(state->block_cache, offset);
1356 if (block2) {
1357 /* No need to check if block2 is compilable - it must
1358 * be, otherwise block wouldn't be compilable either */
1359
98fa08a5
PC
1360 /* Set the "block dead" flag to prevent the dynarec from
1361 * recompiling this block */
a59e5536 1362 block2->flags |= BLOCK_IS_DEAD;
1363
98fa08a5
PC
1364 /* If block2 was pending for compilation, cancel it.
1365 * If it's being compiled right now, wait until it
1366 * finishes. */
1367 if (ENABLE_THREADED_COMPILER)
1368 lightrec_recompiler_remove(state->rec, block2);
fd58fa32 1369 }
98fa08a5 1370
fd58fa32
PC
1371 /* We know from now on that block2 (if present) isn't going to
1372 * be compiled. We can override the LUT entry with our new
1373 * block's entry point. */
1374 offset = lut_offset(block->pc) + target->offset;
02487de7 1375 lut_write(state, offset, jit_address(target->label));
98fa08a5 1376
fd58fa32 1377 if (block2) {
a59e5536 1378 pr_debug("Reap block 0x%08x as it's covered by block "
1379 "0x%08x\n", block2->pc, block->pc);
1380
98fa08a5 1381 /* Finally, reap the block. */
a59e5536 1382 if (ENABLE_THREADED_COMPILER) {
a59e5536 1383 lightrec_reaper_add(state->reaper,
1384 lightrec_reap_block,
1385 block2);
1386 } else {
98fa08a5
PC
1387 lightrec_unregister_block(state->block_cache, block2);
1388 lightrec_free_block(state, block2);
a59e5536 1389 }
1390 }
1391 }
1392
b19ddc79 1393 if (ENABLE_THREADED_COMPILER)
98fa08a5
PC
1394 lightrec_reaper_continue(state->reaper);
1395
d16005f8 1396 if (ENABLE_DISASSEMBLER) {
98fa08a5 1397 pr_debug("Compiling block at PC: 0x%08x\n", block->pc);
d16005f8
PC
1398 jit_disassemble();
1399 }
1400
1401 jit_clear_state();
1402
1403#if ENABLE_THREADED_COMPILER
1404 if (fully_tagged)
1405 op_list_freed = atomic_flag_test_and_set(&block->op_list_freed);
1406#endif
1407 if (fully_tagged && !op_list_freed) {
1408 pr_debug("Block PC 0x%08x is fully tagged"
1409 " - free opcode list\n", block->pc);
98fa08a5 1410 lightrec_free_opcode_list(state, block);
d16005f8
PC
1411 block->opcode_list = NULL;
1412 }
1413
a59e5536 1414 if (oldjit) {
1415 pr_debug("Block 0x%08x recompiled, reaping old jit context.\n",
1416 block->pc);
1417
02487de7 1418 if (ENABLE_THREADED_COMPILER) {
a59e5536 1419 lightrec_reaper_add(state->reaper,
1420 lightrec_reap_jit, oldjit);
02487de7
PC
1421 lightrec_reaper_add(state->reaper,
1422 lightrec_reap_function, old_fn);
1423 } else {
a59e5536 1424 _jit_destroy_state(oldjit);
02487de7
PC
1425 lightrec_free_function(state, old_fn);
1426 }
a59e5536 1427 }
1428
d16005f8
PC
1429 return 0;
1430}
1431
98fa08a5
PC
1432static void lightrec_print_info(struct lightrec_state *state)
1433{
1434 if ((state->current_cycle & ~0xfffffff) != state->old_cycle_counter) {
1435 pr_info("Lightrec RAM usage: IR %u KiB, CODE %u KiB, "
1436 "MIPS %u KiB, TOTAL %u KiB, avg. IPI %f\n",
1437 lightrec_get_mem_usage(MEM_FOR_IR) / 1024,
1438 lightrec_get_mem_usage(MEM_FOR_CODE) / 1024,
1439 lightrec_get_mem_usage(MEM_FOR_MIPS_CODE) / 1024,
1440 lightrec_get_total_mem_usage() / 1024,
1441 lightrec_get_average_ipi());
1442 state->old_cycle_counter = state->current_cycle & ~0xfffffff;
1443 }
1444}
1445
d16005f8
PC
1446u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
1447{
1448 s32 (*func)(void *, s32) = (void *)state->dispatcher->function;
1449 void *block_trace;
1450 s32 cycles_delta;
1451
1452 state->exit_flags = LIGHTREC_EXIT_NORMAL;
1453
1454 /* Handle the cycle counter overflowing */
1455 if (unlikely(target_cycle < state->current_cycle))
1456 target_cycle = UINT_MAX;
1457
1458 state->target_cycle = target_cycle;
98fa08a5 1459 state->next_pc = pc;
d16005f8
PC
1460
1461 block_trace = get_next_block_func(state, pc);
1462 if (block_trace) {
1463 cycles_delta = state->target_cycle - state->current_cycle;
1464
1465 cycles_delta = (*func)(block_trace, cycles_delta);
1466
1467 state->current_cycle = state->target_cycle - cycles_delta;
1468 }
1469
a59e5536 1470 if (ENABLE_THREADED_COMPILER)
1471 lightrec_reaper_reap(state->reaper);
1472
98fa08a5
PC
1473 if (LOG_LEVEL >= INFO_L)
1474 lightrec_print_info(state);
1475
d16005f8
PC
1476 return state->next_pc;
1477}
1478
1479u32 lightrec_execute_one(struct lightrec_state *state, u32 pc)
1480{
1481 return lightrec_execute(state, pc, state->current_cycle);
1482}
1483
1484u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
1485{
1486 struct block *block = lightrec_get_block(state, pc);
1487 if (!block)
1488 return 0;
1489
1490 state->exit_flags = LIGHTREC_EXIT_NORMAL;
1491
98fa08a5
PC
1492 pc = lightrec_emulate_block(state, block, pc);
1493
1494 if (LOG_LEVEL >= INFO_L)
1495 lightrec_print_info(state);
1496
1497 return pc;
d16005f8
PC
1498}
1499
98fa08a5 1500void lightrec_free_block(struct lightrec_state *state, struct block *block)
d16005f8
PC
1501{
1502 lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
1503 if (block->opcode_list)
98fa08a5 1504 lightrec_free_opcode_list(state, block);
d16005f8
PC
1505 if (block->_jit)
1506 _jit_destroy_state(block->_jit);
02487de7 1507 lightrec_free_function(state, block->function);
d16005f8 1508 lightrec_unregister(MEM_FOR_CODE, block->code_size);
98fa08a5
PC
1509 lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
1510}
1511
1512struct lightrec_cstate * lightrec_create_cstate(struct lightrec_state *state)
1513{
1514 struct lightrec_cstate *cstate;
1515
1516 cstate = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*cstate));
1517 if (!cstate)
1518 return NULL;
1519
1520 cstate->reg_cache = lightrec_regcache_init(state);
1521 if (!cstate->reg_cache) {
1522 lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*cstate), cstate);
1523 return NULL;
1524 }
1525
1526 cstate->state = state;
1527
1528 return cstate;
1529}
1530
1531void lightrec_free_cstate(struct lightrec_cstate *cstate)
1532{
1533 lightrec_free_regcache(cstate->reg_cache);
1534 lightrec_free(cstate->state, MEM_FOR_LIGHTREC, sizeof(*cstate), cstate);
d16005f8
PC
1535}
1536
1537struct lightrec_state * lightrec_init(char *argv0,
1538 const struct lightrec_mem_map *map,
1539 size_t nb,
1540 const struct lightrec_ops *ops)
1541{
02487de7 1542 const struct lightrec_mem_map *codebuf_map;
d16005f8 1543 struct lightrec_state *state;
02487de7
PC
1544 uintptr_t addr;
1545 void *tlsf = NULL;
1546 bool with_32bit_lut = false;
1547 size_t lut_size;
d16005f8
PC
1548
1549 /* Sanity-check ops */
98fa08a5 1550 if (!ops || !ops->cop2_op || !ops->enable_ram) {
d16005f8
PC
1551 pr_err("Missing callbacks in lightrec_ops structure\n");
1552 return NULL;
1553 }
1554
02487de7
PC
1555 if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER) {
1556 codebuf_map = &map[PSX_MAP_CODE_BUFFER];
1557
1558 tlsf = tlsf_create_with_pool(codebuf_map->address,
1559 codebuf_map->length);
1560 if (!tlsf) {
1561 pr_err("Unable to initialize code buffer\n");
1562 return NULL;
1563 }
1564
1565 if (__WORDSIZE == 64) {
1566 addr = (uintptr_t) codebuf_map->address + codebuf_map->length - 1;
1567 with_32bit_lut = addr == (u32) addr;
1568 }
1569 }
1570
1571 if (with_32bit_lut)
1572 lut_size = CODE_LUT_SIZE * 4;
1573 else
1574 lut_size = CODE_LUT_SIZE * sizeof(void *);
1575
d16005f8
PC
1576 init_jit(argv0);
1577
02487de7 1578 state = calloc(1, sizeof(*state) + lut_size);
d16005f8
PC
1579 if (!state)
1580 goto err_finish_jit;
1581
02487de7
PC
1582 lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + lut_size);
1583
1584 state->tlsf = tlsf;
1585 state->with_32bit_lut = with_32bit_lut;
d16005f8
PC
1586
1587#if ENABLE_TINYMM
1588 state->tinymm = tinymm_init(malloc, free, 4096);
1589 if (!state->tinymm)
1590 goto err_free_state;
1591#endif
1592
1593 state->block_cache = lightrec_blockcache_init(state);
1594 if (!state->block_cache)
1595 goto err_free_tinymm;
1596
d16005f8
PC
1597 if (ENABLE_THREADED_COMPILER) {
1598 state->rec = lightrec_recompiler_init(state);
1599 if (!state->rec)
98fa08a5 1600 goto err_free_block_cache;
a59e5536 1601
1602 state->reaper = lightrec_reaper_init(state);
1603 if (!state->reaper)
1604 goto err_free_recompiler;
98fa08a5
PC
1605 } else {
1606 state->cstate = lightrec_create_cstate(state);
1607 if (!state->cstate)
1608 goto err_free_block_cache;
d16005f8
PC
1609 }
1610
1611 state->nb_maps = nb;
1612 state->maps = map;
1613
1614 memcpy(&state->ops, ops, sizeof(*ops));
1615
1616 state->dispatcher = generate_dispatcher(state);
1617 if (!state->dispatcher)
a59e5536 1618 goto err_free_reaper;
d16005f8 1619
98fa08a5
PC
1620 state->c_wrapper_block = generate_wrapper(state);
1621 if (!state->c_wrapper_block)
d16005f8
PC
1622 goto err_free_dispatcher;
1623
98fa08a5
PC
1624 state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb;
1625 state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb;
98fa08a5 1626 state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb;
22eee2ac 1627 state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb;
98fa08a5
PC
1628 state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb;
1629 state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb;
d16005f8
PC
1630
1631 map = &state->maps[PSX_MAP_BIOS];
1632 state->offset_bios = (uintptr_t)map->address - map->pc;
1633
1634 map = &state->maps[PSX_MAP_SCRATCH_PAD];
1635 state->offset_scratch = (uintptr_t)map->address - map->pc;
1636
1637 map = &state->maps[PSX_MAP_KERNEL_USER_RAM];
1638 state->offset_ram = (uintptr_t)map->address - map->pc;
1639
1640 if (state->maps[PSX_MAP_MIRROR1].address == map->address + 0x200000 &&
1641 state->maps[PSX_MAP_MIRROR2].address == map->address + 0x400000 &&
1642 state->maps[PSX_MAP_MIRROR3].address == map->address + 0x600000)
1643 state->mirrors_mapped = true;
1644
98fa08a5
PC
1645 if (state->offset_bios == 0 &&
1646 state->offset_scratch == 0 &&
1647 state->offset_ram == 0 &&
1648 state->mirrors_mapped) {
1649 pr_info("Memory map is perfect. Emitted code will be best.\n");
1650 } else {
1651 pr_info("Memory map is sub-par. Emitted code will be slow.\n");
1652 }
1653
02487de7
PC
1654 if (state->with_32bit_lut)
1655 pr_info("Using 32-bit LUT\n");
1656
d16005f8
PC
1657 return state;
1658
d16005f8 1659err_free_dispatcher:
98fa08a5 1660 lightrec_free_block(state, state->dispatcher);
a59e5536 1661err_free_reaper:
1662 if (ENABLE_THREADED_COMPILER)
1663 lightrec_reaper_destroy(state->reaper);
d16005f8
PC
1664err_free_recompiler:
1665 if (ENABLE_THREADED_COMPILER)
1666 lightrec_free_recompiler(state->rec);
98fa08a5
PC
1667 else
1668 lightrec_free_cstate(state->cstate);
d16005f8
PC
1669err_free_block_cache:
1670 lightrec_free_block_cache(state->block_cache);
1671err_free_tinymm:
1672#if ENABLE_TINYMM
1673 tinymm_shutdown(state->tinymm);
1674err_free_state:
1675#endif
1676 lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
02487de7 1677 lut_elm_size(state) * CODE_LUT_SIZE);
d16005f8
PC
1678 free(state);
1679err_finish_jit:
1680 finish_jit();
02487de7
PC
1681 if (ENABLE_CODE_BUFFER && tlsf)
1682 tlsf_destroy(tlsf);
d16005f8
PC
1683 return NULL;
1684}
1685
1686void lightrec_destroy(struct lightrec_state *state)
1687{
98fa08a5
PC
1688 /* Force a print info on destroy*/
1689 state->current_cycle = ~state->current_cycle;
1690 lightrec_print_info(state);
1691
a59e5536 1692 if (ENABLE_THREADED_COMPILER) {
d16005f8 1693 lightrec_free_recompiler(state->rec);
a59e5536 1694 lightrec_reaper_destroy(state->reaper);
98fa08a5
PC
1695 } else {
1696 lightrec_free_cstate(state->cstate);
a59e5536 1697 }
d16005f8 1698
d16005f8 1699 lightrec_free_block_cache(state->block_cache);
98fa08a5
PC
1700 lightrec_free_block(state, state->dispatcher);
1701 lightrec_free_block(state, state->c_wrapper_block);
d16005f8 1702 finish_jit();
02487de7
PC
1703 if (ENABLE_CODE_BUFFER && state->tlsf)
1704 tlsf_destroy(state->tlsf);
d16005f8
PC
1705
1706#if ENABLE_TINYMM
1707 tinymm_shutdown(state->tinymm);
1708#endif
1709 lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) +
02487de7 1710 lut_elm_size(state) * CODE_LUT_SIZE);
d16005f8
PC
1711 free(state);
1712}
1713
1714void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len)
1715{
1716 u32 kaddr = kunseg(addr & ~0x3);
98fa08a5 1717 const struct lightrec_mem_map *map = lightrec_get_map(state, NULL, kaddr);
d16005f8
PC
1718
1719 if (map) {
d16005f8
PC
1720 if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM])
1721 return;
1722
1723 /* Handle mirrors */
1724 kaddr &= (state->maps[PSX_MAP_KERNEL_USER_RAM].length - 1);
1725
98fa08a5 1726 lightrec_invalidate_map(state, map, kaddr, len);
d16005f8
PC
1727 }
1728}
1729
1730void lightrec_invalidate_all(struct lightrec_state *state)
1731{
02487de7 1732 memset(state->code_lut, 0, lut_elm_size(state) * CODE_LUT_SIZE);
d16005f8
PC
1733}
1734
1735void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only)
1736{
1737 if (state->invalidate_from_dma_only != dma_only)
1738 lightrec_invalidate_all(state);
1739
1740 state->invalidate_from_dma_only = dma_only;
1741}
1742
1743void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags)
1744{
1745 if (flags != LIGHTREC_EXIT_NORMAL) {
1746 state->exit_flags |= flags;
1747 state->target_cycle = state->current_cycle;
1748 }
1749}
1750
1751u32 lightrec_exit_flags(struct lightrec_state *state)
1752{
1753 return state->exit_flags;
1754}
1755
d16005f8
PC
1756u32 lightrec_current_cycle_count(const struct lightrec_state *state)
1757{
1758 return state->current_cycle;
1759}
1760
1761void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles)
1762{
1763 state->current_cycle = cycles;
1764
1765 if (state->target_cycle < cycles)
1766 state->target_cycle = cycles;
1767}
1768
1769void lightrec_set_target_cycle_count(struct lightrec_state *state, u32 cycles)
1770{
1771 if (state->exit_flags == LIGHTREC_EXIT_NORMAL) {
1772 if (cycles < state->current_cycle)
1773 cycles = state->current_cycle;
1774
1775 state->target_cycle = cycles;
1776 }
1777}
98fa08a5
PC
1778
1779struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state)
1780{
1781 return &state->regs;
1782}