Merge pull request #742 from pcercuei/update-lightrec-20230804
[pcsx_rearmed.git] / deps / lightrec / emitter.c
1 // SPDX-License-Identifier: LGPL-2.1-or-later
2 /*
3  * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
4  */
5
6 #include "blockcache.h"
7 #include "debug.h"
8 #include "disassembler.h"
9 #include "emitter.h"
10 #include "lightning-wrapper.h"
11 #include "optimizer.h"
12 #include "regcache.h"
13
14 #include <stdbool.h>
15 #include <stddef.h>
16
17 typedef void (*lightrec_rec_func_t)(struct lightrec_cstate *, const struct block *, u16);
18
19 /* Forward declarations */
20 static void rec_SPECIAL(struct lightrec_cstate *state, const struct block *block, u16 offset);
21 static void rec_REGIMM(struct lightrec_cstate *state, const struct block *block, u16 offset);
22 static void rec_CP0(struct lightrec_cstate *state, const struct block *block, u16 offset);
23 static void rec_CP2(struct lightrec_cstate *state, const struct block *block, u16 offset);
24 static void rec_META(struct lightrec_cstate *state, const struct block *block, u16 offset);
25 static void rec_cp2_do_mtc2(struct lightrec_cstate *state,
26                             const struct block *block, u16 offset, u8 reg, u8 in_reg);
27 static void rec_cp2_do_mfc2(struct lightrec_cstate *state,
28                             const struct block *block, u16 offset,
29                             u8 reg, u8 out_reg);
30
31 static void unknown_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset)
32 {
33         pr_warn("Unknown opcode: 0x%08x at PC 0x%08x\n",
34                 block->opcode_list[offset].c.opcode,
35                 block->pc + (offset << 2));
36 }
37
38 static void
39 lightrec_jump_to_fn(jit_state_t *_jit, void (*fn)(void))
40 {
41         /* Prevent jit_jmpi() from using our cycles register as a temporary */
42         jit_live(LIGHTREC_REG_CYCLE);
43
44         jit_patch_abs(jit_jmpi(), fn);
45 }
46
47 static void
48 lightrec_jump_to_eob(struct lightrec_cstate *state, jit_state_t *_jit)
49 {
50         lightrec_jump_to_fn(_jit, state->state->eob_wrapper_func);
51 }
52
53 static void
54 lightrec_jump_to_ds_check(struct lightrec_cstate *state, jit_state_t *_jit)
55 {
56         lightrec_jump_to_fn(_jit, state->state->ds_check_func);
57 }
58
59 static void update_ra_register(struct regcache *reg_cache, jit_state_t *_jit,
60                                u8 ra_reg, u32 pc, u32 link)
61 {
62         u8 link_reg;
63
64         link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg, 0);
65         lightrec_load_imm(reg_cache, _jit, link_reg, pc, link);
66         lightrec_free_reg(reg_cache, link_reg);
67 }
68
/*
 * Emit the code that terminates the block at the opcode found at 'offset'.
 *
 * In order, the generated code: optionally writes 'link' into 'ra_reg',
 * loads the next PC (from the emulated register 'reg_new_pc' when it is
 * >= 0, from the immediate 'imm' otherwise), runs the delay slot when it
 * has to be handled here, flushes the register cache, optionally subtracts
 * the consumed cycles, and finally jumps either to the end-of-block
 * wrapper or to the delay-slot check function.
 *
 * A 'link' of 0 means that no link register is written.
 * 'update_cycles' selects whether the cycle counter is decremented here.
 * The register cache is reset before returning.
 */
69 static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
70                                        const struct block *block, u16 offset,
71                                        s8 reg_new_pc, u32 imm, u8 ra_reg,
72                                        u32 link, bool update_cycles)
73 {
74         struct regcache *reg_cache = state->reg_cache;
75         jit_state_t *_jit = block->_jit;
76         const struct opcode *op = &block->opcode_list[offset],
77                             *ds = get_delay_slot(block->opcode_list, offset);
78         u32 cycles = state->cycles + lightrec_cycles_of_opcode(op->c);
79
80         jit_note(__FILE__, __LINE__);
81
        /* Usual case: the link register differs from the register holding
         * the target, so it can be written before the next PC is loaded. */
82         if (link && ra_reg != reg_new_pc)
83                 update_ra_register(reg_cache, _jit, ra_reg, block->pc, link);
84
        /* A negative 'reg_new_pc' selects the immediate target 'imm'. */
85         if (reg_new_pc < 0)
86                 lightrec_load_next_pc_imm(reg_cache, _jit, block->pc, imm);
87         else
88                 lightrec_load_next_pc(reg_cache, _jit, reg_new_pc);
89
90         if (link && ra_reg == reg_new_pc) {
91                 /* Handle the special case: JALR $r0, $r0
92                  * In that case the target PC should be the old value of the
93                  * register. */
94                 update_ra_register(reg_cache, _jit, ra_reg, block->pc, link);
95         }
96
        /* The delay slot is only accounted for and recompiled here when the
         * opcode has one, it is not flagged "no delay slot", and the opcode
         * is not flagged as a local branch. */
97         if (has_delay_slot(op->c) &&
98             !op_flag_no_ds(op->flags) && !op_flag_local_branch(op->flags)) {
99                 cycles += lightrec_cycles_of_opcode(ds->c);
100
101                 /* Recompile the delay slot */
                /* An all-zero opcode word is skipped, nothing is emitted. */
102                 if (ds->c.opcode)
103                         lightrec_rec_opcode(state, block, offset + 1);
104         }
105
106         /* Clean the remaining registers */
107         lightrec_clean_regs(reg_cache, _jit);
108
109         if (cycles && update_cycles) {
110                 jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
111                 pr_debug("EOB: %u cycles\n", cycles);
112         }
113
114         if (op_flag_load_delay(ds->flags)
115             && opcode_is_load(ds->c) && !state->no_load_delay) {
116                 /* If the delay slot is a load opcode, its target register
117                  * will be written after the first opcode of the target is
118                  * executed. Handle this by jumping to a special section of
119                  * the dispatcher. It expects the loaded value to be in
120                  * REG_TEMP, and the target register number to be in JIT_V1.*/
121                 jit_movi(JIT_V1, ds->c.i.rt);
122
123                 lightrec_jump_to_ds_check(state, _jit);
124         } else {
125                 lightrec_jump_to_eob(state, _jit);
126         }
127
        /* Code emitted after this point starts with an empty register cache. */
128         lightrec_regcache_reset(reg_cache);
129 }
130
131 void lightrec_emit_jump_to_interpreter(struct lightrec_cstate *state,
132                                        const struct block *block, u16 offset)
133 {
134         struct regcache *reg_cache = state->reg_cache;
135         jit_state_t *_jit = block->_jit;
136
137         lightrec_clean_regs(reg_cache, _jit);
138
139         /* Call the interpreter with the block's address in JIT_V1 and the
140          * PC (which might have an offset) in JIT_V0. */
141         lightrec_load_imm(reg_cache, _jit, JIT_V0, block->pc,
142                           block->pc + (offset << 2));
143         if (lightrec_store_next_pc()) {
144               jit_stxi_i(offsetof(struct lightrec_state, next_pc),
145                          LIGHTREC_REG_STATE, JIT_V0);
146         }
147
148         jit_movi(JIT_V1, (uintptr_t)block);
149
150         jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
151         lightrec_jump_to_fn(_jit, state->state->interpreter_func);
152 }
153
154 static void lightrec_emit_eob(struct lightrec_cstate *state,
155                               const struct block *block, u16 offset)
156 {
157         struct regcache *reg_cache = state->reg_cache;
158         jit_state_t *_jit = block->_jit;
159
160         lightrec_clean_regs(reg_cache, _jit);
161
162         lightrec_load_imm(reg_cache, _jit, JIT_V0, block->pc,
163                           block->pc + (offset << 2));
164         if (lightrec_store_next_pc()) {
165               jit_stxi_i(offsetof(struct lightrec_state, next_pc),
166                          LIGHTREC_REG_STATE, JIT_V0);
167         }
168
169         jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
170
171         lightrec_jump_to_eob(state, _jit);
172 }
173
174 static void rec_special_JR(struct lightrec_cstate *state, const struct block *block, u16 offset)
175 {
176         union code c = block->opcode_list[offset].c;
177
178         _jit_name(block->_jit, __func__);
179         lightrec_emit_end_of_block(state, block, offset, c.r.rs, 0, 31, 0, true);
180 }
181
182 static void rec_special_JALR(struct lightrec_cstate *state, const struct block *block, u16 offset)
183 {
184         union code c = block->opcode_list[offset].c;
185
186         _jit_name(block->_jit, __func__);
187         lightrec_emit_end_of_block(state, block, offset, c.r.rs, 0, c.r.rd,
188                                    get_branch_pc(block, offset, 2), true);
189 }
190
191 static void rec_J(struct lightrec_cstate *state, const struct block *block, u16 offset)
192 {
193         union code c = block->opcode_list[offset].c;
194
195         _jit_name(block->_jit, __func__);
196         lightrec_emit_end_of_block(state, block, offset, -1,
197                                    (block->pc & 0xf0000000) | (c.j.imm << 2),
198                                    31, 0, true);
199 }
200
201 static void rec_JAL(struct lightrec_cstate *state, const struct block *block, u16 offset)
202 {
203         union code c = block->opcode_list[offset].c;
204
205         _jit_name(block->_jit, __func__);
206         lightrec_emit_end_of_block(state, block, offset, -1,
207                                    (block->pc & 0xf0000000) | (c.j.imm << 2),
208                                    31, get_branch_pc(block, offset, 2), true);
209 }
210
211 static void lightrec_do_early_unload(struct lightrec_cstate *state,
212                                      const struct block *block, u16 offset)
213 {
214         struct regcache *reg_cache = state->reg_cache;
215         const struct opcode *op = &block->opcode_list[offset];
216         jit_state_t *_jit = block->_jit;
217         unsigned int i;
218         u8 reg;
219         struct {
220                 u8 reg, op;
221         } reg_ops[3] = {
222                 { op->r.rd, LIGHTREC_FLAGS_GET_RD(op->flags), },
223                 { op->i.rt, LIGHTREC_FLAGS_GET_RT(op->flags), },
224                 { op->i.rs, LIGHTREC_FLAGS_GET_RS(op->flags), },
225         };
226
227         for (i = 0; i < ARRAY_SIZE(reg_ops); i++) {
228                 reg = reg_ops[i].reg;
229
230                 switch (reg_ops[i].op) {
231                 case LIGHTREC_REG_UNLOAD:
232                         lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, true);
233                         break;
234
235                 case LIGHTREC_REG_DISCARD:
236                         lightrec_discard_reg_if_loaded(reg_cache, reg);
237                         break;
238
239                 case LIGHTREC_REG_CLEAN:
240                         lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, false);
241                         break;
242                 default:
243                         break;
244                 };
245         }
246 }
247
/*
 * Common emitter for the branch opcodes.
 *
 * 'code' is the JIT branch code used to skip over the "branch taken" path;
 * the callers in this file pass the inverse of the opcode's condition.
 * 'code2' is the JIT branch code for the condition itself; it is only used
 * when the branch can jump straight to its local target (no_indirection).
 * 'link' is the value to write to register 31, or 0 for no link.
 * 'unconditional' skips the comparison entirely: only the "taken" path is
 * emitted.
 * 'bz': when true, the value 0 is used as second comparison operand instead
 * of allocating the register op->i.rt.
 */
248 static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset,
249                   jit_code_t code, jit_code_t code2, u32 link, bool unconditional, bool bz)
250 {
251         struct regcache *reg_cache = state->reg_cache;
252         struct native_register *regs_backup;
253         jit_state_t *_jit = block->_jit;
254         struct lightrec_branch *branch;
255         const struct opcode *op = &block->opcode_list[offset],
256                             *ds = get_delay_slot(block->opcode_list, offset);
257         jit_node_t *addr;
258         bool is_forward = (s16)op->i.imm >= 0;
259         int op_cycles = lightrec_cycles_of_opcode(op->c);
260         u32 target_offset, cycles = state->cycles + op_cycles;
261         bool no_indirection = false;
262         u32 next_pc;
263         u8 rs, rt;
264
265         jit_note(__FILE__, __LINE__);
266
267         if (!op_flag_no_ds(op->flags))
268                 cycles += lightrec_cycles_of_opcode(ds->c);
269
        /* Everything counted so far is subtracted below through 'cycles'.
         * NOTE(review): the counter restarts at -op_cycles, presumably
         * because the caller adds this opcode's cycles afterwards - confirm. */
270         state->cycles = -op_cycles;
271
272         if (!unconditional) {
273                 rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT);
274                 rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache,
275                                                     _jit, op->i.rt, REG_EXT);
276
277                 /* Unload dead registers before evaluating the branch */
278                 if (OPT_EARLY_UNLOAD)
279                         lightrec_do_early_unload(state, block, offset);
280
                /* The condition can branch directly to the local target only
                 * for a forward local branch with nothing to emit for the
                 * delay slot and no dirty register to write back. */
281                 if (op_flag_local_branch(op->flags) &&
282                     (op_flag_no_ds(op->flags) || !ds->opcode) &&
283                     is_forward && !lightrec_has_dirty_regs(reg_cache))
284                         no_indirection = true;
285
286                 if (no_indirection)
287                         pr_debug("Using no indirection for branch at offset 0x%hx\n", offset << 2);
288         }
289
290         if (cycles)
291                 jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
292
293         if (!unconditional) {
294                 /* Generate the branch opcode */
                /* 'addr' is patched further down, right before the code of
                 * the "not taken" path. */
295                 if (!no_indirection)
296                         addr = jit_new_node_pww(code, NULL, rs, rt);
297
298                 lightrec_free_regs(reg_cache);
                /* Save the register cache state; it is restored with
                 * lightrec_regcache_leave_branch() for the other path. */
299                 regs_backup = lightrec_regcache_enter_branch(reg_cache);
300         }
301
302         if (op_flag_local_branch(op->flags)) {
303                 /* Recompile the delay slot */
304                 if (!op_flag_no_ds(op->flags) && ds->opcode) {
305                         /* Never handle load delays with local branches. */
306                         state->no_load_delay = true;
307                         lightrec_rec_opcode(state, block, offset + 1);
308                 }
309
310                 if (link)
311                         update_ra_register(reg_cache, _jit, 31, block->pc, link);
312
313                 /* Clean remaining registers */
314                 lightrec_clean_regs(reg_cache, _jit);
315
                /* Target index in the opcode list, relative to the opcode
                 * after the branch; one less when flagged "no delay slot". */
316                 target_offset = offset + 1 + (s16)op->i.imm
317                         - !!op_flag_no_ds(op->flags);
318                 pr_debug("Adding local branch to offset 0x%x\n",
319                          target_offset << 2);
                /* Record the jump node together with its target offset in
                 * state->local_branches. */
320                 branch = &state->local_branches[
321                         state->nb_local_branches++];
322
323                 branch->target = target_offset;
324
                /* Backward branches are only taken while the cycle counter
                 * is positive; otherwise execution falls through to the
                 * end-of-block code emitted below. */
325                 if (no_indirection)
326                         branch->branch = jit_new_node_pww(code2, NULL, rs, rt);
327                 else if (is_forward)
328                         branch->branch = jit_b();
329                 else
330                         branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0);
331         }
332
        /* Non-local branches, and backward local branches, get an
         * end-of-block sequence jumping to the branch target. */
333         if (!op_flag_local_branch(op->flags) || !is_forward) {
334                 next_pc = get_branch_pc(block, offset, 1 + (s16)op->i.imm);
335                 state->no_load_delay = op_flag_local_branch(op->flags);
336                 lightrec_emit_end_of_block(state, block, offset, -1, next_pc,
337                                            31, link, false);
338         }
339
340         if (!unconditional) {
                /* "Not taken" path starts here. */
341                 if (!no_indirection)
342                         jit_patch(addr);
343
344                 lightrec_regcache_leave_branch(reg_cache, regs_backup);
345
                /* With 'bz' set, the link value is written on this path too. */
346                 if (bz && link)
347                         update_ra_register(reg_cache, _jit, 31, block->pc, link);
348
                /* The delay slot is recompiled a second time for this path. */
349                 if (!op_flag_no_ds(op->flags) && ds->opcode) {
350                         state->no_load_delay = true;
351                         lightrec_rec_opcode(state, block, offset + 1);
352                 }
353         }
354 }
355
356 static void rec_BNE(struct lightrec_cstate *state,
357                     const struct block *block, u16 offset)
358 {
359         union code c = block->opcode_list[offset].c;
360
361         _jit_name(block->_jit, __func__);
362
363         if (c.i.rt == 0)
364                 rec_b(state, block, offset, jit_code_beqi, jit_code_bnei, 0, false, true);
365         else
366                 rec_b(state, block, offset, jit_code_beqr, jit_code_bner, 0, false, false);
367 }
368
369 static void rec_BEQ(struct lightrec_cstate *state,
370                     const struct block *block, u16 offset)
371 {
372         union code c = block->opcode_list[offset].c;
373
374         _jit_name(block->_jit, __func__);
375
376         if (c.i.rt == 0)
377                 rec_b(state, block, offset, jit_code_bnei, jit_code_beqi, 0, c.i.rs == 0, true);
378         else
379                 rec_b(state, block, offset, jit_code_bner, jit_code_beqr, 0, c.i.rs == c.i.rt, false);
380 }
381
382 static void rec_BLEZ(struct lightrec_cstate *state,
383                      const struct block *block, u16 offset)
384 {
385         union code c = block->opcode_list[offset].c;
386
387         _jit_name(block->_jit, __func__);
388         rec_b(state, block, offset, jit_code_bgti, jit_code_blei, 0, c.i.rs == 0, true);
389 }
390
391 static void rec_BGTZ(struct lightrec_cstate *state,
392                      const struct block *block, u16 offset)
393 {
394         _jit_name(block->_jit, __func__);
395         rec_b(state, block, offset, jit_code_blei, jit_code_bgti, 0, false, true);
396 }
397
398 static void rec_regimm_BLTZ(struct lightrec_cstate *state,
399                             const struct block *block, u16 offset)
400 {
401         _jit_name(block->_jit, __func__);
402         rec_b(state, block, offset, jit_code_bgei, jit_code_blti, 0, false, true);
403 }
404
405 static void rec_regimm_BLTZAL(struct lightrec_cstate *state,
406                               const struct block *block, u16 offset)
407 {
408         _jit_name(block->_jit, __func__);
409         rec_b(state, block, offset, jit_code_bgei, jit_code_blti,
410               get_branch_pc(block, offset, 2), false, true);
411 }
412
413 static void rec_regimm_BGEZ(struct lightrec_cstate *state,
414                             const struct block *block, u16 offset)
415 {
416         union code c = block->opcode_list[offset].c;
417
418         _jit_name(block->_jit, __func__);
419         rec_b(state, block, offset, jit_code_blti, jit_code_bgei, 0, !c.i.rs, true);
420 }
421
422 static void rec_regimm_BGEZAL(struct lightrec_cstate *state,
423                               const struct block *block, u16 offset)
424 {
425         const struct opcode *op = &block->opcode_list[offset];
426         _jit_name(block->_jit, __func__);
427         rec_b(state, block, offset, jit_code_blti, jit_code_bgei,
428               get_branch_pc(block, offset, 2),
429               !op->i.rs, true);
430 }
431
432 static void rec_alu_imm(struct lightrec_cstate *state, const struct block *block,
433                         u16 offset, jit_code_t code, bool slti)
434 {
435         struct regcache *reg_cache = state->reg_cache;
436         union code c = block->opcode_list[offset].c;
437         jit_state_t *_jit = block->_jit;
438         u8 rs, rt, out_flags = REG_EXT;
439
440         if (slti)
441                 out_flags |= REG_ZEXT;
442
443         jit_note(__FILE__, __LINE__);
444         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, REG_EXT);
445         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, out_flags);
446
447         jit_new_node_www(code, rt, rs, (s32)(s16) c.i.imm);
448
449         lightrec_free_reg(reg_cache, rs);
450         lightrec_free_reg(reg_cache, rt);
451 }
452
453 static void rec_alu_special(struct lightrec_cstate *state, const struct block *block,
454                             u16 offset, jit_code_t code, bool out_ext)
455 {
456         struct regcache *reg_cache = state->reg_cache;
457         union code c = block->opcode_list[offset].c;
458         jit_state_t *_jit = block->_jit;
459         u8 rd, rt, rs;
460
461         jit_note(__FILE__, __LINE__);
462         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, REG_EXT);
463         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, REG_EXT);
464         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd,
465                                     out_ext ? REG_EXT | REG_ZEXT : 0);
466
467         jit_new_node_www(code, rd, rs, rt);
468
469         lightrec_free_reg(reg_cache, rs);
470         lightrec_free_reg(reg_cache, rt);
471         lightrec_free_reg(reg_cache, rd);
472 }
473
474 static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *block,
475                            u16 offset, jit_code_t code)
476 {
477         struct regcache *reg_cache = state->reg_cache;
478         union code c = block->opcode_list[offset].c;
479         jit_state_t *_jit = block->_jit;
480         u8 rd, rt, rs, temp, flags = 0;
481
482         jit_note(__FILE__, __LINE__);
483         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
484
485         if (code == jit_code_rshr)
486                 flags = REG_EXT;
487         else if (code == jit_code_rshr_u)
488                 flags = REG_ZEXT;
489
490         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
491         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags);
492
493         if (rs != rd && rt != rd) {
494                 jit_andi(rd, rs, 0x1f);
495                 jit_new_node_www(code, rd, rt, rd);
496         } else {
497                 temp = lightrec_alloc_reg_temp(reg_cache, _jit);
498                 jit_andi(temp, rs, 0x1f);
499                 jit_new_node_www(code, rd, rt, temp);
500                 lightrec_free_reg(reg_cache, temp);
501         }
502
503         lightrec_free_reg(reg_cache, rs);
504         lightrec_free_reg(reg_cache, rt);
505         lightrec_free_reg(reg_cache, rd);
506 }
507
508 static void rec_movi(struct lightrec_cstate *state,
509                      const struct block *block, u16 offset)
510 {
511         struct regcache *reg_cache = state->reg_cache;
512         union code c = block->opcode_list[offset].c;
513         jit_state_t *_jit = block->_jit;
514         u16 flags = REG_EXT;
515         u8 rt;
516
517         if (!(c.i.imm & 0x8000))
518                 flags |= REG_ZEXT;
519
520         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
521
522         jit_movi(rt, (s32)(s16) c.i.imm);
523
524         lightrec_free_reg(reg_cache, rt);
525 }
526
527 static void rec_ADDIU(struct lightrec_cstate *state,
528                       const struct block *block, u16 offset)
529 {
530         _jit_name(block->_jit, __func__);
531
532         if (block->opcode_list[offset].c.i.rs)
533                 rec_alu_imm(state, block, offset, jit_code_addi, false);
534         else
535                 rec_movi(state, block, offset);
536 }
537
538 static void rec_ADDI(struct lightrec_cstate *state,
539                      const struct block *block, u16 offset)
540 {
541         /* TODO: Handle the exception? */
542         _jit_name(block->_jit, __func__);
543         rec_ADDIU(state, block, offset);
544 }
545
546 static void rec_SLTIU(struct lightrec_cstate *state,
547                       const struct block *block, u16 offset)
548 {
549         _jit_name(block->_jit, __func__);
550         rec_alu_imm(state, block, offset, jit_code_lti_u, true);
551 }
552
553 static void rec_SLTI(struct lightrec_cstate *state,
554                      const struct block *block, u16 offset)
555 {
556         _jit_name(block->_jit, __func__);
557         rec_alu_imm(state, block, offset, jit_code_lti, true);
558 }
559
560 static void rec_ANDI(struct lightrec_cstate *state,
561                      const struct block *block, u16 offset)
562 {
563         struct regcache *reg_cache = state->reg_cache;
564         union code c = block->opcode_list[offset].c;
565         jit_state_t *_jit = block->_jit;
566         u8 rs, rt;
567
568         _jit_name(block->_jit, __func__);
569         jit_note(__FILE__, __LINE__);
570         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
571         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt,
572                                     REG_EXT | REG_ZEXT);
573
574         /* PSX code uses ANDI 0xff / ANDI 0xffff a lot, which are basically
575          * casts to uint8_t / uint16_t. */
576         if (c.i.imm == 0xff)
577                 jit_extr_uc(rt, rs);
578         else if (c.i.imm == 0xffff)
579                 jit_extr_us(rt, rs);
580         else
581                 jit_andi(rt, rs, (u32)(u16) c.i.imm);
582
583         lightrec_free_reg(reg_cache, rs);
584         lightrec_free_reg(reg_cache, rt);
585 }
586
587 static void rec_alu_or_xor(struct lightrec_cstate *state, const struct block *block,
588                            u16 offset, jit_code_t code)
589 {
590         struct regcache *reg_cache = state->reg_cache;
591         union code c = block->opcode_list[offset].c;
592         jit_state_t *_jit = block->_jit;
593         u8 rs, rt, flags;
594
595         jit_note(__FILE__, __LINE__);
596         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
597         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, 0);
598
599         flags = lightrec_get_reg_in_flags(reg_cache, rs);
600         lightrec_set_reg_out_flags(reg_cache, rt, flags);
601
602         jit_new_node_www(code, rt, rs, (u32)(u16) c.i.imm);
603
604         lightrec_free_reg(reg_cache, rs);
605         lightrec_free_reg(reg_cache, rt);
606 }
607
608
609 static void rec_ORI(struct lightrec_cstate *state,
610                     const struct block *block, u16 offset)
611 {
612         _jit_name(block->_jit, __func__);
613         rec_alu_or_xor(state, block, offset, jit_code_ori);
614 }
615
616 static void rec_XORI(struct lightrec_cstate *state,
617                      const struct block *block, u16 offset)
618 {
619         _jit_name(block->_jit, __func__);
620         rec_alu_or_xor(state, block, offset, jit_code_xori);
621 }
622
623 static void rec_LUI(struct lightrec_cstate *state,
624                     const struct block *block, u16 offset)
625 {
626         struct regcache *reg_cache = state->reg_cache;
627         union code c = block->opcode_list[offset].c;
628         jit_state_t *_jit = block->_jit;
629         u8 rt, flags = REG_EXT;
630
631         jit_name(__func__);
632         jit_note(__FILE__, __LINE__);
633
634         if (!(c.i.imm & BIT(15)))
635                 flags |= REG_ZEXT;
636
637         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
638
639         jit_movi(rt, (s32)(c.i.imm << 16));
640
641         lightrec_free_reg(reg_cache, rt);
642 }
643
644 static void rec_special_ADDU(struct lightrec_cstate *state,
645                              const struct block *block, u16 offset)
646 {
647         _jit_name(block->_jit, __func__);
648         rec_alu_special(state, block, offset, jit_code_addr, false);
649 }
650
651 static void rec_special_ADD(struct lightrec_cstate *state,
652                             const struct block *block, u16 offset)
653 {
654         /* TODO: Handle the exception? */
655         _jit_name(block->_jit, __func__);
656         rec_alu_special(state, block, offset, jit_code_addr, false);
657 }
658
659 static void rec_special_SUBU(struct lightrec_cstate *state,
660                              const struct block *block, u16 offset)
661 {
662         _jit_name(block->_jit, __func__);
663         rec_alu_special(state, block, offset, jit_code_subr, false);
664 }
665
666 static void rec_special_SUB(struct lightrec_cstate *state,
667                             const struct block *block, u16 offset)
668 {
669         /* TODO: Handle the exception? */
670         _jit_name(block->_jit, __func__);
671         rec_alu_special(state, block, offset, jit_code_subr, false);
672 }
673
674 static void rec_special_AND(struct lightrec_cstate *state,
675                             const struct block *block, u16 offset)
676 {
677         struct regcache *reg_cache = state->reg_cache;
678         union code c = block->opcode_list[offset].c;
679         jit_state_t *_jit = block->_jit;
680         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
681
682         _jit_name(block->_jit, __func__);
683         jit_note(__FILE__, __LINE__);
684         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
685         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
686         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
687
688         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
689         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
690
691         /* Z(rd) = Z(rs) | Z(rt) */
692         flags_rd = REG_ZEXT & (flags_rs | flags_rt);
693
694         /* E(rd) = (E(rt) & Z(rt)) | (E(rs) & Z(rs)) | (E(rs) & E(rt)) */
695         if (((flags_rs & REG_EXT) && (flags_rt & REG_ZEXT)) ||
696             ((flags_rt & REG_EXT) && (flags_rs & REG_ZEXT)) ||
697             (REG_EXT & flags_rs & flags_rt))
698                 flags_rd |= REG_EXT;
699
700         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
701
702         jit_andr(rd, rs, rt);
703
704         lightrec_free_reg(reg_cache, rs);
705         lightrec_free_reg(reg_cache, rt);
706         lightrec_free_reg(reg_cache, rd);
707 }
708
709 static void rec_special_or_nor(struct lightrec_cstate *state,
710                                const struct block *block, u16 offset, bool nor)
711 {
712         struct regcache *reg_cache = state->reg_cache;
713         union code c = block->opcode_list[offset].c;
714         jit_state_t *_jit = block->_jit;
715         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd = 0;
716
717         jit_note(__FILE__, __LINE__);
718         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
719         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
720         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
721
722         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
723         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
724
725         /* or: Z(rd) = Z(rs) & Z(rt)
726          * nor: Z(rd) = 0 */
727         if (!nor)
728                 flags_rd = REG_ZEXT & flags_rs & flags_rt;
729
730         /* E(rd) = E(rs) & E(rt) */
731         if (REG_EXT & flags_rs & flags_rt)
732                 flags_rd |= REG_EXT;
733
734         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
735
736         jit_orr(rd, rs, rt);
737
738         if (nor)
739                 jit_comr(rd, rd);
740
741         lightrec_free_reg(reg_cache, rs);
742         lightrec_free_reg(reg_cache, rt);
743         lightrec_free_reg(reg_cache, rd);
744 }
745
746 static void rec_special_OR(struct lightrec_cstate *state,
747                            const struct block *block, u16 offset)
748 {
749         _jit_name(block->_jit, __func__);
750         rec_special_or_nor(state, block, offset, false);
751 }
752
753 static void rec_special_NOR(struct lightrec_cstate *state,
754                             const struct block *block, u16 offset)
755 {
756         _jit_name(block->_jit, __func__);
757         rec_special_or_nor(state, block, offset, true);
758 }
759
760 static void rec_special_XOR(struct lightrec_cstate *state,
761                             const struct block *block, u16 offset)
762 {
763         struct regcache *reg_cache = state->reg_cache;
764         union code c = block->opcode_list[offset].c;
765         jit_state_t *_jit = block->_jit;
766         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
767
768         _jit_name(block->_jit, __func__);
769
770         jit_note(__FILE__, __LINE__);
771         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
772         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
773         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
774
775         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
776         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
777
778         /* Z(rd) = Z(rs) & Z(rt) */
779         flags_rd = REG_ZEXT & flags_rs & flags_rt;
780
781         /* E(rd) = E(rs) & E(rt) */
782         flags_rd |= REG_EXT & flags_rs & flags_rt;
783
784         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
785
786         jit_xorr(rd, rs, rt);
787
788         lightrec_free_reg(reg_cache, rs);
789         lightrec_free_reg(reg_cache, rt);
790         lightrec_free_reg(reg_cache, rd);
791 }
792
793 static void rec_special_SLTU(struct lightrec_cstate *state,
794                              const struct block *block, u16 offset)
795 {
796         _jit_name(block->_jit, __func__);
797         rec_alu_special(state, block, offset, jit_code_ltr_u, true);
798 }
799
800 static void rec_special_SLT(struct lightrec_cstate *state,
801                             const struct block *block, u16 offset)
802 {
803         _jit_name(block->_jit, __func__);
804         rec_alu_special(state, block, offset, jit_code_ltr, true);
805 }
806
807 static void rec_special_SLLV(struct lightrec_cstate *state,
808                              const struct block *block, u16 offset)
809 {
810         _jit_name(block->_jit, __func__);
811         rec_alu_shiftv(state, block, offset, jit_code_lshr);
812 }
813
814 static void rec_special_SRLV(struct lightrec_cstate *state,
815                              const struct block *block, u16 offset)
816 {
817         _jit_name(block->_jit, __func__);
818         rec_alu_shiftv(state, block, offset, jit_code_rshr_u);
819 }
820
821 static void rec_special_SRAV(struct lightrec_cstate *state,
822                              const struct block *block, u16 offset)
823 {
824         _jit_name(block->_jit, __func__);
825         rec_alu_shiftv(state, block, offset, jit_code_rshr);
826 }
827
828 static void rec_alu_shift(struct lightrec_cstate *state, const struct block *block,
829                           u16 offset, jit_code_t code)
830 {
831         struct regcache *reg_cache = state->reg_cache;
832         union code c = block->opcode_list[offset].c;
833         jit_state_t *_jit = block->_jit;
834         u8 rd, rt, flags = 0;
835
836         jit_note(__FILE__, __LINE__);
837
838         if (code == jit_code_rshi)
839                 flags = REG_EXT;
840         else if (code == jit_code_rshi_u)
841                 flags = REG_ZEXT;
842
843         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
844
845         /* Input reg is zero-extended, if we SRL at least by one bit, we know
846          * the output reg will be both zero-extended and sign-extended. */
847         if (code == jit_code_rshi_u && c.r.imm)
848                 flags |= REG_EXT;
849         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags);
850
851         jit_new_node_www(code, rd, rt, c.r.imm);
852
853         lightrec_free_reg(reg_cache, rt);
854         lightrec_free_reg(reg_cache, rd);
855 }
856
857 static void rec_special_SLL(struct lightrec_cstate *state,
858                             const struct block *block, u16 offset)
859 {
860         _jit_name(block->_jit, __func__);
861         rec_alu_shift(state, block, offset, jit_code_lshi);
862 }
863
864 static void rec_special_SRL(struct lightrec_cstate *state,
865                             const struct block *block, u16 offset)
866 {
867         _jit_name(block->_jit, __func__);
868         rec_alu_shift(state, block, offset, jit_code_rshi_u);
869 }
870
871 static void rec_special_SRA(struct lightrec_cstate *state,
872                             const struct block *block, u16 offset)
873 {
874         _jit_name(block->_jit, __func__);
875         rec_alu_shift(state, block, offset, jit_code_rshi);
876 }
877
878 static void rec_alu_mult(struct lightrec_cstate *state,
879                          const struct block *block, u16 offset, bool is_signed)
880 {
881         struct regcache *reg_cache = state->reg_cache;
882         union code c = block->opcode_list[offset].c;
883         u32 flags = block->opcode_list[offset].flags;
884         u8 reg_lo = get_mult_div_lo(c);
885         u8 reg_hi = get_mult_div_hi(c);
886         jit_state_t *_jit = block->_jit;
887         u8 lo, hi, rs, rt, rflags = 0;
888
889         jit_note(__FILE__, __LINE__);
890
891         if (is_signed)
892                 rflags = REG_EXT;
893         else
894                 rflags = REG_ZEXT;
895
896         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
897         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
898
899         if (!op_flag_no_lo(flags))
900                 lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
901         else if (__WORDSIZE == 32)
902                 lo = lightrec_alloc_reg_temp(reg_cache, _jit);
903
904         if (!op_flag_no_hi(flags))
905                 hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT);
906
907         if (__WORDSIZE == 32) {
908                 /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
909                  * operation if the MULT was detected a 32-bit only. */
910                 if (!op_flag_no_hi(flags)) {
911                         if (is_signed)
912                                 jit_qmulr(lo, hi, rs, rt);
913                         else
914                                 jit_qmulr_u(lo, hi, rs, rt);
915                 } else {
916                         jit_mulr(lo, rs, rt);
917                 }
918         } else {
919                 /* On 64-bit systems, do a 64*64->64 bit operation. */
920                 if (op_flag_no_lo(flags)) {
921                         jit_mulr(hi, rs, rt);
922                         jit_rshi(hi, hi, 32);
923                 } else {
924                         jit_mulr(lo, rs, rt);
925
926                         /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
927                         if (!op_flag_no_hi(flags))
928                                 jit_rshi(hi, lo, 32);
929                 }
930         }
931
932         lightrec_free_reg(reg_cache, rs);
933         lightrec_free_reg(reg_cache, rt);
934         if (!op_flag_no_lo(flags) || __WORDSIZE == 32)
935                 lightrec_free_reg(reg_cache, lo);
936         if (!op_flag_no_hi(flags))
937                 lightrec_free_reg(reg_cache, hi);
938 }
939
940 static void rec_alu_div(struct lightrec_cstate *state,
941                         const struct block *block, u16 offset, bool is_signed)
942 {
943         struct regcache *reg_cache = state->reg_cache;
944         union code c = block->opcode_list[offset].c;
945         u32 flags = block->opcode_list[offset].flags;
946         bool no_check = op_flag_no_div_check(flags);
947         u8 reg_lo = get_mult_div_lo(c);
948         u8 reg_hi = get_mult_div_hi(c);
949         jit_state_t *_jit = block->_jit;
950         jit_node_t *branch, *to_end;
951         u8 lo = 0, hi = 0, rs, rt, rflags = 0;
952
953         jit_note(__FILE__, __LINE__);
954
955         if (is_signed)
956                 rflags = REG_EXT;
957         else
958                 rflags = REG_ZEXT;
959
960         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
961         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
962
963         if (!op_flag_no_lo(flags))
964                 lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
965
966         if (!op_flag_no_hi(flags))
967                 hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, 0);
968
969         /* Jump to special handler if dividing by zero  */
970         if (!no_check)
971                 branch = jit_beqi(rt, 0);
972
973         if (op_flag_no_lo(flags)) {
974                 if (is_signed)
975                         jit_remr(hi, rs, rt);
976                 else
977                         jit_remr_u(hi, rs, rt);
978         } else if (op_flag_no_hi(flags)) {
979                 if (is_signed)
980                         jit_divr(lo, rs, rt);
981                 else
982                         jit_divr_u(lo, rs, rt);
983         } else {
984                 if (is_signed)
985                         jit_qdivr(lo, hi, rs, rt);
986                 else
987                         jit_qdivr_u(lo, hi, rs, rt);
988         }
989
990         if (!no_check) {
991                 /* Jump above the div-by-zero handler */
992                 to_end = jit_b();
993
994                 jit_patch(branch);
995
996                 if (!op_flag_no_lo(flags)) {
997                         if (is_signed) {
998                                 jit_ltr(lo, rs, rt);
999                                 jit_lshi(lo, lo, 1);
1000                                 jit_subi(lo, lo, 1);
1001                         } else {
1002                                 jit_subi(lo, rt, 1);
1003                         }
1004                 }
1005
1006                 if (!op_flag_no_hi(flags))
1007                         jit_movr(hi, rs);
1008
1009                 jit_patch(to_end);
1010         }
1011
1012         lightrec_free_reg(reg_cache, rs);
1013         lightrec_free_reg(reg_cache, rt);
1014
1015         if (!op_flag_no_lo(flags))
1016                 lightrec_free_reg(reg_cache, lo);
1017
1018         if (!op_flag_no_hi(flags))
1019                 lightrec_free_reg(reg_cache, hi);
1020 }
1021
1022 static void rec_special_MULT(struct lightrec_cstate *state,
1023                              const struct block *block, u16 offset)
1024 {
1025         _jit_name(block->_jit, __func__);
1026         rec_alu_mult(state, block, offset, true);
1027 }
1028
1029 static void rec_special_MULTU(struct lightrec_cstate *state,
1030                               const struct block *block, u16 offset)
1031 {
1032         _jit_name(block->_jit, __func__);
1033         rec_alu_mult(state, block, offset, false);
1034 }
1035
1036 static void rec_special_DIV(struct lightrec_cstate *state,
1037                             const struct block *block, u16 offset)
1038 {
1039         _jit_name(block->_jit, __func__);
1040         rec_alu_div(state, block, offset, true);
1041 }
1042
1043 static void rec_special_DIVU(struct lightrec_cstate *state,
1044                              const struct block *block, u16 offset)
1045 {
1046         _jit_name(block->_jit, __func__);
1047         rec_alu_div(state, block, offset, false);
1048 }
1049
1050 static void rec_alu_mv_lo_hi(struct lightrec_cstate *state,
1051                              const struct block *block, u8 dst, u8 src)
1052 {
1053         struct regcache *reg_cache = state->reg_cache;
1054         jit_state_t *_jit = block->_jit;
1055
1056         jit_note(__FILE__, __LINE__);
1057         src = lightrec_alloc_reg_in(reg_cache, _jit, src, 0);
1058         dst = lightrec_alloc_reg_out(reg_cache, _jit, dst, REG_EXT);
1059
1060         jit_extr_i(dst, src);
1061
1062         lightrec_free_reg(reg_cache, src);
1063         lightrec_free_reg(reg_cache, dst);
1064 }
1065
1066 static void rec_special_MFHI(struct lightrec_cstate *state,
1067                              const struct block *block, u16 offset)
1068 {
1069         union code c = block->opcode_list[offset].c;
1070
1071         _jit_name(block->_jit, __func__);
1072         rec_alu_mv_lo_hi(state, block, c.r.rd, REG_HI);
1073 }
1074
1075 static void rec_special_MTHI(struct lightrec_cstate *state,
1076                              const struct block *block, u16 offset)
1077 {
1078         union code c = block->opcode_list[offset].c;
1079
1080         _jit_name(block->_jit, __func__);
1081         rec_alu_mv_lo_hi(state, block, REG_HI, c.r.rs);
1082 }
1083
1084 static void rec_special_MFLO(struct lightrec_cstate *state,
1085                              const struct block *block, u16 offset)
1086 {
1087         union code c = block->opcode_list[offset].c;
1088
1089         _jit_name(block->_jit, __func__);
1090         rec_alu_mv_lo_hi(state, block, c.r.rd, REG_LO);
1091 }
1092
1093 static void rec_special_MTLO(struct lightrec_cstate *state,
1094                              const struct block *block, u16 offset)
1095 {
1096         union code c = block->opcode_list[offset].c;
1097
1098         _jit_name(block->_jit, __func__);
1099         rec_alu_mv_lo_hi(state, block, REG_LO, c.r.rs);
1100 }
1101
1102 static void call_to_c_wrapper(struct lightrec_cstate *state,
1103                               const struct block *block, u32 arg,
1104                               enum c_wrappers wrapper)
1105 {
1106         struct regcache *reg_cache = state->reg_cache;
1107         jit_state_t *_jit = block->_jit;
1108         s8 tmp, tmp2;
1109
1110         /* Make sure JIT_R1 is not mapped; it will be used in the C wrapper. */
1111         tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);
1112
1113         tmp = lightrec_get_reg_with_value(reg_cache,
1114                                           (intptr_t) state->state->wrappers_eps[wrapper]);
1115         if (tmp < 0) {
1116                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1117                 jit_ldxi(tmp, LIGHTREC_REG_STATE,
1118                          offsetof(struct lightrec_state, wrappers_eps[wrapper]));
1119
1120                 lightrec_temp_set_value(reg_cache, tmp,
1121                                         (intptr_t) state->state->wrappers_eps[wrapper]);
1122         }
1123
1124         lightrec_free_reg(reg_cache, tmp2);
1125
1126 #ifdef __mips__
1127         /* On MIPS, register t9 is always used as the target register for JALR.
1128          * Therefore if it does not contain the target address we must
1129          * invalidate it. */
1130         if (tmp != _T9)
1131                 lightrec_unload_reg(reg_cache, _jit, _T9);
1132 #endif
1133
1134         jit_prepare();
1135         jit_pushargi(arg);
1136
1137         lightrec_regcache_mark_live(reg_cache, _jit);
1138         jit_callr(tmp);
1139
1140         lightrec_free_reg(reg_cache, tmp);
1141         lightrec_regcache_mark_live(reg_cache, _jit);
1142 }
1143
1144 static void rec_io(struct lightrec_cstate *state,
1145                    const struct block *block, u16 offset,
1146                    bool load_rt, bool read_rt)
1147 {
1148         struct regcache *reg_cache = state->reg_cache;
1149         jit_state_t *_jit = block->_jit;
1150         union code c = block->opcode_list[offset].c;
1151         u32 flags = block->opcode_list[offset].flags;
1152         bool is_tagged = LIGHTREC_FLAGS_GET_IO_MODE(flags);
1153         u32 lut_entry;
1154         u8 zero;
1155
1156         jit_note(__FILE__, __LINE__);
1157
1158         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
1159
1160         if (read_rt && likely(c.i.rt))
1161                 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
1162         else if (load_rt)
1163                 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
1164
1165         if (op_flag_load_delay(flags) && !state->no_load_delay) {
1166                 /* Clear state->in_delay_slot_n. This notifies the lightrec_rw
1167                  * wrapper that it should write the REG_TEMP register instead of
1168                  * the actual output register of the opcode. */
1169                 zero = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
1170                 jit_stxi_c(offsetof(struct lightrec_state, in_delay_slot_n),
1171                             LIGHTREC_REG_STATE, zero);
1172                 lightrec_free_reg(reg_cache, zero);
1173         }
1174
1175         if (is_tagged) {
1176                 call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_RW);
1177         } else {
1178                 lut_entry = lightrec_get_lut_entry(block);
1179                 call_to_c_wrapper(state, block, (lut_entry << 16) | offset,
1180                                   C_WRAPPER_RW_GENERIC);
1181         }
1182 }
1183
1184 static u32 rec_ram_mask(struct lightrec_state *state)
1185 {
1186         return (RAM_SIZE << (state->mirrors_mapped * 2)) - 1;
1187 }
1188
1189 static u32 rec_io_mask(const struct lightrec_state *state)
1190 {
1191         u32 length = state->maps[PSX_MAP_HW_REGISTERS].length;
1192
1193         return 0x1f800000 | GENMASK(31 - clz32(length - 1), 0);
1194 }
1195
1196 static void rec_store_memory(struct lightrec_cstate *cstate,
1197                              const struct block *block,
1198                              u16 offset, jit_code_t code,
1199                              jit_code_t swap_code,
1200                              uintptr_t addr_offset, u32 addr_mask,
1201                              bool invalidate)
1202 {
1203         const struct lightrec_state *state = cstate->state;
1204         struct regcache *reg_cache = cstate->reg_cache;
1205         struct opcode *op = &block->opcode_list[offset];
1206         jit_state_t *_jit = block->_jit;
1207         union code c = op->c;
1208         u8 rs, rt, tmp, tmp2, tmp3, addr_reg, addr_reg2;
1209         s16 imm = (s16)c.i.imm;
1210         s32 simm = (s32)imm << (1 - lut_is_32bit(state));
1211         s32 lut_offt = offsetof(struct lightrec_state, code_lut);
1212         bool no_mask = op_flag_no_mask(op->flags);
1213         bool add_imm = c.i.imm &&
1214                 ((!state->mirrors_mapped && !no_mask) || (invalidate &&
1215                 ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt))));
1216         bool need_tmp = !no_mask || addr_offset || add_imm || invalidate;
1217         bool swc2 = c.i.op == OP_SWC2;
1218         u8 in_reg = swc2 ? REG_TEMP : c.i.rt;
1219
1220         rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);
1221         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1222         if (need_tmp)
1223                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1224
1225         addr_reg = rs;
1226
1227         if (add_imm) {
1228                 jit_addi(tmp, addr_reg, (s16)c.i.imm);
1229                 lightrec_free_reg(reg_cache, rs);
1230                 addr_reg = tmp;
1231                 imm = 0;
1232         } else if (simm) {
1233                 lut_offt += simm;
1234         }
1235
1236         if (!no_mask) {
1237                 jit_andi(tmp, addr_reg, addr_mask);
1238                 addr_reg = tmp;
1239         }
1240
1241         if (addr_offset) {
1242                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1243                 jit_addi(tmp2, addr_reg, addr_offset);
1244                 addr_reg2 = tmp2;
1245         } else {
1246                 addr_reg2 = addr_reg;
1247         }
1248
1249         if (is_big_endian() && swap_code && in_reg) {
1250                 tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
1251
1252                 jit_new_node_ww(swap_code, tmp3, rt);
1253                 jit_new_node_www(code, imm, addr_reg2, tmp3);
1254
1255                 lightrec_free_reg(reg_cache, tmp3);
1256         } else {
1257                 jit_new_node_www(code, imm, addr_reg2, rt);
1258         }
1259
1260         lightrec_free_reg(reg_cache, rt);
1261
1262         if (invalidate) {
1263                 tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
1264
1265                 if (c.i.op != OP_SW) {
1266                         jit_andi(tmp, addr_reg, ~3);
1267                         addr_reg = tmp;
1268                 }
1269
1270                 if (!lut_is_32bit(state)) {
1271                         jit_lshi(tmp, addr_reg, 1);
1272                         addr_reg = tmp;
1273                 }
1274
1275                 if (addr_reg == rs && c.i.rs == 0) {
1276                         addr_reg = LIGHTREC_REG_STATE;
1277                 } else {
1278                         jit_add_state(tmp, addr_reg);
1279                         addr_reg = tmp;
1280                 }
1281
1282                 if (lut_is_32bit(state))
1283                         jit_stxi_i(lut_offt, addr_reg, tmp3);
1284                 else
1285                         jit_stxi(lut_offt, addr_reg, tmp3);
1286
1287                 lightrec_free_reg(reg_cache, tmp3);
1288         }
1289
1290         if (addr_offset)
1291                 lightrec_free_reg(reg_cache, tmp2);
1292         if (need_tmp)
1293                 lightrec_free_reg(reg_cache, tmp);
1294         lightrec_free_reg(reg_cache, rs);
1295 }
1296
1297 static void rec_store_ram(struct lightrec_cstate *cstate,
1298                           const struct block *block,
1299                           u16 offset, jit_code_t code,
1300                           jit_code_t swap_code, bool invalidate)
1301 {
1302         struct lightrec_state *state = cstate->state;
1303
1304         _jit_note(block->_jit, __FILE__, __LINE__);
1305
1306         return rec_store_memory(cstate, block, offset, code, swap_code,
1307                                 state->offset_ram, rec_ram_mask(state),
1308                                 invalidate);
1309 }
1310
1311 static void rec_store_scratch(struct lightrec_cstate *cstate,
1312                               const struct block *block, u16 offset,
1313                               jit_code_t code, jit_code_t swap_code)
1314 {
1315         _jit_note(block->_jit, __FILE__, __LINE__);
1316
1317         return rec_store_memory(cstate, block, offset, code, swap_code,
1318                                 cstate->state->offset_scratch,
1319                                 0x1fffffff, false);
1320 }
1321
1322 static void rec_store_io(struct lightrec_cstate *cstate,
1323                          const struct block *block, u16 offset,
1324                          jit_code_t code, jit_code_t swap_code)
1325 {
1326         _jit_note(block->_jit, __FILE__, __LINE__);
1327
1328         return rec_store_memory(cstate, block, offset, code, swap_code,
1329                                 cstate->state->offset_io,
1330                                 rec_io_mask(cstate->state), false);
1331 }
1332
1333 static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
1334                                            const struct block *block,
1335                                            u16 offset, jit_code_t code,
1336                                            jit_code_t swap_code)
1337 {
1338         struct lightrec_state *state = cstate->state;
1339         struct regcache *reg_cache = cstate->reg_cache;
1340         union code c = block->opcode_list[offset].c;
1341         jit_state_t *_jit = block->_jit;
1342         jit_node_t *to_not_ram, *to_end;
1343         bool swc2 = c.i.op == OP_SWC2;
1344         bool offset_ram_or_scratch = state->offset_ram || state->offset_scratch;
1345         u8 tmp, tmp2, rs, rt, in_reg = swc2 ? REG_TEMP : c.i.rt;
1346         s16 imm;
1347
1348         jit_note(__FILE__, __LINE__);
1349         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1350         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1351
1352         if (offset_ram_or_scratch)
1353                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1354
1355         /* Convert to KUNSEG and avoid RAM mirrors */
1356         if (state->mirrors_mapped) {
1357                 imm = (s16)c.i.imm;
1358                 jit_andi(tmp, rs, 0x1f800000 | (4 * RAM_SIZE - 1));
1359         } else if (c.i.imm) {
1360                 imm = 0;
1361                 jit_addi(tmp, rs, (s16)c.i.imm);
1362                 jit_andi(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1));
1363         } else {
1364                 imm = 0;
1365                 jit_andi(tmp, rs, 0x1f800000 | (RAM_SIZE - 1));
1366         }
1367
1368         lightrec_free_reg(reg_cache, rs);
1369
1370         if (state->offset_ram != state->offset_scratch) {
1371                 to_not_ram = jit_bmsi(tmp, BIT(28));
1372
1373                 jit_movi(tmp2, state->offset_ram);
1374
1375                 to_end = jit_b();
1376                 jit_patch(to_not_ram);
1377
1378                 jit_movi(tmp2, state->offset_scratch);
1379                 jit_patch(to_end);
1380         } else if (state->offset_ram) {
1381                 jit_movi(tmp2, state->offset_ram);
1382         }
1383
1384         if (offset_ram_or_scratch) {
1385                 jit_addr(tmp, tmp, tmp2);
1386                 lightrec_free_reg(reg_cache, tmp2);
1387         }
1388
1389         rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);
1390
1391         if (is_big_endian() && swap_code && in_reg) {
1392                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1393
1394                 jit_new_node_ww(swap_code, tmp2, rt);
1395                 jit_new_node_www(code, imm, tmp, tmp2);
1396
1397                 lightrec_free_reg(reg_cache, tmp2);
1398         } else {
1399                 jit_new_node_www(code, imm, tmp, rt);
1400         }
1401
1402         lightrec_free_reg(reg_cache, rt);
1403         lightrec_free_reg(reg_cache, tmp);
1404 }
1405
1406 static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block,
1407                              u16 offset, jit_code_t code, jit_code_t swap_code)
1408 {
1409         struct lightrec_state *state = cstate->state;
1410         u32 ram_size = state->mirrors_mapped ? RAM_SIZE * 4 : RAM_SIZE;
1411         struct regcache *reg_cache = cstate->reg_cache;
1412         union code c = block->opcode_list[offset].c;
1413         jit_state_t *_jit = block->_jit;
1414         jit_node_t *to_not_ram, *to_end;
1415         bool swc2 = c.i.op == OP_SWC2;
1416         u8 tmp, tmp2, tmp3, masked_reg, rs, rt;
1417         u8 in_reg = swc2 ? REG_TEMP : c.i.rt;
1418
1419         jit_note(__FILE__, __LINE__);
1420
1421         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1422         tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1423         tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
1424
1425         /* Convert to KUNSEG and avoid RAM mirrors */
1426         if (c.i.imm) {
1427                 jit_addi(tmp2, rs, (s16)c.i.imm);
1428                 jit_andi(tmp2, tmp2, 0x1f800000 | (ram_size - 1));
1429         } else {
1430                 jit_andi(tmp2, rs, 0x1f800000 | (ram_size - 1));
1431         }
1432
1433         lightrec_free_reg(reg_cache, rs);
1434         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1435
1436         if (state->offset_ram != state->offset_scratch) {
1437                 to_not_ram = jit_bgti(tmp2, ram_size);
1438                 masked_reg = tmp2;
1439         } else {
1440                 jit_lti_u(tmp, tmp2, ram_size);
1441                 jit_movnr(tmp, tmp2, tmp);
1442                 masked_reg = tmp;
1443         }
1444
1445         /* Compute the offset to the code LUT */
1446         if (c.i.op == OP_SW)
1447                 jit_andi(tmp, masked_reg, RAM_SIZE - 1);
1448         else
1449                 jit_andi(tmp, masked_reg, (RAM_SIZE - 1) & ~3);
1450
1451         if (!lut_is_32bit(state))
1452                 jit_lshi(tmp, tmp, 1);
1453         jit_add_state(tmp, tmp);
1454
1455         /* Write NULL to the code LUT to invalidate any block that's there */
1456         if (lut_is_32bit(state))
1457                 jit_stxi_i(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
1458         else
1459                 jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
1460
1461         if (state->offset_ram != state->offset_scratch) {
1462                 jit_movi(tmp, state->offset_ram);
1463
1464                 to_end = jit_b();
1465                 jit_patch(to_not_ram);
1466         }
1467
1468         if (state->offset_ram || state->offset_scratch)
1469                 jit_movi(tmp, state->offset_scratch);
1470
1471         if (state->offset_ram != state->offset_scratch)
1472                 jit_patch(to_end);
1473
1474         if (state->offset_ram || state->offset_scratch)
1475                 jit_addr(tmp2, tmp2, tmp);
1476
1477         lightrec_free_reg(reg_cache, tmp);
1478         lightrec_free_reg(reg_cache, tmp3);
1479
1480         rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);
1481
1482         if (is_big_endian() && swap_code && in_reg) {
1483                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1484
1485                 jit_new_node_ww(swap_code, tmp, rt);
1486                 jit_new_node_www(code, 0, tmp2, tmp);
1487
1488                 lightrec_free_reg(reg_cache, tmp);
1489         } else {
1490                 jit_new_node_www(code, 0, tmp2, rt);
1491         }
1492
1493         lightrec_free_reg(reg_cache, rt);
1494         lightrec_free_reg(reg_cache, tmp2);
1495 }
1496
1497 static void rec_store(struct lightrec_cstate *state,
1498                       const struct block *block, u16 offset,
1499                       jit_code_t code, jit_code_t swap_code)
1500 {
1501         u32 flags = block->opcode_list[offset].flags;
1502         u32 mode = LIGHTREC_FLAGS_GET_IO_MODE(flags);
1503         bool no_invalidate = op_flag_no_invalidate(flags) ||
1504                 state->state->invalidate_from_dma_only;
1505         union code c = block->opcode_list[offset].c;
1506         bool is_swc2 = c.i.op == OP_SWC2;
1507
1508         if (is_swc2) {
1509                 switch (mode) {
1510                 case LIGHTREC_IO_RAM:
1511                 case LIGHTREC_IO_SCRATCH:
1512                 case LIGHTREC_IO_DIRECT:
1513                 case LIGHTREC_IO_DIRECT_HW:
1514                         rec_cp2_do_mfc2(state, block, offset, c.i.rt, REG_TEMP);
1515                         break;
1516                 default:
1517                         break;
1518                 }
1519         }
1520
1521         switch (mode) {
1522         case LIGHTREC_IO_RAM:
1523                 rec_store_ram(state, block, offset, code,
1524                               swap_code, !no_invalidate);
1525                 break;
1526         case LIGHTREC_IO_SCRATCH:
1527                 rec_store_scratch(state, block, offset, code, swap_code);
1528                 break;
1529         case LIGHTREC_IO_DIRECT:
1530                 if (no_invalidate) {
1531                         rec_store_direct_no_invalidate(state, block, offset,
1532                                                        code, swap_code);
1533                 } else {
1534                         rec_store_direct(state, block, offset, code, swap_code);
1535                 }
1536                 break;
1537         case LIGHTREC_IO_DIRECT_HW:
1538                 rec_store_io(state, block, offset, code, swap_code);
1539                 break;
1540         default:
1541                 rec_io(state, block, offset, true, false);
1542                 return;
1543         }
1544
1545         if (is_swc2)
1546                 lightrec_discard_reg_if_loaded(state->reg_cache, REG_TEMP);
1547 }
1548
1549 static void rec_SB(struct lightrec_cstate *state,
1550                    const struct block *block, u16 offset)
1551 {
1552         _jit_name(block->_jit, __func__);
1553         rec_store(state, block, offset, jit_code_stxi_c, 0);
1554 }
1555
1556 static void rec_SH(struct lightrec_cstate *state,
1557                    const struct block *block, u16 offset)
1558 {
1559         _jit_name(block->_jit, __func__);
1560         rec_store(state, block, offset,
1561                   jit_code_stxi_s, jit_code_bswapr_us);
1562 }
1563
1564 static void rec_SW(struct lightrec_cstate *state,
1565                    const struct block *block, u16 offset)
1566
1567 {
1568         union code c = block->opcode_list[offset].c;
1569
1570         _jit_name(block->_jit, c.i.op == OP_SWC2 ? "rec_SWC2" : "rec_SW");
1571         rec_store(state, block, offset,
1572                   jit_code_stxi_i, jit_code_bswapr_ui);
1573 }
1574
1575 static void rec_SWL(struct lightrec_cstate *state,
1576                     const struct block *block, u16 offset)
1577 {
1578         _jit_name(block->_jit, __func__);
1579         rec_io(state, block, offset, true, false);
1580 }
1581
1582 static void rec_SWR(struct lightrec_cstate *state,
1583                     const struct block *block, u16 offset)
1584 {
1585         _jit_name(block->_jit, __func__);
1586         rec_io(state, block, offset, true, false);
1587 }
1588
1589 static void rec_load_memory(struct lightrec_cstate *cstate,
1590                             const struct block *block, u16 offset,
1591                             jit_code_t code, jit_code_t swap_code, bool is_unsigned,
1592                             uintptr_t addr_offset, u32 addr_mask)
1593 {
1594         struct regcache *reg_cache = cstate->reg_cache;
1595         struct opcode *op = &block->opcode_list[offset];
1596         bool load_delay = op_flag_load_delay(op->flags) && !cstate->no_load_delay;
1597         jit_state_t *_jit = block->_jit;
1598         u8 rs, rt, out_reg, addr_reg, flags = REG_EXT;
1599         bool no_mask = op_flag_no_mask(op->flags);
1600         union code c = op->c;
1601         s16 imm;
1602
1603         if (load_delay || c.i.op == OP_LWC2)
1604                 out_reg = REG_TEMP;
1605         else if (c.i.rt)
1606                 out_reg = c.i.rt;
1607         else
1608                 return;
1609
1610         if (is_unsigned)
1611                 flags |= REG_ZEXT;
1612
1613         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1614         rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
1615
1616         if (!cstate->state->mirrors_mapped && c.i.imm && !no_mask) {
1617                 jit_addi(rt, rs, (s16)c.i.imm);
1618                 addr_reg = rt;
1619                 imm = 0;
1620         } else {
1621                 addr_reg = rs;
1622                 imm = (s16)c.i.imm;
1623         }
1624
1625         if (!no_mask) {
1626                 jit_andi(rt, addr_reg, addr_mask);
1627                 addr_reg = rt;
1628         }
1629
1630         if (addr_offset) {
1631                 jit_addi(rt, addr_reg, addr_offset);
1632                 addr_reg = rt;
1633         }
1634
1635         jit_new_node_www(code, rt, addr_reg, imm);
1636
1637         if (is_big_endian() && swap_code) {
1638                 jit_new_node_ww(swap_code, rt, rt);
1639
1640                 if (c.i.op == OP_LH)
1641                         jit_extr_s(rt, rt);
1642                 else if (c.i.op == OP_LW && __WORDSIZE == 64)
1643                         jit_extr_i(rt, rt);
1644         }
1645
1646         lightrec_free_reg(reg_cache, rs);
1647         lightrec_free_reg(reg_cache, rt);
1648 }
1649
1650 static void rec_load_ram(struct lightrec_cstate *cstate,
1651                          const struct block *block, u16 offset,
1652                          jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1653 {
1654         _jit_note(block->_jit, __FILE__, __LINE__);
1655
1656         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1657                         cstate->state->offset_ram, rec_ram_mask(cstate->state));
1658 }
1659
1660 static void rec_load_bios(struct lightrec_cstate *cstate,
1661                           const struct block *block, u16 offset,
1662                           jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1663 {
1664         _jit_note(block->_jit, __FILE__, __LINE__);
1665
1666         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1667                         cstate->state->offset_bios, 0x1fffffff);
1668 }
1669
1670 static void rec_load_scratch(struct lightrec_cstate *cstate,
1671                              const struct block *block, u16 offset,
1672                              jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1673 {
1674         _jit_note(block->_jit, __FILE__, __LINE__);
1675
1676         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1677                         cstate->state->offset_scratch, 0x1fffffff);
1678 }
1679
1680 static void rec_load_io(struct lightrec_cstate *cstate,
1681                         const struct block *block, u16 offset,
1682                         jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1683 {
1684         _jit_note(block->_jit, __FILE__, __LINE__);
1685
1686         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1687                         cstate->state->offset_io, rec_io_mask(cstate->state));
1688 }
1689
1690 static void rec_load_direct(struct lightrec_cstate *cstate,
1691                             const struct block *block, u16 offset,
1692                             jit_code_t code, jit_code_t swap_code,
1693                             bool is_unsigned)
1694 {
1695         struct lightrec_state *state = cstate->state;
1696         struct regcache *reg_cache = cstate->reg_cache;
1697         struct opcode *op = &block->opcode_list[offset];
1698         bool load_delay = op_flag_load_delay(op->flags) && !cstate->no_load_delay;
1699         jit_state_t *_jit = block->_jit;
1700         jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2;
1701         u8 tmp, rs, rt, out_reg, addr_reg, flags = REG_EXT;
1702         union code c = op->c;
1703         s16 imm;
1704
1705         if (load_delay || c.i.op == OP_LWC2)
1706                 out_reg = REG_TEMP;
1707         else if (c.i.rt)
1708                 out_reg = c.i.rt;
1709         else
1710                 return;
1711
1712         if (is_unsigned)
1713                 flags |= REG_ZEXT;
1714
1715         jit_note(__FILE__, __LINE__);
1716         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1717         rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
1718
1719         if ((state->offset_ram == state->offset_bios &&
1720             state->offset_ram == state->offset_scratch &&
1721             state->mirrors_mapped) || !c.i.imm) {
1722                 addr_reg = rs;
1723                 imm = (s16)c.i.imm;
1724         } else {
1725                 jit_addi(rt, rs, (s16)c.i.imm);
1726                 addr_reg = rt;
1727                 imm = 0;
1728
1729                 if (c.i.rs != c.i.rt)
1730                         lightrec_free_reg(reg_cache, rs);
1731         }
1732
1733         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1734
1735         if (state->offset_ram == state->offset_bios &&
1736             state->offset_ram == state->offset_scratch) {
1737                 if (!state->mirrors_mapped) {
1738                         jit_andi(tmp, addr_reg, BIT(28));
1739                         jit_rshi_u(tmp, tmp, 28 - 22);
1740                         jit_ori(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1));
1741                         jit_andr(rt, addr_reg, tmp);
1742                 } else {
1743                         jit_andi(rt, addr_reg, 0x1fffffff);
1744                 }
1745
1746                 if (state->offset_ram)
1747                         jit_movi(tmp, state->offset_ram);
1748         } else {
1749                 to_not_ram = jit_bmsi(addr_reg, BIT(28));
1750
1751                 /* Convert to KUNSEG and avoid RAM mirrors */
1752                 jit_andi(rt, addr_reg, RAM_SIZE - 1);
1753
1754                 if (state->offset_ram)
1755                         jit_movi(tmp, state->offset_ram);
1756
1757                 to_end = jit_b();
1758
1759                 jit_patch(to_not_ram);
1760
1761                 if (state->offset_bios != state->offset_scratch)
1762                         to_not_bios = jit_bmci(addr_reg, BIT(22));
1763
1764                 /* Convert to KUNSEG */
1765                 jit_andi(rt, addr_reg, 0x1fc00000 | (BIOS_SIZE - 1));
1766
1767                 jit_movi(tmp, state->offset_bios);
1768
1769                 if (state->offset_bios != state->offset_scratch) {
1770                         to_end2 = jit_b();
1771
1772                         jit_patch(to_not_bios);
1773
1774                         /* Convert to KUNSEG */
1775                         jit_andi(rt, addr_reg, 0x1f800fff);
1776
1777                         if (state->offset_scratch)
1778                                 jit_movi(tmp, state->offset_scratch);
1779
1780                         jit_patch(to_end2);
1781                 }
1782
1783                 jit_patch(to_end);
1784         }
1785
1786         if (state->offset_ram || state->offset_bios || state->offset_scratch)
1787                 jit_addr(rt, rt, tmp);
1788
1789         jit_new_node_www(code, rt, rt, imm);
1790
1791         if (is_big_endian() && swap_code) {
1792                 jit_new_node_ww(swap_code, rt, rt);
1793
1794                 if (c.i.op == OP_LH)
1795                         jit_extr_s(rt, rt);
1796                 else if (c.i.op == OP_LW && __WORDSIZE == 64)
1797                         jit_extr_i(rt, rt);
1798         }
1799
1800         lightrec_free_reg(reg_cache, addr_reg);
1801         lightrec_free_reg(reg_cache, rt);
1802         lightrec_free_reg(reg_cache, tmp);
1803 }
1804
1805 static void rec_load(struct lightrec_cstate *state, const struct block *block,
1806                      u16 offset, jit_code_t code, jit_code_t swap_code,
1807                      bool is_unsigned)
1808 {
1809         const struct opcode *op = &block->opcode_list[offset];
1810         u32 flags = op->flags;
1811
1812         switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
1813         case LIGHTREC_IO_RAM:
1814                 rec_load_ram(state, block, offset, code, swap_code, is_unsigned);
1815                 break;
1816         case LIGHTREC_IO_BIOS:
1817                 rec_load_bios(state, block, offset, code, swap_code, is_unsigned);
1818                 break;
1819         case LIGHTREC_IO_SCRATCH:
1820                 rec_load_scratch(state, block, offset, code, swap_code, is_unsigned);
1821                 break;
1822         case LIGHTREC_IO_DIRECT_HW:
1823                 rec_load_io(state, block, offset, code, swap_code, is_unsigned);
1824                 break;
1825         case LIGHTREC_IO_DIRECT:
1826                 rec_load_direct(state, block, offset, code, swap_code, is_unsigned);
1827                 break;
1828         default:
1829                 rec_io(state, block, offset, false, true);
1830                 return;
1831         }
1832
1833         if (op->i.op == OP_LWC2) {
1834                 rec_cp2_do_mtc2(state, block, offset, op->i.rt, REG_TEMP);
1835                 lightrec_discard_reg_if_loaded(state->reg_cache, REG_TEMP);
1836         }
1837 }
1838
1839 static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset)
1840 {
1841         _jit_name(block->_jit, __func__);
1842         rec_load(state, block, offset, jit_code_ldxi_c, 0, false);
1843 }
1844
1845 static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset)
1846 {
1847         _jit_name(block->_jit, __func__);
1848         rec_load(state, block, offset, jit_code_ldxi_uc, 0, true);
1849 }
1850
1851 static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset)
1852 {
1853         jit_code_t code = is_big_endian() ? jit_code_ldxi_us : jit_code_ldxi_s;
1854
1855         _jit_name(block->_jit, __func__);
1856         rec_load(state, block, offset, code, jit_code_bswapr_us, false);
1857 }
1858
1859 static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset)
1860 {
1861         _jit_name(block->_jit, __func__);
1862         rec_load(state, block, offset, jit_code_ldxi_us, jit_code_bswapr_us, true);
1863 }
1864
1865 static void rec_LWL(struct lightrec_cstate *state, const struct block *block, u16 offset)
1866 {
1867         _jit_name(block->_jit, __func__);
1868         rec_io(state, block, offset, true, true);
1869 }
1870
1871 static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u16 offset)
1872 {
1873         _jit_name(block->_jit, __func__);
1874         rec_io(state, block, offset, true, true);
1875 }
1876
1877 static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset)
1878 {
1879         union code c = block->opcode_list[offset].c;
1880         jit_code_t code;
1881
1882         if (is_big_endian() && __WORDSIZE == 64)
1883                 code = jit_code_ldxi_ui;
1884         else
1885                 code = jit_code_ldxi_i;
1886
1887         _jit_name(block->_jit, c.i.op == OP_LWC2 ? "rec_LWC2" : "rec_LW");
1888         rec_load(state, block, offset, code, jit_code_bswapr_ui, false);
1889 }
1890
1891 static void rec_break_syscall(struct lightrec_cstate *state,
1892                               const struct block *block, u16 offset,
1893                               u32 exit_code)
1894 {
1895         struct regcache *reg_cache = state->reg_cache;
1896         jit_state_t *_jit = block->_jit;
1897         u8 tmp;
1898
1899         _jit_note(block->_jit, __FILE__, __LINE__);
1900
1901         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1902
1903         jit_movi(tmp, exit_code);
1904         jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
1905                    LIGHTREC_REG_STATE, tmp);
1906
1907         jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
1908                    offsetof(struct lightrec_state, target_cycle));
1909         jit_subr(tmp, tmp, LIGHTREC_REG_CYCLE);
1910         jit_movi(LIGHTREC_REG_CYCLE, 0);
1911         jit_stxi_i(offsetof(struct lightrec_state, target_cycle),
1912                    LIGHTREC_REG_STATE, tmp);
1913         jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
1914                    LIGHTREC_REG_STATE, tmp);
1915
1916         lightrec_free_reg(reg_cache, tmp);
1917
1918         /* TODO: the return address should be "pc - 4" if we're a delay slot */
1919         lightrec_emit_end_of_block(state, block, offset, -1,
1920                                    get_ds_pc(block, offset, 0),
1921                                    31, 0, true);
1922 }
1923
1924 static void rec_special_SYSCALL(struct lightrec_cstate *state,
1925                                 const struct block *block, u16 offset)
1926 {
1927         _jit_name(block->_jit, __func__);
1928         rec_break_syscall(state, block, offset, LIGHTREC_EXIT_SYSCALL);
1929 }
1930
1931 static void rec_special_BREAK(struct lightrec_cstate *state,
1932                               const struct block *block, u16 offset)
1933 {
1934         _jit_name(block->_jit, __func__);
1935         rec_break_syscall(state, block, offset, LIGHTREC_EXIT_BREAK);
1936 }
1937
1938 static void rec_mfc(struct lightrec_cstate *state, const struct block *block, u16 offset)
1939 {
1940         struct regcache *reg_cache = state->reg_cache;
1941         union code c = block->opcode_list[offset].c;
1942         jit_state_t *_jit = block->_jit;
1943
1944         jit_note(__FILE__, __LINE__);
1945
1946         if (c.i.op != OP_SWC2)
1947                 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
1948
1949         call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MFC);
1950 }
1951
1952 static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset)
1953 {
1954         struct regcache *reg_cache = state->reg_cache;
1955         union code c = block->opcode_list[offset].c;
1956         jit_state_t *_jit = block->_jit;
1957
1958         jit_note(__FILE__, __LINE__);
1959         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
1960         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
1961         lightrec_clean_reg_if_loaded(reg_cache, _jit, REG_TEMP, false);
1962
1963         call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MTC);
1964
1965         if (c.i.op == OP_CP0 &&
1966             !op_flag_no_ds(block->opcode_list[offset].flags) &&
1967             (c.r.rd == 12 || c.r.rd == 13))
1968                 lightrec_emit_end_of_block(state, block, offset, -1,
1969                                            get_ds_pc(block, offset, 1),
1970                                            0, 0, true);
1971 }
1972
1973 static void
1974 rec_mfc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
1975 {
1976         struct regcache *reg_cache = state->reg_cache;
1977         union code c = block->opcode_list[offset].c;
1978         jit_state_t *_jit = block->_jit;
1979         u8 rt;
1980
1981         jit_note(__FILE__, __LINE__);
1982
1983         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT);
1984
1985         jit_ldxi_i(rt, LIGHTREC_REG_STATE,
1986                    offsetof(struct lightrec_state, regs.cp0[c.r.rd]));
1987
1988         lightrec_free_reg(reg_cache, rt);
1989 }
1990
1991 static bool block_uses_icache(const struct lightrec_cstate *state,
1992                               const struct block *block)
1993 {
1994         const struct lightrec_mem_map *map = &state->state->maps[PSX_MAP_KERNEL_USER_RAM];
1995         u32 pc = kunseg(block->pc);
1996
1997         if (pc < map->pc || pc >= map->pc + map->length)
1998                 return false;
1999
2000         return (block->pc >> 28) < 0xa;
2001 }
2002
2003 static void
2004 rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
2005 {
2006         struct regcache *reg_cache = state->reg_cache;
2007         const union code c = block->opcode_list[offset].c;
2008         jit_state_t *_jit = block->_jit;
2009         u8 rt, tmp = 0, tmp2, status;
2010         jit_node_t *to_end;
2011
2012         jit_note(__FILE__, __LINE__);
2013
2014         switch(c.r.rd) {
2015         case 1:
2016         case 4:
2017         case 8:
2018         case 14:
2019         case 15:
2020                 /* Those registers are read-only */
2021                 return;
2022         default:
2023                 break;
2024         }
2025
2026         if (!block_uses_icache(state, block) && c.r.rd == 12) {
2027                 /* If we are not running code from the RAM through kuseg or
2028                  * kseg0, handle writes to the Status register in C; as the
2029                  * code may toggle bit 16 which isolates the cache. Code
2030                  * running from kuseg or kseg0 in RAM cannot do that. */
2031                 rec_mtc(state, block, offset);
2032                 return;
2033         }
2034
2035         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
2036
2037         if (c.r.rd != 13) {
2038                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[c.r.rd]),
2039                            LIGHTREC_REG_STATE, rt);
2040         }
2041
2042         if (c.r.rd == 12 || c.r.rd == 13) {
2043                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2044                 jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
2045                            offsetof(struct lightrec_state, regs.cp0[13]));
2046
2047                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2048         }
2049
2050         if (c.r.rd == 12) {
2051                 status = rt;
2052         } else if (c.r.rd == 13) {
2053                 /* Cause = (Cause & ~0x0300) | (value & 0x0300) */
2054                 jit_andi(tmp2, rt, 0x0300);
2055                 jit_ori(tmp, tmp, 0x0300);
2056                 jit_xori(tmp, tmp, 0x0300);
2057                 jit_orr(tmp, tmp, tmp2);
2058                 jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
2059                            offsetof(struct lightrec_state, regs.cp0[12]));
2060                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[13]),
2061                            LIGHTREC_REG_STATE, tmp);
2062                 status = tmp2;
2063         }
2064
2065         if (c.r.rd == 12 || c.r.rd == 13) {
2066                 /* Exit dynarec in case there's a software interrupt.
2067                  * exit_flags = !!(status & tmp & 0x0300) & status; */
2068                 jit_andr(tmp, tmp, status);
2069                 jit_andi(tmp, tmp, 0x0300);
2070                 jit_nei(tmp, tmp, 0);
2071                 jit_andr(tmp, tmp, status);
2072         }
2073
2074         if (c.r.rd == 12) {
2075                 /* Exit dynarec in case we unmask a hardware interrupt.
2076                  * exit_flags = !(~status & 0x401) */
2077
2078                 jit_comr(tmp2, status);
2079                 jit_andi(tmp2, tmp2, 0x401);
2080                 jit_eqi(tmp2, tmp2, 0);
2081                 jit_orr(tmp, tmp, tmp2);
2082         }
2083
2084         lightrec_free_reg(reg_cache, rt);
2085
2086         if (c.r.rd == 12 || c.r.rd == 13) {
2087                 to_end = jit_beqi(tmp, 0);
2088
2089                 jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
2090                            offsetof(struct lightrec_state, target_cycle));
2091                 jit_subr(tmp2, tmp2, LIGHTREC_REG_CYCLE);
2092                 jit_movi(LIGHTREC_REG_CYCLE, 0);
2093                 jit_stxi_i(offsetof(struct lightrec_state, target_cycle),
2094                            LIGHTREC_REG_STATE, tmp2);
2095                 jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
2096                            LIGHTREC_REG_STATE, tmp2);
2097
2098
2099                 jit_patch(to_end);
2100         }
2101
2102         if (!op_flag_no_ds(block->opcode_list[offset].flags) &&
2103             (c.r.rd == 12 || c.r.rd == 13)) {
2104                 state->cycles += lightrec_cycles_of_opcode(c);
2105                 lightrec_emit_eob(state, block, offset + 1);
2106         }
2107 }
2108
2109 static void rec_cp0_MFC0(struct lightrec_cstate *state,
2110                          const struct block *block, u16 offset)
2111 {
2112         _jit_name(block->_jit, __func__);
2113         rec_mfc0(state, block, offset);
2114 }
2115
2116 static void rec_cp0_CFC0(struct lightrec_cstate *state,
2117                          const struct block *block, u16 offset)
2118 {
2119         _jit_name(block->_jit, __func__);
2120         rec_mfc0(state, block, offset);
2121 }
2122
2123 static void rec_cp0_MTC0(struct lightrec_cstate *state,
2124                          const struct block *block, u16 offset)
2125 {
2126         _jit_name(block->_jit, __func__);
2127         rec_mtc0(state, block, offset);
2128 }
2129
2130 static void rec_cp0_CTC0(struct lightrec_cstate *state,
2131                          const struct block *block, u16 offset)
2132 {
2133         _jit_name(block->_jit, __func__);
2134         rec_mtc0(state, block, offset);
2135 }
2136
2137 static unsigned int cp2d_i_offset(u8 reg)
2138 {
2139         return offsetof(struct lightrec_state, regs.cp2d[reg]);
2140 }
2141
2142 static unsigned int cp2d_s_offset(u8 reg)
2143 {
2144         return cp2d_i_offset(reg) + is_big_endian() * 2;
2145 }
2146
2147 static unsigned int cp2c_i_offset(u8 reg)
2148 {
2149         return offsetof(struct lightrec_state, regs.cp2c[reg]);
2150 }
2151
2152 static unsigned int cp2c_s_offset(u8 reg)
2153 {
2154         return cp2c_i_offset(reg) + is_big_endian() * 2;
2155 }
2156
2157 static void rec_cp2_do_mfc2(struct lightrec_cstate *state,
2158                             const struct block *block, u16 offset,
2159                             u8 reg, u8 out_reg)
2160 {
2161         struct regcache *reg_cache = state->reg_cache;
2162         jit_state_t *_jit = block->_jit;
2163         const u32 zext_regs = 0x300f0080;
2164         u8 rt, tmp, tmp2, tmp3, out, flags;
2165         unsigned int i;
2166
2167         _jit_name(block->_jit, __func__);
2168
2169         if (state->state->ops.cop2_notify) {
2170                 /* We must call cop2_notify, handle that in C. */
2171                 rec_mfc(state, block, offset);
2172                 return;
2173         }
2174
2175         flags = (zext_regs & BIT(reg)) ? REG_ZEXT : REG_EXT;
2176         rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
2177
2178         if (reg == 15)
2179                 reg = 14;
2180
2181         switch (reg) {
2182         case 1:
2183         case 3:
2184         case 5:
2185         case 8:
2186         case 9:
2187         case 10:
2188         case 11:
2189                 jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg));
2190                 break;
2191         case 7:
2192         case 16:
2193         case 17:
2194         case 18:
2195         case 19:
2196                 jit_ldxi_us(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg));
2197                 break;
2198         case 28:
2199         case 29:
2200                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2201                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2202                 tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
2203
2204                 for (i = 0; i < 3; i++) {
2205                         out = i == 0 ? rt : tmp;
2206
2207                         jit_ldxi_s(tmp, LIGHTREC_REG_STATE, cp2d_s_offset(9 + i));
2208                         jit_movi(tmp2, 0x1f);
2209                         jit_rshi(out, tmp, 7);
2210
2211                         jit_ltr(tmp3, tmp2, out);
2212                         jit_movnr(out, tmp2, tmp3);
2213
2214                         jit_gei(tmp2, out, 0);
2215                         jit_movzr(out, tmp2, tmp2);
2216
2217                         if (i > 0) {
2218                                 jit_lshi(tmp, tmp, 5 * i);
2219                                 jit_orr(rt, rt, tmp);
2220                         }
2221                 }
2222
2223
2224                 lightrec_free_reg(reg_cache, tmp);
2225                 lightrec_free_reg(reg_cache, tmp2);
2226                 lightrec_free_reg(reg_cache, tmp3);
2227                 break;
2228         default:
2229                 jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2d_i_offset(reg));
2230                 break;
2231         }
2232
2233         lightrec_free_reg(reg_cache, rt);
2234 }
2235
2236 static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
2237                                const struct block *block, u16 offset)
2238 {
2239         const union code c = block->opcode_list[offset].c;
2240
2241         rec_cp2_do_mfc2(state, block, offset, c.r.rd, c.r.rt);
2242 }
2243
2244 static void rec_cp2_basic_CFC2(struct lightrec_cstate *state,
2245                                const struct block *block, u16 offset)
2246 {
2247         struct regcache *reg_cache = state->reg_cache;
2248         const union code c = block->opcode_list[offset].c;
2249         jit_state_t *_jit = block->_jit;
2250         u8 rt;
2251
2252         _jit_name(block->_jit, __func__);
2253
2254         if (state->state->ops.cop2_notify) {
2255                 /* We must call cop2_notify, handle that in C. */
2256                 rec_mfc(state, block, offset);
2257                 return;
2258         }
2259
2260         switch (c.r.rd) {
2261         case 4:
2262         case 12:
2263         case 20:
2264         case 26:
2265         case 27:
2266         case 29:
2267         case 30:
2268                 rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT);
2269                 jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2c_s_offset(c.r.rd));
2270                 break;
2271         default:
2272                 rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT);
2273                 jit_ldxi_ui(rt, LIGHTREC_REG_STATE, cp2c_i_offset(c.r.rd));
2274                 break;
2275         }
2276
2277         lightrec_free_reg(reg_cache, rt);
2278 }
2279
2280 static void rec_cp2_do_mtc2(struct lightrec_cstate *state,
2281                             const struct block *block, u16 offset,
2282                             u8 reg, u8 in_reg)
2283 {
2284         struct regcache *reg_cache = state->reg_cache;
2285         jit_state_t *_jit = block->_jit;
2286         u8 rt, tmp, tmp2, flags = 0;
2287
2288         _jit_name(block->_jit, __func__);
2289
2290         if (state->state->ops.cop2_notify) {
2291                 /* We must call cop2_notify, handle that in C. */
2292                 rec_mtc(state, block, offset);
2293                 return;
2294         }
2295
2296         if (reg == 31)
2297                 return;
2298
2299         if (reg == 30)
2300                 flags |= REG_EXT;
2301
2302         rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, flags);
2303
2304         switch (reg) {
2305         case 15:
2306                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2307                 jit_ldxi_i(tmp, LIGHTREC_REG_STATE, cp2d_i_offset(13));
2308
2309                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2310                 jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, cp2d_i_offset(14));
2311
2312                 jit_stxi_i(cp2d_i_offset(12), LIGHTREC_REG_STATE, tmp);
2313                 jit_stxi_i(cp2d_i_offset(13), LIGHTREC_REG_STATE, tmp2);
2314                 jit_stxi_i(cp2d_i_offset(14), LIGHTREC_REG_STATE, rt);
2315
2316                 lightrec_free_reg(reg_cache, tmp);
2317                 lightrec_free_reg(reg_cache, tmp2);
2318                 break;
2319         case 28:
2320                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2321
2322                 jit_lshi(tmp, rt, 7);
2323                 jit_andi(tmp, tmp, 0xf80);
2324                 jit_stxi_s(cp2d_s_offset(9), LIGHTREC_REG_STATE, tmp);
2325
2326                 jit_lshi(tmp, rt, 2);
2327                 jit_andi(tmp, tmp, 0xf80);
2328                 jit_stxi_s(cp2d_s_offset(10), LIGHTREC_REG_STATE, tmp);
2329
2330                 jit_rshi(tmp, rt, 3);
2331                 jit_andi(tmp, tmp, 0xf80);
2332                 jit_stxi_s(cp2d_s_offset(11), LIGHTREC_REG_STATE, tmp);
2333
2334                 lightrec_free_reg(reg_cache, tmp);
2335                 break;
2336         case 30:
2337                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2338
2339                 /* if (rt < 0) rt = ~rt; */
2340                 jit_rshi(tmp, rt, 31);
2341                 jit_xorr(tmp, rt, tmp);
2342
2343                 /* Count leading zeros */
2344                 jit_clzr(tmp, tmp);
2345                 if (__WORDSIZE != 32)
2346                         jit_subi(tmp, tmp, __WORDSIZE - 32);
2347
2348                 jit_stxi_i(cp2d_i_offset(31), LIGHTREC_REG_STATE, tmp);
2349
2350                 lightrec_free_reg(reg_cache, tmp);
2351                 fallthrough;
2352         default:
2353                 jit_stxi_i(cp2d_i_offset(reg), LIGHTREC_REG_STATE, rt);
2354                 break;
2355         }
2356
2357         lightrec_free_reg(reg_cache, rt);
2358 }
2359
2360 static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
2361                                const struct block *block, u16 offset)
2362 {
2363         const union code c = block->opcode_list[offset].c;
2364
2365         rec_cp2_do_mtc2(state, block, offset, c.r.rd, c.r.rt);
2366 }
2367
2368 static void rec_cp2_basic_CTC2(struct lightrec_cstate *state,
2369                                const struct block *block, u16 offset)
2370 {
2371         struct regcache *reg_cache = state->reg_cache;
2372         const union code c = block->opcode_list[offset].c;
2373         jit_state_t *_jit = block->_jit;
2374         u8 rt, tmp, tmp2;
2375
2376         _jit_name(block->_jit, __func__);
2377
2378         if (state->state->ops.cop2_notify) {
2379                 /* We must call cop2_notify, handle that in C. */
2380                 rec_mtc(state, block, offset);
2381                 return;
2382         }
2383
2384         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
2385
2386         switch (c.r.rd) {
2387         case 4:
2388         case 12:
2389         case 20:
2390         case 26:
2391         case 27:
2392         case 29:
2393         case 30:
2394                 jit_stxi_s(cp2c_s_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
2395                 break;
2396         case 31:
2397                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2398                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2399
2400                 jit_andi(tmp, rt, 0x7f87e000);
2401                 jit_nei(tmp, tmp, 0);
2402                 jit_lshi(tmp, tmp, 31);
2403
2404                 jit_andi(tmp2, rt, 0x7ffff000);
2405                 jit_orr(tmp, tmp2, tmp);
2406
2407                 jit_stxi_i(cp2c_i_offset(31), LIGHTREC_REG_STATE, tmp);
2408
2409                 lightrec_free_reg(reg_cache, tmp);
2410                 lightrec_free_reg(reg_cache, tmp2);
2411                 break;
2412
2413         default:
2414                 jit_stxi_i(cp2c_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
2415         }
2416
2417         lightrec_free_reg(reg_cache, rt);
2418 }
2419
2420 static void rec_cp0_RFE(struct lightrec_cstate *state,
2421                         const struct block *block, u16 offset)
2422 {
2423         struct regcache *reg_cache = state->reg_cache;
2424         jit_state_t *_jit = block->_jit;
2425         u8 status, tmp;
2426
2427         jit_name(__func__);
2428         jit_note(__FILE__, __LINE__);
2429
2430         status = lightrec_alloc_reg_temp(reg_cache, _jit);
2431         jit_ldxi_i(status, LIGHTREC_REG_STATE,
2432                    offsetof(struct lightrec_state, regs.cp0[12]));
2433
2434         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2435
2436         /* status = ((status >> 2) & 0xf) | status & ~0xf; */
2437         jit_rshi(tmp, status, 2);
2438         jit_andi(tmp, tmp, 0xf);
2439         jit_andi(status, status, ~0xful);
2440         jit_orr(status, status, tmp);
2441
2442         jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
2443                    offsetof(struct lightrec_state, regs.cp0[13]));
2444         jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[12]),
2445                    LIGHTREC_REG_STATE, status);
2446
2447         /* Exit dynarec in case there's a software interrupt.
2448          * exit_flags = !!(status & cause & 0x0300) & status; */
2449         jit_andr(tmp, tmp, status);
2450         jit_andi(tmp, tmp, 0x0300);
2451         jit_nei(tmp, tmp, 0);
2452         jit_andr(tmp, tmp, status);
2453         jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
2454                    LIGHTREC_REG_STATE, tmp);
2455
2456         lightrec_free_reg(reg_cache, status);
2457         lightrec_free_reg(reg_cache, tmp);
2458 }
2459
2460 static void rec_CP(struct lightrec_cstate *state,
2461                    const struct block *block, u16 offset)
2462 {
2463         union code c = block->opcode_list[offset].c;
2464         jit_state_t *_jit = block->_jit;
2465
2466         jit_name(__func__);
2467         jit_note(__FILE__, __LINE__);
2468
2469         call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_CP);
2470 }
2471
2472 static void rec_meta_MOV(struct lightrec_cstate *state,
2473                          const struct block *block, u16 offset)
2474 {
2475         struct regcache *reg_cache = state->reg_cache;
2476         const struct opcode *op = &block->opcode_list[offset];
2477         union code c = op->c;
2478         jit_state_t *_jit = block->_jit;
2479         bool unload_rd;
2480         u8 rs, rd;
2481
2482         _jit_name(block->_jit, __func__);
2483         jit_note(__FILE__, __LINE__);
2484
2485         unload_rd = OPT_EARLY_UNLOAD
2486                 && LIGHTREC_FLAGS_GET_RD(op->flags) == LIGHTREC_REG_UNLOAD;
2487
2488         if (c.m.rs && !lightrec_reg_is_loaded(reg_cache, c.m.rs)) {
2489                 /* The source register is not yet loaded - we can load its value
2490                  * from the register cache directly into the target register. */
2491                 rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, REG_EXT);
2492
2493                 jit_ldxi_i(rd, LIGHTREC_REG_STATE,
2494                            offsetof(struct lightrec_state, regs.gpr) + (c.m.rs << 2));
2495
2496                 lightrec_free_reg(reg_cache, rd);
2497         } else if (unload_rd) {
2498                 /* If the destination register will be unloaded right after the
2499                  * MOV meta-opcode, we don't actually need to write any host
2500                  * register - we can just store the source register directly to
2501                  * the register cache, at the offset corresponding to the
2502                  * destination register. */
2503                 lightrec_discard_reg_if_loaded(reg_cache, c.m.rd);
2504
2505                 rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0);
2506
2507                 jit_stxi_i(offsetof(struct lightrec_state, regs.gpr)
2508                            + (c.m.rd << 2), LIGHTREC_REG_STATE, rs);
2509
2510                 lightrec_free_reg(reg_cache, rs);
2511         } else {
2512                 if (c.m.rs)
2513                         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0);
2514
2515                 rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, REG_EXT);
2516
2517                 if (c.m.rs == 0) {
2518                         jit_movi(rd, 0);
2519                 } else {
2520                         jit_extr_i(rd, rs);
2521                         lightrec_free_reg(reg_cache, rs);
2522                 }
2523
2524                 lightrec_free_reg(reg_cache, rd);
2525         }
2526 }
2527
2528 static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state,
2529                                const struct block *block,
2530                                u16 offset)
2531 {
2532         struct regcache *reg_cache = state->reg_cache;
2533         union code c = block->opcode_list[offset].c;
2534         jit_state_t *_jit = block->_jit;
2535         u8 rs, rd;
2536
2537         _jit_name(block->_jit, __func__);
2538         jit_note(__FILE__, __LINE__);
2539
2540         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0);
2541         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, REG_EXT);
2542
2543         if (c.m.op == OP_META_EXTC)
2544                 jit_extr_c(rd, rs);
2545         else
2546                 jit_extr_s(rd, rs);
2547
2548         lightrec_free_reg(reg_cache, rs);
2549         lightrec_free_reg(reg_cache, rd);
2550 }
2551
2552 static void rec_meta_MULT2(struct lightrec_cstate *state,
2553                            const struct block *block,
2554                            u16 offset)
2555 {
2556         struct regcache *reg_cache = state->reg_cache;
2557         union code c = block->opcode_list[offset].c;
2558         jit_state_t *_jit = block->_jit;
2559         u8 reg_lo = get_mult_div_lo(c);
2560         u8 reg_hi = get_mult_div_hi(c);
2561         u32 flags = block->opcode_list[offset].flags;
2562         bool is_signed = c.i.op == OP_META_MULT2;
2563         u8 rs, lo, hi, rflags = 0, hiflags = 0;
2564         unsigned int i;
2565
2566         if (!op_flag_no_hi(flags) && c.r.op < 32) {
2567                 rflags = is_signed ? REG_EXT : REG_ZEXT;
2568                 hiflags = is_signed ? REG_EXT : (REG_EXT | REG_ZEXT);
2569         }
2570
2571         _jit_name(block->_jit, __func__);
2572         jit_note(__FILE__, __LINE__);
2573
2574         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, rflags);
2575
2576         /*
2577          * We must handle the case where one of the output registers is our rs
2578          * input register. Thanksfully, computing LO/HI can be done in any
2579          * order. Here, we make sure that the computation that overwrites the
2580          * input register is always performed last.
2581          */
2582         for (i = 0; i < 2; i++) {
2583                 if ((!i ^ (reg_lo == c.i.rs)) && !op_flag_no_lo(flags)) {
2584                         lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
2585
2586                         if (c.r.op < 32)
2587                                 jit_lshi(lo, rs, c.r.op);
2588                         else
2589                                 jit_movi(lo, 0);
2590
2591                         lightrec_free_reg(reg_cache, lo);
2592                         continue;
2593                 }
2594
2595                 if ((!!i ^ (reg_lo == c.i.rs)) && !op_flag_no_hi(flags)) {
2596                         hi = lightrec_alloc_reg_out(reg_cache, _jit,
2597                                                     reg_hi, hiflags);
2598
2599                         if (c.r.op >= 32)
2600                                 jit_lshi(hi, rs, c.r.op - 32);
2601                         else if (is_signed)
2602                                 jit_rshi(hi, rs, 32 - c.r.op);
2603                         else
2604                                 jit_rshi_u(hi, rs, 32 - c.r.op);
2605
2606                         lightrec_free_reg(reg_cache, hi);
2607                 }
2608         }
2609
2610         lightrec_free_reg(reg_cache, rs);
2611
2612         _jit_name(block->_jit, __func__);
2613         jit_note(__FILE__, __LINE__);
2614 }
2615
2616 static void rec_meta_COM(struct lightrec_cstate *state,
2617                          const struct block *block, u16 offset)
2618 {
2619         struct regcache *reg_cache = state->reg_cache;
2620         union code c = block->opcode_list[offset].c;
2621         jit_state_t *_jit = block->_jit;
2622         u8 rd, rs, flags;
2623
2624         jit_note(__FILE__, __LINE__);
2625         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0);
2626         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, 0);
2627
2628         flags = lightrec_get_reg_in_flags(reg_cache, rs);
2629
2630         lightrec_set_reg_out_flags(reg_cache, rd,
2631                                    flags & REG_EXT);
2632
2633         jit_comr(rd, rs);
2634
2635         lightrec_free_reg(reg_cache, rs);
2636         lightrec_free_reg(reg_cache, rd);
2637 }
2638
2639 static const lightrec_rec_func_t rec_standard[64] = {
2640         SET_DEFAULT_ELM(rec_standard, unknown_opcode),
2641         [OP_SPECIAL]            = rec_SPECIAL,
2642         [OP_REGIMM]             = rec_REGIMM,
2643         [OP_J]                  = rec_J,
2644         [OP_JAL]                = rec_JAL,
2645         [OP_BEQ]                = rec_BEQ,
2646         [OP_BNE]                = rec_BNE,
2647         [OP_BLEZ]               = rec_BLEZ,
2648         [OP_BGTZ]               = rec_BGTZ,
2649         [OP_ADDI]               = rec_ADDI,
2650         [OP_ADDIU]              = rec_ADDIU,
2651         [OP_SLTI]               = rec_SLTI,
2652         [OP_SLTIU]              = rec_SLTIU,
2653         [OP_ANDI]               = rec_ANDI,
2654         [OP_ORI]                = rec_ORI,
2655         [OP_XORI]               = rec_XORI,
2656         [OP_LUI]                = rec_LUI,
2657         [OP_CP0]                = rec_CP0,
2658         [OP_CP2]                = rec_CP2,
2659         [OP_LB]                 = rec_LB,
2660         [OP_LH]                 = rec_LH,
2661         [OP_LWL]                = rec_LWL,
2662         [OP_LW]                 = rec_LW,
2663         [OP_LBU]                = rec_LBU,
2664         [OP_LHU]                = rec_LHU,
2665         [OP_LWR]                = rec_LWR,
2666         [OP_SB]                 = rec_SB,
2667         [OP_SH]                 = rec_SH,
2668         [OP_SWL]                = rec_SWL,
2669         [OP_SW]                 = rec_SW,
2670         [OP_SWR]                = rec_SWR,
2671         [OP_LWC2]               = rec_LW,
2672         [OP_SWC2]               = rec_SW,
2673
2674         [OP_META]               = rec_META,
2675         [OP_META_MULT2]         = rec_meta_MULT2,
2676         [OP_META_MULTU2]        = rec_meta_MULT2,
2677 };
2678
2679 static const lightrec_rec_func_t rec_special[64] = {
2680         SET_DEFAULT_ELM(rec_special, unknown_opcode),
2681         [OP_SPECIAL_SLL]        = rec_special_SLL,
2682         [OP_SPECIAL_SRL]        = rec_special_SRL,
2683         [OP_SPECIAL_SRA]        = rec_special_SRA,
2684         [OP_SPECIAL_SLLV]       = rec_special_SLLV,
2685         [OP_SPECIAL_SRLV]       = rec_special_SRLV,
2686         [OP_SPECIAL_SRAV]       = rec_special_SRAV,
2687         [OP_SPECIAL_JR]         = rec_special_JR,
2688         [OP_SPECIAL_JALR]       = rec_special_JALR,
2689         [OP_SPECIAL_SYSCALL]    = rec_special_SYSCALL,
2690         [OP_SPECIAL_BREAK]      = rec_special_BREAK,
2691         [OP_SPECIAL_MFHI]       = rec_special_MFHI,
2692         [OP_SPECIAL_MTHI]       = rec_special_MTHI,
2693         [OP_SPECIAL_MFLO]       = rec_special_MFLO,
2694         [OP_SPECIAL_MTLO]       = rec_special_MTLO,
2695         [OP_SPECIAL_MULT]       = rec_special_MULT,
2696         [OP_SPECIAL_MULTU]      = rec_special_MULTU,
2697         [OP_SPECIAL_DIV]        = rec_special_DIV,
2698         [OP_SPECIAL_DIVU]       = rec_special_DIVU,
2699         [OP_SPECIAL_ADD]        = rec_special_ADD,
2700         [OP_SPECIAL_ADDU]       = rec_special_ADDU,
2701         [OP_SPECIAL_SUB]        = rec_special_SUB,
2702         [OP_SPECIAL_SUBU]       = rec_special_SUBU,
2703         [OP_SPECIAL_AND]        = rec_special_AND,
2704         [OP_SPECIAL_OR]         = rec_special_OR,
2705         [OP_SPECIAL_XOR]        = rec_special_XOR,
2706         [OP_SPECIAL_NOR]        = rec_special_NOR,
2707         [OP_SPECIAL_SLT]        = rec_special_SLT,
2708         [OP_SPECIAL_SLTU]       = rec_special_SLTU,
2709 };
2710
2711 static const lightrec_rec_func_t rec_regimm[64] = {
2712         SET_DEFAULT_ELM(rec_regimm, unknown_opcode),
2713         [OP_REGIMM_BLTZ]        = rec_regimm_BLTZ,
2714         [OP_REGIMM_BGEZ]        = rec_regimm_BGEZ,
2715         [OP_REGIMM_BLTZAL]      = rec_regimm_BLTZAL,
2716         [OP_REGIMM_BGEZAL]      = rec_regimm_BGEZAL,
2717 };
2718
2719 static const lightrec_rec_func_t rec_cp0[64] = {
2720         SET_DEFAULT_ELM(rec_cp0, rec_CP),
2721         [OP_CP0_MFC0]           = rec_cp0_MFC0,
2722         [OP_CP0_CFC0]           = rec_cp0_CFC0,
2723         [OP_CP0_MTC0]           = rec_cp0_MTC0,
2724         [OP_CP0_CTC0]           = rec_cp0_CTC0,
2725         [OP_CP0_RFE]            = rec_cp0_RFE,
2726 };
2727
2728 static const lightrec_rec_func_t rec_cp2_basic[64] = {
2729         SET_DEFAULT_ELM(rec_cp2_basic, rec_CP),
2730         [OP_CP2_BASIC_MFC2]     = rec_cp2_basic_MFC2,
2731         [OP_CP2_BASIC_CFC2]     = rec_cp2_basic_CFC2,
2732         [OP_CP2_BASIC_MTC2]     = rec_cp2_basic_MTC2,
2733         [OP_CP2_BASIC_CTC2]     = rec_cp2_basic_CTC2,
2734 };
2735
2736 static const lightrec_rec_func_t rec_meta[64] = {
2737         SET_DEFAULT_ELM(rec_meta, unknown_opcode),
2738         [OP_META_MOV]           = rec_meta_MOV,
2739         [OP_META_EXTC]          = rec_meta_EXTC_EXTS,
2740         [OP_META_EXTS]          = rec_meta_EXTC_EXTS,
2741         [OP_META_COM]           = rec_meta_COM,
2742 };
2743
2744 static void rec_SPECIAL(struct lightrec_cstate *state,
2745                         const struct block *block, u16 offset)
2746 {
2747         union code c = block->opcode_list[offset].c;
2748         lightrec_rec_func_t f = rec_special[c.r.op];
2749
2750         if (!HAS_DEFAULT_ELM && unlikely(!f))
2751                 unknown_opcode(state, block, offset);
2752         else
2753                 (*f)(state, block, offset);
2754 }
2755
2756 static void rec_REGIMM(struct lightrec_cstate *state,
2757                        const struct block *block, u16 offset)
2758 {
2759         union code c = block->opcode_list[offset].c;
2760         lightrec_rec_func_t f = rec_regimm[c.r.rt];
2761
2762         if (!HAS_DEFAULT_ELM && unlikely(!f))
2763                 unknown_opcode(state, block, offset);
2764         else
2765                 (*f)(state, block, offset);
2766 }
2767
2768 static void rec_CP0(struct lightrec_cstate *state,
2769                     const struct block *block, u16 offset)
2770 {
2771         union code c = block->opcode_list[offset].c;
2772         lightrec_rec_func_t f = rec_cp0[c.r.rs];
2773
2774         if (!HAS_DEFAULT_ELM && unlikely(!f))
2775                 rec_CP(state, block, offset);
2776         else
2777                 (*f)(state, block, offset);
2778 }
2779
2780 static void rec_CP2(struct lightrec_cstate *state,
2781                     const struct block *block, u16 offset)
2782 {
2783         union code c = block->opcode_list[offset].c;
2784
2785         if (c.r.op == OP_CP2_BASIC) {
2786                 lightrec_rec_func_t f = rec_cp2_basic[c.r.rs];
2787
2788                 if (HAS_DEFAULT_ELM || likely(f)) {
2789                         (*f)(state, block, offset);
2790                         return;
2791                 }
2792         }
2793
2794         rec_CP(state, block, offset);
2795 }
2796
2797 static void rec_META(struct lightrec_cstate *state,
2798                      const struct block *block, u16 offset)
2799 {
2800         union code c = block->opcode_list[offset].c;
2801         lightrec_rec_func_t f = rec_meta[c.m.op];
2802
2803         if (!HAS_DEFAULT_ELM && unlikely(!f))
2804                 unknown_opcode(state, block, offset);
2805         else
2806                 (*f)(state, block, offset);
2807 }
2808
2809 void lightrec_rec_opcode(struct lightrec_cstate *state,
2810                          const struct block *block, u16 offset)
2811 {
2812         struct regcache *reg_cache = state->reg_cache;
2813         struct lightrec_branch_target *target;
2814         const struct opcode *op = &block->opcode_list[offset];
2815         jit_state_t *_jit = block->_jit;
2816         lightrec_rec_func_t f;
2817         u16 unload_offset;
2818
2819         if (op_flag_sync(op->flags)) {
2820                 if (state->cycles)
2821                         jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
2822                 state->cycles = 0;
2823
2824                 lightrec_storeback_regs(reg_cache, _jit);
2825                 lightrec_regcache_reset(reg_cache);
2826
2827                 pr_debug("Adding branch target at offset 0x%x\n", offset << 2);
2828                 target = &state->targets[state->nb_targets++];
2829                 target->offset = offset;
2830                 target->label = jit_indirect();
2831         }
2832
2833         if (likely(op->opcode)) {
2834                 f = rec_standard[op->i.op];
2835
2836                 if (!HAS_DEFAULT_ELM && unlikely(!f))
2837                         unknown_opcode(state, block, offset);
2838                 else
2839                         (*f)(state, block, offset);
2840         }
2841
2842         if (OPT_EARLY_UNLOAD) {
2843                 unload_offset = offset +
2844                         (has_delay_slot(op->c) && !op_flag_no_ds(op->flags));
2845
2846                 lightrec_do_early_unload(state, block, unload_offset);
2847         }
2848
2849         state->no_load_delay = false;
2850 }