git subrepo pull --force deps/lightrec
[pcsx_rearmed.git] / deps / lightrec / emitter.c
1 // SPDX-License-Identifier: LGPL-2.1-or-later
2 /*
3  * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
4  */
5
6 #include "blockcache.h"
7 #include "debug.h"
8 #include "disassembler.h"
9 #include "emitter.h"
10 #include "lightning-wrapper.h"
11 #include "optimizer.h"
12 #include "regcache.h"
13
14 #include <stdbool.h>
15 #include <stddef.h>
16
/* Signature shared by the opcode recompilers in this file: the compiler
 * state, the block being compiled, and a u16 which the recompilers below use
 * as an index into block->opcode_list. */
17 typedef void (*lightrec_rec_func_t)(struct lightrec_cstate *, const struct block *, u16);
18
19 /* Forward declarations */
/* Recompilers for the SPECIAL, REGIMM, CP0, CP2 and META opcode groups.
 * Their definitions are not part of the code visible here. */
20 static void rec_SPECIAL(struct lightrec_cstate *state, const struct block *block, u16 offset);
21 static void rec_REGIMM(struct lightrec_cstate *state, const struct block *block, u16 offset);
22 static void rec_CP0(struct lightrec_cstate *state, const struct block *block, u16 offset);
23 static void rec_CP2(struct lightrec_cstate *state, const struct block *block, u16 offset);
24 static void rec_META(struct lightrec_cstate *state, const struct block *block, u16 offset);
/* CP2 register transfer helpers; presumably "move to" / "move from"
 * coprocessor 2, going by their names -- confirm against the definitions. */
25 static void rec_cp2_do_mtc2(struct lightrec_cstate *state,
26                             const struct block *block, u16 offset, u8 reg, u8 in_reg);
27 static void rec_cp2_do_mfc2(struct lightrec_cstate *state,
28                             const struct block *block, u16 offset,
29                             u8 reg, u8 out_reg);
30
31 static void unknown_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset)
32 {
33         pr_warn("Unknown opcode: 0x%08x at PC 0x%08x\n",
34                 block->opcode_list[offset].c.opcode,
35                 block->pc + (offset << 2));
36 }
37
38 static void
39 lightrec_jump_to_fn(jit_state_t *_jit, void (*fn)(void))
40 {
41         /* Prevent jit_jmpi() from using our cycles register as a temporary */
42         jit_live(LIGHTREC_REG_CYCLE);
43
44         jit_patch_abs(jit_jmpi(), fn);
45 }
46
47 static void
48 lightrec_jump_to_eob(struct lightrec_cstate *state, jit_state_t *_jit)
49 {
50         lightrec_jump_to_fn(_jit, state->state->eob_wrapper_func);
51 }
52
53 static void
54 lightrec_jump_to_ds_check(struct lightrec_cstate *state, jit_state_t *_jit)
55 {
56         lightrec_jump_to_fn(_jit, state->state->ds_check_func);
57 }
58
59 static void update_ra_register(struct regcache *reg_cache, jit_state_t *_jit,
60                                u8 ra_reg, u32 pc, u32 link)
61 {
62         u8 link_reg;
63
64         link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg, 0);
65         lightrec_load_imm(reg_cache, _jit, link_reg, pc, link);
66         lightrec_free_reg(reg_cache, link_reg);
67 }
68
69 static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
70                                        const struct block *block, u16 offset,
71                                        s8 reg_new_pc, u32 imm, u8 ra_reg,
72                                        u32 link, bool update_cycles)
73 {
74         struct regcache *reg_cache = state->reg_cache;
75         jit_state_t *_jit = block->_jit;
76         const struct opcode *op = &block->opcode_list[offset],
77                             *ds = get_delay_slot(block->opcode_list, offset);
78         u32 cycles = state->cycles + lightrec_cycles_of_opcode(op->c);
79
80         jit_note(__FILE__, __LINE__);
81
82         if (link && ra_reg != reg_new_pc)
83                 update_ra_register(reg_cache, _jit, ra_reg, block->pc, link);
84
85         if (reg_new_pc < 0)
86                 lightrec_load_next_pc_imm(reg_cache, _jit, block->pc, imm);
87         else
88                 lightrec_load_next_pc(reg_cache, _jit, reg_new_pc);
89
90         if (link && ra_reg == reg_new_pc) {
91                 /* Handle the special case: JALR $r0, $r0
92                  * In that case the target PC should be the old value of the
93                  * register. */
94                 update_ra_register(reg_cache, _jit, ra_reg, block->pc, link);
95         }
96
97         if (has_delay_slot(op->c) &&
98             !op_flag_no_ds(op->flags) && !op_flag_local_branch(op->flags)) {
99                 cycles += lightrec_cycles_of_opcode(ds->c);
100
101                 /* Recompile the delay slot */
102                 if (ds->c.opcode)
103                         lightrec_rec_opcode(state, block, offset + 1);
104         }
105
106         /* Clean the remaining registers */
107         lightrec_clean_regs(reg_cache, _jit);
108
109         if (cycles && update_cycles) {
110                 jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
111                 pr_debug("EOB: %u cycles\n", cycles);
112         }
113
114         if (op_flag_load_delay(ds->flags)
115             && opcode_is_load(ds->c) && !state->no_load_delay) {
116                 /* If the delay slot is a load opcode, its target register
117                  * will be written after the first opcode of the target is
118                  * executed. Handle this by jumping to a special section of
119                  * the dispatcher. It expects the loaded value to be in
120                  * REG_TEMP, and the target register number to be in JIT_V1.*/
121                 jit_movi(JIT_V1, ds->c.i.rt);
122
123                 lightrec_jump_to_ds_check(state, _jit);
124         } else {
125                 lightrec_jump_to_eob(state, _jit);
126         }
127 }
128
129 void lightrec_emit_jump_to_interpreter(struct lightrec_cstate *state,
130                                        const struct block *block, u16 offset)
131 {
132         struct regcache *reg_cache = state->reg_cache;
133         jit_state_t *_jit = block->_jit;
134
135         lightrec_clean_regs(reg_cache, _jit);
136
137         /* Call the interpreter with the block's address in JIT_V1 and the
138          * PC (which might have an offset) in JIT_V0. */
139         lightrec_load_imm(reg_cache, _jit, JIT_V0, block->pc,
140                           block->pc + (offset << 2));
141         jit_movi(JIT_V1, (uintptr_t)block);
142
143         jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
144         lightrec_jump_to_fn(_jit, state->state->interpreter_func);
145 }
146
147 static void lightrec_emit_eob(struct lightrec_cstate *state,
148                               const struct block *block, u16 offset)
149 {
150         struct regcache *reg_cache = state->reg_cache;
151         jit_state_t *_jit = block->_jit;
152
153         lightrec_clean_regs(reg_cache, _jit);
154
155         lightrec_load_imm(reg_cache, _jit, JIT_V0, block->pc,
156                           block->pc + (offset << 2));
157         jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
158
159         lightrec_jump_to_eob(state, _jit);
160 }
161
162 static void rec_special_JR(struct lightrec_cstate *state, const struct block *block, u16 offset)
163 {
164         union code c = block->opcode_list[offset].c;
165
166         _jit_name(block->_jit, __func__);
167         lightrec_emit_end_of_block(state, block, offset, c.r.rs, 0, 31, 0, true);
168 }
169
170 static void rec_special_JALR(struct lightrec_cstate *state, const struct block *block, u16 offset)
171 {
172         union code c = block->opcode_list[offset].c;
173
174         _jit_name(block->_jit, __func__);
175         lightrec_emit_end_of_block(state, block, offset, c.r.rs, 0, c.r.rd,
176                                    get_branch_pc(block, offset, 2), true);
177 }
178
179 static void rec_J(struct lightrec_cstate *state, const struct block *block, u16 offset)
180 {
181         union code c = block->opcode_list[offset].c;
182
183         _jit_name(block->_jit, __func__);
184         lightrec_emit_end_of_block(state, block, offset, -1,
185                                    (block->pc & 0xf0000000) | (c.j.imm << 2),
186                                    31, 0, true);
187 }
188
189 static void rec_JAL(struct lightrec_cstate *state, const struct block *block, u16 offset)
190 {
191         union code c = block->opcode_list[offset].c;
192
193         _jit_name(block->_jit, __func__);
194         lightrec_emit_end_of_block(state, block, offset, -1,
195                                    (block->pc & 0xf0000000) | (c.j.imm << 2),
196                                    31, get_branch_pc(block, offset, 2), true);
197 }
198
199 static void lightrec_do_early_unload(struct lightrec_cstate *state,
200                                      const struct block *block, u16 offset)
201 {
202         struct regcache *reg_cache = state->reg_cache;
203         const struct opcode *op = &block->opcode_list[offset];
204         jit_state_t *_jit = block->_jit;
205         unsigned int i;
206         u8 reg;
207         struct {
208                 u8 reg, op;
209         } reg_ops[3] = {
210                 { op->r.rd, LIGHTREC_FLAGS_GET_RD(op->flags), },
211                 { op->i.rt, LIGHTREC_FLAGS_GET_RT(op->flags), },
212                 { op->i.rs, LIGHTREC_FLAGS_GET_RS(op->flags), },
213         };
214
215         for (i = 0; i < ARRAY_SIZE(reg_ops); i++) {
216                 reg = reg_ops[i].reg;
217
218                 switch (reg_ops[i].op) {
219                 case LIGHTREC_REG_UNLOAD:
220                         lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, true);
221                         break;
222
223                 case LIGHTREC_REG_DISCARD:
224                         lightrec_discard_reg_if_loaded(reg_cache, reg);
225                         break;
226
227                 case LIGHTREC_REG_CLEAN:
228                         lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, false);
229                         break;
230                 default:
231                         break;
232                 };
233         }
234 }
235
/*
 * Common emitter for the conditional (and always-taken) branch opcodes.
 *
 * code:          jit branch code emitted *before* the taken path; its target
 *                is patched to the not-taken path, so callers pass the
 *                inverse of the branch condition here
 * code2:         jit branch code for the branch condition itself; only used
 *                when the branch can jump straight to a local target
 *                ("no indirection")
 * link:          when non-zero, value written to emulated register 31
 * unconditional: the branch is always taken; no comparison is emitted and
 *                no not-taken path is generated
 * bz:            the comparison is against the immediate 0 rather than
 *                against register rt
 */
236 static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset,
237                   jit_code_t code, jit_code_t code2, u32 link, bool unconditional, bool bz)
238 {
239         struct regcache *reg_cache = state->reg_cache;
240         struct native_register *regs_backup;
241         jit_state_t *_jit = block->_jit;
242         struct lightrec_branch *branch;
243         const struct opcode *op = &block->opcode_list[offset],
244                             *ds = get_delay_slot(block->opcode_list, offset);
245         jit_node_t *addr;
246         bool is_forward = (s16)op->i.imm >= 0;
247         int op_cycles = lightrec_cycles_of_opcode(op->c);
248         u32 target_offset, cycles = state->cycles + op_cycles;
249         bool no_indirection = false;
250         u32 next_pc;
251         u8 rs, rt;
252
253         jit_note(__FILE__, __LINE__);
254
        /* The delay slot's cycles are charged together with the branch */
255         if (!op_flag_no_ds(op->flags))
256                 cycles += lightrec_cycles_of_opcode(ds->c);
257
        /* NOTE(review): the counter is reset to minus this opcode's cost,
         * presumably so that the caller's own accounting of the opcode nets
         * to zero -- confirm against the caller. */
258         state->cycles = -op_cycles;
259
260         if (!unconditional) {
261                 rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT);
262                 rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache,
263                                                     _jit, op->i.rt, REG_EXT);
264
265                 /* Unload dead registers before evaluating the branch */
266                 if (OPT_EARLY_UNLOAD)
267                         lightrec_do_early_unload(state, block, offset);
268
                /* A forward local branch with no delay slot to emit and no
                 * dirty register can jump directly on the branch condition,
                 * without the skip-over branch generated from 'code'. */
269                 if (op_flag_local_branch(op->flags) &&
270                     (op_flag_no_ds(op->flags) || !ds->opcode) &&
271                     is_forward && !lightrec_has_dirty_regs(reg_cache))
272                         no_indirection = true;
273
274                 if (no_indirection)
275                         pr_debug("Using no indirection for branch at offset 0x%hx\n", offset << 2);
276         }
277
278         if (cycles)
279                 jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
280
281         if (!unconditional) {
282                 /* Generate the branch opcode */
283                 if (!no_indirection)
284                         addr = jit_new_node_pww(code, NULL, rs, rt);
285
                /* Save the register cache state; it is restored further
                 * down, before the not-taken path is emitted. */
286                 lightrec_free_regs(reg_cache);
287                 regs_backup = lightrec_regcache_enter_branch(reg_cache);
288         }
289
        /* Taken path, local target: record a branch that gets resolved once
         * the target opcode's address is known. */
290         if (op_flag_local_branch(op->flags)) {
291                 /* Recompile the delay slot */
292                 if (!op_flag_no_ds(op->flags) && ds->opcode) {
293                         /* Never handle load delays with local branches. */
294                         state->no_load_delay = true;
295                         lightrec_rec_opcode(state, block, offset + 1);
296                 }
297
298                 if (link)
299                         update_ra_register(reg_cache, _jit, 31, block->pc, link);
300
301                 /* Clean remaining registers */
302                 lightrec_clean_regs(reg_cache, _jit);
303
304                 target_offset = offset + 1 + (s16)op->i.imm
305                         - !!op_flag_no_ds(op->flags);
306                 pr_debug("Adding local branch to offset 0x%x\n",
307                          target_offset << 2);
308                 branch = &state->local_branches[
309                         state->nb_local_branches++];
310
311                 branch->target = target_offset;
312
                /* Backward local branches are only followed while the cycle
                 * counter is above zero; otherwise execution falls through
                 * to the end-of-block emitted right below. */
313                 if (no_indirection)
314                         branch->branch = jit_new_node_pww(code2, NULL, rs, rt);
315                 else if (is_forward)
316                         branch->branch = jit_b();
317                 else
318                         branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0);
319         }
320
        /* Taken path, leaving the block: non-local branches, plus the
         * fall-through of backward local branches. Cycles were already
         * subtracted above, hence update_cycles == false. */
321         if (!op_flag_local_branch(op->flags) || !is_forward) {
322                 next_pc = get_branch_pc(block, offset, 1 + (s16)op->i.imm);
323                 state->no_load_delay = op_flag_local_branch(op->flags);
324                 lightrec_emit_end_of_block(state, block, offset, -1, next_pc,
325                                            31, link, false);
326         }
327
        /* Not-taken path: land the skip-over branch here, restore the saved
         * register cache state, and emit the delay slot once more. */
328         if (!unconditional) {
329                 if (!no_indirection)
330                         jit_patch(addr);
331
332                 lightrec_regcache_leave_branch(reg_cache, regs_backup);
333
                /* With bz set, register 31 is also written when the branch
                 * is not taken */
334                 if (bz && link)
335                         update_ra_register(reg_cache, _jit, 31, block->pc, link);
336
337                 if (!op_flag_no_ds(op->flags) && ds->opcode) {
338                         state->no_load_delay = true;
339                         lightrec_rec_opcode(state, block, offset + 1);
340                 }
341         }
342 }
343
344 static void rec_BNE(struct lightrec_cstate *state,
345                     const struct block *block, u16 offset)
346 {
347         union code c = block->opcode_list[offset].c;
348
349         _jit_name(block->_jit, __func__);
350
351         if (c.i.rt == 0)
352                 rec_b(state, block, offset, jit_code_beqi, jit_code_bnei, 0, false, true);
353         else
354                 rec_b(state, block, offset, jit_code_beqr, jit_code_bner, 0, false, false);
355 }
356
357 static void rec_BEQ(struct lightrec_cstate *state,
358                     const struct block *block, u16 offset)
359 {
360         union code c = block->opcode_list[offset].c;
361
362         _jit_name(block->_jit, __func__);
363
364         if (c.i.rt == 0)
365                 rec_b(state, block, offset, jit_code_bnei, jit_code_beqi, 0, c.i.rs == 0, true);
366         else
367                 rec_b(state, block, offset, jit_code_bner, jit_code_beqr, 0, c.i.rs == c.i.rt, false);
368 }
369
370 static void rec_BLEZ(struct lightrec_cstate *state,
371                      const struct block *block, u16 offset)
372 {
373         union code c = block->opcode_list[offset].c;
374
375         _jit_name(block->_jit, __func__);
376         rec_b(state, block, offset, jit_code_bgti, jit_code_blei, 0, c.i.rs == 0, true);
377 }
378
379 static void rec_BGTZ(struct lightrec_cstate *state,
380                      const struct block *block, u16 offset)
381 {
382         _jit_name(block->_jit, __func__);
383         rec_b(state, block, offset, jit_code_blei, jit_code_bgti, 0, false, true);
384 }
385
386 static void rec_regimm_BLTZ(struct lightrec_cstate *state,
387                             const struct block *block, u16 offset)
388 {
389         _jit_name(block->_jit, __func__);
390         rec_b(state, block, offset, jit_code_bgei, jit_code_blti, 0, false, true);
391 }
392
393 static void rec_regimm_BLTZAL(struct lightrec_cstate *state,
394                               const struct block *block, u16 offset)
395 {
396         _jit_name(block->_jit, __func__);
397         rec_b(state, block, offset, jit_code_bgei, jit_code_blti,
398               get_branch_pc(block, offset, 2), false, true);
399 }
400
401 static void rec_regimm_BGEZ(struct lightrec_cstate *state,
402                             const struct block *block, u16 offset)
403 {
404         union code c = block->opcode_list[offset].c;
405
406         _jit_name(block->_jit, __func__);
407         rec_b(state, block, offset, jit_code_blti, jit_code_bgei, 0, !c.i.rs, true);
408 }
409
410 static void rec_regimm_BGEZAL(struct lightrec_cstate *state,
411                               const struct block *block, u16 offset)
412 {
413         const struct opcode *op = &block->opcode_list[offset];
414         _jit_name(block->_jit, __func__);
415         rec_b(state, block, offset, jit_code_blti, jit_code_bgei,
416               get_branch_pc(block, offset, 2),
417               !op->i.rs, true);
418 }
419
420 static void rec_alu_imm(struct lightrec_cstate *state, const struct block *block,
421                         u16 offset, jit_code_t code, bool slti)
422 {
423         struct regcache *reg_cache = state->reg_cache;
424         union code c = block->opcode_list[offset].c;
425         jit_state_t *_jit = block->_jit;
426         u8 rs, rt, out_flags = REG_EXT;
427
428         if (slti)
429                 out_flags |= REG_ZEXT;
430
431         jit_note(__FILE__, __LINE__);
432         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, REG_EXT);
433         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, out_flags);
434
435         jit_new_node_www(code, rt, rs, (s32)(s16) c.i.imm);
436
437         lightrec_free_reg(reg_cache, rs);
438         lightrec_free_reg(reg_cache, rt);
439 }
440
441 static void rec_alu_special(struct lightrec_cstate *state, const struct block *block,
442                             u16 offset, jit_code_t code, bool out_ext)
443 {
444         struct regcache *reg_cache = state->reg_cache;
445         union code c = block->opcode_list[offset].c;
446         jit_state_t *_jit = block->_jit;
447         u8 rd, rt, rs;
448
449         jit_note(__FILE__, __LINE__);
450         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, REG_EXT);
451         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, REG_EXT);
452         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd,
453                                     out_ext ? REG_EXT | REG_ZEXT : 0);
454
455         jit_new_node_www(code, rd, rs, rt);
456
457         lightrec_free_reg(reg_cache, rs);
458         lightrec_free_reg(reg_cache, rt);
459         lightrec_free_reg(reg_cache, rd);
460 }
461
462 static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *block,
463                            u16 offset, jit_code_t code)
464 {
465         struct regcache *reg_cache = state->reg_cache;
466         union code c = block->opcode_list[offset].c;
467         jit_state_t *_jit = block->_jit;
468         u8 rd, rt, rs, temp, flags = 0;
469
470         jit_note(__FILE__, __LINE__);
471         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
472
473         if (code == jit_code_rshr)
474                 flags = REG_EXT;
475         else if (code == jit_code_rshr_u)
476                 flags = REG_ZEXT;
477
478         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
479         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags);
480
481         if (rs != rd && rt != rd) {
482                 jit_andi(rd, rs, 0x1f);
483                 jit_new_node_www(code, rd, rt, rd);
484         } else {
485                 temp = lightrec_alloc_reg_temp(reg_cache, _jit);
486                 jit_andi(temp, rs, 0x1f);
487                 jit_new_node_www(code, rd, rt, temp);
488                 lightrec_free_reg(reg_cache, temp);
489         }
490
491         lightrec_free_reg(reg_cache, rs);
492         lightrec_free_reg(reg_cache, rt);
493         lightrec_free_reg(reg_cache, rd);
494 }
495
496 static void rec_movi(struct lightrec_cstate *state,
497                      const struct block *block, u16 offset)
498 {
499         struct regcache *reg_cache = state->reg_cache;
500         union code c = block->opcode_list[offset].c;
501         jit_state_t *_jit = block->_jit;
502         u16 flags = REG_EXT;
503         u8 rt;
504
505         if (!(c.i.imm & 0x8000))
506                 flags |= REG_ZEXT;
507
508         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
509
510         jit_movi(rt, (s32)(s16) c.i.imm);
511
512         lightrec_free_reg(reg_cache, rt);
513 }
514
515 static void rec_ADDIU(struct lightrec_cstate *state,
516                       const struct block *block, u16 offset)
517 {
518         _jit_name(block->_jit, __func__);
519
520         if (block->opcode_list[offset].c.i.rs)
521                 rec_alu_imm(state, block, offset, jit_code_addi, false);
522         else
523                 rec_movi(state, block, offset);
524 }
525
526 static void rec_ADDI(struct lightrec_cstate *state,
527                      const struct block *block, u16 offset)
528 {
529         /* TODO: Handle the exception? */
530         _jit_name(block->_jit, __func__);
531         rec_ADDIU(state, block, offset);
532 }
533
534 static void rec_SLTIU(struct lightrec_cstate *state,
535                       const struct block *block, u16 offset)
536 {
537         _jit_name(block->_jit, __func__);
538         rec_alu_imm(state, block, offset, jit_code_lti_u, true);
539 }
540
541 static void rec_SLTI(struct lightrec_cstate *state,
542                      const struct block *block, u16 offset)
543 {
544         _jit_name(block->_jit, __func__);
545         rec_alu_imm(state, block, offset, jit_code_lti, true);
546 }
547
548 static void rec_ANDI(struct lightrec_cstate *state,
549                      const struct block *block, u16 offset)
550 {
551         struct regcache *reg_cache = state->reg_cache;
552         union code c = block->opcode_list[offset].c;
553         jit_state_t *_jit = block->_jit;
554         u8 rs, rt;
555
556         _jit_name(block->_jit, __func__);
557         jit_note(__FILE__, __LINE__);
558         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
559         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt,
560                                     REG_EXT | REG_ZEXT);
561
562         /* PSX code uses ANDI 0xff / ANDI 0xffff a lot, which are basically
563          * casts to uint8_t / uint16_t. */
564         if (c.i.imm == 0xff)
565                 jit_extr_uc(rt, rs);
566         else if (c.i.imm == 0xffff)
567                 jit_extr_us(rt, rs);
568         else
569                 jit_andi(rt, rs, (u32)(u16) c.i.imm);
570
571         lightrec_free_reg(reg_cache, rs);
572         lightrec_free_reg(reg_cache, rt);
573 }
574
575 static void rec_alu_or_xor(struct lightrec_cstate *state, const struct block *block,
576                            u16 offset, jit_code_t code)
577 {
578         struct regcache *reg_cache = state->reg_cache;
579         union code c = block->opcode_list[offset].c;
580         jit_state_t *_jit = block->_jit;
581         u8 rs, rt, flags;
582
583         jit_note(__FILE__, __LINE__);
584         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
585         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, 0);
586
587         flags = lightrec_get_reg_in_flags(reg_cache, rs);
588         lightrec_set_reg_out_flags(reg_cache, rt, flags);
589
590         jit_new_node_www(code, rt, rs, (u32)(u16) c.i.imm);
591
592         lightrec_free_reg(reg_cache, rs);
593         lightrec_free_reg(reg_cache, rt);
594 }
595
596
597 static void rec_ORI(struct lightrec_cstate *state,
598                     const struct block *block, u16 offset)
599 {
600         _jit_name(block->_jit, __func__);
601         rec_alu_or_xor(state, block, offset, jit_code_ori);
602 }
603
604 static void rec_XORI(struct lightrec_cstate *state,
605                      const struct block *block, u16 offset)
606 {
607         _jit_name(block->_jit, __func__);
608         rec_alu_or_xor(state, block, offset, jit_code_xori);
609 }
610
611 static void rec_LUI(struct lightrec_cstate *state,
612                     const struct block *block, u16 offset)
613 {
614         struct regcache *reg_cache = state->reg_cache;
615         union code c = block->opcode_list[offset].c;
616         jit_state_t *_jit = block->_jit;
617         u8 rt, flags = REG_EXT;
618
619         jit_name(__func__);
620         jit_note(__FILE__, __LINE__);
621
622         if (!(c.i.imm & BIT(15)))
623                 flags |= REG_ZEXT;
624
625         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
626
627         jit_movi(rt, (s32)(c.i.imm << 16));
628
629         lightrec_free_reg(reg_cache, rt);
630 }
631
632 static void rec_special_ADDU(struct lightrec_cstate *state,
633                              const struct block *block, u16 offset)
634 {
635         _jit_name(block->_jit, __func__);
636         rec_alu_special(state, block, offset, jit_code_addr, false);
637 }
638
639 static void rec_special_ADD(struct lightrec_cstate *state,
640                             const struct block *block, u16 offset)
641 {
642         /* TODO: Handle the exception? */
643         _jit_name(block->_jit, __func__);
644         rec_alu_special(state, block, offset, jit_code_addr, false);
645 }
646
647 static void rec_special_SUBU(struct lightrec_cstate *state,
648                              const struct block *block, u16 offset)
649 {
650         _jit_name(block->_jit, __func__);
651         rec_alu_special(state, block, offset, jit_code_subr, false);
652 }
653
654 static void rec_special_SUB(struct lightrec_cstate *state,
655                             const struct block *block, u16 offset)
656 {
657         /* TODO: Handle the exception? */
658         _jit_name(block->_jit, __func__);
659         rec_alu_special(state, block, offset, jit_code_subr, false);
660 }
661
662 static void rec_special_AND(struct lightrec_cstate *state,
663                             const struct block *block, u16 offset)
664 {
665         struct regcache *reg_cache = state->reg_cache;
666         union code c = block->opcode_list[offset].c;
667         jit_state_t *_jit = block->_jit;
668         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
669
670         _jit_name(block->_jit, __func__);
671         jit_note(__FILE__, __LINE__);
672         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
673         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
674         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
675
676         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
677         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
678
679         /* Z(rd) = Z(rs) | Z(rt) */
680         flags_rd = REG_ZEXT & (flags_rs | flags_rt);
681
682         /* E(rd) = (E(rt) & Z(rt)) | (E(rs) & Z(rs)) | (E(rs) & E(rt)) */
683         if (((flags_rs & REG_EXT) && (flags_rt & REG_ZEXT)) ||
684             ((flags_rt & REG_EXT) && (flags_rs & REG_ZEXT)) ||
685             (REG_EXT & flags_rs & flags_rt))
686                 flags_rd |= REG_EXT;
687
688         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
689
690         jit_andr(rd, rs, rt);
691
692         lightrec_free_reg(reg_cache, rs);
693         lightrec_free_reg(reg_cache, rt);
694         lightrec_free_reg(reg_cache, rd);
695 }
696
697 static void rec_special_or_nor(struct lightrec_cstate *state,
698                                const struct block *block, u16 offset, bool nor)
699 {
700         struct regcache *reg_cache = state->reg_cache;
701         union code c = block->opcode_list[offset].c;
702         jit_state_t *_jit = block->_jit;
703         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd = 0;
704
705         jit_note(__FILE__, __LINE__);
706         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
707         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
708         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
709
710         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
711         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
712
713         /* or: Z(rd) = Z(rs) & Z(rt)
714          * nor: Z(rd) = 0 */
715         if (!nor)
716                 flags_rd = REG_ZEXT & flags_rs & flags_rt;
717
718         /* E(rd) = E(rs) & E(rt) */
719         if (REG_EXT & flags_rs & flags_rt)
720                 flags_rd |= REG_EXT;
721
722         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
723
724         jit_orr(rd, rs, rt);
725
726         if (nor)
727                 jit_comr(rd, rd);
728
729         lightrec_free_reg(reg_cache, rs);
730         lightrec_free_reg(reg_cache, rt);
731         lightrec_free_reg(reg_cache, rd);
732 }
733
734 static void rec_special_OR(struct lightrec_cstate *state,
735                            const struct block *block, u16 offset)
736 {
737         _jit_name(block->_jit, __func__);
738         rec_special_or_nor(state, block, offset, false);
739 }
740
741 static void rec_special_NOR(struct lightrec_cstate *state,
742                             const struct block *block, u16 offset)
743 {
744         _jit_name(block->_jit, __func__);
745         rec_special_or_nor(state, block, offset, true);
746 }
747
748 static void rec_special_XOR(struct lightrec_cstate *state,
749                             const struct block *block, u16 offset)
750 {
751         struct regcache *reg_cache = state->reg_cache;
752         union code c = block->opcode_list[offset].c;
753         jit_state_t *_jit = block->_jit;
754         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
755
756         _jit_name(block->_jit, __func__);
757
758         jit_note(__FILE__, __LINE__);
759         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
760         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
761         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
762
763         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
764         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
765
766         /* Z(rd) = Z(rs) & Z(rt) */
767         flags_rd = REG_ZEXT & flags_rs & flags_rt;
768
769         /* E(rd) = E(rs) & E(rt) */
770         flags_rd |= REG_EXT & flags_rs & flags_rt;
771
772         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
773
774         jit_xorr(rd, rs, rt);
775
776         lightrec_free_reg(reg_cache, rs);
777         lightrec_free_reg(reg_cache, rt);
778         lightrec_free_reg(reg_cache, rd);
779 }
780
781 static void rec_special_SLTU(struct lightrec_cstate *state,
782                              const struct block *block, u16 offset)
783 {
784         _jit_name(block->_jit, __func__);
785         rec_alu_special(state, block, offset, jit_code_ltr_u, true);
786 }
787
788 static void rec_special_SLT(struct lightrec_cstate *state,
789                             const struct block *block, u16 offset)
790 {
791         _jit_name(block->_jit, __func__);
792         rec_alu_special(state, block, offset, jit_code_ltr, true);
793 }
794
795 static void rec_special_SLLV(struct lightrec_cstate *state,
796                              const struct block *block, u16 offset)
797 {
798         _jit_name(block->_jit, __func__);
799         rec_alu_shiftv(state, block, offset, jit_code_lshr);
800 }
801
802 static void rec_special_SRLV(struct lightrec_cstate *state,
803                              const struct block *block, u16 offset)
804 {
805         _jit_name(block->_jit, __func__);
806         rec_alu_shiftv(state, block, offset, jit_code_rshr_u);
807 }
808
809 static void rec_special_SRAV(struct lightrec_cstate *state,
810                              const struct block *block, u16 offset)
811 {
812         _jit_name(block->_jit, __func__);
813         rec_alu_shiftv(state, block, offset, jit_code_rshr);
814 }
815
816 static void rec_alu_shift(struct lightrec_cstate *state, const struct block *block,
817                           u16 offset, jit_code_t code)
818 {
819         struct regcache *reg_cache = state->reg_cache;
820         union code c = block->opcode_list[offset].c;
821         jit_state_t *_jit = block->_jit;
822         u8 rd, rt, flags = 0;
823
824         jit_note(__FILE__, __LINE__);
825
826         if (code == jit_code_rshi)
827                 flags = REG_EXT;
828         else if (code == jit_code_rshi_u)
829                 flags = REG_ZEXT;
830
831         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
832
833         /* Input reg is zero-extended, if we SRL at least by one bit, we know
834          * the output reg will be both zero-extended and sign-extended. */
835         if (code == jit_code_rshi_u && c.r.imm)
836                 flags |= REG_EXT;
837         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags);
838
839         jit_new_node_www(code, rd, rt, c.r.imm);
840
841         lightrec_free_reg(reg_cache, rt);
842         lightrec_free_reg(reg_cache, rd);
843 }
844
845 static void rec_special_SLL(struct lightrec_cstate *state,
846                             const struct block *block, u16 offset)
847 {
848         _jit_name(block->_jit, __func__);
849         rec_alu_shift(state, block, offset, jit_code_lshi);
850 }
851
852 static void rec_special_SRL(struct lightrec_cstate *state,
853                             const struct block *block, u16 offset)
854 {
855         _jit_name(block->_jit, __func__);
856         rec_alu_shift(state, block, offset, jit_code_rshi_u);
857 }
858
859 static void rec_special_SRA(struct lightrec_cstate *state,
860                             const struct block *block, u16 offset)
861 {
862         _jit_name(block->_jit, __func__);
863         rec_alu_shift(state, block, offset, jit_code_rshi);
864 }
865
/* Emit code for the multiplication opcode at `offset` in `block`.
 *
 * is_signed selects a signed (true) or unsigned (false) multiply. The guest
 * registers receiving the low and high halves are given by
 * get_mult_div_lo() / get_mult_div_hi(); a half is not written when the
 * corresponding op_flag_no_lo() / op_flag_no_hi() flag is set on the opcode. */
static void rec_alu_mult(struct lightrec_cstate *state,
                         const struct block *block, u16 offset, bool is_signed)
{
        struct regcache *reg_cache = state->reg_cache;
        union code c = block->opcode_list[offset].c;
        u32 flags = block->opcode_list[offset].flags;
        u8 reg_lo = get_mult_div_lo(c);
        u8 reg_hi = get_mult_div_hi(c);
        jit_state_t *_jit = block->_jit;
        u8 lo, hi, rs, rt, rflags = 0;

        jit_note(__FILE__, __LINE__);

        /* Request the inputs extended in the way that matches the signedness
         * of the multiplication. */
        if (is_signed)
                rflags = REG_EXT;
        else
                rflags = REG_ZEXT;

        rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
        rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);

        /* On 32-bit hosts the multiply below always writes `lo`, so a
         * temporary stands in when the guest LO register is not wanted. */
        if (!op_flag_no_lo(flags))
                lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
        else if (__WORDSIZE == 32)
                lo = lightrec_alloc_reg_temp(reg_cache, _jit);

        if (!op_flag_no_hi(flags))
                hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT);

        if (__WORDSIZE == 32) {
                /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
                 * operation if the MULT was detected as 32-bit only. */
                if (!op_flag_no_hi(flags)) {
                        if (is_signed)
                                jit_qmulr(lo, hi, rs, rt);
                        else
                                jit_qmulr_u(lo, hi, rs, rt);
                } else {
                        jit_mulr(lo, rs, rt);
                }
        } else {
                /* On 64-bit systems, do a 64*64->64 bit operation. */
                if (op_flag_no_lo(flags)) {
                        /* NOTE(review): `hi` is only allocated when the no_hi
                         * flag is clear; this path presumably relies on no_lo
                         * and no_hi never being set together - confirm. */
                        jit_mulr(hi, rs, rt);
                        jit_rshi(hi, hi, 32);
                } else {
                        jit_mulr(lo, rs, rt);

                        /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
                        if (!op_flag_no_hi(flags))
                                jit_rshi(hi, lo, 32);
                }
        }

        lightrec_free_reg(reg_cache, rs);
        lightrec_free_reg(reg_cache, rt);
        /* `lo` was allocated either as the guest LO register or, on 32-bit
         * hosts, as a temporary. */
        if (!op_flag_no_lo(flags) || __WORDSIZE == 32)
                lightrec_free_reg(reg_cache, lo);
        if (!op_flag_no_hi(flags))
                lightrec_free_reg(reg_cache, hi);
}
927
/* Emit code for the division opcode at `offset` in `block`.
 *
 * is_signed selects a signed (true) or unsigned (false) division of rs by rt.
 * The quotient goes to the register given by get_mult_div_lo() and the
 * remainder to the one given by get_mult_div_hi(); either is skipped when the
 * op_flag_no_lo() / op_flag_no_hi() flag is set. Unless the opcode carries
 * the "no div check" flag, a run-time test for a zero divisor is emitted
 * together with a handler that fills in the results for that case. */
static void rec_alu_div(struct lightrec_cstate *state,
                        const struct block *block, u16 offset, bool is_signed)
{
        struct regcache *reg_cache = state->reg_cache;
        union code c = block->opcode_list[offset].c;
        u32 flags = block->opcode_list[offset].flags;
        bool no_check = op_flag_no_div_check(flags);
        u8 reg_lo = get_mult_div_lo(c);
        u8 reg_hi = get_mult_div_hi(c);
        jit_state_t *_jit = block->_jit;
        jit_node_t *branch, *to_end;
        u8 lo = 0, hi = 0, rs, rt, rflags = 0;

        jit_note(__FILE__, __LINE__);

        /* Request the inputs extended in the way that matches the signedness
         * of the division. */
        if (is_signed)
                rflags = REG_EXT;
        else
                rflags = REG_ZEXT;

        rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
        rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);

        if (!op_flag_no_lo(flags))
                lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);

        if (!op_flag_no_hi(flags))
                hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, 0);

        /* Jump to special handler if dividing by zero  */
        if (!no_check)
                branch = jit_beqi(rt, 0);

        /* Only compute what is needed: remainder alone, quotient alone, or
         * both at once. */
        if (op_flag_no_lo(flags)) {
                if (is_signed)
                        jit_remr(hi, rs, rt);
                else
                        jit_remr_u(hi, rs, rt);
        } else if (op_flag_no_hi(flags)) {
                if (is_signed)
                        jit_divr(lo, rs, rt);
                else
                        jit_divr_u(lo, rs, rt);
        } else {
                if (is_signed)
                        jit_qdivr(lo, hi, rs, rt);
                else
                        jit_qdivr_u(lo, hi, rs, rt);
        }

        if (!no_check) {
                /* Jump above the div-by-zero handler */
                to_end = jit_b();

                jit_patch(branch);

                /* Division by zero handler: rt is known to be zero here. */
                if (!op_flag_no_lo(flags)) {
                        if (is_signed) {
                                /* (rs < rt) is (rs < 0), giving 1 or 0;
                                 * doubled minus one this yields LO = 1 for a
                                 * negative dividend, LO = -1 otherwise. */
                                jit_ltr(lo, rs, rt);
                                jit_lshi(lo, lo, 1);
                                jit_subi(lo, lo, 1);
                        } else {
                                /* rt - 1 with rt == 0: LO = -1 (all ones). */
                                jit_subi(lo, rt, 1);
                        }
                }

                /* The remainder is the dividend itself. */
                if (!op_flag_no_hi(flags))
                        jit_movr(hi, rs);

                jit_patch(to_end);
        }

        lightrec_free_reg(reg_cache, rs);
        lightrec_free_reg(reg_cache, rt);

        if (!op_flag_no_lo(flags))
                lightrec_free_reg(reg_cache, lo);

        if (!op_flag_no_hi(flags))
                lightrec_free_reg(reg_cache, hi);
}
1009
1010 static void rec_special_MULT(struct lightrec_cstate *state,
1011                              const struct block *block, u16 offset)
1012 {
1013         _jit_name(block->_jit, __func__);
1014         rec_alu_mult(state, block, offset, true);
1015 }
1016
1017 static void rec_special_MULTU(struct lightrec_cstate *state,
1018                               const struct block *block, u16 offset)
1019 {
1020         _jit_name(block->_jit, __func__);
1021         rec_alu_mult(state, block, offset, false);
1022 }
1023
1024 static void rec_special_DIV(struct lightrec_cstate *state,
1025                             const struct block *block, u16 offset)
1026 {
1027         _jit_name(block->_jit, __func__);
1028         rec_alu_div(state, block, offset, true);
1029 }
1030
1031 static void rec_special_DIVU(struct lightrec_cstate *state,
1032                              const struct block *block, u16 offset)
1033 {
1034         _jit_name(block->_jit, __func__);
1035         rec_alu_div(state, block, offset, false);
1036 }
1037
1038 static void rec_alu_mv_lo_hi(struct lightrec_cstate *state,
1039                              const struct block *block, u8 dst, u8 src)
1040 {
1041         struct regcache *reg_cache = state->reg_cache;
1042         jit_state_t *_jit = block->_jit;
1043
1044         jit_note(__FILE__, __LINE__);
1045         src = lightrec_alloc_reg_in(reg_cache, _jit, src, 0);
1046         dst = lightrec_alloc_reg_out(reg_cache, _jit, dst, REG_EXT);
1047
1048         jit_extr_i(dst, src);
1049
1050         lightrec_free_reg(reg_cache, src);
1051         lightrec_free_reg(reg_cache, dst);
1052 }
1053
1054 static void rec_special_MFHI(struct lightrec_cstate *state,
1055                              const struct block *block, u16 offset)
1056 {
1057         union code c = block->opcode_list[offset].c;
1058
1059         _jit_name(block->_jit, __func__);
1060         rec_alu_mv_lo_hi(state, block, c.r.rd, REG_HI);
1061 }
1062
1063 static void rec_special_MTHI(struct lightrec_cstate *state,
1064                              const struct block *block, u16 offset)
1065 {
1066         union code c = block->opcode_list[offset].c;
1067
1068         _jit_name(block->_jit, __func__);
1069         rec_alu_mv_lo_hi(state, block, REG_HI, c.r.rs);
1070 }
1071
1072 static void rec_special_MFLO(struct lightrec_cstate *state,
1073                              const struct block *block, u16 offset)
1074 {
1075         union code c = block->opcode_list[offset].c;
1076
1077         _jit_name(block->_jit, __func__);
1078         rec_alu_mv_lo_hi(state, block, c.r.rd, REG_LO);
1079 }
1080
1081 static void rec_special_MTLO(struct lightrec_cstate *state,
1082                              const struct block *block, u16 offset)
1083 {
1084         union code c = block->opcode_list[offset].c;
1085
1086         _jit_name(block->_jit, __func__);
1087         rec_alu_mv_lo_hi(state, block, REG_LO, c.r.rs);
1088 }
1089
/* Emit a call to one of the C wrappers.
 *
 * The entry point is state->state->wrappers_eps[wrapper]; it is loaded at run
 * time from the lightrec_state structure unless a host register is already
 * known to contain it. `arg` is passed as the single (immediate) argument of
 * the call. */
static void call_to_c_wrapper(struct lightrec_cstate *state,
                              const struct block *block, u32 arg,
                              enum c_wrappers wrapper)
{
        struct regcache *reg_cache = state->reg_cache;
        jit_state_t *_jit = block->_jit;
        s8 tmp, tmp2;

        /* Make sure JIT_R1 is not mapped; it will be used in the C wrapper. */
        tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);

        /* Look for a register already holding the entry point address; a
         * negative result means there is none and it must be loaded. */
        tmp = lightrec_get_reg_with_value(reg_cache,
                                          (intptr_t) state->state->wrappers_eps[wrapper]);
        if (tmp < 0) {
                tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
                jit_ldxi(tmp, LIGHTREC_REG_STATE,
                         offsetof(struct lightrec_state, wrappers_eps[wrapper]));

                /* Record the value now held by the temporary so that a later
                 * lookup for the same address can find it. */
                lightrec_temp_set_value(reg_cache, tmp,
                                        (intptr_t) state->state->wrappers_eps[wrapper]);
        }

        lightrec_free_reg(reg_cache, tmp2);

#ifdef __mips__
        /* On MIPS, register t9 is always used as the target register for JALR.
         * Therefore if it does not contain the target address we must
         * invalidate it. */
        if (tmp != _T9)
                lightrec_unload_reg(reg_cache, _jit, _T9);
#endif

        jit_prepare();
        jit_pushargi(arg);

        /* The mapped registers are marked live both right before the call
         * and right after it. */
        lightrec_regcache_mark_live(reg_cache, _jit);
        jit_callr(tmp);

        lightrec_free_reg(reg_cache, tmp);
        lightrec_regcache_mark_live(reg_cache, _jit);
}
1131
1132 static void rec_io(struct lightrec_cstate *state,
1133                    const struct block *block, u16 offset,
1134                    bool load_rt, bool read_rt)
1135 {
1136         struct regcache *reg_cache = state->reg_cache;
1137         jit_state_t *_jit = block->_jit;
1138         union code c = block->opcode_list[offset].c;
1139         u32 flags = block->opcode_list[offset].flags;
1140         bool is_tagged = LIGHTREC_FLAGS_GET_IO_MODE(flags);
1141         u32 lut_entry;
1142         u8 zero;
1143
1144         jit_note(__FILE__, __LINE__);
1145
1146         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
1147
1148         if (read_rt && likely(c.i.rt))
1149                 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
1150         else if (load_rt)
1151                 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
1152
1153         if (op_flag_load_delay(flags) && !state->no_load_delay) {
1154                 /* Clear state->in_delay_slot_n. This notifies the lightrec_rw
1155                  * wrapper that it should write the REG_TEMP register instead of
1156                  * the actual output register of the opcode. */
1157                 zero = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
1158                 jit_stxi_c(offsetof(struct lightrec_state, in_delay_slot_n),
1159                             LIGHTREC_REG_STATE, zero);
1160                 lightrec_free_reg(reg_cache, zero);
1161         }
1162
1163         if (is_tagged) {
1164                 call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_RW);
1165         } else {
1166                 lut_entry = lightrec_get_lut_entry(block);
1167                 call_to_c_wrapper(state, block, (lut_entry << 16) | offset,
1168                                   C_WRAPPER_RW_GENERIC);
1169         }
1170 }
1171
1172 static u32 rec_ram_mask(struct lightrec_state *state)
1173 {
1174         return (RAM_SIZE << (state->mirrors_mapped * 2)) - 1;
1175 }
1176
1177 static u32 rec_io_mask(const struct lightrec_state *state)
1178 {
1179         u32 length = state->maps[PSX_MAP_HW_REGISTERS].length;
1180
1181         return GENMASK(31 - clz32(length - 1), 0);
1182 }
1183
/* Emit a store to a memory region whose host mapping is known when the block
 * is compiled.
 *
 * code:        Lightning store operation to emit.
 * swap_code:   byte-swap operation applied to the value on big-endian hosts
 *              (0 for none).
 * addr_offset: constant added to the (masked) guest address to obtain the
 *              host address.
 * addr_mask:   mask applied to the guest address, unless the opcode carries
 *              the "no mask" flag.
 * invalidate:  when set, a zero is also written into the code LUT slot that
 *              corresponds to the stored address.
 *
 * For SWC2 the value to store is read from REG_TEMP instead of the opcode's
 * rt register. */
static void rec_store_memory(struct lightrec_cstate *cstate,
                             const struct block *block,
                             u16 offset, jit_code_t code,
                             jit_code_t swap_code,
                             uintptr_t addr_offset, u32 addr_mask,
                             bool invalidate)
{
        const struct lightrec_state *state = cstate->state;
        struct regcache *reg_cache = cstate->reg_cache;
        struct opcode *op = &block->opcode_list[offset];
        jit_state_t *_jit = block->_jit;
        union code c = op->c;
        u8 rs, rt, tmp, tmp2, tmp3, addr_reg, addr_reg2;
        s16 imm = (s16)c.i.imm;
        /* The immediate scaled the same way the address is scaled below when
         * indexing the code LUT (doubled when the LUT is not 32-bit). */
        s32 simm = (s32)imm << (1 - lut_is_32bit(state));
        s32 lut_offt = offsetof(struct lightrec_state, code_lut);
        bool no_mask = op_flag_no_mask(op->flags);
        /* The immediate has to be added to the address register up front
         * when the address gets masked without mirrors mapped, or, when
         * invalidating, if it is not a multiple of 4 or if the combined LUT
         * displacement does not fit in a signed 16-bit value. */
        bool add_imm = c.i.imm &&
                ((!state->mirrors_mapped && !no_mask) || (invalidate &&
                ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt))));
        bool need_tmp = !no_mask || addr_offset || add_imm || invalidate;
        bool swc2 = c.i.op == OP_SWC2;
        u8 in_reg = swc2 ? REG_TEMP : c.i.rt;

        rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);
        rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
        if (need_tmp)
                tmp = lightrec_alloc_reg_temp(reg_cache, _jit);

        addr_reg = rs;

        if (add_imm) {
                jit_addi(tmp, addr_reg, (s16)c.i.imm);
                addr_reg = tmp;
                imm = 0;
        } else if (simm) {
                /* The immediate stays in the store's displacement; account
                 * for it in the LUT displacement as well. */
                lut_offt += simm;
        }

        if (!no_mask) {
                jit_andi(tmp, addr_reg, addr_mask);
                addr_reg = tmp;
        }

        /* addr_reg keeps the guest-side address (needed for the LUT index
         * below), addr_reg2 is the host address used by the store. */
        if (addr_offset) {
                tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
                jit_addi(tmp2, addr_reg, addr_offset);
                addr_reg2 = tmp2;
        } else {
                addr_reg2 = addr_reg;
        }

        /* Guest register 0 needs no byte swap. */
        if (is_big_endian() && swap_code && in_reg) {
                tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);

                jit_new_node_ww(swap_code, tmp3, rt);
                jit_new_node_www(code, imm, addr_reg2, tmp3);

                lightrec_free_reg(reg_cache, tmp3);
        } else {
                jit_new_node_www(code, imm, addr_reg2, rt);
        }

        lightrec_free_reg(reg_cache, rt);

        if (invalidate) {
                /* Guest register 0 provides the zero that is written into
                 * the LUT. */
                tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);

                /* Clear the two low address bits for everything but SW. */
                if (c.i.op != OP_SW) {
                        jit_andi(tmp, addr_reg, ~3);
                        addr_reg = tmp;
                }

                if (!lut_is_32bit(state)) {
                        jit_lshi(tmp, addr_reg, 1);
                        addr_reg = tmp;
                }

                /* With rs being guest register 0 and no transformation
                 * applied, the index is zero and the state pointer can be
                 * used directly as the base. */
                if (addr_reg == rs && c.i.rs == 0) {
                        addr_reg = LIGHTREC_REG_STATE;
                } else {
                        jit_add_state(tmp, addr_reg);
                        addr_reg = tmp;
                }

                if (lut_is_32bit(state))
                        jit_stxi_i(lut_offt, addr_reg, tmp3);
                else
                        jit_stxi(lut_offt, addr_reg, tmp3);

                lightrec_free_reg(reg_cache, tmp3);
        }

        if (addr_offset)
                lightrec_free_reg(reg_cache, tmp2);
        if (need_tmp)
                lightrec_free_reg(reg_cache, tmp);
        lightrec_free_reg(reg_cache, rs);
}
1283
1284 static void rec_store_ram(struct lightrec_cstate *cstate,
1285                           const struct block *block,
1286                           u16 offset, jit_code_t code,
1287                           jit_code_t swap_code, bool invalidate)
1288 {
1289         struct lightrec_state *state = cstate->state;
1290
1291         _jit_note(block->_jit, __FILE__, __LINE__);
1292
1293         return rec_store_memory(cstate, block, offset, code, swap_code,
1294                                 state->offset_ram, rec_ram_mask(state),
1295                                 invalidate);
1296 }
1297
1298 static void rec_store_scratch(struct lightrec_cstate *cstate,
1299                               const struct block *block, u16 offset,
1300                               jit_code_t code, jit_code_t swap_code)
1301 {
1302         _jit_note(block->_jit, __FILE__, __LINE__);
1303
1304         return rec_store_memory(cstate, block, offset, code, swap_code,
1305                                 cstate->state->offset_scratch,
1306                                 0x1fffffff, false);
1307 }
1308
1309 static void rec_store_io(struct lightrec_cstate *cstate,
1310                          const struct block *block, u16 offset,
1311                          jit_code_t code, jit_code_t swap_code)
1312 {
1313         _jit_note(block->_jit, __FILE__, __LINE__);
1314
1315         return rec_store_memory(cstate, block, offset, code, swap_code,
1316                                 cstate->state->offset_io,
1317                                 rec_io_mask(cstate->state), false);
1318 }
1319
/* Emit a direct store whose target (RAM or scratchpad) is selected at run
 * time, without touching the code LUT.
 *
 * code is the Lightning store operation, swap_code the byte-swap operation
 * applied to the value on big-endian hosts (0 for none). For SWC2 the value
 * is read from REG_TEMP instead of the opcode's rt register. */
static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
                                           const struct block *block,
                                           u16 offset, jit_code_t code,
                                           jit_code_t swap_code)
{
        struct lightrec_state *state = cstate->state;
        struct regcache *reg_cache = cstate->reg_cache;
        union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
        jit_node_t *to_not_ram, *to_end;
        bool swc2 = c.i.op == OP_SWC2;
        bool offset_ram_or_scratch = state->offset_ram || state->offset_scratch;
        u8 tmp, tmp2, rs, rt, in_reg = swc2 ? REG_TEMP : c.i.rt;
        s16 imm;

        jit_note(__FILE__, __LINE__);
        rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
        tmp = lightrec_alloc_reg_temp(reg_cache, _jit);

        /* A second temporary is only needed when a host offset has to be
         * added to the address. */
        if (offset_ram_or_scratch)
                tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);

        /* Convert to KUNSEG and avoid RAM mirrors */
        if (state->mirrors_mapped) {
                /* The immediate is left as the displacement of the store. */
                imm = (s16)c.i.imm;
                jit_andi(tmp, rs, 0x1f800000 | (4 * RAM_SIZE - 1));
        } else if (c.i.imm) {
                /* The immediate is added before masking. */
                imm = 0;
                jit_addi(tmp, rs, (s16)c.i.imm);
                jit_andi(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1));
        } else {
                imm = 0;
                jit_andi(tmp, rs, 0x1f800000 | (RAM_SIZE - 1));
        }

        lightrec_free_reg(reg_cache, rs);

        if (state->offset_ram != state->offset_scratch) {
                /* Addresses with bit 28 set take the scratchpad offset, the
                 * others take the RAM offset. */
                to_not_ram = jit_bmsi(tmp, BIT(28));

                jit_movi(tmp2, state->offset_ram);

                to_end = jit_b();
                jit_patch(to_not_ram);

                jit_movi(tmp2, state->offset_scratch);
                jit_patch(to_end);
        } else if (state->offset_ram) {
                /* Both regions share the same non-zero offset. */
                jit_movi(tmp2, state->offset_ram);
        }

        if (offset_ram_or_scratch) {
                jit_addr(tmp, tmp, tmp2);
                lightrec_free_reg(reg_cache, tmp2);
        }

        rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);

        /* Guest register 0 needs no byte swap. */
        if (is_big_endian() && swap_code && in_reg) {
                tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);

                jit_new_node_ww(swap_code, tmp2, rt);
                jit_new_node_www(code, imm, tmp, tmp2);

                lightrec_free_reg(reg_cache, tmp2);
        } else {
                jit_new_node_www(code, imm, tmp, rt);
        }

        lightrec_free_reg(reg_cache, rt);
        lightrec_free_reg(reg_cache, tmp);
}
1392
/* Emit a direct store whose target (RAM or scratchpad) is selected at run
 * time, and clear the code LUT entry that corresponds to the written
 * address.
 *
 * code is the Lightning store operation, swap_code the byte-swap operation
 * applied to the value on big-endian hosts (0 for none). For SWC2 the value
 * is read from REG_TEMP instead of the opcode's rt register. */
static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block,
                             u16 offset, jit_code_t code, jit_code_t swap_code)
{
        struct lightrec_state *state = cstate->state;
        u32 ram_size = state->mirrors_mapped ? RAM_SIZE * 4 : RAM_SIZE;
        struct regcache *reg_cache = cstate->reg_cache;
        union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
        jit_node_t *to_not_ram, *to_end;
        bool swc2 = c.i.op == OP_SWC2;
        u8 tmp, tmp2, tmp3, masked_reg, rs, rt;
        u8 in_reg = swc2 ? REG_TEMP : c.i.rt;

        jit_note(__FILE__, __LINE__);

        /* tmp2 will hold the target address; tmp3 maps guest register 0 and
         * provides the zero written into the LUT. */
        rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
        tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
        tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);

        /* Convert to KUNSEG and avoid RAM mirrors */
        if (c.i.imm) {
                jit_addi(tmp2, rs, (s16)c.i.imm);
                jit_andi(tmp2, tmp2, 0x1f800000 | (ram_size - 1));
        } else {
                jit_andi(tmp2, rs, 0x1f800000 | (ram_size - 1));
        }

        lightrec_free_reg(reg_cache, rs);
        tmp = lightrec_alloc_reg_temp(reg_cache, _jit);

        if (state->offset_ram != state->offset_scratch) {
                /* Addresses above ram_size skip the LUT invalidation and the
                 * RAM offset selection below. */
                to_not_ram = jit_bgti(tmp2, ram_size);
                masked_reg = tmp2;
        } else {
                /* Branch-free variant: tmp is the address when it is below
                 * ram_size, and stays 0 otherwise. */
                jit_lti_u(tmp, tmp2, ram_size);
                jit_movnr(tmp, tmp2, tmp);
                masked_reg = tmp;
        }

        /* Compute the offset to the code LUT */
        if (c.i.op == OP_SW)
                jit_andi(tmp, masked_reg, RAM_SIZE - 1);
        else
                jit_andi(tmp, masked_reg, (RAM_SIZE - 1) & ~3);

        if (!lut_is_32bit(state))
                jit_lshi(tmp, tmp, 1);
        jit_add_state(tmp, tmp);

        /* Write NULL to the code LUT to invalidate any block that's there */
        if (lut_is_32bit(state))
                jit_stxi_i(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
        else
                jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3);

        /* Select the host offset: RAM on the fall-through path, scratchpad
         * on the path taken by the branch above. */
        if (state->offset_ram != state->offset_scratch) {
                jit_movi(tmp, state->offset_ram);

                to_end = jit_b();
                jit_patch(to_not_ram);
        }

        if (state->offset_ram || state->offset_scratch)
                jit_movi(tmp, state->offset_scratch);

        if (state->offset_ram != state->offset_scratch)
                jit_patch(to_end);

        if (state->offset_ram || state->offset_scratch)
                jit_addr(tmp2, tmp2, tmp);

        lightrec_free_reg(reg_cache, tmp);
        lightrec_free_reg(reg_cache, tmp3);

        rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);

        /* Guest register 0 needs no byte swap. */
        if (is_big_endian() && swap_code && in_reg) {
                tmp = lightrec_alloc_reg_temp(reg_cache, _jit);

                jit_new_node_ww(swap_code, tmp, rt);
                jit_new_node_www(code, 0, tmp2, tmp);

                lightrec_free_reg(reg_cache, tmp);
        } else {
                jit_new_node_www(code, 0, tmp2, rt);
        }

        lightrec_free_reg(reg_cache, rt);
        lightrec_free_reg(reg_cache, tmp2);
}
1483
1484 static void rec_store(struct lightrec_cstate *state,
1485                       const struct block *block, u16 offset,
1486                       jit_code_t code, jit_code_t swap_code)
1487 {
1488         u32 flags = block->opcode_list[offset].flags;
1489         u32 mode = LIGHTREC_FLAGS_GET_IO_MODE(flags);
1490         bool no_invalidate = op_flag_no_invalidate(flags) ||
1491                 state->state->invalidate_from_dma_only;
1492         union code c = block->opcode_list[offset].c;
1493         bool is_swc2 = c.i.op == OP_SWC2;
1494
1495         if (is_swc2) {
1496                 switch (mode) {
1497                 case LIGHTREC_IO_RAM:
1498                 case LIGHTREC_IO_SCRATCH:
1499                 case LIGHTREC_IO_DIRECT:
1500                 case LIGHTREC_IO_DIRECT_HW:
1501                         rec_cp2_do_mfc2(state, block, offset, c.i.rt, REG_TEMP);
1502                         break;
1503                 default:
1504                         break;
1505                 }
1506         }
1507
1508         switch (mode) {
1509         case LIGHTREC_IO_RAM:
1510                 rec_store_ram(state, block, offset, code,
1511                               swap_code, !no_invalidate);
1512                 break;
1513         case LIGHTREC_IO_SCRATCH:
1514                 rec_store_scratch(state, block, offset, code, swap_code);
1515                 break;
1516         case LIGHTREC_IO_DIRECT:
1517                 if (no_invalidate) {
1518                         rec_store_direct_no_invalidate(state, block, offset,
1519                                                        code, swap_code);
1520                 } else {
1521                         rec_store_direct(state, block, offset, code, swap_code);
1522                 }
1523                 break;
1524         case LIGHTREC_IO_DIRECT_HW:
1525                 rec_store_io(state, block, offset, code, swap_code);
1526                 break;
1527         default:
1528                 rec_io(state, block, offset, true, false);
1529                 return;
1530         }
1531
1532         if (is_swc2)
1533                 lightrec_discard_reg_if_loaded(state->reg_cache, REG_TEMP);
1534 }
1535
1536 static void rec_SB(struct lightrec_cstate *state,
1537                    const struct block *block, u16 offset)
1538 {
1539         _jit_name(block->_jit, __func__);
1540         rec_store(state, block, offset, jit_code_stxi_c, 0);
1541 }
1542
1543 static void rec_SH(struct lightrec_cstate *state,
1544                    const struct block *block, u16 offset)
1545 {
1546         _jit_name(block->_jit, __func__);
1547         rec_store(state, block, offset,
1548                   jit_code_stxi_s, jit_code_bswapr_us);
1549 }
1550
1551 static void rec_SW(struct lightrec_cstate *state,
1552                    const struct block *block, u16 offset)
1553
1554 {
1555         union code c = block->opcode_list[offset].c;
1556
1557         _jit_name(block->_jit, c.i.op == OP_SWC2 ? "rec_SWC2" : "rec_SW");
1558         rec_store(state, block, offset,
1559                   jit_code_stxi_i, jit_code_bswapr_ui);
1560 }
1561
1562 static void rec_SWL(struct lightrec_cstate *state,
1563                     const struct block *block, u16 offset)
1564 {
1565         _jit_name(block->_jit, __func__);
1566         rec_io(state, block, offset, true, false);
1567 }
1568
1569 static void rec_SWR(struct lightrec_cstate *state,
1570                     const struct block *block, u16 offset)
1571 {
1572         _jit_name(block->_jit, __func__);
1573         rec_io(state, block, offset, true, false);
1574 }
1575
/* Emit a load from a memory region whose host mapping is known when the
 * block is compiled.
 *
 * code:        Lightning load operation to emit.
 * swap_code:   byte-swap operation applied to the loaded value on big-endian
 *              hosts (0 for none).
 * is_unsigned: when set, the output register is also flagged zero-extended.
 * addr_offset: constant added to the (masked) guest address to obtain the
 *              host address.
 * addr_mask:   mask applied to the guest address, unless the opcode carries
 *              the "no mask" flag.
 *
 * The value is loaded into REG_TEMP for LWC2 and for loads subject to a load
 * delay; otherwise into the opcode's rt register. Nothing is emitted when
 * that register is guest register 0. */
static void rec_load_memory(struct lightrec_cstate *cstate,
                            const struct block *block, u16 offset,
                            jit_code_t code, jit_code_t swap_code, bool is_unsigned,
                            uintptr_t addr_offset, u32 addr_mask)
{
        struct regcache *reg_cache = cstate->reg_cache;
        struct opcode *op = &block->opcode_list[offset];
        bool load_delay = op_flag_load_delay(op->flags) && !cstate->no_load_delay;
        jit_state_t *_jit = block->_jit;
        u8 rs, rt, out_reg, addr_reg, flags = REG_EXT;
        bool no_mask = op_flag_no_mask(op->flags);
        union code c = op->c;
        s16 imm;

        if (load_delay || c.i.op == OP_LWC2)
                out_reg = REG_TEMP;
        else if (c.i.rt)
                out_reg = c.i.rt;
        else
                return;

        if (is_unsigned)
                flags |= REG_ZEXT;

        rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
        rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);

        /* The output register doubles as scratch space for the address
         * computation. Without mirrors mapped, the immediate must be added
         * before the address is masked; otherwise it is left as the
         * displacement of the load. */
        if (!cstate->state->mirrors_mapped && c.i.imm && !no_mask) {
                jit_addi(rt, rs, (s16)c.i.imm);
                addr_reg = rt;
                imm = 0;
        } else {
                addr_reg = rs;
                imm = (s16)c.i.imm;
        }

        if (!no_mask) {
                jit_andi(rt, addr_reg, addr_mask);
                addr_reg = rt;
        }

        if (addr_offset) {
                jit_addi(rt, addr_reg, addr_offset);
                addr_reg = rt;
        }

        jit_new_node_www(code, rt, addr_reg, imm);

        if (is_big_endian() && swap_code) {
                jit_new_node_ww(swap_code, rt, rt);

                /* Sign-extend again after the swap for LH, and for LW on
                 * 64-bit hosts. */
                if (c.i.op == OP_LH)
                        jit_extr_s(rt, rt);
                else if (c.i.op == OP_LW && __WORDSIZE == 64)
                        jit_extr_i(rt, rt);
        }

        lightrec_free_reg(reg_cache, rs);
        lightrec_free_reg(reg_cache, rt);
}
1636
1637 static void rec_load_ram(struct lightrec_cstate *cstate,
1638                          const struct block *block, u16 offset,
1639                          jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1640 {
1641         _jit_note(block->_jit, __FILE__, __LINE__);
1642
1643         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1644                         cstate->state->offset_ram, rec_ram_mask(cstate->state));
1645 }
1646
1647 static void rec_load_bios(struct lightrec_cstate *cstate,
1648                           const struct block *block, u16 offset,
1649                           jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1650 {
1651         _jit_note(block->_jit, __FILE__, __LINE__);
1652
1653         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1654                         cstate->state->offset_bios, 0x1fffffff);
1655 }
1656
1657 static void rec_load_scratch(struct lightrec_cstate *cstate,
1658                              const struct block *block, u16 offset,
1659                              jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1660 {
1661         _jit_note(block->_jit, __FILE__, __LINE__);
1662
1663         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1664                         cstate->state->offset_scratch, 0x1fffffff);
1665 }
1666
1667 static void rec_load_io(struct lightrec_cstate *cstate,
1668                         const struct block *block, u16 offset,
1669                         jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1670 {
1671         _jit_note(block->_jit, __FILE__, __LINE__);
1672
1673         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1674                         cstate->state->offset_io, rec_io_mask(cstate->state));
1675 }
1676
1677 static void rec_load_direct(struct lightrec_cstate *cstate,
1678                             const struct block *block, u16 offset,
1679                             jit_code_t code, jit_code_t swap_code,
1680                             bool is_unsigned)
1681 {
1682         struct lightrec_state *state = cstate->state;
1683         struct regcache *reg_cache = cstate->reg_cache;
1684         struct opcode *op = &block->opcode_list[offset];
1685         bool load_delay = op_flag_load_delay(op->flags) && !cstate->no_load_delay;
1686         jit_state_t *_jit = block->_jit;
1687         jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2;
1688         u8 tmp, rs, rt, out_reg, addr_reg, flags = REG_EXT;
1689         union code c = op->c;
1690         s16 imm;
1691
1692         if (load_delay || c.i.op == OP_LWC2)
1693                 out_reg = REG_TEMP;
1694         else if (c.i.rt)
1695                 out_reg = c.i.rt;
1696         else
1697                 return;
1698
1699         if (is_unsigned)
1700                 flags |= REG_ZEXT;
1701
1702         jit_note(__FILE__, __LINE__);
1703         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1704         rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
1705
1706         if ((state->offset_ram == state->offset_bios &&
1707             state->offset_ram == state->offset_scratch &&
1708             state->mirrors_mapped) || !c.i.imm) {
1709                 addr_reg = rs;
1710                 imm = (s16)c.i.imm;
1711         } else {
1712                 jit_addi(rt, rs, (s16)c.i.imm);
1713                 addr_reg = rt;
1714                 imm = 0;
1715
1716                 if (c.i.rs != c.i.rt)
1717                         lightrec_free_reg(reg_cache, rs);
1718         }
1719
1720         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1721
1722         if (state->offset_ram == state->offset_bios &&
1723             state->offset_ram == state->offset_scratch) {
1724                 if (!state->mirrors_mapped) {
1725                         jit_andi(tmp, addr_reg, BIT(28));
1726                         jit_rshi_u(tmp, tmp, 28 - 22);
1727                         jit_ori(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1));
1728                         jit_andr(rt, addr_reg, tmp);
1729                 } else {
1730                         jit_andi(rt, addr_reg, 0x1fffffff);
1731                 }
1732
1733                 if (state->offset_ram)
1734                         jit_movi(tmp, state->offset_ram);
1735         } else {
1736                 to_not_ram = jit_bmsi(addr_reg, BIT(28));
1737
1738                 /* Convert to KUNSEG and avoid RAM mirrors */
1739                 jit_andi(rt, addr_reg, RAM_SIZE - 1);
1740
1741                 if (state->offset_ram)
1742                         jit_movi(tmp, state->offset_ram);
1743
1744                 to_end = jit_b();
1745
1746                 jit_patch(to_not_ram);
1747
1748                 if (state->offset_bios != state->offset_scratch)
1749                         to_not_bios = jit_bmci(addr_reg, BIT(22));
1750
1751                 /* Convert to KUNSEG */
1752                 jit_andi(rt, addr_reg, 0x1fc00000 | (BIOS_SIZE - 1));
1753
1754                 jit_movi(tmp, state->offset_bios);
1755
1756                 if (state->offset_bios != state->offset_scratch) {
1757                         to_end2 = jit_b();
1758
1759                         jit_patch(to_not_bios);
1760
1761                         /* Convert to KUNSEG */
1762                         jit_andi(rt, addr_reg, 0x1f800fff);
1763
1764                         if (state->offset_scratch)
1765                                 jit_movi(tmp, state->offset_scratch);
1766
1767                         jit_patch(to_end2);
1768                 }
1769
1770                 jit_patch(to_end);
1771         }
1772
1773         if (state->offset_ram || state->offset_bios || state->offset_scratch)
1774                 jit_addr(rt, rt, tmp);
1775
1776         jit_new_node_www(code, rt, rt, imm);
1777
1778         if (is_big_endian() && swap_code) {
1779                 jit_new_node_ww(swap_code, rt, rt);
1780
1781                 if (c.i.op == OP_LH)
1782                         jit_extr_s(rt, rt);
1783                 else if (c.i.op == OP_LW && __WORDSIZE == 64)
1784                         jit_extr_i(rt, rt);
1785         }
1786
1787         lightrec_free_reg(reg_cache, addr_reg);
1788         lightrec_free_reg(reg_cache, rt);
1789         lightrec_free_reg(reg_cache, tmp);
1790 }
1791
1792 static void rec_load(struct lightrec_cstate *state, const struct block *block,
1793                      u16 offset, jit_code_t code, jit_code_t swap_code,
1794                      bool is_unsigned)
1795 {
1796         const struct opcode *op = &block->opcode_list[offset];
1797         u32 flags = op->flags;
1798
1799         switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
1800         case LIGHTREC_IO_RAM:
1801                 rec_load_ram(state, block, offset, code, swap_code, is_unsigned);
1802                 break;
1803         case LIGHTREC_IO_BIOS:
1804                 rec_load_bios(state, block, offset, code, swap_code, is_unsigned);
1805                 break;
1806         case LIGHTREC_IO_SCRATCH:
1807                 rec_load_scratch(state, block, offset, code, swap_code, is_unsigned);
1808                 break;
1809         case LIGHTREC_IO_DIRECT_HW:
1810                 rec_load_io(state, block, offset, code, swap_code, is_unsigned);
1811                 break;
1812         case LIGHTREC_IO_DIRECT:
1813                 rec_load_direct(state, block, offset, code, swap_code, is_unsigned);
1814                 break;
1815         default:
1816                 rec_io(state, block, offset, false, true);
1817                 return;
1818         }
1819
1820         if (op->i.op == OP_LWC2) {
1821                 rec_cp2_do_mtc2(state, block, offset, op->i.rt, REG_TEMP);
1822                 lightrec_discard_reg_if_loaded(state->reg_cache, REG_TEMP);
1823         }
1824 }
1825
1826 static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset)
1827 {
1828         _jit_name(block->_jit, __func__);
1829         rec_load(state, block, offset, jit_code_ldxi_c, 0, false);
1830 }
1831
1832 static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset)
1833 {
1834         _jit_name(block->_jit, __func__);
1835         rec_load(state, block, offset, jit_code_ldxi_uc, 0, true);
1836 }
1837
1838 static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset)
1839 {
1840         jit_code_t code = is_big_endian() ? jit_code_ldxi_us : jit_code_ldxi_s;
1841
1842         _jit_name(block->_jit, __func__);
1843         rec_load(state, block, offset, code, jit_code_bswapr_us, false);
1844 }
1845
1846 static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset)
1847 {
1848         _jit_name(block->_jit, __func__);
1849         rec_load(state, block, offset, jit_code_ldxi_us, jit_code_bswapr_us, true);
1850 }
1851
1852 static void rec_LWL(struct lightrec_cstate *state, const struct block *block, u16 offset)
1853 {
1854         _jit_name(block->_jit, __func__);
1855         rec_io(state, block, offset, true, true);
1856 }
1857
1858 static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u16 offset)
1859 {
1860         _jit_name(block->_jit, __func__);
1861         rec_io(state, block, offset, true, true);
1862 }
1863
1864 static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset)
1865 {
1866         union code c = block->opcode_list[offset].c;
1867         jit_code_t code;
1868
1869         if (is_big_endian() && __WORDSIZE == 64)
1870                 code = jit_code_ldxi_ui;
1871         else
1872                 code = jit_code_ldxi_i;
1873
1874         _jit_name(block->_jit, c.i.op == OP_LWC2 ? "rec_LWC2" : "rec_LW");
1875         rec_load(state, block, offset, code, jit_code_bswapr_ui, false);
1876 }
1877
1878 static void rec_break_syscall(struct lightrec_cstate *state,
1879                               const struct block *block, u16 offset,
1880                               u32 exit_code)
1881 {
1882         struct regcache *reg_cache = state->reg_cache;
1883         jit_state_t *_jit = block->_jit;
1884         u8 tmp;
1885
1886         _jit_note(block->_jit, __FILE__, __LINE__);
1887
1888         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1889
1890         jit_movi(tmp, exit_code);
1891         jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
1892                    LIGHTREC_REG_STATE, tmp);
1893
1894         jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
1895                    offsetof(struct lightrec_state, target_cycle));
1896         jit_subr(tmp, tmp, LIGHTREC_REG_CYCLE);
1897         jit_movi(LIGHTREC_REG_CYCLE, 0);
1898         jit_stxi_i(offsetof(struct lightrec_state, target_cycle),
1899                    LIGHTREC_REG_STATE, tmp);
1900         jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
1901                    LIGHTREC_REG_STATE, tmp);
1902
1903         lightrec_free_reg(reg_cache, tmp);
1904
1905         /* TODO: the return address should be "pc - 4" if we're a delay slot */
1906         lightrec_emit_end_of_block(state, block, offset, -1,
1907                                    get_ds_pc(block, offset, 0),
1908                                    31, 0, true);
1909 }
1910
1911 static void rec_special_SYSCALL(struct lightrec_cstate *state,
1912                                 const struct block *block, u16 offset)
1913 {
1914         _jit_name(block->_jit, __func__);
1915         rec_break_syscall(state, block, offset, LIGHTREC_EXIT_SYSCALL);
1916 }
1917
1918 static void rec_special_BREAK(struct lightrec_cstate *state,
1919                               const struct block *block, u16 offset)
1920 {
1921         _jit_name(block->_jit, __func__);
1922         rec_break_syscall(state, block, offset, LIGHTREC_EXIT_BREAK);
1923 }
1924
1925 static void rec_mfc(struct lightrec_cstate *state, const struct block *block, u16 offset)
1926 {
1927         struct regcache *reg_cache = state->reg_cache;
1928         union code c = block->opcode_list[offset].c;
1929         jit_state_t *_jit = block->_jit;
1930
1931         jit_note(__FILE__, __LINE__);
1932
1933         if (c.i.op != OP_SWC2)
1934                 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
1935
1936         call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MFC);
1937 }
1938
1939 static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset)
1940 {
1941         struct regcache *reg_cache = state->reg_cache;
1942         union code c = block->opcode_list[offset].c;
1943         jit_state_t *_jit = block->_jit;
1944
1945         jit_note(__FILE__, __LINE__);
1946         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
1947         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
1948         lightrec_clean_reg_if_loaded(reg_cache, _jit, REG_TEMP, false);
1949
1950         call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MTC);
1951
1952         if (c.i.op == OP_CP0 &&
1953             !op_flag_no_ds(block->opcode_list[offset].flags) &&
1954             (c.r.rd == 12 || c.r.rd == 13))
1955                 lightrec_emit_end_of_block(state, block, offset, -1,
1956                                            get_ds_pc(block, offset, 1),
1957                                            0, 0, true);
1958 }
1959
1960 static void
1961 rec_mfc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
1962 {
1963         struct regcache *reg_cache = state->reg_cache;
1964         union code c = block->opcode_list[offset].c;
1965         jit_state_t *_jit = block->_jit;
1966         u8 rt;
1967
1968         jit_note(__FILE__, __LINE__);
1969
1970         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT);
1971
1972         jit_ldxi_i(rt, LIGHTREC_REG_STATE,
1973                    offsetof(struct lightrec_state, regs.cp0[c.r.rd]));
1974
1975         lightrec_free_reg(reg_cache, rt);
1976 }
1977
1978 static bool block_uses_icache(const struct lightrec_cstate *state,
1979                               const struct block *block)
1980 {
1981         const struct lightrec_mem_map *map = &state->state->maps[PSX_MAP_KERNEL_USER_RAM];
1982         u32 pc = kunseg(block->pc);
1983
1984         if (pc < map->pc || pc >= map->pc + map->length)
1985                 return false;
1986
1987         return (block->pc >> 28) < 0xa;
1988 }
1989
1990 static void
1991 rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
1992 {
1993         struct regcache *reg_cache = state->reg_cache;
1994         const union code c = block->opcode_list[offset].c;
1995         jit_state_t *_jit = block->_jit;
1996         u8 rt, tmp = 0, tmp2, status;
1997         jit_node_t *to_end;
1998
1999         jit_note(__FILE__, __LINE__);
2000
2001         switch(c.r.rd) {
2002         case 1:
2003         case 4:
2004         case 8:
2005         case 14:
2006         case 15:
2007                 /* Those registers are read-only */
2008                 return;
2009         default:
2010                 break;
2011         }
2012
2013         if (!block_uses_icache(state, block) && c.r.rd == 12) {
2014                 /* If we are not running code from the RAM through kuseg or
2015                  * kseg0, handle writes to the Status register in C; as the
2016                  * code may toggle bit 16 which isolates the cache. Code
2017                  * running from kuseg or kseg0 in RAM cannot do that. */
2018                 rec_mtc(state, block, offset);
2019                 return;
2020         }
2021
2022         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
2023
2024         if (c.r.rd != 13) {
2025                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[c.r.rd]),
2026                            LIGHTREC_REG_STATE, rt);
2027         }
2028
2029         if (c.r.rd == 12 || c.r.rd == 13) {
2030                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2031                 jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
2032                            offsetof(struct lightrec_state, regs.cp0[13]));
2033
2034                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2035         }
2036
2037         if (c.r.rd == 12) {
2038                 status = rt;
2039         } else if (c.r.rd == 13) {
2040                 /* Cause = (Cause & ~0x0300) | (value & 0x0300) */
2041                 jit_andi(tmp2, rt, 0x0300);
2042                 jit_ori(tmp, tmp, 0x0300);
2043                 jit_xori(tmp, tmp, 0x0300);
2044                 jit_orr(tmp, tmp, tmp2);
2045                 jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
2046                            offsetof(struct lightrec_state, regs.cp0[12]));
2047                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[13]),
2048                            LIGHTREC_REG_STATE, tmp);
2049                 status = tmp2;
2050         }
2051
2052         if (c.r.rd == 12 || c.r.rd == 13) {
2053                 /* Exit dynarec in case there's a software interrupt.
2054                  * exit_flags = !!(status & tmp & 0x0300) & status; */
2055                 jit_andr(tmp, tmp, status);
2056                 jit_andi(tmp, tmp, 0x0300);
2057                 jit_nei(tmp, tmp, 0);
2058                 jit_andr(tmp, tmp, status);
2059         }
2060
2061         if (c.r.rd == 12) {
2062                 /* Exit dynarec in case we unmask a hardware interrupt.
2063                  * exit_flags = !(~status & 0x401) */
2064
2065                 jit_comr(tmp2, status);
2066                 jit_andi(tmp2, tmp2, 0x401);
2067                 jit_eqi(tmp2, tmp2, 0);
2068                 jit_orr(tmp, tmp, tmp2);
2069         }
2070
2071         lightrec_free_reg(reg_cache, rt);
2072
2073         if (c.r.rd == 12 || c.r.rd == 13) {
2074                 to_end = jit_beqi(tmp, 0);
2075
2076                 jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
2077                            offsetof(struct lightrec_state, target_cycle));
2078                 jit_subr(tmp2, tmp2, LIGHTREC_REG_CYCLE);
2079                 jit_movi(LIGHTREC_REG_CYCLE, 0);
2080                 jit_stxi_i(offsetof(struct lightrec_state, target_cycle),
2081                            LIGHTREC_REG_STATE, tmp2);
2082                 jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
2083                            LIGHTREC_REG_STATE, tmp2);
2084
2085
2086                 jit_patch(to_end);
2087         }
2088
2089         if (!op_flag_no_ds(block->opcode_list[offset].flags) &&
2090             (c.r.rd == 12 || c.r.rd == 13)) {
2091                 state->cycles += lightrec_cycles_of_opcode(c);
2092                 lightrec_emit_eob(state, block, offset + 1);
2093         }
2094 }
2095
2096 static void rec_cp0_MFC0(struct lightrec_cstate *state,
2097                          const struct block *block, u16 offset)
2098 {
2099         _jit_name(block->_jit, __func__);
2100         rec_mfc0(state, block, offset);
2101 }
2102
2103 static void rec_cp0_CFC0(struct lightrec_cstate *state,
2104                          const struct block *block, u16 offset)
2105 {
2106         _jit_name(block->_jit, __func__);
2107         rec_mfc0(state, block, offset);
2108 }
2109
2110 static void rec_cp0_MTC0(struct lightrec_cstate *state,
2111                          const struct block *block, u16 offset)
2112 {
2113         _jit_name(block->_jit, __func__);
2114         rec_mtc0(state, block, offset);
2115 }
2116
2117 static void rec_cp0_CTC0(struct lightrec_cstate *state,
2118                          const struct block *block, u16 offset)
2119 {
2120         _jit_name(block->_jit, __func__);
2121         rec_mtc0(state, block, offset);
2122 }
2123
2124 static unsigned int cp2d_i_offset(u8 reg)
2125 {
2126         return offsetof(struct lightrec_state, regs.cp2d[reg]);
2127 }
2128
2129 static unsigned int cp2d_s_offset(u8 reg)
2130 {
2131         return cp2d_i_offset(reg) + is_big_endian() * 2;
2132 }
2133
2134 static unsigned int cp2c_i_offset(u8 reg)
2135 {
2136         return offsetof(struct lightrec_state, regs.cp2c[reg]);
2137 }
2138
2139 static unsigned int cp2c_s_offset(u8 reg)
2140 {
2141         return cp2c_i_offset(reg) + is_big_endian() * 2;
2142 }
2143
2144 static void rec_cp2_do_mfc2(struct lightrec_cstate *state,
2145                             const struct block *block, u16 offset,
2146                             u8 reg, u8 out_reg)
2147 {
2148         struct regcache *reg_cache = state->reg_cache;
2149         jit_state_t *_jit = block->_jit;
2150         const u32 zext_regs = 0x300f0080;
2151         u8 rt, tmp, tmp2, tmp3, out, flags;
2152         unsigned int i;
2153
2154         _jit_name(block->_jit, __func__);
2155
2156         if (state->state->ops.cop2_notify) {
2157                 /* We must call cop2_notify, handle that in C. */
2158                 rec_mfc(state, block, offset);
2159                 return;
2160         }
2161
2162         flags = (zext_regs & BIT(reg)) ? REG_ZEXT : REG_EXT;
2163         rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
2164
2165         if (reg == 15)
2166                 reg = 14;
2167
2168         switch (reg) {
2169         case 1:
2170         case 3:
2171         case 5:
2172         case 8:
2173         case 9:
2174         case 10:
2175         case 11:
2176                 jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg));
2177                 break;
2178         case 7:
2179         case 16:
2180         case 17:
2181         case 18:
2182         case 19:
2183                 jit_ldxi_us(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg));
2184                 break;
2185         case 28:
2186         case 29:
2187                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2188                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2189                 tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
2190
2191                 for (i = 0; i < 3; i++) {
2192                         out = i == 0 ? rt : tmp;
2193
2194                         jit_ldxi_s(tmp, LIGHTREC_REG_STATE, cp2d_s_offset(9 + i));
2195                         jit_movi(tmp2, 0x1f);
2196                         jit_rshi(out, tmp, 7);
2197
2198                         jit_ltr(tmp3, tmp2, out);
2199                         jit_movnr(out, tmp2, tmp3);
2200
2201                         jit_gei(tmp2, out, 0);
2202                         jit_movzr(out, tmp2, tmp2);
2203
2204                         if (i > 0) {
2205                                 jit_lshi(tmp, tmp, 5 * i);
2206                                 jit_orr(rt, rt, tmp);
2207                         }
2208                 }
2209
2210
2211                 lightrec_free_reg(reg_cache, tmp);
2212                 lightrec_free_reg(reg_cache, tmp2);
2213                 lightrec_free_reg(reg_cache, tmp3);
2214                 break;
2215         default:
2216                 jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2d_i_offset(reg));
2217                 break;
2218         }
2219
2220         lightrec_free_reg(reg_cache, rt);
2221 }
2222
2223 static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
2224                                const struct block *block, u16 offset)
2225 {
2226         const union code c = block->opcode_list[offset].c;
2227
2228         rec_cp2_do_mfc2(state, block, offset, c.r.rd, c.r.rt);
2229 }
2230
2231 static void rec_cp2_basic_CFC2(struct lightrec_cstate *state,
2232                                const struct block *block, u16 offset)
2233 {
2234         struct regcache *reg_cache = state->reg_cache;
2235         const union code c = block->opcode_list[offset].c;
2236         jit_state_t *_jit = block->_jit;
2237         u8 rt;
2238
2239         _jit_name(block->_jit, __func__);
2240
2241         if (state->state->ops.cop2_notify) {
2242                 /* We must call cop2_notify, handle that in C. */
2243                 rec_mfc(state, block, offset);
2244                 return;
2245         }
2246
2247         switch (c.r.rd) {
2248         case 4:
2249         case 12:
2250         case 20:
2251         case 26:
2252         case 27:
2253         case 29:
2254         case 30:
2255                 rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT);
2256                 jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2c_s_offset(c.r.rd));
2257                 break;
2258         default:
2259                 rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT);
2260                 jit_ldxi_ui(rt, LIGHTREC_REG_STATE, cp2c_i_offset(c.r.rd));
2261                 break;
2262         }
2263
2264         lightrec_free_reg(reg_cache, rt);
2265 }
2266
2267 static void rec_cp2_do_mtc2(struct lightrec_cstate *state,
2268                             const struct block *block, u16 offset,
2269                             u8 reg, u8 in_reg)
2270 {
2271         struct regcache *reg_cache = state->reg_cache;
2272         jit_state_t *_jit = block->_jit;
2273         u8 rt, tmp, tmp2, flags = 0;
2274
2275         _jit_name(block->_jit, __func__);
2276
2277         if (state->state->ops.cop2_notify) {
2278                 /* We must call cop2_notify, handle that in C. */
2279                 rec_mtc(state, block, offset);
2280                 return;
2281         }
2282
2283         if (reg == 31)
2284                 return;
2285
2286         if (reg == 30)
2287                 flags |= REG_EXT;
2288
2289         rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, flags);
2290
2291         switch (reg) {
2292         case 15:
2293                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2294                 jit_ldxi_i(tmp, LIGHTREC_REG_STATE, cp2d_i_offset(13));
2295
2296                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2297                 jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, cp2d_i_offset(14));
2298
2299                 jit_stxi_i(cp2d_i_offset(12), LIGHTREC_REG_STATE, tmp);
2300                 jit_stxi_i(cp2d_i_offset(13), LIGHTREC_REG_STATE, tmp2);
2301                 jit_stxi_i(cp2d_i_offset(14), LIGHTREC_REG_STATE, rt);
2302
2303                 lightrec_free_reg(reg_cache, tmp);
2304                 lightrec_free_reg(reg_cache, tmp2);
2305                 break;
2306         case 28:
2307                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2308
2309                 jit_lshi(tmp, rt, 7);
2310                 jit_andi(tmp, tmp, 0xf80);
2311                 jit_stxi_s(cp2d_s_offset(9), LIGHTREC_REG_STATE, tmp);
2312
2313                 jit_lshi(tmp, rt, 2);
2314                 jit_andi(tmp, tmp, 0xf80);
2315                 jit_stxi_s(cp2d_s_offset(10), LIGHTREC_REG_STATE, tmp);
2316
2317                 jit_rshi(tmp, rt, 3);
2318                 jit_andi(tmp, tmp, 0xf80);
2319                 jit_stxi_s(cp2d_s_offset(11), LIGHTREC_REG_STATE, tmp);
2320
2321                 lightrec_free_reg(reg_cache, tmp);
2322                 break;
2323         case 30:
2324                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2325
2326                 /* if (rt < 0) rt = ~rt; */
2327                 jit_rshi(tmp, rt, 31);
2328                 jit_xorr(tmp, rt, tmp);
2329
2330                 /* Count leading zeros */
2331                 jit_clzr(tmp, tmp);
2332                 if (__WORDSIZE != 32)
2333                         jit_subi(tmp, tmp, __WORDSIZE - 32);
2334
2335                 jit_stxi_i(cp2d_i_offset(31), LIGHTREC_REG_STATE, tmp);
2336
2337                 lightrec_free_reg(reg_cache, tmp);
2338                 fallthrough;
2339         default:
2340                 jit_stxi_i(cp2d_i_offset(reg), LIGHTREC_REG_STATE, rt);
2341                 break;
2342         }
2343
2344         lightrec_free_reg(reg_cache, rt);
2345 }
2346
2347 static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
2348                                const struct block *block, u16 offset)
2349 {
2350         const union code c = block->opcode_list[offset].c;
2351
2352         rec_cp2_do_mtc2(state, block, offset, c.r.rd, c.r.rt);
2353 }
2354
2355 static void rec_cp2_basic_CTC2(struct lightrec_cstate *state,
2356                                const struct block *block, u16 offset)
2357 {
2358         struct regcache *reg_cache = state->reg_cache;
2359         const union code c = block->opcode_list[offset].c;
2360         jit_state_t *_jit = block->_jit;
2361         u8 rt, tmp, tmp2;
2362
2363         _jit_name(block->_jit, __func__);
2364
2365         if (state->state->ops.cop2_notify) {
2366                 /* We must call cop2_notify, handle that in C. */
2367                 rec_mtc(state, block, offset);
2368                 return;
2369         }
2370
2371         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
2372
2373         switch (c.r.rd) {
2374         case 4:
2375         case 12:
2376         case 20:
2377         case 26:
2378         case 27:
2379         case 29:
2380         case 30:
2381                 jit_stxi_s(cp2c_s_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
2382                 break;
2383         case 31:
2384                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2385                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2386
2387                 jit_andi(tmp, rt, 0x7f87e000);
2388                 jit_nei(tmp, tmp, 0);
2389                 jit_lshi(tmp, tmp, 31);
2390
2391                 jit_andi(tmp2, rt, 0x7ffff000);
2392                 jit_orr(tmp, tmp2, tmp);
2393
2394                 jit_stxi_i(cp2c_i_offset(31), LIGHTREC_REG_STATE, tmp);
2395
2396                 lightrec_free_reg(reg_cache, tmp);
2397                 lightrec_free_reg(reg_cache, tmp2);
2398                 break;
2399
2400         default:
2401                 jit_stxi_i(cp2c_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
2402         }
2403
2404         lightrec_free_reg(reg_cache, rt);
2405 }
2406
2407 static void rec_cp0_RFE(struct lightrec_cstate *state,
2408                         const struct block *block, u16 offset)
2409 {
2410         struct regcache *reg_cache = state->reg_cache;
2411         jit_state_t *_jit = block->_jit;
2412         u8 status, tmp;
2413
2414         jit_name(__func__);
2415         jit_note(__FILE__, __LINE__);
2416
2417         status = lightrec_alloc_reg_temp(reg_cache, _jit);
2418         jit_ldxi_i(status, LIGHTREC_REG_STATE,
2419                    offsetof(struct lightrec_state, regs.cp0[12]));
2420
2421         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2422
2423         /* status = ((status >> 2) & 0xf) | status & ~0xf; */
2424         jit_rshi(tmp, status, 2);
2425         jit_andi(tmp, tmp, 0xf);
2426         jit_andi(status, status, ~0xful);
2427         jit_orr(status, status, tmp);
2428
2429         jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
2430                    offsetof(struct lightrec_state, regs.cp0[13]));
2431         jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[12]),
2432                    LIGHTREC_REG_STATE, status);
2433
2434         /* Exit dynarec in case there's a software interrupt.
2435          * exit_flags = !!(status & cause & 0x0300) & status; */
2436         jit_andr(tmp, tmp, status);
2437         jit_andi(tmp, tmp, 0x0300);
2438         jit_nei(tmp, tmp, 0);
2439         jit_andr(tmp, tmp, status);
2440         jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
2441                    LIGHTREC_REG_STATE, tmp);
2442
2443         lightrec_free_reg(reg_cache, status);
2444         lightrec_free_reg(reg_cache, tmp);
2445 }
2446
2447 static void rec_CP(struct lightrec_cstate *state,
2448                    const struct block *block, u16 offset)
2449 {
2450         union code c = block->opcode_list[offset].c;
2451         jit_state_t *_jit = block->_jit;
2452
2453         jit_name(__func__);
2454         jit_note(__FILE__, __LINE__);
2455
2456         call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_CP);
2457 }
2458
2459 static void rec_meta_MOV(struct lightrec_cstate *state,
2460                          const struct block *block, u16 offset)
2461 {
2462         struct regcache *reg_cache = state->reg_cache;
2463         const struct opcode *op = &block->opcode_list[offset];
2464         union code c = op->c;
2465         jit_state_t *_jit = block->_jit;
2466         bool unload_rd;
2467         u8 rs, rd;
2468
2469         _jit_name(block->_jit, __func__);
2470         jit_note(__FILE__, __LINE__);
2471
2472         unload_rd = OPT_EARLY_UNLOAD
2473                 && LIGHTREC_FLAGS_GET_RD(op->flags) == LIGHTREC_REG_UNLOAD;
2474
2475         if (c.m.rs && !lightrec_reg_is_loaded(reg_cache, c.m.rs)) {
2476                 /* The source register is not yet loaded - we can load its value
2477                  * from the register cache directly into the target register. */
2478                 rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, REG_EXT);
2479
2480                 jit_ldxi_i(rd, LIGHTREC_REG_STATE,
2481                            offsetof(struct lightrec_state, regs.gpr) + (c.m.rs << 2));
2482
2483                 lightrec_free_reg(reg_cache, rd);
2484         } else if (unload_rd) {
2485                 /* If the destination register will be unloaded right after the
2486                  * MOV meta-opcode, we don't actually need to write any host
2487                  * register - we can just store the source register directly to
2488                  * the register cache, at the offset corresponding to the
2489                  * destination register. */
2490                 lightrec_discard_reg_if_loaded(reg_cache, c.m.rd);
2491
2492                 rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0);
2493
2494                 jit_stxi_i(offsetof(struct lightrec_state, regs.gpr)
2495                            + (c.m.rd << 2), LIGHTREC_REG_STATE, rs);
2496
2497                 lightrec_free_reg(reg_cache, rs);
2498         } else {
2499                 if (c.m.rs)
2500                         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0);
2501
2502                 rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, REG_EXT);
2503
2504                 if (c.m.rs == 0) {
2505                         jit_movi(rd, 0);
2506                 } else {
2507                         jit_extr_i(rd, rs);
2508                         lightrec_free_reg(reg_cache, rs);
2509                 }
2510
2511                 lightrec_free_reg(reg_cache, rd);
2512         }
2513 }
2514
2515 static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state,
2516                                const struct block *block,
2517                                u16 offset)
2518 {
2519         struct regcache *reg_cache = state->reg_cache;
2520         union code c = block->opcode_list[offset].c;
2521         jit_state_t *_jit = block->_jit;
2522         u8 rs, rd;
2523
2524         _jit_name(block->_jit, __func__);
2525         jit_note(__FILE__, __LINE__);
2526
2527         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0);
2528         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, REG_EXT);
2529
2530         if (c.m.op == OP_META_EXTC)
2531                 jit_extr_c(rd, rs);
2532         else
2533                 jit_extr_s(rd, rs);
2534
2535         lightrec_free_reg(reg_cache, rs);
2536         lightrec_free_reg(reg_cache, rd);
2537 }
2538
2539 static void rec_meta_MULT2(struct lightrec_cstate *state,
2540                            const struct block *block,
2541                            u16 offset)
2542 {
2543         struct regcache *reg_cache = state->reg_cache;
2544         union code c = block->opcode_list[offset].c;
2545         jit_state_t *_jit = block->_jit;
2546         u8 reg_lo = get_mult_div_lo(c);
2547         u8 reg_hi = get_mult_div_hi(c);
2548         u32 flags = block->opcode_list[offset].flags;
2549         bool is_signed = c.i.op == OP_META_MULT2;
2550         u8 rs, lo, hi, rflags = 0, hiflags = 0;
2551         unsigned int i;
2552
2553         if (!op_flag_no_hi(flags) && c.r.op < 32) {
2554                 rflags = is_signed ? REG_EXT : REG_ZEXT;
2555                 hiflags = is_signed ? REG_EXT : (REG_EXT | REG_ZEXT);
2556         }
2557
2558         _jit_name(block->_jit, __func__);
2559         jit_note(__FILE__, __LINE__);
2560
2561         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, rflags);
2562
2563         /*
2564          * We must handle the case where one of the output registers is our rs
2565          * input register. Thanksfully, computing LO/HI can be done in any
2566          * order. Here, we make sure that the computation that overwrites the
2567          * input register is always performed last.
2568          */
2569         for (i = 0; i < 2; i++) {
2570                 if ((!i ^ (reg_lo == c.i.rs)) && !op_flag_no_lo(flags)) {
2571                         lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
2572
2573                         if (c.r.op < 32)
2574                                 jit_lshi(lo, rs, c.r.op);
2575                         else
2576                                 jit_movi(lo, 0);
2577
2578                         lightrec_free_reg(reg_cache, lo);
2579                         continue;
2580                 }
2581
2582                 if ((!!i ^ (reg_lo == c.i.rs)) && !op_flag_no_hi(flags)) {
2583                         hi = lightrec_alloc_reg_out(reg_cache, _jit,
2584                                                     reg_hi, hiflags);
2585
2586                         if (c.r.op >= 32)
2587                                 jit_lshi(hi, rs, c.r.op - 32);
2588                         else if (is_signed)
2589                                 jit_rshi(hi, rs, 32 - c.r.op);
2590                         else
2591                                 jit_rshi_u(hi, rs, 32 - c.r.op);
2592
2593                         lightrec_free_reg(reg_cache, hi);
2594                 }
2595         }
2596
2597         lightrec_free_reg(reg_cache, rs);
2598
2599         _jit_name(block->_jit, __func__);
2600         jit_note(__FILE__, __LINE__);
2601 }
2602
2603 static void rec_meta_COM(struct lightrec_cstate *state,
2604                          const struct block *block, u16 offset)
2605 {
2606         struct regcache *reg_cache = state->reg_cache;
2607         union code c = block->opcode_list[offset].c;
2608         jit_state_t *_jit = block->_jit;
2609         u8 rd, rs, flags;
2610
2611         jit_note(__FILE__, __LINE__);
2612         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0);
2613         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, 0);
2614
2615         flags = lightrec_get_reg_in_flags(reg_cache, rs);
2616
2617         lightrec_set_reg_out_flags(reg_cache, rd,
2618                                    flags & REG_EXT);
2619
2620         jit_comr(rd, rs);
2621
2622         lightrec_free_reg(reg_cache, rs);
2623         lightrec_free_reg(reg_cache, rd);
2624 }
2625
2626 static const lightrec_rec_func_t rec_standard[64] = {
2627         SET_DEFAULT_ELM(rec_standard, unknown_opcode),
2628         [OP_SPECIAL]            = rec_SPECIAL,
2629         [OP_REGIMM]             = rec_REGIMM,
2630         [OP_J]                  = rec_J,
2631         [OP_JAL]                = rec_JAL,
2632         [OP_BEQ]                = rec_BEQ,
2633         [OP_BNE]                = rec_BNE,
2634         [OP_BLEZ]               = rec_BLEZ,
2635         [OP_BGTZ]               = rec_BGTZ,
2636         [OP_ADDI]               = rec_ADDI,
2637         [OP_ADDIU]              = rec_ADDIU,
2638         [OP_SLTI]               = rec_SLTI,
2639         [OP_SLTIU]              = rec_SLTIU,
2640         [OP_ANDI]               = rec_ANDI,
2641         [OP_ORI]                = rec_ORI,
2642         [OP_XORI]               = rec_XORI,
2643         [OP_LUI]                = rec_LUI,
2644         [OP_CP0]                = rec_CP0,
2645         [OP_CP2]                = rec_CP2,
2646         [OP_LB]                 = rec_LB,
2647         [OP_LH]                 = rec_LH,
2648         [OP_LWL]                = rec_LWL,
2649         [OP_LW]                 = rec_LW,
2650         [OP_LBU]                = rec_LBU,
2651         [OP_LHU]                = rec_LHU,
2652         [OP_LWR]                = rec_LWR,
2653         [OP_SB]                 = rec_SB,
2654         [OP_SH]                 = rec_SH,
2655         [OP_SWL]                = rec_SWL,
2656         [OP_SW]                 = rec_SW,
2657         [OP_SWR]                = rec_SWR,
2658         [OP_LWC2]               = rec_LW,
2659         [OP_SWC2]               = rec_SW,
2660
2661         [OP_META]               = rec_META,
2662         [OP_META_MULT2]         = rec_meta_MULT2,
2663         [OP_META_MULTU2]        = rec_meta_MULT2,
2664 };
2665
2666 static const lightrec_rec_func_t rec_special[64] = {
2667         SET_DEFAULT_ELM(rec_special, unknown_opcode),
2668         [OP_SPECIAL_SLL]        = rec_special_SLL,
2669         [OP_SPECIAL_SRL]        = rec_special_SRL,
2670         [OP_SPECIAL_SRA]        = rec_special_SRA,
2671         [OP_SPECIAL_SLLV]       = rec_special_SLLV,
2672         [OP_SPECIAL_SRLV]       = rec_special_SRLV,
2673         [OP_SPECIAL_SRAV]       = rec_special_SRAV,
2674         [OP_SPECIAL_JR]         = rec_special_JR,
2675         [OP_SPECIAL_JALR]       = rec_special_JALR,
2676         [OP_SPECIAL_SYSCALL]    = rec_special_SYSCALL,
2677         [OP_SPECIAL_BREAK]      = rec_special_BREAK,
2678         [OP_SPECIAL_MFHI]       = rec_special_MFHI,
2679         [OP_SPECIAL_MTHI]       = rec_special_MTHI,
2680         [OP_SPECIAL_MFLO]       = rec_special_MFLO,
2681         [OP_SPECIAL_MTLO]       = rec_special_MTLO,
2682         [OP_SPECIAL_MULT]       = rec_special_MULT,
2683         [OP_SPECIAL_MULTU]      = rec_special_MULTU,
2684         [OP_SPECIAL_DIV]        = rec_special_DIV,
2685         [OP_SPECIAL_DIVU]       = rec_special_DIVU,
2686         [OP_SPECIAL_ADD]        = rec_special_ADD,
2687         [OP_SPECIAL_ADDU]       = rec_special_ADDU,
2688         [OP_SPECIAL_SUB]        = rec_special_SUB,
2689         [OP_SPECIAL_SUBU]       = rec_special_SUBU,
2690         [OP_SPECIAL_AND]        = rec_special_AND,
2691         [OP_SPECIAL_OR]         = rec_special_OR,
2692         [OP_SPECIAL_XOR]        = rec_special_XOR,
2693         [OP_SPECIAL_NOR]        = rec_special_NOR,
2694         [OP_SPECIAL_SLT]        = rec_special_SLT,
2695         [OP_SPECIAL_SLTU]       = rec_special_SLTU,
2696 };
2697
2698 static const lightrec_rec_func_t rec_regimm[64] = {
2699         SET_DEFAULT_ELM(rec_regimm, unknown_opcode),
2700         [OP_REGIMM_BLTZ]        = rec_regimm_BLTZ,
2701         [OP_REGIMM_BGEZ]        = rec_regimm_BGEZ,
2702         [OP_REGIMM_BLTZAL]      = rec_regimm_BLTZAL,
2703         [OP_REGIMM_BGEZAL]      = rec_regimm_BGEZAL,
2704 };
2705
2706 static const lightrec_rec_func_t rec_cp0[64] = {
2707         SET_DEFAULT_ELM(rec_cp0, rec_CP),
2708         [OP_CP0_MFC0]           = rec_cp0_MFC0,
2709         [OP_CP0_CFC0]           = rec_cp0_CFC0,
2710         [OP_CP0_MTC0]           = rec_cp0_MTC0,
2711         [OP_CP0_CTC0]           = rec_cp0_CTC0,
2712         [OP_CP0_RFE]            = rec_cp0_RFE,
2713 };
2714
2715 static const lightrec_rec_func_t rec_cp2_basic[64] = {
2716         SET_DEFAULT_ELM(rec_cp2_basic, rec_CP),
2717         [OP_CP2_BASIC_MFC2]     = rec_cp2_basic_MFC2,
2718         [OP_CP2_BASIC_CFC2]     = rec_cp2_basic_CFC2,
2719         [OP_CP2_BASIC_MTC2]     = rec_cp2_basic_MTC2,
2720         [OP_CP2_BASIC_CTC2]     = rec_cp2_basic_CTC2,
2721 };
2722
2723 static const lightrec_rec_func_t rec_meta[64] = {
2724         SET_DEFAULT_ELM(rec_meta, unknown_opcode),
2725         [OP_META_MOV]           = rec_meta_MOV,
2726         [OP_META_EXTC]          = rec_meta_EXTC_EXTS,
2727         [OP_META_EXTS]          = rec_meta_EXTC_EXTS,
2728         [OP_META_COM]           = rec_meta_COM,
2729 };
2730
2731 static void rec_SPECIAL(struct lightrec_cstate *state,
2732                         const struct block *block, u16 offset)
2733 {
2734         union code c = block->opcode_list[offset].c;
2735         lightrec_rec_func_t f = rec_special[c.r.op];
2736
2737         if (!HAS_DEFAULT_ELM && unlikely(!f))
2738                 unknown_opcode(state, block, offset);
2739         else
2740                 (*f)(state, block, offset);
2741 }
2742
2743 static void rec_REGIMM(struct lightrec_cstate *state,
2744                        const struct block *block, u16 offset)
2745 {
2746         union code c = block->opcode_list[offset].c;
2747         lightrec_rec_func_t f = rec_regimm[c.r.rt];
2748
2749         if (!HAS_DEFAULT_ELM && unlikely(!f))
2750                 unknown_opcode(state, block, offset);
2751         else
2752                 (*f)(state, block, offset);
2753 }
2754
2755 static void rec_CP0(struct lightrec_cstate *state,
2756                     const struct block *block, u16 offset)
2757 {
2758         union code c = block->opcode_list[offset].c;
2759         lightrec_rec_func_t f = rec_cp0[c.r.rs];
2760
2761         if (!HAS_DEFAULT_ELM && unlikely(!f))
2762                 rec_CP(state, block, offset);
2763         else
2764                 (*f)(state, block, offset);
2765 }
2766
2767 static void rec_CP2(struct lightrec_cstate *state,
2768                     const struct block *block, u16 offset)
2769 {
2770         union code c = block->opcode_list[offset].c;
2771
2772         if (c.r.op == OP_CP2_BASIC) {
2773                 lightrec_rec_func_t f = rec_cp2_basic[c.r.rs];
2774
2775                 if (HAS_DEFAULT_ELM || likely(f)) {
2776                         (*f)(state, block, offset);
2777                         return;
2778                 }
2779         }
2780
2781         rec_CP(state, block, offset);
2782 }
2783
2784 static void rec_META(struct lightrec_cstate *state,
2785                      const struct block *block, u16 offset)
2786 {
2787         union code c = block->opcode_list[offset].c;
2788         lightrec_rec_func_t f = rec_meta[c.m.op];
2789
2790         if (!HAS_DEFAULT_ELM && unlikely(!f))
2791                 unknown_opcode(state, block, offset);
2792         else
2793                 (*f)(state, block, offset);
2794 }
2795
2796 void lightrec_rec_opcode(struct lightrec_cstate *state,
2797                          const struct block *block, u16 offset)
2798 {
2799         struct regcache *reg_cache = state->reg_cache;
2800         struct lightrec_branch_target *target;
2801         const struct opcode *op = &block->opcode_list[offset];
2802         jit_state_t *_jit = block->_jit;
2803         lightrec_rec_func_t f;
2804         u16 unload_offset;
2805
2806         if (op_flag_sync(op->flags)) {
2807                 if (state->cycles)
2808                         jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
2809                 state->cycles = 0;
2810
2811                 lightrec_storeback_regs(reg_cache, _jit);
2812                 lightrec_regcache_reset(reg_cache);
2813
2814                 pr_debug("Adding branch target at offset 0x%x\n", offset << 2);
2815                 target = &state->targets[state->nb_targets++];
2816                 target->offset = offset;
2817                 target->label = jit_indirect();
2818         }
2819
2820         if (likely(op->opcode)) {
2821                 f = rec_standard[op->i.op];
2822
2823                 if (!HAS_DEFAULT_ELM && unlikely(!f))
2824                         unknown_opcode(state, block, offset);
2825                 else
2826                         (*f)(state, block, offset);
2827         }
2828
2829         if (OPT_EARLY_UNLOAD) {
2830                 unload_offset = offset +
2831                         (has_delay_slot(op->c) && !op_flag_no_ds(op->flags));
2832
2833                 lightrec_do_early_unload(state, block, unload_offset);
2834         }
2835
2836         state->no_load_delay = false;
2837 }