Merge pull request #836 from pcercuei/update-lightrec-20240611
[pcsx_rearmed.git] / deps / lightrec / emitter.c
1 // SPDX-License-Identifier: LGPL-2.1-or-later
2 /*
3  * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
4  */
5
6 #include "blockcache.h"
7 #include "debug.h"
8 #include "disassembler.h"
9 #include "emitter.h"
10 #include "lightning-wrapper.h"
11 #include "optimizer.h"
12 #include "regcache.h"
13
14 #include <stdbool.h>
15 #include <stddef.h>
16
17 #define LIGHTNING_UNALIGNED_32BIT 4
18
19 typedef void (*lightrec_rec_func_t)(struct lightrec_cstate *, const struct block *, u16);
20
21 /* Forward declarations */
22 static void rec_SPECIAL(struct lightrec_cstate *state, const struct block *block, u16 offset);
23 static void rec_REGIMM(struct lightrec_cstate *state, const struct block *block, u16 offset);
24 static void rec_CP0(struct lightrec_cstate *state, const struct block *block, u16 offset);
25 static void rec_CP2(struct lightrec_cstate *state, const struct block *block, u16 offset);
26 static void rec_META(struct lightrec_cstate *state, const struct block *block, u16 offset);
27 static void rec_cp2_do_mtc2(struct lightrec_cstate *state,
28                             const struct block *block, u16 offset, u8 reg, u8 in_reg);
29 static void rec_cp2_do_mfc2(struct lightrec_cstate *state,
30                             const struct block *block, u16 offset,
31                             u8 reg, u8 out_reg);
32
33 static void
34 lightrec_jump_to_fn(jit_state_t *_jit, void (*fn)(void))
35 {
36         /* Prevent jit_jmpi() from using our cycles register as a temporary */
37         jit_live(LIGHTREC_REG_CYCLE);
38
39         jit_patch_abs(jit_jmpi(), fn);
40 }
41
42 static void
43 lightrec_jump_to_eob(struct lightrec_cstate *state, jit_state_t *_jit)
44 {
45         lightrec_jump_to_fn(_jit, state->state->eob_wrapper_func);
46 }
47
48 static void
49 lightrec_jump_to_ds_check(struct lightrec_cstate *state, jit_state_t *_jit)
50 {
51         lightrec_jump_to_fn(_jit, state->state->ds_check_func);
52 }
53
54 static void update_ra_register(struct regcache *reg_cache, jit_state_t *_jit,
55                                u8 ra_reg, u32 pc, u32 link)
56 {
57         u8 link_reg;
58
59         link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg, 0);
60         lightrec_load_imm(reg_cache, _jit, link_reg, pc, link);
61         lightrec_free_reg(reg_cache, link_reg);
62 }
63
64 static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
65                                        const struct block *block, u16 offset,
66                                        s8 reg_new_pc, u32 imm, u8 ra_reg,
67                                        u32 link, bool update_cycles)
68 {
69         struct regcache *reg_cache = state->reg_cache;
70         jit_state_t *_jit = block->_jit;
71         const struct opcode *op = &block->opcode_list[offset],
72                             *ds = get_delay_slot(block->opcode_list, offset);
73         u32 cycles = state->cycles + lightrec_cycles_of_opcode(state->state, op->c);
74         bool has_ds = has_delay_slot(op->c);
75
76         jit_note(__FILE__, __LINE__);
77
78         if (link && ra_reg != reg_new_pc)
79                 update_ra_register(reg_cache, _jit, ra_reg, block->pc, link);
80
81         if (reg_new_pc < 0)
82                 lightrec_load_next_pc_imm(reg_cache, _jit, block->pc, imm);
83         else
84                 lightrec_load_next_pc(reg_cache, _jit, reg_new_pc);
85
86         if (link && ra_reg == reg_new_pc) {
87                 /* Handle the special case: JALR $r0, $r0
88                  * In that case the target PC should be the old value of the
89                  * register. */
90                 update_ra_register(reg_cache, _jit, ra_reg, block->pc, link);
91         }
92
93         if (has_ds && !op_flag_no_ds(op->flags) && !op_flag_local_branch(op->flags)) {
94                 cycles += lightrec_cycles_of_opcode(state->state, ds->c);
95
96                 /* Recompile the delay slot */
97                 if (ds->c.opcode)
98                         lightrec_rec_opcode(state, block, offset + 1);
99         }
100
101         /* Clean the remaining registers */
102         lightrec_clean_regs(reg_cache, _jit);
103
104         if (cycles && update_cycles) {
105                 jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
106                 pr_debug("EOB: %u cycles\n", cycles);
107         }
108
109         if (has_ds && op_flag_load_delay(ds->flags)
110             && opcode_has_load_delay(ds->c) && !state->no_load_delay) {
111                 /* If the delay slot is a load opcode, its target register
112                  * will be written after the first opcode of the target is
113                  * executed. Handle this by jumping to a special section of
114                  * the dispatcher. It expects the loaded value to be in
115                  * REG_TEMP, and the target register number to be in JIT_V1.*/
116                 jit_movi(JIT_V1, ds->c.i.rt);
117
118                 lightrec_jump_to_ds_check(state, _jit);
119         } else {
120                 lightrec_jump_to_eob(state, _jit);
121         }
122
123         lightrec_regcache_reset(reg_cache);
124 }
125
126 void lightrec_emit_jump_to_interpreter(struct lightrec_cstate *state,
127                                        const struct block *block, u16 offset)
128 {
129         struct regcache *reg_cache = state->reg_cache;
130         jit_state_t *_jit = block->_jit;
131
132         lightrec_clean_regs(reg_cache, _jit);
133
134         /* Call the interpreter with the block's address in JIT_V1 and the
135          * PC (which might have an offset) in JIT_V0. */
136         lightrec_load_imm(reg_cache, _jit, JIT_V0, block->pc,
137                           block->pc + (offset << 2));
138         if (lightrec_store_next_pc()) {
139               jit_stxi_i(lightrec_offset(next_pc), LIGHTREC_REG_STATE, JIT_V0);
140         }
141
142         jit_movi(JIT_V1, (uintptr_t)block);
143
144         jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
145         lightrec_jump_to_fn(_jit, state->state->interpreter_func);
146 }
147
148 static void lightrec_emit_eob(struct lightrec_cstate *state,
149                               const struct block *block, u16 offset)
150 {
151         struct regcache *reg_cache = state->reg_cache;
152         jit_state_t *_jit = block->_jit;
153
154         lightrec_clean_regs(reg_cache, _jit);
155
156         lightrec_load_imm(reg_cache, _jit, JIT_V0, block->pc,
157                           block->pc + (offset << 2));
158         if (lightrec_store_next_pc()) {
159               jit_stxi_i(lightrec_offset(next_pc), LIGHTREC_REG_STATE, JIT_V0);
160         }
161
162         jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
163
164         lightrec_jump_to_eob(state, _jit);
165 }
166
167 static void rec_special_JR(struct lightrec_cstate *state, const struct block *block, u16 offset)
168 {
169         union code c = block->opcode_list[offset].c;
170
171         _jit_name(block->_jit, __func__);
172         lightrec_emit_end_of_block(state, block, offset, c.r.rs, 0, 31, 0, true);
173 }
174
175 static void rec_special_JALR(struct lightrec_cstate *state, const struct block *block, u16 offset)
176 {
177         union code c = block->opcode_list[offset].c;
178
179         _jit_name(block->_jit, __func__);
180         lightrec_emit_end_of_block(state, block, offset, c.r.rs, 0, c.r.rd,
181                                    get_branch_pc(block, offset, 2), true);
182 }
183
184 static void rec_J(struct lightrec_cstate *state, const struct block *block, u16 offset)
185 {
186         union code c = block->opcode_list[offset].c;
187
188         _jit_name(block->_jit, __func__);
189         lightrec_emit_end_of_block(state, block, offset, -1,
190                                    (block->pc & 0xf0000000) | (c.j.imm << 2),
191                                    31, 0, true);
192 }
193
194 static void rec_JAL(struct lightrec_cstate *state, const struct block *block, u16 offset)
195 {
196         union code c = block->opcode_list[offset].c;
197
198         _jit_name(block->_jit, __func__);
199         lightrec_emit_end_of_block(state, block, offset, -1,
200                                    (block->pc & 0xf0000000) | (c.j.imm << 2),
201                                    31, get_branch_pc(block, offset, 2), true);
202 }
203
204 static void lightrec_do_early_unload(struct lightrec_cstate *state,
205                                      const struct block *block, u16 offset)
206 {
207         struct regcache *reg_cache = state->reg_cache;
208         const struct opcode *op = &block->opcode_list[offset];
209         jit_state_t *_jit = block->_jit;
210         unsigned int i;
211         u8 reg;
212         struct {
213                 u8 reg, op;
214         } reg_ops[3] = {
215                 { op->r.rd, LIGHTREC_FLAGS_GET_RD(op->flags), },
216                 { op->i.rt, LIGHTREC_FLAGS_GET_RT(op->flags), },
217                 { op->i.rs, LIGHTREC_FLAGS_GET_RS(op->flags), },
218         };
219
220         for (i = 0; i < ARRAY_SIZE(reg_ops); i++) {
221                 reg = reg_ops[i].reg;
222
223                 switch (reg_ops[i].op) {
224                 case LIGHTREC_REG_UNLOAD:
225                         lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, true);
226                         break;
227
228                 case LIGHTREC_REG_DISCARD:
229                         lightrec_discard_reg_if_loaded(reg_cache, reg);
230                         break;
231
232                 case LIGHTREC_REG_CLEAN:
233                         lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, false);
234                         break;
235                 default:
236                         break;
237                 };
238         }
239 }
240
241 static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset,
242                   jit_code_t code, jit_code_t code2, u32 link, bool unconditional, bool bz)
243 {
244         struct regcache *reg_cache = state->reg_cache;
245         struct native_register *regs_backup;
246         jit_state_t *_jit = block->_jit;
247         struct lightrec_branch *branch;
248         const struct opcode *op = &block->opcode_list[offset],
249                             *ds = get_delay_slot(block->opcode_list, offset);
250         jit_node_t *addr;
251         bool is_forward = (s16)op->i.imm >= 0;
252         int op_cycles = lightrec_cycles_of_opcode(state->state, op->c);
253         u32 target_offset, cycles = state->cycles + op_cycles;
254         bool no_indirection = false;
255         u32 next_pc;
256         u8 rs, rt;
257
258         jit_note(__FILE__, __LINE__);
259
260         if (!op_flag_no_ds(op->flags))
261                 cycles += lightrec_cycles_of_opcode(state->state, ds->c);
262
263         state->cycles = -op_cycles;
264
265         if (!unconditional) {
266                 rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT);
267                 rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache,
268                                                     _jit, op->i.rt, REG_EXT);
269
270                 /* Unload dead registers before evaluating the branch */
271                 if (OPT_EARLY_UNLOAD)
272                         lightrec_do_early_unload(state, block, offset);
273
274                 if (op_flag_local_branch(op->flags) &&
275                     (op_flag_no_ds(op->flags) || !ds->opcode) &&
276                     is_forward && !lightrec_has_dirty_regs(reg_cache))
277                         no_indirection = true;
278
279                 if (no_indirection)
280                         pr_debug("Using no indirection for branch at offset 0x%hx\n", offset << 2);
281         }
282
283         if (cycles)
284                 jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
285
286         if (!unconditional) {
287                 /* Generate the branch opcode */
288                 if (!no_indirection)
289                         addr = jit_new_node_pww(code, NULL, rs, rt);
290
291                 lightrec_free_regs(reg_cache);
292                 regs_backup = lightrec_regcache_enter_branch(reg_cache);
293         }
294
295         if (op_flag_local_branch(op->flags)) {
296                 /* Recompile the delay slot */
297                 if (!op_flag_no_ds(op->flags) && ds->opcode) {
298                         /* Never handle load delays with local branches. */
299                         state->no_load_delay = true;
300                         lightrec_rec_opcode(state, block, offset + 1);
301                 }
302
303                 if (link)
304                         update_ra_register(reg_cache, _jit, 31, block->pc, link);
305
306                 /* Clean remaining registers */
307                 lightrec_clean_regs(reg_cache, _jit);
308
309                 target_offset = offset + 1 + (s16)op->i.imm
310                         - !!op_flag_no_ds(op->flags);
311                 pr_debug("Adding local branch to offset 0x%x\n",
312                          target_offset << 2);
313                 branch = &state->local_branches[
314                         state->nb_local_branches++];
315
316                 branch->target = target_offset;
317
318                 if (no_indirection)
319                         branch->branch = jit_new_node_pww(code2, NULL, rs, rt);
320                 else if (is_forward)
321                         branch->branch = jit_b();
322                 else
323                         branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0);
324         }
325
326         if (!op_flag_local_branch(op->flags) || !is_forward) {
327                 next_pc = get_branch_pc(block, offset, 1 + (s16)op->i.imm);
328                 state->no_load_delay = op_flag_local_branch(op->flags);
329                 lightrec_emit_end_of_block(state, block, offset, -1, next_pc,
330                                            31, link, false);
331         }
332
333         if (!unconditional) {
334                 if (!no_indirection)
335                         jit_patch(addr);
336
337                 lightrec_regcache_leave_branch(reg_cache, regs_backup);
338
339                 if (bz && link)
340                         update_ra_register(reg_cache, _jit, 31, block->pc, link);
341
342                 if (!op_flag_no_ds(op->flags) && ds->opcode) {
343                         state->no_load_delay = true;
344                         lightrec_rec_opcode(state, block, offset + 1);
345                 }
346         }
347 }
348
349 static void rec_BNE(struct lightrec_cstate *state,
350                     const struct block *block, u16 offset)
351 {
352         union code c = block->opcode_list[offset].c;
353
354         _jit_name(block->_jit, __func__);
355
356         if (c.i.rt == 0)
357                 rec_b(state, block, offset, jit_code_beqi, jit_code_bnei, 0, false, true);
358         else
359                 rec_b(state, block, offset, jit_code_beqr, jit_code_bner, 0, false, false);
360 }
361
362 static void rec_BEQ(struct lightrec_cstate *state,
363                     const struct block *block, u16 offset)
364 {
365         union code c = block->opcode_list[offset].c;
366
367         _jit_name(block->_jit, __func__);
368
369         if (c.i.rt == 0)
370                 rec_b(state, block, offset, jit_code_bnei, jit_code_beqi, 0, c.i.rs == 0, true);
371         else
372                 rec_b(state, block, offset, jit_code_bner, jit_code_beqr, 0, c.i.rs == c.i.rt, false);
373 }
374
375 static void rec_BLEZ(struct lightrec_cstate *state,
376                      const struct block *block, u16 offset)
377 {
378         union code c = block->opcode_list[offset].c;
379
380         _jit_name(block->_jit, __func__);
381         rec_b(state, block, offset, jit_code_bgti, jit_code_blei, 0, c.i.rs == 0, true);
382 }
383
384 static void rec_BGTZ(struct lightrec_cstate *state,
385                      const struct block *block, u16 offset)
386 {
387         _jit_name(block->_jit, __func__);
388         rec_b(state, block, offset, jit_code_blei, jit_code_bgti, 0, false, true);
389 }
390
391 static void rec_regimm_BLTZ(struct lightrec_cstate *state,
392                             const struct block *block, u16 offset)
393 {
394         _jit_name(block->_jit, __func__);
395         rec_b(state, block, offset, jit_code_bgei, jit_code_blti, 0, false, true);
396 }
397
398 static void rec_regimm_BLTZAL(struct lightrec_cstate *state,
399                               const struct block *block, u16 offset)
400 {
401         _jit_name(block->_jit, __func__);
402         rec_b(state, block, offset, jit_code_bgei, jit_code_blti,
403               get_branch_pc(block, offset, 2), false, true);
404 }
405
406 static void rec_regimm_BGEZ(struct lightrec_cstate *state,
407                             const struct block *block, u16 offset)
408 {
409         union code c = block->opcode_list[offset].c;
410
411         _jit_name(block->_jit, __func__);
412         rec_b(state, block, offset, jit_code_blti, jit_code_bgei, 0, !c.i.rs, true);
413 }
414
415 static void rec_regimm_BGEZAL(struct lightrec_cstate *state,
416                               const struct block *block, u16 offset)
417 {
418         const struct opcode *op = &block->opcode_list[offset];
419         _jit_name(block->_jit, __func__);
420         rec_b(state, block, offset, jit_code_blti, jit_code_bgei,
421               get_branch_pc(block, offset, 2),
422               !op->i.rs, true);
423 }
424
425 static void rec_alloc_rs_rd(struct regcache *reg_cache,
426                             jit_state_t *_jit,
427                             const struct opcode *op,
428                             u8 rs, u8 rd,
429                             u8 in_flags, u8 out_flags,
430                             u8 *rs_out, u8 *rd_out)
431 {
432         bool unload, discard;
433         u32 unload_flags;
434
435         if (OPT_EARLY_UNLOAD) {
436                 unload_flags = LIGHTREC_FLAGS_GET_RS(op->flags);
437                 unload = unload_flags == LIGHTREC_REG_UNLOAD;
438                 discard = unload_flags == LIGHTREC_REG_DISCARD;
439         }
440
441         if (OPT_EARLY_UNLOAD && rs && rd != rs && (unload || discard)) {
442                 rs = lightrec_alloc_reg_in(reg_cache, _jit, rs, in_flags);
443                 lightrec_remap_reg(reg_cache, _jit, rs, rd, discard);
444                 lightrec_set_reg_out_flags(reg_cache, rs, out_flags);
445                 rd = rs;
446         } else {
447                 rs = lightrec_alloc_reg_in(reg_cache, _jit, rs, in_flags);
448                 rd = lightrec_alloc_reg_out(reg_cache, _jit, rd, out_flags);
449         }
450
451         *rs_out = rs;
452         *rd_out = rd;
453 }
454
455 static void rec_alu_imm(struct lightrec_cstate *state, const struct block *block,
456                         u16 offset, jit_code_t code, bool slti)
457 {
458         struct regcache *reg_cache = state->reg_cache;
459         union code c = block->opcode_list[offset].c;
460         jit_state_t *_jit = block->_jit;
461         u8 rs, rt, out_flags = REG_EXT;
462
463         if (slti)
464                 out_flags |= REG_ZEXT;
465
466         jit_note(__FILE__, __LINE__);
467
468         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
469                         c.i.rs, c.i.rt, REG_EXT, out_flags, &rs, &rt);
470
471         jit_new_node_www(code, rt, rs, (s32)(s16) c.i.imm);
472
473         lightrec_free_reg(reg_cache, rs);
474         lightrec_free_reg(reg_cache, rt);
475 }
476
477 static void rec_alu_special(struct lightrec_cstate *state, const struct block *block,
478                             u16 offset, jit_code_t code, bool out_ext)
479 {
480         struct regcache *reg_cache = state->reg_cache;
481         union code c = block->opcode_list[offset].c;
482         jit_state_t *_jit = block->_jit;
483         u8 rd, rt, rs;
484
485         jit_note(__FILE__, __LINE__);
486
487         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, REG_EXT);
488         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
489                         c.r.rs, c.r.rd, REG_EXT,
490                         out_ext ? REG_EXT | REG_ZEXT : 0, &rs, &rd);
491
492         jit_new_node_www(code, rd, rs, rt);
493
494         lightrec_free_reg(reg_cache, rs);
495         lightrec_free_reg(reg_cache, rt);
496         lightrec_free_reg(reg_cache, rd);
497 }
498
499 static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *block,
500                            u16 offset, jit_code_t code)
501 {
502         struct regcache *reg_cache = state->reg_cache;
503         union code c = block->opcode_list[offset].c;
504         jit_state_t *_jit = block->_jit;
505         u8 rd, rt, rs, temp, flags = 0;
506
507         jit_note(__FILE__, __LINE__);
508
509         if (code == jit_code_rshr)
510                 flags = REG_EXT;
511         else if (code == jit_code_rshr_u)
512                 flags = REG_ZEXT;
513
514         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
515         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
516                         c.r.rt, c.r.rd, flags, flags, &rt, &rd);
517
518         if (rt != rd) {
519                 jit_andi(rd, rs, 0x1f);
520                 jit_new_node_www(code, rd, rt, rd);
521         } else {
522                 temp = lightrec_alloc_reg_temp(reg_cache, _jit);
523                 jit_andi(temp, rs, 0x1f);
524                 jit_new_node_www(code, rd, rt, temp);
525                 lightrec_free_reg(reg_cache, temp);
526         }
527
528         lightrec_free_reg(reg_cache, rs);
529         lightrec_free_reg(reg_cache, rt);
530         lightrec_free_reg(reg_cache, rd);
531 }
532
533 static void rec_movi(struct lightrec_cstate *state,
534                      const struct block *block, u16 offset)
535 {
536         struct regcache *reg_cache = state->reg_cache;
537         union code c = block->opcode_list[offset].c;
538         jit_state_t *_jit = block->_jit;
539         u16 flags = REG_EXT;
540         s32 value = (s32)(s16) c.i.imm;
541         u8 rt;
542
543         if (block->opcode_list[offset].flags & LIGHTREC_MOVI)
544                 value += (s32)((u32)state->movi_temp[c.i.rt] << 16);
545
546         if (value >= 0)
547                 flags |= REG_ZEXT;
548
549         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
550
551         jit_movi(rt, value);
552
553         lightrec_free_reg(reg_cache, rt);
554 }
555
556 static void rec_ADDIU(struct lightrec_cstate *state,
557                       const struct block *block, u16 offset)
558 {
559         const struct opcode *op = &block->opcode_list[offset];
560
561         _jit_name(block->_jit, __func__);
562
563         if (op->i.rs && !(op->flags & LIGHTREC_MOVI))
564                 rec_alu_imm(state, block, offset, jit_code_addi, false);
565         else
566                 rec_movi(state, block, offset);
567 }
568
569 static void rec_ADDI(struct lightrec_cstate *state,
570                      const struct block *block, u16 offset)
571 {
572         /* TODO: Handle the exception? */
573         _jit_name(block->_jit, __func__);
574         rec_ADDIU(state, block, offset);
575 }
576
577 static void rec_SLTIU(struct lightrec_cstate *state,
578                       const struct block *block, u16 offset)
579 {
580         _jit_name(block->_jit, __func__);
581         rec_alu_imm(state, block, offset, jit_code_lti_u, true);
582 }
583
584 static void rec_SLTI(struct lightrec_cstate *state,
585                      const struct block *block, u16 offset)
586 {
587         _jit_name(block->_jit, __func__);
588         rec_alu_imm(state, block, offset, jit_code_lti, true);
589 }
590
591 static void rec_ANDI(struct lightrec_cstate *state,
592                      const struct block *block, u16 offset)
593 {
594         struct regcache *reg_cache = state->reg_cache;
595         union code c = block->opcode_list[offset].c;
596         jit_state_t *_jit = block->_jit;
597         u8 rs, rt;
598
599         _jit_name(block->_jit, __func__);
600         jit_note(__FILE__, __LINE__);
601
602         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
603                         c.i.rs, c.i.rt, 0, REG_EXT | REG_ZEXT, &rs, &rt);
604
605         /* PSX code uses ANDI 0xff / ANDI 0xffff a lot, which are basically
606          * casts to uint8_t / uint16_t. */
607         if (c.i.imm == 0xff)
608                 jit_extr_uc(rt, rs);
609         else if (c.i.imm == 0xffff)
610                 jit_extr_us(rt, rs);
611         else
612                 jit_andi(rt, rs, (u32)(u16) c.i.imm);
613
614         lightrec_free_reg(reg_cache, rs);
615         lightrec_free_reg(reg_cache, rt);
616 }
617
618 static void rec_alu_or_xor(struct lightrec_cstate *state, const struct block *block,
619                            u16 offset, jit_code_t code)
620 {
621         struct regcache *reg_cache = state->reg_cache;
622         union code c = block->opcode_list[offset].c;
623         jit_state_t *_jit = block->_jit;
624         u8 rs, rt, flags;
625
626         jit_note(__FILE__, __LINE__);
627
628         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
629                         c.i.rs, c.i.rt, 0, 0, &rs, &rt);
630
631         flags = lightrec_get_reg_in_flags(reg_cache, rs);
632         lightrec_set_reg_out_flags(reg_cache, rt, flags);
633
634         jit_new_node_www(code, rt, rs, (u32)(u16) c.i.imm);
635
636         lightrec_free_reg(reg_cache, rs);
637         lightrec_free_reg(reg_cache, rt);
638 }
639
640
641 static void rec_ORI(struct lightrec_cstate *state,
642                     const struct block *block, u16 offset)
643 {
644         const struct opcode *op = &block->opcode_list[offset];
645         struct regcache *reg_cache = state->reg_cache;
646         jit_state_t *_jit = block->_jit;
647         s32 val;
648         u8 rt;
649
650         _jit_name(_jit, __func__);
651
652         if (op->flags & LIGHTREC_MOVI) {
653                 rt = lightrec_alloc_reg_out(reg_cache, _jit, op->i.rt, REG_EXT);
654
655                 val = ((u32)state->movi_temp[op->i.rt] << 16) | op->i.imm;
656                 jit_movi(rt, val);
657
658                 lightrec_free_reg(reg_cache, rt);
659         } else {
660                 rec_alu_or_xor(state, block, offset, jit_code_ori);
661         }
662 }
663
664 static void rec_XORI(struct lightrec_cstate *state,
665                      const struct block *block, u16 offset)
666 {
667         _jit_name(block->_jit, __func__);
668         rec_alu_or_xor(state, block, offset, jit_code_xori);
669 }
670
671 static void rec_LUI(struct lightrec_cstate *state,
672                     const struct block *block, u16 offset)
673 {
674         struct regcache *reg_cache = state->reg_cache;
675         union code c = block->opcode_list[offset].c;
676         jit_state_t *_jit = block->_jit;
677         u8 rt, flags = REG_EXT;
678
679         if (block->opcode_list[offset].flags & LIGHTREC_MOVI) {
680                 state->movi_temp[c.i.rt] = c.i.imm;
681                 return;
682         }
683
684         jit_name(__func__);
685         jit_note(__FILE__, __LINE__);
686
687         if (!(c.i.imm & BIT(15)))
688                 flags |= REG_ZEXT;
689
690         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
691
692         jit_movi(rt, (s32)(c.i.imm << 16));
693
694         lightrec_free_reg(reg_cache, rt);
695 }
696
697 static void rec_special_ADDU(struct lightrec_cstate *state,
698                              const struct block *block, u16 offset)
699 {
700         _jit_name(block->_jit, __func__);
701         rec_alu_special(state, block, offset, jit_code_addr, false);
702 }
703
704 static void rec_special_ADD(struct lightrec_cstate *state,
705                             const struct block *block, u16 offset)
706 {
707         /* TODO: Handle the exception? */
708         _jit_name(block->_jit, __func__);
709         rec_alu_special(state, block, offset, jit_code_addr, false);
710 }
711
712 static void rec_special_SUBU(struct lightrec_cstate *state,
713                              const struct block *block, u16 offset)
714 {
715         _jit_name(block->_jit, __func__);
716         rec_alu_special(state, block, offset, jit_code_subr, false);
717 }
718
719 static void rec_special_SUB(struct lightrec_cstate *state,
720                             const struct block *block, u16 offset)
721 {
722         /* TODO: Handle the exception? */
723         _jit_name(block->_jit, __func__);
724         rec_alu_special(state, block, offset, jit_code_subr, false);
725 }
726
727 static void rec_special_AND(struct lightrec_cstate *state,
728                             const struct block *block, u16 offset)
729 {
730         struct regcache *reg_cache = state->reg_cache;
731         union code c = block->opcode_list[offset].c;
732         jit_state_t *_jit = block->_jit;
733         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
734
735         _jit_name(block->_jit, __func__);
736         jit_note(__FILE__, __LINE__);
737
738         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
739         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
740                         c.r.rs, c.r.rd, 0, 0, &rs, &rd);
741
742         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
743         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
744
745         /* Z(rd) = Z(rs) | Z(rt) */
746         flags_rd = REG_ZEXT & (flags_rs | flags_rt);
747
748         /* E(rd) = (E(rt) & Z(rt)) | (E(rs) & Z(rs)) | (E(rs) & E(rt)) */
749         if (((flags_rs & REG_EXT) && (flags_rt & REG_ZEXT)) ||
750             ((flags_rt & REG_EXT) && (flags_rs & REG_ZEXT)) ||
751             (REG_EXT & flags_rs & flags_rt))
752                 flags_rd |= REG_EXT;
753
754         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
755
756         jit_andr(rd, rs, rt);
757
758         lightrec_free_reg(reg_cache, rs);
759         lightrec_free_reg(reg_cache, rt);
760         lightrec_free_reg(reg_cache, rd);
761 }
762
763 static void rec_special_or_nor(struct lightrec_cstate *state,
764                                const struct block *block, u16 offset, bool nor)
765 {
766         struct regcache *reg_cache = state->reg_cache;
767         union code c = block->opcode_list[offset].c;
768         jit_state_t *_jit = block->_jit;
769         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd = 0;
770
771         jit_note(__FILE__, __LINE__);
772
773         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
774         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
775                         c.r.rs, c.r.rd, 0, 0, &rs, &rd);
776
777         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
778         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
779
780         /* or: Z(rd) = Z(rs) & Z(rt)
781          * nor: Z(rd) = 0 */
782         if (!nor)
783                 flags_rd = REG_ZEXT & flags_rs & flags_rt;
784
785         /* E(rd) = E(rs) & E(rt) */
786         if (REG_EXT & flags_rs & flags_rt)
787                 flags_rd |= REG_EXT;
788
789         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
790
791         jit_orr(rd, rs, rt);
792
793         if (nor)
794                 jit_comr(rd, rd);
795
796         lightrec_free_reg(reg_cache, rs);
797         lightrec_free_reg(reg_cache, rt);
798         lightrec_free_reg(reg_cache, rd);
799 }
800
801 static void rec_special_OR(struct lightrec_cstate *state,
802                            const struct block *block, u16 offset)
803 {
804         _jit_name(block->_jit, __func__);
805         rec_special_or_nor(state, block, offset, false);
806 }
807
808 static void rec_special_NOR(struct lightrec_cstate *state,
809                             const struct block *block, u16 offset)
810 {
811         _jit_name(block->_jit, __func__);
812         rec_special_or_nor(state, block, offset, true);
813 }
814
815 static void rec_special_XOR(struct lightrec_cstate *state,
816                             const struct block *block, u16 offset)
817 {
818         struct regcache *reg_cache = state->reg_cache;
819         union code c = block->opcode_list[offset].c;
820         jit_state_t *_jit = block->_jit;
821         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
822
823         _jit_name(block->_jit, __func__);
824
825         jit_note(__FILE__, __LINE__);
826
827         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
828         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
829                         c.r.rs, c.r.rd, 0, 0, &rs, &rd);
830
831         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
832         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
833
834         /* Z(rd) = Z(rs) & Z(rt) */
835         flags_rd = REG_ZEXT & flags_rs & flags_rt;
836
837         /* E(rd) = E(rs) & E(rt) */
838         flags_rd |= REG_EXT & flags_rs & flags_rt;
839
840         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
841
842         jit_xorr(rd, rs, rt);
843
844         lightrec_free_reg(reg_cache, rs);
845         lightrec_free_reg(reg_cache, rt);
846         lightrec_free_reg(reg_cache, rd);
847 }
848
849 static void rec_special_SLTU(struct lightrec_cstate *state,
850                              const struct block *block, u16 offset)
851 {
852         _jit_name(block->_jit, __func__);
853         rec_alu_special(state, block, offset, jit_code_ltr_u, true);
854 }
855
856 static void rec_special_SLT(struct lightrec_cstate *state,
857                             const struct block *block, u16 offset)
858 {
859         _jit_name(block->_jit, __func__);
860         rec_alu_special(state, block, offset, jit_code_ltr, true);
861 }
862
863 static void rec_special_SLLV(struct lightrec_cstate *state,
864                              const struct block *block, u16 offset)
865 {
866         _jit_name(block->_jit, __func__);
867         rec_alu_shiftv(state, block, offset, jit_code_lshr);
868 }
869
870 static void rec_special_SRLV(struct lightrec_cstate *state,
871                              const struct block *block, u16 offset)
872 {
873         _jit_name(block->_jit, __func__);
874         rec_alu_shiftv(state, block, offset, jit_code_rshr_u);
875 }
876
877 static void rec_special_SRAV(struct lightrec_cstate *state,
878                              const struct block *block, u16 offset)
879 {
880         _jit_name(block->_jit, __func__);
881         rec_alu_shiftv(state, block, offset, jit_code_rshr);
882 }
883
884 static void rec_alu_shift(struct lightrec_cstate *state, const struct block *block,
885                           u16 offset, jit_code_t code)
886 {
887         struct regcache *reg_cache = state->reg_cache;
888         union code c = block->opcode_list[offset].c;
889         jit_state_t *_jit = block->_jit;
890         u8 rd, rt, flags = 0, out_flags = 0;
891
892         jit_note(__FILE__, __LINE__);
893
894         if (code == jit_code_rshi)
895                 flags = REG_EXT;
896         else if (code == jit_code_rshi_u)
897                 flags = REG_ZEXT;
898
899         /* Input reg is zero-extended, if we SRL at least by one bit, we know
900          * the output reg will be both zero-extended and sign-extended. */
901         out_flags = flags;
902         if (code == jit_code_rshi_u && c.r.imm)
903                 out_flags |= REG_EXT;
904
905         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
906                         c.r.rt, c.r.rd, flags, out_flags, &rt, &rd);
907
908         jit_new_node_www(code, rd, rt, c.r.imm);
909
910         lightrec_free_reg(reg_cache, rt);
911         lightrec_free_reg(reg_cache, rd);
912 }
913
914 static void rec_special_SLL(struct lightrec_cstate *state,
915                             const struct block *block, u16 offset)
916 {
917         _jit_name(block->_jit, __func__);
918         rec_alu_shift(state, block, offset, jit_code_lshi);
919 }
920
921 static void rec_special_SRL(struct lightrec_cstate *state,
922                             const struct block *block, u16 offset)
923 {
924         _jit_name(block->_jit, __func__);
925         rec_alu_shift(state, block, offset, jit_code_rshi_u);
926 }
927
928 static void rec_special_SRA(struct lightrec_cstate *state,
929                             const struct block *block, u16 offset)
930 {
931         _jit_name(block->_jit, __func__);
932         rec_alu_shift(state, block, offset, jit_code_rshi);
933 }
934
935 static void rec_alu_mult(struct lightrec_cstate *state,
936                          const struct block *block, u16 offset, bool is_signed)
937 {
938         struct regcache *reg_cache = state->reg_cache;
939         union code c = block->opcode_list[offset].c;
940         u32 flags = block->opcode_list[offset].flags;
941         u8 reg_lo = get_mult_div_lo(c);
942         u8 reg_hi = get_mult_div_hi(c);
943         jit_state_t *_jit = block->_jit;
944         u8 lo, hi, rs, rt, rflags = 0;
945         bool no_lo = op_flag_no_lo(flags);
946         bool no_hi = op_flag_no_hi(flags);
947
948         jit_note(__FILE__, __LINE__);
949
950         if (is_signed)
951                 rflags = REG_EXT;
952         else
953                 rflags = REG_ZEXT;
954
955         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
956         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
957
958         if (!no_lo)
959                 lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
960
961         if (!no_hi)
962                 hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT);
963
964         if (__WORDSIZE == 32) {
965                 /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
966                  * operation if the MULT was detected a 32-bit only. */
967                 if (no_lo) {
968                         if (is_signed)
969                                 jit_hmulr(hi, rs, rt);
970                         else
971                                 jit_hmulr_u(hi, rs, rt);
972                 } else if (no_hi) {
973                         jit_mulr(lo, rs, rt);
974                 } else if (is_signed) {
975                         jit_qmulr(lo, hi, rs, rt);
976                 } else {
977                         jit_qmulr_u(lo, hi, rs, rt);
978                 }
979         } else {
980                 /* On 64-bit systems, do a 64*64->64 bit operation. */
981                 if (no_lo) {
982                         jit_mulr(hi, rs, rt);
983                         jit_rshi(hi, hi, 32);
984                 } else {
985                         jit_mulr(lo, rs, rt);
986
987                         /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
988                         if (!no_hi)
989                                 jit_rshi(hi, lo, 32);
990                 }
991         }
992
993         lightrec_free_reg(reg_cache, rs);
994         lightrec_free_reg(reg_cache, rt);
995         if (!no_lo)
996                 lightrec_free_reg(reg_cache, lo);
997         if (!no_hi)
998                 lightrec_free_reg(reg_cache, hi);
999 }
1000
1001 static void rec_alu_div(struct lightrec_cstate *state,
1002                         const struct block *block, u16 offset, bool is_signed)
1003 {
1004         struct regcache *reg_cache = state->reg_cache;
1005         union code c = block->opcode_list[offset].c;
1006         u32 flags = block->opcode_list[offset].flags;
1007         bool no_check = op_flag_no_div_check(flags);
1008         u8 reg_lo = get_mult_div_lo(c);
1009         u8 reg_hi = get_mult_div_hi(c);
1010         jit_state_t *_jit = block->_jit;
1011         jit_node_t *branch, *to_end;
1012         u8 lo = 0, hi = 0, rs, rt, rflags = 0;
1013
1014         jit_note(__FILE__, __LINE__);
1015
1016         if (is_signed)
1017                 rflags = REG_EXT;
1018         else
1019                 rflags = REG_ZEXT;
1020
1021         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
1022         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
1023
1024         if (!op_flag_no_lo(flags))
1025                 lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
1026
1027         if (!op_flag_no_hi(flags))
1028                 hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, 0);
1029
1030         /* Jump to special handler if dividing by zero  */
1031         if (!no_check)
1032                 branch = jit_beqi(rt, 0);
1033
1034         if (op_flag_no_lo(flags)) {
1035                 if (is_signed)
1036                         jit_remr(hi, rs, rt);
1037                 else
1038                         jit_remr_u(hi, rs, rt);
1039         } else if (op_flag_no_hi(flags)) {
1040                 if (is_signed)
1041                         jit_divr(lo, rs, rt);
1042                 else
1043                         jit_divr_u(lo, rs, rt);
1044         } else {
1045                 if (is_signed)
1046                         jit_qdivr(lo, hi, rs, rt);
1047                 else
1048                         jit_qdivr_u(lo, hi, rs, rt);
1049         }
1050
1051         if (!no_check) {
1052                 /* Jump above the div-by-zero handler */
1053                 to_end = jit_b();
1054
1055                 jit_patch(branch);
1056
1057                 if (!op_flag_no_lo(flags)) {
1058                         if (is_signed) {
1059                                 jit_ltr(lo, rs, rt);
1060                                 jit_lshi(lo, lo, 1);
1061                                 jit_subi(lo, lo, 1);
1062                         } else {
1063                                 jit_subi(lo, rt, 1);
1064                         }
1065                 }
1066
1067                 if (!op_flag_no_hi(flags))
1068                         jit_movr(hi, rs);
1069
1070                 jit_patch(to_end);
1071         }
1072
1073         lightrec_free_reg(reg_cache, rs);
1074         lightrec_free_reg(reg_cache, rt);
1075
1076         if (!op_flag_no_lo(flags))
1077                 lightrec_free_reg(reg_cache, lo);
1078
1079         if (!op_flag_no_hi(flags))
1080                 lightrec_free_reg(reg_cache, hi);
1081 }
1082
1083 static void rec_special_MULT(struct lightrec_cstate *state,
1084                              const struct block *block, u16 offset)
1085 {
1086         _jit_name(block->_jit, __func__);
1087         rec_alu_mult(state, block, offset, true);
1088 }
1089
1090 static void rec_special_MULTU(struct lightrec_cstate *state,
1091                               const struct block *block, u16 offset)
1092 {
1093         _jit_name(block->_jit, __func__);
1094         rec_alu_mult(state, block, offset, false);
1095 }
1096
1097 static void rec_special_DIV(struct lightrec_cstate *state,
1098                             const struct block *block, u16 offset)
1099 {
1100         _jit_name(block->_jit, __func__);
1101         rec_alu_div(state, block, offset, true);
1102 }
1103
1104 static void rec_special_DIVU(struct lightrec_cstate *state,
1105                              const struct block *block, u16 offset)
1106 {
1107         _jit_name(block->_jit, __func__);
1108         rec_alu_div(state, block, offset, false);
1109 }
1110
1111 static void rec_alu_mv_lo_hi(struct lightrec_cstate *state,
1112                              const struct block *block, u16 offset,
1113                              u8 dst, u8 src)
1114 {
1115         struct regcache *reg_cache = state->reg_cache;
1116         jit_state_t *_jit = block->_jit;
1117
1118         jit_note(__FILE__, __LINE__);
1119
1120         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
1121                         src, dst, 0, REG_EXT, &src, &dst);
1122
1123         jit_extr_i(dst, src);
1124
1125         lightrec_free_reg(reg_cache, src);
1126         lightrec_free_reg(reg_cache, dst);
1127 }
1128
1129 static void rec_special_MFHI(struct lightrec_cstate *state,
1130                              const struct block *block, u16 offset)
1131 {
1132         union code c = block->opcode_list[offset].c;
1133
1134         _jit_name(block->_jit, __func__);
1135         rec_alu_mv_lo_hi(state, block, offset, c.r.rd, REG_HI);
1136 }
1137
1138 static void rec_special_MTHI(struct lightrec_cstate *state,
1139                              const struct block *block, u16 offset)
1140 {
1141         union code c = block->opcode_list[offset].c;
1142
1143         _jit_name(block->_jit, __func__);
1144         rec_alu_mv_lo_hi(state, block, offset, REG_HI, c.r.rs);
1145 }
1146
1147 static void rec_special_MFLO(struct lightrec_cstate *state,
1148                              const struct block *block, u16 offset)
1149 {
1150         union code c = block->opcode_list[offset].c;
1151
1152         _jit_name(block->_jit, __func__);
1153         rec_alu_mv_lo_hi(state, block, offset, c.r.rd, REG_LO);
1154 }
1155
1156 static void rec_special_MTLO(struct lightrec_cstate *state,
1157                              const struct block *block, u16 offset)
1158 {
1159         union code c = block->opcode_list[offset].c;
1160
1161         _jit_name(block->_jit, __func__);
1162         rec_alu_mv_lo_hi(state, block, offset, REG_LO, c.r.rs);
1163 }
1164
1165 static void call_to_c_wrapper(struct lightrec_cstate *state,
1166                               const struct block *block, u32 arg,
1167                               enum c_wrappers wrapper)
1168 {
1169         struct regcache *reg_cache = state->reg_cache;
1170         jit_state_t *_jit = block->_jit;
1171         s8 tmp, tmp2;
1172
1173         /* Make sure JIT_R1 is not mapped; it will be used in the C wrapper. */
1174         tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);
1175
1176         jit_movi(tmp2, (unsigned int)wrapper << (1 + __WORDSIZE / 32));
1177
1178         tmp = lightrec_get_reg_with_value(reg_cache,
1179                                           (intptr_t) state->state->c_wrapper);
1180         if (tmp < 0) {
1181                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1182                 jit_ldxi(tmp, LIGHTREC_REG_STATE, lightrec_offset(c_wrapper));
1183
1184                 lightrec_temp_set_value(reg_cache, tmp,
1185                                         (intptr_t) state->state->c_wrapper);
1186         }
1187
1188         lightrec_free_reg(reg_cache, tmp2);
1189
1190 #ifdef __mips__
1191         /* On MIPS, register t9 is always used as the target register for JALR.
1192          * Therefore if it does not contain the target address we must
1193          * invalidate it. */
1194         if (tmp != _T9)
1195                 lightrec_unload_reg(reg_cache, _jit, _T9);
1196 #endif
1197
1198         jit_prepare();
1199         jit_pushargi(arg);
1200
1201         lightrec_regcache_mark_live(reg_cache, _jit);
1202         jit_callr(tmp);
1203
1204         lightrec_free_reg(reg_cache, tmp);
1205         lightrec_regcache_mark_live(reg_cache, _jit);
1206 }
1207
1208 static void rec_io(struct lightrec_cstate *state,
1209                    const struct block *block, u16 offset,
1210                    bool load_rt, bool read_rt)
1211 {
1212         struct regcache *reg_cache = state->reg_cache;
1213         jit_state_t *_jit = block->_jit;
1214         union code c = block->opcode_list[offset].c;
1215         u32 flags = block->opcode_list[offset].flags;
1216         bool is_tagged = LIGHTREC_FLAGS_GET_IO_MODE(flags);
1217         u32 lut_entry;
1218         u8 zero;
1219
1220         jit_note(__FILE__, __LINE__);
1221
1222         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
1223
1224         if (read_rt && likely(c.i.rt))
1225                 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
1226         else if (load_rt)
1227                 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
1228
1229         if (op_flag_load_delay(flags) && !state->no_load_delay) {
1230                 /* Clear state->in_delay_slot_n. This notifies the lightrec_rw
1231                  * wrapper that it should write the REG_TEMP register instead of
1232                  * the actual output register of the opcode. */
1233                 zero = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
1234                 jit_stxi_c(lightrec_offset(in_delay_slot_n),
1235                            LIGHTREC_REG_STATE, zero);
1236                 lightrec_free_reg(reg_cache, zero);
1237         }
1238
1239         if (is_tagged) {
1240                 call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_RW);
1241         } else {
1242                 lut_entry = lightrec_get_lut_entry(block);
1243                 call_to_c_wrapper(state, block, (lut_entry << 16) | offset,
1244                                   C_WRAPPER_RW_GENERIC);
1245         }
1246 }
1247
1248 static u32 rec_ram_mask(const struct lightrec_state *state)
1249 {
1250         return (RAM_SIZE << (state->mirrors_mapped * 2)) - 1;
1251 }
1252
1253 static u32 rec_io_mask(const struct lightrec_state *state)
1254 {
1255         u32 length = state->maps[PSX_MAP_HW_REGISTERS].length;
1256
1257         return 0x1f800000 | GENMASK(31 - clz32(length - 1), 0);
1258 }
1259
1260 static void rec_add_offset(struct lightrec_cstate *cstate,
1261                            jit_state_t *_jit, u8 reg_out, u8 reg_in,
1262                            uintptr_t offset)
1263 {
1264         struct regcache *reg_cache = cstate->reg_cache;
1265         u8 reg_imm;
1266
1267         reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, offset);
1268         jit_addr(reg_out, reg_in, reg_imm);
1269
1270         lightrec_free_reg(reg_cache, reg_imm);
1271 }
1272
1273 static void rec_and_mask(struct lightrec_cstate *cstate,
1274                          jit_state_t *_jit, u8 reg_out, u8 reg_in, u32 mask)
1275 {
1276         struct regcache *reg_cache = cstate->reg_cache;
1277         u8 reg_imm;
1278
1279         reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, mask);
1280         jit_andr(reg_out, reg_in, reg_imm);
1281
1282         lightrec_free_reg(reg_cache, reg_imm);
1283 }
1284
1285 static void rec_store_memory(struct lightrec_cstate *cstate,
1286                              const struct block *block,
1287                              u16 offset, jit_code_t code,
1288                              jit_code_t swap_code, uintptr_t addr_offset,
1289                              u32 addr_mask, bool invalidate)
1290 {
1291         const struct lightrec_state *state = cstate->state;
1292         struct regcache *reg_cache = cstate->reg_cache;
1293         struct opcode *op = &block->opcode_list[offset];
1294         jit_state_t *_jit = block->_jit;
1295         union code c = op->c;
1296         u8 rs, rt, tmp = 0, tmp2 = 0, tmp3, addr_reg, addr_reg2;
1297         s16 imm = (s16)c.i.imm;
1298         s32 simm = (s32)imm << (1 - lut_is_32bit(state));
1299         s32 lut_offt = lightrec_offset(code_lut);
1300         bool no_mask = op_flag_no_mask(op->flags);
1301         bool add_imm = c.i.imm &&
1302                 (c.i.op == OP_META_SWU
1303                  || (!state->mirrors_mapped && !no_mask) || (invalidate &&
1304                 ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt))));
1305         bool need_tmp = !no_mask || add_imm || invalidate;
1306         bool swc2 = c.i.op == OP_SWC2;
1307         u8 in_reg = swc2 ? REG_TEMP : c.i.rt;
1308
1309         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1310         if (need_tmp)
1311                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1312
1313         addr_reg = rs;
1314
1315         if (add_imm) {
1316                 jit_addi(tmp, addr_reg, (s16)c.i.imm);
1317                 lightrec_free_reg(reg_cache, rs);
1318                 addr_reg = tmp;
1319                 imm = 0;
1320         } else if (simm) {
1321                 lut_offt += simm;
1322         }
1323
1324         if (!no_mask) {
1325                 rec_and_mask(cstate, _jit, tmp, addr_reg, addr_mask);
1326                 addr_reg = tmp;
1327         }
1328
1329         if (addr_offset) {
1330                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1331                 rec_add_offset(cstate, _jit, tmp2, addr_reg, addr_offset);
1332                 addr_reg2 = tmp2;
1333         } else {
1334                 addr_reg2 = addr_reg;
1335         }
1336
1337         rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);
1338
1339         if (is_big_endian() && swap_code && in_reg) {
1340                 tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
1341
1342                 jit_new_node_ww(swap_code, tmp3, rt);
1343
1344                 if (c.i.op == OP_META_SWU)
1345                         jit_unstr(addr_reg2, tmp3, LIGHTNING_UNALIGNED_32BIT);
1346                 else
1347                         jit_new_node_www(code, imm, addr_reg2, tmp3);
1348
1349                 lightrec_free_reg(reg_cache, tmp3);
1350         } else if (c.i.op == OP_META_SWU) {
1351                 jit_unstr(addr_reg2, rt, LIGHTNING_UNALIGNED_32BIT);
1352         } else {
1353                 jit_new_node_www(code, imm, addr_reg2, rt);
1354         }
1355
1356         lightrec_free_reg(reg_cache, rt);
1357
1358         if (invalidate) {
1359                 tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
1360
1361                 if (c.i.op != OP_SW) {
1362                         jit_andi(tmp, addr_reg, ~3);
1363                         addr_reg = tmp;
1364                 }
1365
1366                 if (!lut_is_32bit(state)) {
1367                         jit_lshi(tmp, addr_reg, 1);
1368                         addr_reg = tmp;
1369                 }
1370
1371                 if (addr_reg == rs && c.i.rs == 0) {
1372                         addr_reg = LIGHTREC_REG_STATE;
1373                 } else {
1374                         jit_add_state(tmp, addr_reg);
1375                         addr_reg = tmp;
1376                 }
1377
1378                 if (lut_is_32bit(state))
1379                         jit_stxi_i(lut_offt, addr_reg, tmp3);
1380                 else
1381                         jit_stxi(lut_offt, addr_reg, tmp3);
1382
1383                 lightrec_free_reg(reg_cache, tmp3);
1384         }
1385
1386         if (addr_offset)
1387                 lightrec_free_reg(reg_cache, tmp2);
1388         if (need_tmp)
1389                 lightrec_free_reg(reg_cache, tmp);
1390         lightrec_free_reg(reg_cache, rs);
1391 }
1392
1393 static void rec_store_ram(struct lightrec_cstate *cstate,
1394                           const struct block *block,
1395                           u16 offset, jit_code_t code,
1396                           jit_code_t swap_code, bool invalidate)
1397 {
1398         const struct lightrec_state *state = cstate->state;
1399
1400         _jit_note(block->_jit, __FILE__, __LINE__);
1401
1402         return rec_store_memory(cstate, block, offset, code, swap_code,
1403                                 state->offset_ram, rec_ram_mask(state),
1404                                 invalidate);
1405 }
1406
1407 static void rec_store_scratch(struct lightrec_cstate *cstate,
1408                               const struct block *block, u16 offset,
1409                               jit_code_t code, jit_code_t swap_code)
1410 {
1411         _jit_note(block->_jit, __FILE__, __LINE__);
1412
1413         return rec_store_memory(cstate, block, offset, code, swap_code,
1414                                 cstate->state->offset_scratch,
1415                                 0x1fffffff, false);
1416 }
1417
1418 static void rec_store_io(struct lightrec_cstate *cstate,
1419                          const struct block *block, u16 offset,
1420                          jit_code_t code, jit_code_t swap_code)
1421 {
1422         _jit_note(block->_jit, __FILE__, __LINE__);
1423
1424         return rec_store_memory(cstate, block, offset, code, swap_code,
1425                                 cstate->state->offset_io,
1426                                 rec_io_mask(cstate->state), false);
1427 }
1428
1429 static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
1430                                            const struct block *block,
1431                                            u16 offset, jit_code_t code,
1432                                            jit_code_t swap_code)
1433 {
1434         const struct lightrec_state *state = cstate->state;
1435         u32 ram_size = state->mirrors_mapped ? RAM_SIZE * 4 : RAM_SIZE;
1436         struct regcache *reg_cache = cstate->reg_cache;
1437         union code c = block->opcode_list[offset].c;
1438         jit_state_t *_jit = block->_jit;
1439         jit_node_t *to_not_ram, *to_end;
1440         bool swc2 = c.i.op == OP_SWC2;
1441         u8 addr_reg, tmp, tmp2 = 0, rs, rt, in_reg = swc2 ? REG_TEMP : c.i.rt;
1442         s16 imm;
1443
1444         jit_note(__FILE__, __LINE__);
1445         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1446         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1447
1448         /* Convert to KUNSEG and avoid RAM mirrors */
1449         if ((c.i.op == OP_META_SWU || !state->mirrors_mapped) && c.i.imm) {
1450                 imm = 0;
1451                 jit_addi(tmp, rs, (s16)c.i.imm);
1452                 addr_reg = tmp;
1453         } else {
1454                 imm = (s16)c.i.imm;
1455                 addr_reg = rs;
1456         }
1457
1458         rec_and_mask(cstate, _jit, tmp, addr_reg, 0x1f800000 | (ram_size - 1));
1459
1460         lightrec_free_reg(reg_cache, rs);
1461
1462         if (state->offset_ram != state->offset_scratch) {
1463                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1464
1465                 to_not_ram = jit_bmsi(tmp, BIT(28));
1466
1467                 jit_movi(tmp2, state->offset_ram);
1468
1469                 to_end = jit_b();
1470                 jit_patch(to_not_ram);
1471
1472                 jit_movi(tmp2, state->offset_scratch);
1473                 jit_patch(to_end);
1474         } else if (state->offset_ram) {
1475                 tmp2 = lightrec_alloc_reg_temp_with_value(reg_cache, _jit,
1476                                                           state->offset_ram);
1477         }
1478
1479         if (state->offset_ram || state->offset_scratch) {
1480                 jit_addr(tmp, tmp, tmp2);
1481                 lightrec_free_reg(reg_cache, tmp2);
1482         }
1483
1484         rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);
1485
1486         if (is_big_endian() && swap_code && in_reg) {
1487                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1488
1489                 jit_new_node_ww(swap_code, tmp2, rt);
1490
1491                 if (c.i.op == OP_META_SWU)
1492                         jit_unstr(tmp, tmp2, LIGHTNING_UNALIGNED_32BIT);
1493                 else
1494                         jit_new_node_www(code, imm, tmp, tmp2);
1495
1496                 lightrec_free_reg(reg_cache, tmp2);
1497         } else if (c.i.op == OP_META_SWU) {
1498                 jit_unstr(tmp, rt, LIGHTNING_UNALIGNED_32BIT);
1499         } else {
1500                 jit_new_node_www(code, imm, tmp, rt);
1501         }
1502
1503         lightrec_free_reg(reg_cache, rt);
1504         lightrec_free_reg(reg_cache, tmp);
1505 }
1506
1507 static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block,
1508                              u16 offset, jit_code_t code, jit_code_t swap_code)
1509 {
1510         const struct lightrec_state *state = cstate->state;
1511         u32 ram_size = state->mirrors_mapped ? RAM_SIZE * 4 : RAM_SIZE;
1512         struct regcache *reg_cache = cstate->reg_cache;
1513         union code c = block->opcode_list[offset].c;
1514         jit_state_t *_jit = block->_jit;
1515         jit_node_t *to_not_ram, *to_end;
1516         bool swc2 = c.i.op == OP_SWC2;
1517         u8 addr_reg, tmp, tmp2, tmp3, rs, rt, reg_imm;
1518         u8 in_reg = swc2 ? REG_TEMP : c.i.rt;
1519         u32 mask;
1520         bool different_offsets = state->offset_ram != state->offset_scratch;
1521
1522         jit_note(__FILE__, __LINE__);
1523
1524         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1525         tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1526         tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
1527
1528         /* Convert to KUNSEG and avoid RAM mirrors */
1529         if (c.i.imm) {
1530                 jit_addi(tmp2, rs, (s16)c.i.imm);
1531                 addr_reg = tmp2;
1532         } else {
1533                 addr_reg = rs;
1534         }
1535
1536         rec_and_mask(cstate, _jit, tmp2, addr_reg, 0x1f800000 | (ram_size - 1));
1537
1538         lightrec_free_reg(reg_cache, rs);
1539         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1540
1541         mask = c.i.op == OP_SW ? RAM_SIZE - 1 : (RAM_SIZE - 1) & ~3;
1542         reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, mask);
1543
1544         if (different_offsets) {
1545                 to_not_ram = jit_bgti(tmp2, ram_size);
1546                 addr_reg = tmp2;
1547         } else {
1548                 jit_lti_u(tmp, tmp2, ram_size);
1549                 jit_movnr(tmp, tmp2, tmp);
1550                 addr_reg = tmp;
1551         }
1552
1553         /* Compute the offset to the code LUT */
1554         jit_andr(tmp, addr_reg, reg_imm);
1555
1556         if (!lut_is_32bit(state))
1557                 jit_lshi(tmp, tmp, 1);
1558         jit_add_state(tmp, tmp);
1559
1560         /* Write NULL to the code LUT to invalidate any block that's there */
1561         if (lut_is_32bit(state))
1562                 jit_stxi_i(lightrec_offset(code_lut), tmp, tmp3);
1563         else
1564                 jit_stxi(lightrec_offset(code_lut), tmp, tmp3);
1565
1566         if (c.i.op == OP_META_SWU) {
1567                 /* With a SWU opcode, we might have touched the following 32-bit
1568                  * word, so invalidate it as well */
1569                 if (lut_is_32bit(state)) {
1570                         jit_stxi_i(lightrec_offset(code_lut) + 4, tmp, tmp3);
1571                 } else {
1572                         jit_stxi(lightrec_offset(code_lut) + sizeof(uintptr_t),
1573                                  tmp, tmp3);
1574                 }
1575         }
1576
1577         if (different_offsets) {
1578                 jit_movi(tmp, state->offset_ram);
1579
1580                 to_end = jit_b();
1581                 jit_patch(to_not_ram);
1582         }
1583
1584         if (state->offset_ram || state->offset_scratch)
1585                 jit_movi(tmp, state->offset_scratch);
1586
1587         if (different_offsets)
1588                 jit_patch(to_end);
1589
1590         if (state->offset_ram || state->offset_scratch)
1591                 jit_addr(tmp2, tmp2, tmp);
1592
1593         lightrec_free_reg(reg_cache, tmp);
1594         lightrec_free_reg(reg_cache, tmp3);
1595         lightrec_free_reg(reg_cache, reg_imm);
1596
1597         rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0);
1598
1599         if (is_big_endian() && swap_code && in_reg) {
1600                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1601
1602                 jit_new_node_ww(swap_code, tmp, rt);
1603
1604                 if (c.i.op == OP_META_SWU)
1605                         jit_unstr(tmp2, tmp, LIGHTNING_UNALIGNED_32BIT);
1606                 else
1607                         jit_new_node_www(code, 0, tmp2, tmp);
1608
1609                 lightrec_free_reg(reg_cache, tmp);
1610         } else if (c.i.op == OP_META_SWU) {
1611                 jit_unstr(tmp2, rt, LIGHTNING_UNALIGNED_32BIT);
1612         } else {
1613                 jit_new_node_www(code, 0, tmp2, rt);
1614         }
1615
1616         lightrec_free_reg(reg_cache, rt);
1617         lightrec_free_reg(reg_cache, tmp2);
1618 }
1619
1620 static void rec_store(struct lightrec_cstate *state,
1621                       const struct block *block, u16 offset,
1622                       jit_code_t code, jit_code_t swap_code)
1623 {
1624         u32 flags = block->opcode_list[offset].flags;
1625         u32 mode = LIGHTREC_FLAGS_GET_IO_MODE(flags);
1626         bool no_invalidate = op_flag_no_invalidate(flags) ||
1627                 (state->state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY);
1628         union code c = block->opcode_list[offset].c;
1629         bool is_swc2 = c.i.op == OP_SWC2;
1630
1631         if (is_swc2) {
1632                 switch (mode) {
1633                 case LIGHTREC_IO_RAM:
1634                 case LIGHTREC_IO_SCRATCH:
1635                 case LIGHTREC_IO_DIRECT:
1636                 case LIGHTREC_IO_DIRECT_HW:
1637                         rec_cp2_do_mfc2(state, block, offset, c.i.rt, REG_TEMP);
1638                         break;
1639                 default:
1640                         break;
1641                 }
1642         }
1643
1644         switch (mode) {
1645         case LIGHTREC_IO_RAM:
1646                 rec_store_ram(state, block, offset, code,
1647                               swap_code, !no_invalidate);
1648                 break;
1649         case LIGHTREC_IO_SCRATCH:
1650                 rec_store_scratch(state, block, offset, code, swap_code);
1651                 break;
1652         case LIGHTREC_IO_DIRECT:
1653                 if (no_invalidate) {
1654                         rec_store_direct_no_invalidate(state, block, offset,
1655                                                        code, swap_code);
1656                 } else {
1657                         rec_store_direct(state, block, offset, code, swap_code);
1658                 }
1659                 break;
1660         case LIGHTREC_IO_DIRECT_HW:
1661                 rec_store_io(state, block, offset, code, swap_code);
1662                 break;
1663         default:
1664                 rec_io(state, block, offset, true, false);
1665                 return;
1666         }
1667
1668         if (is_swc2)
1669                 lightrec_discard_reg_if_loaded(state->reg_cache, REG_TEMP);
1670 }
1671
1672 static void rec_SB(struct lightrec_cstate *state,
1673                    const struct block *block, u16 offset)
1674 {
1675         _jit_name(block->_jit, __func__);
1676         rec_store(state, block, offset, jit_code_stxi_c, 0);
1677 }
1678
1679 static void rec_SH(struct lightrec_cstate *state,
1680                    const struct block *block, u16 offset)
1681 {
1682         _jit_name(block->_jit, __func__);
1683         rec_store(state, block, offset,
1684                   jit_code_stxi_s, jit_code_bswapr_us);
1685 }
1686
1687 static void rec_SW(struct lightrec_cstate *state,
1688                    const struct block *block, u16 offset)
1689
1690 {
1691         union code c = block->opcode_list[offset].c;
1692
1693         _jit_name(block->_jit, c.i.op == OP_SWC2 ? "rec_SWC2" : "rec_SW");
1694         rec_store(state, block, offset,
1695                   jit_code_stxi_i, jit_code_bswapr_ui);
1696 }
1697
1698 static void rec_SWL(struct lightrec_cstate *state,
1699                     const struct block *block, u16 offset)
1700 {
1701         _jit_name(block->_jit, __func__);
1702         rec_io(state, block, offset, true, false);
1703 }
1704
1705 static void rec_SWR(struct lightrec_cstate *state,
1706                     const struct block *block, u16 offset)
1707 {
1708         _jit_name(block->_jit, __func__);
1709         rec_io(state, block, offset, true, false);
1710 }
1711
1712 static void rec_load_memory(struct lightrec_cstate *cstate,
1713                             const struct block *block, u16 offset,
1714                             jit_code_t code, jit_code_t swap_code, bool is_unsigned,
1715                             uintptr_t addr_offset, u32 addr_mask)
1716 {
1717         struct lightrec_state *state = cstate->state;
1718         struct regcache *reg_cache = cstate->reg_cache;
1719         struct opcode *op = &block->opcode_list[offset];
1720         bool load_delay = op_flag_load_delay(op->flags) && !cstate->no_load_delay;
1721         jit_state_t *_jit = block->_jit;
1722         u8 rs, rt, out_reg, addr_reg, flags = REG_EXT;
1723         bool no_mask = op_flag_no_mask(op->flags);
1724         union code c = op->c;
1725         s16 imm;
1726
1727         if (load_delay || c.i.op == OP_LWC2)
1728                 out_reg = REG_TEMP;
1729         else if (c.i.rt)
1730                 out_reg = c.i.rt;
1731         else
1732                 return;
1733
1734         if (is_unsigned)
1735                 flags |= REG_ZEXT;
1736
1737         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1738         rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
1739
1740         if ((op->i.op == OP_META_LWU && c.i.imm)
1741             || (!state->mirrors_mapped && c.i.imm && !no_mask)) {
1742                 jit_addi(rt, rs, (s16)c.i.imm);
1743                 addr_reg = rt;
1744                 imm = 0;
1745         } else {
1746                 addr_reg = rs;
1747                 imm = (s16)c.i.imm;
1748         }
1749
1750         if (op->i.op == OP_META_LWU)
1751                 imm = LIGHTNING_UNALIGNED_32BIT;
1752
1753         if (!no_mask) {
1754                 rec_and_mask(cstate, _jit, rt, addr_reg, addr_mask);
1755                 addr_reg = rt;
1756         }
1757
1758         if (addr_offset) {
1759                 rec_add_offset(cstate, _jit, rt, addr_reg, addr_offset);
1760                 addr_reg = rt;
1761         }
1762
1763         jit_new_node_www(code, rt, addr_reg, imm);
1764
1765         if (is_big_endian() && swap_code) {
1766                 jit_new_node_ww(swap_code, rt, rt);
1767
1768                 if (c.i.op == OP_LH)
1769                         jit_extr_s(rt, rt);
1770                 else if (c.i.op == OP_LW && __WORDSIZE == 64)
1771                         jit_extr_i(rt, rt);
1772         }
1773
1774         lightrec_free_reg(reg_cache, rs);
1775         lightrec_free_reg(reg_cache, rt);
1776 }
1777
1778 static void rec_load_ram(struct lightrec_cstate *cstate,
1779                          const struct block *block, u16 offset,
1780                          jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1781 {
1782         _jit_note(block->_jit, __FILE__, __LINE__);
1783
1784         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1785                         cstate->state->offset_ram, rec_ram_mask(cstate->state));
1786 }
1787
1788 static void rec_load_bios(struct lightrec_cstate *cstate,
1789                           const struct block *block, u16 offset,
1790                           jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1791 {
1792         _jit_note(block->_jit, __FILE__, __LINE__);
1793
1794         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1795                         cstate->state->offset_bios, 0x1fffffff);
1796 }
1797
1798 static void rec_load_scratch(struct lightrec_cstate *cstate,
1799                              const struct block *block, u16 offset,
1800                              jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1801 {
1802         _jit_note(block->_jit, __FILE__, __LINE__);
1803
1804         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1805                         cstate->state->offset_scratch, 0x1fffffff);
1806 }
1807
1808 static void rec_load_io(struct lightrec_cstate *cstate,
1809                         const struct block *block, u16 offset,
1810                         jit_code_t code, jit_code_t swap_code, bool is_unsigned)
1811 {
1812         _jit_note(block->_jit, __FILE__, __LINE__);
1813
1814         rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
1815                         cstate->state->offset_io, rec_io_mask(cstate->state));
1816 }
1817
1818 static void rec_load_direct(struct lightrec_cstate *cstate,
1819                             const struct block *block, u16 offset,
1820                             jit_code_t code, jit_code_t swap_code,
1821                             bool is_unsigned)
1822 {
1823         const struct lightrec_state *state = cstate->state;
1824         struct regcache *reg_cache = cstate->reg_cache;
1825         struct opcode *op = &block->opcode_list[offset];
1826         bool load_delay = op_flag_load_delay(op->flags) && !cstate->no_load_delay;
1827         jit_state_t *_jit = block->_jit;
1828         jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2;
1829         u8 tmp, rs, rt, out_reg, addr_reg, flags = REG_EXT;
1830         bool different_offsets = state->offset_bios != state->offset_scratch;
1831         union code c = op->c;
1832         s32 addr_mask;
1833         u32 reg_imm;
1834         s8 offt_reg;
1835         s16 imm;
1836
1837         if (load_delay || c.i.op == OP_LWC2)
1838                 out_reg = REG_TEMP;
1839         else if (c.i.rt)
1840                 out_reg = c.i.rt;
1841         else
1842                 return;
1843
1844         if (is_unsigned)
1845                 flags |= REG_ZEXT;
1846
1847         jit_note(__FILE__, __LINE__);
1848         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1849         rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
1850
1851         if ((state->offset_ram == state->offset_bios &&
1852             state->offset_ram == state->offset_scratch &&
1853             state->mirrors_mapped && c.i.op != OP_META_LWU)
1854             || !c.i.imm) {
1855                 addr_reg = rs;
1856                 imm = (s16)c.i.imm;
1857         } else {
1858                 jit_addi(rt, rs, (s16)c.i.imm);
1859                 addr_reg = rt;
1860                 imm = 0;
1861
1862                 if (c.i.rs != c.i.rt)
1863                         lightrec_free_reg(reg_cache, rs);
1864         }
1865
1866         if (op->i.op == OP_META_LWU)
1867                 imm = LIGHTNING_UNALIGNED_32BIT;
1868
1869         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1870
1871         if (state->offset_ram == state->offset_bios &&
1872             state->offset_ram == state->offset_scratch) {
1873                 if (!state->mirrors_mapped)
1874                         addr_mask = 0x1f800000 | (RAM_SIZE - 1);
1875                 else
1876                         addr_mask = 0x1fffffff;
1877
1878                 reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit,
1879                                                              addr_mask);
1880                 if (!state->mirrors_mapped) {
1881                         jit_andi(tmp, addr_reg, BIT(28));
1882                         jit_rshi_u(tmp, tmp, 28 - 22);
1883                         jit_orr(tmp, tmp, reg_imm);
1884                         jit_andr(rt, addr_reg, tmp);
1885                 } else {
1886                         jit_andr(rt, addr_reg, reg_imm);
1887                 }
1888
1889                 lightrec_free_reg(reg_cache, reg_imm);
1890
1891                 if (state->offset_ram) {
1892                         offt_reg = lightrec_get_reg_with_value(reg_cache,
1893                                                                state->offset_ram);
1894                         if (offt_reg < 0) {
1895                                 jit_movi(tmp, state->offset_ram);
1896                                 lightrec_temp_set_value(reg_cache, tmp,
1897                                                         state->offset_ram);
1898                         } else {
1899                                 lightrec_free_reg(reg_cache, tmp);
1900                                 tmp = offt_reg;
1901                         }
1902                 }
1903         } else {
1904                 to_not_ram = jit_bmsi(addr_reg, BIT(28));
1905
1906                 /* Convert to KUNSEG and avoid RAM mirrors */
1907                 jit_andi(rt, addr_reg, RAM_SIZE - 1);
1908
1909                 if (state->offset_ram)
1910                         jit_movi(tmp, state->offset_ram);
1911
1912                 to_end = jit_b();
1913
1914                 jit_patch(to_not_ram);
1915
1916                 if (different_offsets)
1917                         to_not_bios = jit_bmci(addr_reg, BIT(22));
1918
1919                 /* Convert to KUNSEG */
1920                 jit_andi(rt, addr_reg, 0x1fc00000 | (BIOS_SIZE - 1));
1921
1922                 jit_movi(tmp, state->offset_bios);
1923
1924                 if (different_offsets) {
1925                         to_end2 = jit_b();
1926
1927                         jit_patch(to_not_bios);
1928
1929                         /* Convert to KUNSEG */
1930                         jit_andi(rt, addr_reg, 0x1f800fff);
1931
1932                         if (state->offset_scratch)
1933                                 jit_movi(tmp, state->offset_scratch);
1934
1935                         jit_patch(to_end2);
1936                 }
1937
1938                 jit_patch(to_end);
1939         }
1940
1941         if (state->offset_ram || state->offset_bios || state->offset_scratch)
1942                 jit_addr(rt, rt, tmp);
1943
1944         jit_new_node_www(code, rt, rt, imm);
1945
1946         if (is_big_endian() && swap_code) {
1947                 jit_new_node_ww(swap_code, rt, rt);
1948
1949                 if (c.i.op == OP_LH)
1950                         jit_extr_s(rt, rt);
1951                 else if (c.i.op == OP_LW && __WORDSIZE == 64)
1952                         jit_extr_i(rt, rt);
1953         }
1954
1955         lightrec_free_reg(reg_cache, addr_reg);
1956         lightrec_free_reg(reg_cache, rt);
1957         lightrec_free_reg(reg_cache, tmp);
1958 }
1959
1960 static void rec_load(struct lightrec_cstate *state, const struct block *block,
1961                      u16 offset, jit_code_t code, jit_code_t swap_code,
1962                      bool is_unsigned)
1963 {
1964         const struct opcode *op = &block->opcode_list[offset];
1965         u32 flags = op->flags;
1966
1967         switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
1968         case LIGHTREC_IO_RAM:
1969                 rec_load_ram(state, block, offset, code, swap_code, is_unsigned);
1970                 break;
1971         case LIGHTREC_IO_BIOS:
1972                 rec_load_bios(state, block, offset, code, swap_code, is_unsigned);
1973                 break;
1974         case LIGHTREC_IO_SCRATCH:
1975                 rec_load_scratch(state, block, offset, code, swap_code, is_unsigned);
1976                 break;
1977         case LIGHTREC_IO_DIRECT_HW:
1978                 rec_load_io(state, block, offset, code, swap_code, is_unsigned);
1979                 break;
1980         case LIGHTREC_IO_DIRECT:
1981                 rec_load_direct(state, block, offset, code, swap_code, is_unsigned);
1982                 break;
1983         default:
1984                 rec_io(state, block, offset, false, true);
1985                 return;
1986         }
1987
1988         if (op->i.op == OP_LWC2) {
1989                 rec_cp2_do_mtc2(state, block, offset, op->i.rt, REG_TEMP);
1990                 lightrec_discard_reg_if_loaded(state->reg_cache, REG_TEMP);
1991         }
1992 }
1993
1994 static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset)
1995 {
1996         _jit_name(block->_jit, __func__);
1997         rec_load(state, block, offset, jit_code_ldxi_c, 0, false);
1998 }
1999
2000 static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset)
2001 {
2002         _jit_name(block->_jit, __func__);
2003         rec_load(state, block, offset, jit_code_ldxi_uc, 0, true);
2004 }
2005
2006 static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset)
2007 {
2008         jit_code_t code = is_big_endian() ? jit_code_ldxi_us : jit_code_ldxi_s;
2009
2010         _jit_name(block->_jit, __func__);
2011         rec_load(state, block, offset, code, jit_code_bswapr_us, false);
2012 }
2013
2014 static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset)
2015 {
2016         _jit_name(block->_jit, __func__);
2017         rec_load(state, block, offset, jit_code_ldxi_us, jit_code_bswapr_us, true);
2018 }
2019
2020 static void rec_LWL(struct lightrec_cstate *state, const struct block *block, u16 offset)
2021 {
2022         _jit_name(block->_jit, __func__);
2023         rec_io(state, block, offset, true, true);
2024 }
2025
2026 static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u16 offset)
2027 {
2028         _jit_name(block->_jit, __func__);
2029         rec_io(state, block, offset, true, true);
2030 }
2031
2032 static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset)
2033 {
2034         union code c = block->opcode_list[offset].c;
2035         jit_code_t code;
2036
2037         if (is_big_endian() && __WORDSIZE == 64)
2038                 code = jit_code_ldxi_ui;
2039         else
2040                 code = jit_code_ldxi_i;
2041
2042         _jit_name(block->_jit, c.i.op == OP_LWC2 ? "rec_LWC2" : "rec_LW");
2043         rec_load(state, block, offset, code, jit_code_bswapr_ui, false);
2044 }
2045
2046 static void rec_exit_early(struct lightrec_cstate *state,
2047                            const struct block *block, u16 offset,
2048                            u32 exit_code, u32 pc)
2049 {
2050         struct regcache *reg_cache = state->reg_cache;
2051         jit_state_t *_jit = block->_jit;
2052         u8 tmp;
2053
2054         _jit_note(block->_jit, __FILE__, __LINE__);
2055
2056         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2057
2058         jit_movi(tmp, exit_code);
2059         jit_stxi_i(lightrec_offset(exit_flags), LIGHTREC_REG_STATE, tmp);
2060
2061         jit_ldxi_i(tmp, LIGHTREC_REG_STATE, lightrec_offset(target_cycle));
2062         jit_subr(tmp, tmp, LIGHTREC_REG_CYCLE);
2063         jit_movi(LIGHTREC_REG_CYCLE, 0);
2064         jit_stxi_i(lightrec_offset(target_cycle), LIGHTREC_REG_STATE, tmp);
2065         jit_stxi_i(lightrec_offset(current_cycle), LIGHTREC_REG_STATE, tmp);
2066
2067         lightrec_free_reg(reg_cache, tmp);
2068
2069         lightrec_emit_end_of_block(state, block, offset, -1, pc, 31, 0, true);
2070 }
2071
2072 static void rec_special_SYSCALL(struct lightrec_cstate *state,
2073                                 const struct block *block, u16 offset)
2074 {
2075         _jit_name(block->_jit, __func__);
2076
2077         /* TODO: the return address should be "pc - 4" if we're a delay slot */
2078         rec_exit_early(state, block, offset, LIGHTREC_EXIT_SYSCALL,
2079                        get_ds_pc(block, offset, 0));
2080 }
2081
2082 static void rec_special_BREAK(struct lightrec_cstate *state,
2083                               const struct block *block, u16 offset)
2084 {
2085         _jit_name(block->_jit, __func__);
2086         rec_exit_early(state, block, offset, LIGHTREC_EXIT_BREAK,
2087                        get_ds_pc(block, offset, 0));
2088 }
2089
2090 static void rec_mfc(struct lightrec_cstate *state, const struct block *block, u16 offset)
2091 {
2092         struct regcache *reg_cache = state->reg_cache;
2093         union code c = block->opcode_list[offset].c;
2094         jit_state_t *_jit = block->_jit;
2095
2096         jit_note(__FILE__, __LINE__);
2097
2098         if (c.i.op != OP_SWC2)
2099                 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
2100
2101         call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MFC);
2102 }
2103
2104 static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset)
2105 {
2106         struct regcache *reg_cache = state->reg_cache;
2107         union code c = block->opcode_list[offset].c;
2108         jit_state_t *_jit = block->_jit;
2109
2110         jit_note(__FILE__, __LINE__);
2111         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
2112         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
2113         lightrec_clean_reg_if_loaded(reg_cache, _jit, REG_TEMP, false);
2114
2115         call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MTC);
2116
2117         if (c.i.op == OP_CP0 &&
2118             !op_flag_no_ds(block->opcode_list[offset].flags) &&
2119             (c.r.rd == 12 || c.r.rd == 13))
2120                 lightrec_emit_end_of_block(state, block, offset, -1,
2121                                            get_ds_pc(block, offset, 1),
2122                                            0, 0, true);
2123 }
2124
2125 static void
2126 rec_mfc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
2127 {
2128         struct regcache *reg_cache = state->reg_cache;
2129         union code c = block->opcode_list[offset].c;
2130         jit_state_t *_jit = block->_jit;
2131         u8 rt;
2132
2133         jit_note(__FILE__, __LINE__);
2134
2135         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT);
2136
2137         jit_ldxi_i(rt, LIGHTREC_REG_STATE, lightrec_offset(regs.cp0[c.r.rd]));
2138
2139         lightrec_free_reg(reg_cache, rt);
2140 }
2141
2142 static bool block_uses_icache(const struct lightrec_cstate *state,
2143                               const struct block *block)
2144 {
2145         const struct lightrec_mem_map *map = &state->state->maps[PSX_MAP_KERNEL_USER_RAM];
2146         u32 pc = kunseg(block->pc);
2147
2148         if (pc < map->pc || pc >= map->pc + map->length)
2149                 return false;
2150
2151         return (block->pc >> 28) < 0xa;
2152 }
2153
2154 static void
2155 rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
2156 {
2157         struct regcache *reg_cache = state->reg_cache;
2158         const union code c = block->opcode_list[offset].c;
2159         jit_state_t *_jit = block->_jit;
2160         u8 rt, tmp = 0, tmp2, status;
2161         jit_node_t *to_end;
2162
2163         jit_note(__FILE__, __LINE__);
2164
2165         switch(c.r.rd) {
2166         case 1:
2167         case 4:
2168         case 8:
2169         case 14:
2170         case 15:
2171                 /* Those registers are read-only */
2172                 return;
2173         default:
2174                 break;
2175         }
2176
2177         if (!block_uses_icache(state, block) && c.r.rd == 12) {
2178                 /* If we are not running code from the RAM through kuseg or
2179                  * kseg0, handle writes to the Status register in C; as the
2180                  * code may toggle bit 16 which isolates the cache. Code
2181                  * running from kuseg or kseg0 in RAM cannot do that. */
2182                 rec_mtc(state, block, offset);
2183                 return;
2184         }
2185
2186         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
2187
2188         if (c.r.rd != 13)
2189                 jit_stxi_i(lightrec_offset(regs.cp0[c.r.rd]), LIGHTREC_REG_STATE, rt);
2190
2191         if (c.r.rd == 12 || c.r.rd == 13) {
2192                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2193                 jit_ldxi_i(tmp, LIGHTREC_REG_STATE, lightrec_offset(regs.cp0[13]));
2194
2195                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2196         }
2197
2198         if (c.r.rd == 12) {
2199                 status = rt;
2200         } else if (c.r.rd == 13) {
2201                 /* Cause = (Cause & ~0x0300) | (value & 0x0300) */
2202                 jit_andi(tmp2, rt, 0x0300);
2203                 jit_ori(tmp, tmp, 0x0300);
2204                 jit_xori(tmp, tmp, 0x0300);
2205                 jit_orr(tmp, tmp, tmp2);
2206                 jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, lightrec_offset(regs.cp0[12]));
2207                 jit_stxi_i(lightrec_offset(regs.cp0[13]), LIGHTREC_REG_STATE, tmp);
2208                 status = tmp2;
2209         }
2210
2211         if (c.r.rd == 12 || c.r.rd == 13) {
2212                 /* Exit dynarec in case there's a software interrupt.
2213                  * exit_flags = !!(status & tmp & 0x0300) & status; */
2214                 jit_andr(tmp, tmp, status);
2215                 jit_andi(tmp, tmp, 0x0300);
2216                 jit_nei(tmp, tmp, 0);
2217                 jit_andr(tmp, tmp, status);
2218         }
2219
2220         if (c.r.rd == 12) {
2221                 /* Exit dynarec in case we unmask a hardware interrupt.
2222                  * exit_flags = !(~status & 0x401) */
2223
2224                 jit_comr(tmp2, status);
2225                 jit_andi(tmp2, tmp2, 0x401);
2226                 jit_eqi(tmp2, tmp2, 0);
2227                 jit_orr(tmp, tmp, tmp2);
2228         }
2229
2230         lightrec_free_reg(reg_cache, rt);
2231
2232         if (c.r.rd == 12 || c.r.rd == 13) {
2233                 to_end = jit_beqi(tmp, 0);
2234
2235                 jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, lightrec_offset(target_cycle));
2236                 jit_subr(tmp2, tmp2, LIGHTREC_REG_CYCLE);
2237                 jit_movi(LIGHTREC_REG_CYCLE, 0);
2238                 jit_stxi_i(lightrec_offset(target_cycle), LIGHTREC_REG_STATE, tmp2);
2239                 jit_stxi_i(lightrec_offset(current_cycle), LIGHTREC_REG_STATE, tmp2);
2240
2241
2242                 jit_patch(to_end);
2243         }
2244
2245         if (!op_flag_no_ds(block->opcode_list[offset].flags) &&
2246             (c.r.rd == 12 || c.r.rd == 13)) {
2247                 state->cycles += lightrec_cycles_of_opcode(state->state, c);
2248                 lightrec_emit_eob(state, block, offset + 1);
2249         }
2250 }
2251
2252 static void rec_cp0_MFC0(struct lightrec_cstate *state,
2253                          const struct block *block, u16 offset)
2254 {
2255         _jit_name(block->_jit, __func__);
2256         rec_mfc0(state, block, offset);
2257 }
2258
2259 static void rec_cp0_CFC0(struct lightrec_cstate *state,
2260                          const struct block *block, u16 offset)
2261 {
2262         _jit_name(block->_jit, __func__);
2263         rec_mfc0(state, block, offset);
2264 }
2265
2266 static void rec_cp0_MTC0(struct lightrec_cstate *state,
2267                          const struct block *block, u16 offset)
2268 {
2269         _jit_name(block->_jit, __func__);
2270         rec_mtc0(state, block, offset);
2271 }
2272
2273 static void rec_cp0_CTC0(struct lightrec_cstate *state,
2274                          const struct block *block, u16 offset)
2275 {
2276         _jit_name(block->_jit, __func__);
2277         rec_mtc0(state, block, offset);
2278 }
2279
2280 static unsigned int cp2d_i_offset(u8 reg)
2281 {
2282         return lightrec_offset(regs.cp2d[reg]);
2283 }
2284
2285 static unsigned int cp2d_s_offset(u8 reg)
2286 {
2287         return cp2d_i_offset(reg) + is_big_endian() * 2;
2288 }
2289
2290 static unsigned int cp2c_i_offset(u8 reg)
2291 {
2292         return lightrec_offset(regs.cp2c[reg]);
2293 }
2294
2295 static unsigned int cp2c_s_offset(u8 reg)
2296 {
2297         return cp2c_i_offset(reg) + is_big_endian() * 2;
2298 }
2299
2300 static void rec_cp2_do_mfc2(struct lightrec_cstate *state,
2301                             const struct block *block, u16 offset,
2302                             u8 reg, u8 out_reg)
2303 {
2304         struct regcache *reg_cache = state->reg_cache;
2305         jit_state_t *_jit = block->_jit;
2306         const u32 zext_regs = 0x300f0080;
2307         u8 rt, tmp, tmp2, tmp3, out, flags;
2308         unsigned int i;
2309
2310         _jit_name(block->_jit, __func__);
2311
2312         if (state->state->ops.cop2_notify) {
2313                 /* We must call cop2_notify, handle that in C. */
2314                 rec_mfc(state, block, offset);
2315                 return;
2316         }
2317
2318         flags = (zext_regs & BIT(reg)) ? REG_ZEXT : REG_EXT;
2319         rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
2320
2321         if (reg == 15)
2322                 reg = 14;
2323
2324         switch (reg) {
2325         case 1:
2326         case 3:
2327         case 5:
2328         case 8:
2329         case 9:
2330         case 10:
2331         case 11:
2332                 jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg));
2333                 break;
2334         case 7:
2335         case 16:
2336         case 17:
2337         case 18:
2338         case 19:
2339                 jit_ldxi_us(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg));
2340                 break;
2341         case 28:
2342         case 29:
2343                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2344                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2345                 tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
2346
2347                 for (i = 0; i < 3; i++) {
2348                         out = i == 0 ? rt : tmp;
2349
2350                         jit_ldxi_s(tmp, LIGHTREC_REG_STATE, cp2d_s_offset(9 + i));
2351                         jit_movi(tmp2, 0x1f);
2352                         jit_rshi(out, tmp, 7);
2353
2354                         jit_ltr(tmp3, tmp2, out);
2355                         jit_movnr(out, tmp2, tmp3);
2356
2357                         jit_gei(tmp2, out, 0);
2358                         jit_movzr(out, tmp2, tmp2);
2359
2360                         if (i > 0) {
2361                                 jit_lshi(tmp, tmp, 5 * i);
2362                                 jit_orr(rt, rt, tmp);
2363                         }
2364                 }
2365
2366
2367                 lightrec_free_reg(reg_cache, tmp);
2368                 lightrec_free_reg(reg_cache, tmp2);
2369                 lightrec_free_reg(reg_cache, tmp3);
2370                 break;
2371         default:
2372                 jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2d_i_offset(reg));
2373                 break;
2374         }
2375
2376         lightrec_free_reg(reg_cache, rt);
2377 }
2378
2379 static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
2380                                const struct block *block, u16 offset)
2381 {
2382         const union code c = block->opcode_list[offset].c;
2383
2384         rec_cp2_do_mfc2(state, block, offset, c.r.rd, c.r.rt);
2385 }
2386
2387 static void rec_cp2_basic_CFC2(struct lightrec_cstate *state,
2388                                const struct block *block, u16 offset)
2389 {
2390         struct regcache *reg_cache = state->reg_cache;
2391         const union code c = block->opcode_list[offset].c;
2392         jit_state_t *_jit = block->_jit;
2393         u8 rt;
2394
2395         _jit_name(block->_jit, __func__);
2396
2397         if (state->state->ops.cop2_notify) {
2398                 /* We must call cop2_notify, handle that in C. */
2399                 rec_mfc(state, block, offset);
2400                 return;
2401         }
2402
2403         switch (c.r.rd) {
2404         case 4:
2405         case 12:
2406         case 20:
2407         case 26:
2408         case 27:
2409         case 29:
2410         case 30:
2411                 rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT);
2412                 jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2c_s_offset(c.r.rd));
2413                 break;
2414         default:
2415                 rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT);
2416                 jit_ldxi_ui(rt, LIGHTREC_REG_STATE, cp2c_i_offset(c.r.rd));
2417                 break;
2418         }
2419
2420         lightrec_free_reg(reg_cache, rt);
2421 }
2422
2423 static void rec_cp2_do_mtc2(struct lightrec_cstate *state,
2424                             const struct block *block, u16 offset,
2425                             u8 reg, u8 in_reg)
2426 {
2427         struct regcache *reg_cache = state->reg_cache;
2428         jit_state_t *_jit = block->_jit;
2429         u8 rt, tmp, tmp2, flags = 0;
2430
2431         _jit_name(block->_jit, __func__);
2432
2433         if (state->state->ops.cop2_notify) {
2434                 /* We must call cop2_notify, handle that in C. */
2435                 rec_mtc(state, block, offset);
2436                 return;
2437         }
2438
2439         if (reg == 31)
2440                 return;
2441
2442         if (reg == 30)
2443                 flags |= REG_EXT;
2444
2445         rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, flags);
2446
2447         switch (reg) {
2448         case 15:
2449                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2450                 jit_ldxi_i(tmp, LIGHTREC_REG_STATE, cp2d_i_offset(13));
2451
2452                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2453                 jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, cp2d_i_offset(14));
2454
2455                 jit_stxi_i(cp2d_i_offset(12), LIGHTREC_REG_STATE, tmp);
2456                 jit_stxi_i(cp2d_i_offset(13), LIGHTREC_REG_STATE, tmp2);
2457                 jit_stxi_i(cp2d_i_offset(14), LIGHTREC_REG_STATE, rt);
2458
2459                 lightrec_free_reg(reg_cache, tmp);
2460                 lightrec_free_reg(reg_cache, tmp2);
2461                 break;
2462         case 28:
2463                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2464
2465                 jit_lshi(tmp, rt, 7);
2466                 jit_andi(tmp, tmp, 0xf80);
2467                 jit_stxi_s(cp2d_s_offset(9), LIGHTREC_REG_STATE, tmp);
2468
2469                 jit_lshi(tmp, rt, 2);
2470                 jit_andi(tmp, tmp, 0xf80);
2471                 jit_stxi_s(cp2d_s_offset(10), LIGHTREC_REG_STATE, tmp);
2472
2473                 jit_rshi(tmp, rt, 3);
2474                 jit_andi(tmp, tmp, 0xf80);
2475                 jit_stxi_s(cp2d_s_offset(11), LIGHTREC_REG_STATE, tmp);
2476
2477                 lightrec_free_reg(reg_cache, tmp);
2478                 break;
2479         case 30:
2480                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2481
2482                 /* if (rt < 0) rt = ~rt; */
2483                 jit_rshi(tmp, rt, 31);
2484                 jit_xorr(tmp, rt, tmp);
2485
2486                 /* Count leading zeros */
2487                 jit_clzr(tmp, tmp);
2488                 if (__WORDSIZE != 32)
2489                         jit_subi(tmp, tmp, __WORDSIZE - 32);
2490
2491                 jit_stxi_i(cp2d_i_offset(31), LIGHTREC_REG_STATE, tmp);
2492
2493                 lightrec_free_reg(reg_cache, tmp);
2494                 fallthrough;
2495         default:
2496                 jit_stxi_i(cp2d_i_offset(reg), LIGHTREC_REG_STATE, rt);
2497                 break;
2498         }
2499
2500         lightrec_free_reg(reg_cache, rt);
2501 }
2502
2503 static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
2504                                const struct block *block, u16 offset)
2505 {
2506         const union code c = block->opcode_list[offset].c;
2507
2508         rec_cp2_do_mtc2(state, block, offset, c.r.rd, c.r.rt);
2509 }
2510
2511 static void rec_cp2_basic_CTC2(struct lightrec_cstate *state,
2512                                const struct block *block, u16 offset)
2513 {
2514         struct regcache *reg_cache = state->reg_cache;
2515         const union code c = block->opcode_list[offset].c;
2516         jit_state_t *_jit = block->_jit;
2517         u8 rt, tmp, tmp2;
2518
2519         _jit_name(block->_jit, __func__);
2520
2521         if (state->state->ops.cop2_notify) {
2522                 /* We must call cop2_notify, handle that in C. */
2523                 rec_mtc(state, block, offset);
2524                 return;
2525         }
2526
2527         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
2528
2529         switch (c.r.rd) {
2530         case 4:
2531         case 12:
2532         case 20:
2533         case 26:
2534         case 27:
2535         case 29:
2536         case 30:
2537                 jit_stxi_s(cp2c_s_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
2538                 break;
2539         case 31:
2540                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2541                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
2542
2543                 jit_andi(tmp, rt, 0x7f87e000);
2544                 jit_nei(tmp, tmp, 0);
2545                 jit_lshi(tmp, tmp, 31);
2546
2547                 jit_andi(tmp2, rt, 0x7ffff000);
2548                 jit_orr(tmp, tmp2, tmp);
2549
2550                 jit_stxi_i(cp2c_i_offset(31), LIGHTREC_REG_STATE, tmp);
2551
2552                 lightrec_free_reg(reg_cache, tmp);
2553                 lightrec_free_reg(reg_cache, tmp2);
2554                 break;
2555
2556         default:
2557                 jit_stxi_i(cp2c_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt);
2558         }
2559
2560         lightrec_free_reg(reg_cache, rt);
2561 }
2562
2563 static void rec_cp0_RFE(struct lightrec_cstate *state,
2564                         const struct block *block, u16 offset)
2565 {
2566         struct regcache *reg_cache = state->reg_cache;
2567         jit_state_t *_jit = block->_jit;
2568         u8 status, tmp;
2569
2570         jit_name(__func__);
2571         jit_note(__FILE__, __LINE__);
2572
2573         status = lightrec_alloc_reg_temp(reg_cache, _jit);
2574         jit_ldxi_i(status, LIGHTREC_REG_STATE, lightrec_offset(regs.cp0[12]));
2575
2576         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2577
2578         /* status = ((status >> 2) & 0xf) | status & ~0xf; */
2579         jit_rshi(tmp, status, 2);
2580         jit_andi(tmp, tmp, 0xf);
2581         jit_andi(status, status, ~0xful);
2582         jit_orr(status, status, tmp);
2583
2584         jit_ldxi_i(tmp, LIGHTREC_REG_STATE, lightrec_offset(regs.cp0[13]));
2585         jit_stxi_i(lightrec_offset(regs.cp0[12]), LIGHTREC_REG_STATE, status);
2586
2587         /* Exit dynarec in case there's a software interrupt.
2588          * exit_flags = !!(status & cause & 0x0300) & status; */
2589         jit_andr(tmp, tmp, status);
2590         jit_andi(tmp, tmp, 0x0300);
2591         jit_nei(tmp, tmp, 0);
2592         jit_andr(tmp, tmp, status);
2593         jit_stxi_i(lightrec_offset(exit_flags), LIGHTREC_REG_STATE, tmp);
2594
2595         lightrec_free_reg(reg_cache, status);
2596         lightrec_free_reg(reg_cache, tmp);
2597 }
2598
2599 static void rec_CP(struct lightrec_cstate *state,
2600                    const struct block *block, u16 offset)
2601 {
2602         union code c = block->opcode_list[offset].c;
2603         jit_state_t *_jit = block->_jit;
2604
2605         jit_name(__func__);
2606         jit_note(__FILE__, __LINE__);
2607
2608         call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_CP);
2609 }
2610
2611 static void rec_meta_MOV(struct lightrec_cstate *state,
2612                          const struct block *block, u16 offset)
2613 {
2614         struct regcache *reg_cache = state->reg_cache;
2615         const struct opcode *op = &block->opcode_list[offset];
2616         union code c = op->c;
2617         jit_state_t *_jit = block->_jit;
2618         bool unload_rd;
2619         bool unload_rs, discard_rs;
2620         u8 rs, rd;
2621
2622         _jit_name(block->_jit, __func__);
2623         jit_note(__FILE__, __LINE__);
2624
2625         unload_rs = OPT_EARLY_UNLOAD
2626                 && LIGHTREC_FLAGS_GET_RS(op->flags) == LIGHTREC_REG_UNLOAD;
2627         discard_rs = OPT_EARLY_UNLOAD
2628                 && LIGHTREC_FLAGS_GET_RS(op->flags) == LIGHTREC_REG_DISCARD;
2629
2630         if ((unload_rs || discard_rs) && c.m.rs) {
2631                 /* If the source register is going to be unloaded or discarded,
2632                  * then we can simply mark its host register as now pointing to
2633                  * the destination register. */
2634                 pr_debug("Remap %s to %s at offset 0x%x\n",
2635                          lightrec_reg_name(c.m.rs), lightrec_reg_name(c.m.rd),
2636                          offset << 2);
2637                 rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0);
2638                 lightrec_remap_reg(reg_cache, _jit, rs, c.m.rd, discard_rs);
2639                 lightrec_free_reg(reg_cache, rs);
2640                 return;
2641         }
2642
2643         unload_rd = OPT_EARLY_UNLOAD
2644                 && LIGHTREC_FLAGS_GET_RD(op->flags) == LIGHTREC_REG_UNLOAD;
2645
2646         if (unload_rd) {
2647                 /* If the destination register will be unloaded right after the
2648                  * MOV meta-opcode, we don't actually need to write any host
2649                  * register - we can just store the source register directly to
2650                  * the register cache, at the offset corresponding to the
2651                  * destination register. */
2652                 lightrec_discard_reg_if_loaded(reg_cache, c.m.rd);
2653
2654                 rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0);
2655
2656                 jit_stxi_i(lightrec_offset(regs.gpr) + (c.m.rd << 2), LIGHTREC_REG_STATE, rs);
2657
2658                 lightrec_free_reg(reg_cache, rs);
2659         } else {
2660                 if (c.m.rs)
2661                         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0);
2662
2663                 rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, REG_EXT);
2664
2665                 if (c.m.rs == 0) {
2666                         jit_movi(rd, 0);
2667                 } else {
2668                         jit_extr_i(rd, rs);
2669                         lightrec_free_reg(reg_cache, rs);
2670                 }
2671
2672                 lightrec_free_reg(reg_cache, rd);
2673         }
2674 }
2675
2676 static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state,
2677                                const struct block *block,
2678                                u16 offset)
2679 {
2680         struct regcache *reg_cache = state->reg_cache;
2681         union code c = block->opcode_list[offset].c;
2682         jit_state_t *_jit = block->_jit;
2683         u8 rs, rd;
2684
2685         _jit_name(block->_jit, __func__);
2686         jit_note(__FILE__, __LINE__);
2687
2688         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
2689                         c.m.rs, c.m.rd, 0, REG_EXT, &rs, &rd);
2690
2691         if (c.m.op == OP_META_EXTC)
2692                 jit_extr_c(rd, rs);
2693         else
2694                 jit_extr_s(rd, rs);
2695
2696         lightrec_free_reg(reg_cache, rs);
2697         lightrec_free_reg(reg_cache, rd);
2698 }
2699
2700 static void rec_meta_MULT2(struct lightrec_cstate *state,
2701                            const struct block *block,
2702                            u16 offset)
2703 {
2704         struct regcache *reg_cache = state->reg_cache;
2705         union code c = block->opcode_list[offset].c;
2706         jit_state_t *_jit = block->_jit;
2707         u8 reg_lo = get_mult_div_lo(c);
2708         u8 reg_hi = get_mult_div_hi(c);
2709         u32 flags = block->opcode_list[offset].flags;
2710         bool is_signed = c.i.op == OP_META_MULT2;
2711         u8 rs, lo, hi, rflags = 0, hiflags = 0;
2712         unsigned int i;
2713
2714         if (!op_flag_no_hi(flags) && c.r.op < 32) {
2715                 rflags = is_signed ? REG_EXT : REG_ZEXT;
2716                 hiflags = is_signed ? REG_EXT : (REG_EXT | REG_ZEXT);
2717         }
2718
2719         _jit_name(block->_jit, __func__);
2720         jit_note(__FILE__, __LINE__);
2721
2722         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, rflags);
2723
2724         /*
2725          * We must handle the case where one of the output registers is our rs
2726          * input register. Thanksfully, computing LO/HI can be done in any
2727          * order. Here, we make sure that the computation that overwrites the
2728          * input register is always performed last.
2729          */
2730         for (i = 0; i < 2; i++) {
2731                 if ((!i ^ (reg_lo == c.i.rs)) && !op_flag_no_lo(flags)) {
2732                         lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
2733
2734                         if (c.r.op < 32)
2735                                 jit_lshi(lo, rs, c.r.op);
2736                         else
2737                                 jit_movi(lo, 0);
2738
2739                         lightrec_free_reg(reg_cache, lo);
2740                         continue;
2741                 }
2742
2743                 if ((!!i ^ (reg_lo == c.i.rs)) && !op_flag_no_hi(flags)) {
2744                         hi = lightrec_alloc_reg_out(reg_cache, _jit,
2745                                                     reg_hi, hiflags);
2746
2747                         if (c.r.op >= 32) {
2748                                 jit_lshi(hi, rs, c.r.op - 32);
2749                         } else if (is_signed) {
2750                                 if (c.r.op)
2751                                         jit_rshi(hi, rs, 32 - c.r.op);
2752                                 else
2753                                         jit_rshi(hi, rs, 31);
2754                         } else {
2755                                 if (c.r.op)
2756                                         jit_rshi_u(hi, rs, 32 - c.r.op);
2757                                 else
2758                                         jit_movi(hi, 0);
2759                         }
2760
2761                         lightrec_free_reg(reg_cache, hi);
2762                 }
2763         }
2764
2765         lightrec_free_reg(reg_cache, rs);
2766
2767         _jit_name(block->_jit, __func__);
2768         jit_note(__FILE__, __LINE__);
2769 }
2770
2771 static void rec_meta_COM(struct lightrec_cstate *state,
2772                          const struct block *block, u16 offset)
2773 {
2774         struct regcache *reg_cache = state->reg_cache;
2775         union code c = block->opcode_list[offset].c;
2776         jit_state_t *_jit = block->_jit;
2777         u8 rd, rs, flags;
2778
2779         jit_note(__FILE__, __LINE__);
2780
2781         rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset],
2782                         c.m.rs, c.m.rd, 0, 0, &rs, &rd);
2783
2784         flags = lightrec_get_reg_in_flags(reg_cache, rs);
2785
2786         lightrec_set_reg_out_flags(reg_cache, rd,
2787                                    flags & REG_EXT);
2788
2789         jit_comr(rd, rs);
2790
2791         lightrec_free_reg(reg_cache, rs);
2792         lightrec_free_reg(reg_cache, rd);
2793 }
2794
2795 static void rec_meta_LWU(struct lightrec_cstate *state,
2796                          const struct block *block,
2797                          u16 offset)
2798 {
2799         jit_code_t code;
2800
2801         if (is_big_endian() && __WORDSIZE == 64)
2802                 code = jit_code_unldr_u;
2803         else
2804                 code = jit_code_unldr;
2805
2806         _jit_name(block->_jit, __func__);
2807         rec_load(state, block, offset, code, jit_code_bswapr_ui, false);
2808 }
2809
2810 static void rec_meta_SWU(struct lightrec_cstate *state,
2811                          const struct block *block,
2812                          u16 offset)
2813 {
2814         _jit_name(block->_jit, __func__);
2815         rec_store(state, block, offset, jit_code_unstr, jit_code_bswapr_ui);
2816 }
2817
2818 static void unknown_opcode(struct lightrec_cstate *state,
2819                            const struct block *block, u16 offset)
2820 {
2821         rec_exit_early(state, block, offset, LIGHTREC_EXIT_UNKNOWN_OP,
2822                        block->pc + (offset << 2));
2823 }
2824
2825 static const lightrec_rec_func_t rec_standard[64] = {
2826         SET_DEFAULT_ELM(rec_standard, unknown_opcode),
2827         [OP_SPECIAL]            = rec_SPECIAL,
2828         [OP_REGIMM]             = rec_REGIMM,
2829         [OP_J]                  = rec_J,
2830         [OP_JAL]                = rec_JAL,
2831         [OP_BEQ]                = rec_BEQ,
2832         [OP_BNE]                = rec_BNE,
2833         [OP_BLEZ]               = rec_BLEZ,
2834         [OP_BGTZ]               = rec_BGTZ,
2835         [OP_ADDI]               = rec_ADDI,
2836         [OP_ADDIU]              = rec_ADDIU,
2837         [OP_SLTI]               = rec_SLTI,
2838         [OP_SLTIU]              = rec_SLTIU,
2839         [OP_ANDI]               = rec_ANDI,
2840         [OP_ORI]                = rec_ORI,
2841         [OP_XORI]               = rec_XORI,
2842         [OP_LUI]                = rec_LUI,
2843         [OP_CP0]                = rec_CP0,
2844         [OP_CP2]                = rec_CP2,
2845         [OP_LB]                 = rec_LB,
2846         [OP_LH]                 = rec_LH,
2847         [OP_LWL]                = rec_LWL,
2848         [OP_LW]                 = rec_LW,
2849         [OP_LBU]                = rec_LBU,
2850         [OP_LHU]                = rec_LHU,
2851         [OP_LWR]                = rec_LWR,
2852         [OP_SB]                 = rec_SB,
2853         [OP_SH]                 = rec_SH,
2854         [OP_SWL]                = rec_SWL,
2855         [OP_SW]                 = rec_SW,
2856         [OP_SWR]                = rec_SWR,
2857         [OP_LWC2]               = rec_LW,
2858         [OP_SWC2]               = rec_SW,
2859
2860         [OP_META]               = rec_META,
2861         [OP_META_MULT2]         = rec_meta_MULT2,
2862         [OP_META_MULTU2]        = rec_meta_MULT2,
2863         [OP_META_LWU]           = rec_meta_LWU,
2864         [OP_META_SWU]           = rec_meta_SWU,
2865 };
2866
2867 static const lightrec_rec_func_t rec_special[64] = {
2868         SET_DEFAULT_ELM(rec_special, unknown_opcode),
2869         [OP_SPECIAL_SLL]        = rec_special_SLL,
2870         [OP_SPECIAL_SRL]        = rec_special_SRL,
2871         [OP_SPECIAL_SRA]        = rec_special_SRA,
2872         [OP_SPECIAL_SLLV]       = rec_special_SLLV,
2873         [OP_SPECIAL_SRLV]       = rec_special_SRLV,
2874         [OP_SPECIAL_SRAV]       = rec_special_SRAV,
2875         [OP_SPECIAL_JR]         = rec_special_JR,
2876         [OP_SPECIAL_JALR]       = rec_special_JALR,
2877         [OP_SPECIAL_SYSCALL]    = rec_special_SYSCALL,
2878         [OP_SPECIAL_BREAK]      = rec_special_BREAK,
2879         [OP_SPECIAL_MFHI]       = rec_special_MFHI,
2880         [OP_SPECIAL_MTHI]       = rec_special_MTHI,
2881         [OP_SPECIAL_MFLO]       = rec_special_MFLO,
2882         [OP_SPECIAL_MTLO]       = rec_special_MTLO,
2883         [OP_SPECIAL_MULT]       = rec_special_MULT,
2884         [OP_SPECIAL_MULTU]      = rec_special_MULTU,
2885         [OP_SPECIAL_DIV]        = rec_special_DIV,
2886         [OP_SPECIAL_DIVU]       = rec_special_DIVU,
2887         [OP_SPECIAL_ADD]        = rec_special_ADD,
2888         [OP_SPECIAL_ADDU]       = rec_special_ADDU,
2889         [OP_SPECIAL_SUB]        = rec_special_SUB,
2890         [OP_SPECIAL_SUBU]       = rec_special_SUBU,
2891         [OP_SPECIAL_AND]        = rec_special_AND,
2892         [OP_SPECIAL_OR]         = rec_special_OR,
2893         [OP_SPECIAL_XOR]        = rec_special_XOR,
2894         [OP_SPECIAL_NOR]        = rec_special_NOR,
2895         [OP_SPECIAL_SLT]        = rec_special_SLT,
2896         [OP_SPECIAL_SLTU]       = rec_special_SLTU,
2897 };
2898
2899 static const lightrec_rec_func_t rec_regimm[64] = {
2900         SET_DEFAULT_ELM(rec_regimm, unknown_opcode),
2901         [OP_REGIMM_BLTZ]        = rec_regimm_BLTZ,
2902         [OP_REGIMM_BGEZ]        = rec_regimm_BGEZ,
2903         [OP_REGIMM_BLTZAL]      = rec_regimm_BLTZAL,
2904         [OP_REGIMM_BGEZAL]      = rec_regimm_BGEZAL,
2905 };
2906
2907 static const lightrec_rec_func_t rec_cp0[64] = {
2908         SET_DEFAULT_ELM(rec_cp0, rec_CP),
2909         [OP_CP0_MFC0]           = rec_cp0_MFC0,
2910         [OP_CP0_CFC0]           = rec_cp0_CFC0,
2911         [OP_CP0_MTC0]           = rec_cp0_MTC0,
2912         [OP_CP0_CTC0]           = rec_cp0_CTC0,
2913         [OP_CP0_RFE]            = rec_cp0_RFE,
2914 };
2915
2916 static const lightrec_rec_func_t rec_cp2_basic[64] = {
2917         SET_DEFAULT_ELM(rec_cp2_basic, rec_CP),
2918         [OP_CP2_BASIC_MFC2]     = rec_cp2_basic_MFC2,
2919         [OP_CP2_BASIC_CFC2]     = rec_cp2_basic_CFC2,
2920         [OP_CP2_BASIC_MTC2]     = rec_cp2_basic_MTC2,
2921         [OP_CP2_BASIC_CTC2]     = rec_cp2_basic_CTC2,
2922 };
2923
2924 static const lightrec_rec_func_t rec_meta[64] = {
2925         SET_DEFAULT_ELM(rec_meta, unknown_opcode),
2926         [OP_META_MOV]           = rec_meta_MOV,
2927         [OP_META_EXTC]          = rec_meta_EXTC_EXTS,
2928         [OP_META_EXTS]          = rec_meta_EXTC_EXTS,
2929         [OP_META_COM]           = rec_meta_COM,
2930 };
2931
2932 static void rec_SPECIAL(struct lightrec_cstate *state,
2933                         const struct block *block, u16 offset)
2934 {
2935         union code c = block->opcode_list[offset].c;
2936         lightrec_rec_func_t f = rec_special[c.r.op];
2937
2938         if (!HAS_DEFAULT_ELM && unlikely(!f))
2939                 unknown_opcode(state, block, offset);
2940         else
2941                 (*f)(state, block, offset);
2942 }
2943
2944 static void rec_REGIMM(struct lightrec_cstate *state,
2945                        const struct block *block, u16 offset)
2946 {
2947         union code c = block->opcode_list[offset].c;
2948         lightrec_rec_func_t f = rec_regimm[c.r.rt];
2949
2950         if (!HAS_DEFAULT_ELM && unlikely(!f))
2951                 unknown_opcode(state, block, offset);
2952         else
2953                 (*f)(state, block, offset);
2954 }
2955
2956 static void rec_CP0(struct lightrec_cstate *state,
2957                     const struct block *block, u16 offset)
2958 {
2959         union code c = block->opcode_list[offset].c;
2960         lightrec_rec_func_t f = rec_cp0[c.r.rs];
2961
2962         if (!HAS_DEFAULT_ELM && unlikely(!f))
2963                 rec_CP(state, block, offset);
2964         else
2965                 (*f)(state, block, offset);
2966 }
2967
2968 static void rec_CP2(struct lightrec_cstate *state,
2969                     const struct block *block, u16 offset)
2970 {
2971         union code c = block->opcode_list[offset].c;
2972
2973         if (c.r.op == OP_CP2_BASIC) {
2974                 lightrec_rec_func_t f = rec_cp2_basic[c.r.rs];
2975
2976                 if (HAS_DEFAULT_ELM || likely(f)) {
2977                         (*f)(state, block, offset);
2978                         return;
2979                 }
2980         }
2981
2982         rec_CP(state, block, offset);
2983 }
2984
2985 static void rec_META(struct lightrec_cstate *state,
2986                      const struct block *block, u16 offset)
2987 {
2988         union code c = block->opcode_list[offset].c;
2989         lightrec_rec_func_t f = rec_meta[c.m.op];
2990
2991         if (!HAS_DEFAULT_ELM && unlikely(!f))
2992                 unknown_opcode(state, block, offset);
2993         else
2994                 (*f)(state, block, offset);
2995 }
2996
2997 void lightrec_rec_opcode(struct lightrec_cstate *state,
2998                          const struct block *block, u16 offset)
2999 {
3000         struct regcache *reg_cache = state->reg_cache;
3001         struct lightrec_branch_target *target;
3002         const struct opcode *op = &block->opcode_list[offset];
3003         jit_state_t *_jit = block->_jit;
3004         lightrec_rec_func_t f;
3005         u16 unload_offset;
3006
3007         if (op_flag_sync(op->flags)) {
3008                 if (state->cycles)
3009                         jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
3010                 state->cycles = 0;
3011
3012                 lightrec_storeback_regs(reg_cache, _jit);
3013                 lightrec_regcache_reset(reg_cache);
3014
3015                 pr_debug("Adding branch target at offset 0x%x\n", offset << 2);
3016                 target = &state->targets[state->nb_targets++];
3017                 target->offset = offset;
3018                 target->label = jit_indirect();
3019         }
3020
3021         if (likely(op->opcode)) {
3022                 f = rec_standard[op->i.op];
3023
3024                 if (!HAS_DEFAULT_ELM && unlikely(!f))
3025                         unknown_opcode(state, block, offset);
3026                 else
3027                         (*f)(state, block, offset);
3028         }
3029
3030         if (OPT_EARLY_UNLOAD) {
3031                 unload_offset = offset +
3032                         (has_delay_slot(op->c) && !op_flag_no_ds(op->flags));
3033
3034                 lightrec_do_early_unload(state, block, unload_offset);
3035         }
3036
3037         state->no_load_delay = false;
3038 }