fa74cc09b91809cfba97717669bd879f6399de4b
[pcsx_rearmed.git] / deps / lightrec / emitter.c
1 // SPDX-License-Identifier: LGPL-2.1-or-later
2 /*
3  * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
4  */
5
6 #include "blockcache.h"
7 #include "debug.h"
8 #include "disassembler.h"
9 #include "emitter.h"
10 #include "lightning-wrapper.h"
11 #include "optimizer.h"
12 #include "regcache.h"
13
14 #include <stdbool.h>
15 #include <stddef.h>
16
/* Signature of the per-opcode recompiler routines defined in this file:
 * compiler state, block being recompiled, and the index of the opcode
 * inside block->opcode_list. */
typedef void (*lightrec_rec_func_t)(struct lightrec_cstate *, const struct block *, u16);

/* Forward declarations of routines that are defined later in the file. */
static void rec_SPECIAL(struct lightrec_cstate *state, const struct block *block, u16 offset);
static void rec_REGIMM(struct lightrec_cstate *state, const struct block *block, u16 offset);
static void rec_CP0(struct lightrec_cstate *state, const struct block *block, u16 offset);
static void rec_CP2(struct lightrec_cstate *state, const struct block *block, u16 offset);
24
25 static void unknown_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset)
26 {
27         pr_warn("Unknown opcode: 0x%08x at PC 0x%08x\n",
28                 block->opcode_list[offset].c.opcode,
29                 block->pc + (offset << 2));
30 }
31
32 static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
33                                        const struct block *block, u16 offset,
34                                        s8 reg_new_pc, u32 imm, u8 ra_reg,
35                                        u32 link, bool update_cycles)
36 {
37         struct regcache *reg_cache = state->reg_cache;
38         u32 cycles = state->cycles;
39         jit_state_t *_jit = block->_jit;
40         const struct opcode *op = &block->opcode_list[offset],
41                             *next = &block->opcode_list[offset + 1];
42
43         jit_note(__FILE__, __LINE__);
44
45         if (link) {
46                 /* Update the $ra register */
47                 u8 link_reg = lightrec_alloc_reg_out(reg_cache, _jit, ra_reg, 0);
48                 jit_movi(link_reg, link);
49                 lightrec_free_reg(reg_cache, link_reg);
50         }
51
52         if (reg_new_pc < 0) {
53                 reg_new_pc = lightrec_alloc_reg(reg_cache, _jit, JIT_V0);
54                 lightrec_lock_reg(reg_cache, _jit, reg_new_pc);
55
56                 jit_movi(reg_new_pc, imm);
57         }
58
59         if (has_delay_slot(op->c) &&
60             !(op->flags & (LIGHTREC_NO_DS | LIGHTREC_LOCAL_BRANCH))) {
61                 cycles += lightrec_cycles_of_opcode(next->c);
62
63                 /* Recompile the delay slot */
64                 if (next->c.opcode)
65                         lightrec_rec_opcode(state, block, offset + 1);
66         }
67
68         /* Store back remaining registers */
69         lightrec_storeback_regs(reg_cache, _jit);
70
71         jit_movr(JIT_V0, reg_new_pc);
72
73         if (cycles && update_cycles) {
74                 jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
75                 pr_debug("EOB: %u cycles\n", cycles);
76         }
77
78         if (offset + !!(op->flags & LIGHTREC_NO_DS) < block->nb_ops - 1)
79                 state->branches[state->nb_branches++] = jit_b();
80 }
81
82 void lightrec_emit_eob(struct lightrec_cstate *state, const struct block *block,
83                        u16 offset, bool after_op)
84 {
85         struct regcache *reg_cache = state->reg_cache;
86         jit_state_t *_jit = block->_jit;
87         union code c = block->opcode_list[offset].c;
88         u32 cycles = state->cycles;
89
90         if (!after_op)
91                 cycles -= lightrec_cycles_of_opcode(c);
92
93         lightrec_storeback_regs(reg_cache, _jit);
94
95         jit_movi(JIT_V0, block->pc + (offset << 2));
96         jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
97
98         state->branches[state->nb_branches++] = jit_b();
99 }
100
101 static u8 get_jr_jalr_reg(struct lightrec_cstate *state, const struct block *block, u16 offset)
102 {
103         struct regcache *reg_cache = state->reg_cache;
104         jit_state_t *_jit = block->_jit;
105         const struct opcode *op = &block->opcode_list[offset];
106         u8 rs;
107
108         rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
109         lightrec_lock_reg(reg_cache, _jit, rs);
110
111         return rs;
112 }
113
114 static void rec_special_JR(struct lightrec_cstate *state, const struct block *block, u16 offset)
115 {
116         u8 rs = get_jr_jalr_reg(state, block, offset);
117
118         _jit_name(block->_jit, __func__);
119         lightrec_emit_end_of_block(state, block, offset, rs, 0, 31, 0, true);
120 }
121
122 static void rec_special_JALR(struct lightrec_cstate *state, const struct block *block, u16 offset)
123 {
124         u8 rs = get_jr_jalr_reg(state, block, offset);
125         union code c = block->opcode_list[offset].c;
126
127         _jit_name(block->_jit, __func__);
128         lightrec_emit_end_of_block(state, block, offset, rs, 0, c.r.rd,
129                                    get_branch_pc(block, offset, 2), true);
130 }
131
132 static void rec_J(struct lightrec_cstate *state, const struct block *block, u16 offset)
133 {
134         union code c = block->opcode_list[offset].c;
135
136         _jit_name(block->_jit, __func__);
137         lightrec_emit_end_of_block(state, block, offset, -1,
138                                    (block->pc & 0xf0000000) | (c.j.imm << 2),
139                                    31, 0, true);
140 }
141
142 static void rec_JAL(struct lightrec_cstate *state, const struct block *block, u16 offset)
143 {
144         union code c = block->opcode_list[offset].c;
145
146         _jit_name(block->_jit, __func__);
147         lightrec_emit_end_of_block(state, block, offset, -1,
148                                    (block->pc & 0xf0000000) | (c.j.imm << 2),
149                                    31, get_branch_pc(block, offset, 2), true);
150 }
151
152 static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset,
153                   jit_code_t code, u32 link, bool unconditional, bool bz)
154 {
155         struct regcache *reg_cache = state->reg_cache;
156         struct native_register *regs_backup;
157         jit_state_t *_jit = block->_jit;
158         struct lightrec_branch *branch;
159         const struct opcode *op = &block->opcode_list[offset],
160                             *next = &block->opcode_list[offset + 1];
161         jit_node_t *addr;
162         u8 link_reg;
163         u32 target_offset, cycles = state->cycles;
164         bool is_forward = (s16)op->i.imm >= -1;
165         u32 next_pc;
166
167         jit_note(__FILE__, __LINE__);
168
169         if (!(op->flags & LIGHTREC_NO_DS))
170                 cycles += lightrec_cycles_of_opcode(next->c);
171
172         state->cycles = 0;
173
174         if (cycles)
175                 jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
176
177         if (!unconditional) {
178                 u8 rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT),
179                    rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache,
180                                                        _jit, op->i.rt, REG_EXT);
181
182                 /* Generate the branch opcode */
183                 addr = jit_new_node_pww(code, NULL, rs, rt);
184
185                 lightrec_free_regs(reg_cache);
186                 regs_backup = lightrec_regcache_enter_branch(reg_cache);
187         }
188
189         if (op->flags & LIGHTREC_LOCAL_BRANCH) {
190                 if (next && !(op->flags & LIGHTREC_NO_DS)) {
191                         /* Recompile the delay slot */
192                         if (next->opcode)
193                                 lightrec_rec_opcode(state, block, offset + 1);
194                 }
195
196                 if (link) {
197                         /* Update the $ra register */
198                         link_reg = lightrec_alloc_reg_out(reg_cache, _jit, 31, 0);
199                         jit_movi(link_reg, link);
200                         lightrec_free_reg(reg_cache, link_reg);
201                 }
202
203                 /* Store back remaining registers */
204                 lightrec_storeback_regs(reg_cache, _jit);
205
206                 target_offset = offset + 1 + (s16)op->i.imm
207                         - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
208                 pr_debug("Adding local branch to offset 0x%x\n",
209                          target_offset << 2);
210                 branch = &state->local_branches[
211                         state->nb_local_branches++];
212
213                 branch->target = target_offset;
214                 if (is_forward)
215                         branch->branch = jit_b();
216                 else
217                         branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0);
218         }
219
220         if (!(op->flags & LIGHTREC_LOCAL_BRANCH) || !is_forward) {
221                 next_pc = get_branch_pc(block, offset, 1 + (s16)op->i.imm);
222                 lightrec_emit_end_of_block(state, block, offset, -1, next_pc,
223                                            31, link, false);
224         }
225
226         if (!unconditional) {
227                 jit_patch(addr);
228                 lightrec_regcache_leave_branch(reg_cache, regs_backup);
229
230                 if (bz && link) {
231                         /* Update the $ra register */
232                         link_reg = lightrec_alloc_reg_out(reg_cache, _jit,
233                                                           31, REG_EXT);
234                         jit_movi(link_reg, (s32)link);
235                         lightrec_free_reg(reg_cache, link_reg);
236                 }
237
238                 if (!(op->flags & LIGHTREC_NO_DS) && next->opcode)
239                         lightrec_rec_opcode(state, block, offset + 1);
240         }
241 }
242
243 static void rec_BNE(struct lightrec_cstate *state,
244                     const struct block *block, u16 offset)
245 {
246         union code c = block->opcode_list[offset].c;
247
248         _jit_name(block->_jit, __func__);
249
250         if (c.i.rt == 0)
251                 rec_b(state, block, offset, jit_code_beqi, 0, false, true);
252         else
253                 rec_b(state, block, offset, jit_code_beqr, 0, false, false);
254 }
255
256 static void rec_BEQ(struct lightrec_cstate *state,
257                     const struct block *block, u16 offset)
258 {
259         union code c = block->opcode_list[offset].c;
260
261         _jit_name(block->_jit, __func__);
262
263         if (c.i.rt == 0)
264                 rec_b(state, block, offset, jit_code_bnei, 0, c.i.rs == 0, true);
265         else
266                 rec_b(state, block, offset, jit_code_bner, 0, c.i.rs == c.i.rt, false);
267 }
268
269 static void rec_BLEZ(struct lightrec_cstate *state,
270                      const struct block *block, u16 offset)
271 {
272         union code c = block->opcode_list[offset].c;
273
274         _jit_name(block->_jit, __func__);
275         rec_b(state, block, offset, jit_code_bgti, 0, c.i.rs == 0, true);
276 }
277
278 static void rec_BGTZ(struct lightrec_cstate *state,
279                      const struct block *block, u16 offset)
280 {
281         _jit_name(block->_jit, __func__);
282         rec_b(state, block, offset, jit_code_blei, 0, false, true);
283 }
284
285 static void rec_regimm_BLTZ(struct lightrec_cstate *state,
286                             const struct block *block, u16 offset)
287 {
288         _jit_name(block->_jit, __func__);
289         rec_b(state, block, offset, jit_code_bgei, 0, false, true);
290 }
291
292 static void rec_regimm_BLTZAL(struct lightrec_cstate *state,
293                               const struct block *block, u16 offset)
294 {
295         _jit_name(block->_jit, __func__);
296         rec_b(state, block, offset, jit_code_bgei,
297               get_branch_pc(block, offset, 2), false, true);
298 }
299
300 static void rec_regimm_BGEZ(struct lightrec_cstate *state,
301                             const struct block *block, u16 offset)
302 {
303         union code c = block->opcode_list[offset].c;
304
305         _jit_name(block->_jit, __func__);
306         rec_b(state, block, offset, jit_code_blti, 0, !c.i.rs, true);
307 }
308
309 static void rec_regimm_BGEZAL(struct lightrec_cstate *state,
310                               const struct block *block, u16 offset)
311 {
312         const struct opcode *op = &block->opcode_list[offset];
313         _jit_name(block->_jit, __func__);
314         rec_b(state, block, offset, jit_code_blti,
315               get_branch_pc(block, offset, 2),
316               !op->i.rs, true);
317 }
318
319 static void rec_alu_imm(struct lightrec_cstate *state, const struct block *block,
320                         u16 offset, jit_code_t code, bool slti)
321 {
322         struct regcache *reg_cache = state->reg_cache;
323         union code c = block->opcode_list[offset].c;
324         jit_state_t *_jit = block->_jit;
325         u8 rs, rt, out_flags = REG_EXT;
326
327         if (slti)
328                 out_flags |= REG_ZEXT;
329
330         jit_note(__FILE__, __LINE__);
331         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, REG_EXT);
332         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, out_flags);
333
334         jit_new_node_www(code, rt, rs, (s32)(s16) c.i.imm);
335
336         lightrec_free_reg(reg_cache, rs);
337         lightrec_free_reg(reg_cache, rt);
338 }
339
340 static void rec_alu_special(struct lightrec_cstate *state, const struct block *block,
341                             u16 offset, jit_code_t code, bool out_ext)
342 {
343         struct regcache *reg_cache = state->reg_cache;
344         union code c = block->opcode_list[offset].c;
345         jit_state_t *_jit = block->_jit;
346         u8 rd, rt, rs;
347
348         jit_note(__FILE__, __LINE__);
349         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, REG_EXT);
350         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, REG_EXT);
351         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd,
352                                     out_ext ? REG_EXT | REG_ZEXT : 0);
353
354         jit_new_node_www(code, rd, rs, rt);
355
356         lightrec_free_reg(reg_cache, rs);
357         lightrec_free_reg(reg_cache, rt);
358         lightrec_free_reg(reg_cache, rd);
359 }
360
361 static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *block,
362                            u16 offset, jit_code_t code)
363 {
364         struct regcache *reg_cache = state->reg_cache;
365         union code c = block->opcode_list[offset].c;
366         jit_state_t *_jit = block->_jit;
367         u8 rd, rt, rs, temp, flags = 0;
368
369         jit_note(__FILE__, __LINE__);
370         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
371
372         if (code == jit_code_rshr)
373                 flags = REG_EXT;
374         else if (code == jit_code_rshr_u)
375                 flags = REG_ZEXT;
376
377         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
378         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags);
379
380         if (rs != rd && rt != rd) {
381                 jit_andi(rd, rs, 0x1f);
382                 jit_new_node_www(code, rd, rt, rd);
383         } else {
384                 temp = lightrec_alloc_reg_temp(reg_cache, _jit);
385                 jit_andi(temp, rs, 0x1f);
386                 jit_new_node_www(code, rd, rt, temp);
387                 lightrec_free_reg(reg_cache, temp);
388         }
389
390         lightrec_free_reg(reg_cache, rs);
391         lightrec_free_reg(reg_cache, rt);
392         lightrec_free_reg(reg_cache, rd);
393 }
394
395 static void rec_ADDIU(struct lightrec_cstate *state,
396                       const struct block *block, u16 offset)
397 {
398         _jit_name(block->_jit, __func__);
399         rec_alu_imm(state, block, offset, jit_code_addi, false);
400 }
401
402 static void rec_ADDI(struct lightrec_cstate *state,
403                      const struct block *block, u16 offset)
404 {
405         /* TODO: Handle the exception? */
406         _jit_name(block->_jit, __func__);
407         rec_alu_imm(state, block, offset, jit_code_addi, false);
408 }
409
410 static void rec_SLTIU(struct lightrec_cstate *state,
411                       const struct block *block, u16 offset)
412 {
413         _jit_name(block->_jit, __func__);
414         rec_alu_imm(state, block, offset, jit_code_lti_u, true);
415 }
416
417 static void rec_SLTI(struct lightrec_cstate *state,
418                      const struct block *block, u16 offset)
419 {
420         _jit_name(block->_jit, __func__);
421         rec_alu_imm(state, block, offset, jit_code_lti, true);
422 }
423
424 static void rec_ANDI(struct lightrec_cstate *state,
425                      const struct block *block, u16 offset)
426 {
427         struct regcache *reg_cache = state->reg_cache;
428         union code c = block->opcode_list[offset].c;
429         jit_state_t *_jit = block->_jit;
430         u8 rs, rt;
431
432         _jit_name(block->_jit, __func__);
433         jit_note(__FILE__, __LINE__);
434         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
435         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt,
436                                     REG_EXT | REG_ZEXT);
437
438         /* PSX code uses ANDI 0xff / ANDI 0xffff a lot, which are basically
439          * casts to uint8_t / uint16_t. */
440         if (c.i.imm == 0xff)
441                 jit_extr_uc(rt, rs);
442         else if (c.i.imm == 0xffff)
443                 jit_extr_us(rt, rs);
444         else
445                 jit_andi(rt, rs, (u32)(u16) c.i.imm);
446
447         lightrec_free_reg(reg_cache, rs);
448         lightrec_free_reg(reg_cache, rt);
449 }
450
451 static void rec_alu_or_xor(struct lightrec_cstate *state, const struct block *block,
452                            u16 offset, jit_code_t code)
453 {
454         struct regcache *reg_cache = state->reg_cache;
455         union code c = block->opcode_list[offset].c;
456         jit_state_t *_jit = block->_jit;
457         u8 rs, rt, flags;
458
459         jit_note(__FILE__, __LINE__);
460         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
461         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, 0);
462
463         flags = lightrec_get_reg_in_flags(reg_cache, rs);
464         lightrec_set_reg_out_flags(reg_cache, rt, flags);
465
466         jit_new_node_www(code, rt, rs, (u32)(u16) c.i.imm);
467
468         lightrec_free_reg(reg_cache, rs);
469         lightrec_free_reg(reg_cache, rt);
470 }
471
472
473 static void rec_ORI(struct lightrec_cstate *state,
474                     const struct block *block, u16 offset)
475 {
476         _jit_name(block->_jit, __func__);
477         rec_alu_or_xor(state, block, offset, jit_code_ori);
478 }
479
480 static void rec_XORI(struct lightrec_cstate *state,
481                      const struct block *block, u16 offset)
482 {
483         _jit_name(block->_jit, __func__);
484         rec_alu_or_xor(state, block, offset, jit_code_xori);
485 }
486
487 static void rec_LUI(struct lightrec_cstate *state,
488                     const struct block *block, u16 offset)
489 {
490         struct regcache *reg_cache = state->reg_cache;
491         union code c = block->opcode_list[offset].c;
492         jit_state_t *_jit = block->_jit;
493         u8 rt, flags = REG_EXT;
494
495         jit_name(__func__);
496         jit_note(__FILE__, __LINE__);
497
498         if (!(c.i.imm & BIT(15)))
499                 flags |= REG_ZEXT;
500
501         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
502
503         jit_movi(rt, (s32)(c.i.imm << 16));
504
505         lightrec_free_reg(reg_cache, rt);
506 }
507
508 static void rec_special_ADDU(struct lightrec_cstate *state,
509                              const struct block *block, u16 offset)
510 {
511         _jit_name(block->_jit, __func__);
512         rec_alu_special(state, block, offset, jit_code_addr, false);
513 }
514
515 static void rec_special_ADD(struct lightrec_cstate *state,
516                             const struct block *block, u16 offset)
517 {
518         /* TODO: Handle the exception? */
519         _jit_name(block->_jit, __func__);
520         rec_alu_special(state, block, offset, jit_code_addr, false);
521 }
522
523 static void rec_special_SUBU(struct lightrec_cstate *state,
524                              const struct block *block, u16 offset)
525 {
526         _jit_name(block->_jit, __func__);
527         rec_alu_special(state, block, offset, jit_code_subr, false);
528 }
529
530 static void rec_special_SUB(struct lightrec_cstate *state,
531                             const struct block *block, u16 offset)
532 {
533         /* TODO: Handle the exception? */
534         _jit_name(block->_jit, __func__);
535         rec_alu_special(state, block, offset, jit_code_subr, false);
536 }
537
538 static void rec_special_AND(struct lightrec_cstate *state,
539                             const struct block *block, u16 offset)
540 {
541         struct regcache *reg_cache = state->reg_cache;
542         union code c = block->opcode_list[offset].c;
543         jit_state_t *_jit = block->_jit;
544         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
545
546         _jit_name(block->_jit, __func__);
547         jit_note(__FILE__, __LINE__);
548         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
549         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
550         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
551
552         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
553         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
554
555         /* Z(rd) = Z(rs) | Z(rt) */
556         flags_rd = REG_ZEXT & (flags_rs | flags_rt);
557
558         /* E(rd) = (E(rt) & Z(rt)) | (E(rs) & Z(rs)) | (E(rs) & E(rt)) */
559         if (((flags_rs & REG_EXT) && (flags_rt & REG_ZEXT)) ||
560             ((flags_rt & REG_EXT) && (flags_rs & REG_ZEXT)) ||
561             (REG_EXT & flags_rs & flags_rt))
562                 flags_rd |= REG_EXT;
563
564         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
565
566         jit_andr(rd, rs, rt);
567
568         lightrec_free_reg(reg_cache, rs);
569         lightrec_free_reg(reg_cache, rt);
570         lightrec_free_reg(reg_cache, rd);
571 }
572
573 static void rec_special_or_nor(struct lightrec_cstate *state,
574                                const struct block *block, u16 offset, bool nor)
575 {
576         struct regcache *reg_cache = state->reg_cache;
577         union code c = block->opcode_list[offset].c;
578         jit_state_t *_jit = block->_jit;
579         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd = 0;
580
581         jit_note(__FILE__, __LINE__);
582         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
583         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
584         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
585
586         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
587         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
588
589         /* or: Z(rd) = Z(rs) & Z(rt)
590          * nor: Z(rd) = 0 */
591         if (!nor)
592                 flags_rd = REG_ZEXT & flags_rs & flags_rt;
593
594         /* E(rd) = (E(rs) & E(rt)) | (E(rt) & !Z(rt)) | (E(rs) & !Z(rs)) */
595         if ((REG_EXT & flags_rs & flags_rt) ||
596             (flags_rt & (REG_EXT | REG_ZEXT) == REG_EXT) ||
597             (flags_rs & (REG_EXT | REG_ZEXT) == REG_EXT))
598                 flags_rd |= REG_EXT;
599
600         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
601
602         jit_orr(rd, rs, rt);
603
604         if (nor)
605                 jit_comr(rd, rd);
606
607         lightrec_free_reg(reg_cache, rs);
608         lightrec_free_reg(reg_cache, rt);
609         lightrec_free_reg(reg_cache, rd);
610 }
611
612 static void rec_special_OR(struct lightrec_cstate *state,
613                            const struct block *block, u16 offset)
614 {
615         _jit_name(block->_jit, __func__);
616         rec_special_or_nor(state, block, offset, false);
617 }
618
619 static void rec_special_NOR(struct lightrec_cstate *state,
620                             const struct block *block, u16 offset)
621 {
622         _jit_name(block->_jit, __func__);
623         rec_special_or_nor(state, block, offset, true);
624 }
625
626 static void rec_special_XOR(struct lightrec_cstate *state,
627                             const struct block *block, u16 offset)
628 {
629         struct regcache *reg_cache = state->reg_cache;
630         union code c = block->opcode_list[offset].c;
631         jit_state_t *_jit = block->_jit;
632         u8 rd, rt, rs, flags_rs, flags_rt, flags_rd;
633
634         _jit_name(block->_jit, __func__);
635
636         jit_note(__FILE__, __LINE__);
637         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
638         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
639         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0);
640
641         flags_rs = lightrec_get_reg_in_flags(reg_cache, rs);
642         flags_rt = lightrec_get_reg_in_flags(reg_cache, rt);
643
644         /* Z(rd) = Z(rs) & Z(rt) */
645         flags_rd = REG_ZEXT & flags_rs & flags_rt;
646
647         /* E(rd) = E(rs) & E(rt) */
648         flags_rd |= REG_EXT & flags_rs & flags_rt;
649
650         lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
651
652         jit_xorr(rd, rs, rt);
653
654         lightrec_free_reg(reg_cache, rs);
655         lightrec_free_reg(reg_cache, rt);
656         lightrec_free_reg(reg_cache, rd);
657 }
658
659 static void rec_special_SLTU(struct lightrec_cstate *state,
660                              const struct block *block, u16 offset)
661 {
662         _jit_name(block->_jit, __func__);
663         rec_alu_special(state, block, offset, jit_code_ltr_u, true);
664 }
665
666 static void rec_special_SLT(struct lightrec_cstate *state,
667                             const struct block *block, u16 offset)
668 {
669         _jit_name(block->_jit, __func__);
670         rec_alu_special(state, block, offset, jit_code_ltr, true);
671 }
672
673 static void rec_special_SLLV(struct lightrec_cstate *state,
674                              const struct block *block, u16 offset)
675 {
676         _jit_name(block->_jit, __func__);
677         rec_alu_shiftv(state, block, offset, jit_code_lshr);
678 }
679
680 static void rec_special_SRLV(struct lightrec_cstate *state,
681                              const struct block *block, u16 offset)
682 {
683         _jit_name(block->_jit, __func__);
684         rec_alu_shiftv(state, block, offset, jit_code_rshr_u);
685 }
686
687 static void rec_special_SRAV(struct lightrec_cstate *state,
688                              const struct block *block, u16 offset)
689 {
690         _jit_name(block->_jit, __func__);
691         rec_alu_shiftv(state, block, offset, jit_code_rshr);
692 }
693
694 static void rec_alu_shift(struct lightrec_cstate *state, const struct block *block,
695                           u16 offset, jit_code_t code)
696 {
697         struct regcache *reg_cache = state->reg_cache;
698         union code c = block->opcode_list[offset].c;
699         jit_state_t *_jit = block->_jit;
700         u8 rd, rt, flags = 0;
701
702         jit_note(__FILE__, __LINE__);
703
704         if (code == jit_code_rshi)
705                 flags = REG_EXT;
706         else if (code == jit_code_rshi_u)
707                 flags = REG_ZEXT;
708
709         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
710
711         /* Input reg is zero-extended, if we SRL at least by one bit, we know
712          * the output reg will be both zero-extended and sign-extended. */
713         if (code == jit_code_rshi_u && c.r.imm)
714                 flags |= REG_EXT;
715         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags);
716
717         jit_new_node_www(code, rd, rt, c.r.imm);
718
719         lightrec_free_reg(reg_cache, rt);
720         lightrec_free_reg(reg_cache, rd);
721 }
722
723 static void rec_special_SLL(struct lightrec_cstate *state,
724                             const struct block *block, u16 offset)
725 {
726         _jit_name(block->_jit, __func__);
727         rec_alu_shift(state, block, offset, jit_code_lshi);
728 }
729
730 static void rec_special_SRL(struct lightrec_cstate *state,
731                             const struct block *block, u16 offset)
732 {
733         _jit_name(block->_jit, __func__);
734         rec_alu_shift(state, block, offset, jit_code_rshi_u);
735 }
736
737 static void rec_special_SRA(struct lightrec_cstate *state,
738                             const struct block *block, u16 offset)
739 {
740         _jit_name(block->_jit, __func__);
741         rec_alu_shift(state, block, offset, jit_code_rshi);
742 }
743
744 static void rec_alu_mult(struct lightrec_cstate *state,
745                          const struct block *block, u16 offset, bool is_signed)
746 {
747         struct regcache *reg_cache = state->reg_cache;
748         union code c = block->opcode_list[offset].c;
749         u16 flags = block->opcode_list[offset].flags;
750         u8 reg_lo = get_mult_div_lo(c);
751         u8 reg_hi = get_mult_div_hi(c);
752         jit_state_t *_jit = block->_jit;
753         u8 lo, hi, rs, rt, rflags = 0;
754
755         jit_note(__FILE__, __LINE__);
756
757         if (is_signed)
758                 rflags = REG_EXT;
759         else
760                 rflags = REG_ZEXT;
761
762         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
763         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
764
765         if (!(flags & LIGHTREC_NO_LO))
766                 lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
767         else if (__WORDSIZE == 32)
768                 lo = lightrec_alloc_reg_temp(reg_cache, _jit);
769
770         if (!(flags & LIGHTREC_NO_HI))
771                 hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT);
772
773         if (__WORDSIZE == 32) {
774                 /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
775                  * operation if the MULT was detected a 32-bit only. */
776                 if (!(flags & LIGHTREC_NO_HI)) {
777                         if (is_signed)
778                                 jit_qmulr(lo, hi, rs, rt);
779                         else
780                                 jit_qmulr_u(lo, hi, rs, rt);
781                 } else {
782                         jit_mulr(lo, rs, rt);
783                 }
784         } else {
785                 /* On 64-bit systems, do a 64*64->64 bit operation. */
786                 if (flags & LIGHTREC_NO_LO) {
787                         jit_mulr(hi, rs, rt);
788                         jit_rshi(hi, hi, 32);
789                 } else {
790                         jit_mulr(lo, rs, rt);
791
792                         /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
793                         if (!(flags & LIGHTREC_NO_HI))
794                                 jit_rshi(hi, lo, 32);
795                 }
796         }
797
798         lightrec_free_reg(reg_cache, rs);
799         lightrec_free_reg(reg_cache, rt);
800         if (!(flags & LIGHTREC_NO_LO) || __WORDSIZE == 32)
801                 lightrec_free_reg(reg_cache, lo);
802         if (!(flags & LIGHTREC_NO_HI))
803                 lightrec_free_reg(reg_cache, hi);
804 }
805
806 static void rec_alu_div(struct lightrec_cstate *state,
807                         const struct block *block, u16 offset, bool is_signed)
808 {
809         struct regcache *reg_cache = state->reg_cache;
810         union code c = block->opcode_list[offset].c;
811         u16 flags = block->opcode_list[offset].flags;
812         bool no_check = flags & LIGHTREC_NO_DIV_CHECK;
813         u8 reg_lo = get_mult_div_lo(c);
814         u8 reg_hi = get_mult_div_hi(c);
815         jit_state_t *_jit = block->_jit;
816         jit_node_t *branch, *to_end;
817         u8 lo = 0, hi = 0, rs, rt, rflags = 0;
818
819         jit_note(__FILE__, __LINE__);
820
821         if (is_signed)
822                 rflags = REG_EXT;
823         else
824                 rflags = REG_ZEXT;
825
826         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
827         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
828
829         if (!(flags & LIGHTREC_NO_LO))
830                 lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
831
832         if (!(flags & LIGHTREC_NO_HI))
833                 hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, 0);
834
835         /* Jump to special handler if dividing by zero  */
836         if (!no_check)
837                 branch = jit_beqi(rt, 0);
838
839         if (flags & LIGHTREC_NO_LO) {
840                 if (is_signed)
841                         jit_remr(hi, rs, rt);
842                 else
843                         jit_remr_u(hi, rs, rt);
844         } else if (flags & LIGHTREC_NO_HI) {
845                 if (is_signed)
846                         jit_divr(lo, rs, rt);
847                 else
848                         jit_divr_u(lo, rs, rt);
849         } else {
850                 if (is_signed)
851                         jit_qdivr(lo, hi, rs, rt);
852                 else
853                         jit_qdivr_u(lo, hi, rs, rt);
854         }
855
856         if (!no_check) {
857                 /* Jump above the div-by-zero handler */
858                 to_end = jit_b();
859
860                 jit_patch(branch);
861
862                 if (!(flags & LIGHTREC_NO_LO)) {
863                         if (is_signed) {
864                                 jit_lti(lo, rs, 0);
865                                 jit_lshi(lo, lo, 1);
866                                 jit_subi(lo, lo, 1);
867                         } else {
868                                 jit_movi(lo, 0xffffffff);
869                         }
870                 }
871
872                 if (!(flags & LIGHTREC_NO_HI))
873                         jit_movr(hi, rs);
874
875                 jit_patch(to_end);
876         }
877
878         lightrec_free_reg(reg_cache, rs);
879         lightrec_free_reg(reg_cache, rt);
880
881         if (!(flags & LIGHTREC_NO_LO))
882                 lightrec_free_reg(reg_cache, lo);
883
884         if (!(flags & LIGHTREC_NO_HI))
885                 lightrec_free_reg(reg_cache, hi);
886 }
887
888 static void rec_special_MULT(struct lightrec_cstate *state,
889                              const struct block *block, u16 offset)
890 {
891         _jit_name(block->_jit, __func__);
892         rec_alu_mult(state, block, offset, true);
893 }
894
895 static void rec_special_MULTU(struct lightrec_cstate *state,
896                               const struct block *block, u16 offset)
897 {
898         _jit_name(block->_jit, __func__);
899         rec_alu_mult(state, block, offset, false);
900 }
901
902 static void rec_special_DIV(struct lightrec_cstate *state,
903                             const struct block *block, u16 offset)
904 {
905         _jit_name(block->_jit, __func__);
906         rec_alu_div(state, block, offset, true);
907 }
908
909 static void rec_special_DIVU(struct lightrec_cstate *state,
910                              const struct block *block, u16 offset)
911 {
912         _jit_name(block->_jit, __func__);
913         rec_alu_div(state, block, offset, false);
914 }
915
916 static void rec_alu_mv_lo_hi(struct lightrec_cstate *state,
917                              const struct block *block, u8 dst, u8 src)
918 {
919         struct regcache *reg_cache = state->reg_cache;
920         jit_state_t *_jit = block->_jit;
921
922         jit_note(__FILE__, __LINE__);
923         src = lightrec_alloc_reg_in(reg_cache, _jit, src, 0);
924         dst = lightrec_alloc_reg_out(reg_cache, _jit, dst, REG_EXT);
925
926         jit_extr_i(dst, src);
927
928         lightrec_free_reg(reg_cache, src);
929         lightrec_free_reg(reg_cache, dst);
930 }
931
932 static void rec_special_MFHI(struct lightrec_cstate *state,
933                              const struct block *block, u16 offset)
934 {
935         union code c = block->opcode_list[offset].c;
936
937         _jit_name(block->_jit, __func__);
938         rec_alu_mv_lo_hi(state, block, c.r.rd, REG_HI);
939 }
940
941 static void rec_special_MTHI(struct lightrec_cstate *state,
942                              const struct block *block, u16 offset)
943 {
944         union code c = block->opcode_list[offset].c;
945
946         _jit_name(block->_jit, __func__);
947         rec_alu_mv_lo_hi(state, block, REG_HI, c.r.rs);
948 }
949
950 static void rec_special_MFLO(struct lightrec_cstate *state,
951                              const struct block *block, u16 offset)
952 {
953         union code c = block->opcode_list[offset].c;
954
955         _jit_name(block->_jit, __func__);
956         rec_alu_mv_lo_hi(state, block, c.r.rd, REG_LO);
957 }
958
959 static void rec_special_MTLO(struct lightrec_cstate *state,
960                              const struct block *block, u16 offset)
961 {
962         union code c = block->opcode_list[offset].c;
963
964         _jit_name(block->_jit, __func__);
965         rec_alu_mv_lo_hi(state, block, REG_LO, c.r.rs);
966 }
967
968 static void call_to_c_wrapper(struct lightrec_cstate *state, const struct block *block,
969                               u32 arg, bool with_arg, enum c_wrappers wrapper)
970 {
971         struct regcache *reg_cache = state->reg_cache;
972         jit_state_t *_jit = block->_jit;
973         u8 tmp, tmp2;
974
975         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
976         jit_ldxi(tmp, LIGHTREC_REG_STATE,
977                  offsetof(struct lightrec_state, wrappers_eps[wrapper]));
978
979         if (with_arg) {
980                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
981                 jit_movi(tmp2, arg);
982
983                 jit_stxi_i(offsetof(struct lightrec_state, c_wrapper_arg),
984                            LIGHTREC_REG_STATE, tmp2);
985
986                 lightrec_free_reg(reg_cache, tmp2);
987         }
988
989         lightrec_regcache_mark_live(reg_cache, _jit);
990         jit_callr(tmp);
991
992         lightrec_free_reg(reg_cache, tmp);
993         lightrec_regcache_mark_live(reg_cache, _jit);
994 }
995
996 static void rec_io(struct lightrec_cstate *state,
997                    const struct block *block, u16 offset,
998                    bool load_rt, bool read_rt)
999 {
1000         struct regcache *reg_cache = state->reg_cache;
1001         jit_state_t *_jit = block->_jit;
1002         union code c = block->opcode_list[offset].c;
1003         u16 flags = block->opcode_list[offset].flags;
1004         bool is_tagged = LIGHTREC_FLAGS_GET_IO_MODE(flags);
1005         u32 lut_entry;
1006
1007         jit_note(__FILE__, __LINE__);
1008
1009         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
1010
1011         if (read_rt && likely(c.i.rt))
1012                 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
1013         else if (load_rt)
1014                 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
1015
1016         if (is_tagged) {
1017                 call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_RW);
1018         } else {
1019                 lut_entry = lightrec_get_lut_entry(block);
1020                 call_to_c_wrapper(state, block, (lut_entry << 16) | offset,
1021                                   true, C_WRAPPER_RW_GENERIC);
1022         }
1023 }
1024
1025 static void rec_store_memory(struct lightrec_cstate *cstate,
1026                              const struct block *block,
1027                              u16 offset, jit_code_t code,
1028                              uintptr_t addr_offset, u32 addr_mask,
1029                              bool invalidate)
1030 {
1031         struct regcache *reg_cache = cstate->reg_cache;
1032         struct opcode *op = &block->opcode_list[offset];
1033         jit_state_t *_jit = block->_jit;
1034         union code c = op->c;
1035         u8 rs, rt, tmp, tmp2, tmp3, addr_reg, addr_reg2;
1036         s16 imm = (s16)c.i.imm;
1037         s32 simm = (s32)imm << (__WORDSIZE / 32 - 1);
1038         s32 lut_offt = offsetof(struct lightrec_state, code_lut);
1039         bool no_mask = op->flags & LIGHTREC_NO_MASK;
1040         bool add_imm = c.i.imm && invalidate && simm + lut_offt != (s16)(simm + lut_offt);
1041         bool need_tmp = !no_mask || addr_offset || add_imm;
1042         bool need_tmp2 = addr_offset || invalidate;
1043
1044         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
1045         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1046         if (need_tmp)
1047                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1048
1049         addr_reg = rs;
1050
1051         if (add_imm) {
1052                 jit_addi(tmp, addr_reg, (s16)c.i.imm);
1053                 addr_reg = tmp;
1054                 imm = 0;
1055         } else if (simm) {
1056                 lut_offt += simm;
1057         }
1058
1059         if (!no_mask) {
1060                 jit_andi(tmp, addr_reg, addr_mask);
1061                 addr_reg = tmp;
1062         }
1063
1064         if (need_tmp2)
1065                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1066
1067         if (addr_offset) {
1068                 jit_addi(tmp2, addr_reg, addr_offset);
1069                 addr_reg2 = tmp2;
1070         } else {
1071                 addr_reg2 = addr_reg;
1072         }
1073
1074         jit_new_node_www(code, imm, addr_reg2, rt);
1075         lightrec_free_reg(reg_cache, rt);
1076
1077         if (invalidate) {
1078                 tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
1079
1080                 if (c.i.op != OP_SW) {
1081                         jit_andi(tmp2, addr_reg, ~3);
1082                         addr_reg = tmp2;
1083                 }
1084
1085                 if (__WORDSIZE == 64) {
1086                         jit_lshi(tmp2, addr_reg, 1);
1087                         addr_reg = tmp2;
1088                 }
1089
1090                 if (__WORDSIZE == 64 || addr_reg != rs || c.i.rs != 0) {
1091                         jit_addr(tmp2, addr_reg, LIGHTREC_REG_STATE);
1092                         addr_reg = tmp2;
1093                 }
1094
1095                 jit_stxi(lut_offt, addr_reg, tmp3);
1096
1097                 lightrec_free_reg(reg_cache, tmp3);
1098         }
1099
1100         if (need_tmp2)
1101                 lightrec_free_reg(reg_cache, tmp2);
1102         if (need_tmp)
1103                 lightrec_free_reg(reg_cache, tmp);
1104         lightrec_free_reg(reg_cache, rs);
1105 }
1106
1107 static void rec_store_ram(struct lightrec_cstate *cstate,
1108                           const struct block *block,
1109                           u16 offset, jit_code_t code,
1110                           bool invalidate)
1111 {
1112         _jit_note(block->_jit, __FILE__, __LINE__);
1113
1114         return rec_store_memory(cstate, block, offset, code,
1115                                 cstate->state->offset_ram,
1116                                 RAM_SIZE - 1, invalidate);
1117 }
1118
1119 static void rec_store_scratch(struct lightrec_cstate *cstate,
1120                               const struct block *block,
1121                               u16 offset, jit_code_t code)
1122 {
1123         _jit_note(block->_jit, __FILE__, __LINE__);
1124
1125         return rec_store_memory(cstate, block, offset, code,
1126                                 cstate->state->offset_scratch,
1127                                 0x1fffffff, false);
1128 }
1129
1130 static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
1131                                            const struct block *block,
1132                                            u16 offset, jit_code_t code)
1133 {
1134         struct lightrec_state *state = cstate->state;
1135         struct regcache *reg_cache = cstate->reg_cache;
1136         union code c = block->opcode_list[offset].c;
1137         jit_state_t *_jit = block->_jit;
1138         jit_node_t *to_not_ram, *to_end;
1139         u8 tmp, tmp2, rs, rt;
1140         s16 imm;
1141
1142         jit_note(__FILE__, __LINE__);
1143         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1144         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1145
1146         if (state->offset_ram || state->offset_scratch)
1147                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1148
1149         /* Convert to KUNSEG and avoid RAM mirrors */
1150         if (state->mirrors_mapped) {
1151                 imm = (s16)c.i.imm;
1152                 jit_andi(tmp, rs, 0x1f800000 | (4 * RAM_SIZE - 1));
1153         } else if (c.i.imm) {
1154                 imm = 0;
1155                 jit_addi(tmp, rs, (s16)c.i.imm);
1156                 jit_andi(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1));
1157         } else {
1158                 imm = 0;
1159                 jit_andi(tmp, rs, 0x1f800000 | (RAM_SIZE - 1));
1160         }
1161
1162         lightrec_free_reg(reg_cache, rs);
1163
1164         if (state->offset_ram != state->offset_scratch) {
1165                 to_not_ram = jit_bmsi(tmp, BIT(28));
1166
1167                 jit_movi(tmp2, state->offset_ram);
1168
1169                 to_end = jit_b();
1170                 jit_patch(to_not_ram);
1171
1172                 jit_movi(tmp2, state->offset_scratch);
1173                 jit_patch(to_end);
1174         } else if (state->offset_ram) {
1175                 jit_movi(tmp2, state->offset_ram);
1176         }
1177
1178         if (state->offset_ram || state->offset_scratch) {
1179                 jit_addr(tmp, tmp, tmp2);
1180                 lightrec_free_reg(reg_cache, tmp2);
1181         }
1182
1183         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
1184         jit_new_node_www(code, imm, tmp, rt);
1185
1186         lightrec_free_reg(reg_cache, rt);
1187         lightrec_free_reg(reg_cache, tmp);
1188 }
1189
1190 static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block,
1191                              u16 offset, jit_code_t code)
1192 {
1193         struct lightrec_state *state = cstate->state;
1194         u32 ram_size = state->mirrors_mapped ? RAM_SIZE * 4 : RAM_SIZE;
1195         struct regcache *reg_cache = cstate->reg_cache;
1196         union code c = block->opcode_list[offset].c;
1197         jit_state_t *_jit = block->_jit;
1198         jit_node_t *to_not_ram, *to_end;
1199         u8 tmp, tmp2, tmp3, rs, rt;
1200
1201         jit_note(__FILE__, __LINE__);
1202
1203         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1204         tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1205         tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
1206
1207         /* Convert to KUNSEG and avoid RAM mirrors */
1208         if (c.i.imm) {
1209                 jit_addi(tmp2, rs, (s16)c.i.imm);
1210                 jit_andi(tmp2, tmp2, 0x1f800000 | (ram_size - 1));
1211         } else {
1212                 jit_andi(tmp2, rs, 0x1f800000 | (ram_size - 1));
1213         }
1214
1215         lightrec_free_reg(reg_cache, rs);
1216         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1217
1218         to_not_ram = jit_bgti(tmp2, ram_size);
1219
1220         /* Compute the offset to the code LUT */
1221         jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3);
1222         if (__WORDSIZE == 64)
1223                 jit_lshi(tmp, tmp, 1);
1224         jit_addr(tmp, LIGHTREC_REG_STATE, tmp);
1225
1226         /* Write NULL to the code LUT to invalidate any block that's there */
1227         jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
1228
1229         if (state->offset_ram != state->offset_scratch) {
1230                 jit_movi(tmp, state->offset_ram);
1231
1232                 to_end = jit_b();
1233         }
1234
1235         jit_patch(to_not_ram);
1236
1237         if (state->offset_ram || state->offset_scratch)
1238                 jit_movi(tmp, state->offset_scratch);
1239
1240         if (state->offset_ram != state->offset_scratch)
1241                 jit_patch(to_end);
1242
1243         if (state->offset_ram || state->offset_scratch)
1244                 jit_addr(tmp2, tmp2, tmp);
1245
1246         lightrec_free_reg(reg_cache, tmp);
1247         lightrec_free_reg(reg_cache, tmp3);
1248
1249         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
1250         jit_new_node_www(code, 0, tmp2, rt);
1251
1252         lightrec_free_reg(reg_cache, rt);
1253         lightrec_free_reg(reg_cache, tmp2);
1254 }
1255
1256 static void rec_store(struct lightrec_cstate *state,
1257                       const struct block *block, u16 offset, jit_code_t code)
1258 {
1259         u16 flags = block->opcode_list[offset].flags;
1260         bool no_invalidate = (flags & LIGHTREC_NO_INVALIDATE) ||
1261                 state->state->invalidate_from_dma_only;
1262
1263         switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
1264         case LIGHTREC_IO_RAM:
1265                 rec_store_ram(state, block, offset, code, !no_invalidate);
1266                 break;
1267         case LIGHTREC_IO_SCRATCH:
1268                 rec_store_scratch(state, block, offset, code);
1269                 break;
1270         case LIGHTREC_IO_DIRECT:
1271                 if (no_invalidate)
1272                         rec_store_direct_no_invalidate(state, block, offset, code);
1273                 else
1274                         rec_store_direct(state, block, offset, code);
1275                 break;
1276         default:
1277                 rec_io(state, block, offset, true, false);
1278                 break;
1279         }
1280 }
1281
1282 static void rec_SB(struct lightrec_cstate *state,
1283                    const struct block *block, u16 offset)
1284 {
1285         _jit_name(block->_jit, __func__);
1286         rec_store(state, block, offset, jit_code_stxi_c);
1287 }
1288
1289 static void rec_SH(struct lightrec_cstate *state,
1290                    const struct block *block, u16 offset)
1291 {
1292         _jit_name(block->_jit, __func__);
1293         rec_store(state, block, offset, jit_code_stxi_s);
1294 }
1295
1296 static void rec_SW(struct lightrec_cstate *state,
1297                    const struct block *block, u16 offset)
1298
1299 {
1300         _jit_name(block->_jit, __func__);
1301         rec_store(state, block, offset, jit_code_stxi_i);
1302 }
1303
1304 static void rec_SWL(struct lightrec_cstate *state,
1305                     const struct block *block, u16 offset)
1306 {
1307         _jit_name(block->_jit, __func__);
1308         rec_io(state, block, offset, true, false);
1309 }
1310
1311 static void rec_SWR(struct lightrec_cstate *state,
1312                     const struct block *block, u16 offset)
1313 {
1314         _jit_name(block->_jit, __func__);
1315         rec_io(state, block, offset, true, false);
1316 }
1317
1318 static void rec_SWC2(struct lightrec_cstate *state,
1319                      const struct block *block, u16 offset)
1320 {
1321         _jit_name(block->_jit, __func__);
1322         rec_io(state, block, offset, false, false);
1323 }
1324
1325 static void rec_load_memory(struct lightrec_cstate *cstate,
1326                             const struct block *block,
1327                             u16 offset, jit_code_t code, bool is_unsigned,
1328                             uintptr_t addr_offset, u32 addr_mask)
1329 {
1330         struct regcache *reg_cache = cstate->reg_cache;
1331         struct opcode *op = &block->opcode_list[offset];
1332         jit_state_t *_jit = block->_jit;
1333         u8 rs, rt, addr_reg, flags = REG_EXT;
1334         union code c = op->c;
1335
1336         if (!c.i.rt)
1337                 return;
1338
1339         if (is_unsigned)
1340                 flags |= REG_ZEXT;
1341
1342         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1343         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
1344
1345         if (!(op->flags & LIGHTREC_NO_MASK)) {
1346                 jit_andi(rt, rs, addr_mask);
1347                 addr_reg = rt;
1348         } else {
1349                 addr_reg = rs;
1350         }
1351
1352         if (addr_offset) {
1353                 jit_addi(rt, addr_reg, addr_offset);
1354                 addr_reg = rt;
1355         }
1356
1357         jit_new_node_www(code, rt, addr_reg, (s16)c.i.imm);
1358
1359         lightrec_free_reg(reg_cache, rs);
1360         lightrec_free_reg(reg_cache, rt);
1361 }
1362
1363 static void rec_load_ram(struct lightrec_cstate *cstate,
1364                          const struct block *block,
1365                          u16 offset, jit_code_t code, bool is_unsigned)
1366 {
1367         _jit_note(block->_jit, __FILE__, __LINE__);
1368
1369         rec_load_memory(cstate, block, offset, code, is_unsigned,
1370                         cstate->state->offset_ram, RAM_SIZE - 1);
1371 }
1372
1373 static void rec_load_bios(struct lightrec_cstate *cstate,
1374                           const struct block *block,
1375                           u16 offset, jit_code_t code, bool is_unsigned)
1376 {
1377         _jit_note(block->_jit, __FILE__, __LINE__);
1378
1379         rec_load_memory(cstate, block, offset, code, is_unsigned,
1380                         cstate->state->offset_bios, 0x1fffffff);
1381 }
1382
1383 static void rec_load_scratch(struct lightrec_cstate *cstate,
1384                              const struct block *block,
1385                              u16 offset, jit_code_t code, bool is_unsigned)
1386 {
1387         _jit_note(block->_jit, __FILE__, __LINE__);
1388
1389         rec_load_memory(cstate, block, offset, code, is_unsigned,
1390                         cstate->state->offset_scratch, 0x1fffffff);
1391 }
1392
1393 static void rec_load_direct(struct lightrec_cstate *cstate, const struct block *block,
1394                             u16 offset, jit_code_t code, bool is_unsigned)
1395 {
1396         struct lightrec_state *state = cstate->state;
1397         struct regcache *reg_cache = cstate->reg_cache;
1398         union code c = block->opcode_list[offset].c;
1399         jit_state_t *_jit = block->_jit;
1400         jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2;
1401         u8 tmp, rs, rt, addr_reg, flags = REG_EXT;
1402         s16 imm;
1403
1404         if (!c.i.rt)
1405                 return;
1406
1407         if (is_unsigned)
1408                 flags |= REG_ZEXT;
1409
1410         jit_note(__FILE__, __LINE__);
1411         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
1412         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
1413
1414         if ((state->offset_ram == state->offset_bios &&
1415             state->offset_ram == state->offset_scratch &&
1416             state->mirrors_mapped) || !c.i.imm) {
1417                 addr_reg = rs;
1418                 imm = (s16)c.i.imm;
1419         } else {
1420                 jit_addi(rt, rs, (s16)c.i.imm);
1421                 addr_reg = rt;
1422                 imm = 0;
1423
1424                 if (c.i.rs != c.i.rt)
1425                         lightrec_free_reg(reg_cache, rs);
1426         }
1427
1428         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1429
1430         if (state->offset_ram == state->offset_bios &&
1431             state->offset_ram == state->offset_scratch) {
1432                 if (!state->mirrors_mapped) {
1433                         jit_andi(tmp, addr_reg, BIT(28));
1434                         jit_rshi_u(tmp, tmp, 28 - 22);
1435                         jit_ori(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1));
1436                         jit_andr(rt, addr_reg, tmp);
1437                 } else {
1438                         jit_andi(rt, addr_reg, 0x1fffffff);
1439                 }
1440
1441                 if (state->offset_ram)
1442                         jit_movi(tmp, state->offset_ram);
1443         } else {
1444                 to_not_ram = jit_bmsi(addr_reg, BIT(28));
1445
1446                 /* Convert to KUNSEG and avoid RAM mirrors */
1447                 jit_andi(rt, addr_reg, RAM_SIZE - 1);
1448
1449                 if (state->offset_ram)
1450                         jit_movi(tmp, state->offset_ram);
1451
1452                 to_end = jit_b();
1453
1454                 jit_patch(to_not_ram);
1455
1456                 if (state->offset_bios != state->offset_scratch)
1457                         to_not_bios = jit_bmci(addr_reg, BIT(22));
1458
1459                 /* Convert to KUNSEG */
1460                 jit_andi(rt, addr_reg, 0x1fc00000 | (BIOS_SIZE - 1));
1461
1462                 jit_movi(tmp, state->offset_bios);
1463
1464                 if (state->offset_bios != state->offset_scratch) {
1465                         to_end2 = jit_b();
1466
1467                         jit_patch(to_not_bios);
1468
1469                         /* Convert to KUNSEG */
1470                         jit_andi(rt, addr_reg, 0x1f800fff);
1471
1472                         if (state->offset_scratch)
1473                                 jit_movi(tmp, state->offset_scratch);
1474
1475                         jit_patch(to_end2);
1476                 }
1477
1478                 jit_patch(to_end);
1479         }
1480
1481         if (state->offset_ram || state->offset_bios || state->offset_scratch)
1482                 jit_addr(rt, rt, tmp);
1483
1484         jit_new_node_www(code, rt, rt, imm);
1485
1486         lightrec_free_reg(reg_cache, addr_reg);
1487         lightrec_free_reg(reg_cache, rt);
1488         lightrec_free_reg(reg_cache, tmp);
1489 }
1490
1491 static void rec_load(struct lightrec_cstate *state, const struct block *block,
1492                      u16 offset, jit_code_t code, bool is_unsigned)
1493 {
1494         u16 flags = block->opcode_list[offset].flags;
1495
1496         switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) {
1497         case LIGHTREC_IO_RAM:
1498                 rec_load_ram(state, block, offset, code, is_unsigned);
1499                 break;
1500         case LIGHTREC_IO_BIOS:
1501                 rec_load_bios(state, block, offset, code, is_unsigned);
1502                 break;
1503         case LIGHTREC_IO_SCRATCH:
1504                 rec_load_scratch(state, block, offset, code, is_unsigned);
1505                 break;
1506         case LIGHTREC_IO_DIRECT:
1507                 rec_load_direct(state, block, offset, code, is_unsigned);
1508                 break;
1509         default:
1510                 rec_io(state, block, offset, false, true);
1511                 break;
1512         }
1513 }
1514
1515 static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset)
1516 {
1517         _jit_name(block->_jit, __func__);
1518         rec_load(state, block, offset, jit_code_ldxi_c, false);
1519 }
1520
1521 static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset)
1522 {
1523         _jit_name(block->_jit, __func__);
1524         rec_load(state, block, offset, jit_code_ldxi_uc, true);
1525 }
1526
1527 static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset)
1528 {
1529         _jit_name(block->_jit, __func__);
1530         rec_load(state, block, offset, jit_code_ldxi_s, false);
1531 }
1532
1533 static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset)
1534 {
1535         _jit_name(block->_jit, __func__);
1536         rec_load(state, block, offset, jit_code_ldxi_us, true);
1537 }
1538
1539 static void rec_LWL(struct lightrec_cstate *state, const struct block *block, u16 offset)
1540 {
1541         _jit_name(block->_jit, __func__);
1542         rec_io(state, block, offset, true, true);
1543 }
1544
1545 static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u16 offset)
1546 {
1547         _jit_name(block->_jit, __func__);
1548         rec_io(state, block, offset, true, true);
1549 }
1550
1551 static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset)
1552 {
1553         _jit_name(block->_jit, __func__);
1554         rec_load(state, block, offset, jit_code_ldxi_i, false);
1555 }
1556
1557 static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset)
1558 {
1559         _jit_name(block->_jit, __func__);
1560         rec_io(state, block, offset, false, false);
1561 }
1562
1563 static void rec_break_syscall(struct lightrec_cstate *state,
1564                               const struct block *block, u16 offset, bool is_break)
1565 {
1566         _jit_note(block->_jit, __FILE__, __LINE__);
1567
1568         if (is_break)
1569                 call_to_c_wrapper(state, block, 0, false, C_WRAPPER_BREAK);
1570         else
1571                 call_to_c_wrapper(state, block, 0, false, C_WRAPPER_SYSCALL);
1572
1573         /* TODO: the return address should be "pc - 4" if we're a delay slot */
1574         lightrec_emit_end_of_block(state, block, offset, -1,
1575                                    get_ds_pc(block, offset, 0),
1576                                    31, 0, true);
1577 }
1578
1579 static void rec_special_SYSCALL(struct lightrec_cstate *state,
1580                                 const struct block *block, u16 offset)
1581 {
1582         _jit_name(block->_jit, __func__);
1583         rec_break_syscall(state, block, offset, false);
1584 }
1585
1586 static void rec_special_BREAK(struct lightrec_cstate *state,
1587                               const struct block *block, u16 offset)
1588 {
1589         _jit_name(block->_jit, __func__);
1590         rec_break_syscall(state, block, offset, true);
1591 }
1592
1593 static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset)
1594 {
1595         struct regcache *reg_cache = state->reg_cache;
1596         union code c = block->opcode_list[offset].c;
1597         jit_state_t *_jit = block->_jit;
1598
1599         jit_note(__FILE__, __LINE__);
1600         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
1601         lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
1602
1603         call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MTC);
1604
1605         if (c.i.op == OP_CP0 &&
1606             !(block->opcode_list[offset].flags & LIGHTREC_NO_DS) &&
1607             (c.r.rd == 12 || c.r.rd == 13))
1608                 lightrec_emit_end_of_block(state, block, offset, -1,
1609                                            get_ds_pc(block, offset, 1),
1610                                            0, 0, true);
1611 }
1612
1613 static void
1614 rec_mfc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
1615 {
1616         struct regcache *reg_cache = state->reg_cache;
1617         union code c = block->opcode_list[offset].c;
1618         jit_state_t *_jit = block->_jit;
1619         u8 rt;
1620
1621         jit_note(__FILE__, __LINE__);
1622
1623         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT);
1624
1625         jit_ldxi_i(rt, LIGHTREC_REG_STATE,
1626                    offsetof(struct lightrec_state, regs.cp0[c.r.rd]));
1627
1628         lightrec_free_reg(reg_cache, rt);
1629 }
1630
1631 static bool block_in_bios(const struct lightrec_cstate *state,
1632                           const struct block *block)
1633 {
1634         const struct lightrec_mem_map *bios = &state->state->maps[PSX_MAP_BIOS];
1635         u32 pc = kunseg(block->pc);
1636
1637         return pc >= bios->pc && pc < bios->pc + bios->length;
1638 }
1639
1640 static void
1641 rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
1642 {
1643         struct regcache *reg_cache = state->reg_cache;
1644         const union code c = block->opcode_list[offset].c;
1645         jit_state_t *_jit = block->_jit;
1646         u8 rt, tmp = 0, tmp2, status;
1647
1648         jit_note(__FILE__, __LINE__);
1649
1650         switch(c.r.rd) {
1651         case 1:
1652         case 4:
1653         case 8:
1654         case 14:
1655         case 15:
1656                 /* Those registers are read-only */
1657                 return;
1658         default:
1659                 break;
1660         }
1661
1662         if (block_in_bios(state, block) && c.r.rd == 12) {
1663                 /* If we are running code from the BIOS, handle writes to the
1664                  * Status register in C. BIOS code may toggle bit 16 which will
1665                  * map/unmap the RAM, while game code cannot do that. */
1666                 rec_mtc(state, block, offset);
1667                 return;
1668         }
1669
1670         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
1671
1672         if (c.r.rd != 13) {
1673                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[c.r.rd]),
1674                            LIGHTREC_REG_STATE, rt);
1675         }
1676
1677         if (c.r.rd == 12 || c.r.rd == 13) {
1678                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1679                 jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
1680                            offsetof(struct lightrec_state, regs.cp0[13]));
1681
1682                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1683         }
1684
1685         if (c.r.rd == 12) {
1686                 status = rt;
1687         } else if (c.r.rd == 13) {
1688                 /* Cause = (Cause & ~0x0300) | (value & 0x0300) */
1689                 jit_andi(tmp2, rt, 0x0300);
1690                 jit_ori(tmp, tmp, 0x0300);
1691                 jit_xori(tmp, tmp, 0x0300);
1692                 jit_orr(tmp, tmp, tmp2);
1693                 jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
1694                            offsetof(struct lightrec_state, regs.cp0[12]));
1695                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[13]),
1696                            LIGHTREC_REG_STATE, tmp);
1697                 status = tmp2;
1698         }
1699
1700         if (c.r.rd == 12 || c.r.rd == 13) {
1701                 /* Exit dynarec in case there's a software interrupt.
1702                  * exit_flags = !!(status & tmp & 0x0300) & status; */
1703                 jit_andr(tmp, tmp, status);
1704                 jit_andi(tmp, tmp, 0x0300);
1705                 jit_nei(tmp, tmp, 0);
1706                 jit_andr(tmp, tmp, status);
1707         }
1708
1709         if (c.r.rd == 12) {
1710                 /* Exit dynarec in case we unmask a hardware interrupt.
1711                  * exit_flags = !(~status & 0x401) */
1712
1713                 jit_comr(tmp2, status);
1714                 jit_andi(tmp2, tmp2, 0x401);
1715                 jit_eqi(tmp2, tmp2, 0);
1716                 jit_orr(tmp, tmp, tmp2);
1717         }
1718
1719         if (c.r.rd == 12 || c.r.rd == 13) {
1720                 jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
1721                            LIGHTREC_REG_STATE, tmp);
1722
1723                 lightrec_free_reg(reg_cache, tmp);
1724                 lightrec_free_reg(reg_cache, tmp2);
1725         }
1726
1727         lightrec_free_reg(reg_cache, rt);
1728
1729         if (!(block->opcode_list[offset].flags & LIGHTREC_NO_DS) &&
1730             (c.r.rd == 12 || c.r.rd == 13))
1731                 lightrec_emit_eob(state, block, offset + 1, true);
1732 }
1733
1734 static void rec_cp0_MFC0(struct lightrec_cstate *state,
1735                          const struct block *block, u16 offset)
1736 {
1737         _jit_name(block->_jit, __func__);
1738         rec_mfc0(state, block, offset);
1739 }
1740
1741 static void rec_cp0_CFC0(struct lightrec_cstate *state,
1742                          const struct block *block, u16 offset)
1743 {
1744         _jit_name(block->_jit, __func__);
1745         rec_mfc0(state, block, offset);
1746 }
1747
1748 static void rec_cp0_MTC0(struct lightrec_cstate *state,
1749                          const struct block *block, u16 offset)
1750 {
1751         _jit_name(block->_jit, __func__);
1752         rec_mtc0(state, block, offset);
1753 }
1754
1755 static void rec_cp0_CTC0(struct lightrec_cstate *state,
1756                          const struct block *block, u16 offset)
1757 {
1758         _jit_name(block->_jit, __func__);
1759         rec_mtc0(state, block, offset);
1760 }
1761
1762 static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
1763                                const struct block *block, u16 offset)
1764 {
1765         struct regcache *reg_cache = state->reg_cache;
1766         const union code c = block->opcode_list[offset].c;
1767         jit_state_t *_jit = block->_jit;
1768         const u32 zext_regs = 0x300f0080;
1769         u8 rt, tmp, tmp2, tmp3, out, flags;
1770         u8 reg = c.r.rd == 15 ? 14 : c.r.rd;
1771         unsigned int i;
1772
1773         _jit_name(block->_jit, __func__);
1774
1775         flags = (zext_regs & BIT(reg)) ? REG_ZEXT : REG_EXT;
1776         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, flags);
1777
1778         switch (reg) {
1779         case 1:
1780         case 3:
1781         case 5:
1782         case 8:
1783         case 9:
1784         case 10:
1785         case 11:
1786                 jit_ldxi_s(rt, LIGHTREC_REG_STATE,
1787                            offsetof(struct lightrec_state, regs.cp2d[reg]));
1788                 break;
1789         case 7:
1790         case 16:
1791         case 17:
1792         case 18:
1793         case 19:
1794                 jit_ldxi_us(rt, LIGHTREC_REG_STATE,
1795                            offsetof(struct lightrec_state, regs.cp2d[reg]));
1796                 break;
1797         case 28:
1798         case 29:
1799                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1800                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1801                 tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
1802
1803                 for (i = 0; i < 3; i++) {
1804                         out = i == 0 ? rt : tmp;
1805
1806                         jit_ldxi_s(tmp, LIGHTREC_REG_STATE,
1807                                    offsetof(struct lightrec_state, regs.cp2d[9 + i]));
1808                         jit_movi(tmp2, 0x1f);
1809                         jit_rshi(out, tmp, 7);
1810
1811                         jit_ltr(tmp3, tmp2, out);
1812                         jit_movnr(out, tmp2, tmp3);
1813
1814                         jit_gei(tmp2, out, 0);
1815                         jit_movzr(out, tmp2, tmp2);
1816
1817                         if (i > 0) {
1818                                 jit_lshi(tmp, tmp, 5 * i);
1819                                 jit_orr(rt, rt, tmp);
1820                         }
1821                 }
1822
1823
1824                 lightrec_free_reg(reg_cache, tmp);
1825                 lightrec_free_reg(reg_cache, tmp2);
1826                 lightrec_free_reg(reg_cache, tmp3);
1827                 break;
1828         default:
1829                 jit_ldxi_i(rt, LIGHTREC_REG_STATE,
1830                            offsetof(struct lightrec_state, regs.cp2d[reg]));
1831                 break;
1832         }
1833
1834         lightrec_free_reg(reg_cache, rt);
1835 }
1836
1837 static void rec_cp2_basic_CFC2(struct lightrec_cstate *state,
1838                                const struct block *block, u16 offset)
1839 {
1840         struct regcache *reg_cache = state->reg_cache;
1841         const union code c = block->opcode_list[offset].c;
1842         jit_state_t *_jit = block->_jit;
1843         u8 rt;
1844
1845         _jit_name(block->_jit, __func__);
1846
1847         switch (c.r.rd) {
1848         case 4:
1849         case 12:
1850         case 20:
1851         case 26:
1852         case 27:
1853         case 29:
1854         case 30:
1855                 rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT);
1856                 jit_ldxi_s(rt, LIGHTREC_REG_STATE,
1857                            offsetof(struct lightrec_state, regs.cp2c[c.r.rd]));
1858                 break;
1859         default:
1860                 rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT);
1861                 jit_ldxi_i(rt, LIGHTREC_REG_STATE,
1862                            offsetof(struct lightrec_state, regs.cp2c[c.r.rd]));
1863                 break;
1864         }
1865
1866         lightrec_free_reg(reg_cache, rt);
1867 }
1868
1869 static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
1870                                const struct block *block, u16 offset)
1871 {
1872         struct regcache *reg_cache = state->reg_cache;
1873         const union code c = block->opcode_list[offset].c;
1874         jit_state_t *_jit = block->_jit;
1875         jit_node_t *loop, *to_loop;
1876         u8 rt, tmp, tmp2, flags = 0;
1877
1878         _jit_name(block->_jit, __func__);
1879
1880         if (c.r.rd == 31)
1881                 return;
1882
1883         if (c.r.rd == 30)
1884                 flags |= REG_EXT;
1885
1886         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
1887
1888         switch (c.r.rd) {
1889         case 15:
1890                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1891                 jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
1892                            offsetof(struct lightrec_state, regs.cp2d[13]));
1893
1894                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1895                 jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
1896                            offsetof(struct lightrec_state, regs.cp2d[14]));
1897
1898                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[12]),
1899                            LIGHTREC_REG_STATE, tmp);
1900                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[13]),
1901                            LIGHTREC_REG_STATE, tmp2);
1902                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[14]),
1903                            LIGHTREC_REG_STATE, rt);
1904
1905                 lightrec_free_reg(reg_cache, tmp);
1906                 lightrec_free_reg(reg_cache, tmp2);
1907                 break;
1908         case 28:
1909                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1910
1911                 jit_lshi(tmp, rt, 7);
1912                 jit_andi(tmp, tmp, 0xf80);
1913                 jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[9]),
1914                            LIGHTREC_REG_STATE, tmp);
1915
1916                 jit_lshi(tmp, rt, 2);
1917                 jit_andi(tmp, tmp, 0xf80);
1918                 jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[10]),
1919                            LIGHTREC_REG_STATE, tmp);
1920
1921                 jit_rshi(tmp, rt, 3);
1922                 jit_andi(tmp, tmp, 0xf80);
1923                 jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[11]),
1924                            LIGHTREC_REG_STATE, tmp);
1925
1926                 lightrec_free_reg(reg_cache, tmp);
1927                 break;
1928         case 30:
1929                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1930                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1931
1932                 /* if (rt < 0) rt = ~rt; */
1933                 jit_rshi(tmp, rt, 31);
1934                 jit_xorr(tmp, rt, tmp);
1935
1936                 /* We know the sign bit is 0. Left-shift by 1 to start the algorithm */
1937                 jit_lshi(tmp, tmp, 1);
1938                 jit_movi(tmp2, 33);
1939
1940                 /* Decrement tmp2 and right-shift the value by 1 until it equals zero */
1941                 loop = jit_label();
1942                 jit_subi(tmp2, tmp2, 1);
1943                 jit_rshi_u(tmp, tmp, 1);
1944                 to_loop = jit_bnei(tmp, 0);
1945
1946                 jit_patch_at(to_loop, loop);
1947
1948                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[31]),
1949                            LIGHTREC_REG_STATE, tmp2);
1950                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[30]),
1951                            LIGHTREC_REG_STATE, rt);
1952
1953                 lightrec_free_reg(reg_cache, tmp);
1954                 lightrec_free_reg(reg_cache, tmp2);
1955                 break;
1956         default:
1957                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[c.r.rd]),
1958                            LIGHTREC_REG_STATE, rt);
1959                 break;
1960         }
1961
1962         lightrec_free_reg(reg_cache, rt);
1963 }
1964
1965 static void rec_cp2_basic_CTC2(struct lightrec_cstate *state,
1966                                const struct block *block, u16 offset)
1967 {
1968         struct regcache *reg_cache = state->reg_cache;
1969         const union code c = block->opcode_list[offset].c;
1970         jit_state_t *_jit = block->_jit;
1971         u8 rt, tmp, tmp2;
1972
1973         _jit_name(block->_jit, __func__);
1974
1975         rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
1976
1977         switch (c.r.rd) {
1978         case 4:
1979         case 12:
1980         case 20:
1981         case 26:
1982         case 27:
1983         case 29:
1984         case 30:
1985                 jit_stxi_s(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]),
1986                            LIGHTREC_REG_STATE, rt);
1987                 break;
1988         case 31:
1989                 tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
1990                 tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
1991
1992                 jit_andi(tmp, rt, 0x7f87e000);
1993                 jit_nei(tmp, tmp, 0);
1994                 jit_lshi(tmp, tmp, 31);
1995
1996                 jit_andi(tmp2, rt, 0x7ffff000);
1997                 jit_orr(tmp, tmp2, tmp);
1998
1999                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[31]),
2000                            LIGHTREC_REG_STATE, tmp);
2001
2002                 lightrec_free_reg(reg_cache, tmp);
2003                 lightrec_free_reg(reg_cache, tmp2);
2004                 break;
2005
2006         default:
2007                 jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]),
2008                            LIGHTREC_REG_STATE, rt);
2009         }
2010
2011         lightrec_free_reg(reg_cache, rt);
2012 }
2013
2014 static void rec_cp0_RFE(struct lightrec_cstate *state,
2015                         const struct block *block, u16 offset)
2016 {
2017         struct regcache *reg_cache = state->reg_cache;
2018         jit_state_t *_jit = block->_jit;
2019         u8 status, tmp;
2020
2021         jit_name(__func__);
2022         jit_note(__FILE__, __LINE__);
2023
2024         status = lightrec_alloc_reg_temp(reg_cache, _jit);
2025         jit_ldxi_i(status, LIGHTREC_REG_STATE,
2026                    offsetof(struct lightrec_state, regs.cp0[12]));
2027
2028         tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
2029
2030         /* status = ((status >> 2) & 0xf) | status & ~0xf; */
2031         jit_rshi(tmp, status, 2);
2032         jit_andi(tmp, tmp, 0xf);
2033         jit_andi(status, status, ~0xful);
2034         jit_orr(status, status, tmp);
2035
2036         jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
2037                    offsetof(struct lightrec_state, regs.cp0[13]));
2038         jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[12]),
2039                    LIGHTREC_REG_STATE, status);
2040
2041         /* Exit dynarec in case there's a software interrupt.
2042          * exit_flags = !!(status & cause & 0x0300) & status; */
2043         jit_andr(tmp, tmp, status);
2044         jit_andi(tmp, tmp, 0x0300);
2045         jit_nei(tmp, tmp, 0);
2046         jit_andr(tmp, tmp, status);
2047         jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
2048                    LIGHTREC_REG_STATE, tmp);
2049
2050         lightrec_free_reg(reg_cache, status);
2051         lightrec_free_reg(reg_cache, tmp);
2052 }
2053
2054 static void rec_CP(struct lightrec_cstate *state,
2055                    const struct block *block, u16 offset)
2056 {
2057         union code c = block->opcode_list[offset].c;
2058         jit_state_t *_jit = block->_jit;
2059
2060         jit_name(__func__);
2061         jit_note(__FILE__, __LINE__);
2062
2063         call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_CP);
2064 }
2065
2066 static void rec_meta_MOV(struct lightrec_cstate *state,
2067                          const struct block *block, u16 offset)
2068 {
2069         struct regcache *reg_cache = state->reg_cache;
2070         union code c = block->opcode_list[offset].c;
2071         jit_state_t *_jit = block->_jit;
2072         u8 rs, rd;
2073
2074         _jit_name(block->_jit, __func__);
2075         jit_note(__FILE__, __LINE__);
2076         if (c.r.rs)
2077                 rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0);
2078         rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, REG_EXT);
2079
2080         if (c.r.rs == 0)
2081                 jit_movi(rd, 0);
2082         else
2083                 jit_extr_i(rd, rs);
2084
2085         if (c.r.rs)
2086                 lightrec_free_reg(reg_cache, rs);
2087         lightrec_free_reg(reg_cache, rd);
2088 }
2089
2090 static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state,
2091                                const struct block *block,
2092                                u16 offset)
2093 {
2094         struct regcache *reg_cache = state->reg_cache;
2095         union code c = block->opcode_list[offset].c;
2096         jit_state_t *_jit = block->_jit;
2097         u8 rs, rt;
2098
2099         _jit_name(block->_jit, __func__);
2100         jit_note(__FILE__, __LINE__);
2101
2102         rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
2103         rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT);
2104
2105         if (c.i.op == OP_META_EXTC)
2106                 jit_extr_c(rt, rs);
2107         else
2108                 jit_extr_s(rt, rs);
2109
2110         lightrec_free_reg(reg_cache, rs);
2111         lightrec_free_reg(reg_cache, rt);
2112 }
2113
2114 static const lightrec_rec_func_t rec_standard[64] = {
2115         SET_DEFAULT_ELM(rec_standard, unknown_opcode),
2116         [OP_SPECIAL]            = rec_SPECIAL,
2117         [OP_REGIMM]             = rec_REGIMM,
2118         [OP_J]                  = rec_J,
2119         [OP_JAL]                = rec_JAL,
2120         [OP_BEQ]                = rec_BEQ,
2121         [OP_BNE]                = rec_BNE,
2122         [OP_BLEZ]               = rec_BLEZ,
2123         [OP_BGTZ]               = rec_BGTZ,
2124         [OP_ADDI]               = rec_ADDI,
2125         [OP_ADDIU]              = rec_ADDIU,
2126         [OP_SLTI]               = rec_SLTI,
2127         [OP_SLTIU]              = rec_SLTIU,
2128         [OP_ANDI]               = rec_ANDI,
2129         [OP_ORI]                = rec_ORI,
2130         [OP_XORI]               = rec_XORI,
2131         [OP_LUI]                = rec_LUI,
2132         [OP_CP0]                = rec_CP0,
2133         [OP_CP2]                = rec_CP2,
2134         [OP_LB]                 = rec_LB,
2135         [OP_LH]                 = rec_LH,
2136         [OP_LWL]                = rec_LWL,
2137         [OP_LW]                 = rec_LW,
2138         [OP_LBU]                = rec_LBU,
2139         [OP_LHU]                = rec_LHU,
2140         [OP_LWR]                = rec_LWR,
2141         [OP_SB]                 = rec_SB,
2142         [OP_SH]                 = rec_SH,
2143         [OP_SWL]                = rec_SWL,
2144         [OP_SW]                 = rec_SW,
2145         [OP_SWR]                = rec_SWR,
2146         [OP_LWC2]               = rec_LWC2,
2147         [OP_SWC2]               = rec_SWC2,
2148
2149         [OP_META_MOV]           = rec_meta_MOV,
2150         [OP_META_EXTC]          = rec_meta_EXTC_EXTS,
2151         [OP_META_EXTS]          = rec_meta_EXTC_EXTS,
2152 };
2153
2154 static const lightrec_rec_func_t rec_special[64] = {
2155         SET_DEFAULT_ELM(rec_special, unknown_opcode),
2156         [OP_SPECIAL_SLL]        = rec_special_SLL,
2157         [OP_SPECIAL_SRL]        = rec_special_SRL,
2158         [OP_SPECIAL_SRA]        = rec_special_SRA,
2159         [OP_SPECIAL_SLLV]       = rec_special_SLLV,
2160         [OP_SPECIAL_SRLV]       = rec_special_SRLV,
2161         [OP_SPECIAL_SRAV]       = rec_special_SRAV,
2162         [OP_SPECIAL_JR]         = rec_special_JR,
2163         [OP_SPECIAL_JALR]       = rec_special_JALR,
2164         [OP_SPECIAL_SYSCALL]    = rec_special_SYSCALL,
2165         [OP_SPECIAL_BREAK]      = rec_special_BREAK,
2166         [OP_SPECIAL_MFHI]       = rec_special_MFHI,
2167         [OP_SPECIAL_MTHI]       = rec_special_MTHI,
2168         [OP_SPECIAL_MFLO]       = rec_special_MFLO,
2169         [OP_SPECIAL_MTLO]       = rec_special_MTLO,
2170         [OP_SPECIAL_MULT]       = rec_special_MULT,
2171         [OP_SPECIAL_MULTU]      = rec_special_MULTU,
2172         [OP_SPECIAL_DIV]        = rec_special_DIV,
2173         [OP_SPECIAL_DIVU]       = rec_special_DIVU,
2174         [OP_SPECIAL_ADD]        = rec_special_ADD,
2175         [OP_SPECIAL_ADDU]       = rec_special_ADDU,
2176         [OP_SPECIAL_SUB]        = rec_special_SUB,
2177         [OP_SPECIAL_SUBU]       = rec_special_SUBU,
2178         [OP_SPECIAL_AND]        = rec_special_AND,
2179         [OP_SPECIAL_OR]         = rec_special_OR,
2180         [OP_SPECIAL_XOR]        = rec_special_XOR,
2181         [OP_SPECIAL_NOR]        = rec_special_NOR,
2182         [OP_SPECIAL_SLT]        = rec_special_SLT,
2183         [OP_SPECIAL_SLTU]       = rec_special_SLTU,
2184 };
2185
2186 static const lightrec_rec_func_t rec_regimm[64] = {
2187         SET_DEFAULT_ELM(rec_regimm, unknown_opcode),
2188         [OP_REGIMM_BLTZ]        = rec_regimm_BLTZ,
2189         [OP_REGIMM_BGEZ]        = rec_regimm_BGEZ,
2190         [OP_REGIMM_BLTZAL]      = rec_regimm_BLTZAL,
2191         [OP_REGIMM_BGEZAL]      = rec_regimm_BGEZAL,
2192 };
2193
2194 static const lightrec_rec_func_t rec_cp0[64] = {
2195         SET_DEFAULT_ELM(rec_cp0, rec_CP),
2196         [OP_CP0_MFC0]           = rec_cp0_MFC0,
2197         [OP_CP0_CFC0]           = rec_cp0_CFC0,
2198         [OP_CP0_MTC0]           = rec_cp0_MTC0,
2199         [OP_CP0_CTC0]           = rec_cp0_CTC0,
2200         [OP_CP0_RFE]            = rec_cp0_RFE,
2201 };
2202
2203 static const lightrec_rec_func_t rec_cp2_basic[64] = {
2204         SET_DEFAULT_ELM(rec_cp2_basic, rec_CP),
2205         [OP_CP2_BASIC_MFC2]     = rec_cp2_basic_MFC2,
2206         [OP_CP2_BASIC_CFC2]     = rec_cp2_basic_CFC2,
2207         [OP_CP2_BASIC_MTC2]     = rec_cp2_basic_MTC2,
2208         [OP_CP2_BASIC_CTC2]     = rec_cp2_basic_CTC2,
2209 };
2210
2211 static void rec_SPECIAL(struct lightrec_cstate *state,
2212                         const struct block *block, u16 offset)
2213 {
2214         union code c = block->opcode_list[offset].c;
2215         lightrec_rec_func_t f = rec_special[c.r.op];
2216
2217         if (!HAS_DEFAULT_ELM && unlikely(!f))
2218                 unknown_opcode(state, block, offset);
2219         else
2220                 (*f)(state, block, offset);
2221 }
2222
2223 static void rec_REGIMM(struct lightrec_cstate *state,
2224                        const struct block *block, u16 offset)
2225 {
2226         union code c = block->opcode_list[offset].c;
2227         lightrec_rec_func_t f = rec_regimm[c.r.rt];
2228
2229         if (!HAS_DEFAULT_ELM && unlikely(!f))
2230                 unknown_opcode(state, block, offset);
2231         else
2232                 (*f)(state, block, offset);
2233 }
2234
2235 static void rec_CP0(struct lightrec_cstate *state,
2236                     const struct block *block, u16 offset)
2237 {
2238         union code c = block->opcode_list[offset].c;
2239         lightrec_rec_func_t f = rec_cp0[c.r.rs];
2240
2241         if (!HAS_DEFAULT_ELM && unlikely(!f))
2242                 rec_CP(state, block, offset);
2243         else
2244                 (*f)(state, block, offset);
2245 }
2246
2247 static void rec_CP2(struct lightrec_cstate *state,
2248                     const struct block *block, u16 offset)
2249 {
2250         union code c = block->opcode_list[offset].c;
2251
2252         if (c.r.op == OP_CP2_BASIC) {
2253                 lightrec_rec_func_t f = rec_cp2_basic[c.r.rs];
2254
2255                 if (HAS_DEFAULT_ELM || likely(f)) {
2256                         (*f)(state, block, offset);
2257                         return;
2258                 }
2259         }
2260
2261         rec_CP(state, block, offset);
2262 }
2263
2264 void lightrec_rec_opcode(struct lightrec_cstate *state,
2265                          const struct block *block, u16 offset)
2266 {
2267         struct regcache *reg_cache = state->reg_cache;
2268         struct lightrec_branch_target *target;
2269         const struct opcode *op = &block->opcode_list[offset];
2270         jit_state_t *_jit = block->_jit;
2271         lightrec_rec_func_t f;
2272
2273         if (op->flags & LIGHTREC_SYNC) {
2274                 jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
2275                 state->cycles = 0;
2276
2277                 lightrec_storeback_regs(reg_cache, _jit);
2278                 lightrec_regcache_reset(reg_cache);
2279
2280                 pr_debug("Adding branch target at offset 0x%x\n", offset << 2);
2281                 target = &state->targets[state->nb_targets++];
2282                 target->offset = offset;
2283                 target->label = jit_indirect();
2284         }
2285
2286         if (likely(op->opcode)) {
2287                 f = rec_standard[op->i.op];
2288
2289                 if (!HAS_DEFAULT_ELM && unlikely(!f))
2290                         unknown_opcode(state, block, offset);
2291                 else
2292                         (*f)(state, block, offset);
2293         }
2294
2295         if (unlikely(op->flags & LIGHTREC_UNLOAD_RD)) {
2296                 lightrec_clean_reg_if_loaded(reg_cache, _jit, op->r.rd, true);
2297                 pr_debug("Cleaning RD reg %s\n", lightrec_reg_name(op->r.rd));
2298         }
2299         if (unlikely(op->flags & LIGHTREC_UNLOAD_RS)) {
2300                 lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, true);
2301                 pr_debug("Cleaning RS reg %s\n", lightrec_reg_name(op->i.rt));
2302         }
2303         if (unlikely(op->flags & LIGHTREC_UNLOAD_RT)) {
2304                 lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true);
2305                 pr_debug("Cleaning RT reg %s\n", lightrec_reg_name(op->i.rt));
2306         }
2307 }