// SPDX-License-Identifier: LGPL-2.1-or-later
/*
 * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
 */

#include "disassembler.h"
#include "interpreter.h"
#include "lightrec-private.h"
#include "optimizer.h"
#include "regcache.h"

#include <stdbool.h>

struct interpreter;

static u32 int_CP0(struct interpreter *inter);
static u32 int_CP2(struct interpreter *inter);
static u32 int_SPECIAL(struct interpreter *inter);
static u32 int_META(struct interpreter *inter);
static u32 int_REGIMM(struct interpreter *inter);
static u32 int_branch(struct interpreter *inter, u32 pc,
		      union code code, bool branch);

typedef u32 (*lightrec_int_func_t)(struct interpreter *inter);

static const lightrec_int_func_t int_standard[64];

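/* Per-call interpreter context. inter->op / inter->offset track the
 * current opcode within block->opcode_list, and inter->cycles counts the
 * cycles not yet committed to state->current_cycle. The delay_slot flag
 * marks an interpreter instance that runs a single opcode inside a branch
 * delay slot; load_delay marks execution paths on which the opcode flags
 * must not be updated (see int_io()). */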
struct interpreter {
	struct lightrec_state *state;
	struct block *block;
	struct opcode *op;
	u32 cycles;
	bool delay_slot;
	bool load_delay;
	u16 offset;
};

static u32 int_get_branch_pc(const struct interpreter *inter)
{
	return get_branch_pc(inter->block, inter->offset, 0);
}

static inline u32 int_get_ds_pc(const struct interpreter *inter, s16 imm)
{
	return get_ds_pc(inter->block, inter->offset, imm);
}

static inline struct opcode *next_op(const struct interpreter *inter)
{
	return &inter->op[1];
}

static inline u32 execute(lightrec_int_func_t func, struct interpreter *inter)
{
	return (*func)(inter);
}

static inline u32 lightrec_int_op(struct interpreter *inter)
{
	return execute(int_standard[inter->op->i.op], inter);
}

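/* Step to the following opcode and dispatch it through the int_standard
 * table. Opcodes flagged as sync points commit the pending cycle count to
 * state->current_cycle first, presumably so that handlers reached from
 * the opcode observe an up-to-date cycle counter. */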
static inline u32 jump_skip(struct interpreter *inter)
{
	inter->op = next_op(inter);
	inter->offset++;

	if (op_flag_sync(inter->op->flags)) {
		inter->state->current_cycle += inter->cycles;
		inter->cycles = 0;
	}

	return lightrec_int_op(inter);
}

static inline u32 jump_next(struct interpreter *inter)
{
	inter->cycles += lightrec_cycles_of_opcode(inter->state, inter->op->c);

	if (unlikely(inter->delay_slot))
		return 0;

	return jump_skip(inter);
}

static inline u32 jump_after_branch(struct interpreter *inter)
{
	inter->cycles += lightrec_cycles_of_opcode(inter->state, inter->op->c);

	if (unlikely(inter->delay_slot))
		return 0;

	inter->op = next_op(inter);
	inter->offset++;

	return jump_skip(inter);
}

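/* Commit the pending cycles, plus those of the branch opcode and of its
 * delay slot (when present), to state->current_cycle before the branch is
 * evaluated. inter->cycles is set to the negated amount, so the total is
 * not counted twice when those opcodes later add their own cycles. */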
static void update_cycles_before_branch(struct interpreter *inter)
{
	u32 cycles;

	if (!inter->delay_slot) {
		cycles = lightrec_cycles_of_opcode(inter->state, inter->op->c);

		if (!op_flag_no_ds(inter->op->flags) &&
		    has_delay_slot(inter->op->c))
			cycles += lightrec_cycles_of_opcode(inter->state, next_op(inter)->c);

		inter->cycles += cycles;
		inter->state->current_cycle += inter->cycles;
		inter->cycles = -cycles;
	}
}

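/* Evaluate a branch condition directly from the register cache, without
 * executing the opcode or touching any state. JR/JALR and J/JAL are
 * unconditional and always report as taken; opcodes not recognized here
 * report as not taken. */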
static bool is_branch_taken(const u32 *reg_cache, union code op)
{
	switch (op.i.op) {
	case OP_SPECIAL:
		return op.r.op == OP_SPECIAL_JR || op.r.op == OP_SPECIAL_JALR;
	case OP_J:
	case OP_JAL:
		return true;
	case OP_BEQ:
		return reg_cache[op.r.rs] == reg_cache[op.r.rt];
	case OP_BNE:
		return reg_cache[op.r.rs] != reg_cache[op.r.rt];
	case OP_REGIMM:
		switch (op.r.rt) {
		case OP_REGIMM_BLTZ:
		case OP_REGIMM_BLTZAL:
			return (s32)reg_cache[op.r.rs] < 0;
		case OP_REGIMM_BGEZ:
		case OP_REGIMM_BGEZAL:
			return (s32)reg_cache[op.r.rs] >= 0;
		}
		/* fall through */
	default:
		break;
	}

	return false;
}

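/* Execute the delay slot of a branch opcode, covering the problematic
 * cases:
 * - a load in the delay slot whose destination register is read by the
 *   first opcode of the next block (an "impossible branch"): that first
 *   opcode is run here, before the delay slot, to honour the load delay;
 * - a branch sitting in the delay slot of another branch;
 * - RFE in the delay slot, where the branch target is rewound by one
 *   instruction if the EPC points at a GTE opcode, so that the GTE opcode
 *   gets executed (see the comment below).
 * Returns the address at which execution should resume. */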
static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
{
	struct lightrec_state *state = inter->state;
	u32 *reg_cache = state->regs.gpr;
	struct opcode new_op, *op = next_op(inter);
	union code op_next;
	struct interpreter inter2 = {
		.state = state,
		.cycles = inter->cycles,
		.delay_slot = true,
		.load_delay = true,
	};
	bool run_first_op = false, dummy_ld = false, save_rs = false,
	     load_in_ds, branch_in_ds = false, branch_at_addr = false,
	     branch_taken;
	u32 new_rt, old_rs = 0, new_rs = 0;
	u32 next_pc, ds_next_pc, epc;

	if (op->i.op == OP_CP0 && op->r.rs == OP_CP0_RFE) {
		/* When an IRQ happens, the PSX exception handlers (when done)
		 * will jump back to the instruction that was executed right
		 * before the IRQ, unless it was a GTE opcode; in that case, it
		 * jumps to the instruction right after.
		 * Since we will never handle the IRQ right after a GTE opcode,
		 * but on branch boundaries, we need to adjust the return
		 * address so that the GTE opcode is effectively executed.
		 */
		epc = state->regs.cp0[14];

		if (epc == pc - 4) {
			op_next = lightrec_read_opcode(state, epc);
			if (op_next.i.op == OP_CP2)
				pc -= 4;
		}
	}

	if (inter->delay_slot) {
		/* The branch opcode was in a delay slot of another branch
		 * opcode. Just return the target address of the second
		 * branch. */
		return pc;
	}

	/* An opcode located in the delay slot performing a delayed read
	 * requires special handling; we will always resort to using the
	 * interpreter in that case.
	 * Same goes for when we have a branch in a delay slot of another
	 * branch. */
	load_in_ds = opcode_is_load(op->c) || opcode_is_mfc(op->c);
	branch_in_ds = has_delay_slot(op->c);

	if (branch) {
		if (load_in_ds || branch_in_ds)
			op_next = lightrec_read_opcode(state, pc);

		if (load_in_ds) {
			/* Verify that the next block actually reads the
			 * destination register of the delay slot opcode. */
			run_first_op = opcode_reads_register(op_next, op->r.rt);
		}

		if (branch_in_ds) {
			run_first_op = true;
			next_pc = pc + 4;
		}

		if (load_in_ds && run_first_op) {
			next_pc = pc + 4;

			/* If the first opcode of the next block writes the
			 * register used as the address for the load, we need to
			 * reset to the old value after it has been executed,
			 * then restore the new value after the delay slot
			 * opcode has been executed. */
			save_rs = opcode_reads_register(op->c, op->r.rs) &&
				opcode_writes_register(op_next, op->r.rs);
			if (save_rs)
				old_rs = reg_cache[op->r.rs];

			/* If both the first opcode of the next block and the
			 * delay slot opcode write to the same register, the
			 * value written by the delay slot opcode is
			 * discarded. */
			dummy_ld = opcode_writes_register(op_next, op->r.rt);
		}

		if (!run_first_op) {
			next_pc = pc;
		} else if (has_delay_slot(op_next)) {
			/* The first opcode of the next block is a branch, so we
			 * cannot execute it here, because of the load delay.
			 * Just check whether or not the branch would be taken,
			 * and save that info into the interpreter struct. */
			branch_at_addr = true;
			branch_taken = is_branch_taken(reg_cache, op_next);
			pr_debug("Target of impossible branch is a branch, "
				 "%staken.\n", branch_taken ? "" : "not ");
			inter->cycles += lightrec_cycles_of_opcode(inter->state, op_next);
			old_rs = reg_cache[op_next.r.rs];
		} else {
			new_op.c = op_next;
			new_op.flags = 0;
			inter2.op = &new_op;
			inter2.offset = 0;

			/* Execute the first opcode of the next block */
			lightrec_int_op(&inter2);

			if (save_rs) {
				new_rs = reg_cache[op->r.rs];
				reg_cache[op->r.rs] = old_rs;
			}

			inter->cycles += lightrec_cycles_of_opcode(inter->state, op_next);
		}
	} else {
		next_pc = int_get_ds_pc(inter, 2);
	}

	inter2.block = inter->block;
	inter2.op = op;
	inter2.cycles = inter->cycles;
	inter2.offset = inter->offset + 1;

	if (dummy_ld)
		new_rt = reg_cache[op->r.rt];

	/* Execute delay slot opcode */
	ds_next_pc = lightrec_int_op(&inter2);

	if (branch_at_addr) {
		if (op_next.i.op == OP_SPECIAL)
			/* TODO: Handle JALR setting $ra */
			ds_next_pc = old_rs;
		else if (op_next.i.op == OP_J || op_next.i.op == OP_JAL)
			/* TODO: Handle JAL setting $ra */
			ds_next_pc = (pc & 0xf0000000) | (op_next.j.imm << 2);
		else
			ds_next_pc = pc + 4 + ((s16)op_next.i.imm << 2);
	}

	if (branch_at_addr && !branch_taken) {
		/* If the branch at the target of the branch opcode is not
		 * taken, we jump to its delay slot */
		next_pc = pc + sizeof(u32);
	} else if (branch_at_addr || (!branch && branch_in_ds)) {
		next_pc = ds_next_pc;
	}

	if (save_rs)
		reg_cache[op->r.rs] = new_rs;
	if (dummy_ld)
		reg_cache[op->r.rt] = new_rt;

	inter->cycles += lightrec_cycles_of_opcode(inter->state, op->c);

	if (branch_at_addr && branch_taken) {
		/* If the branch at the target of the branch opcode is taken,
		 * we execute its delay slot here, and jump to its target
		 * address. */
		op_next = lightrec_read_opcode(state, pc + 4);

		new_op.c = op_next;
		new_op.flags = 0;
		inter2.op = &new_op;
		inter2.block = NULL;

		inter->cycles += lightrec_cycles_of_opcode(inter->state, op_next);

		pr_debug("Running delay slot of branch at target of impossible "
			 "branch\n");
		lightrec_int_op(&inter2);
	}

	return next_pc;
}

static u32 int_unimplemented(struct interpreter *inter)
{
	lightrec_set_exit_flags(inter->state, LIGHTREC_EXIT_UNKNOWN_OP);

	return inter->block->pc + (inter->offset << 2);
}

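/* J/JAL form their target by keeping the top four bits of the branch PC
 * and substituting the 26-bit immediate shifted left by two; for example,
 * "J 0x123456" executed at 0x80030000 jumps to
 * (0x80030000 & 0xf0000000) | (0x123456 << 2) = 0x8048d158. JAL also
 * writes the return address (branch PC + 8, i.e. past the delay slot)
 * into $ra. */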
static u32 int_jump(struct interpreter *inter, bool link)
{
	struct lightrec_state *state = inter->state;
	u32 old_pc = int_get_branch_pc(inter);
	u32 pc = (old_pc & 0xf0000000) | (inter->op->j.imm << 2);

	if (link)
		state->regs.gpr[31] = old_pc + 8;

	if (op_flag_no_ds(inter->op->flags))
		return pc;

	return int_delay_slot(inter, pc, true);
}

static u32 int_J(struct interpreter *inter)
{
	return int_jump(inter, false);
}

static u32 int_JAL(struct interpreter *inter)
{
	return int_jump(inter, true);
}

static u32 int_jumpr(struct interpreter *inter, u8 link_reg)
{
	struct lightrec_state *state = inter->state;
	u32 old_pc = int_get_branch_pc(inter);
	u32 next_pc = state->regs.gpr[inter->op->r.rs];

	if (link_reg)
		state->regs.gpr[link_reg] = old_pc + 8;

	if (op_flag_no_ds(inter->op->flags))
		return next_pc;

	return int_delay_slot(inter, next_pc, true);
}

static u32 int_special_JR(struct interpreter *inter)
{
	return int_jumpr(inter, 0);
}

static u32 int_special_JALR(struct interpreter *inter)
{
	return int_jumpr(inter, inter->op->r.rd);
}

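/* When a taken branch is flagged as local (it targets an opcode further
 * down in the same block), keep emulating inside that block by
 * re-entering lightrec_emulate_block() at the target address, instead of
 * returning to the caller. */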
static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc)
{
	if (!inter->delay_slot && op_flag_local_branch(inter->op->flags) &&
	    (s16)inter->op->c.i.imm >= 0) {
		next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2);
		next_pc = lightrec_emulate_block(inter->state, inter->block, next_pc);
	}

	return next_pc;
}

static u32 int_branch(struct interpreter *inter, u32 pc,
		      union code code, bool branch)
{
	u32 next_pc = pc + 4 + ((s16)code.i.imm << 2);

	update_cycles_before_branch(inter);

	if (op_flag_no_ds(inter->op->flags)) {
		if (branch)
			return int_do_branch(inter, pc, next_pc);
		else
			return jump_next(inter);
	}

	if (!inter->delay_slot)
		next_pc = int_delay_slot(inter, next_pc, branch);

	if (branch)
		return int_do_branch(inter, pc, next_pc);

	if (op_flag_emulate_branch(inter->op->flags))
		return pc + 8;
	else
		return jump_after_branch(inter);
}

static u32 int_beq(struct interpreter *inter, bool bne)
{
	u32 rs, rt, old_pc = int_get_branch_pc(inter);

	rs = inter->state->regs.gpr[inter->op->i.rs];
	rt = inter->state->regs.gpr[inter->op->i.rt];

	return int_branch(inter, old_pc, inter->op->c, (rs == rt) ^ bne);
}

static u32 int_BEQ(struct interpreter *inter)
{
	return int_beq(inter, false);
}

static u32 int_BNE(struct interpreter *inter)
{
	return int_beq(inter, true);
}

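/* Shared helper for the six compare-against-zero branches. The condition
 * ((regimm && !rs) || rs > 0) ^ lt expands to:
 *   BGEZ(AL): regimm=1, lt=0  ->  taken if rs >= 0
 *   BLTZ(AL): regimm=1, lt=1  ->  taken if rs <  0
 *   BGTZ:     regimm=0, lt=0  ->  taken if rs >  0
 *   BLEZ:     regimm=0, lt=1  ->  taken if rs <= 0 */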
static u32 int_bgez(struct interpreter *inter, bool link, bool lt, bool regimm)
{
	u32 old_pc = int_get_branch_pc(inter);
	s32 rs;

	if (link)
		inter->state->regs.gpr[31] = old_pc + 8;

	rs = (s32)inter->state->regs.gpr[inter->op->i.rs];

	return int_branch(inter, old_pc, inter->op->c,
			  ((regimm && !rs) || rs > 0) ^ lt);
}

static u32 int_regimm_BLTZ(struct interpreter *inter)
{
	return int_bgez(inter, false, true, true);
}

static u32 int_regimm_BGEZ(struct interpreter *inter)
{
	return int_bgez(inter, false, false, true);
}

static u32 int_regimm_BLTZAL(struct interpreter *inter)
{
	return int_bgez(inter, true, true, true);
}

static u32 int_regimm_BGEZAL(struct interpreter *inter)
{
	return int_bgez(inter, true, false, true);
}

static u32 int_BLEZ(struct interpreter *inter)
{
	return int_bgez(inter, false, true, false);
}

static u32 int_BGTZ(struct interpreter *inter)
{
	return int_bgez(inter, false, false, false);
}

static u32 int_cfc(struct interpreter *inter)
{
	struct lightrec_state *state = inter->state;
	const struct opcode *op = inter->op;
	u32 val;

	val = lightrec_mfc(state, op->c);

	if (likely(op->r.rt))
		state->regs.gpr[op->r.rt] = val;

	return jump_next(inter);
}

static u32 int_ctc(struct interpreter *inter)
{
	struct lightrec_state *state = inter->state;
	const struct opcode *op = inter->op;

	lightrec_mtc(state, op->c, op->r.rd, state->regs.gpr[op->r.rt]);

	/* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause),
	 * return early so that the emulator will be able to check software
	 * interrupt status. */
	if (!op_flag_no_ds(inter->op->flags) &&
	    op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13))
		return int_get_ds_pc(inter, 1);
	else
		return jump_next(inter);
}

static u32 int_cp0_RFE(struct interpreter *inter)
{
	lightrec_rfe(inter->state);

	return jump_next(inter);
}

static u32 int_CP(struct interpreter *inter)
{
	lightrec_cp(inter->state, inter->op->c);

	return jump_next(inter);
}

static u32 int_ADDI(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_i *op = &inter->op->i;

	if (likely(op->rt))
		reg_cache[op->rt] = reg_cache[op->rs] + (s32)(s16)op->imm;

	return jump_next(inter);
}

static u32 int_SLTI(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_i *op = &inter->op->i;

	if (likely(op->rt))
		reg_cache[op->rt] = (s32)reg_cache[op->rs] < (s32)(s16)op->imm;

	return jump_next(inter);
}

static u32 int_SLTIU(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_i *op = &inter->op->i;

	if (likely(op->rt))
		reg_cache[op->rt] = reg_cache[op->rs] < (u32)(s32)(s16)op->imm;

	return jump_next(inter);
}

static u32 int_ANDI(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_i *op = &inter->op->i;

	if (likely(op->rt))
		reg_cache[op->rt] = reg_cache[op->rs] & op->imm;

	return jump_next(inter);
}

static u32 int_ORI(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_i *op = &inter->op->i;

	if (likely(op->rt))
		reg_cache[op->rt] = reg_cache[op->rs] | op->imm;

	return jump_next(inter);
}

static u32 int_XORI(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_i *op = &inter->op->i;

	if (likely(op->rt))
		reg_cache[op->rt] = reg_cache[op->rs] ^ op->imm;

	return jump_next(inter);
}

static u32 int_LUI(struct interpreter *inter)
{
	struct opcode_i *op = &inter->op->i;

	inter->state->regs.gpr[op->rt] = op->imm << 16;

	return jump_next(inter);
}

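/* Common path for loads and stores: lightrec_rw() performs the access and
 * returns the loaded value for load opcodes. The opcode flags are only
 * passed (and thus updated) when executing a regular block; they are left
 * alone on the load-delay path, where no opcode list backs the op. */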
static u32 int_io(struct interpreter *inter, bool is_load)
{
	struct opcode_i *op = &inter->op->i;
	u32 *reg_cache = inter->state->regs.gpr;
	u32 val, *flags = NULL;

	if (!inter->load_delay && inter->block)
		flags = &inter->op->flags;

	val = lightrec_rw(inter->state, inter->op->c,
			  reg_cache[op->rs], reg_cache[op->rt],
			  flags, inter->block, inter->offset);

	if (is_load && op->rt)
		reg_cache[op->rt] = val;

	return jump_next(inter);
}

static u32 int_load(struct interpreter *inter)
{
	return int_io(inter, true);
}

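/* Stores flagged as self-modifying code take a slow path: after the
 * write, the remainder of the block may be stale, so the next PC is
 * invalidated and returned, forcing the block to be looked up (and
 * possibly rebuilt) before execution continues. */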
static u32 int_store(struct interpreter *inter)
{
	u32 next_pc;

	if (likely(!op_flag_smc(inter->op->flags)))
		return int_io(inter, false);

	lightrec_rw(inter->state, inter->op->c,
		    inter->state->regs.gpr[inter->op->i.rs],
		    inter->state->regs.gpr[inter->op->i.rt],
		    &inter->op->flags, inter->block, inter->offset);

	next_pc = int_get_ds_pc(inter, 1);

	/* Invalidate next PC, to force the rest of the block to be rebuilt */
	lightrec_invalidate(inter->state, next_pc, 4);

	return next_pc;
}

static u32 int_LWC2(struct interpreter *inter)
{
	return int_io(inter, false);
}

static u32 int_special_SLL(struct interpreter *inter)
{
	struct opcode *op = inter->op;
	u32 rt;

	if (op->opcode) { /* Handle NOPs */
		rt = inter->state->regs.gpr[op->r.rt];
		inter->state->regs.gpr[op->r.rd] = rt << op->r.imm;
	}

	return jump_next(inter);
}

static u32 int_special_SRL(struct interpreter *inter)
{
	struct opcode *op = inter->op;
	u32 rt = inter->state->regs.gpr[op->r.rt];

	inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm;

	return jump_next(inter);
}

static u32 int_special_SRA(struct interpreter *inter)
{
	struct opcode *op = inter->op;
	s32 rt = inter->state->regs.gpr[op->r.rt];

	inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm;

	return jump_next(inter);
}

static u32 int_special_SLLV(struct interpreter *inter)
{
	struct opcode *op = inter->op;
	u32 rs = inter->state->regs.gpr[op->r.rs];
	u32 rt = inter->state->regs.gpr[op->r.rt];

	inter->state->regs.gpr[op->r.rd] = rt << (rs & 0x1f);

	return jump_next(inter);
}

static u32 int_special_SRLV(struct interpreter *inter)
{
	struct opcode *op = inter->op;
	u32 rs = inter->state->regs.gpr[op->r.rs];
	u32 rt = inter->state->regs.gpr[op->r.rt];

	inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f);

	return jump_next(inter);
}

static u32 int_special_SRAV(struct interpreter *inter)
{
	struct opcode *op = inter->op;
	u32 rs = inter->state->regs.gpr[op->r.rs];
	s32 rt = inter->state->regs.gpr[op->r.rt];

	inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f);

	return jump_next(inter);
}

static u32 int_syscall_break(struct interpreter *inter)
{
	if (inter->op->r.op == OP_SPECIAL_BREAK)
		lightrec_set_exit_flags(inter->state, LIGHTREC_EXIT_BREAK);
	else
		lightrec_set_exit_flags(inter->state, LIGHTREC_EXIT_SYSCALL);

	return int_get_ds_pc(inter, 0);
}

static u32 int_special_MFHI(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_r *op = &inter->op->r;

	if (likely(op->rd))
		reg_cache[op->rd] = reg_cache[REG_HI];

	return jump_next(inter);
}

static u32 int_special_MTHI(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;

	reg_cache[REG_HI] = reg_cache[inter->op->r.rs];

	return jump_next(inter);
}

static u32 int_special_MFLO(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_r *op = &inter->op->r;

	if (likely(op->rd))
		reg_cache[op->rd] = reg_cache[REG_LO];

	return jump_next(inter);
}

static u32 int_special_MTLO(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;

	reg_cache[REG_LO] = reg_cache[inter->op->r.rs];

	return jump_next(inter);
}

static u32 int_special_MULT(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	s32 rs = reg_cache[inter->op->r.rs];
	s32 rt = reg_cache[inter->op->r.rt];
	u8 reg_lo = get_mult_div_lo(inter->op->c);
	u8 reg_hi = get_mult_div_hi(inter->op->c);
	u64 res = (s64)rs * (s64)rt;

	if (!op_flag_no_hi(inter->op->flags))
		reg_cache[reg_hi] = res >> 32;
	if (!op_flag_no_lo(inter->op->flags))
		reg_cache[reg_lo] = res;

	return jump_next(inter);
}

static u32 int_special_MULTU(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	u32 rs = reg_cache[inter->op->r.rs];
	u32 rt = reg_cache[inter->op->r.rt];
	u8 reg_lo = get_mult_div_lo(inter->op->c);
	u8 reg_hi = get_mult_div_hi(inter->op->c);
	u64 res = (u64)rs * (u64)rt;

	if (!op_flag_no_hi(inter->op->flags))
		reg_cache[reg_hi] = res >> 32;
	if (!op_flag_no_lo(inter->op->flags))
		reg_cache[reg_lo] = res;

	return jump_next(inter);
}

static u32 int_special_DIV(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	s32 rs = reg_cache[inter->op->r.rs];
	s32 rt = reg_cache[inter->op->r.rt];
	u8 reg_lo = get_mult_div_lo(inter->op->c);
	u8 reg_hi = get_mult_div_hi(inter->op->c);
	u32 lo, hi;

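	/* Division by zero does not trap on the R3000: HI gets the
	 * dividend, and LO gets 1 for a negative dividend, 0xffffffff
	 * otherwise; (rs < 0) * 2 - 1 computes exactly that. */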
	if (rt == 0) {
		hi = rs;
		lo = (rs < 0) * 2 - 1;
	} else {
		lo = rs / rt;
		hi = rs % rt;
	}

	if (!op_flag_no_hi(inter->op->flags))
		reg_cache[reg_hi] = hi;
	if (!op_flag_no_lo(inter->op->flags))
		reg_cache[reg_lo] = lo;

	return jump_next(inter);
}

static u32 int_special_DIVU(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	u32 rs = reg_cache[inter->op->r.rs];
	u32 rt = reg_cache[inter->op->r.rt];
	u8 reg_lo = get_mult_div_lo(inter->op->c);
	u8 reg_hi = get_mult_div_hi(inter->op->c);
	u32 lo, hi;

	if (rt == 0) {
		hi = rs;
		lo = (u32)-1;
	} else {
		lo = rs / rt;
		hi = rs % rt;
	}

	if (!op_flag_no_hi(inter->op->flags))
		reg_cache[reg_hi] = hi;
	if (!op_flag_no_lo(inter->op->flags))
		reg_cache[reg_lo] = lo;

	return jump_next(inter);
}

static u32 int_special_ADD(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_r *op = &inter->op->r;
	s32 rs = reg_cache[op->rs];
	s32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = rs + rt;

	return jump_next(inter);
}

static u32 int_special_SUB(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_r *op = &inter->op->r;
	u32 rs = reg_cache[op->rs];
	u32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = rs - rt;

	return jump_next(inter);
}

static u32 int_special_AND(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_r *op = &inter->op->r;
	u32 rs = reg_cache[op->rs];
	u32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = rs & rt;

	return jump_next(inter);
}

static u32 int_special_OR(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_r *op = &inter->op->r;
	u32 rs = reg_cache[op->rs];
	u32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = rs | rt;

	return jump_next(inter);
}

static u32 int_special_XOR(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_r *op = &inter->op->r;
	u32 rs = reg_cache[op->rs];
	u32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = rs ^ rt;

	return jump_next(inter);
}

static u32 int_special_NOR(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_r *op = &inter->op->r;
	u32 rs = reg_cache[op->rs];
	u32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = ~(rs | rt);

	return jump_next(inter);
}

static u32 int_special_SLT(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_r *op = &inter->op->r;
	s32 rs = reg_cache[op->rs];
	s32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = rs < rt;

	return jump_next(inter);
}

static u32 int_special_SLTU(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_r *op = &inter->op->r;
	u32 rs = reg_cache[op->rs];
	u32 rt = reg_cache[op->rt];

	if (likely(op->rd))
		reg_cache[op->rd] = rs < rt;

	return jump_next(inter);
}

static u32 int_META_MOV(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_m *op = &inter->op->m;

	if (likely(op->rd))
		reg_cache[op->rd] = reg_cache[op->rs];

	return jump_next(inter);
}

static u32 int_META_EXTC(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_m *op = &inter->op->m;

	if (likely(op->rd))
		reg_cache[op->rd] = (u32)(s32)(s8)reg_cache[op->rs];

	return jump_next(inter);
}

static u32 int_META_EXTS(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	struct opcode_m *op = &inter->op->m;

	if (likely(op->rd))
		reg_cache[op->rd] = (u32)(s32)(s16)reg_cache[op->rs];

	return jump_next(inter);
}

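/* Meta opcode used by the optimizer for multiplications by a power of
 * two: the shift amount is encoded in c.r.op. LO receives the low 32 bits
 * of the 64-bit result, HI the high 32 bits; OP_META_MULT2 treats the
 * operand as signed, OP_META_MULTU2 as unsigned. Shift amounts of 32 or
 * more land entirely in HI. */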
static u32 int_META_MULT2(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	union code c = inter->op->c;
	u32 rs = reg_cache[c.r.rs];
	u8 reg_lo = get_mult_div_lo(c);
	u8 reg_hi = get_mult_div_hi(c);

	if (!op_flag_no_lo(inter->op->flags)) {
		if (c.r.op < 32)
			reg_cache[reg_lo] = rs << c.r.op;
		else
			reg_cache[reg_lo] = 0;
	}

	if (!op_flag_no_hi(inter->op->flags)) {
		if (c.r.op >= 32) {
			reg_cache[reg_hi] = rs << (c.r.op - 32);
		} else if (c.i.op == OP_META_MULT2) {
			if (c.r.op)
				reg_cache[reg_hi] = (s32)rs >> (32 - c.r.op);
			else
				reg_cache[reg_hi] = (s32)rs >> 31;
		} else {
			if (c.r.op)
				reg_cache[reg_hi] = rs >> (32 - c.r.op);
			else
				reg_cache[reg_hi] = 0;
		}
	}

	return jump_next(inter);
}

static u32 int_META_COM(struct interpreter *inter)
{
	u32 *reg_cache = inter->state->regs.gpr;
	union code c = inter->op->c;

	if (likely(c.m.rd))
		reg_cache[c.m.rd] = ~reg_cache[c.m.rs];

	return jump_next(inter);
}

static const lightrec_int_func_t int_standard[64] = {
	SET_DEFAULT_ELM(int_standard, int_unimplemented),
	[OP_SPECIAL]		= int_SPECIAL,
	[OP_REGIMM]		= int_REGIMM,
	[OP_J]			= int_J,
	[OP_JAL]		= int_JAL,
	[OP_BEQ]		= int_BEQ,
	[OP_BNE]		= int_BNE,
	[OP_BLEZ]		= int_BLEZ,
	[OP_BGTZ]		= int_BGTZ,
	[OP_ADDI]		= int_ADDI,
	[OP_ADDIU]		= int_ADDI,
	[OP_SLTI]		= int_SLTI,
	[OP_SLTIU]		= int_SLTIU,
	[OP_ANDI]		= int_ANDI,
	[OP_ORI]		= int_ORI,
	[OP_XORI]		= int_XORI,
	[OP_LUI]		= int_LUI,
	[OP_CP0]		= int_CP0,
	[OP_CP2]		= int_CP2,
	[OP_LB]			= int_load,
	[OP_LH]			= int_load,
	[OP_LWL]		= int_load,
	[OP_LW]			= int_load,
	[OP_LBU]		= int_load,
	[OP_LHU]		= int_load,
	[OP_LWR]		= int_load,
	[OP_SB]			= int_store,
	[OP_SH]			= int_store,
	[OP_SWL]		= int_store,
	[OP_SW]			= int_store,
	[OP_SWR]		= int_store,
	[OP_LWC2]		= int_LWC2,
	[OP_SWC2]		= int_store,

	[OP_META]		= int_META,
	[OP_META_MULT2]		= int_META_MULT2,
	[OP_META_MULTU2]	= int_META_MULT2,
	[OP_META_LWU]		= int_load,
	[OP_META_SWU]		= int_store,
};

static const lightrec_int_func_t int_special[64] = {
	SET_DEFAULT_ELM(int_special, int_unimplemented),
	[OP_SPECIAL_SLL]	= int_special_SLL,
	[OP_SPECIAL_SRL]	= int_special_SRL,
	[OP_SPECIAL_SRA]	= int_special_SRA,
	[OP_SPECIAL_SLLV]	= int_special_SLLV,
	[OP_SPECIAL_SRLV]	= int_special_SRLV,
	[OP_SPECIAL_SRAV]	= int_special_SRAV,
	[OP_SPECIAL_JR]		= int_special_JR,
	[OP_SPECIAL_JALR]	= int_special_JALR,
	[OP_SPECIAL_SYSCALL]	= int_syscall_break,
	[OP_SPECIAL_BREAK]	= int_syscall_break,
	[OP_SPECIAL_MFHI]	= int_special_MFHI,
	[OP_SPECIAL_MTHI]	= int_special_MTHI,
	[OP_SPECIAL_MFLO]	= int_special_MFLO,
	[OP_SPECIAL_MTLO]	= int_special_MTLO,
	[OP_SPECIAL_MULT]	= int_special_MULT,
	[OP_SPECIAL_MULTU]	= int_special_MULTU,
	[OP_SPECIAL_DIV]	= int_special_DIV,
	[OP_SPECIAL_DIVU]	= int_special_DIVU,
	[OP_SPECIAL_ADD]	= int_special_ADD,
	[OP_SPECIAL_ADDU]	= int_special_ADD,
	[OP_SPECIAL_SUB]	= int_special_SUB,
	[OP_SPECIAL_SUBU]	= int_special_SUB,
	[OP_SPECIAL_AND]	= int_special_AND,
	[OP_SPECIAL_OR]		= int_special_OR,
	[OP_SPECIAL_XOR]	= int_special_XOR,
	[OP_SPECIAL_NOR]	= int_special_NOR,
	[OP_SPECIAL_SLT]	= int_special_SLT,
	[OP_SPECIAL_SLTU]	= int_special_SLTU,
};

static const lightrec_int_func_t int_regimm[64] = {
	SET_DEFAULT_ELM(int_regimm, int_unimplemented),
	[OP_REGIMM_BLTZ]	= int_regimm_BLTZ,
	[OP_REGIMM_BGEZ]	= int_regimm_BGEZ,
	[OP_REGIMM_BLTZAL]	= int_regimm_BLTZAL,
	[OP_REGIMM_BGEZAL]	= int_regimm_BGEZAL,
};

static const lightrec_int_func_t int_cp0[64] = {
	SET_DEFAULT_ELM(int_cp0, int_CP),
	[OP_CP0_MFC0]		= int_cfc,
	[OP_CP0_CFC0]		= int_cfc,
	[OP_CP0_MTC0]		= int_ctc,
	[OP_CP0_CTC0]		= int_ctc,
	[OP_CP0_RFE]		= int_cp0_RFE,
};

static const lightrec_int_func_t int_cp2_basic[64] = {
	SET_DEFAULT_ELM(int_cp2_basic, int_CP),
	[OP_CP2_BASIC_MFC2]	= int_cfc,
	[OP_CP2_BASIC_CFC2]	= int_cfc,
	[OP_CP2_BASIC_MTC2]	= int_ctc,
	[OP_CP2_BASIC_CTC2]	= int_ctc,
};

static const lightrec_int_func_t int_meta[64] = {
	SET_DEFAULT_ELM(int_meta, int_unimplemented),
	[OP_META_MOV]		= int_META_MOV,
	[OP_META_EXTC]		= int_META_EXTC,
	[OP_META_EXTS]		= int_META_EXTS,
	[OP_META_COM]		= int_META_COM,
};

static u32 int_SPECIAL(struct interpreter *inter)
{
	lightrec_int_func_t f = int_special[inter->op->r.op];

	if (!HAS_DEFAULT_ELM && unlikely(!f))
		return int_unimplemented(inter);

	return execute(f, inter);
}

static u32 int_REGIMM(struct interpreter *inter)
{
	lightrec_int_func_t f = int_regimm[inter->op->r.rt];

	if (!HAS_DEFAULT_ELM && unlikely(!f))
		return int_unimplemented(inter);

	return execute(f, inter);
}

static u32 int_CP0(struct interpreter *inter)
{
	lightrec_int_func_t f = int_cp0[inter->op->r.rs];

	if (!HAS_DEFAULT_ELM && unlikely(!f))
		return int_CP(inter);

	return execute(f, inter);
}

static u32 int_CP2(struct interpreter *inter)
{
	if (inter->op->r.op == OP_CP2_BASIC) {
		lightrec_int_func_t f = int_cp2_basic[inter->op->r.rs];

		if (HAS_DEFAULT_ELM || likely(f))
			return execute(f, inter);
	}

	return int_CP(inter);
}

static u32 int_META(struct interpreter *inter)
{
	lightrec_int_func_t f = int_meta[inter->op->m.op];

	if (!HAS_DEFAULT_ELM && unlikely(!f))
		return int_unimplemented(inter);

	return execute(f, inter);
}

static u32 lightrec_emulate_block_list(struct lightrec_state *state,
				       struct block *block, u32 offset)
{
	struct interpreter inter = {
		.block = block,
		.state = state,
		.offset = offset,
		.op = &block->opcode_list[offset],
	};
	u32 pc;

	pc = lightrec_int_op(&inter);

	/* Add the cycles of the last branch */
	inter.cycles += lightrec_cycles_of_opcode(inter.state, inter.op->c);

	state->current_cycle += inter.cycles;

	return pc;
}

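/* Public entry point: convert the PC into an opcode offset within the
 * block (both addresses compared in kunseg'd form, i.e. with the memory
 * segment bits masked off) and interpret from there. */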
u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u32 pc)
{
	u32 offset = (kunseg(pc) - kunseg(block->pc)) >> 2;

	if (offset < block->nb_ops)
		return lightrec_emulate_block_list(state, block, offset);

	pr_err(PC_FMT" is outside block at "PC_FMT"\n", pc, block->pc);

	lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);

	return 0;
}

static u32 branch_get_next_pc(struct lightrec_state *state, union code c, u32 pc)
{
	switch (c.i.op) {
	case OP_SPECIAL:
		/* JR / JALR */
		return state->regs.gpr[c.r.rs];
	case OP_J:
	case OP_JAL:
		return (pc & 0xf0000000) | (c.j.imm << 2);
	default:
		/* Branch opcodes */
		return pc + 4 + ((s16)c.i.imm << 2);
	}
}

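/* Resolve a load delay that crosses a block boundary: the opcode at 'pc'
 * runs in the delay shadow of a load whose destination register 'reg'
 * still holds its old value, the loaded value being parked in
 * state->temp_reg. If that opcode is a branch, its condition and target
 * are evaluated first, the register is then written, and the delay slot
 * is handled by int_delay_slot(); otherwise the single opcode is run and
 * the register is written afterwards, unless the opcode itself wrote it. */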
u32 lightrec_handle_load_delay(struct lightrec_state *state,
			       struct block *block, u32 pc, u32 reg)
{
	union code c = lightrec_read_opcode(state, pc);
	struct opcode op[2] = {
		{
			.c = c,
			.flags = 0,
		},
		{
			.flags = 0,
		},
	};
	struct interpreter inter = {
		.block = block,
		.state = state,
		.op = op,
		.load_delay = true,
	};
	bool branch_taken;
	u32 reg_mask, next_pc;

	if (has_delay_slot(c)) {
		op[1].c = lightrec_read_opcode(state, pc + 4);

		branch_taken = is_branch_taken(state->regs.gpr, c);
		next_pc = branch_get_next_pc(state, c, pc);

		/* Branch was evaluated, we can write the load opcode's target
		 * register now. */
		state->regs.gpr[reg] = state->temp_reg;

		/* Handle JALR / regimm opcodes setting $ra (or any other
		 * register in the case of JALR) */
		reg_mask = (u32)opcode_write_mask(c);
		if (reg_mask)
			state->regs.gpr[ctz32(reg_mask)] = pc + 8;

		/* Handle delay slot of the branch opcode */
		pc = int_delay_slot(&inter, next_pc, branch_taken);
	} else {
		/* Make sure we only run one instruction */
		inter.delay_slot = true;

		lightrec_int_op(&inter);
		pc += 4;

		if (!opcode_writes_register(c, reg))
			state->regs.gpr[reg] = state->temp_reg;
	}

	state->current_cycle += inter.cycles;

	return pc;
}