cdrom: adjust a timing hack
[pcsx_rearmed.git] / deps / lightrec / optimizer.c
CommitLineData
98fa08a5 1// SPDX-License-Identifier: LGPL-2.1-or-later
d16005f8 2/*
98fa08a5 3 * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
d16005f8
PC
4 */
5
98fa08a5 6#include "lightrec-config.h"
d16005f8
PC
7#include "disassembler.h"
8#include "lightrec.h"
9#include "memmanager.h"
10#include "optimizer.h"
11#include "regcache.h"
12
13#include <errno.h>
14#include <stdbool.h>
15#include <stdlib.h>
98fa08a5
PC
16#include <string.h>
17
18#define IF_OPT(opt, ptr) ((opt) ? (ptr) : NULL)
d16005f8
PC
19
20struct optimizer_list {
21 void (**optimizers)(struct opcode *);
22 unsigned int nb_optimizers;
23};
24
98fa08a5
PC
25static bool is_nop(union code op);
26
27bool is_unconditional_jump(union code c)
28{
29 switch (c.i.op) {
30 case OP_SPECIAL:
31 return c.r.op == OP_SPECIAL_JR || c.r.op == OP_SPECIAL_JALR;
32 case OP_J:
33 case OP_JAL:
34 return true;
35 case OP_BEQ:
36 case OP_BLEZ:
37 return c.i.rs == c.i.rt;
38 case OP_REGIMM:
39 return (c.r.rt == OP_REGIMM_BGEZ ||
40 c.r.rt == OP_REGIMM_BGEZAL) && c.i.rs == 0;
41 default:
42 return false;
43 }
44}
45
46bool is_syscall(union code c)
47{
48 return (c.i.op == OP_SPECIAL && c.r.op == OP_SPECIAL_SYSCALL) ||
49 (c.i.op == OP_CP0 && (c.r.rs == OP_CP0_MTC0 ||
50 c.r.rs == OP_CP0_CTC0) &&
51 (c.r.rd == 12 || c.r.rd == 13));
52}
53
/* Return a bitmask of the emulated registers read by the given opcode.
 * Bit N set means register N is read; BIT(REG_LO)/BIT(REG_HI) cover the
 * multiply/divide result registers. */
static u64 opcode_read_mask(union code op)
{
	switch (op.i.op) {
	case OP_SPECIAL:
		switch (op.r.op) {
		case OP_SPECIAL_SYSCALL:
		case OP_SPECIAL_BREAK:
			return 0;
		case OP_SPECIAL_JR:
		case OP_SPECIAL_JALR:
		case OP_SPECIAL_MTHI:
		case OP_SPECIAL_MTLO:
			return BIT(op.r.rs);
		case OP_SPECIAL_MFHI:
			return BIT(REG_HI);
		case OP_SPECIAL_MFLO:
			return BIT(REG_LO);
		case OP_SPECIAL_SLL:
			/* SLL with a zero shift amount is treated as reading
			 * nothing (it is handled as a NOP/MOV elsewhere). */
			if (!op.r.imm)
				return 0;
			fallthrough;
		case OP_SPECIAL_SRL:
		case OP_SPECIAL_SRA:
			/* Constant shifts only read the rt operand */
			return BIT(op.r.rt);
		default:
			return BIT(op.r.rs) | BIT(op.r.rt);
		}
	case OP_CP0:
		switch (op.r.rs) {
		case OP_CP0_MTC0:
		case OP_CP0_CTC0:
			return BIT(op.r.rt);
		default:
			return 0;
		}
	case OP_CP2:
		if (op.r.op == OP_CP2_BASIC) {
			switch (op.r.rs) {
			case OP_CP2_BASIC_MTC2:
			case OP_CP2_BASIC_CTC2:
				return BIT(op.r.rt);
			default:
				break;
			}
		}
		return 0;
	case OP_J:
	case OP_JAL:
	case OP_LUI:
		return 0;
	case OP_BEQ:
		/* BEQ rX, rX is unconditional - no register value matters */
		if (op.i.rs == op.i.rt)
			return 0;
		fallthrough;
	case OP_BNE:
	case OP_LWL:
	case OP_LWR:
	case OP_SB:
	case OP_SH:
	case OP_SWL:
	case OP_SW:
	case OP_SWR:
		/* LWL/LWR merge into rt, so they read it as well */
		return BIT(op.i.rs) | BIT(op.i.rt);
	default:
		return BIT(op.i.rs);
	}
}
121
ba3814c1 122static u64 mult_div_write_mask(union code op)
d16005f8 123{
98fa08a5
PC
124 u64 flags;
125
ba3814c1
PC
126 if (!OPT_FLAG_MULT_DIV)
127 return BIT(REG_LO) | BIT(REG_HI);
128
129 if (op.r.rd)
130 flags = BIT(op.r.rd);
131 else
132 flags = BIT(REG_LO);
133 if (op.r.imm)
134 flags |= BIT(op.r.imm);
135 else
136 flags |= BIT(REG_HI);
137
138 return flags;
139}
140
/* Return a bitmask of the emulated registers written by the given opcode. */
static u64 opcode_write_mask(union code op)
{
	switch (op.i.op) {
	case OP_META_MULT2:
	case OP_META_MULTU2:
		return mult_div_write_mask(op);
	case OP_SPECIAL:
		switch (op.r.op) {
		case OP_SPECIAL_JR:
		case OP_SPECIAL_SYSCALL:
		case OP_SPECIAL_BREAK:
			return 0;
		case OP_SPECIAL_MULT:
		case OP_SPECIAL_MULTU:
		case OP_SPECIAL_DIV:
		case OP_SPECIAL_DIVU:
			return mult_div_write_mask(op);
		case OP_SPECIAL_MTHI:
			return BIT(REG_HI);
		case OP_SPECIAL_MTLO:
			return BIT(REG_LO);
		case OP_SPECIAL_SLL:
			/* SLL r0, r0, #0 (the canonical NOP) writes nothing */
			if (!op.r.imm)
				return 0;
			fallthrough;
		default:
			return BIT(op.r.rd);
		}
	case OP_ADDI:
	case OP_ADDIU:
	case OP_SLTI:
	case OP_SLTIU:
	case OP_ANDI:
	case OP_ORI:
	case OP_XORI:
	case OP_LUI:
	case OP_LB:
	case OP_LH:
	case OP_LWL:
	case OP_LW:
	case OP_LBU:
	case OP_LHU:
	case OP_LWR:
	case OP_META_EXTC:
	case OP_META_EXTS:
		return BIT(op.i.rt);
	case OP_JAL:
		/* JAL links into $ra */
		return BIT(31);
	case OP_CP0:
		switch (op.r.rs) {
		case OP_CP0_MFC0:
		case OP_CP0_CFC0:
			return BIT(op.i.rt);
		default:
			return 0;
		}
	case OP_CP2:
		if (op.r.op == OP_CP2_BASIC) {
			switch (op.r.rs) {
			case OP_CP2_BASIC_MFC2:
			case OP_CP2_BASIC_CFC2:
				return BIT(op.i.rt);
			default:
				break;
			}
		}
		return 0;
	case OP_REGIMM:
		switch (op.r.rt) {
		case OP_REGIMM_BLTZAL:
		case OP_REGIMM_BGEZAL:
			/* Linking branches write $ra */
			return BIT(31);
		default:
			return 0;
		}
	case OP_META_MOV:
		return BIT(op.r.rd);
	default:
		return 0;
	}
}
222
223bool opcode_reads_register(union code op, u8 reg)
224{
225 return opcode_read_mask(op) & BIT(reg);
226}
227
228bool opcode_writes_register(union code op, u8 reg)
229{
230 return opcode_write_mask(op) & BIT(reg);
231}
232
/* Walk backwards from 'offset' looking for the opcode that last wrote
 * 'reg'.  Returns its index, or -1 when no safe writer exists: the scan
 * stops at sync points, at delay slots, and when the register is read in
 * between (callers retarget the writer, which would then be unsafe). */
static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 reg)
{
	union code c;
	unsigned int i;

	if (op_flag_sync(list[offset].flags))
		return -1;

	for (i = offset; i > 0; i--) {
		c = list[i - 1].c;

		if (opcode_writes_register(c, reg)) {
			/* A writer sitting in a delay slot cannot be used */
			if (i > 1 && has_delay_slot(list[i - 2].c))
				break;

			return i - 1;
		}

		if (op_flag_sync(list[i - 1].flags) ||
		    has_delay_slot(c) ||
		    opcode_reads_register(c, reg))
			break;
	}

	return -1;
}
259
/* Walk forwards from 'offset' looking for the next opcode that reads
 * 'reg'.  Returns its index, or -1 when the scan hits a sync point, a
 * branch, or an opcode that overwrites the register first.
 * NOTE(review): the loop has no explicit upper bound; it relies on every
 * block ending with a branch (has_delay_slot) to terminate - confirm. */
static int find_next_reader(const struct opcode *list, unsigned int offset, u8 reg)
{
	unsigned int i;
	union code c;

	if (op_flag_sync(list[offset].flags))
		return -1;

	for (i = offset; ; i++) {
		c = list[i].c;

		if (opcode_reads_register(c, reg)) {
			/* A reader sitting in a delay slot cannot be used */
			if (i > 0 && has_delay_slot(list[i - 1].c))
				break;

			return i;
		}

		if (op_flag_sync(list[i].flags) ||
		    has_delay_slot(c) || opcode_writes_register(c, reg))
			break;
	}

	return -1;
}
285
/* Return true if 'reg' is proven dead after the opcode at 'offset',
 * i.e. it is overwritten before ever being read again.  Conservative:
 * returns false at sync points, and when control leaves the block via a
 * branch whose delay slot does not itself overwrite the register. */
static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg)
{
	unsigned int i;

	if (op_flag_sync(list[offset].flags))
		return false;

	for (i = offset + 1; ; i++) {
		if (opcode_reads_register(list[i].c, reg))
			return false;

		if (opcode_writes_register(list[i].c, reg))
			return true;

		if (has_delay_slot(list[i].c)) {
			/* Branch found: only its delay slot can still prove
			 * the register dead before control leaves. */
			if (op_flag_no_ds(list[i].flags) ||
			    opcode_reads_register(list[i + 1].c, reg))
				return false;

			return opcode_writes_register(list[i + 1].c, reg);
		}
	}
}
309
98fa08a5
PC
310static bool reg_is_read(const struct opcode *list,
311 unsigned int a, unsigned int b, u8 reg)
312{
313 /* Return true if reg is read in one of the opcodes of the interval
314 * [a, b[ */
315 for (; a < b; a++) {
316 if (!is_nop(list[a].c) && opcode_reads_register(list[a].c, reg))
317 return true;
318 }
319
320 return false;
321}
322
323static bool reg_is_written(const struct opcode *list,
324 unsigned int a, unsigned int b, u8 reg)
325{
326 /* Return true if reg is written in one of the opcodes of the interval
327 * [a, b[ */
328
329 for (; a < b; a++) {
330 if (!is_nop(list[a].c) && opcode_writes_register(list[a].c, reg))
331 return true;
332 }
333
334 return false;
335}
336
337static bool reg_is_read_or_written(const struct opcode *list,
338 unsigned int a, unsigned int b, u8 reg)
339{
340 return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
341}
342
343static bool opcode_is_load(union code op)
344{
345 switch (op.i.op) {
346 case OP_LB:
347 case OP_LH:
348 case OP_LWL:
349 case OP_LW:
350 case OP_LBU:
351 case OP_LHU:
352 case OP_LWR:
353 case OP_LWC2:
354 return true;
355 default:
356 return false;
357 }
358}
359
360static bool opcode_is_store(union code op)
361{
362 switch (op.i.op) {
363 case OP_SB:
364 case OP_SH:
365 case OP_SW:
366 case OP_SWL:
367 case OP_SWR:
368 case OP_SWC2:
369 return true;
370 default:
371 return false;
372 }
373}
374
ba3814c1
PC
375static u8 opcode_get_io_size(union code op)
376{
377 switch (op.i.op) {
378 case OP_LB:
379 case OP_LBU:
380 case OP_SB:
381 return 8;
382 case OP_LH:
383 case OP_LHU:
384 case OP_SH:
385 return 16;
386 default:
387 return 32;
388 }
389}
390
98fa08a5
PC
391bool opcode_is_io(union code op)
392{
393 return opcode_is_load(op) || opcode_is_store(op);
394}
395
d16005f8
PC
396/* TODO: Complete */
397static bool is_nop(union code op)
398{
399 if (opcode_writes_register(op, 0)) {
400 switch (op.i.op) {
401 case OP_CP0:
402 return op.r.rs != OP_CP0_MFC0;
403 case OP_LB:
404 case OP_LH:
405 case OP_LWL:
406 case OP_LW:
407 case OP_LBU:
408 case OP_LHU:
409 case OP_LWR:
410 return false;
411 default:
412 return true;
413 }
414 }
415
416 switch (op.i.op) {
417 case OP_SPECIAL:
418 switch (op.r.op) {
419 case OP_SPECIAL_AND:
420 return op.r.rd == op.r.rt && op.r.rd == op.r.rs;
421 case OP_SPECIAL_ADD:
422 case OP_SPECIAL_ADDU:
423 return (op.r.rd == op.r.rt && op.r.rs == 0) ||
424 (op.r.rd == op.r.rs && op.r.rt == 0);
425 case OP_SPECIAL_SUB:
426 case OP_SPECIAL_SUBU:
427 return op.r.rd == op.r.rs && op.r.rt == 0;
428 case OP_SPECIAL_OR:
429 if (op.r.rd == op.r.rt)
430 return op.r.rd == op.r.rs || op.r.rs == 0;
431 else
432 return (op.r.rd == op.r.rs) && op.r.rt == 0;
433 case OP_SPECIAL_SLL:
434 case OP_SPECIAL_SRA:
435 case OP_SPECIAL_SRL:
436 return op.r.rd == op.r.rt && op.r.imm == 0;
98fa08a5
PC
437 case OP_SPECIAL_MFHI:
438 case OP_SPECIAL_MFLO:
439 return op.r.rd == 0;
d16005f8
PC
440 default:
441 return false;
442 }
443 case OP_ORI:
444 case OP_ADDI:
445 case OP_ADDIU:
446 return op.i.rt == op.i.rs && op.i.imm == 0;
447 case OP_BGTZ:
448 return (op.i.rs == 0 || op.i.imm == 1);
449 case OP_REGIMM:
450 return (op.i.op == OP_REGIMM_BLTZ ||
451 op.i.op == OP_REGIMM_BLTZAL) &&
452 (op.i.rs == 0 || op.i.imm == 1);
453 case OP_BNE:
454 return (op.i.rs == op.i.rt || op.i.imm == 1);
455 default:
456 return false;
457 }
458}
459
460bool load_in_delay_slot(union code op)
461{
462 switch (op.i.op) {
463 case OP_CP0:
464 switch (op.r.rs) {
465 case OP_CP0_MFC0:
466 case OP_CP0_CFC0:
467 return true;
468 default:
469 break;
470 }
471
472 break;
473 case OP_CP2:
474 if (op.r.op == OP_CP2_BASIC) {
475 switch (op.r.rs) {
476 case OP_CP2_BASIC_MFC2:
477 case OP_CP2_BASIC_CFC2:
478 return true;
479 default:
480 break;
481 }
482 }
483
484 break;
485 case OP_LB:
486 case OP_LH:
487 case OP_LW:
488 case OP_LWL:
489 case OP_LWR:
490 case OP_LBU:
491 case OP_LHU:
492 return true;
493 default:
494 break;
495 }
496
497 return false;
498}
499
22eee2ac
PC
/* Constant propagation step.
 *
 * 'prev' is the opcode whose effects are being applied; 'known' is a
 * bitmask of registers whose values are statically known, with the values
 * themselves in v[].  'op' is the opcode about to be processed (only its
 * sync flag is consulted).  Returns the updated 'known' mask. */
static u32 lightrec_propagate_consts(const struct opcode *op,
				     const struct opcode *prev,
				     u32 known, u32 *v)
{
	union code c = prev->c;

	/* Register $zero is always, well, zero */
	known |= BIT(0);
	v[0] = 0;

	/* At a sync point all knowledge except $zero is discarded */
	if (op_flag_sync(op->flags))
		return BIT(0);

	switch (c.i.op) {
	case OP_SPECIAL:
		switch (c.r.op) {
		case OP_SPECIAL_SLL:
			if (known & BIT(c.r.rt)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] << c.r.imm;
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRL:
			if (known & BIT(c.r.rt)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] >> c.r.imm;
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRA:
			/* Arithmetic shift: sign-extend via s32 cast */
			if (known & BIT(c.r.rt)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm;
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SLLV:
			/* Variable shifts only use the low 5 bits of rs */
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRLV:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRAV:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rt]
					  >> (v[c.r.rs] & 0x1f);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_ADD:
		case OP_SPECIAL_ADDU:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SUB:
		case OP_SPECIAL_SUBU:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] - v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_AND:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] & v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_OR:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] | v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_XOR:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_NOR:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SLT:
			/* Signed compare */
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SLTU:
			/* Unsigned compare */
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rs] < v[c.r.rt];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_MULT:
		case OP_SPECIAL_MULTU:
		case OP_SPECIAL_DIV:
		case OP_SPECIAL_DIVU:
			/* With the mult/div flag, non-zero rd/imm fields are
			 * the redirected LO/HI targets; their values become
			 * unknown. */
			if (OPT_FLAG_MULT_DIV && c.r.rd)
				known &= ~BIT(c.r.rd);
			if (OPT_FLAG_MULT_DIV && c.r.imm)
				known &= ~BIT(c.r.imm);
			break;
		case OP_SPECIAL_MFLO:
		case OP_SPECIAL_MFHI:
			known &= ~BIT(c.r.rd);
			break;
		default:
			break;
		}
		break;
	case OP_META_MULT2:
	case OP_META_MULTU2:
		/* Multiply by a known power of two: rd receives the low
		 * word, imm the high word of the 64-bit shift result. */
		if (OPT_FLAG_MULT_DIV && (known & BIT(c.r.rs))) {
			if (c.r.rd) {
				known |= BIT(c.r.rd);

				if (c.r.op < 32)
					v[c.r.rd] = v[c.r.rs] << c.r.op;
				else
					v[c.r.rd] = 0;
			}

			if (c.r.imm) {
				known |= BIT(c.r.imm);

				if (c.r.op >= 32)
					v[c.r.imm] = v[c.r.rs] << (c.r.op - 32);
				else if (c.i.op == OP_META_MULT2)
					v[c.r.imm] = (s32) v[c.r.rs] >> (32 - c.r.op);
				else
					v[c.r.imm] = v[c.r.rs] >> (32 - c.r.op);
			}
		} else {
			if (OPT_FLAG_MULT_DIV && c.r.rd)
				known &= ~BIT(c.r.rd);
			if (OPT_FLAG_MULT_DIV && c.r.imm)
				known &= ~BIT(c.r.imm);
		}
		break;
	case OP_REGIMM:
		/* Branches write no GPR tracked here (link writes $ra but
		 * its value is not propagated) */
		break;
	case OP_ADDI:
	case OP_ADDIU:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_SLTI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_SLTIU:
		/* The immediate is sign-extended then compared unsigned */
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_ANDI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] & c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_ORI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] | c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_XORI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] ^ c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_LUI:
		/* LUI always yields a known value */
		known |= BIT(c.i.rt);
		v[c.i.rt] = c.i.imm << 16;
		break;
	case OP_CP0:
		switch (c.r.rs) {
		case OP_CP0_MFC0:
		case OP_CP0_CFC0:
			/* CP0 reads produce unknown values */
			known &= ~BIT(c.r.rt);
			break;
		}
		break;
	case OP_CP2:
		if (c.r.op == OP_CP2_BASIC) {
			switch (c.r.rs) {
			case OP_CP2_BASIC_MFC2:
			case OP_CP2_BASIC_CFC2:
				/* CP2 reads produce unknown values */
				known &= ~BIT(c.r.rt);
				break;
			}
		}
		break;
	case OP_LB:
	case OP_LH:
	case OP_LWL:
	case OP_LW:
	case OP_LBU:
	case OP_LHU:
	case OP_LWR:
	case OP_LWC2:
		/* Memory loads produce unknown values */
		known &= ~BIT(c.i.rt);
		break;
	case OP_META_MOV:
		if (known & BIT(c.r.rs)) {
			known |= BIT(c.r.rd);
			v[c.r.rd] = v[c.r.rs];
		} else {
			known &= ~BIT(c.r.rd);
		}
		break;
	case OP_META_EXTC:
		/* Sign-extend byte */
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = (s32)(s8)v[c.i.rs];
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_META_EXTS:
		/* Sign-extend halfword */
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = (s32)(s16)v[c.i.rs];
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	default:
		break;
	}

	return known;
}
790
/* Pattern-match a SLL #16/#24 followed by a SRA with the same shift
 * amount (the classic cast-to-s16/s8 idiom) and rewrite the pair into a
 * single EXTS/EXTC meta-opcode, or - when the value comes straight from
 * a LHU/LBU - into a signed LH/LB.  'offset' points at the SRA. */
static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset)
{
	struct opcode *prev, *prev2 = NULL, *curr = &list[offset];
	struct opcode *to_change, *to_nop;
	int idx, idx2;

	/* Only #16 (s16 cast) and #24 (s8 cast) shifts are of interest */
	if (curr->r.imm != 24 && curr->r.imm != 16)
		return;

	/* Locate the matching SLL that feeds the SRA */
	idx = find_prev_writer(list, offset, curr->r.rt);
	if (idx < 0)
		return;

	prev = &list[idx];

	if (prev->i.op != OP_SPECIAL || prev->r.op != OP_SPECIAL_SLL ||
	    prev->r.imm != curr->r.imm || prev->r.rd != curr->r.rt)
		return;

	if (prev->r.rd != prev->r.rt && curr->r.rd != curr->r.rt) {
		/* sll rY, rX, 16
		 * ...
		 * srl rZ, rY, 16 */

		if (!reg_is_dead(list, offset, curr->r.rt) ||
		    reg_is_read_or_written(list, idx, offset, curr->r.rd))
			return;

		/* If rY is dead after the SRL, and rZ is not used after the SLL,
		 * we can change rY to rZ */

		pr_debug("Detected SLL/SRA with middle temp register\n");
		/* Both opcodes are retargeted to rZ (note the second line
		 * reads the just-updated field on purpose). */
		prev->r.rd = curr->r.rd;
		curr->r.rt = prev->r.rd;
	}

	/* We got a SLL/SRA combo. If imm #16, that's a cast to u16.
	 * If imm #24 that's a cast to u8.
	 *
	 * First of all, make sure that the target register of the SLL is not
	 * read before the SRA. */

	if (prev->r.rd == prev->r.rt) {
		/* sll rX, rX, 16
		 * ...
		 * srl rY, rX, 16 */
		to_change = curr;
		to_nop = prev;

		/* rX is used after the SRA - we cannot convert it. */
		if (prev->r.rd != curr->r.rd && !reg_is_dead(list, offset, prev->r.rd))
			return;
	} else {
		/* sll rY, rX, 16
		 * ...
		 * srl rY, rY, 16 */
		to_change = prev;
		to_nop = curr;
	}

	idx2 = find_prev_writer(list, idx, prev->r.rt);
	if (idx2 >= 0) {
		/* Note that PSX games sometimes do casts after
		 * a LHU or LBU; in this case we can change the
		 * load opcode to a LH or LB, and the cast can
		 * be changed to a MOV or a simple NOP. */

		prev2 = &list[idx2];

		if (curr->r.rd != prev2->i.rt &&
		    !reg_is_dead(list, offset, prev2->i.rt))
			prev2 = NULL;
		else if (curr->r.imm == 16 && prev2->i.op == OP_LHU)
			prev2->i.op = OP_LH;
		else if (curr->r.imm == 24 && prev2->i.op == OP_LBU)
			prev2->i.op = OP_LB;
		else
			prev2 = NULL;

		if (prev2) {
			if (curr->r.rd == prev2->i.rt) {
				/* Load already writes the final register:
				 * the whole cast disappears. */
				to_change->opcode = 0;
			} else if (reg_is_dead(list, offset, prev2->i.rt) &&
				   !reg_is_read_or_written(list, idx2 + 1, offset, curr->r.rd)) {
				/* The target register of the SRA is dead after the
				 * LBU/LHU; we can change the target register of the
				 * LBU/LHU to the one of the SRA. */
				prev2->i.rt = curr->r.rd;
				to_change->opcode = 0;
			} else {
				/* Keep a register-to-register move */
				to_change->i.op = OP_META_MOV;
				to_change->r.rd = curr->r.rd;
				to_change->r.rs = prev2->i.rt;
			}

			if (to_nop->r.imm == 24)
				pr_debug("Convert LBU+SLL+SRA to LB\n");
			else
				pr_debug("Convert LHU+SLL+SRA to LH\n");
		}
	}

	if (!prev2) {
		/* No matching load: fold the pair into a sign-extension
		 * meta-opcode instead. */
		pr_debug("Convert SLL/SRA #%u to EXT%c\n",
			 prev->r.imm,
			 prev->r.imm == 24 ? 'C' : 'S');

		if (to_change == prev) {
			to_change->i.rs = prev->r.rt;
			to_change->i.rt = curr->r.rd;
		} else {
			to_change->i.rt = curr->r.rd;
			to_change->i.rs = prev->r.rt;
		}

		if (to_nop->r.imm == 24)
			to_change->i.op = OP_META_EXTC;
		else
			to_change->i.op = OP_META_EXTS;
	}

	/* The surviving opcode of the pair becomes a NOP */
	to_nop->opcode = 0;
}
914
02487de7
PC
/* Drop a LUI whose effect is redundant:
 * - if constant propagation shows the target register already holds the
 *   exact value, the LUI becomes a NOP;
 * - a "LUI rX, 0" can be folded into its next reader by substituting
 *   register $zero for rX, then NOP'd. */
static void lightrec_remove_useless_lui(struct block *block, unsigned int offset,
					u32 known, u32 *values)
{
	struct opcode *list = block->opcode_list,
		      *op = &block->opcode_list[offset];
	int reader;

	if (!op_flag_sync(op->flags) && (known & BIT(op->i.rt)) &&
	    values[op->i.rt] == op->i.imm << 16) {
		pr_debug("Converting duplicated LUI to NOP\n");
		op->opcode = 0x0;
		return;
	}

	/* Only "LUI rX, 0" with a real target qualifies below */
	if (op->i.imm != 0 || op->i.rt == 0)
		return;

	reader = find_next_reader(list, offset + 1, op->i.rt);
	if (reader <= 0)
		return;

	/* Safe only if the register dies right after that single reader */
	if (opcode_writes_register(list[reader].c, op->i.rt) ||
	    reg_is_dead(list, reader, op->i.rt)) {
		pr_debug("Removing useless LUI 0x0\n");

		/* Redirect the reader's operand(s) to $zero */
		if (list[reader].i.rs == op->i.rt)
			list[reader].i.rs = 0;
		if (list[reader].i.op == OP_SPECIAL &&
		    list[reader].i.rt == op->i.rt)
			list[reader].i.rt = 0;
		op->opcode = 0x0;
	}
}
948
/* If the register set by this LUI is, until it is overwritten, only used
 * as a load base or store address (never as a store value or ALU input),
 * rewrite the immediate to its kunseg'd (unmapped-segment) equivalent so
 * address translation can be skipped at runtime. */
static void lightrec_modify_lui(struct block *block, unsigned int offset)
{
	union code c, *lui = &block->opcode_list[offset].c;
	bool stop = false, stop_next = false;
	unsigned int i;

	for (i = offset + 1; !stop && i < block->nb_ops; i++) {
		c = block->opcode_list[i].c;
		/* Process exactly one opcode past a branch (its delay slot) */
		stop = stop_next;

		/* A store of rX (value, not address) or any non-load read
		 * of rX forbids the transformation. */
		if ((opcode_is_store(c) && c.i.rt == lui->i.rt)
		    || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt)))
			break;

		if (opcode_writes_register(c, lui->i.rt)) {
			/* NOTE(review): '-' binds tighter than '<<', so this
			 * prints (i - 1) << 2, the byte offset of the
			 * previous opcode - presumably intended. */
			pr_debug("Convert LUI at offset 0x%x to kuseg\n",
				 i - 1 << 2);
			lui->i.imm = kunseg(lui->i.imm << 16) >> 16;
			break;
		}

		if (has_delay_slot(c))
			stop_next = true;
	}
}
974
03535202
PC
/* Rewrite J opcodes whose target lies within signed-16-bit branch range
 * as "BEQ $zero, $zero", which is always taken and cheaper to emit. */
static int lightrec_transform_branches(struct lightrec_state *state,
				       struct block *block)
{
	struct opcode *op;
	unsigned int i;
	s32 offset;

	for (i = 0; i < block->nb_ops; i++) {
		op = &block->opcode_list[i];

		switch (op->i.op) {
		case OP_J:
			/* Transform J opcode into BEQ $zero, $zero if possible. */
			/* Offset, in instructions, from the delay slot to the
			 * jump target (J keeps the top 4 PC bits). */
			offset = (s32)((block->pc & 0xf0000000) >> 2 | op->j.imm)
				- (s32)(block->pc >> 2) - (s32)i - 1;

			/* Only if the offset fits in the 16-bit immediate */
			if (offset == (s16)offset) {
				pr_debug("Transform J into BEQ $zero, $zero\n");
				op->i.op = OP_BEQ;
				op->i.rs = 0;
				op->i.rt = 0;
				op->i.imm = offset;

			}
			fallthrough;
		default:
			break;
		}
	}

	return 0;
}
1007
ba3814c1
PC
1008static inline bool is_power_of_two(u32 value)
1009{
1010 return popcount32(value) == 1;
1011}
1012
/* Main opcode-transformation pass.
 *
 * Runs constant propagation across the block while rewriting opcodes:
 * useless opcodes become NOPs, trivial ALU forms become META_MOV,
 * SLL/SRA cast idioms are folded, multiplications by known powers of two
 * become META_MULT2/META_MULTU2, and LUIs are simplified. */
static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
	struct opcode *list = block->opcode_list;
	struct opcode *prev, *op = NULL;
	u32 known = BIT(0);
	u32 values[32] = { 0 };
	unsigned int i;
	u8 tmp;

	for (i = 0; i < block->nb_ops; i++) {
		prev = op;
		op = &list[i];

		/* Fold the effects of the previous opcode into the
		 * known-constants state. */
		if (prev)
			known = lightrec_propagate_consts(op, prev, known, values);

		/* Transform all opcodes detected as useless to real NOPs
		 * (0x0: SLL r0, r0, #0) */
		if (op->opcode != 0 && is_nop(op->c)) {
			pr_debug("Converting useless opcode 0x%08x to NOP\n",
					op->opcode);
			op->opcode = 0x0;
		}

		if (!op->opcode)
			continue;

		switch (op->i.op) {
		case OP_BEQ:
			/* Canonicalize operands: unconditional form uses
			 * $zero twice; otherwise keep $zero in rt. */
			if (op->i.rs == op->i.rt) {
				op->i.rs = 0;
				op->i.rt = 0;
			} else if (op->i.rs == 0) {
				op->i.rs = op->i.rt;
				op->i.rt = 0;
			}
			break;

		case OP_BNE:
			/* Canonicalize: keep $zero in the rt slot */
			if (op->i.rs == 0) {
				op->i.rs = op->i.rt;
				op->i.rt = 0;
			}
			break;

		case OP_LUI:
			/* Not safe to scan past a branch's delay slot */
			if (!prev || !has_delay_slot(prev->c))
				lightrec_modify_lui(block, i);
			lightrec_remove_useless_lui(block, i, known, values);
			break;

		/* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
		 * with register $zero to the MOV meta-opcode */
		case OP_ORI:
		case OP_ADDI:
		case OP_ADDIU:
			if (op->i.imm == 0) {
				pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
				op->i.op = OP_META_MOV;
				op->r.rd = op->i.rt;
			}
			break;
		case OP_SPECIAL:
			switch (op->r.op) {
			case OP_SPECIAL_SRA:
				if (op->r.imm == 0) {
					pr_debug("Convert SRA #0 to MOV\n");
					op->i.op = OP_META_MOV;
					op->r.rs = op->r.rt;
					break;
				}

				/* Try to fold a SLL/SRA cast idiom */
				lightrec_optimize_sll_sra(block->opcode_list, i);
				break;
			case OP_SPECIAL_SLL:
			case OP_SPECIAL_SRL:
				if (op->r.imm == 0) {
					pr_debug("Convert SLL/SRL #0 to MOV\n");
					op->i.op = OP_META_MOV;
					op->r.rs = op->r.rt;
				}
				break;
			case OP_SPECIAL_MULT:
			case OP_SPECIAL_MULTU:
				/* If the power-of-two operand is rs, swap the
				 * operands so it always sits in rt. */
				if ((known & BIT(op->r.rs)) &&
				    is_power_of_two(values[op->r.rs])) {
					tmp = op->c.i.rs;
					op->c.i.rs = op->c.i.rt;
					op->c.i.rt = tmp;
				} else if (!(known & BIT(op->r.rt)) ||
					   !is_power_of_two(values[op->r.rt])) {
					break;
				}

				pr_debug("Multiply by power-of-two: %u\n",
					 values[op->r.rt]);

				if (op->r.op == OP_SPECIAL_MULT)
					op->i.op = OP_META_MULT2;
				else
					op->i.op = OP_META_MULTU2;

				/* Encode the shift amount in the sub-op field */
				op->r.op = ctz32(values[op->r.rt]);
				break;
			case OP_SPECIAL_OR:
			case OP_SPECIAL_ADD:
			case OP_SPECIAL_ADDU:
				if (op->r.rs == 0) {
					pr_debug("Convert OR/ADD $zero to MOV\n");
					op->i.op = OP_META_MOV;
					op->r.rs = op->r.rt;
				}
				fallthrough;
			case OP_SPECIAL_SUB:
			case OP_SPECIAL_SUBU:
				if (op->r.rt == 0) {
					pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
					op->i.op = OP_META_MOV;
				}
				fallthrough;
			default:
				break;
			}
			fallthrough;
		default:
			break;
		}
	}

	return 0;
}
1144
ba3814c1
PC
1145static bool lightrec_can_switch_delay_slot(union code op, union code next_op)
1146{
1147 switch (op.i.op) {
1148 case OP_SPECIAL:
1149 switch (op.r.op) {
1150 case OP_SPECIAL_JALR:
1151 if (opcode_reads_register(next_op, op.r.rd) ||
1152 opcode_writes_register(next_op, op.r.rd))
1153 return false;
1154 fallthrough;
1155 case OP_SPECIAL_JR:
1156 if (opcode_writes_register(next_op, op.r.rs))
1157 return false;
1158 fallthrough;
1159 default:
1160 break;
1161 }
1162 fallthrough;
1163 case OP_J:
1164 break;
1165 case OP_JAL:
1166 if (opcode_reads_register(next_op, 31) ||
1167 opcode_writes_register(next_op, 31))
1168 return false;;
1169
1170 break;
1171 case OP_BEQ:
1172 case OP_BNE:
1173 if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
1174 return false;
1175 fallthrough;
1176 case OP_BLEZ:
1177 case OP_BGTZ:
1178 if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
1179 return false;
1180 break;
1181 case OP_REGIMM:
1182 switch (op.r.rt) {
1183 case OP_REGIMM_BLTZAL:
1184 case OP_REGIMM_BGEZAL:
1185 if (opcode_reads_register(next_op, 31) ||
1186 opcode_writes_register(next_op, 31))
1187 return false;
1188 fallthrough;
1189 case OP_REGIMM_BLTZ:
1190 case OP_REGIMM_BGEZ:
1191 if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
1192 return false;
1193 break;
1194 }
1195 fallthrough;
1196 default:
1197 break;
1198 }
1199
1200 return true;
1201}
1202
/* Swap each branch with its delay slot opcode where legal, so that the
 * delay slot instruction executes before the branch.  The NO_DS flag is
 * set on both so later passes know the slot was already handled; the
 * SYNC flag must stay attached to whichever opcode now comes first. */
static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
{
	struct opcode *list, *next = &block->opcode_list[0];
	unsigned int i;
	union code op, next_op;
	u32 flags;

	for (i = 0; i < block->nb_ops - 1; i++) {
		list = next;
		next = &block->opcode_list[i + 1];
		next_op = next->c;
		op = list->c;

		/* Skip non-branches, already-handled or emulated branches,
		 * and pairs involving a NOP. */
		if (!has_delay_slot(op) || op_flag_no_ds(list->flags) ||
		    op_flag_emulate_branch(list->flags) ||
		    op.opcode == 0 || next_op.opcode == 0)
			continue;

		/* Skip if this branch is itself a delay slot of the
		 * previous branch. */
		if (i && has_delay_slot(block->opcode_list[i - 1].c) &&
		    !op_flag_no_ds(block->opcode_list[i - 1].flags))
			continue;

		/* A sync point on the delay slot cannot be moved */
		if (op_flag_sync(next->flags))
			continue;

		if (!lightrec_can_switch_delay_slot(list->c, next_op))
			continue;

		pr_debug("Swap branch and delay slot opcodes "
			 "at offsets 0x%x / 0x%x\n",
			 i << 2, (i + 1) << 2);

		/* Swap opcodes and carefully re-distribute the flags */
		flags = next->flags | (list->flags & LIGHTREC_SYNC);
		list->c = next_op;
		next->c = op;
		next->flags = (list->flags | LIGHTREC_NO_DS) & ~LIGHTREC_SYNC;
		list->flags = flags | LIGHTREC_NO_DS;
	}

	return 0;
}
1244
98fa08a5
PC
/* Reallocate the block's opcode list down to 'new_size' opcodes, copying
 * the first 'new_size' entries and freeing the old list.
 * Returns 0, -EINVAL if new_size does not shrink, or -ENOMEM. */
static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
{
	struct opcode_list *list, *old_list;

	if (new_size >= block->nb_ops) {
		pr_err("Invalid shrink size (%u vs %u)\n",
		       new_size, block->nb_ops);
		return -EINVAL;
	}

	/* struct opcode_list has a flexible 'ops' array at its tail */
	list = lightrec_malloc(state, MEM_FOR_IR,
			       sizeof(*list) + sizeof(struct opcode) * new_size);
	if (!list) {
		pr_err("Unable to allocate memory\n");
		return -ENOMEM;
	}

	/* block->opcode_list points at the 'ops' member, not the struct */
	old_list = container_of(block->opcode_list, struct opcode_list, ops);
	memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size);

	lightrec_free_opcode_list(state, block->opcode_list);
	list->nb_ops = new_size;
	block->nb_ops = new_size;
	block->opcode_list = list->ops;

	pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
		 block->pc, new_size);

	return 0;
}
1275
/* Detect 'impossible' branches: branches whose delay slot contains a
 * load-delay opcode, another branch, or an RFE.  Those cannot be
 * recompiled normally and are flagged LIGHTREC_EMULATE_BRANCH.  If the
 * very first opcode is such a branch, the block is truncated to the
 * branch plus its delay slot. */
static int lightrec_detect_impossible_branches(struct lightrec_state *state,
					       struct block *block)
{
	struct opcode *op, *list = block->opcode_list, *next = &list[0];
	unsigned int i;
	int ret = 0;
	s16 offset;

	for (i = 0; i < block->nb_ops - 1; i++) {
		op = next;
		next = &list[i + 1];

		if (!has_delay_slot(op->c) ||
		    (!load_in_delay_slot(next->c) &&
		     !has_delay_slot(next->c) &&
		     !(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE)))
			continue;

		if (op->c.opcode == next->c.opcode) {
			/* The delay slot is the exact same opcode as the branch
			 * opcode: this is effectively a NOP */
			next->c.opcode = 0;
			continue;
		}

		/* Branch target, as an opcode index within this block */
		offset = i + 1 + (s16)op->i.imm;
		if (load_in_delay_slot(next->c) &&
		    (offset >= 0 && offset < block->nb_ops) &&
		    !opcode_reads_register(list[offset].c, next->c.i.rt)) {
			/* The 'impossible' branch is a local branch - we can
			 * verify here that the first opcode of the target does
			 * not use the target register of the delay slot */

			pr_debug("Branch at offset 0x%x has load delay slot, "
				 "but is local and dest opcode does not read "
				 "dest register\n", i << 2);
			continue;
		}

		op->flags |= LIGHTREC_EMULATE_BRANCH;

		if (op == list) {
			pr_debug("First opcode of block PC 0x%08x is an impossible branch\n",
				 block->pc);

			/* If the first opcode is an 'impossible' branch, we
			 * only keep the first two opcodes of the block (the
			 * branch itself + its delay slot) */
			if (block->nb_ops > 2)
				ret = shrink_opcode_list(state, block, 2);
			break;
		}
	}

	return ret;
}
1332
98fa08a5 1333static int lightrec_local_branches(struct lightrec_state *state, struct block *block)
d16005f8 1334{
98fa08a5
PC
1335 struct opcode *list;
1336 unsigned int i;
d16005f8 1337 s32 offset;
d16005f8 1338
98fa08a5
PC
1339 for (i = 0; i < block->nb_ops; i++) {
1340 list = &block->opcode_list[i];
1341
1342 if (should_emulate(list))
d16005f8
PC
1343 continue;
1344
1345 switch (list->i.op) {
1346 case OP_BEQ:
1347 case OP_BNE:
1348 case OP_BLEZ:
1349 case OP_BGTZ:
1350 case OP_REGIMM:
98fa08a5 1351 offset = i + 1 + (s16)list->i.imm;
d16005f8
PC
1352 if (offset >= 0 && offset < block->nb_ops)
1353 break;
d8b04acd
PC
1354 fallthrough;
1355 default:
d16005f8
PC
1356 continue;
1357 }
1358
1359 pr_debug("Found local branch to offset 0x%x\n", offset << 2);
1360
98fa08a5
PC
1361 if (should_emulate(&block->opcode_list[offset])) {
1362 pr_debug("Branch target must be emulated - skip\n");
1363 continue;
1364 }
d16005f8 1365
98fa08a5
PC
1366 if (offset && has_delay_slot(block->opcode_list[offset - 1].c)) {
1367 pr_debug("Branch target is a delay slot - skip\n");
1368 continue;
1369 }
d16005f8 1370
98fa08a5 1371 pr_debug("Adding sync at offset 0x%x\n", offset << 2);
d16005f8 1372
98fa08a5
PC
1373 block->opcode_list[offset].flags |= LIGHTREC_SYNC;
1374 list->flags |= LIGHTREC_LOCAL_BRANCH;
d16005f8
PC
1375 }
1376
1377 return 0;
1378}
1379
1380bool has_delay_slot(union code op)
1381{
1382 switch (op.i.op) {
1383 case OP_SPECIAL:
1384 switch (op.r.op) {
1385 case OP_SPECIAL_JR:
1386 case OP_SPECIAL_JALR:
1387 return true;
1388 default:
1389 return false;
1390 }
1391 case OP_J:
1392 case OP_JAL:
1393 case OP_BEQ:
1394 case OP_BNE:
1395 case OP_BLEZ:
1396 case OP_BGTZ:
1397 case OP_REGIMM:
d16005f8
PC
1398 return true;
1399 default:
1400 return false;
1401 }
1402}
1403
98fa08a5 1404bool should_emulate(const struct opcode *list)
d16005f8 1405{
03535202
PC
1406 return op_flag_emulate_branch(list->flags) && has_delay_slot(list->c);
1407}
1408
1409static bool op_writes_rd(union code c)
1410{
1411 switch (c.i.op) {
1412 case OP_SPECIAL:
1413 case OP_META_MOV:
1414 return true;
1415 default:
1416 return false;
1417 }
1418}
1419
1420static void lightrec_add_reg_op(struct opcode *op, u8 reg, u32 reg_op)
1421{
1422 if (op_writes_rd(op->c) && reg == op->r.rd)
1423 op->flags |= LIGHTREC_REG_RD(reg_op);
1424 else if (op->i.rs == reg)
1425 op->flags |= LIGHTREC_REG_RS(reg_op);
1426 else if (op->i.rt == reg)
1427 op->flags |= LIGHTREC_REG_RT(reg_op);
1428 else
1429 pr_debug("Cannot add unload/clean/discard flag: "
1430 "opcode does not touch register %s!\n",
1431 lightrec_reg_name(reg));
d16005f8
PC
1432}
1433
98fa08a5 1434static void lightrec_add_unload(struct opcode *op, u8 reg)
d16005f8 1435{
03535202
PC
1436 lightrec_add_reg_op(op, reg, LIGHTREC_REG_UNLOAD);
1437}
d16005f8 1438
03535202
PC
1439static void lightrec_add_discard(struct opcode *op, u8 reg)
1440{
1441 lightrec_add_reg_op(op, reg, LIGHTREC_REG_DISCARD);
1442}
1443
1444static void lightrec_add_clean(struct opcode *op, u8 reg)
1445{
1446 lightrec_add_reg_op(op, reg, LIGHTREC_REG_CLEAN);
1447}
1448
1449static void
1450lightrec_early_unload_sync(struct opcode *list, s16 *last_r, s16 *last_w)
1451{
1452 unsigned int reg;
1453 s16 offset;
1454
1455 for (reg = 0; reg < 34; reg++) {
1456 offset = s16_max(last_w[reg], last_r[reg]);
1457
1458 if (offset >= 0)
1459 lightrec_add_unload(&list[offset], reg);
1460 }
1461
1462 memset(last_r, 0xff, sizeof(*last_r) * 34);
1463 memset(last_w, 0xff, sizeof(*last_w) * 34);
98fa08a5 1464}
d16005f8 1465
98fa08a5
PC
1466static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
1467{
03535202 1468 u16 i, offset;
98fa08a5 1469 struct opcode *op;
03535202
PC
1470 s16 last_r[34], last_w[34], last_sync = 0, next_sync = 0;
1471 u64 mask_r, mask_w, dirty = 0, loaded = 0;
98fa08a5 1472 u8 reg;
d16005f8 1473
03535202
PC
1474 memset(last_r, 0xff, sizeof(last_r));
1475 memset(last_w, 0xff, sizeof(last_w));
98fa08a5 1476
03535202
PC
1477 /*
1478 * Clean if:
1479 * - the register is dirty, and is read again after a branch opcode
1480 *
1481 * Unload if:
1482 * - the register is dirty or loaded, and is not read again
1483 * - the register is dirty or loaded, and is written again after a branch opcode
1484 * - the next opcode has the SYNC flag set
1485 *
1486 * Discard if:
1487 * - the register is dirty or loaded, and is written again
1488 */
98fa08a5 1489
03535202
PC
1490 for (i = 0; i < block->nb_ops; i++) {
1491 op = &block->opcode_list[i];
1492
1493 if (op_flag_sync(op->flags) || should_emulate(op)) {
1494 /* The next opcode has the SYNC flag set, or is a branch
1495 * that should be emulated: unload all registers. */
1496 lightrec_early_unload_sync(block->opcode_list, last_r, last_w);
1497 dirty = 0;
1498 loaded = 0;
d16005f8
PC
1499 }
1500
03535202
PC
1501 if (next_sync == i) {
1502 last_sync = i;
1503 pr_debug("Last sync: 0x%x\n", last_sync << 2);
1504 }
d16005f8 1505
03535202
PC
1506 if (has_delay_slot(op->c)) {
1507 next_sync = i + 1 + !op_flag_no_ds(op->flags);
1508 pr_debug("Next sync: 0x%x\n", next_sync << 2);
1509 }
d16005f8 1510
03535202
PC
1511 mask_r = opcode_read_mask(op->c);
1512 mask_w = opcode_write_mask(op->c);
98fa08a5 1513
03535202
PC
1514 for (reg = 0; reg < 34; reg++) {
1515 if (mask_r & BIT(reg)) {
1516 if (dirty & BIT(reg) && last_w[reg] < last_sync) {
1517 /* The register is dirty, and is read
1518 * again after a branch: clean it */
1519
1520 lightrec_add_clean(&block->opcode_list[last_w[reg]], reg);
1521 dirty &= ~BIT(reg);
1522 loaded |= BIT(reg);
1523 }
1524
1525 last_r[reg] = i;
1526 }
1527
1528 if (mask_w & BIT(reg)) {
1529 if ((dirty & BIT(reg) && last_w[reg] < last_sync) ||
1530 (loaded & BIT(reg) && last_r[reg] < last_sync)) {
1531 /* The register is dirty or loaded, and
1532 * is written again after a branch:
1533 * unload it */
1534
1535 offset = s16_max(last_w[reg], last_r[reg]);
1536 lightrec_add_unload(&block->opcode_list[offset], reg);
1537 dirty &= ~BIT(reg);
1538 loaded &= ~BIT(reg);
1539 } else if (!(mask_r & BIT(reg)) &&
1540 ((dirty & BIT(reg) && last_w[reg] > last_sync) ||
1541 (loaded & BIT(reg) && last_r[reg] > last_sync))) {
1542 /* The register is dirty or loaded, and
1543 * is written again: discard it */
1544
1545 offset = s16_max(last_w[reg], last_r[reg]);
1546 lightrec_add_discard(&block->opcode_list[offset], reg);
1547 dirty &= ~BIT(reg);
1548 loaded &= ~BIT(reg);
1549 }
1550
1551 last_w[reg] = i;
1552 }
98fa08a5 1553
03535202
PC
1554 }
1555
1556 dirty |= mask_w;
1557 loaded |= mask_r;
d16005f8
PC
1558 }
1559
03535202
PC
1560 /* Unload all registers that are dirty or loaded at the end of block. */
1561 lightrec_early_unload_sync(block->opcode_list, last_r, last_w);
1562
d16005f8
PC
1563 return 0;
1564}
1565
98fa08a5 1566static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
d16005f8 1567{
02487de7
PC
1568 struct opcode *prev = NULL, *list = NULL;
1569 enum psx_map psx_map;
d16005f8
PC
1570 u32 known = BIT(0);
1571 u32 values[32] = { 0 };
98fa08a5 1572 unsigned int i;
02487de7 1573 u32 val, kunseg_val;
ba3814c1 1574 bool no_mask;
98fa08a5
PC
1575
1576 for (i = 0; i < block->nb_ops; i++) {
22eee2ac 1577 prev = list;
98fa08a5 1578 list = &block->opcode_list[i];
d16005f8 1579
22eee2ac
PC
1580 if (prev)
1581 known = lightrec_propagate_consts(list, prev, known, values);
1582
d16005f8
PC
1583 switch (list->i.op) {
1584 case OP_SB:
1585 case OP_SH:
1586 case OP_SW:
98fa08a5
PC
1587 if (OPT_FLAG_STORES) {
1588 /* Mark all store operations that target $sp or $gp
1589 * as not requiring code invalidation. This is based
1590 * on the heuristic that stores using one of these
1591 * registers as address will never hit a code page. */
1592 if (list->i.rs >= 28 && list->i.rs <= 29 &&
1593 !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
1594 pr_debug("Flaging opcode 0x%08x as not "
1595 "requiring invalidation\n",
1596 list->opcode);
1597 list->flags |= LIGHTREC_NO_INVALIDATE;
03535202 1598 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
98fa08a5
PC
1599 }
1600
1601 /* Detect writes whose destination address is inside the
1602 * current block, using constant propagation. When these
1603 * occur, we mark the blocks as not compilable. */
1604 if ((known & BIT(list->i.rs)) &&
1605 kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
1606 kunseg(values[list->i.rs]) < (kunseg(block->pc) +
1607 block->nb_ops * 4)) {
1608 pr_debug("Self-modifying block detected\n");
ba3814c1 1609 block_set_flags(block, BLOCK_NEVER_COMPILE);
98fa08a5
PC
1610 list->flags |= LIGHTREC_SMC;
1611 }
1612 }
d8b04acd
PC
1613 fallthrough;
1614 case OP_SWL:
98fa08a5
PC
1615 case OP_SWR:
1616 case OP_SWC2:
1617 case OP_LB:
1618 case OP_LBU:
1619 case OP_LH:
1620 case OP_LHU:
1621 case OP_LW:
1622 case OP_LWL:
1623 case OP_LWR:
1624 case OP_LWC2:
1625 if (OPT_FLAG_IO && (known & BIT(list->i.rs))) {
22eee2ac 1626 val = values[list->i.rs] + (s16) list->i.imm;
02487de7
PC
1627 kunseg_val = kunseg(val);
1628 psx_map = lightrec_get_map_idx(state, kunseg_val);
1629
03535202 1630 list->flags &= ~LIGHTREC_IO_MASK;
ba3814c1 1631 no_mask = val == kunseg_val;
03535202 1632
02487de7
PC
1633 switch (psx_map) {
1634 case PSX_MAP_KERNEL_USER_RAM:
ba3814c1 1635 if (no_mask)
02487de7 1636 list->flags |= LIGHTREC_NO_MASK;
d8b04acd 1637 fallthrough;
02487de7
PC
1638 case PSX_MAP_MIRROR1:
1639 case PSX_MAP_MIRROR2:
1640 case PSX_MAP_MIRROR3:
22eee2ac
PC
1641 pr_debug("Flaging opcode %u as RAM access\n", i);
1642 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
ba3814c1
PC
1643 if (no_mask && state->mirrors_mapped)
1644 list->flags |= LIGHTREC_NO_MASK;
02487de7
PC
1645 break;
1646 case PSX_MAP_BIOS:
22eee2ac
PC
1647 pr_debug("Flaging opcode %u as BIOS access\n", i);
1648 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS);
ba3814c1
PC
1649 if (no_mask)
1650 list->flags |= LIGHTREC_NO_MASK;
02487de7
PC
1651 break;
1652 case PSX_MAP_SCRATCH_PAD:
22eee2ac
PC
1653 pr_debug("Flaging opcode %u as scratchpad access\n", i);
1654 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH);
ba3814c1
PC
1655 if (no_mask)
1656 list->flags |= LIGHTREC_NO_MASK;
02487de7
PC
1657
1658 /* Consider that we're never going to run code from
1659 * the scratchpad. */
1660 list->flags |= LIGHTREC_NO_INVALIDATE;
1661 break;
ba3814c1
PC
1662 case PSX_MAP_HW_REGISTERS:
1663 if (state->ops.hw_direct &&
1664 state->ops.hw_direct(kunseg_val,
1665 opcode_is_store(list->c),
1666 opcode_get_io_size(list->c))) {
1667 pr_debug("Flagging opcode %u as direct I/O access\n",
1668 i);
1669 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT_HW);
cdfa3536
PC
1670
1671 if (no_mask)
1672 list->flags |= LIGHTREC_NO_MASK;
ba3814c1
PC
1673 break;
1674 }
1675 fallthrough;
02487de7
PC
1676 default:
1677 pr_debug("Flagging opcode %u as I/O access\n",
1678 i);
1679 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
1680 break;
98fa08a5 1681 }
d16005f8 1682 }
d8b04acd
PC
1683 fallthrough;
1684 default:
d16005f8
PC
1685 break;
1686 }
d16005f8
PC
1687 }
1688
1689 return 0;
1690}
1691
98fa08a5
PC
1692static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
1693 const struct opcode *last,
1694 u32 mask, bool sync, bool mflo, bool another)
d16005f8 1695{
98fa08a5
PC
1696 const struct opcode *op, *next = &block->opcode_list[offset];
1697 u32 old_mask;
1698 u8 reg2, reg = mflo ? REG_LO : REG_HI;
1699 u16 branch_offset;
1700 unsigned int i;
1701
1702 for (i = offset; i < block->nb_ops; i++) {
1703 op = next;
1704 next = &block->opcode_list[i + 1];
1705 old_mask = mask;
1706
1707 /* If any other opcode writes or reads to the register
1708 * we'd use, then we cannot use it anymore. */
1709 mask |= opcode_read_mask(op->c);
1710 mask |= opcode_write_mask(op->c);
1711
03535202 1712 if (op_flag_sync(op->flags))
98fa08a5 1713 sync = true;
d16005f8 1714
d16005f8
PC
1715 switch (op->i.op) {
1716 case OP_BEQ:
1717 case OP_BNE:
1718 case OP_BLEZ:
1719 case OP_BGTZ:
1720 case OP_REGIMM:
d16005f8 1721 /* TODO: handle backwards branches too */
03535202 1722 if (!last && op_flag_local_branch(op->flags) &&
d16005f8 1723 (s16)op->c.i.imm >= 0) {
98fa08a5 1724 branch_offset = i + 1 + (s16)op->c.i.imm
03535202 1725 - !!op_flag_no_ds(op->flags);
98fa08a5
PC
1726
1727 reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
1728 mask, sync, mflo, false);
1729 reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
1730 mask, sync, mflo, false);
1731 if (reg > 0 && reg == reg2)
1732 return reg;
1733 if (!reg && !reg2)
1734 return 0;
d16005f8 1735 }
98fa08a5
PC
1736
1737 return mflo ? REG_LO : REG_HI;
ba3814c1
PC
1738 case OP_META_MULT2:
1739 case OP_META_MULTU2:
1740 return 0;
d16005f8
PC
1741 case OP_SPECIAL:
1742 switch (op->r.op) {
1743 case OP_SPECIAL_MULT:
1744 case OP_SPECIAL_MULTU:
1745 case OP_SPECIAL_DIV:
1746 case OP_SPECIAL_DIVU:
98fa08a5 1747 return 0;
d16005f8 1748 case OP_SPECIAL_MTHI:
98fa08a5
PC
1749 if (!mflo)
1750 return 0;
1751 continue;
1752 case OP_SPECIAL_MTLO:
1753 if (mflo)
1754 return 0;
1755 continue;
d16005f8 1756 case OP_SPECIAL_JR:
98fa08a5
PC
1757 if (op->r.rs != 31)
1758 return reg;
1759
03535202 1760 if (!sync && !op_flag_no_ds(op->flags) &&
98fa08a5
PC
1761 (next->i.op == OP_SPECIAL) &&
1762 ((!mflo && next->r.op == OP_SPECIAL_MFHI) ||
1763 (mflo && next->r.op == OP_SPECIAL_MFLO)))
1764 return next->r.rd;
1765
1766 return 0;
d16005f8 1767 case OP_SPECIAL_JALR:
98fa08a5 1768 return reg;
d16005f8 1769 case OP_SPECIAL_MFHI:
98fa08a5
PC
1770 if (!mflo) {
1771 if (another)
1772 return op->r.rd;
1773 /* Must use REG_HI if there is another MFHI target*/
1774 reg2 = get_mfhi_mflo_reg(block, i + 1, next,
1775 0, sync, mflo, true);
1776 if (reg2 > 0 && reg2 != REG_HI)
1777 return REG_HI;
1778
1779 if (!sync && !(old_mask & BIT(op->r.rd)))
1780 return op->r.rd;
1781 else
1782 return REG_HI;
1783 }
1784 continue;
1785 case OP_SPECIAL_MFLO:
1786 if (mflo) {
1787 if (another)
1788 return op->r.rd;
1789 /* Must use REG_LO if there is another MFLO target*/
1790 reg2 = get_mfhi_mflo_reg(block, i + 1, next,
1791 0, sync, mflo, true);
1792 if (reg2 > 0 && reg2 != REG_LO)
1793 return REG_LO;
1794
1795 if (!sync && !(old_mask & BIT(op->r.rd)))
1796 return op->r.rd;
1797 else
1798 return REG_LO;
1799 }
d16005f8 1800 continue;
98fa08a5
PC
1801 default:
1802 break;
d16005f8 1803 }
98fa08a5 1804
d8b04acd 1805 fallthrough;
d16005f8
PC
1806 default:
1807 continue;
1808 }
1809 }
1810
98fa08a5
PC
1811 return reg;
1812}
1813
1814static void lightrec_replace_lo_hi(struct block *block, u16 offset,
1815 u16 last, bool lo)
1816{
1817 unsigned int i;
1818 u32 branch_offset;
1819
1820 /* This function will remove the following MFLO/MFHI. It must be called
1821 * only if get_mfhi_mflo_reg() returned a non-zero value. */
1822
1823 for (i = offset; i < last; i++) {
1824 struct opcode *op = &block->opcode_list[i];
1825
1826 switch (op->i.op) {
1827 case OP_BEQ:
1828 case OP_BNE:
1829 case OP_BLEZ:
1830 case OP_BGTZ:
1831 case OP_REGIMM:
1832 /* TODO: handle backwards branches too */
03535202 1833 if (op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) {
98fa08a5 1834 branch_offset = i + 1 + (s16)op->c.i.imm
03535202 1835 - !!op_flag_no_ds(op->flags);
98fa08a5
PC
1836
1837 lightrec_replace_lo_hi(block, branch_offset, last, lo);
1838 lightrec_replace_lo_hi(block, i + 1, branch_offset, lo);
1839 }
1840 break;
1841
1842 case OP_SPECIAL:
1843 if (lo && op->r.op == OP_SPECIAL_MFLO) {
1844 pr_debug("Removing MFLO opcode at offset 0x%x\n",
1845 i << 2);
1846 op->opcode = 0;
1847 return;
1848 } else if (!lo && op->r.op == OP_SPECIAL_MFHI) {
1849 pr_debug("Removing MFHI opcode at offset 0x%x\n",
1850 i << 2);
1851 op->opcode = 0;
1852 return;
1853 }
1854
d8b04acd 1855 fallthrough;
98fa08a5
PC
1856 default:
1857 break;
1858 }
1859 }
d16005f8
PC
1860}
1861
/* On MIPS hosts the hardware divider behaves like the PS1's, so the
 * div-by-zero/overflow guard sequence is never needed. */
static bool lightrec_always_skip_div_check(void)
{
#ifdef __mips__
	return true;
#else
	return false;
#endif
}
1870
98fa08a5 1871static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
d16005f8 1872{
22eee2ac 1873 struct opcode *prev, *list = NULL;
98fa08a5
PC
1874 u8 reg_hi, reg_lo;
1875 unsigned int i;
fd58fa32
PC
1876 u32 known = BIT(0);
1877 u32 values[32] = { 0 };
98fa08a5
PC
1878
1879 for (i = 0; i < block->nb_ops - 1; i++) {
22eee2ac 1880 prev = list;
98fa08a5 1881 list = &block->opcode_list[i];
d16005f8 1882
22eee2ac
PC
1883 if (prev)
1884 known = lightrec_propagate_consts(list, prev, known, values);
1885
ba3814c1
PC
1886 switch (list->i.op) {
1887 case OP_SPECIAL:
1888 switch (list->r.op) {
1889 case OP_SPECIAL_DIV:
1890 case OP_SPECIAL_DIVU:
1891 /* If we are dividing by a non-zero constant, don't
1892 * emit the div-by-zero check. */
1893 if (lightrec_always_skip_div_check() ||
1894 ((known & BIT(list->c.r.rt)) && values[list->c.r.rt]))
1895 list->flags |= LIGHTREC_NO_DIV_CHECK;
1896 fallthrough;
1897 case OP_SPECIAL_MULT:
1898 case OP_SPECIAL_MULTU:
1899 break;
1900 default:
1901 continue;
1902 }
d8b04acd 1903 fallthrough;
ba3814c1
PC
1904 case OP_META_MULT2:
1905 case OP_META_MULTU2:
d16005f8
PC
1906 break;
1907 default:
1908 continue;
1909 }
1910
98fa08a5
PC
1911 /* Don't support opcodes in delay slots */
1912 if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
03535202 1913 op_flag_no_ds(list->flags)) {
d16005f8 1914 continue;
fd58fa32 1915 }
d16005f8 1916
98fa08a5
PC
1917 reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
1918 if (reg_lo == 0) {
1919 pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
1920 " not writing LO\n", i << 2);
1921 list->flags |= LIGHTREC_NO_LO;
1922 }
1923
1924 reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false);
1925 if (reg_hi == 0) {
1926 pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
1927 " not writing HI\n", i << 2);
1928 list->flags |= LIGHTREC_NO_HI;
1929 }
1930
1931 if (!reg_lo && !reg_hi) {
1932 pr_debug("Both LO/HI unused in this block, they will "
1933 "probably be used in parent block - removing "
1934 "flags.\n");
1935 list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI);
1936 }
1937
1938 if (reg_lo > 0 && reg_lo != REG_LO) {
1939 pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n",
1940 lightrec_reg_name(reg_lo), list->r.rs, list->r.rt);
1941
1942 lightrec_replace_lo_hi(block, i + 1, block->nb_ops, true);
1943 list->r.rd = reg_lo;
1944 } else {
1945 list->r.rd = 0;
1946 }
1947
1948 if (reg_hi > 0 && reg_hi != REG_HI) {
1949 pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n",
1950 lightrec_reg_name(reg_hi), list->r.rs, list->r.rt);
1951
1952 lightrec_replace_lo_hi(block, i + 1, block->nb_ops, false);
1953 list->r.imm = reg_hi;
1954 } else {
1955 list->r.imm = 0;
1956 }
1957 }
1958
1959 return 0;
1960}
1961
1962static bool remove_div_sequence(struct block *block, unsigned int offset)
1963{
1964 struct opcode *op;
1965 unsigned int i, found = 0;
1966
1967 /*
1968 * Scan for the zero-checking sequence that GCC automatically introduced
1969 * after most DIV/DIVU opcodes. This sequence checks the value of the
1970 * divisor, and if zero, executes a BREAK opcode, causing the BIOS
1971 * handler to crash the PS1.
1972 *
1973 * For DIV opcodes, this sequence additionally checks that the signed
1974 * operation does not overflow.
1975 *
1976 * With the assumption that the games never crashed the PS1, we can
1977 * therefore assume that the games never divided by zero or overflowed,
1978 * and these sequences can be removed.
1979 */
1980
1981 for (i = offset; i < block->nb_ops; i++) {
1982 op = &block->opcode_list[i];
1983
1984 if (!found) {
1985 if (op->i.op == OP_SPECIAL &&
1986 (op->r.op == OP_SPECIAL_DIV || op->r.op == OP_SPECIAL_DIVU))
1987 break;
1988
1989 if ((op->opcode & 0xfc1fffff) == 0x14000002) {
1990 /* BNE ???, zero, +8 */
1991 found++;
1992 } else {
1993 offset++;
1994 }
1995 } else if (found == 1 && !op->opcode) {
1996 /* NOP */
1997 found++;
1998 } else if (found == 2 && op->opcode == 0x0007000d) {
1999 /* BREAK 0x1c00 */
2000 found++;
2001 } else if (found == 3 && op->opcode == 0x2401ffff) {
2002 /* LI at, -1 */
2003 found++;
2004 } else if (found == 4 && (op->opcode & 0xfc1fffff) == 0x14010004) {
2005 /* BNE ???, at, +16 */
2006 found++;
2007 } else if (found == 5 && op->opcode == 0x3c018000) {
2008 /* LUI at, 0x8000 */
2009 found++;
2010 } else if (found == 6 && (op->opcode & 0x141fffff) == 0x14010002) {
2011 /* BNE ???, at, +16 */
2012 found++;
2013 } else if (found == 7 && !op->opcode) {
2014 /* NOP */
2015 found++;
2016 } else if (found == 8 && op->opcode == 0x0006000d) {
2017 /* BREAK 0x1800 */
2018 found++;
2019 break;
2020 } else {
2021 break;
2022 }
2023 }
2024
2025 if (found >= 3) {
2026 if (found != 9)
2027 found = 3;
2028
2029 pr_debug("Removing DIV%s sequence at offset 0x%x\n",
2030 found == 9 ? "" : "U", offset << 2);
2031
2032 for (i = 0; i < found; i++)
2033 block->opcode_list[offset + i].opcode = 0;
2034
2035 return true;
2036 }
2037
2038 return false;
2039}
2040
2041static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state,
2042 struct block *block)
2043{
2044 struct opcode *op;
2045 unsigned int i;
2046
2047 for (i = 0; i < block->nb_ops; i++) {
2048 op = &block->opcode_list[i];
2049
2050 if (op->i.op == OP_SPECIAL &&
2051 (op->r.op == OP_SPECIAL_DIVU || op->r.op == OP_SPECIAL_DIV) &&
2052 remove_div_sequence(block, i + 1))
2053 op->flags |= LIGHTREC_NO_DIV_CHECK;
2054 }
2055
2056 return 0;
2057}
2058
2059static const u32 memset_code[] = {
2060 0x10a00006, // beqz a1, 2f
2061 0x24a2ffff, // addiu v0,a1,-1
2062 0x2403ffff, // li v1,-1
2063 0xac800000, // 1: sw zero,0(a0)
2064 0x2442ffff, // addiu v0,v0,-1
2065 0x1443fffd, // bne v0,v1, 1b
2066 0x24840004, // addiu a0,a0,4
2067 0x03e00008, // 2: jr ra
2068 0x00000000, // nop
2069};
2070
2071static int lightrec_replace_memset(struct lightrec_state *state, struct block *block)
2072{
2073 unsigned int i;
2074 union code c;
2075
2076 for (i = 0; i < block->nb_ops; i++) {
2077 c = block->opcode_list[i].c;
2078
2079 if (c.opcode != memset_code[i])
2080 return 0;
2081
2082 if (i == ARRAY_SIZE(memset_code) - 1) {
2083 /* success! */
2084 pr_debug("Block at PC 0x%x is a memset\n", block->pc);
ba3814c1
PC
2085 block_set_flags(block,
2086 BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE);
98fa08a5
PC
2087
2088 /* Return non-zero to skip other optimizers. */
2089 return 1;
d16005f8
PC
2090 }
2091 }
2092
2093 return 0;
2094}
2095
98fa08a5
PC
2096static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
2097 IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
2098 IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
2099 IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
03535202 2100 IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_branches),
98fa08a5
PC
2101 IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches),
2102 IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops),
2103 IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots),
2104 IF_OPT(OPT_FLAG_IO || OPT_FLAG_STORES, &lightrec_flag_io),
2105 IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
2106 IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
d16005f8
PC
2107};
2108
98fa08a5 2109int lightrec_optimize(struct lightrec_state *state, struct block *block)
d16005f8
PC
2110{
2111 unsigned int i;
98fa08a5 2112 int ret;
d16005f8
PC
2113
2114 for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
98fa08a5
PC
2115 if (lightrec_optimizers[i]) {
2116 ret = (*lightrec_optimizers[i])(state, block);
2117 if (ret)
2118 return ret;
2119 }
d16005f8
PC
2120 }
2121
2122 return 0;
2123}