Update lightrec 20220910 (#686)
[pcsx_rearmed.git] / deps / lightrec / optimizer.c
CommitLineData
98fa08a5 1// SPDX-License-Identifier: LGPL-2.1-or-later
d16005f8 2/*
98fa08a5 3 * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
d16005f8
PC
4 */
5
98fa08a5 6#include "lightrec-config.h"
d16005f8
PC
7#include "disassembler.h"
8#include "lightrec.h"
9#include "memmanager.h"
10#include "optimizer.h"
11#include "regcache.h"
12
13#include <errno.h>
14#include <stdbool.h>
15#include <stdlib.h>
98fa08a5
PC
16#include <string.h>
17
18#define IF_OPT(opt, ptr) ((opt) ? (ptr) : NULL)
d16005f8
PC
19
/* A collection of optimizer pass callbacks operating on an opcode list.
 * NOTE(review): not referenced by the code visible in this file — presumably
 * consumed elsewhere; confirm before removing. */
struct optimizer_list {
	void (**optimizers)(struct opcode *);
	unsigned int nb_optimizers;
};

/* Forward declaration: is_nop() is used by reg_is_read()/reg_is_written()
 * before its definition further down. */
static bool is_nop(union code op);
26
27bool is_unconditional_jump(union code c)
28{
29 switch (c.i.op) {
30 case OP_SPECIAL:
31 return c.r.op == OP_SPECIAL_JR || c.r.op == OP_SPECIAL_JALR;
32 case OP_J:
33 case OP_JAL:
34 return true;
35 case OP_BEQ:
36 case OP_BLEZ:
37 return c.i.rs == c.i.rt;
38 case OP_REGIMM:
39 return (c.r.rt == OP_REGIMM_BGEZ ||
40 c.r.rt == OP_REGIMM_BGEZAL) && c.i.rs == 0;
41 default:
42 return false;
43 }
44}
45
46bool is_syscall(union code c)
47{
48 return (c.i.op == OP_SPECIAL && c.r.op == OP_SPECIAL_SYSCALL) ||
49 (c.i.op == OP_CP0 && (c.r.rs == OP_CP0_MTC0 ||
50 c.r.rs == OP_CP0_CTC0) &&
51 (c.r.rd == 12 || c.r.rd == 13));
52}
53
/* Return a bitmask of the registers read by the given opcode.
 * Bits 0-31 map to the 32 GPRs; REG_HI / REG_LO are pseudo-register
 * indices for the HI and LO result registers. */
static u64 opcode_read_mask(union code op)
{
	switch (op.i.op) {
	case OP_SPECIAL:
		switch (op.r.op) {
		case OP_SPECIAL_SYSCALL:
		case OP_SPECIAL_BREAK:
			return 0;
		case OP_SPECIAL_JR:
		case OP_SPECIAL_JALR:
		case OP_SPECIAL_MTHI:
		case OP_SPECIAL_MTLO:
			return BIT(op.r.rs);
		case OP_SPECIAL_MFHI:
			return BIT(REG_HI);
		case OP_SPECIAL_MFLO:
			return BIT(REG_LO);
		case OP_SPECIAL_SLL:
			/* SLL with a zero shift amount is the canonical NOP
			 * encoding: it reads nothing. */
			if (!op.r.imm)
				return 0;
			fallthrough;
		case OP_SPECIAL_SRL:
		case OP_SPECIAL_SRA:
			/* Immediate shifts only read rt. */
			return BIT(op.r.rt);
		default:
			return BIT(op.r.rs) | BIT(op.r.rt);
		}
	case OP_CP0:
		switch (op.r.rs) {
		case OP_CP0_MTC0:
		case OP_CP0_CTC0:
			return BIT(op.r.rt);
		default:
			return 0;
		}
	case OP_CP2:
		if (op.r.op == OP_CP2_BASIC) {
			switch (op.r.rs) {
			case OP_CP2_BASIC_MTC2:
			case OP_CP2_BASIC_CTC2:
				return BIT(op.r.rt);
			default:
				break;
			}
		}
		return 0;
	case OP_J:
	case OP_JAL:
	case OP_LUI:
		return 0;
	case OP_BEQ:
		/* BEQ rX, rX is unconditional, so the register values are
		 * irrelevant — treat it as reading nothing. */
		if (op.i.rs == op.i.rt)
			return 0;
		fallthrough;
	case OP_BNE:
	case OP_LWL:
	case OP_LWR:
	case OP_SB:
	case OP_SH:
	case OP_SWL:
	case OP_SW:
	case OP_SWR:
		return BIT(op.i.rs) | BIT(op.i.rt);
	default:
		/* All remaining I-type opcodes read only rs. */
		return BIT(op.i.rs);
	}
}
121
ba3814c1 122static u64 mult_div_write_mask(union code op)
d16005f8 123{
98fa08a5
PC
124 u64 flags;
125
ba3814c1
PC
126 if (!OPT_FLAG_MULT_DIV)
127 return BIT(REG_LO) | BIT(REG_HI);
128
129 if (op.r.rd)
130 flags = BIT(op.r.rd);
131 else
132 flags = BIT(REG_LO);
133 if (op.r.imm)
134 flags |= BIT(op.r.imm);
135 else
136 flags |= BIT(REG_HI);
137
138 return flags;
139}
140
/* Return a bitmask of the registers written by the given opcode.
 * Same bit layout as opcode_read_mask(): GPRs in bits 0-31, plus the
 * REG_HI / REG_LO pseudo-registers. */
static u64 opcode_write_mask(union code op)
{
	switch (op.i.op) {
	case OP_META_MULT2:
	case OP_META_MULTU2:
		return mult_div_write_mask(op);
	case OP_SPECIAL:
		switch (op.r.op) {
		case OP_SPECIAL_JR:
		case OP_SPECIAL_SYSCALL:
		case OP_SPECIAL_BREAK:
			return 0;
		case OP_SPECIAL_MULT:
		case OP_SPECIAL_MULTU:
		case OP_SPECIAL_DIV:
		case OP_SPECIAL_DIVU:
			return mult_div_write_mask(op);
		case OP_SPECIAL_MTHI:
			return BIT(REG_HI);
		case OP_SPECIAL_MTLO:
			return BIT(REG_LO);
		case OP_SPECIAL_SLL:
			/* SLL with zero shift is the NOP encoding — writes
			 * nothing. */
			if (!op.r.imm)
				return 0;
			fallthrough;
		default:
			/* Other SPECIAL opcodes (incl. JALR) write rd. */
			return BIT(op.r.rd);
		}
	case OP_ADDI:
	case OP_ADDIU:
	case OP_SLTI:
	case OP_SLTIU:
	case OP_ANDI:
	case OP_ORI:
	case OP_XORI:
	case OP_LUI:
	case OP_LB:
	case OP_LH:
	case OP_LWL:
	case OP_LW:
	case OP_LBU:
	case OP_LHU:
	case OP_LWR:
		return BIT(op.i.rt);
	case OP_JAL:
		/* JAL links into $ra. */
		return BIT(31);
	case OP_CP0:
		switch (op.r.rs) {
		case OP_CP0_MFC0:
		case OP_CP0_CFC0:
			return BIT(op.i.rt);
		default:
			return 0;
		}
	case OP_CP2:
		if (op.r.op == OP_CP2_BASIC) {
			switch (op.r.rs) {
			case OP_CP2_BASIC_MFC2:
			case OP_CP2_BASIC_CFC2:
				return BIT(op.i.rt);
			default:
				break;
			}
		}
		return 0;
	case OP_REGIMM:
		switch (op.r.rt) {
		case OP_REGIMM_BLTZAL:
		case OP_REGIMM_BGEZAL:
			/* Branch-and-link variants write $ra. */
			return BIT(31);
		default:
			return 0;
		}
	case OP_META_MOV:
		return BIT(op.r.rd);
	default:
		return 0;
	}
}
220
221bool opcode_reads_register(union code op, u8 reg)
222{
223 return opcode_read_mask(op) & BIT(reg);
224}
225
226bool opcode_writes_register(union code op, u8 reg)
227{
228 return opcode_write_mask(op) & BIT(reg);
229}
230
/* Scan backwards from list[offset] for the opcode that wrote 'reg'.
 * Returns the index of the writer, or -1 if none can be safely identified.
 * The search aborts at sync points, delay slots, and any opcode that reads
 * 'reg', since past those the producing instruction cannot be freely
 * modified or moved. */
static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 reg)
{
	union code c;
	unsigned int i;

	/* A sync point invalidates all register knowledge at 'offset'. */
	if (op_flag_sync(list[offset].flags))
		return -1;

	for (i = offset; i > 0; i--) {
		c = list[i - 1].c;

		if (opcode_writes_register(c, reg)) {
			/* Writer sits in a delay slot — its execution is tied
			 * to the branch, so don't report it. */
			if (i > 1 && has_delay_slot(list[i - 2].c))
				break;

			return i - 1;
		}

		if (op_flag_sync(list[i - 1].flags) ||
		    has_delay_slot(c) ||
		    opcode_reads_register(c, reg))
			break;
	}

	return -1;
}
257
/* Scan forward from list[offset] for the next opcode that reads 'reg'.
 * Returns its index, or -1 if a sync point, delay slot, or writer of 'reg'
 * is reached first.  NOTE(review): the loop has no explicit upper bound —
 * presumably every block ends with a branch (has_delay_slot) that stops
 * the scan; confirm against the callers. */
static int find_next_reader(const struct opcode *list, unsigned int offset, u8 reg)
{
	unsigned int i;
	union code c;

	if (op_flag_sync(list[offset].flags))
		return -1;

	for (i = offset; ; i++) {
		c = list[i].c;

		if (opcode_reads_register(c, reg)) {
			/* Reader is in a delay slot — conditional execution,
			 * so it doesn't count as a plain reader. */
			if (i > 0 && has_delay_slot(list[i - 1].c))
				break;

			return i;
		}

		if (op_flag_sync(list[i].flags) ||
		    has_delay_slot(c) || opcode_writes_register(c, reg))
			break;
	}

	return -1;
}
283
/* Tell whether 'reg' is dead after list[offset]: i.e. it is overwritten
 * before being read again within the block.  Conservatively returns false
 * at sync points.  NOTE(review): like find_next_reader(), the loop relies
 * on the block terminating with a branch to stop the scan. */
static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg)
{
	unsigned int i;

	if (op_flag_sync(list[offset].flags))
		return false;

	for (i = offset + 1; ; i++) {
		if (opcode_reads_register(list[i].c, reg))
			return false;

		if (opcode_writes_register(list[i].c, reg))
			return true;

		if (has_delay_slot(list[i].c)) {
			/* At a branch, only the delay slot is still visible:
			 * reg is dead only if the delay slot overwrites it
			 * without reading it first. */
			if (op_flag_no_ds(list[i].flags) ||
			    opcode_reads_register(list[i + 1].c, reg))
				return false;

			return opcode_writes_register(list[i + 1].c, reg);
		}
	}
}
307
98fa08a5
PC
308static bool reg_is_read(const struct opcode *list,
309 unsigned int a, unsigned int b, u8 reg)
310{
311 /* Return true if reg is read in one of the opcodes of the interval
312 * [a, b[ */
313 for (; a < b; a++) {
314 if (!is_nop(list[a].c) && opcode_reads_register(list[a].c, reg))
315 return true;
316 }
317
318 return false;
319}
320
321static bool reg_is_written(const struct opcode *list,
322 unsigned int a, unsigned int b, u8 reg)
323{
324 /* Return true if reg is written in one of the opcodes of the interval
325 * [a, b[ */
326
327 for (; a < b; a++) {
328 if (!is_nop(list[a].c) && opcode_writes_register(list[a].c, reg))
329 return true;
330 }
331
332 return false;
333}
334
335static bool reg_is_read_or_written(const struct opcode *list,
336 unsigned int a, unsigned int b, u8 reg)
337{
338 return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
339}
340
341static bool opcode_is_load(union code op)
342{
343 switch (op.i.op) {
344 case OP_LB:
345 case OP_LH:
346 case OP_LWL:
347 case OP_LW:
348 case OP_LBU:
349 case OP_LHU:
350 case OP_LWR:
351 case OP_LWC2:
352 return true;
353 default:
354 return false;
355 }
356}
357
358static bool opcode_is_store(union code op)
359{
360 switch (op.i.op) {
361 case OP_SB:
362 case OP_SH:
363 case OP_SW:
364 case OP_SWL:
365 case OP_SWR:
366 case OP_SWC2:
367 return true;
368 default:
369 return false;
370 }
371}
372
ba3814c1
PC
373static u8 opcode_get_io_size(union code op)
374{
375 switch (op.i.op) {
376 case OP_LB:
377 case OP_LBU:
378 case OP_SB:
379 return 8;
380 case OP_LH:
381 case OP_LHU:
382 case OP_SH:
383 return 16;
384 default:
385 return 32;
386 }
387}
388
98fa08a5
PC
389bool opcode_is_io(union code op)
390{
391 return opcode_is_load(op) || opcode_is_store(op);
392}
393
d16005f8
PC
394/* TODO: Complete */
395static bool is_nop(union code op)
396{
397 if (opcode_writes_register(op, 0)) {
398 switch (op.i.op) {
399 case OP_CP0:
400 return op.r.rs != OP_CP0_MFC0;
401 case OP_LB:
402 case OP_LH:
403 case OP_LWL:
404 case OP_LW:
405 case OP_LBU:
406 case OP_LHU:
407 case OP_LWR:
408 return false;
409 default:
410 return true;
411 }
412 }
413
414 switch (op.i.op) {
415 case OP_SPECIAL:
416 switch (op.r.op) {
417 case OP_SPECIAL_AND:
418 return op.r.rd == op.r.rt && op.r.rd == op.r.rs;
419 case OP_SPECIAL_ADD:
420 case OP_SPECIAL_ADDU:
421 return (op.r.rd == op.r.rt && op.r.rs == 0) ||
422 (op.r.rd == op.r.rs && op.r.rt == 0);
423 case OP_SPECIAL_SUB:
424 case OP_SPECIAL_SUBU:
425 return op.r.rd == op.r.rs && op.r.rt == 0;
426 case OP_SPECIAL_OR:
427 if (op.r.rd == op.r.rt)
428 return op.r.rd == op.r.rs || op.r.rs == 0;
429 else
430 return (op.r.rd == op.r.rs) && op.r.rt == 0;
431 case OP_SPECIAL_SLL:
432 case OP_SPECIAL_SRA:
433 case OP_SPECIAL_SRL:
434 return op.r.rd == op.r.rt && op.r.imm == 0;
98fa08a5
PC
435 case OP_SPECIAL_MFHI:
436 case OP_SPECIAL_MFLO:
437 return op.r.rd == 0;
d16005f8
PC
438 default:
439 return false;
440 }
441 case OP_ORI:
442 case OP_ADDI:
443 case OP_ADDIU:
444 return op.i.rt == op.i.rs && op.i.imm == 0;
445 case OP_BGTZ:
446 return (op.i.rs == 0 || op.i.imm == 1);
447 case OP_REGIMM:
448 return (op.i.op == OP_REGIMM_BLTZ ||
449 op.i.op == OP_REGIMM_BLTZAL) &&
450 (op.i.rs == 0 || op.i.imm == 1);
451 case OP_BNE:
452 return (op.i.rs == op.i.rt || op.i.imm == 1);
453 default:
454 return false;
455 }
456}
457
458bool load_in_delay_slot(union code op)
459{
460 switch (op.i.op) {
461 case OP_CP0:
462 switch (op.r.rs) {
463 case OP_CP0_MFC0:
464 case OP_CP0_CFC0:
465 return true;
466 default:
467 break;
468 }
469
470 break;
471 case OP_CP2:
472 if (op.r.op == OP_CP2_BASIC) {
473 switch (op.r.rs) {
474 case OP_CP2_BASIC_MFC2:
475 case OP_CP2_BASIC_CFC2:
476 return true;
477 default:
478 break;
479 }
480 }
481
482 break;
483 case OP_LB:
484 case OP_LH:
485 case OP_LW:
486 case OP_LWL:
487 case OP_LWR:
488 case OP_LBU:
489 case OP_LHU:
490 return true;
491 default:
492 break;
493 }
494
495 return false;
496}
497
22eee2ac
PC
/* Constant propagation: given the previous opcode 'prev' and the bitmask
 * 'known' of registers whose values (stored in v[]) are known before it
 * executed, return the updated 'known' mask valid before 'op' executes,
 * updating v[] accordingly.  Any register whose result cannot be computed
 * statically has its bit cleared. */
static u32 lightrec_propagate_consts(const struct opcode *op,
				     const struct opcode *prev,
				     u32 known, u32 *v)
{
	union code c = prev->c;

	/* Register $zero is always, well, zero */
	known |= BIT(0);
	v[0] = 0;

	/* At a sync point another block may jump in here, so nothing but
	 * $zero can be assumed known. */
	if (op_flag_sync(op->flags))
		return BIT(0);

	switch (c.i.op) {
	case OP_SPECIAL:
		switch (c.r.op) {
		case OP_SPECIAL_SLL:
			if (known & BIT(c.r.rt)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] << c.r.imm;
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRL:
			if (known & BIT(c.r.rt)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] >> c.r.imm;
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRA:
			if (known & BIT(c.r.rt)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm;
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SLLV:
			/* Variable shifts: MIPS only uses the low 5 bits of
			 * the shift-amount register. */
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRLV:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRAV:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rt]
					  >> (v[c.r.rs] & 0x1f);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_ADD:
		case OP_SPECIAL_ADDU:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SUB:
		case OP_SPECIAL_SUBU:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] - v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_AND:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] & v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_OR:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] | v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_XOR:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_NOR:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SLT:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SLTU:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rs] < v[c.r.rt];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_MULT:
		case OP_SPECIAL_MULTU:
		case OP_SPECIAL_DIV:
		case OP_SPECIAL_DIVU:
			/* Results go to HI/LO (or to the GPRs named by
			 * rd/imm when the flag optimization is on); the
			 * values themselves are not tracked. */
			if (OPT_FLAG_MULT_DIV && c.r.rd)
				known &= ~BIT(c.r.rd);
			if (OPT_FLAG_MULT_DIV && c.r.imm)
				known &= ~BIT(c.r.imm);
			break;
		default:
			break;
		}
		break;
	case OP_META_MULT2:
	case OP_META_MULTU2:
		/* Multiply by 2^op: the 64-bit product of v[rs] << op is
		 * split across the rd (low) and imm (high) target fields. */
		if (OPT_FLAG_MULT_DIV && (known & BIT(c.r.rs))) {
			if (c.r.rd) {
				known |= BIT(c.r.rd);

				if (c.r.op < 32)
					v[c.r.rd] = v[c.r.rs] << c.r.op;
				else
					v[c.r.rd] = 0;
			}

			if (c.r.imm) {
				known |= BIT(c.r.imm);

				if (c.r.op >= 32)
					v[c.r.imm] = v[c.r.rs] << (c.r.op - 32);
				else if (c.i.op == OP_META_MULT2)
					v[c.r.imm] = (s32) v[c.r.rs] >> (32 - c.r.op);
				else
					v[c.r.imm] = v[c.r.rs] >> (32 - c.r.op);
			}
		} else {
			if (OPT_FLAG_MULT_DIV && c.r.rd)
				known &= ~BIT(c.r.rd);
			if (OPT_FLAG_MULT_DIV && c.r.imm)
				known &= ~BIT(c.r.imm);
		}
		break;
	case OP_REGIMM:
		break;
	case OP_ADDI:
	case OP_ADDIU:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_SLTI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_SLTIU:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_ANDI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] & c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_ORI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] | c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_XORI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] ^ c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_LUI:
		known |= BIT(c.i.rt);
		v[c.i.rt] = c.i.imm << 16;
		break;
	case OP_CP0:
		/* Reads from cop0 produce unknown values. */
		switch (c.r.rs) {
		case OP_CP0_MFC0:
		case OP_CP0_CFC0:
			known &= ~BIT(c.r.rt);
			break;
		}
		break;
	case OP_CP2:
		/* Reads from cop2 (GTE) produce unknown values. */
		if (c.r.op == OP_CP2_BASIC) {
			switch (c.r.rs) {
			case OP_CP2_BASIC_MFC2:
			case OP_CP2_BASIC_CFC2:
				known &= ~BIT(c.r.rt);
				break;
			}
		}
		break;
	case OP_LB:
	case OP_LH:
	case OP_LWL:
	case OP_LW:
	case OP_LBU:
	case OP_LHU:
	case OP_LWR:
	case OP_LWC2:
		/* Loaded values are unknown at compile time. */
		known &= ~BIT(c.i.rt);
		break;
	case OP_META_MOV:
		if (known & BIT(c.r.rs)) {
			known |= BIT(c.r.rd);
			v[c.r.rd] = v[c.r.rs];
		} else {
			known &= ~BIT(c.r.rd);
		}
		break;
	default:
		break;
	}

	return known;
}
768
/* Optimize the SLL #16/SRA #16 (cast to s16) and SLL #24/SRA #24 (cast to
 * s8) idioms: collapse the pair into a single META_EXTS/EXTC sign/zero
 * extension opcode, or — when the value was just produced by a LHU/LBU —
 * turn the load into LH/LB and NOP the shift pair entirely.
 * 'offset' points at the SRA; the matching SLL is located backwards. */
static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset)
{
	struct opcode *prev, *prev2 = NULL, *curr = &list[offset];
	struct opcode *to_change, *to_nop;
	int idx, idx2;

	if (curr->r.imm != 24 && curr->r.imm != 16)
		return;

	idx = find_prev_writer(list, offset, curr->r.rt);
	if (idx < 0)
		return;

	prev = &list[idx];

	/* Must be a SLL with the same shift amount feeding the SRA's input. */
	if (prev->i.op != OP_SPECIAL || prev->r.op != OP_SPECIAL_SLL ||
	    prev->r.imm != curr->r.imm || prev->r.rd != curr->r.rt)
		return;

	if (prev->r.rd != prev->r.rt && curr->r.rd != curr->r.rt) {
		/* sll rY, rX, 16
		 * ...
		 * srl rZ, rY, 16 */

		if (!reg_is_dead(list, offset, curr->r.rt) ||
		    reg_is_read_or_written(list, idx, offset, curr->r.rd))
			return;

		/* If rY is dead after the SRL, and rZ is not used after the SLL,
		 * we can change rY to rZ */

		pr_debug("Detected SLL/SRA with middle temp register\n");
		prev->r.rd = curr->r.rd;
		curr->r.rt = prev->r.rd;
	}

	/* We got a SLL/SRA combo. If imm #16, that's a cast to u16.
	 * If imm #24 that's a cast to u8.
	 *
	 * First of all, make sure that the target register of the SLL is not
	 * read before the SRA. */

	if (prev->r.rd == prev->r.rt) {
		/* sll rX, rX, 16
		 * ...
		 * srl rY, rX, 16 */
		to_change = curr;
		to_nop = prev;

		/* rX is used after the SRA - we cannot convert it. */
		if (prev->r.rd != curr->r.rd && !reg_is_dead(list, offset, prev->r.rd))
			return;
	} else {
		/* sll rY, rX, 16
		 * ...
		 * srl rY, rY, 16 */
		to_change = prev;
		to_nop = curr;
	}

	idx2 = find_prev_writer(list, idx, prev->r.rt);
	if (idx2 >= 0) {
		/* Note that PSX games sometimes do casts after
		 * a LHU or LBU; in this case we can change the
		 * load opcode to a LH or LB, and the cast can
		 * be changed to a MOV or a simple NOP. */

		prev2 = &list[idx2];

		if (curr->r.rd != prev2->i.rt &&
		    !reg_is_dead(list, offset, prev2->i.rt))
			prev2 = NULL;
		else if (curr->r.imm == 16 && prev2->i.op == OP_LHU)
			prev2->i.op = OP_LH;
		else if (curr->r.imm == 24 && prev2->i.op == OP_LBU)
			prev2->i.op = OP_LB;
		else
			prev2 = NULL;

		if (prev2) {
			if (curr->r.rd == prev2->i.rt) {
				/* Load already targets the SRA's destination:
				 * the cast becomes a plain NOP. */
				to_change->opcode = 0;
			} else if (reg_is_dead(list, offset, prev2->i.rt) &&
				   !reg_is_read_or_written(list, idx2 + 1, offset, curr->r.rd)) {
				/* The target register of the SRA is dead after the
				 * LBU/LHU; we can change the target register of the
				 * LBU/LHU to the one of the SRA. */
				prev2->i.rt = curr->r.rd;
				to_change->opcode = 0;
			} else {
				/* Registers differ and both live: keep a MOV
				 * from the load target to the SRA target. */
				to_change->i.op = OP_META_MOV;
				to_change->r.rd = curr->r.rd;
				to_change->r.rs = prev2->i.rt;
			}

			if (to_nop->r.imm == 24)
				pr_debug("Convert LBU+SLL+SRA to LB\n");
			else
				pr_debug("Convert LHU+SLL+SRA to LH\n");
		}
	}

	if (!prev2) {
		/* No load to fold into: replace the pair with a single
		 * sign-extension meta opcode (EXTC = s8, EXTS = s16). */
		pr_debug("Convert SLL/SRA #%u to EXT%c\n",
			 prev->r.imm,
			 prev->r.imm == 24 ? 'C' : 'S');

		if (to_change == prev) {
			to_change->i.rs = prev->r.rt;
			to_change->i.rt = curr->r.rd;
		} else {
			to_change->i.rt = curr->r.rd;
			to_change->i.rs = prev->r.rt;
		}

		if (to_nop->r.imm == 24)
			to_change->i.op = OP_META_EXTC;
		else
			to_change->i.op = OP_META_EXTS;
	}

	to_nop->opcode = 0;
}
892
02487de7
PC
/* NOP out a LUI whose value is already known to be in the target register
 * (duplicated LUI), and eliminate "LUI rX, 0" by rewriting the next reader
 * to use $zero directly when rX is dead afterwards. */
static void lightrec_remove_useless_lui(struct block *block, unsigned int offset,
					u32 known, u32 *values)
{
	struct opcode *list = block->opcode_list,
		      *op = &block->opcode_list[offset];
	int reader;

	/* The register already holds exactly this constant: drop the LUI. */
	if (!op_flag_sync(op->flags) && (known & BIT(op->i.rt)) &&
	    values[op->i.rt] == op->i.imm << 16) {
		pr_debug("Converting duplicated LUI to NOP\n");
		op->opcode = 0x0;
		return;
	}

	if (op->i.imm != 0 || op->i.rt == 0)
		return;

	reader = find_next_reader(list, offset + 1, op->i.rt);
	if (reader <= 0)
		return;

	if (opcode_writes_register(list[reader].c, op->i.rt) ||
	    reg_is_dead(list, reader, op->i.rt)) {
		pr_debug("Removing useless LUI 0x0\n");

		/* Redirect the reader's matching source field to $zero. */
		if (list[reader].i.rs == op->i.rt)
			list[reader].i.rs = 0;
		if (list[reader].i.op == OP_SPECIAL &&
		    list[reader].i.rt == op->i.rt)
			list[reader].i.rt = 0;
		op->opcode = 0x0;
	}
}
926
/* If the value built by this LUI is only ever used as a memory address
 * (fed to loads/stores) before being overwritten, rewrite its immediate
 * to the equivalent kuseg (unsegmented) address, saving address
 * translation at runtime. */
static void lightrec_modify_lui(struct block *block, unsigned int offset)
{
	union code c, *lui = &block->opcode_list[offset].c;
	bool stop = false, stop_next = false;
	unsigned int i;

	for (i = offset + 1; !stop && i < block->nb_ops; i++) {
		c = block->opcode_list[i].c;
		stop = stop_next;

		/* A store of the register itself, or any non-load use,
		 * exposes the raw value — bail out. */
		if ((opcode_is_store(c) && c.i.rt == lui->i.rt)
		    || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt)))
			break;

		if (opcode_writes_register(c, lui->i.rt)) {
			/* NOTE: '-' binds tighter than '<<', so this prints
			 * (i - 1) << 2 — the offset of the last use. */
			pr_debug("Convert LUI at offset 0x%x to kuseg\n",
				 i - 1 << 2);
			lui->i.imm = kunseg(lui->i.imm << 16) >> 16;
			break;
		}

		/* After a branch, only its delay slot is scanned. */
		if (has_delay_slot(c))
			stop_next = true;
	}
}
952
03535202
PC
/* Pass: rewrite J opcodes as BEQ $zero, $zero (an always-taken branch)
 * when the jump target lies within the signed 16-bit branch range —
 * branches are cheaper to recompile than absolute jumps.
 * Always returns 0. */
static int lightrec_transform_branches(struct lightrec_state *state,
				       struct block *block)
{
	struct opcode *op;
	unsigned int i;
	s32 offset;

	for (i = 0; i < block->nb_ops; i++) {
		op = &block->opcode_list[i];

		switch (op->i.op) {
		case OP_J:
			/* Transform J opcode into BEQ $zero, $zero if possible. */
			/* Branch offset in instruction units, relative to the
			 * delay slot (hence the trailing "- 1"). */
			offset = (s32)((block->pc & 0xf0000000) >> 2 | op->j.imm)
				- (s32)(block->pc >> 2) - (s32)i - 1;

			if (offset == (s16)offset) {
				pr_debug("Transform J into BEQ $zero, $zero\n");
				op->i.op = OP_BEQ;
				op->i.rs = 0;
				op->i.rt = 0;
				op->i.imm = offset;

			}
			fallthrough;
		default:
			break;
		}
	}

	return 0;
}
985
ba3814c1
PC
986static inline bool is_power_of_two(u32 value)
987{
988 return popcount32(value) == 1;
989}
990
/* Main opcode-transformation pass.  Walks the block while running constant
 * propagation, then: NOPs out useless opcodes, canonicalizes BEQ/BNE
 * register order, handles LUI rewrites, converts identity ORI/ADDI/shift/
 * OR/ADD/SUB forms to the MOV meta-opcode, and turns multiplications by a
 * known power of two into META_MULT2/MULTU2 shift opcodes.
 * Always returns 0. */
static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
	struct opcode *list = block->opcode_list;
	struct opcode *prev, *op = NULL;
	u32 known = BIT(0);
	u32 values[32] = { 0 };
	unsigned int i;
	u8 tmp;

	for (i = 0; i < block->nb_ops; i++) {
		prev = op;
		op = &list[i];

		if (prev)
			known = lightrec_propagate_consts(op, prev, known, values);

		/* Transform all opcodes detected as useless to real NOPs
		 * (0x0: SLL r0, r0, #0) */
		if (op->opcode != 0 && is_nop(op->c)) {
			pr_debug("Converting useless opcode 0x%08x to NOP\n",
					op->opcode);
			op->opcode = 0x0;
		}

		if (!op->opcode)
			continue;

		switch (op->i.op) {
		case OP_BEQ:
			/* Canonicalize: unconditional form uses $zero twice;
			 * otherwise keep the non-zero register in rs. */
			if (op->i.rs == op->i.rt) {
				op->i.rs = 0;
				op->i.rt = 0;
			} else if (op->i.rs == 0) {
				op->i.rs = op->i.rt;
				op->i.rt = 0;
			}
			break;

		case OP_BNE:
			if (op->i.rs == 0) {
				op->i.rs = op->i.rt;
				op->i.rt = 0;
			}
			break;

		case OP_LUI:
			/* Don't rewrite a LUI sitting in a delay slot. */
			if (!prev || !has_delay_slot(prev->c))
				lightrec_modify_lui(block, i);
			lightrec_remove_useless_lui(block, i, known, values);
			break;

		/* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
		 * with register $zero to the MOV meta-opcode */
		case OP_ORI:
		case OP_ADDI:
		case OP_ADDIU:
			if (op->i.imm == 0) {
				pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
				op->i.op = OP_META_MOV;
				op->r.rd = op->i.rt;
			}
			break;
		case OP_SPECIAL:
			switch (op->r.op) {
			case OP_SPECIAL_SRA:
				if (op->r.imm == 0) {
					pr_debug("Convert SRA #0 to MOV\n");
					op->i.op = OP_META_MOV;
					op->r.rs = op->r.rt;
					break;
				}

				lightrec_optimize_sll_sra(block->opcode_list, i);
				break;
			case OP_SPECIAL_SLL:
			case OP_SPECIAL_SRL:
				if (op->r.imm == 0) {
					pr_debug("Convert SLL/SRL #0 to MOV\n");
					op->i.op = OP_META_MOV;
					op->r.rs = op->r.rt;
				}
				break;
			case OP_SPECIAL_MULT:
			case OP_SPECIAL_MULTU:
				/* If rs holds a known power of two, swap the
				 * operands so the constant ends up in rt,
				 * then convert to a shift meta-opcode whose
				 * sub-op field holds log2 of the constant. */
				if ((known & BIT(op->r.rs)) &&
				    is_power_of_two(values[op->r.rs])) {
					tmp = op->c.i.rs;
					op->c.i.rs = op->c.i.rt;
					op->c.i.rt = tmp;
				} else if (!(known & BIT(op->r.rt)) ||
					   !is_power_of_two(values[op->r.rt])) {
					break;
				}

				pr_debug("Multiply by power-of-two: %u\n",
					 values[op->r.rt]);

				if (op->r.op == OP_SPECIAL_MULT)
					op->i.op = OP_META_MULT2;
				else
					op->i.op = OP_META_MULTU2;

				op->r.op = ffs32(values[op->r.rt]);
				break;
			case OP_SPECIAL_OR:
			case OP_SPECIAL_ADD:
			case OP_SPECIAL_ADDU:
				if (op->r.rs == 0) {
					pr_debug("Convert OR/ADD $zero to MOV\n");
					op->i.op = OP_META_MOV;
					op->r.rs = op->r.rt;
				}
				fallthrough;
			case OP_SPECIAL_SUB:
			case OP_SPECIAL_SUBU:
				if (op->r.rt == 0) {
					pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
					op->i.op = OP_META_MOV;
				}
				fallthrough;
			default:
				break;
			}
			fallthrough;
		default:
			break;
		}
	}

	return 0;
}
1122
ba3814c1
PC
1123static bool lightrec_can_switch_delay_slot(union code op, union code next_op)
1124{
1125 switch (op.i.op) {
1126 case OP_SPECIAL:
1127 switch (op.r.op) {
1128 case OP_SPECIAL_JALR:
1129 if (opcode_reads_register(next_op, op.r.rd) ||
1130 opcode_writes_register(next_op, op.r.rd))
1131 return false;
1132 fallthrough;
1133 case OP_SPECIAL_JR:
1134 if (opcode_writes_register(next_op, op.r.rs))
1135 return false;
1136 fallthrough;
1137 default:
1138 break;
1139 }
1140 fallthrough;
1141 case OP_J:
1142 break;
1143 case OP_JAL:
1144 if (opcode_reads_register(next_op, 31) ||
1145 opcode_writes_register(next_op, 31))
1146 return false;;
1147
1148 break;
1149 case OP_BEQ:
1150 case OP_BNE:
1151 if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
1152 return false;
1153 fallthrough;
1154 case OP_BLEZ:
1155 case OP_BGTZ:
1156 if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
1157 return false;
1158 break;
1159 case OP_REGIMM:
1160 switch (op.r.rt) {
1161 case OP_REGIMM_BLTZAL:
1162 case OP_REGIMM_BGEZAL:
1163 if (opcode_reads_register(next_op, 31) ||
1164 opcode_writes_register(next_op, 31))
1165 return false;
1166 fallthrough;
1167 case OP_REGIMM_BLTZ:
1168 case OP_REGIMM_BGEZ:
1169 if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
1170 return false;
1171 break;
1172 }
1173 fallthrough;
1174 default:
1175 break;
1176 }
1177
1178 return true;
1179}
1180
/* Pass: where safe, swap each branch with its delay slot instruction so
 * the former delay slot executes before the branch, letting the emitter
 * skip delay-slot handling (the branch is then flagged LIGHTREC_NO_DS).
 * Always returns 0. */
static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
{
	struct opcode *list, *next = &block->opcode_list[0];
	unsigned int i;
	union code op, next_op;
	u32 flags;

	for (i = 0; i < block->nb_ops - 1; i++) {
		list = next;
		next = &block->opcode_list[i + 1];
		next_op = next->c;
		op = list->c;

		/* Skip non-branches, branches already marked, emulated
		 * branches, and NOP branch/slot pairs. */
		if (!has_delay_slot(op) || op_flag_no_ds(list->flags) ||
		    op_flag_emulate_branch(list->flags) ||
		    op.opcode == 0 || next_op.opcode == 0)
			continue;

		/* This branch is itself a delay slot of a previous branch:
		 * leave it alone. */
		if (i && has_delay_slot(block->opcode_list[i - 1].c) &&
		    !op_flag_no_ds(block->opcode_list[i - 1].flags))
			continue;

		if (op_flag_sync(next->flags))
			continue;

		if (!lightrec_can_switch_delay_slot(list->c, next_op))
			continue;

		pr_debug("Swap branch and delay slot opcodes "
			 "at offsets 0x%x / 0x%x\n",
			 i << 2, (i + 1) << 2);

		/* Swap the opcodes, keeping SYNC attached to the first slot
		 * and marking the branch as having no delay slot. */
		flags = next->flags | (list->flags & LIGHTREC_SYNC);
		list->c = next_op;
		next->c = op;
		next->flags = (list->flags | LIGHTREC_NO_DS) & ~LIGHTREC_SYNC;
		list->flags = flags | LIGHTREC_NO_DS;
	}

	return 0;
}
1222
98fa08a5
PC
/* Shrink the block's opcode list to 'new_size' opcodes by allocating a
 * smaller list, copying the kept opcodes, and freeing the old list.
 * Returns 0 on success, -EINVAL for an invalid size, -ENOMEM on
 * allocation failure (original list left untouched). */
static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
{
	struct opcode_list *list, *old_list;

	if (new_size >= block->nb_ops) {
		pr_err("Invalid shrink size (%u vs %u)\n",
		       new_size, block->nb_ops);
		return -EINVAL;
	}

	list = lightrec_malloc(state, MEM_FOR_IR,
			       sizeof(*list) + sizeof(struct opcode) * new_size);
	if (!list) {
		pr_err("Unable to allocate memory\n");
		return -ENOMEM;
	}

	/* block->opcode_list points at the 'ops' flexible member of a
	 * struct opcode_list — recover the container to copy from it. */
	old_list = container_of(block->opcode_list, struct opcode_list, ops);
	memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size);

	lightrec_free_opcode_list(state, block->opcode_list);
	list->nb_ops = new_size;
	block->nb_ops = new_size;
	block->opcode_list = list->ops;

	pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
		 block->pc, new_size);

	return 0;
}
1253
/* Pass: detect 'impossible' branches — those whose delay slot contains a
 * load with a load-delay, another branch, or an RFE — and flag them with
 * LIGHTREC_EMULATE_BRANCH so they are interpreted instead of recompiled.
 * If the very first opcode is such a branch, the block is truncated to
 * just the branch and its delay slot.  Returns 0 or a shrink error. */
static int lightrec_detect_impossible_branches(struct lightrec_state *state,
					       struct block *block)
{
	struct opcode *op, *list = block->opcode_list, *next = &list[0];
	unsigned int i;
	int ret = 0;
	s16 offset;

	for (i = 0; i < block->nb_ops - 1; i++) {
		op = next;
		next = &list[i + 1];

		if (!has_delay_slot(op->c) ||
		    (!load_in_delay_slot(next->c) &&
		     !has_delay_slot(next->c) &&
		     !(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE)))
			continue;

		if (op->c.opcode == next->c.opcode) {
			/* The delay slot is the exact same opcode as the branch
			 * opcode: this is effectively a NOP */
			next->c.opcode = 0;
			continue;
		}

		offset = i + 1 + (s16)op->i.imm;
		if (load_in_delay_slot(next->c) &&
		    (offset >= 0 && offset < block->nb_ops) &&
		    !opcode_reads_register(list[offset].c, next->c.i.rt)) {
			/* The 'impossible' branch is a local branch - we can
			 * verify here that the first opcode of the target does
			 * not use the target register of the delay slot */

			pr_debug("Branch at offset 0x%x has load delay slot, "
				 "but is local and dest opcode does not read "
				 "dest register\n", i << 2);
			continue;
		}

		op->flags |= LIGHTREC_EMULATE_BRANCH;

		if (op == list) {
			pr_debug("First opcode of block PC 0x%08x is an impossible branch\n",
				 block->pc);

			/* If the first opcode is an 'impossible' branch, we
			 * only keep the first two opcodes of the block (the
			 * branch itself + its delay slot) */
			if (block->nb_ops > 2)
				ret = shrink_opcode_list(state, block, 2);
			break;
		}
	}

	return ret;
}
1310
98fa08a5 1311static int lightrec_local_branches(struct lightrec_state *state, struct block *block)
d16005f8 1312{
98fa08a5
PC
1313 struct opcode *list;
1314 unsigned int i;
d16005f8 1315 s32 offset;
d16005f8 1316
98fa08a5
PC
1317 for (i = 0; i < block->nb_ops; i++) {
1318 list = &block->opcode_list[i];
1319
1320 if (should_emulate(list))
d16005f8
PC
1321 continue;
1322
1323 switch (list->i.op) {
1324 case OP_BEQ:
1325 case OP_BNE:
1326 case OP_BLEZ:
1327 case OP_BGTZ:
1328 case OP_REGIMM:
98fa08a5 1329 offset = i + 1 + (s16)list->i.imm;
d16005f8
PC
1330 if (offset >= 0 && offset < block->nb_ops)
1331 break;
d8b04acd
PC
1332 fallthrough;
1333 default:
d16005f8
PC
1334 continue;
1335 }
1336
1337 pr_debug("Found local branch to offset 0x%x\n", offset << 2);
1338
98fa08a5
PC
1339 if (should_emulate(&block->opcode_list[offset])) {
1340 pr_debug("Branch target must be emulated - skip\n");
1341 continue;
1342 }
d16005f8 1343
98fa08a5
PC
1344 if (offset && has_delay_slot(block->opcode_list[offset - 1].c)) {
1345 pr_debug("Branch target is a delay slot - skip\n");
1346 continue;
1347 }
d16005f8 1348
98fa08a5 1349 pr_debug("Adding sync at offset 0x%x\n", offset << 2);
d16005f8 1350
98fa08a5
PC
1351 block->opcode_list[offset].flags |= LIGHTREC_SYNC;
1352 list->flags |= LIGHTREC_LOCAL_BRANCH;
d16005f8
PC
1353 }
1354
1355 return 0;
1356}
1357
1358bool has_delay_slot(union code op)
1359{
1360 switch (op.i.op) {
1361 case OP_SPECIAL:
1362 switch (op.r.op) {
1363 case OP_SPECIAL_JR:
1364 case OP_SPECIAL_JALR:
1365 return true;
1366 default:
1367 return false;
1368 }
1369 case OP_J:
1370 case OP_JAL:
1371 case OP_BEQ:
1372 case OP_BNE:
1373 case OP_BLEZ:
1374 case OP_BGTZ:
1375 case OP_REGIMM:
d16005f8
PC
1376 return true;
1377 default:
1378 return false;
1379 }
1380}
1381
98fa08a5 1382bool should_emulate(const struct opcode *list)
d16005f8 1383{
03535202
PC
1384 return op_flag_emulate_branch(list->flags) && has_delay_slot(list->c);
1385}
1386
1387static bool op_writes_rd(union code c)
1388{
1389 switch (c.i.op) {
1390 case OP_SPECIAL:
1391 case OP_META_MOV:
1392 return true;
1393 default:
1394 return false;
1395 }
1396}
1397
1398static void lightrec_add_reg_op(struct opcode *op, u8 reg, u32 reg_op)
1399{
1400 if (op_writes_rd(op->c) && reg == op->r.rd)
1401 op->flags |= LIGHTREC_REG_RD(reg_op);
1402 else if (op->i.rs == reg)
1403 op->flags |= LIGHTREC_REG_RS(reg_op);
1404 else if (op->i.rt == reg)
1405 op->flags |= LIGHTREC_REG_RT(reg_op);
1406 else
1407 pr_debug("Cannot add unload/clean/discard flag: "
1408 "opcode does not touch register %s!\n",
1409 lightrec_reg_name(reg));
d16005f8
PC
1410}
1411
98fa08a5 1412static void lightrec_add_unload(struct opcode *op, u8 reg)
d16005f8 1413{
03535202
PC
1414 lightrec_add_reg_op(op, reg, LIGHTREC_REG_UNLOAD);
1415}
d16005f8 1416
03535202
PC
1417static void lightrec_add_discard(struct opcode *op, u8 reg)
1418{
1419 lightrec_add_reg_op(op, reg, LIGHTREC_REG_DISCARD);
1420}
1421
1422static void lightrec_add_clean(struct opcode *op, u8 reg)
1423{
1424 lightrec_add_reg_op(op, reg, LIGHTREC_REG_CLEAN);
1425}
1426
1427static void
1428lightrec_early_unload_sync(struct opcode *list, s16 *last_r, s16 *last_w)
1429{
1430 unsigned int reg;
1431 s16 offset;
1432
1433 for (reg = 0; reg < 34; reg++) {
1434 offset = s16_max(last_w[reg], last_r[reg]);
1435
1436 if (offset >= 0)
1437 lightrec_add_unload(&list[offset], reg);
1438 }
1439
1440 memset(last_r, 0xff, sizeof(*last_r) * 34);
1441 memset(last_w, 0xff, sizeof(*last_w) * 34);
98fa08a5 1442}
d16005f8 1443
98fa08a5
PC
1444static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
1445{
03535202 1446 u16 i, offset;
98fa08a5 1447 struct opcode *op;
03535202
PC
1448 s16 last_r[34], last_w[34], last_sync = 0, next_sync = 0;
1449 u64 mask_r, mask_w, dirty = 0, loaded = 0;
98fa08a5 1450 u8 reg;
d16005f8 1451
03535202
PC
1452 memset(last_r, 0xff, sizeof(last_r));
1453 memset(last_w, 0xff, sizeof(last_w));
98fa08a5 1454
03535202
PC
1455 /*
1456 * Clean if:
1457 * - the register is dirty, and is read again after a branch opcode
1458 *
1459 * Unload if:
1460 * - the register is dirty or loaded, and is not read again
1461 * - the register is dirty or loaded, and is written again after a branch opcode
1462 * - the next opcode has the SYNC flag set
1463 *
1464 * Discard if:
1465 * - the register is dirty or loaded, and is written again
1466 */
98fa08a5 1467
03535202
PC
1468 for (i = 0; i < block->nb_ops; i++) {
1469 op = &block->opcode_list[i];
1470
1471 if (op_flag_sync(op->flags) || should_emulate(op)) {
1472 /* The next opcode has the SYNC flag set, or is a branch
1473 * that should be emulated: unload all registers. */
1474 lightrec_early_unload_sync(block->opcode_list, last_r, last_w);
1475 dirty = 0;
1476 loaded = 0;
d16005f8
PC
1477 }
1478
03535202
PC
1479 if (next_sync == i) {
1480 last_sync = i;
1481 pr_debug("Last sync: 0x%x\n", last_sync << 2);
1482 }
d16005f8 1483
03535202
PC
1484 if (has_delay_slot(op->c)) {
1485 next_sync = i + 1 + !op_flag_no_ds(op->flags);
1486 pr_debug("Next sync: 0x%x\n", next_sync << 2);
1487 }
d16005f8 1488
03535202
PC
1489 mask_r = opcode_read_mask(op->c);
1490 mask_w = opcode_write_mask(op->c);
98fa08a5 1491
03535202
PC
1492 for (reg = 0; reg < 34; reg++) {
1493 if (mask_r & BIT(reg)) {
1494 if (dirty & BIT(reg) && last_w[reg] < last_sync) {
1495 /* The register is dirty, and is read
1496 * again after a branch: clean it */
1497
1498 lightrec_add_clean(&block->opcode_list[last_w[reg]], reg);
1499 dirty &= ~BIT(reg);
1500 loaded |= BIT(reg);
1501 }
1502
1503 last_r[reg] = i;
1504 }
1505
1506 if (mask_w & BIT(reg)) {
1507 if ((dirty & BIT(reg) && last_w[reg] < last_sync) ||
1508 (loaded & BIT(reg) && last_r[reg] < last_sync)) {
1509 /* The register is dirty or loaded, and
1510 * is written again after a branch:
1511 * unload it */
1512
1513 offset = s16_max(last_w[reg], last_r[reg]);
1514 lightrec_add_unload(&block->opcode_list[offset], reg);
1515 dirty &= ~BIT(reg);
1516 loaded &= ~BIT(reg);
1517 } else if (!(mask_r & BIT(reg)) &&
1518 ((dirty & BIT(reg) && last_w[reg] > last_sync) ||
1519 (loaded & BIT(reg) && last_r[reg] > last_sync))) {
1520 /* The register is dirty or loaded, and
1521 * is written again: discard it */
1522
1523 offset = s16_max(last_w[reg], last_r[reg]);
1524 lightrec_add_discard(&block->opcode_list[offset], reg);
1525 dirty &= ~BIT(reg);
1526 loaded &= ~BIT(reg);
1527 }
1528
1529 last_w[reg] = i;
1530 }
98fa08a5 1531
03535202
PC
1532 }
1533
1534 dirty |= mask_w;
1535 loaded |= mask_r;
d16005f8
PC
1536 }
1537
03535202
PC
1538 /* Unload all registers that are dirty or loaded at the end of block. */
1539 lightrec_early_unload_sync(block->opcode_list, last_r, last_w);
1540
d16005f8
PC
1541 return 0;
1542}
1543
98fa08a5 1544static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
d16005f8 1545{
02487de7
PC
1546 struct opcode *prev = NULL, *list = NULL;
1547 enum psx_map psx_map;
d16005f8
PC
1548 u32 known = BIT(0);
1549 u32 values[32] = { 0 };
98fa08a5 1550 unsigned int i;
02487de7 1551 u32 val, kunseg_val;
ba3814c1 1552 bool no_mask;
98fa08a5
PC
1553
1554 for (i = 0; i < block->nb_ops; i++) {
22eee2ac 1555 prev = list;
98fa08a5 1556 list = &block->opcode_list[i];
d16005f8 1557
22eee2ac
PC
1558 if (prev)
1559 known = lightrec_propagate_consts(list, prev, known, values);
1560
d16005f8
PC
1561 switch (list->i.op) {
1562 case OP_SB:
1563 case OP_SH:
1564 case OP_SW:
98fa08a5
PC
1565 if (OPT_FLAG_STORES) {
1566 /* Mark all store operations that target $sp or $gp
1567 * as not requiring code invalidation. This is based
1568 * on the heuristic that stores using one of these
1569 * registers as address will never hit a code page. */
1570 if (list->i.rs >= 28 && list->i.rs <= 29 &&
1571 !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
1572 pr_debug("Flaging opcode 0x%08x as not "
1573 "requiring invalidation\n",
1574 list->opcode);
1575 list->flags |= LIGHTREC_NO_INVALIDATE;
03535202 1576 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
98fa08a5
PC
1577 }
1578
1579 /* Detect writes whose destination address is inside the
1580 * current block, using constant propagation. When these
1581 * occur, we mark the blocks as not compilable. */
1582 if ((known & BIT(list->i.rs)) &&
1583 kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
1584 kunseg(values[list->i.rs]) < (kunseg(block->pc) +
1585 block->nb_ops * 4)) {
1586 pr_debug("Self-modifying block detected\n");
ba3814c1 1587 block_set_flags(block, BLOCK_NEVER_COMPILE);
98fa08a5
PC
1588 list->flags |= LIGHTREC_SMC;
1589 }
1590 }
d8b04acd
PC
1591 fallthrough;
1592 case OP_SWL:
98fa08a5
PC
1593 case OP_SWR:
1594 case OP_SWC2:
1595 case OP_LB:
1596 case OP_LBU:
1597 case OP_LH:
1598 case OP_LHU:
1599 case OP_LW:
1600 case OP_LWL:
1601 case OP_LWR:
1602 case OP_LWC2:
1603 if (OPT_FLAG_IO && (known & BIT(list->i.rs))) {
22eee2ac 1604 val = values[list->i.rs] + (s16) list->i.imm;
02487de7
PC
1605 kunseg_val = kunseg(val);
1606 psx_map = lightrec_get_map_idx(state, kunseg_val);
1607
03535202 1608 list->flags &= ~LIGHTREC_IO_MASK;
ba3814c1 1609 no_mask = val == kunseg_val;
03535202 1610
02487de7
PC
1611 switch (psx_map) {
1612 case PSX_MAP_KERNEL_USER_RAM:
ba3814c1 1613 if (no_mask)
02487de7 1614 list->flags |= LIGHTREC_NO_MASK;
d8b04acd 1615 fallthrough;
02487de7
PC
1616 case PSX_MAP_MIRROR1:
1617 case PSX_MAP_MIRROR2:
1618 case PSX_MAP_MIRROR3:
22eee2ac
PC
1619 pr_debug("Flaging opcode %u as RAM access\n", i);
1620 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
ba3814c1
PC
1621 if (no_mask && state->mirrors_mapped)
1622 list->flags |= LIGHTREC_NO_MASK;
02487de7
PC
1623 break;
1624 case PSX_MAP_BIOS:
22eee2ac
PC
1625 pr_debug("Flaging opcode %u as BIOS access\n", i);
1626 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS);
ba3814c1
PC
1627 if (no_mask)
1628 list->flags |= LIGHTREC_NO_MASK;
02487de7
PC
1629 break;
1630 case PSX_MAP_SCRATCH_PAD:
22eee2ac
PC
1631 pr_debug("Flaging opcode %u as scratchpad access\n", i);
1632 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH);
ba3814c1
PC
1633 if (no_mask)
1634 list->flags |= LIGHTREC_NO_MASK;
02487de7
PC
1635
1636 /* Consider that we're never going to run code from
1637 * the scratchpad. */
1638 list->flags |= LIGHTREC_NO_INVALIDATE;
1639 break;
ba3814c1
PC
1640 case PSX_MAP_HW_REGISTERS:
1641 if (state->ops.hw_direct &&
1642 state->ops.hw_direct(kunseg_val,
1643 opcode_is_store(list->c),
1644 opcode_get_io_size(list->c))) {
1645 pr_debug("Flagging opcode %u as direct I/O access\n",
1646 i);
1647 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT_HW);
1648 break;
1649 }
1650 fallthrough;
02487de7
PC
1651 default:
1652 pr_debug("Flagging opcode %u as I/O access\n",
1653 i);
1654 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
1655 break;
98fa08a5 1656 }
d16005f8 1657 }
d8b04acd
PC
1658 fallthrough;
1659 default:
d16005f8
PC
1660 break;
1661 }
d16005f8
PC
1662 }
1663
1664 return 0;
1665}
1666
98fa08a5
PC
1667static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
1668 const struct opcode *last,
1669 u32 mask, bool sync, bool mflo, bool another)
d16005f8 1670{
98fa08a5
PC
1671 const struct opcode *op, *next = &block->opcode_list[offset];
1672 u32 old_mask;
1673 u8 reg2, reg = mflo ? REG_LO : REG_HI;
1674 u16 branch_offset;
1675 unsigned int i;
1676
1677 for (i = offset; i < block->nb_ops; i++) {
1678 op = next;
1679 next = &block->opcode_list[i + 1];
1680 old_mask = mask;
1681
1682 /* If any other opcode writes or reads to the register
1683 * we'd use, then we cannot use it anymore. */
1684 mask |= opcode_read_mask(op->c);
1685 mask |= opcode_write_mask(op->c);
1686
03535202 1687 if (op_flag_sync(op->flags))
98fa08a5 1688 sync = true;
d16005f8 1689
d16005f8
PC
1690 switch (op->i.op) {
1691 case OP_BEQ:
1692 case OP_BNE:
1693 case OP_BLEZ:
1694 case OP_BGTZ:
1695 case OP_REGIMM:
d16005f8 1696 /* TODO: handle backwards branches too */
03535202 1697 if (!last && op_flag_local_branch(op->flags) &&
d16005f8 1698 (s16)op->c.i.imm >= 0) {
98fa08a5 1699 branch_offset = i + 1 + (s16)op->c.i.imm
03535202 1700 - !!op_flag_no_ds(op->flags);
98fa08a5
PC
1701
1702 reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
1703 mask, sync, mflo, false);
1704 reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
1705 mask, sync, mflo, false);
1706 if (reg > 0 && reg == reg2)
1707 return reg;
1708 if (!reg && !reg2)
1709 return 0;
d16005f8 1710 }
98fa08a5
PC
1711
1712 return mflo ? REG_LO : REG_HI;
ba3814c1
PC
1713 case OP_META_MULT2:
1714 case OP_META_MULTU2:
1715 return 0;
d16005f8
PC
1716 case OP_SPECIAL:
1717 switch (op->r.op) {
1718 case OP_SPECIAL_MULT:
1719 case OP_SPECIAL_MULTU:
1720 case OP_SPECIAL_DIV:
1721 case OP_SPECIAL_DIVU:
98fa08a5 1722 return 0;
d16005f8 1723 case OP_SPECIAL_MTHI:
98fa08a5
PC
1724 if (!mflo)
1725 return 0;
1726 continue;
1727 case OP_SPECIAL_MTLO:
1728 if (mflo)
1729 return 0;
1730 continue;
d16005f8 1731 case OP_SPECIAL_JR:
98fa08a5
PC
1732 if (op->r.rs != 31)
1733 return reg;
1734
03535202 1735 if (!sync && !op_flag_no_ds(op->flags) &&
98fa08a5
PC
1736 (next->i.op == OP_SPECIAL) &&
1737 ((!mflo && next->r.op == OP_SPECIAL_MFHI) ||
1738 (mflo && next->r.op == OP_SPECIAL_MFLO)))
1739 return next->r.rd;
1740
1741 return 0;
d16005f8 1742 case OP_SPECIAL_JALR:
98fa08a5 1743 return reg;
d16005f8 1744 case OP_SPECIAL_MFHI:
98fa08a5
PC
1745 if (!mflo) {
1746 if (another)
1747 return op->r.rd;
1748 /* Must use REG_HI if there is another MFHI target*/
1749 reg2 = get_mfhi_mflo_reg(block, i + 1, next,
1750 0, sync, mflo, true);
1751 if (reg2 > 0 && reg2 != REG_HI)
1752 return REG_HI;
1753
1754 if (!sync && !(old_mask & BIT(op->r.rd)))
1755 return op->r.rd;
1756 else
1757 return REG_HI;
1758 }
1759 continue;
1760 case OP_SPECIAL_MFLO:
1761 if (mflo) {
1762 if (another)
1763 return op->r.rd;
1764 /* Must use REG_LO if there is another MFLO target*/
1765 reg2 = get_mfhi_mflo_reg(block, i + 1, next,
1766 0, sync, mflo, true);
1767 if (reg2 > 0 && reg2 != REG_LO)
1768 return REG_LO;
1769
1770 if (!sync && !(old_mask & BIT(op->r.rd)))
1771 return op->r.rd;
1772 else
1773 return REG_LO;
1774 }
d16005f8 1775 continue;
98fa08a5
PC
1776 default:
1777 break;
d16005f8 1778 }
98fa08a5 1779
d8b04acd 1780 fallthrough;
d16005f8
PC
1781 default:
1782 continue;
1783 }
1784 }
1785
98fa08a5
PC
1786 return reg;
1787}
1788
1789static void lightrec_replace_lo_hi(struct block *block, u16 offset,
1790 u16 last, bool lo)
1791{
1792 unsigned int i;
1793 u32 branch_offset;
1794
1795 /* This function will remove the following MFLO/MFHI. It must be called
1796 * only if get_mfhi_mflo_reg() returned a non-zero value. */
1797
1798 for (i = offset; i < last; i++) {
1799 struct opcode *op = &block->opcode_list[i];
1800
1801 switch (op->i.op) {
1802 case OP_BEQ:
1803 case OP_BNE:
1804 case OP_BLEZ:
1805 case OP_BGTZ:
1806 case OP_REGIMM:
1807 /* TODO: handle backwards branches too */
03535202 1808 if (op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) {
98fa08a5 1809 branch_offset = i + 1 + (s16)op->c.i.imm
03535202 1810 - !!op_flag_no_ds(op->flags);
98fa08a5
PC
1811
1812 lightrec_replace_lo_hi(block, branch_offset, last, lo);
1813 lightrec_replace_lo_hi(block, i + 1, branch_offset, lo);
1814 }
1815 break;
1816
1817 case OP_SPECIAL:
1818 if (lo && op->r.op == OP_SPECIAL_MFLO) {
1819 pr_debug("Removing MFLO opcode at offset 0x%x\n",
1820 i << 2);
1821 op->opcode = 0;
1822 return;
1823 } else if (!lo && op->r.op == OP_SPECIAL_MFHI) {
1824 pr_debug("Removing MFHI opcode at offset 0x%x\n",
1825 i << 2);
1826 op->opcode = 0;
1827 return;
1828 }
1829
d8b04acd 1830 fallthrough;
98fa08a5
PC
1831 default:
1832 break;
1833 }
1834 }
d16005f8
PC
1835}
1836
/* On MIPS hosts the native DIV behaves like the guest's, so the
 * division-by-zero guard can always be skipped. */
static bool lightrec_always_skip_div_check(void)
{
#ifdef __mips__
	return true;
#else
	return false;
#endif
}
1845
98fa08a5 1846static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
d16005f8 1847{
22eee2ac 1848 struct opcode *prev, *list = NULL;
98fa08a5
PC
1849 u8 reg_hi, reg_lo;
1850 unsigned int i;
fd58fa32
PC
1851 u32 known = BIT(0);
1852 u32 values[32] = { 0 };
98fa08a5
PC
1853
1854 for (i = 0; i < block->nb_ops - 1; i++) {
22eee2ac 1855 prev = list;
98fa08a5 1856 list = &block->opcode_list[i];
d16005f8 1857
22eee2ac
PC
1858 if (prev)
1859 known = lightrec_propagate_consts(list, prev, known, values);
1860
ba3814c1
PC
1861 switch (list->i.op) {
1862 case OP_SPECIAL:
1863 switch (list->r.op) {
1864 case OP_SPECIAL_DIV:
1865 case OP_SPECIAL_DIVU:
1866 /* If we are dividing by a non-zero constant, don't
1867 * emit the div-by-zero check. */
1868 if (lightrec_always_skip_div_check() ||
1869 ((known & BIT(list->c.r.rt)) && values[list->c.r.rt]))
1870 list->flags |= LIGHTREC_NO_DIV_CHECK;
1871 fallthrough;
1872 case OP_SPECIAL_MULT:
1873 case OP_SPECIAL_MULTU:
1874 break;
1875 default:
1876 continue;
1877 }
d8b04acd 1878 fallthrough;
ba3814c1
PC
1879 case OP_META_MULT2:
1880 case OP_META_MULTU2:
d16005f8
PC
1881 break;
1882 default:
1883 continue;
1884 }
1885
98fa08a5
PC
1886 /* Don't support opcodes in delay slots */
1887 if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
03535202 1888 op_flag_no_ds(list->flags)) {
d16005f8 1889 continue;
fd58fa32 1890 }
d16005f8 1891
98fa08a5
PC
1892 reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
1893 if (reg_lo == 0) {
1894 pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
1895 " not writing LO\n", i << 2);
1896 list->flags |= LIGHTREC_NO_LO;
1897 }
1898
1899 reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false);
1900 if (reg_hi == 0) {
1901 pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
1902 " not writing HI\n", i << 2);
1903 list->flags |= LIGHTREC_NO_HI;
1904 }
1905
1906 if (!reg_lo && !reg_hi) {
1907 pr_debug("Both LO/HI unused in this block, they will "
1908 "probably be used in parent block - removing "
1909 "flags.\n");
1910 list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI);
1911 }
1912
1913 if (reg_lo > 0 && reg_lo != REG_LO) {
1914 pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n",
1915 lightrec_reg_name(reg_lo), list->r.rs, list->r.rt);
1916
1917 lightrec_replace_lo_hi(block, i + 1, block->nb_ops, true);
1918 list->r.rd = reg_lo;
1919 } else {
1920 list->r.rd = 0;
1921 }
1922
1923 if (reg_hi > 0 && reg_hi != REG_HI) {
1924 pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n",
1925 lightrec_reg_name(reg_hi), list->r.rs, list->r.rt);
1926
1927 lightrec_replace_lo_hi(block, i + 1, block->nb_ops, false);
1928 list->r.imm = reg_hi;
1929 } else {
1930 list->r.imm = 0;
1931 }
1932 }
1933
1934 return 0;
1935}
1936
1937static bool remove_div_sequence(struct block *block, unsigned int offset)
1938{
1939 struct opcode *op;
1940 unsigned int i, found = 0;
1941
1942 /*
1943 * Scan for the zero-checking sequence that GCC automatically introduced
1944 * after most DIV/DIVU opcodes. This sequence checks the value of the
1945 * divisor, and if zero, executes a BREAK opcode, causing the BIOS
1946 * handler to crash the PS1.
1947 *
1948 * For DIV opcodes, this sequence additionally checks that the signed
1949 * operation does not overflow.
1950 *
1951 * With the assumption that the games never crashed the PS1, we can
1952 * therefore assume that the games never divided by zero or overflowed,
1953 * and these sequences can be removed.
1954 */
1955
1956 for (i = offset; i < block->nb_ops; i++) {
1957 op = &block->opcode_list[i];
1958
1959 if (!found) {
1960 if (op->i.op == OP_SPECIAL &&
1961 (op->r.op == OP_SPECIAL_DIV || op->r.op == OP_SPECIAL_DIVU))
1962 break;
1963
1964 if ((op->opcode & 0xfc1fffff) == 0x14000002) {
1965 /* BNE ???, zero, +8 */
1966 found++;
1967 } else {
1968 offset++;
1969 }
1970 } else if (found == 1 && !op->opcode) {
1971 /* NOP */
1972 found++;
1973 } else if (found == 2 && op->opcode == 0x0007000d) {
1974 /* BREAK 0x1c00 */
1975 found++;
1976 } else if (found == 3 && op->opcode == 0x2401ffff) {
1977 /* LI at, -1 */
1978 found++;
1979 } else if (found == 4 && (op->opcode & 0xfc1fffff) == 0x14010004) {
1980 /* BNE ???, at, +16 */
1981 found++;
1982 } else if (found == 5 && op->opcode == 0x3c018000) {
1983 /* LUI at, 0x8000 */
1984 found++;
1985 } else if (found == 6 && (op->opcode & 0x141fffff) == 0x14010002) {
1986 /* BNE ???, at, +16 */
1987 found++;
1988 } else if (found == 7 && !op->opcode) {
1989 /* NOP */
1990 found++;
1991 } else if (found == 8 && op->opcode == 0x0006000d) {
1992 /* BREAK 0x1800 */
1993 found++;
1994 break;
1995 } else {
1996 break;
1997 }
1998 }
1999
2000 if (found >= 3) {
2001 if (found != 9)
2002 found = 3;
2003
2004 pr_debug("Removing DIV%s sequence at offset 0x%x\n",
2005 found == 9 ? "" : "U", offset << 2);
2006
2007 for (i = 0; i < found; i++)
2008 block->opcode_list[offset + i].opcode = 0;
2009
2010 return true;
2011 }
2012
2013 return false;
2014}
2015
2016static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state,
2017 struct block *block)
2018{
2019 struct opcode *op;
2020 unsigned int i;
2021
2022 for (i = 0; i < block->nb_ops; i++) {
2023 op = &block->opcode_list[i];
2024
2025 if (op->i.op == OP_SPECIAL &&
2026 (op->r.op == OP_SPECIAL_DIVU || op->r.op == OP_SPECIAL_DIV) &&
2027 remove_div_sequence(block, i + 1))
2028 op->flags |= LIGHTREC_NO_DIV_CHECK;
2029 }
2030
2031 return 0;
2032}
2033
2034static const u32 memset_code[] = {
2035 0x10a00006, // beqz a1, 2f
2036 0x24a2ffff, // addiu v0,a1,-1
2037 0x2403ffff, // li v1,-1
2038 0xac800000, // 1: sw zero,0(a0)
2039 0x2442ffff, // addiu v0,v0,-1
2040 0x1443fffd, // bne v0,v1, 1b
2041 0x24840004, // addiu a0,a0,4
2042 0x03e00008, // 2: jr ra
2043 0x00000000, // nop
2044};
2045
2046static int lightrec_replace_memset(struct lightrec_state *state, struct block *block)
2047{
2048 unsigned int i;
2049 union code c;
2050
2051 for (i = 0; i < block->nb_ops; i++) {
2052 c = block->opcode_list[i].c;
2053
2054 if (c.opcode != memset_code[i])
2055 return 0;
2056
2057 if (i == ARRAY_SIZE(memset_code) - 1) {
2058 /* success! */
2059 pr_debug("Block at PC 0x%x is a memset\n", block->pc);
ba3814c1
PC
2060 block_set_flags(block,
2061 BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE);
98fa08a5
PC
2062
2063 /* Return non-zero to skip other optimizers. */
2064 return 1;
d16005f8
PC
2065 }
2066 }
2067
2068 return 0;
2069}
2070
98fa08a5
PC
2071static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
2072 IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
2073 IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
2074 IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
03535202 2075 IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_branches),
98fa08a5
PC
2076 IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches),
2077 IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops),
2078 IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots),
2079 IF_OPT(OPT_FLAG_IO || OPT_FLAG_STORES, &lightrec_flag_io),
2080 IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
2081 IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
d16005f8
PC
2082};
2083
98fa08a5 2084int lightrec_optimize(struct lightrec_state *state, struct block *block)
d16005f8
PC
2085{
2086 unsigned int i;
98fa08a5 2087 int ret;
d16005f8
PC
2088
2089 for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
98fa08a5
PC
2090 if (lightrec_optimizers[i]) {
2091 ret = (*lightrec_optimizers[i])(state, block);
2092 if (ret)
2093 return ret;
2094 }
d16005f8
PC
2095 }
2096
2097 return 0;
2098}