lightrec: hack to fix a wrong assumption
[pcsx_rearmed.git] / deps / lightrec / optimizer.c
1 // SPDX-License-Identifier: LGPL-2.1-or-later
2 /*
3  * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
4  */
5
6 #include "constprop.h"
7 #include "lightrec-config.h"
8 #include "disassembler.h"
9 #include "lightrec.h"
10 #include "memmanager.h"
11 #include "optimizer.h"
12 #include "regcache.h"
13
14 #include <errno.h>
15 #include <stdbool.h>
16 #include <stdlib.h>
17 #include <string.h>
18
/* Evaluate to @ptr if the optimization flag @opt is enabled, NULL otherwise. */
#define IF_OPT(opt, ptr) ((opt) ? (ptr) : NULL)

/* A list of optimizer passes, each operating on an opcode list.
 * NOTE(review): not referenced in this chunk — presumably consumed elsewhere. */
struct optimizer_list {
        void (**optimizers)(struct opcode *);
        unsigned int nb_optimizers;
};

/* Forward declaration: is_nop() is used by helpers defined above its body. */
static bool is_nop(union code op);
27
28 bool is_unconditional_jump(union code c)
29 {
30         switch (c.i.op) {
31         case OP_SPECIAL:
32                 return c.r.op == OP_SPECIAL_JR || c.r.op == OP_SPECIAL_JALR;
33         case OP_J:
34         case OP_JAL:
35                 return true;
36         case OP_BEQ:
37         case OP_BLEZ:
38                 return c.i.rs == c.i.rt;
39         case OP_REGIMM:
40                 return (c.r.rt == OP_REGIMM_BGEZ ||
41                         c.r.rt == OP_REGIMM_BGEZAL) && c.i.rs == 0;
42         default:
43                 return false;
44         }
45 }
46
47 bool is_syscall(union code c)
48 {
49         return (c.i.op == OP_SPECIAL && c.r.op == OP_SPECIAL_SYSCALL) ||
50                 (c.i.op == OP_CP0 && (c.r.rs == OP_CP0_MTC0 ||
51                                         c.r.rs == OP_CP0_CTC0) &&
52                  (c.r.rd == 12 || c.r.rd == 13));
53 }
54
55 static u64 opcode_read_mask(union code op)
56 {
57         switch (op.i.op) {
58         case OP_SPECIAL:
59                 switch (op.r.op) {
60                 case OP_SPECIAL_SYSCALL:
61                 case OP_SPECIAL_BREAK:
62                         return 0;
63                 case OP_SPECIAL_JR:
64                 case OP_SPECIAL_JALR:
65                 case OP_SPECIAL_MTHI:
66                 case OP_SPECIAL_MTLO:
67                         return BIT(op.r.rs);
68                 case OP_SPECIAL_MFHI:
69                         return BIT(REG_HI);
70                 case OP_SPECIAL_MFLO:
71                         return BIT(REG_LO);
72                 case OP_SPECIAL_SLL:
73                         if (!op.r.imm)
74                                 return 0;
75                         fallthrough;
76                 case OP_SPECIAL_SRL:
77                 case OP_SPECIAL_SRA:
78                         return BIT(op.r.rt);
79                 default:
80                         return BIT(op.r.rs) | BIT(op.r.rt);
81                 }
82         case OP_CP0:
83                 switch (op.r.rs) {
84                 case OP_CP0_MTC0:
85                 case OP_CP0_CTC0:
86                         return BIT(op.r.rt);
87                 default:
88                         return 0;
89                 }
90         case OP_CP2:
91                 if (op.r.op == OP_CP2_BASIC) {
92                         switch (op.r.rs) {
93                         case OP_CP2_BASIC_MTC2:
94                         case OP_CP2_BASIC_CTC2:
95                                 return BIT(op.r.rt);
96                         default:
97                                 break;
98                         }
99                 }
100                 return 0;
101         case OP_J:
102         case OP_JAL:
103         case OP_LUI:
104                 return 0;
105         case OP_BEQ:
106                 if (op.i.rs == op.i.rt)
107                         return 0;
108                 fallthrough;
109         case OP_BNE:
110         case OP_LWL:
111         case OP_LWR:
112         case OP_SB:
113         case OP_SH:
114         case OP_SWL:
115         case OP_SW:
116         case OP_SWR:
117                 return BIT(op.i.rs) | BIT(op.i.rt);
118         default:
119                 return BIT(op.i.rs);
120         }
121 }
122
123 static u64 mult_div_write_mask(union code op)
124 {
125         u64 flags;
126
127         if (!OPT_FLAG_MULT_DIV)
128                 return BIT(REG_LO) | BIT(REG_HI);
129
130         if (op.r.rd)
131                 flags = BIT(op.r.rd);
132         else
133                 flags = BIT(REG_LO);
134         if (op.r.imm)
135                 flags |= BIT(op.r.imm);
136         else
137                 flags |= BIT(REG_HI);
138
139         return flags;
140 }
141
/* Return a bitmask of the registers written by the opcode.
 * Bit N set means register N is written; REG_LO/REG_HI stand for the
 * LO/HI registers of the multiplier unit. */
static u64 opcode_write_mask(union code op)
{
        switch (op.i.op) {
        case OP_META_MULT2:
        case OP_META_MULTU2:
                return mult_div_write_mask(op);
        case OP_SPECIAL:
                switch (op.r.op) {
                case OP_SPECIAL_JR:
                case OP_SPECIAL_SYSCALL:
                case OP_SPECIAL_BREAK:
                        return 0;
                case OP_SPECIAL_MULT:
                case OP_SPECIAL_MULTU:
                case OP_SPECIAL_DIV:
                case OP_SPECIAL_DIVU:
                        return mult_div_write_mask(op);
                case OP_SPECIAL_MTHI:
                        return BIT(REG_HI);
                case OP_SPECIAL_MTLO:
                        return BIT(REG_LO);
                case OP_SPECIAL_SLL:
                        /* SLL with a zero shift amount is the canonical NOP
                         * encoding — it writes nothing. */
                        if (!op.r.imm)
                                return 0;
                        fallthrough;
                default:
                        /* All other SPECIAL opcodes (including JALR) write rd. */
                        return BIT(op.r.rd);
                }
        case OP_ADDI:
        case OP_ADDIU:
        case OP_SLTI:
        case OP_SLTIU:
        case OP_ANDI:
        case OP_ORI:
        case OP_XORI:
        case OP_LUI:
        case OP_LB:
        case OP_LH:
        case OP_LWL:
        case OP_LW:
        case OP_LBU:
        case OP_LHU:
        case OP_LWR:
        case OP_META_EXTC:
        case OP_META_EXTS:
                return BIT(op.i.rt);
        case OP_JAL:
                /* JAL writes the return address into $ra (r31). */
                return BIT(31);
        case OP_CP0:
                switch (op.r.rs) {
                case OP_CP0_MFC0:
                case OP_CP0_CFC0:
                        return BIT(op.i.rt);
                default:
                        return 0;
                }
        case OP_CP2:
                if (op.r.op == OP_CP2_BASIC) {
                        switch (op.r.rs) {
                        case OP_CP2_BASIC_MFC2:
                        case OP_CP2_BASIC_CFC2:
                                return BIT(op.i.rt);
                        default:
                                break;
                        }
                }
                return 0;
        case OP_REGIMM:
                switch (op.r.rt) {
                case OP_REGIMM_BLTZAL:
                case OP_REGIMM_BGEZAL:
                        /* Linking branches write $ra (r31). */
                        return BIT(31);
                default:
                        return 0;
                }
        case OP_META_MOV:
                return BIT(op.r.rd);
        default:
                return 0;
        }
}
223
224 bool opcode_reads_register(union code op, u8 reg)
225 {
226         return opcode_read_mask(op) & BIT(reg);
227 }
228
229 bool opcode_writes_register(union code op, u8 reg)
230 {
231         return opcode_write_mask(op) & BIT(reg);
232 }
233
/* Scan backwards from (not including) list[offset] for the closest opcode
 * that writes @reg.
 *
 * Returns the writer's index, or -1 if no writer can be safely identified:
 * the scan aborts on a sync point, on a branch/jump, or when an opcode
 * reading @reg is encountered first. */
static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 reg)
{
        union code c;
        unsigned int i;

        /* At a sync point register state may be observed/modified
         * externally — nothing before it can be trusted. */
        if (op_flag_sync(list[offset].flags))
                return -1;

        for (i = offset; i > 0; i--) {
                c = list[i - 1].c;

                if (opcode_writes_register(c, reg)) {
                        /* The writer sits in a delay slot; its effect is tied
                         * to the branch, so don't report it. */
                        if (i > 1 && has_delay_slot(list[i - 2].c))
                                break;

                        return i - 1;
                }

                /* Stop at sync points, control flow, or a read of @reg. */
                if (op_flag_sync(list[i - 1].flags) ||
                    has_delay_slot(c) ||
                    opcode_reads_register(c, reg))
                        break;
        }

        return -1;
}
260
/* Scan forward from list[offset] (inclusive) for the next opcode that
 * reads @reg.
 *
 * Returns the reader's index, or -1 if the scan first hits a sync point,
 * a branch whose delay slot is not emulated, a delay slot, or an opcode
 * that overwrites @reg.
 * NOTE(review): the loop has no explicit upper bound; it relies on every
 * block ending in a pattern that triggers one of the stop conditions —
 * confirm against the block builder. */
static int find_next_reader(const struct opcode *list, unsigned int offset, u8 reg)
{
        unsigned int i;
        union code c;

        if (op_flag_sync(list[offset].flags))
                return -1;

        for (i = offset; ; i++) {
                c = list[i].c;

                if (opcode_reads_register(c, reg))
                        return i;

                if (op_flag_sync(list[i].flags)
                    || (op_flag_no_ds(list[i].flags) && has_delay_slot(c))
                    || is_delay_slot(list, i)
                    || opcode_writes_register(c, reg))
                        break;
        }

        return -1;
}
284
/* Return true if the value held in @reg after list[offset] is dead, i.e.
 * it is overwritten before ever being read again within the block. */
static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg)
{
        unsigned int i;

        /* At a sync point or inside a delay slot the register may be
         * observed externally — conservatively treat it as live. */
        if (op_flag_sync(list[offset].flags) || is_delay_slot(list, offset))
                return false;

        for (i = offset + 1; ; i++) {
                if (opcode_reads_register(list[i].c, reg))
                        return false;

                if (opcode_writes_register(list[i].c, reg))
                        return true;

                if (has_delay_slot(list[i].c)) {
                        /* Execution leaves the block after the delay slot;
                         * the register is dead only if the delay slot itself
                         * overwrites it without reading it first. */
                        if (op_flag_no_ds(list[i].flags) ||
                            opcode_reads_register(list[i + 1].c, reg))
                                return false;

                        return opcode_writes_register(list[i + 1].c, reg);
                }
        }
}
308
309 static bool reg_is_read(const struct opcode *list,
310                         unsigned int a, unsigned int b, u8 reg)
311 {
312         /* Return true if reg is read in one of the opcodes of the interval
313          * [a, b[ */
314         for (; a < b; a++) {
315                 if (!is_nop(list[a].c) && opcode_reads_register(list[a].c, reg))
316                         return true;
317         }
318
319         return false;
320 }
321
322 static bool reg_is_written(const struct opcode *list,
323                            unsigned int a, unsigned int b, u8 reg)
324 {
325         /* Return true if reg is written in one of the opcodes of the interval
326          * [a, b[ */
327
328         for (; a < b; a++) {
329                 if (!is_nop(list[a].c) && opcode_writes_register(list[a].c, reg))
330                         return true;
331         }
332
333         return false;
334 }
335
336 static bool reg_is_read_or_written(const struct opcode *list,
337                                    unsigned int a, unsigned int b, u8 reg)
338 {
339         return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
340 }
341
342 static bool opcode_is_load(union code op)
343 {
344         switch (op.i.op) {
345         case OP_LB:
346         case OP_LH:
347         case OP_LWL:
348         case OP_LW:
349         case OP_LBU:
350         case OP_LHU:
351         case OP_LWR:
352         case OP_LWC2:
353                 return true;
354         default:
355                 return false;
356         }
357 }
358
359 static bool opcode_is_store(union code op)
360 {
361         switch (op.i.op) {
362         case OP_SB:
363         case OP_SH:
364         case OP_SW:
365         case OP_SWL:
366         case OP_SWR:
367         case OP_SWC2:
368                 return true;
369         default:
370                 return false;
371         }
372 }
373
374 static u8 opcode_get_io_size(union code op)
375 {
376         switch (op.i.op) {
377         case OP_LB:
378         case OP_LBU:
379         case OP_SB:
380                 return 8;
381         case OP_LH:
382         case OP_LHU:
383         case OP_SH:
384                 return 16;
385         default:
386                 return 32;
387         }
388 }
389
390 bool opcode_is_io(union code op)
391 {
392         return opcode_is_load(op) || opcode_is_store(op);
393 }
394
395 /* TODO: Complete */
396 static bool is_nop(union code op)
397 {
398         if (opcode_writes_register(op, 0)) {
399                 switch (op.i.op) {
400                 case OP_CP0:
401                         return op.r.rs != OP_CP0_MFC0;
402                 case OP_LB:
403                 case OP_LH:
404                 case OP_LWL:
405                 case OP_LW:
406                 case OP_LBU:
407                 case OP_LHU:
408                 case OP_LWR:
409                         return false;
410                 default:
411                         return true;
412                 }
413         }
414
415         switch (op.i.op) {
416         case OP_SPECIAL:
417                 switch (op.r.op) {
418                 case OP_SPECIAL_AND:
419                         return op.r.rd == op.r.rt && op.r.rd == op.r.rs;
420                 case OP_SPECIAL_ADD:
421                 case OP_SPECIAL_ADDU:
422                         return (op.r.rd == op.r.rt && op.r.rs == 0) ||
423                                 (op.r.rd == op.r.rs && op.r.rt == 0);
424                 case OP_SPECIAL_SUB:
425                 case OP_SPECIAL_SUBU:
426                         return op.r.rd == op.r.rs && op.r.rt == 0;
427                 case OP_SPECIAL_OR:
428                         if (op.r.rd == op.r.rt)
429                                 return op.r.rd == op.r.rs || op.r.rs == 0;
430                         else
431                                 return (op.r.rd == op.r.rs) && op.r.rt == 0;
432                 case OP_SPECIAL_SLL:
433                 case OP_SPECIAL_SRA:
434                 case OP_SPECIAL_SRL:
435                         return op.r.rd == op.r.rt && op.r.imm == 0;
436                 case OP_SPECIAL_MFHI:
437                 case OP_SPECIAL_MFLO:
438                         return op.r.rd == 0;
439                 default:
440                         return false;
441                 }
442         case OP_ORI:
443         case OP_ADDI:
444         case OP_ADDIU:
445                 return op.i.rt == op.i.rs && op.i.imm == 0;
446         case OP_BGTZ:
447                 return (op.i.rs == 0 || op.i.imm == 1);
448         case OP_REGIMM:
449                 return (op.i.op == OP_REGIMM_BLTZ ||
450                                 op.i.op == OP_REGIMM_BLTZAL) &&
451                         (op.i.rs == 0 || op.i.imm == 1);
452         case OP_BNE:
453                 return (op.i.rs == op.i.rt || op.i.imm == 1);
454         default:
455                 return false;
456         }
457 }
458
459 bool load_in_delay_slot(union code op)
460 {
461         switch (op.i.op) {
462         case OP_CP0:
463                 switch (op.r.rs) {
464                 case OP_CP0_MFC0:
465                 case OP_CP0_CFC0:
466                         return true;
467                 default:
468                         break;
469                 }
470
471                 break;
472         case OP_CP2:
473                 if (op.r.op == OP_CP2_BASIC) {
474                         switch (op.r.rs) {
475                         case OP_CP2_BASIC_MFC2:
476                         case OP_CP2_BASIC_CFC2:
477                                 return true;
478                         default:
479                                 break;
480                         }
481                 }
482
483                 break;
484         case OP_LB:
485         case OP_LH:
486         case OP_LW:
487         case OP_LWL:
488         case OP_LWR:
489         case OP_LBU:
490         case OP_LHU:
491                 return true;
492         default:
493                 break;
494         }
495
496         return false;
497 }
498
/* Detect the SLL #16/#24 followed by SRA #16/#24 idiom — a cast to
 * s16/s8 — and replace the pair with a single OP_META_EXTS/EXTC, or fold
 * it into a preceding LHU/LBU by turning that load into LH/LB.
 * @v is the constant-propagation state, updated with the new sign
 * information of the affected register. */
static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset,
                                      struct constprop_data *v)
{
        struct opcode *ldop = NULL, *curr = &list[offset], *next;
        struct opcode *to_change, *to_nop;
        int idx, idx2;

        /* Only shift amounts 16 (s16 cast) and 24 (s8 cast) qualify. */
        if (curr->r.imm != 24 && curr->r.imm != 16)
                return;

        if (is_delay_slot(list, offset))
                return;

        idx = find_next_reader(list, offset + 1, curr->r.rd);
        if (idx < 0)
                return;

        next = &list[idx];

        /* The next reader must be a SRA with the same shift amount,
         * consuming the SLL's output. */
        if (next->i.op != OP_SPECIAL || next->r.op != OP_SPECIAL_SRA ||
            next->r.imm != curr->r.imm || next->r.rt != curr->r.rd)
                return;

        if (curr->r.rd != curr->r.rt && next->r.rd != next->r.rt) {
                /* sll rY, rX, 16
                 * ...
                 * sra rZ, rY, 16 */

                if (!reg_is_dead(list, idx, curr->r.rd) ||
                    reg_is_read_or_written(list, offset, idx, next->r.rd))
                        return;

                /* If rY is dead after the SRL, and rZ is not used after the SLL,
                 * we can change rY to rZ */

                pr_debug("Detected SLL/SRA with middle temp register\n");
                curr->r.rd = next->r.rd;
                next->r.rt = curr->r.rd;
        }

        /* We got a SLL/SRA combo. If imm #16, that's a cast to s16.
         * If imm #24 that's a cast to s8.
         *
         * First of all, make sure that the target register of the SLL is not
         * read after the SRA. */

        if (curr->r.rd == curr->r.rt) {
                /* sll rX, rX, 16
                 * ...
                 * sra rY, rX, 16 */
                to_change = next;
                to_nop = curr;

                /* rX is used after the SRA - we cannot convert it. */
                if (curr->r.rd != next->r.rd && !reg_is_dead(list, idx, curr->r.rd))
                        return;
        } else {
                /* sll rY, rX, 16
                 * ...
                 * sra rY, rY, 16 */
                to_change = curr;
                to_nop = next;
        }

        idx2 = find_prev_writer(list, offset, curr->r.rt);
        if (idx2 >= 0) {
                /* Note that PSX games sometimes do casts after
                 * a LHU or LBU; in this case we can change the
                 * load opcode to a LH or LB, and the cast can
                 * be changed to a MOV or a simple NOP. */

                ldop = &list[idx2];

                /* The fold is only valid when the load is an unsigned load
                 * of the matching width, and the load's target either is
                 * the SRA's target or dies before the SRA. */
                if (next->r.rd != ldop->i.rt &&
                    !reg_is_dead(list, idx, ldop->i.rt))
                        ldop = NULL;
                else if (curr->r.imm == 16 && ldop->i.op == OP_LHU)
                        ldop->i.op = OP_LH;
                else if (curr->r.imm == 24 && ldop->i.op == OP_LBU)
                        ldop->i.op = OP_LB;
                else
                        ldop = NULL;

                if (ldop) {
                        if (next->r.rd == ldop->i.rt) {
                                /* Load already targets the SRA's register:
                                 * the cast disappears entirely. */
                                to_change->opcode = 0;
                        } else if (reg_is_dead(list, idx, ldop->i.rt) &&
                                   !reg_is_read_or_written(list, idx2 + 1, idx, next->r.rd)) {
                                /* The target register of the SRA is dead after the
                                 * LBU/LHU; we can change the target register of the
                                 * LBU/LHU to the one of the SRA. */
                                v[ldop->i.rt].known = 0;
                                v[ldop->i.rt].sign = 0;
                                ldop->i.rt = next->r.rd;
                                to_change->opcode = 0;
                        } else {
                                /* Otherwise the cast degrades to a MOV. */
                                to_change->i.op = OP_META_MOV;
                                to_change->r.rd = next->r.rd;
                                to_change->r.rs = ldop->i.rt;
                        }

                        if (to_nop->r.imm == 24)
                                pr_debug("Convert LBU+SLL+SRA to LB\n");
                        else
                                pr_debug("Convert LHU+SLL+SRA to LH\n");

                        v[ldop->i.rt].known = 0;
                        /* '<<' binds looser than '-': this evaluates as
                         * 0xffffff80 << (24 - imm), i.e. the sign-extension
                         * mask for s8 (imm == 24) or s16 (imm == 16). */
                        v[ldop->i.rt].sign = 0xffffff80 << 24 - curr->r.imm;
                }
        }

        if (!ldop) {
                /* No foldable load: replace the pair with a single
                 * sign-extension meta opcode. */
                pr_debug("Convert SLL/SRA #%u to EXT%c\n",
                         curr->r.imm, curr->r.imm == 24 ? 'C' : 'S');

                if (to_change == curr) {
                        to_change->i.rs = curr->r.rt;
                        to_change->i.rt = next->r.rd;
                } else {
                        to_change->i.rt = next->r.rd;
                        to_change->i.rs = curr->r.rt;
                }

                if (to_nop->r.imm == 24)
                        to_change->i.op = OP_META_EXTC;
                else
                        to_change->i.op = OP_META_EXTS;
        }

        to_nop->opcode = 0;
}
630
631 static void
632 lightrec_remove_useless_lui(struct block *block, unsigned int offset,
633                             const struct constprop_data *v)
634 {
635         struct opcode *list = block->opcode_list,
636                       *op = &block->opcode_list[offset];
637         int reader;
638
639         if (!op_flag_sync(op->flags) && is_known(v, op->i.rt) &&
640             v[op->i.rt].value == op->i.imm << 16) {
641                 pr_debug("Converting duplicated LUI to NOP\n");
642                 op->opcode = 0x0;
643                 return;
644         }
645
646         if (op->i.imm != 0 || op->i.rt == 0 || offset == block->nb_ops - 1)
647                 return;
648
649         reader = find_next_reader(list, offset + 1, op->i.rt);
650         if (reader <= 0)
651                 return;
652
653         if (opcode_writes_register(list[reader].c, op->i.rt) ||
654             reg_is_dead(list, reader, op->i.rt)) {
655                 pr_debug("Removing useless LUI 0x0\n");
656
657                 if (list[reader].i.rs == op->i.rt)
658                         list[reader].i.rs = 0;
659                 if (list[reader].i.op == OP_SPECIAL &&
660                     list[reader].i.rt == op->i.rt)
661                         list[reader].i.rt = 0;
662                 op->opcode = 0x0;
663         }
664 }
665
666 static void lightrec_modify_lui(struct block *block, unsigned int offset)
667 {
668         union code c, *lui = &block->opcode_list[offset].c;
669         bool stop = false, stop_next = false;
670         unsigned int i;
671
672         for (i = offset + 1; !stop && i < block->nb_ops; i++) {
673                 c = block->opcode_list[i].c;
674                 stop = stop_next;
675
676                 if ((opcode_is_store(c) && c.i.rt == lui->i.rt)
677                     || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt)))
678                         break;
679
680                 if (opcode_writes_register(c, lui->i.rt)) {
681                         pr_debug("Convert LUI at offset 0x%x to kuseg\n",
682                                  i - 1 << 2);
683                         lui->i.imm = kunseg(lui->i.imm << 16) >> 16;
684                         break;
685                 }
686
687                 if (has_delay_slot(c))
688                         stop_next = true;
689         }
690 }
691
692 static int lightrec_transform_branches(struct lightrec_state *state,
693                                        struct block *block)
694 {
695         struct opcode *op;
696         unsigned int i;
697         s32 offset;
698
699         for (i = 0; i < block->nb_ops; i++) {
700                 op = &block->opcode_list[i];
701
702                 switch (op->i.op) {
703                 case OP_J:
704                         /* Transform J opcode into BEQ $zero, $zero if possible. */
705                         offset = (s32)((block->pc & 0xf0000000) >> 2 | op->j.imm)
706                                 - (s32)(block->pc >> 2) - (s32)i - 1;
707
708                         if (offset == (s16)offset) {
709                                 pr_debug("Transform J into BEQ $zero, $zero\n");
710                                 op->i.op = OP_BEQ;
711                                 op->i.rs = 0;
712                                 op->i.rt = 0;
713                                 op->i.imm = offset;
714
715                         }
716                         fallthrough;
717                 default:
718                         break;
719                 }
720         }
721
722         return 0;
723 }
724
725 static inline bool is_power_of_two(u32 value)
726 {
727         return popcount32(value) == 1;
728 }
729
/* Rewrite the opcode so that every source register that constant
 * propagation (@v) proved to contain zero is replaced by the hardwired
 * $zero register, letting later stages avoid loading it. */
static void lightrec_patch_known_zero(struct opcode *op,
                                      const struct constprop_data *v)
{
        switch (op->i.op) {
        case OP_SPECIAL:
                switch (op->r.op) {
                case OP_SPECIAL_JR:
                case OP_SPECIAL_JALR:
                case OP_SPECIAL_MTHI:
                case OP_SPECIAL_MTLO:
                        /* These read rs only. */
                        if (is_known_zero(v, op->r.rs))
                                op->r.rs = 0;
                        break;
                default:
                        /* Generic ALU op: patch rs, then fall through to
                         * patch rt too. Note the deliberate case order —
                         * 'default' precedes the shift cases so it can
                         * share their rt patching. */
                        if (is_known_zero(v, op->r.rs))
                                op->r.rs = 0;
                        fallthrough;
                case OP_SPECIAL_SLL:
                case OP_SPECIAL_SRL:
                case OP_SPECIAL_SRA:
                        /* Immediate shifts read rt only. */
                        if (is_known_zero(v, op->r.rt))
                                op->r.rt = 0;
                        break;
                case OP_SPECIAL_SYSCALL:
                case OP_SPECIAL_BREAK:
                case OP_SPECIAL_MFHI:
                case OP_SPECIAL_MFLO:
                        /* No GPR inputs — nothing to patch. */
                        break;
                }
                break;
        case OP_CP0:
                switch (op->r.rs) {
                case OP_CP0_MTC0:
                case OP_CP0_CTC0:
                        if (is_known_zero(v, op->r.rt))
                                op->r.rt = 0;
                        break;
                default:
                        break;
                }
                break;
        case OP_CP2:
                if (op->r.op == OP_CP2_BASIC) {
                        switch (op->r.rs) {
                        case OP_CP2_BASIC_MTC2:
                        case OP_CP2_BASIC_CTC2:
                                if (is_known_zero(v, op->r.rt))
                                        op->r.rt = 0;
                                break;
                        default:
                                break;
                        }
                }
                break;
        case OP_BEQ:
        case OP_BNE:
                /* Patch rt, then fall through to patch rs with the other
                 * I-type opcodes. */
                if (is_known_zero(v, op->i.rt))
                        op->i.rt = 0;
                fallthrough;
        case OP_REGIMM:
        case OP_BLEZ:
        case OP_BGTZ:
        case OP_ADDI:
        case OP_ADDIU:
        case OP_SLTI:
        case OP_SLTIU:
        case OP_ANDI:
        case OP_ORI:
        case OP_XORI:
        case OP_META_MOV:
        case OP_META_EXTC:
        case OP_META_EXTS:
        case OP_META_MULT2:
        case OP_META_MULTU2:
                if (is_known_zero(v, op->i.rs))
                        op->i.rs = 0;
                break;
        case OP_SB:
        case OP_SH:
        case OP_SWL:
        case OP_SW:
        case OP_SWR:
                /* Stores: patch the value register (rt), then fall through
                 * to patch the base register together with the loads. */
                if (is_known_zero(v, op->i.rt))
                        op->i.rt = 0;
                fallthrough;
        case OP_LB:
        case OP_LH:
        case OP_LWL:
        case OP_LW:
        case OP_LBU:
        case OP_LHU:
        case OP_LWR:
        case OP_LWC2:
        case OP_SWC2:
                /* Base register: if the known value kunseg()s to address 0,
                 * $zero can serve as the base. */
                if (is_known(v, op->i.rs)
                    && kunseg(v[op->i.rs].value) == 0)
                        op->i.rs = 0;
                break;
        default:
                break;
        }
}
832
833 static void lightrec_reset_syncs(struct block *block)
834 {
835         struct opcode *op, *list = block->opcode_list;
836         unsigned int i;
837         s32 offset;
838
839         for (i = 0; i < block->nb_ops; i++)
840                 list[i].flags &= ~LIGHTREC_SYNC;
841
842         for (i = 0; i < block->nb_ops; i++) {
843                 op = &list[i];
844
845                 if (op_flag_local_branch(op->flags) && has_delay_slot(op->c)) {
846                         offset = i + 1 + (s16)op->i.imm;
847                         list[offset].flags |= LIGHTREC_SYNC;
848                 }
849         }
850 }
851
852 static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
853 {
854         struct opcode *op, *list = block->opcode_list;
855         struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
856         unsigned int i;
857         bool local;
858         u8 tmp;
859
860         for (i = 0; i < block->nb_ops; i++) {
861                 op = &list[i];
862
863                 lightrec_consts_propagate(list, i, v);
864
865                 lightrec_patch_known_zero(op, v);
866
867                 /* Transform all opcodes detected as useless to real NOPs
868                  * (0x0: SLL r0, r0, #0) */
869                 if (op->opcode != 0 && is_nop(op->c)) {
870                         pr_debug("Converting useless opcode 0x%08x to NOP\n",
871                                         op->opcode);
872                         op->opcode = 0x0;
873                 }
874
875                 if (!op->opcode)
876                         continue;
877
878                 switch (op->i.op) {
879                 case OP_BEQ:
880                         if (op->i.rs == op->i.rt ||
881                             (is_known(v, op->i.rs) && is_known(v, op->i.rt) &&
882                              v[op->i.rs].value == v[op->i.rt].value)) {
883                                 if (op->i.rs != op->i.rt)
884                                         pr_debug("Found always-taken BEQ\n");
885
886                                 op->i.rs = 0;
887                                 op->i.rt = 0;
888                         } else if (v[op->i.rs].known & v[op->i.rt].known &
889                                    (v[op->i.rs].value ^ v[op->i.rt].value)) {
890                                 pr_debug("Found never-taken BEQ\n");
891
892                                 local = op_flag_local_branch(op->flags);
893                                 op->opcode = 0;
894                                 op->flags = 0;
895
896                                 if (local)
897                                         lightrec_reset_syncs(block);
898                         } else if (op->i.rs == 0) {
899                                 op->i.rs = op->i.rt;
900                                 op->i.rt = 0;
901                         }
902                         break;
903
904                 case OP_BNE:
905                         if (v[op->i.rs].known & v[op->i.rt].known &
906                             (v[op->i.rs].value ^ v[op->i.rt].value)) {
907                                 pr_debug("Found always-taken BNE\n");
908
909                                 op->i.op = OP_BEQ;
910                                 op->i.rs = 0;
911                                 op->i.rt = 0;
912                         } else if (is_known(v, op->i.rs) && is_known(v, op->i.rt) &&
913                                    v[op->i.rs].value == v[op->i.rt].value) {
914                                 pr_debug("Found never-taken BNE\n");
915
916                                 local = op_flag_local_branch(op->flags);
917                                 op->opcode = 0;
918                                 op->flags = 0;
919
920                                 if (local)
921                                         lightrec_reset_syncs(block);
922                         } else if (op->i.rs == 0) {
923                                 op->i.rs = op->i.rt;
924                                 op->i.rt = 0;
925                         }
926                         break;
927
928                 case OP_BLEZ:
929                         if (v[op->i.rs].known & BIT(31) &&
930                             v[op->i.rs].value & BIT(31)) {
931                                 pr_debug("Found always-taken BLEZ\n");
932
933                                 op->i.op = OP_BEQ;
934                                 op->i.rs = 0;
935                                 op->i.rt = 0;
936                         }
937                         break;
938
939                 case OP_BGTZ:
940                         if (v[op->i.rs].known & BIT(31) &&
941                             v[op->i.rs].value & BIT(31)) {
942                                 pr_debug("Found never-taken BGTZ\n");
943
944                                 local = op_flag_local_branch(op->flags);
945                                 op->opcode = 0;
946                                 op->flags = 0;
947
948                                 if (local)
949                                         lightrec_reset_syncs(block);
950                         }
951                         break;
952
953                 case OP_LUI:
954                         if (i == 0 || !has_delay_slot(list[i - 1].c))
955                                 lightrec_modify_lui(block, i);
956                         lightrec_remove_useless_lui(block, i, v);
957                         break;
958
959                 /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
960                  * with register $zero to the MOV meta-opcode */
961                 case OP_ORI:
962                 case OP_ADDI:
963                 case OP_ADDIU:
964                         if (op->i.imm == 0) {
965                                 pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
966                                 op->i.op = OP_META_MOV;
967                                 op->r.rd = op->i.rt;
968                         }
969                         break;
970                 case OP_ANDI:
971                         if (bits_are_known_zero(v, op->i.rs, ~op->i.imm)) {
972                                 pr_debug("Found useless ANDI 0x%x\n", op->i.imm);
973
974                                 if (op->i.rs == op->i.rt) {
975                                         op->opcode = 0;
976                                 } else {
977                                         op->i.op = OP_META_MOV;
978                                         op->r.rd = op->i.rt;
979                                 }
980                         }
981                         break;
982                 case OP_REGIMM:
983                         switch (op->r.rt) {
984                         case OP_REGIMM_BLTZ:
985                         case OP_REGIMM_BGEZ:
986                                 if (!(v[op->r.rs].known & BIT(31)))
987                                         break;
988
989                                 if (!!(v[op->r.rs].value & BIT(31))
990                                     ^ (op->r.rt == OP_REGIMM_BGEZ)) {
991                                         pr_debug("Found always-taken BLTZ/BGEZ\n");
992                                         op->i.op = OP_BEQ;
993                                         op->i.rs = 0;
994                                         op->i.rt = 0;
995                                 } else {
996                                         pr_debug("Found never-taken BLTZ/BGEZ\n");
997
998                                         local = op_flag_local_branch(op->flags);
999                                         op->opcode = 0;
1000                                         op->flags = 0;
1001
1002                                         if (local)
1003                                                 lightrec_reset_syncs(block);
1004                                 }
1005                                 break;
1006                         case OP_REGIMM_BLTZAL:
1007                         case OP_REGIMM_BGEZAL:
1008                                 /* TODO: Detect always-taken and replace with JAL */
1009                                 break;
1010                         }
1011                         break;
1012                 case OP_SPECIAL:
1013                         switch (op->r.op) {
1014                         case OP_SPECIAL_SRAV:
1015                                 if ((v[op->r.rs].known & 0x1f) != 0x1f)
1016                                         break;
1017
1018                                 pr_debug("Convert SRAV to SRA\n");
1019                                 op->r.imm = v[op->r.rs].value & 0x1f;
1020                                 op->r.op = OP_SPECIAL_SRA;
1021
1022                                 fallthrough;
1023                         case OP_SPECIAL_SRA:
1024                                 if (op->r.imm == 0) {
1025                                         pr_debug("Convert SRA #0 to MOV\n");
1026                                         op->i.op = OP_META_MOV;
1027                                         op->r.rs = op->r.rt;
1028                                         break;
1029                                 }
1030                                 break;
1031
1032                         case OP_SPECIAL_SLLV:
1033                                 if ((v[op->r.rs].known & 0x1f) != 0x1f)
1034                                         break;
1035
1036                                 pr_debug("Convert SLLV to SLL\n");
1037                                 op->r.imm = v[op->r.rs].value & 0x1f;
1038                                 op->r.op = OP_SPECIAL_SLL;
1039
1040                                 fallthrough;
1041                         case OP_SPECIAL_SLL:
1042                                 if (op->r.imm == 0) {
1043                                         pr_debug("Convert SLL #0 to MOV\n");
1044                                         op->i.op = OP_META_MOV;
1045                                         op->r.rs = op->r.rt;
1046                                 }
1047
1048                                 lightrec_optimize_sll_sra(block->opcode_list, i, v);
1049                                 break;
1050
1051                         case OP_SPECIAL_SRLV:
1052                                 if ((v[op->r.rs].known & 0x1f) != 0x1f)
1053                                         break;
1054
1055                                 pr_debug("Convert SRLV to SRL\n");
1056                                 op->r.imm = v[op->r.rs].value & 0x1f;
1057                                 op->r.op = OP_SPECIAL_SRL;
1058
1059                                 fallthrough;
1060                         case OP_SPECIAL_SRL:
1061                                 if (op->r.imm == 0) {
1062                                         pr_debug("Convert SRL #0 to MOV\n");
1063                                         op->i.op = OP_META_MOV;
1064                                         op->r.rs = op->r.rt;
1065                                 }
1066                                 break;
1067
1068                         case OP_SPECIAL_MULT:
1069                         case OP_SPECIAL_MULTU:
1070                                 if (is_known(v, op->r.rs) &&
1071                                     is_power_of_two(v[op->r.rs].value)) {
1072                                         tmp = op->c.i.rs;
1073                                         op->c.i.rs = op->c.i.rt;
1074                                         op->c.i.rt = tmp;
1075                                 } else if (!is_known(v, op->r.rt) ||
1076                                            !is_power_of_two(v[op->r.rt].value)) {
1077                                         break;
1078                                 }
1079
1080                                 pr_debug("Multiply by power-of-two: %u\n",
1081                                          v[op->r.rt].value);
1082
1083                                 if (op->r.op == OP_SPECIAL_MULT)
1084                                         op->i.op = OP_META_MULT2;
1085                                 else
1086                                         op->i.op = OP_META_MULTU2;
1087
1088                                 op->r.op = ctz32(v[op->r.rt].value);
1089                                 break;
1090                         case OP_SPECIAL_OR:
1091                         case OP_SPECIAL_ADD:
1092                         case OP_SPECIAL_ADDU:
1093                                 if (op->r.rs == 0) {
1094                                         pr_debug("Convert OR/ADD $zero to MOV\n");
1095                                         op->i.op = OP_META_MOV;
1096                                         op->r.rs = op->r.rt;
1097                                 }
1098                                 fallthrough;
1099                         case OP_SPECIAL_SUB:
1100                         case OP_SPECIAL_SUBU:
1101                                 if (op->r.rt == 0) {
1102                                         pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
1103                                         op->i.op = OP_META_MOV;
1104                                 }
1105                                 fallthrough;
1106                         default:
1107                                 break;
1108                         }
1109                         fallthrough;
1110                 default:
1111                         break;
1112                 }
1113         }
1114
1115         return 0;
1116 }
1117
1118 static bool lightrec_can_switch_delay_slot(union code op, union code next_op)
1119 {
1120         switch (op.i.op) {
1121         case OP_SPECIAL:
1122                 switch (op.r.op) {
1123                 case OP_SPECIAL_JALR:
1124                         if (opcode_reads_register(next_op, op.r.rd) ||
1125                             opcode_writes_register(next_op, op.r.rd))
1126                                 return false;
1127                         fallthrough;
1128                 case OP_SPECIAL_JR:
1129                         if (opcode_writes_register(next_op, op.r.rs))
1130                                 return false;
1131                         fallthrough;
1132                 default:
1133                         break;
1134                 }
1135                 fallthrough;
1136         case OP_J:
1137                 break;
1138         case OP_JAL:
1139                 if (opcode_reads_register(next_op, 31) ||
1140                     opcode_writes_register(next_op, 31))
1141                         return false;;
1142
1143                 break;
1144         case OP_BEQ:
1145         case OP_BNE:
1146                 if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
1147                         return false;
1148                 fallthrough;
1149         case OP_BLEZ:
1150         case OP_BGTZ:
1151                 if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
1152                         return false;
1153                 break;
1154         case OP_REGIMM:
1155                 switch (op.r.rt) {
1156                 case OP_REGIMM_BLTZAL:
1157                 case OP_REGIMM_BGEZAL:
1158                         if (opcode_reads_register(next_op, 31) ||
1159                             opcode_writes_register(next_op, 31))
1160                                 return false;
1161                         fallthrough;
1162                 case OP_REGIMM_BLTZ:
1163                 case OP_REGIMM_BGEZ:
1164                         if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
1165                                 return false;
1166                         break;
1167                 }
1168                 fallthrough;
1169         default:
1170                 break;
1171         }
1172
1173         return true;
1174 }
1175
1176 static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
1177 {
1178         struct opcode *list, *next = &block->opcode_list[0];
1179         unsigned int i;
1180         union code op, next_op;
1181         u32 flags;
1182
1183         for (i = 0; i < block->nb_ops - 1; i++) {
1184                 list = next;
1185                 next = &block->opcode_list[i + 1];
1186                 next_op = next->c;
1187                 op = list->c;
1188
1189                 if (!has_delay_slot(op) || op_flag_no_ds(list->flags) ||
1190                     op_flag_emulate_branch(list->flags) ||
1191                     op.opcode == 0 || next_op.opcode == 0)
1192                         continue;
1193
1194                 if (is_delay_slot(block->opcode_list, i))
1195                         continue;
1196
1197                 if (op_flag_sync(next->flags))
1198                         continue;
1199
1200                 if (!lightrec_can_switch_delay_slot(list->c, next_op))
1201                         continue;
1202
1203                 pr_debug("Swap branch and delay slot opcodes "
1204                          "at offsets 0x%x / 0x%x\n",
1205                          i << 2, (i + 1) << 2);
1206
1207                 flags = next->flags | (list->flags & LIGHTREC_SYNC);
1208                 list->c = next_op;
1209                 next->c = op;
1210                 next->flags = (list->flags | LIGHTREC_NO_DS) & ~LIGHTREC_SYNC;
1211                 list->flags = flags | LIGHTREC_NO_DS;
1212         }
1213
1214         return 0;
1215 }
1216
1217 static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
1218 {
1219         struct opcode_list *list, *old_list;
1220
1221         if (new_size >= block->nb_ops) {
1222                 pr_err("Invalid shrink size (%u vs %u)\n",
1223                        new_size, block->nb_ops);
1224                 return -EINVAL;
1225         }
1226
1227         list = lightrec_malloc(state, MEM_FOR_IR,
1228                                sizeof(*list) + sizeof(struct opcode) * new_size);
1229         if (!list) {
1230                 pr_err("Unable to allocate memory\n");
1231                 return -ENOMEM;
1232         }
1233
1234         old_list = container_of(block->opcode_list, struct opcode_list, ops);
1235         memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size);
1236
1237         lightrec_free_opcode_list(state, block->opcode_list);
1238         list->nb_ops = new_size;
1239         block->nb_ops = new_size;
1240         block->opcode_list = list->ops;
1241
1242         pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
1243                  block->pc, new_size);
1244
1245         return 0;
1246 }
1247
/* Detect "impossible" branches: branches whose delay slot contains another
 * control-flow-sensitive opcode (a load with a load-delay, another branch,
 * or an RFE), which the recompiler cannot translate directly. Such branches
 * are flagged LIGHTREC_EMULATE_BRANCH so they get interpreted instead.
 * Returns 0, or the error code of shrink_opcode_list() on failure. */
static int lightrec_detect_impossible_branches(struct lightrec_state *state,
					       struct block *block)
{
	struct opcode *op, *list = block->opcode_list, *next = &list[0];
	unsigned int i;
	int ret = 0;
	s16 offset;

	for (i = 0; i < block->nb_ops - 1; i++) {
		op = next;
		next = &list[i + 1];

		/* Only branches whose delay slot is problematic
		 * (load-delay load, nested branch, or RFE) are of
		 * interest here. */
		if (!has_delay_slot(op->c) ||
		    (!load_in_delay_slot(next->c) &&
		     !has_delay_slot(next->c) &&
		     !(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE)))
			continue;

		if (op->c.opcode == next->c.opcode) {
			/* The delay slot is the exact same opcode as the branch
			 * opcode: this is effectively a NOP */
			next->c.opcode = 0;
			continue;
		}

		/* Branch target, as an opcode offset inside this block. */
		offset = i + 1 + (s16)op->i.imm;
		if (load_in_delay_slot(next->c) &&
		    (offset >= 0 && offset < block->nb_ops) &&
		    !opcode_reads_register(list[offset].c, next->c.i.rt)) {
			/* The 'impossible' branch is a local branch - we can
			 * verify here that the first opcode of the target does
			 * not use the target register of the delay slot */

			pr_debug("Branch at offset 0x%x has load delay slot, "
				 "but is local and dest opcode does not read "
				 "dest register\n", i << 2);
			continue;
		}

		op->flags |= LIGHTREC_EMULATE_BRANCH;

		if (op == list) {
			pr_debug("First opcode of block PC 0x%08x is an impossible branch\n",
				 block->pc);

			/* If the first opcode is an 'impossible' branch, we
			 * only keep the first two opcodes of the block (the
			 * branch itself + its delay slot) */
			if (block->nb_ops > 2)
				ret = shrink_opcode_list(state, block, 2);
			break;
		}
	}

	return ret;
}
1304
1305 static int lightrec_local_branches(struct lightrec_state *state, struct block *block)
1306 {
1307         struct opcode *list;
1308         unsigned int i;
1309         s32 offset;
1310
1311         for (i = 0; i < block->nb_ops; i++) {
1312                 list = &block->opcode_list[i];
1313
1314                 if (should_emulate(list))
1315                         continue;
1316
1317                 switch (list->i.op) {
1318                 case OP_BEQ:
1319                 case OP_BNE:
1320                 case OP_BLEZ:
1321                 case OP_BGTZ:
1322                 case OP_REGIMM:
1323                         offset = i + 1 + (s16)list->i.imm;
1324                         if (offset >= 0 && offset < block->nb_ops)
1325                                 break;
1326                         fallthrough;
1327                 default:
1328                         continue;
1329                 }
1330
1331                 pr_debug("Found local branch to offset 0x%x\n", offset << 2);
1332
1333                 if (should_emulate(&block->opcode_list[offset])) {
1334                         pr_debug("Branch target must be emulated - skip\n");
1335                         continue;
1336                 }
1337
1338                 if (offset && has_delay_slot(block->opcode_list[offset - 1].c)) {
1339                         pr_debug("Branch target is a delay slot - skip\n");
1340                         continue;
1341                 }
1342
1343                 list->flags |= LIGHTREC_LOCAL_BRANCH;
1344         }
1345
1346         lightrec_reset_syncs(block);
1347
1348         return 0;
1349 }
1350
1351 bool has_delay_slot(union code op)
1352 {
1353         switch (op.i.op) {
1354         case OP_SPECIAL:
1355                 switch (op.r.op) {
1356                 case OP_SPECIAL_JR:
1357                 case OP_SPECIAL_JALR:
1358                         return true;
1359                 default:
1360                         return false;
1361                 }
1362         case OP_J:
1363         case OP_JAL:
1364         case OP_BEQ:
1365         case OP_BNE:
1366         case OP_BLEZ:
1367         case OP_BGTZ:
1368         case OP_REGIMM:
1369                 return true;
1370         default:
1371                 return false;
1372         }
1373 }
1374
1375 bool is_delay_slot(const struct opcode *list, unsigned int offset)
1376 {
1377         return offset > 0
1378                 && !op_flag_no_ds(list[offset - 1].flags)
1379                 && has_delay_slot(list[offset - 1].c);
1380 }
1381
1382 bool should_emulate(const struct opcode *list)
1383 {
1384         return op_flag_emulate_branch(list->flags) && has_delay_slot(list->c);
1385 }
1386
1387 static bool op_writes_rd(union code c)
1388 {
1389         switch (c.i.op) {
1390         case OP_SPECIAL:
1391         case OP_META_MOV:
1392                 return true;
1393         default:
1394                 return false;
1395         }
1396 }
1397
1398 static void lightrec_add_reg_op(struct opcode *op, u8 reg, u32 reg_op)
1399 {
1400         if (op_writes_rd(op->c) && reg == op->r.rd)
1401                 op->flags |= LIGHTREC_REG_RD(reg_op);
1402         else if (op->i.rs == reg)
1403                 op->flags |= LIGHTREC_REG_RS(reg_op);
1404         else if (op->i.rt == reg)
1405                 op->flags |= LIGHTREC_REG_RT(reg_op);
1406         else
1407                 pr_debug("Cannot add unload/clean/discard flag: "
1408                          "opcode does not touch register %s!\n",
1409                          lightrec_reg_name(reg));
1410 }
1411
/* Flag 'reg' to be unloaded from the register cache after this opcode. */
static void lightrec_add_unload(struct opcode *op, u8 reg)
{
	lightrec_add_reg_op(op, reg, LIGHTREC_REG_UNLOAD);
}
1416
/* Flag 'reg' to be discarded (dropped without write-back) after this opcode. */
static void lightrec_add_discard(struct opcode *op, u8 reg)
{
	lightrec_add_reg_op(op, reg, LIGHTREC_REG_DISCARD);
}
1421
/* Flag 'reg' to be cleaned (written back but kept loaded) after this opcode. */
static void lightrec_add_clean(struct opcode *op, u8 reg)
{
	lightrec_add_reg_op(op, reg, LIGHTREC_REG_CLEAN);
}
1426
1427 static void
1428 lightrec_early_unload_sync(struct opcode *list, s16 *last_r, s16 *last_w)
1429 {
1430         unsigned int reg;
1431         s16 offset;
1432
1433         for (reg = 0; reg < 34; reg++) {
1434                 offset = s16_max(last_w[reg], last_r[reg]);
1435
1436                 if (offset >= 0)
1437                         lightrec_add_unload(&list[offset], reg);
1438         }
1439
1440         memset(last_r, 0xff, sizeof(*last_r) * 34);
1441         memset(last_w, 0xff, sizeof(*last_w) * 34);
1442 }
1443
/* Liveness pass: walk the block once, tracking for each register the offset
 * of its last read and last write, plus whether it is "dirty" (written) or
 * merely "loaded" (read), and flag registers for clean/unload/discard at
 * their last point of use. Always returns 0. */
static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
{
	u16 i, offset;
	struct opcode *op;
	/* 34 entries: presumably the 32 GPRs plus HI/LO — TODO confirm.
	 * last_sync/next_sync delimit the current branch-free region. */
	s16 last_r[34], last_w[34], last_sync = 0, next_sync = 0;
	u64 mask_r, mask_w, dirty = 0, loaded = 0;
	u8 reg;

	/* 0xff bytes make every s16 entry negative ("never accessed"). */
	memset(last_r, 0xff, sizeof(last_r));
	memset(last_w, 0xff, sizeof(last_w));

	/*
	 * Clean if:
	 * - the register is dirty, and is read again after a branch opcode
	 *
	 * Unload if:
	 * - the register is dirty or loaded, and is not read again
	 * - the register is dirty or loaded, and is written again after a branch opcode
	 * - the next opcode has the SYNC flag set
	 *
	 * Discard if:
	 * - the register is dirty or loaded, and is written again
	 */

	for (i = 0; i < block->nb_ops; i++) {
		op = &block->opcode_list[i];

		if (op_flag_sync(op->flags) || should_emulate(op)) {
			/* The next opcode has the SYNC flag set, or is a branch
			 * that should be emulated: unload all registers. */
			lightrec_early_unload_sync(block->opcode_list, last_r, last_w);
			dirty = 0;
			loaded = 0;
		}

		if (next_sync == i) {
			last_sync = i;
			pr_debug("Last sync: 0x%x\n", last_sync << 2);
		}

		if (has_delay_slot(op->c)) {
			/* The sync point lands after the delay slot, unless
			 * the branch was flagged NO_DS. */
			next_sync = i + 1 + !op_flag_no_ds(op->flags);
			pr_debug("Next sync: 0x%x\n", next_sync << 2);
		}

		mask_r = opcode_read_mask(op->c);
		mask_w = opcode_write_mask(op->c);

		for (reg = 0; reg < 34; reg++) {
			if (mask_r & BIT(reg)) {
				if (dirty & BIT(reg) && last_w[reg] < last_sync) {
					/* The register is dirty, and is read
					 * again after a branch: clean it */

					lightrec_add_clean(&block->opcode_list[last_w[reg]], reg);
					dirty &= ~BIT(reg);
					loaded |= BIT(reg);
				}

				last_r[reg] = i;
			}

			if (mask_w & BIT(reg)) {
				if ((dirty & BIT(reg) && last_w[reg] < last_sync) ||
				    (loaded & BIT(reg) && last_r[reg] < last_sync)) {
					/* The register is dirty or loaded, and
					 * is written again after a branch:
					 * unload it */

					offset = s16_max(last_w[reg], last_r[reg]);
					lightrec_add_unload(&block->opcode_list[offset], reg);
					dirty &= ~BIT(reg);
					loaded &= ~BIT(reg);
				} else if (!(mask_r & BIT(reg)) &&
					   ((dirty & BIT(reg) && last_w[reg] > last_sync) ||
					   (loaded & BIT(reg) && last_r[reg] > last_sync))) {
					/* The register is dirty or loaded, and
					 * is written again: discard it */

					offset = s16_max(last_w[reg], last_r[reg]);
					lightrec_add_discard(&block->opcode_list[offset], reg);
					dirty &= ~BIT(reg);
					loaded &= ~BIT(reg);
				}

				last_w[reg] = i;
			}

		}

		dirty |= mask_w;
		loaded |= mask_r;
	}

	/* Unload all registers that are dirty or loaded at the end of block. */
	lightrec_early_unload_sync(block->opcode_list, last_r, last_w);

	return 0;
}
1543
1544 static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
1545 {
1546         struct opcode *list;
1547         enum psx_map psx_map;
1548         struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
1549         unsigned int i;
1550         u32 val, kunseg_val;
1551         bool no_mask;
1552
1553         for (i = 0; i < block->nb_ops; i++) {
1554                 list = &block->opcode_list[i];
1555
1556                 lightrec_consts_propagate(block->opcode_list, i, v);
1557
1558                 switch (list->i.op) {
1559                 case OP_SB:
1560                 case OP_SH:
1561                 case OP_SW:
1562                         if (OPT_FLAG_STORES) {
1563                                 /* Mark all store operations that target $sp or $gp
1564                                  * as not requiring code invalidation. This is based
1565                                  * on the heuristic that stores using one of these
1566                                  * registers as address will never hit a code page. */
1567                                 if (list->i.rs >= 28 && list->i.rs <= 29 &&
1568                                     !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
1569                                         pr_debug("Flaging opcode 0x%08x as not "
1570                                                  "requiring invalidation\n",
1571                                                  list->opcode);
1572                                         list->flags |= LIGHTREC_NO_INVALIDATE;
1573                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
1574                                 }
1575
1576                                 /* Detect writes whose destination address is inside the
1577                                  * current block, using constant propagation. When these
1578                                  * occur, we mark the blocks as not compilable. */
1579                                 if (is_known(v, list->i.rs) &&
1580                                     kunseg(v[list->i.rs].value) >= kunseg(block->pc) &&
1581                                     kunseg(v[list->i.rs].value) < (kunseg(block->pc) +
1582                                                                    block->nb_ops * 4)) {
1583                                         pr_debug("Self-modifying block detected\n");
1584                                         block_set_flags(block, BLOCK_NEVER_COMPILE);
1585                                         list->flags |= LIGHTREC_SMC;
1586                                 }
1587                         }
1588                         fallthrough;
1589                 case OP_SWL:
1590                 case OP_SWR:
1591                 case OP_SWC2:
1592                 case OP_LB:
1593                 case OP_LBU:
1594                 case OP_LH:
1595                 case OP_LHU:
1596                 case OP_LW:
1597                 case OP_LWL:
1598                 case OP_LWR:
1599                 case OP_LWC2:
1600                         if (OPT_FLAG_IO &&
1601                             (v[list->i.rs].known | v[list->i.rs].sign)) {
1602                                 psx_map = lightrec_get_constprop_map(state, v,
1603                                                                      list->i.rs,
1604                                                                      (s16) list->i.imm);
1605
1606                                 if (psx_map != PSX_MAP_UNKNOWN && !is_known(v, list->i.rs))
1607                                         pr_debug("Detected map thanks to bit-level const propagation!\n");
1608
1609                                 list->flags &= ~LIGHTREC_IO_MASK;
1610
1611                                 val = v[list->i.rs].value + (s16) list->i.imm;
1612                                 kunseg_val = kunseg(val);
1613
1614                                 no_mask = (v[list->i.rs].known & ~v[list->i.rs].value
1615                                            & 0xe0000000) == 0xe0000000;
1616
1617                                 switch (psx_map) {
1618                                 case PSX_MAP_KERNEL_USER_RAM:
1619                                         if (no_mask)
1620                                                 list->flags |= LIGHTREC_NO_MASK;
1621                                         fallthrough;
1622                                 case PSX_MAP_MIRROR1:
1623                                 case PSX_MAP_MIRROR2:
1624                                 case PSX_MAP_MIRROR3:
1625                                         pr_debug("Flaging opcode %u as RAM access\n", i);
1626                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
1627                                         if (no_mask && state->mirrors_mapped)
1628                                                 list->flags |= LIGHTREC_NO_MASK;
1629                                         break;
1630                                 case PSX_MAP_BIOS:
1631                                         pr_debug("Flaging opcode %u as BIOS access\n", i);
1632                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS);
1633                                         if (no_mask)
1634                                                 list->flags |= LIGHTREC_NO_MASK;
1635                                         break;
1636                                 case PSX_MAP_SCRATCH_PAD:
1637                                         pr_debug("Flaging opcode %u as scratchpad access\n", i);
1638                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH);
1639                                         if (no_mask)
1640                                                 list->flags |= LIGHTREC_NO_MASK;
1641
1642                                         /* Consider that we're never going to run code from
1643                                          * the scratchpad. */
1644                                         list->flags |= LIGHTREC_NO_INVALIDATE;
1645                                         break;
1646                                 case PSX_MAP_HW_REGISTERS:
1647                                         if (state->ops.hw_direct &&
1648                                             state->ops.hw_direct(kunseg_val,
1649                                                                  opcode_is_store(list->c),
1650                                                                  opcode_get_io_size(list->c))) {
1651                                                 pr_debug("Flagging opcode %u as direct I/O access\n",
1652                                                          i);
1653                                                 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT_HW);
1654
1655                                                 if (no_mask)
1656                                                         list->flags |= LIGHTREC_NO_MASK;
1657                                         } else {
1658                                                 pr_debug("Flagging opcode %u as I/O access\n",
1659                                                          i);
1660                                                 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
1661                                         }
1662                                         break;
1663                                 default:
1664                                         break;
1665                                 }
1666                         }
1667                         fallthrough;
1668                 default:
1669                         break;
1670                 }
1671         }
1672
1673         return 0;
1674 }
1675
/* Scan forward from "offset" to decide where the LO (if "mflo") or HI
 * result of a preceding MULT/DIV can live.
 *
 * Returns:
 *   0             - the value is overwritten before ever being read, so the
 *                   MULT/DIV does not need to produce it at all;
 *   REG_LO/REG_HI - the value must be kept in the real LO/HI register;
 *   anything else - the destination register of the single MFLO/MFHI that
 *                   consumes it, which can then receive the result directly.
 *
 * "last" is non-NULL when scanning a branch fall-through path (recursion);
 * "mask" accumulates registers read/written since the MULT/DIV;
 * "sync" blocks retargeting once a sync point was crossed;
 * "another" is set when recursing to look for a second MFLO/MFHI consumer. */
static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
			    const struct opcode *last,
			    u32 mask, bool sync, bool mflo, bool another)
{
	const struct opcode *op, *next = &block->opcode_list[offset];
	u32 old_mask;
	u8 reg2, reg = mflo ? REG_LO : REG_HI;
	u16 branch_offset;
	unsigned int i;

	for (i = offset; i < block->nb_ops; i++) {
		op = next;
		next = &block->opcode_list[i + 1];
		/* Keep the pre-opcode mask: an MFLO/MFHI may target its own rd
		 * even though the MFLO/MFHI itself writes that register. */
		old_mask = mask;

		/* If any other opcode writes or reads to the register
		 * we'd use, then we cannot use it anymore. */
		mask |= opcode_read_mask(op->c);
		mask |= opcode_write_mask(op->c);

		if (op_flag_sync(op->flags))
			sync = true;

		switch (op->i.op) {
		case OP_BEQ:
		case OP_BNE:
		case OP_BLEZ:
		case OP_BGTZ:
		case OP_REGIMM:
			/* TODO: handle backwards branches too */
			if (!last && op_flag_local_branch(op->flags) &&
			    (s16)op->c.i.imm >= 0) {
				branch_offset = i + 1 + (s16)op->c.i.imm
					- !!op_flag_no_ds(op->flags);

				/* Both the branch-taken and fall-through paths
				 * must agree on the same answer to retarget. */
				reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
							mask, sync, mflo, false);
				reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
							 mask, sync, mflo, false);
				if (reg > 0 && reg == reg2)
					return reg;
				if (!reg && !reg2)
					return 0;
			}

			/* Non-local or disagreeing paths: play it safe and
			 * keep the value in the real LO/HI register. */
			return mflo ? REG_LO : REG_HI;
		case OP_META_MULT2:
		case OP_META_MULTU2:
			return 0;
		case OP_SPECIAL:
			switch (op->r.op) {
			case OP_SPECIAL_MULT:
			case OP_SPECIAL_MULTU:
			case OP_SPECIAL_DIV:
			case OP_SPECIAL_DIVU:
				/* LO/HI overwritten before being read. */
				return 0;
			case OP_SPECIAL_MTHI:
				if (!mflo)
					return 0;
				continue;
			case OP_SPECIAL_MTLO:
				if (mflo)
					return 0;
				continue;
			case OP_SPECIAL_JR:
				if (op->r.rs != 31)
					return reg;

				/* "JR $ra" with the matching MFLO/MFHI in the
				 * delay slot: its rd can take the result. */
				if (!sync && !op_flag_no_ds(op->flags) &&
				    (next->i.op == OP_SPECIAL) &&
				    ((!mflo && next->r.op == OP_SPECIAL_MFHI) ||
				    (mflo && next->r.op == OP_SPECIAL_MFLO)))
					return next->r.rd;

				return 0;
			case OP_SPECIAL_JALR:
				return reg;
			case OP_SPECIAL_MFHI:
				if (!mflo) {
					if (another)
						return op->r.rd;
					/* Must use REG_HI if there is another MFHI target*/
					reg2 = get_mfhi_mflo_reg(block, i + 1, next,
							 0, sync, mflo, true);
					if (reg2 > 0 && reg2 != REG_HI)
						return REG_HI;

					if (!sync && !(old_mask & BIT(op->r.rd)))
						return op->r.rd;
					else
						return REG_HI;
				}
				continue;
			case OP_SPECIAL_MFLO:
				if (mflo) {
					if (another)
						return op->r.rd;
					/* Must use REG_LO if there is another MFLO target*/
					reg2 = get_mfhi_mflo_reg(block, i + 1, next,
							 0, sync, mflo, true);
					if (reg2 > 0 && reg2 != REG_LO)
						return REG_LO;

					if (!sync && !(old_mask & BIT(op->r.rd)))
						return op->r.rd;
					else
						return REG_LO;
				}
				continue;
			default:
				break;
			}

			fallthrough;
		default:
			continue;
		}
	}

	return reg;
}
1797
1798 static void lightrec_replace_lo_hi(struct block *block, u16 offset,
1799                                    u16 last, bool lo)
1800 {
1801         unsigned int i;
1802         u32 branch_offset;
1803
1804         /* This function will remove the following MFLO/MFHI. It must be called
1805          * only if get_mfhi_mflo_reg() returned a non-zero value. */
1806
1807         for (i = offset; i < last; i++) {
1808                 struct opcode *op = &block->opcode_list[i];
1809
1810                 switch (op->i.op) {
1811                 case OP_BEQ:
1812                 case OP_BNE:
1813                 case OP_BLEZ:
1814                 case OP_BGTZ:
1815                 case OP_REGIMM:
1816                         /* TODO: handle backwards branches too */
1817                         if (op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) {
1818                                 branch_offset = i + 1 + (s16)op->c.i.imm
1819                                         - !!op_flag_no_ds(op->flags);
1820
1821                                 lightrec_replace_lo_hi(block, branch_offset, last, lo);
1822                                 lightrec_replace_lo_hi(block, i + 1, branch_offset, lo);
1823                         }
1824                         break;
1825
1826                 case OP_SPECIAL:
1827                         if (lo && op->r.op == OP_SPECIAL_MFLO) {
1828                                 pr_debug("Removing MFLO opcode at offset 0x%x\n",
1829                                          i << 2);
1830                                 op->opcode = 0;
1831                                 return;
1832                         } else if (!lo && op->r.op == OP_SPECIAL_MFHI) {
1833                                 pr_debug("Removing MFHI opcode at offset 0x%x\n",
1834                                          i << 2);
1835                                 op->opcode = 0;
1836                                 return;
1837                         }
1838
1839                         fallthrough;
1840                 default:
1841                         break;
1842                 }
1843         }
1844 }
1845
/* Returns true when the host is itself MIPS (__mips__ defined), in which
 * case the div-by-zero check can be skipped unconditionally. */
static bool lightrec_always_skip_div_check(void)
{
#ifdef __mips__
	const bool host_is_mips = true;
#else
	const bool host_is_mips = false;
#endif

	return host_is_mips;
}
1854
/* For each MULT(U)/DIV(U)/META_MULT(U)2 opcode: flag unused LO/HI results
 * (LIGHTREC_NO_LO / LIGHTREC_NO_HI), skip the div-by-zero check when the
 * divisor is a known non-zero constant, and when a single MFLO/MFHI
 * consumes the result, remove that MFLO/MFHI and store its destination
 * register in the opcode's "rd" (LO) / "imm" (HI) fields, 0 meaning "not
 * retargeted".  Always returns 0. */
static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
{
	struct opcode *list = NULL;
	struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
	u8 reg_hi, reg_lo;
	unsigned int i;

	for (i = 0; i < block->nb_ops - 1; i++) {
		list = &block->opcode_list[i];

		/* Keep constant-propagation state current for the divisor
		 * check below. */
		lightrec_consts_propagate(block->opcode_list, i, v);

		/* Only fall through for MULT/MULTU/DIV/DIVU and the meta
		 * multiply opcodes; everything else continues the loop. */
		switch (list->i.op) {
		case OP_SPECIAL:
			switch (list->r.op) {
			case OP_SPECIAL_DIV:
			case OP_SPECIAL_DIVU:
				/* If we are dividing by a non-zero constant, don't
				 * emit the div-by-zero check. */
				if (lightrec_always_skip_div_check() ||
				    (v[list->r.rt].known & v[list->r.rt].value)) {
					list->flags |= LIGHTREC_NO_DIV_CHECK;
				}
				fallthrough;
			case OP_SPECIAL_MULT:
			case OP_SPECIAL_MULTU:
				break;
			default:
				continue;
			}
			fallthrough;
		case OP_META_MULT2:
		case OP_META_MULTU2:
			break;
		default:
			continue;
		}

		/* Don't support opcodes in delay slots */
		if (is_delay_slot(block->opcode_list, i) ||
		    op_flag_no_ds(list->flags)) {
			continue;
		}

		/* Which register (if any) could receive LO directly? */
		reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
		if (reg_lo == 0) {
			pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
				 " not writing LO\n", i << 2);
			list->flags |= LIGHTREC_NO_LO;
		}

		/* Same question for HI. */
		reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false);
		if (reg_hi == 0) {
			pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
				 " not writing HI\n", i << 2);
			list->flags |= LIGHTREC_NO_HI;
		}

		if (!reg_lo && !reg_hi) {
			pr_debug("Both LO/HI unused in this block, they will "
				 "probably be used in parent block - removing "
				 "flags.\n");
			list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI);
		}

		if (reg_lo > 0 && reg_lo != REG_LO) {
			pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n",
				 lightrec_reg_name(reg_lo), list->r.rs, list->r.rt);

			/* Remove the consuming MFLO; its destination register
			 * is recorded in the "rd" field. */
			lightrec_replace_lo_hi(block, i + 1, block->nb_ops, true);
			list->r.rd = reg_lo;
		} else {
			list->r.rd = 0;
		}

		if (reg_hi > 0 && reg_hi != REG_HI) {
			pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n",
				 lightrec_reg_name(reg_hi), list->r.rs, list->r.rt);

			/* Same for MFHI, recorded in the "imm" field. */
			lightrec_replace_lo_hi(block, i + 1, block->nb_ops, false);
			list->r.imm = reg_hi;
		} else {
			list->r.imm = 0;
		}
	}

	return 0;
}
1943
1944 static bool remove_div_sequence(struct block *block, unsigned int offset)
1945 {
1946         struct opcode *op;
1947         unsigned int i, found = 0;
1948
1949         /*
1950          * Scan for the zero-checking sequence that GCC automatically introduced
1951          * after most DIV/DIVU opcodes. This sequence checks the value of the
1952          * divisor, and if zero, executes a BREAK opcode, causing the BIOS
1953          * handler to crash the PS1.
1954          *
1955          * For DIV opcodes, this sequence additionally checks that the signed
1956          * operation does not overflow.
1957          *
1958          * With the assumption that the games never crashed the PS1, we can
1959          * therefore assume that the games never divided by zero or overflowed,
1960          * and these sequences can be removed.
1961          */
1962
1963         for (i = offset; i < block->nb_ops; i++) {
1964                 op = &block->opcode_list[i];
1965
1966                 if (!found) {
1967                         if (op->i.op == OP_SPECIAL &&
1968                             (op->r.op == OP_SPECIAL_DIV || op->r.op == OP_SPECIAL_DIVU))
1969                                 break;
1970
1971                         if ((op->opcode & 0xfc1fffff) == 0x14000002) {
1972                                 /* BNE ???, zero, +8 */
1973                                 found++;
1974                         } else {
1975                                 offset++;
1976                         }
1977                 } else if (found == 1 && !op->opcode) {
1978                         /* NOP */
1979                         found++;
1980                 } else if (found == 2 && op->opcode == 0x0007000d) {
1981                         /* BREAK 0x1c00 */
1982                         found++;
1983                 } else if (found == 3 && op->opcode == 0x2401ffff) {
1984                         /* LI at, -1 */
1985                         found++;
1986                 } else if (found == 4 && (op->opcode & 0xfc1fffff) == 0x14010004) {
1987                         /* BNE ???, at, +16 */
1988                         found++;
1989                 } else if (found == 5 && op->opcode == 0x3c018000) {
1990                         /* LUI at, 0x8000 */
1991                         found++;
1992                 } else if (found == 6 && (op->opcode & 0x141fffff) == 0x14010002) {
1993                         /* BNE ???, at, +16 */
1994                         found++;
1995                 } else if (found == 7 && !op->opcode) {
1996                         /* NOP */
1997                         found++;
1998                 } else if (found == 8 && op->opcode == 0x0006000d) {
1999                         /* BREAK 0x1800 */
2000                         found++;
2001                         break;
2002                 } else {
2003                         break;
2004                 }
2005         }
2006
2007         if (found >= 3) {
2008                 if (found != 9)
2009                         found = 3;
2010
2011                 pr_debug("Removing DIV%s sequence at offset 0x%x\n",
2012                          found == 9 ? "" : "U", offset << 2);
2013
2014                 for (i = 0; i < found; i++)
2015                         block->opcode_list[offset + i].opcode = 0;
2016
2017                 return true;
2018         }
2019
2020         return false;
2021 }
2022
2023 static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state,
2024                                                       struct block *block)
2025 {
2026         struct opcode *op;
2027         unsigned int i;
2028
2029         for (i = 0; i < block->nb_ops; i++) {
2030                 op = &block->opcode_list[i];
2031
2032                 if (op->i.op == OP_SPECIAL &&
2033                     (op->r.op == OP_SPECIAL_DIVU || op->r.op == OP_SPECIAL_DIV) &&
2034                     remove_div_sequence(block, i + 1))
2035                         op->flags |= LIGHTREC_NO_DIV_CHECK;
2036         }
2037
2038         return 0;
2039 }
2040
/* Opcode pattern of a word-granular zero-fill loop: stores a1 words of
 * zero starting at a0, then returns.  Blocks matching this sequence
 * word-for-word are detected by lightrec_replace_memset() below. */
static const u32 memset_code[] = {
	0x10a00006,	// beqz		a1, 2f
	0x24a2ffff,	// addiu	v0,a1,-1
	0x2403ffff,	// li		v1,-1
	0xac800000,	// 1: sw	zero,0(a0)
	0x2442ffff,	// addiu	v0,v0,-1
	0x1443fffd,	// bne		v0,v1, 1b
	0x24840004,	// addiu	a0,a0,4
	0x03e00008,	// 2: jr	ra
	0x00000000,	// nop
};
2052
2053 static int lightrec_replace_memset(struct lightrec_state *state, struct block *block)
2054 {
2055         unsigned int i;
2056         union code c;
2057
2058         for (i = 0; i < block->nb_ops; i++) {
2059                 c = block->opcode_list[i].c;
2060
2061                 if (c.opcode != memset_code[i])
2062                         return 0;
2063
2064                 if (i == ARRAY_SIZE(memset_code) - 1) {
2065                         /* success! */
2066                         pr_debug("Block at PC 0x%x is a memset\n", block->pc);
2067                         block_set_flags(block,
2068                                         BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE);
2069
2070                         /* Return non-zero to skip other optimizers. */
2071                         return 1;
2072                 }
2073         }
2074
2075         return 0;
2076 }
2077
/* Ordered list of optimizer passes run by lightrec_optimize().  Passes
 * disabled at build time resolve to NULL via IF_OPT() and are skipped. */
static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
	IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
	IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
	IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
	IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_branches),
	IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches),
	IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops),
	IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots),
	IF_OPT(OPT_FLAG_IO || OPT_FLAG_STORES, &lightrec_flag_io),
	IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
	IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
};
2090
2091 int lightrec_optimize(struct lightrec_state *state, struct block *block)
2092 {
2093         unsigned int i;
2094         int ret;
2095
2096         for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
2097                 if (lightrec_optimizers[i]) {
2098                         ret = (*lightrec_optimizers[i])(state, block);
2099                         if (ret)
2100                                 return ret;
2101                 }
2102         }
2103
2104         return 0;
2105 }