Update lightrec 20220910 (#686)
[pcsx_rearmed.git] / deps / lightrec / optimizer.c
1 // SPDX-License-Identifier: LGPL-2.1-or-later
2 /*
3  * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
4  */
5
6 #include "lightrec-config.h"
7 #include "disassembler.h"
8 #include "lightrec.h"
9 #include "memmanager.h"
10 #include "optimizer.h"
11 #include "regcache.h"
12
13 #include <errno.h>
14 #include <stdbool.h>
15 #include <stdlib.h>
16 #include <string.h>
17
18 #define IF_OPT(opt, ptr) ((opt) ? (ptr) : NULL)
19
20 struct optimizer_list {
21         void (**optimizers)(struct opcode *);
22         unsigned int nb_optimizers;
23 };
24
25 static bool is_nop(union code op);
26
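/* A branch counts as unconditional when its condition can never be false:
 * e.g. "beq $v0, $v0, target" always branches, and so does
 * "bgez $zero, target", since 0 >= 0 always holds. */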
27 bool is_unconditional_jump(union code c)
28 {
29         switch (c.i.op) {
30         case OP_SPECIAL:
31                 return c.r.op == OP_SPECIAL_JR || c.r.op == OP_SPECIAL_JALR;
32         case OP_J:
33         case OP_JAL:
34                 return true;
35         case OP_BEQ:
36         case OP_BLEZ:
37                 return c.i.rs == c.i.rt;
38         case OP_REGIMM:
39                 return (c.r.rt == OP_REGIMM_BGEZ ||
40                         c.r.rt == OP_REGIMM_BGEZAL) && c.i.rs == 0;
41         default:
42                 return false;
43         }
44 }
45
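/* Besides SYSCALL/BREAK proper, writes to the COP0 status (reg 12) or
 * cause (reg 13) registers are treated like a syscall, presumably because
 * they can unmask a pending interrupt and must exit compiled code. */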
46 bool is_syscall(union code c)
47 {
48         return (c.i.op == OP_SPECIAL && c.r.op == OP_SPECIAL_SYSCALL) ||
49                 (c.i.op == OP_CP0 && (c.r.rs == OP_CP0_MTC0 ||
50                                         c.r.rs == OP_CP0_CTC0) &&
51                  (c.r.rd == 12 || c.r.rd == 13));
52 }
53
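/* The read/write masks below use one bit per register ID: bits 0-31 are
 * the MIPS GPRs, and the REG_LO / REG_HI pseudo-IDs stand for the LO and
 * HI result registers, for 34 tracked registers in total. */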
54 static u64 opcode_read_mask(union code op)
55 {
56         switch (op.i.op) {
57         case OP_SPECIAL:
58                 switch (op.r.op) {
59                 case OP_SPECIAL_SYSCALL:
60                 case OP_SPECIAL_BREAK:
61                         return 0;
62                 case OP_SPECIAL_JR:
63                 case OP_SPECIAL_JALR:
64                 case OP_SPECIAL_MTHI:
65                 case OP_SPECIAL_MTLO:
66                         return BIT(op.r.rs);
67                 case OP_SPECIAL_MFHI:
68                         return BIT(REG_HI);
69                 case OP_SPECIAL_MFLO:
70                         return BIT(REG_LO);
71                 case OP_SPECIAL_SLL:
72                         if (!op.r.imm)
73                                 return 0;
74                         fallthrough;
75                 case OP_SPECIAL_SRL:
76                 case OP_SPECIAL_SRA:
77                         return BIT(op.r.rt);
78                 default:
79                         return BIT(op.r.rs) | BIT(op.r.rt);
80                 }
81         case OP_CP0:
82                 switch (op.r.rs) {
83                 case OP_CP0_MTC0:
84                 case OP_CP0_CTC0:
85                         return BIT(op.r.rt);
86                 default:
87                         return 0;
88                 }
89         case OP_CP2:
90                 if (op.r.op == OP_CP2_BASIC) {
91                         switch (op.r.rs) {
92                         case OP_CP2_BASIC_MTC2:
93                         case OP_CP2_BASIC_CTC2:
94                                 return BIT(op.r.rt);
95                         default:
96                                 break;
97                         }
98                 }
99                 return 0;
100         case OP_J:
101         case OP_JAL:
102         case OP_LUI:
103                 return 0;
104         case OP_BEQ:
105                 if (op.i.rs == op.i.rt)
106                         return 0;
107                 fallthrough;
108         case OP_BNE:
109         case OP_LWL:
110         case OP_LWR:
111         case OP_SB:
112         case OP_SH:
113         case OP_SWL:
114         case OP_SW:
115         case OP_SWR:
116                 return BIT(op.i.rs) | BIT(op.i.rt);
117         default:
118                 return BIT(op.i.rs);
119         }
120 }
121
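/* With the MULT/DIV optimization enabled, the rd and imm fields of a
 * MULT/DIV opcode are repurposed to name the registers receiving the LO
 * and HI results; a field of 0 means the architectural LO/HI. */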
122 static u64 mult_div_write_mask(union code op)
123 {
124         u64 flags;
125
126         if (!OPT_FLAG_MULT_DIV)
127                 return BIT(REG_LO) | BIT(REG_HI);
128
129         if (op.r.rd)
130                 flags = BIT(op.r.rd);
131         else
132                 flags = BIT(REG_LO);
133         if (op.r.imm)
134                 flags |= BIT(op.r.imm);
135         else
136                 flags |= BIT(REG_HI);
137
138         return flags;
139 }
140
141 static u64 opcode_write_mask(union code op)
142 {
143         switch (op.i.op) {
144         case OP_META_MULT2:
145         case OP_META_MULTU2:
146                 return mult_div_write_mask(op);
147         case OP_SPECIAL:
148                 switch (op.r.op) {
149                 case OP_SPECIAL_JR:
150                 case OP_SPECIAL_SYSCALL:
151                 case OP_SPECIAL_BREAK:
152                         return 0;
153                 case OP_SPECIAL_MULT:
154                 case OP_SPECIAL_MULTU:
155                 case OP_SPECIAL_DIV:
156                 case OP_SPECIAL_DIVU:
157                         return mult_div_write_mask(op);
158                 case OP_SPECIAL_MTHI:
159                         return BIT(REG_HI);
160                 case OP_SPECIAL_MTLO:
161                         return BIT(REG_LO);
162                 case OP_SPECIAL_SLL:
163                         if (!op.r.imm)
164                                 return 0;
165                         fallthrough;
166                 default:
167                         return BIT(op.r.rd);
168                 }
169         case OP_ADDI:
170         case OP_ADDIU:
171         case OP_SLTI:
172         case OP_SLTIU:
173         case OP_ANDI:
174         case OP_ORI:
175         case OP_XORI:
176         case OP_LUI:
177         case OP_LB:
178         case OP_LH:
179         case OP_LWL:
180         case OP_LW:
181         case OP_LBU:
182         case OP_LHU:
183         case OP_LWR:
184                 return BIT(op.i.rt);
185         case OP_JAL:
186                 return BIT(31);
187         case OP_CP0:
188                 switch (op.r.rs) {
189                 case OP_CP0_MFC0:
190                 case OP_CP0_CFC0:
191                         return BIT(op.i.rt);
192                 default:
193                         return 0;
194                 }
195         case OP_CP2:
196                 if (op.r.op == OP_CP2_BASIC) {
197                         switch (op.r.rs) {
198                         case OP_CP2_BASIC_MFC2:
199                         case OP_CP2_BASIC_CFC2:
200                                 return BIT(op.i.rt);
201                         default:
202                                 break;
203                         }
204                 }
205                 return 0;
206         case OP_REGIMM:
207                 switch (op.r.rt) {
208                 case OP_REGIMM_BLTZAL:
209                 case OP_REGIMM_BGEZAL:
210                         return BIT(31);
211                 default:
212                         return 0;
213                 }
214         case OP_META_MOV:
215                 return BIT(op.r.rd);
216         default:
217                 return 0;
218         }
219 }
220
221 bool opcode_reads_register(union code op, u8 reg)
222 {
223         return opcode_read_mask(op) & BIT(reg);
224 }
225
226 bool opcode_writes_register(union code op, u8 reg)
227 {
228         return opcode_write_mask(op) & BIT(reg);
229 }
230
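/* Walk backwards from "offset" looking for the opcode that last wrote
 * "reg". The search gives up (returns -1) when it crosses a SYNC point,
 * a branch or its delay slot, or an intervening read of the register. */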
231 static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 reg)
232 {
233         union code c;
234         unsigned int i;
235
236         if (op_flag_sync(list[offset].flags))
237                 return -1;
238
239         for (i = offset; i > 0; i--) {
240                 c = list[i - 1].c;
241
242                 if (opcode_writes_register(c, reg)) {
243                         if (i > 1 && has_delay_slot(list[i - 2].c))
244                                 break;
245
246                         return i - 1;
247                 }
248
249                 if (op_flag_sync(list[i - 1].flags) ||
250                     has_delay_slot(c) ||
251                     opcode_reads_register(c, reg))
252                         break;
253         }
254
255         return -1;
256 }
257
258 static int find_next_reader(const struct opcode *list, unsigned int offset, u8 reg)
259 {
260         unsigned int i;
261         union code c;
262
263         if (op_flag_sync(list[offset].flags))
264                 return -1;
265
266         for (i = offset; ; i++) {
267                 c = list[i].c;
268
269                 if (opcode_reads_register(c, reg)) {
270                         if (i > 0 && has_delay_slot(list[i - 1].c))
271                                 break;
272
273                         return i;
274                 }
275
276                 if (op_flag_sync(list[i].flags) ||
277                     has_delay_slot(c) || opcode_writes_register(c, reg))
278                         break;
279         }
280
281         return -1;
282 }
283
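/* A register is dead if it is written again before being read. Note the
 * forward scan has no upper bound: it relies on every block ending with
 * a branch, whose delay slot terminates the loop. */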
284 static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg)
285 {
286         unsigned int i;
287
288         if (op_flag_sync(list[offset].flags))
289                 return false;
290
291         for (i = offset + 1; ; i++) {
292                 if (opcode_reads_register(list[i].c, reg))
293                         return false;
294
295                 if (opcode_writes_register(list[i].c, reg))
296                         return true;
297
298                 if (has_delay_slot(list[i].c)) {
299                         if (op_flag_no_ds(list[i].flags) ||
300                             opcode_reads_register(list[i + 1].c, reg))
301                                 return false;
302
303                         return opcode_writes_register(list[i + 1].c, reg);
304                 }
305         }
306 }
307
308 static bool reg_is_read(const struct opcode *list,
309                         unsigned int a, unsigned int b, u8 reg)
310 {
311         /* Return true if reg is read in one of the opcodes of the interval
 312          * [a, b) */
313         for (; a < b; a++) {
314                 if (!is_nop(list[a].c) && opcode_reads_register(list[a].c, reg))
315                         return true;
316         }
317
318         return false;
319 }
320
321 static bool reg_is_written(const struct opcode *list,
322                            unsigned int a, unsigned int b, u8 reg)
323 {
324         /* Return true if reg is written in one of the opcodes of the interval
 325          * [a, b) */
326
327         for (; a < b; a++) {
328                 if (!is_nop(list[a].c) && opcode_writes_register(list[a].c, reg))
329                         return true;
330         }
331
332         return false;
333 }
334
335 static bool reg_is_read_or_written(const struct opcode *list,
336                                    unsigned int a, unsigned int b, u8 reg)
337 {
338         return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
339 }
340
341 static bool opcode_is_load(union code op)
342 {
343         switch (op.i.op) {
344         case OP_LB:
345         case OP_LH:
346         case OP_LWL:
347         case OP_LW:
348         case OP_LBU:
349         case OP_LHU:
350         case OP_LWR:
351         case OP_LWC2:
352                 return true;
353         default:
354                 return false;
355         }
356 }
357
358 static bool opcode_is_store(union code op)
359 {
360         switch (op.i.op) {
361         case OP_SB:
362         case OP_SH:
363         case OP_SW:
364         case OP_SWL:
365         case OP_SWR:
366         case OP_SWC2:
367                 return true;
368         default:
369                 return false;
370         }
371 }
372
373 static u8 opcode_get_io_size(union code op)
374 {
375         switch (op.i.op) {
376         case OP_LB:
377         case OP_LBU:
378         case OP_SB:
379                 return 8;
380         case OP_LH:
381         case OP_LHU:
382         case OP_SH:
383                 return 16;
384         default:
385                 return 32;
386         }
387 }
388
389 bool opcode_is_io(union code op)
390 {
391         return opcode_is_load(op) || opcode_is_store(op);
392 }
393
394 /* TODO: Complete */
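/* Some examples of opcodes detected as NOPs below:
 *   or    $t0, $t0, $zero   (rd == rs, rt == 0)
 *   addiu $t0, $t0, 0       (rt == rs, imm == 0)
 *   bne   $t0, $t0, target  (condition never true)
 * Writes to $zero are NOPs too, except for loads and MFC0, whose side
 * effects (the memory or COP0 access) must be preserved. */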
395 static bool is_nop(union code op)
396 {
397         if (opcode_writes_register(op, 0)) {
398                 switch (op.i.op) {
399                 case OP_CP0:
400                         return op.r.rs != OP_CP0_MFC0;
401                 case OP_LB:
402                 case OP_LH:
403                 case OP_LWL:
404                 case OP_LW:
405                 case OP_LBU:
406                 case OP_LHU:
407                 case OP_LWR:
408                         return false;
409                 default:
410                         return true;
411                 }
412         }
413
414         switch (op.i.op) {
415         case OP_SPECIAL:
416                 switch (op.r.op) {
417                 case OP_SPECIAL_AND:
418                         return op.r.rd == op.r.rt && op.r.rd == op.r.rs;
419                 case OP_SPECIAL_ADD:
420                 case OP_SPECIAL_ADDU:
421                         return (op.r.rd == op.r.rt && op.r.rs == 0) ||
422                                 (op.r.rd == op.r.rs && op.r.rt == 0);
423                 case OP_SPECIAL_SUB:
424                 case OP_SPECIAL_SUBU:
425                         return op.r.rd == op.r.rs && op.r.rt == 0;
426                 case OP_SPECIAL_OR:
427                         if (op.r.rd == op.r.rt)
428                                 return op.r.rd == op.r.rs || op.r.rs == 0;
429                         else
430                                 return (op.r.rd == op.r.rs) && op.r.rt == 0;
431                 case OP_SPECIAL_SLL:
432                 case OP_SPECIAL_SRA:
433                 case OP_SPECIAL_SRL:
434                         return op.r.rd == op.r.rt && op.r.imm == 0;
435                 case OP_SPECIAL_MFHI:
436                 case OP_SPECIAL_MFLO:
437                         return op.r.rd == 0;
438                 default:
439                         return false;
440                 }
441         case OP_ORI:
442         case OP_ADDI:
443         case OP_ADDIU:
444                 return op.i.rt == op.i.rs && op.i.imm == 0;
445         case OP_BGTZ:
446                 return (op.i.rs == 0 || op.i.imm == 1);
447         case OP_REGIMM:
448                 /* BLTZAL always writes $ra, even when the branch is
                    * not taken, so only BLTZ can ever be a NOP here. */
449                 return op.r.rt == OP_REGIMM_BLTZ &&
450                         (op.i.rs == 0 || op.i.imm == 1);
451         case OP_BNE:
452                 return (op.i.rs == op.i.rt || op.i.imm == 1);
453         default:
454                 return false;
455         }
456 }
457
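/* On MIPS-I, the value of a load is not visible to the instruction in
 * the load delay slot. Loads (and the COP0/COP2 register reads listed
 * below) therefore need special handling when they themselves sit in a
 * branch delay slot. */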
458 bool load_in_delay_slot(union code op)
459 {
460         switch (op.i.op) {
461         case OP_CP0:
462                 switch (op.r.rs) {
463                 case OP_CP0_MFC0:
464                 case OP_CP0_CFC0:
465                         return true;
466                 default:
467                         break;
468                 }
469
470                 break;
471         case OP_CP2:
472                 if (op.r.op == OP_CP2_BASIC) {
473                         switch (op.r.rs) {
474                         case OP_CP2_BASIC_MFC2:
475                         case OP_CP2_BASIC_CFC2:
476                                 return true;
477                         default:
478                                 break;
479                         }
480                 }
481
482                 break;
483         case OP_LB:
484         case OP_LH:
485         case OP_LW:
486         case OP_LWL:
487         case OP_LWR:
488         case OP_LBU:
489         case OP_LHU:
490                 return true;
491         default:
492                 break;
493         }
494
495         return false;
496 }
497
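/* Forward constant propagation: "known" is a bitmask of registers whose
 * current value is held in v[]. For example, "lui $t0, 0x8000" makes $t0
 * known (0x80000000); a following "ori $t0, $t0, 0x1234" keeps it known
 * (0x80001234), while "lw $t0, 0($a0)" clears the bit again. */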
498 static u32 lightrec_propagate_consts(const struct opcode *op,
499                                      const struct opcode *prev,
500                                      u32 known, u32 *v)
501 {
502         union code c = prev->c;
503
504         /* Register $zero is always, well, zero */
505         known |= BIT(0);
506         v[0] = 0;
507
508         if (op_flag_sync(op->flags))
509                 return BIT(0);
510
511         switch (c.i.op) {
512         case OP_SPECIAL:
513                 switch (c.r.op) {
514                 case OP_SPECIAL_SLL:
515                         if (known & BIT(c.r.rt)) {
516                                 known |= BIT(c.r.rd);
517                                 v[c.r.rd] = v[c.r.rt] << c.r.imm;
518                         } else {
519                                 known &= ~BIT(c.r.rd);
520                         }
521                         break;
522                 case OP_SPECIAL_SRL:
523                         if (known & BIT(c.r.rt)) {
524                                 known |= BIT(c.r.rd);
525                                 v[c.r.rd] = v[c.r.rt] >> c.r.imm;
526                         } else {
527                                 known &= ~BIT(c.r.rd);
528                         }
529                         break;
530                 case OP_SPECIAL_SRA:
531                         if (known & BIT(c.r.rt)) {
532                                 known |= BIT(c.r.rd);
533                                 v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm;
534                         } else {
535                                 known &= ~BIT(c.r.rd);
536                         }
537                         break;
538                 case OP_SPECIAL_SLLV:
539                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
540                                 known |= BIT(c.r.rd);
541                                 v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f);
542                         } else {
543                                 known &= ~BIT(c.r.rd);
544                         }
545                         break;
546                 case OP_SPECIAL_SRLV:
547                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
548                                 known |= BIT(c.r.rd);
549                                 v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f);
550                         } else {
551                                 known &= ~BIT(c.r.rd);
552                         }
553                         break;
554                 case OP_SPECIAL_SRAV:
555                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
556                                 known |= BIT(c.r.rd);
557                                 v[c.r.rd] = (s32)v[c.r.rt]
558                                           >> (v[c.r.rs] & 0x1f);
559                         } else {
560                                 known &= ~BIT(c.r.rd);
561                         }
562                         break;
563                 case OP_SPECIAL_ADD:
564                 case OP_SPECIAL_ADDU:
565                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
566                                 known |= BIT(c.r.rd);
567                                 v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs];
568                         } else {
569                                 known &= ~BIT(c.r.rd);
570                         }
571                         break;
572                 case OP_SPECIAL_SUB:
573                 case OP_SPECIAL_SUBU:
574                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
575                                 known |= BIT(c.r.rd);
576                                 v[c.r.rd] = v[c.r.rt] - v[c.r.rs];
577                         } else {
578                                 known &= ~BIT(c.r.rd);
579                         }
580                         break;
581                 case OP_SPECIAL_AND:
582                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
583                                 known |= BIT(c.r.rd);
584                                 v[c.r.rd] = v[c.r.rt] & v[c.r.rs];
585                         } else {
586                                 known &= ~BIT(c.r.rd);
587                         }
588                         break;
589                 case OP_SPECIAL_OR:
590                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
591                                 known |= BIT(c.r.rd);
592                                 v[c.r.rd] = v[c.r.rt] | v[c.r.rs];
593                         } else {
594                                 known &= ~BIT(c.r.rd);
595                         }
596                         break;
597                 case OP_SPECIAL_XOR:
598                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
599                                 known |= BIT(c.r.rd);
600                                 v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs];
601                         } else {
602                                 known &= ~BIT(c.r.rd);
603                         }
604                         break;
605                 case OP_SPECIAL_NOR:
606                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
607                                 known |= BIT(c.r.rd);
608                                 v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]);
609                         } else {
610                                 known &= ~BIT(c.r.rd);
611                         }
612                         break;
613                 case OP_SPECIAL_SLT:
614                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
615                                 known |= BIT(c.r.rd);
616                                 v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt];
617                         } else {
618                                 known &= ~BIT(c.r.rd);
619                         }
620                         break;
621                 case OP_SPECIAL_SLTU:
622                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
623                                 known |= BIT(c.r.rd);
624                                 v[c.r.rd] = v[c.r.rs] < v[c.r.rt];
625                         } else {
626                                 known &= ~BIT(c.r.rd);
627                         }
628                         break;
629                 case OP_SPECIAL_MULT:
630                 case OP_SPECIAL_MULTU:
631                 case OP_SPECIAL_DIV:
632                 case OP_SPECIAL_DIVU:
633                         if (OPT_FLAG_MULT_DIV && c.r.rd)
634                                 known &= ~BIT(c.r.rd);
635                         if (OPT_FLAG_MULT_DIV && c.r.imm)
636                                 known &= ~BIT(c.r.imm);
637                         break;
638                 default:
639                         break;
640                 }
641                 break;
642         case OP_META_MULT2:
643         case OP_META_MULTU2:
644                 if (OPT_FLAG_MULT_DIV && (known & BIT(c.r.rs))) {
645                         if (c.r.rd) {
646                                 known |= BIT(c.r.rd);
647
648                                 if (c.r.op < 32)
649                                         v[c.r.rd] = v[c.r.rs] << c.r.op;
650                                 else
651                                         v[c.r.rd] = 0;
652                         }
653
654                         if (c.r.imm) {
655                                 known |= BIT(c.r.imm);
656
657                                 if (c.r.op >= 32)
658                                         v[c.r.imm] = v[c.r.rs] << (c.r.op - 32);
659                                 else if (c.i.op == OP_META_MULT2)
660                                         v[c.r.imm] = (s32) v[c.r.rs] >> (32 - c.r.op);
661                                 else
662                                         v[c.r.imm] = v[c.r.rs] >> (32 - c.r.op);
663                         }
664                 } else {
665                         if (OPT_FLAG_MULT_DIV && c.r.rd)
666                                 known &= ~BIT(c.r.rd);
667                         if (OPT_FLAG_MULT_DIV && c.r.imm)
668                                 known &= ~BIT(c.r.imm);
669                 }
670                 break;
671         case OP_REGIMM:
672                 break;
673         case OP_ADDI:
674         case OP_ADDIU:
675                 if (known & BIT(c.i.rs)) {
676                         known |= BIT(c.i.rt);
677                         v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm;
678                 } else {
679                         known &= ~BIT(c.i.rt);
680                 }
681                 break;
682         case OP_SLTI:
683                 if (known & BIT(c.i.rs)) {
684                         known |= BIT(c.i.rt);
685                         v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm;
686                 } else {
687                         known &= ~BIT(c.i.rt);
688                 }
689                 break;
690         case OP_SLTIU:
691                 if (known & BIT(c.i.rs)) {
692                         known |= BIT(c.i.rt);
693                         v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm;
694                 } else {
695                         known &= ~BIT(c.i.rt);
696                 }
697                 break;
698         case OP_ANDI:
699                 if (known & BIT(c.i.rs)) {
700                         known |= BIT(c.i.rt);
701                         v[c.i.rt] = v[c.i.rs] & c.i.imm;
702                 } else {
703                         known &= ~BIT(c.i.rt);
704                 }
705                 break;
706         case OP_ORI:
707                 if (known & BIT(c.i.rs)) {
708                         known |= BIT(c.i.rt);
709                         v[c.i.rt] = v[c.i.rs] | c.i.imm;
710                 } else {
711                         known &= ~BIT(c.i.rt);
712                 }
713                 break;
714         case OP_XORI:
715                 if (known & BIT(c.i.rs)) {
716                         known |= BIT(c.i.rt);
717                         v[c.i.rt] = v[c.i.rs] ^ c.i.imm;
718                 } else {
719                         known &= ~BIT(c.i.rt);
720                 }
721                 break;
722         case OP_LUI:
723                 known |= BIT(c.i.rt);
724                 v[c.i.rt] = c.i.imm << 16;
725                 break;
726         case OP_CP0:
727                 switch (c.r.rs) {
728                 case OP_CP0_MFC0:
729                 case OP_CP0_CFC0:
730                         known &= ~BIT(c.r.rt);
731                         break;
732                 }
733                 break;
734         case OP_CP2:
735                 if (c.r.op == OP_CP2_BASIC) {
736                         switch (c.r.rs) {
737                         case OP_CP2_BASIC_MFC2:
738                         case OP_CP2_BASIC_CFC2:
739                                 known &= ~BIT(c.r.rt);
740                                 break;
741                         }
742                 }
743                 break;
744         case OP_LB:
745         case OP_LH:
746         case OP_LWL:
747         case OP_LW:
748         case OP_LBU:
749         case OP_LHU:
750         case OP_LWR:
751         case OP_LWC2:
752                 known &= ~BIT(c.i.rt);
753                 break;
754         case OP_META_MOV:
755                 if (known & BIT(c.r.rs)) {
756                         known |= BIT(c.r.rd);
757                         v[c.r.rd] = v[c.r.rs];
758                 } else {
759                         known &= ~BIT(c.r.rd);
760                 }
761                 break;
762         default:
763                 break;
764         }
765
766         return known;
767 }
768
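/* Rewrite SLL #16 / SRA #16 (a cast to s16) and SLL #24 / SRA #24 (a
 * cast to s8) pairs into a single EXTS/EXTC meta-opcode, or fold them
 * into a preceding unsigned load, e.g.:
 *   lhu $t0, 0($a0)          lh  $t0, 0($a0)
 *   sll $t0, $t0, 16    ->   nop
 *   sra $t0, $t0, 16         nop
 */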
769 static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset)
770 {
771         struct opcode *prev, *prev2 = NULL, *curr = &list[offset];
772         struct opcode *to_change, *to_nop;
773         int idx, idx2;
774
775         if (curr->r.imm != 24 && curr->r.imm != 16)
776                 return;
777
778         idx = find_prev_writer(list, offset, curr->r.rt);
779         if (idx < 0)
780                 return;
781
782         prev = &list[idx];
783
784         if (prev->i.op != OP_SPECIAL || prev->r.op != OP_SPECIAL_SLL ||
785             prev->r.imm != curr->r.imm || prev->r.rd != curr->r.rt)
786                 return;
787
788         if (prev->r.rd != prev->r.rt && curr->r.rd != curr->r.rt) {
789                 /* sll rY, rX, 16
790                  * ...
 791                  * sra rZ, rY, 16 */
792
793                 if (!reg_is_dead(list, offset, curr->r.rt) ||
794                     reg_is_read_or_written(list, idx, offset, curr->r.rd))
795                         return;
796
 797                 /* If rY is dead after the SRA, and rZ is not used after the SLL,
798                  * we can change rY to rZ */
799
800                 pr_debug("Detected SLL/SRA with middle temp register\n");
801                 prev->r.rd = curr->r.rd;
802                 curr->r.rt = prev->r.rd;
803         }
804
 805         /* We got a SLL/SRA combo. If imm #16, that's a cast to s16.
 806          * If imm #24 that's a cast to s8.
807          *
808          * First of all, make sure that the target register of the SLL is not
809          * read before the SRA. */
810
811         if (prev->r.rd == prev->r.rt) {
812                 /* sll rX, rX, 16
813                  * ...
 814                  * sra rY, rX, 16 */
815                 to_change = curr;
816                 to_nop = prev;
817
818                 /* rX is used after the SRA - we cannot convert it. */
819                 if (prev->r.rd != curr->r.rd && !reg_is_dead(list, offset, prev->r.rd))
820                         return;
821         } else {
822                 /* sll rY, rX, 16
823                  * ...
 824                  * sra rY, rY, 16 */
825                 to_change = prev;
826                 to_nop = curr;
827         }
828
829         idx2 = find_prev_writer(list, idx, prev->r.rt);
830         if (idx2 >= 0) {
831                 /* Note that PSX games sometimes do casts after
832                  * a LHU or LBU; in this case we can change the
833                  * load opcode to a LH or LB, and the cast can
834                  * be changed to a MOV or a simple NOP. */
835
836                 prev2 = &list[idx2];
837
838                 if (curr->r.rd != prev2->i.rt &&
839                     !reg_is_dead(list, offset, prev2->i.rt))
840                         prev2 = NULL;
841                 else if (curr->r.imm == 16 && prev2->i.op == OP_LHU)
842                         prev2->i.op = OP_LH;
843                 else if (curr->r.imm == 24 && prev2->i.op == OP_LBU)
844                         prev2->i.op = OP_LB;
845                 else
846                         prev2 = NULL;
847
848                 if (prev2) {
849                         if (curr->r.rd == prev2->i.rt) {
850                                 to_change->opcode = 0;
851                         } else if (reg_is_dead(list, offset, prev2->i.rt) &&
852                                    !reg_is_read_or_written(list, idx2 + 1, offset, curr->r.rd)) {
853                                 /* The target register of the SRA is dead after the
854                                  * LBU/LHU; we can change the target register of the
855                                  * LBU/LHU to the one of the SRA. */
856                                 prev2->i.rt = curr->r.rd;
857                                 to_change->opcode = 0;
858                         } else {
859                                 to_change->i.op = OP_META_MOV;
860                                 to_change->r.rd = curr->r.rd;
861                                 to_change->r.rs = prev2->i.rt;
862                         }
863
864                         if (to_nop->r.imm == 24)
865                                 pr_debug("Convert LBU+SLL+SRA to LB\n");
866                         else
867                                 pr_debug("Convert LHU+SLL+SRA to LH\n");
868                 }
869         }
870
871         if (!prev2) {
872                 pr_debug("Convert SLL/SRA #%u to EXT%c\n",
873                          prev->r.imm,
874                          prev->r.imm == 24 ? 'C' : 'S');
875
876                 if (to_change == prev) {
877                         to_change->i.rs = prev->r.rt;
878                         to_change->i.rt = curr->r.rd;
879                 } else {
880                         to_change->i.rt = curr->r.rd;
881                         to_change->i.rs = prev->r.rt;
882                 }
883
884                 if (to_nop->r.imm == 24)
885                         to_change->i.op = OP_META_EXTC;
886                 else
887                         to_change->i.op = OP_META_EXTS;
888         }
889
890         to_nop->opcode = 0;
891 }
892
893 static void lightrec_remove_useless_lui(struct block *block, unsigned int offset,
894                                         u32 known, u32 *values)
895 {
896         struct opcode *list = block->opcode_list,
897                       *op = &block->opcode_list[offset];
898         int reader;
899
900         if (!op_flag_sync(op->flags) && (known & BIT(op->i.rt)) &&
901             values[op->i.rt] == op->i.imm << 16) {
902                 pr_debug("Converting duplicated LUI to NOP\n");
903                 op->opcode = 0x0;
904                 return;
905         }
906
907         if (op->i.imm != 0 || op->i.rt == 0)
908                 return;
909
910         reader = find_next_reader(list, offset + 1, op->i.rt);
911         if (reader <= 0)
912                 return;
913
914         if (opcode_writes_register(list[reader].c, op->i.rt) ||
915             reg_is_dead(list, reader, op->i.rt)) {
916                 pr_debug("Removing useless LUI 0x0\n");
917
918                 if (list[reader].i.rs == op->i.rt)
919                         list[reader].i.rs = 0;
920                 if (list[reader].i.op == OP_SPECIAL &&
921                     list[reader].i.rt == op->i.rt)
922                         list[reader].i.rt = 0;
923                 op->opcode = 0x0;
924         }
925 }
926
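/* If the register set by a LUI is only ever used as a load/store base
 * address before being overwritten, the segment bits of the constant do
 * not matter, so the LUI can load the kunseg'd (unmirrored) address
 * directly, presumably sparing an address mask at run time. */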
927 static void lightrec_modify_lui(struct block *block, unsigned int offset)
928 {
929         union code c, *lui = &block->opcode_list[offset].c;
930         bool stop = false, stop_next = false;
931         unsigned int i;
932
933         for (i = offset + 1; !stop && i < block->nb_ops; i++) {
934                 c = block->opcode_list[i].c;
935                 stop = stop_next;
936
937                 if ((opcode_is_store(c) && c.i.rt == lui->i.rt)
938                     || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt)))
939                         break;
940
941                 if (opcode_writes_register(c, lui->i.rt)) {
942                         pr_debug("Convert LUI at offset 0x%x to kuseg\n",
 943                                  (i - 1) << 2);
944                         lui->i.imm = kunseg(lui->i.imm << 16) >> 16;
945                         break;
946                 }
947
948                 if (has_delay_slot(c))
949                         stop_next = true;
950         }
951 }
952
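/* The offset of the rewritten branch is counted in words from the delay
 * slot: imm = target_word - (branch_word + 1). When it fits in 16 bits,
 * the absolute J becomes the always-taken relative branch
 * "beq $zero, $zero, imm", which the local-branch logic can then handle
 * like any other branch. */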
953 static int lightrec_transform_branches(struct lightrec_state *state,
954                                        struct block *block)
955 {
956         struct opcode *op;
957         unsigned int i;
958         s32 offset;
959
960         for (i = 0; i < block->nb_ops; i++) {
961                 op = &block->opcode_list[i];
962
963                 switch (op->i.op) {
964                 case OP_J:
965                         /* Transform J opcode into BEQ $zero, $zero if possible. */
966                         offset = (s32)((block->pc & 0xf0000000) >> 2 | op->j.imm)
967                                 - (s32)(block->pc >> 2) - (s32)i - 1;
968
969                         if (offset == (s16)offset) {
970                                 pr_debug("Transform J into BEQ $zero, $zero\n");
971                                 op->i.op = OP_BEQ;
972                                 op->i.rs = 0;
973                                 op->i.rt = 0;
974                                 op->i.imm = offset;
975
976                         }
977                         fallthrough;
978                 default:
979                         break;
980                 }
981         }
982
983         return 0;
984 }
985
986 static inline bool is_power_of_two(u32 value)
987 {
988         return popcount32(value) == 1;
989 }
990
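/* Main rewriting pass: useless opcodes become real NOPs, moves in
 * disguise become the MOV meta-opcode, and a MULT/MULTU by a known
 * power of two becomes a shift meta-opcode. For example, with $t1 known
 * to hold 16, "mult $t0, $t1" becomes OP_META_MULT2 with a shift amount
 * of 4; in effect LO = $t0 << 4 and HI holds the upper 32 bits. */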
991 static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
992 {
993         struct opcode *list = block->opcode_list;
994         struct opcode *prev, *op = NULL;
995         u32 known = BIT(0);
996         u32 values[32] = { 0 };
997         unsigned int i;
998         u8 tmp;
999
1000         for (i = 0; i < block->nb_ops; i++) {
1001                 prev = op;
1002                 op = &list[i];
1003
1004                 if (prev)
1005                         known = lightrec_propagate_consts(op, prev, known, values);
1006
1007                 /* Transform all opcodes detected as useless to real NOPs
1008                  * (0x0: SLL r0, r0, #0) */
1009                 if (op->opcode != 0 && is_nop(op->c)) {
1010                         pr_debug("Converting useless opcode 0x%08x to NOP\n",
1011                                         op->opcode);
1012                         op->opcode = 0x0;
1013                 }
1014
1015                 if (!op->opcode)
1016                         continue;
1017
1018                 switch (op->i.op) {
1019                 case OP_BEQ:
1020                         if (op->i.rs == op->i.rt) {
1021                                 op->i.rs = 0;
1022                                 op->i.rt = 0;
1023                         } else if (op->i.rs == 0) {
1024                                 op->i.rs = op->i.rt;
1025                                 op->i.rt = 0;
1026                         }
1027                         break;
1028
1029                 case OP_BNE:
1030                         if (op->i.rs == 0) {
1031                                 op->i.rs = op->i.rt;
1032                                 op->i.rt = 0;
1033                         }
1034                         break;
1035
1036                 case OP_LUI:
1037                         if (!prev || !has_delay_slot(prev->c))
1038                                 lightrec_modify_lui(block, i);
1039                         lightrec_remove_useless_lui(block, i, known, values);
1040                         break;
1041
1042                 /* Transform ORI/ADDI/ADDIU with imm #0 or OR/ADD/ADDU/SUB/SUBU
1043                  * with register $zero to the MOV meta-opcode */
1044                 case OP_ORI:
1045                 case OP_ADDI:
1046                 case OP_ADDIU:
1047                         if (op->i.imm == 0) {
1048                                 pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
1049                                 op->i.op = OP_META_MOV;
1050                                 op->r.rd = op->i.rt;
1051                         }
1052                         break;
1053                 case OP_SPECIAL:
1054                         switch (op->r.op) {
1055                         case OP_SPECIAL_SRA:
1056                                 if (op->r.imm == 0) {
1057                                         pr_debug("Convert SRA #0 to MOV\n");
1058                                         op->i.op = OP_META_MOV;
1059                                         op->r.rs = op->r.rt;
1060                                         break;
1061                                 }
1062
1063                                 lightrec_optimize_sll_sra(block->opcode_list, i);
1064                                 break;
1065                         case OP_SPECIAL_SLL:
1066                         case OP_SPECIAL_SRL:
1067                                 if (op->r.imm == 0) {
1068                                         pr_debug("Convert SLL/SRL #0 to MOV\n");
1069                                         op->i.op = OP_META_MOV;
1070                                         op->r.rs = op->r.rt;
1071                                 }
1072                                 break;
1073                         case OP_SPECIAL_MULT:
1074                         case OP_SPECIAL_MULTU:
1075                                 if ((known & BIT(op->r.rs)) &&
1076                                     is_power_of_two(values[op->r.rs])) {
1077                                         tmp = op->c.i.rs;
1078                                         op->c.i.rs = op->c.i.rt;
1079                                         op->c.i.rt = tmp;
1080                                 } else if (!(known & BIT(op->r.rt)) ||
1081                                            !is_power_of_two(values[op->r.rt])) {
1082                                         break;
1083                                 }
1084
1085                                 pr_debug("Multiply by power-of-two: %u\n",
1086                                          values[op->r.rt]);
1087
1088                                 if (op->r.op == OP_SPECIAL_MULT)
1089                                         op->i.op = OP_META_MULT2;
1090                                 else
1091                                         op->i.op = OP_META_MULTU2;
1092
1093                                 op->r.op = ffs32(values[op->r.rt]);
1094                                 break;
1095                         case OP_SPECIAL_OR:
1096                         case OP_SPECIAL_ADD:
1097                         case OP_SPECIAL_ADDU:
1098                                 if (op->r.rs == 0) {
1099                                         pr_debug("Convert OR/ADD $zero to MOV\n");
1100                                         op->i.op = OP_META_MOV;
1101                                         op->r.rs = op->r.rt;
1102                                 }
1103                                 fallthrough;
1104                         case OP_SPECIAL_SUB:
1105                         case OP_SPECIAL_SUBU:
1106                                 if (op->r.rt == 0) {
1107                                         pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
1108                                         op->i.op = OP_META_MOV;
1109                                 }
1110                                 fallthrough;
1111                         default:
1112                                 break;
1113                         }
1114                         fallthrough;
1115                 default:
1116                         break;
1117                 }
1118         }
1119
1120         return 0;
1121 }
1122
1123 static bool lightrec_can_switch_delay_slot(union code op, union code next_op)
1124 {
1125         switch (op.i.op) {
1126         case OP_SPECIAL:
1127                 switch (op.r.op) {
1128                 case OP_SPECIAL_JALR:
1129                         if (opcode_reads_register(next_op, op.r.rd) ||
1130                             opcode_writes_register(next_op, op.r.rd))
1131                                 return false;
1132                         fallthrough;
1133                 case OP_SPECIAL_JR:
1134                         if (opcode_writes_register(next_op, op.r.rs))
1135                                 return false;
1136                         fallthrough;
1137                 default:
1138                         break;
1139                 }
1140                 fallthrough;
1141         case OP_J:
1142                 break;
1143         case OP_JAL:
1144                 if (opcode_reads_register(next_op, 31) ||
1145                     opcode_writes_register(next_op, 31))
1146                         return false;
1147
1148                 break;
1149         case OP_BEQ:
1150         case OP_BNE:
1151                 if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
1152                         return false;
1153                 fallthrough;
1154         case OP_BLEZ:
1155         case OP_BGTZ:
1156                 if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
1157                         return false;
1158                 break;
1159         case OP_REGIMM:
1160                 switch (op.r.rt) {
1161                 case OP_REGIMM_BLTZAL:
1162                 case OP_REGIMM_BGEZAL:
1163                         if (opcode_reads_register(next_op, 31) ||
1164                             opcode_writes_register(next_op, 31))
1165                                 return false;
1166                         fallthrough;
1167                 case OP_REGIMM_BLTZ:
1168                 case OP_REGIMM_BGEZ:
1169                         if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
1170                                 return false;
1171                         break;
1172                 }
1173                 fallthrough;
1174         default:
1175                 break;
1176         }
1177
1178         return true;
1179 }
1180
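/* When a branch and its delay slot are independent, swap them and tag
 * the branch NO_DS, so the emitted code does not need to deal with a
 * delay slot at all, e.g.:
 *   beq   $t0, $zero, target        addiu $a0, $a0, 1
 *   addiu $a0, $a0, 1          ->   beq   $t0, $zero, target  (NO_DS)
 */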
1181 static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
1182 {
1183         struct opcode *list, *next = &block->opcode_list[0];
1184         unsigned int i;
1185         union code op, next_op;
1186         u32 flags;
1187
1188         for (i = 0; i < block->nb_ops - 1; i++) {
1189                 list = next;
1190                 next = &block->opcode_list[i + 1];
1191                 next_op = next->c;
1192                 op = list->c;
1193
1194                 if (!has_delay_slot(op) || op_flag_no_ds(list->flags) ||
1195                     op_flag_emulate_branch(list->flags) ||
1196                     op.opcode == 0 || next_op.opcode == 0)
1197                         continue;
1198
1199                 if (i && has_delay_slot(block->opcode_list[i - 1].c) &&
1200                     !op_flag_no_ds(block->opcode_list[i - 1].flags))
1201                         continue;
1202
1203                 if (op_flag_sync(next->flags))
1204                         continue;
1205
1206                 if (!lightrec_can_switch_delay_slot(list->c, next_op))
1207                         continue;
1208
1209                 pr_debug("Swap branch and delay slot opcodes "
1210                          "at offsets 0x%x / 0x%x\n",
1211                          i << 2, (i + 1) << 2);
1212
1213                 flags = next->flags | (list->flags & LIGHTREC_SYNC);
1214                 list->c = next_op;
1215                 next->c = op;
1216                 next->flags = (list->flags | LIGHTREC_NO_DS) & ~LIGHTREC_SYNC;
1217                 list->flags = flags | LIGHTREC_NO_DS;
1218         }
1219
1220         return 0;
1221 }
1222
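/* Reallocate the opcode list down to new_size entries. The opcodes are
 * stored inline in a struct opcode_list header, hence the container_of()
 * below to recover the allocation from block->opcode_list. */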
1223 static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
1224 {
1225         struct opcode_list *list, *old_list;
1226
1227         if (new_size >= block->nb_ops) {
1228                 pr_err("Invalid shrink size (%u vs %u)\n",
1229                        new_size, block->nb_ops);
1230                 return -EINVAL;
1231         }
1232
1233         list = lightrec_malloc(state, MEM_FOR_IR,
1234                                sizeof(*list) + sizeof(struct opcode) * new_size);
1235         if (!list) {
1236                 pr_err("Unable to allocate memory\n");
1237                 return -ENOMEM;
1238         }
1239
1240         old_list = container_of(block->opcode_list, struct opcode_list, ops);
1241         memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size);
1242
1243         lightrec_free_opcode_list(state, block->opcode_list);
1244         list->nb_ops = new_size;
1245         block->nb_ops = new_size;
1246         block->opcode_list = list->ops;
1247
1248         pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
1249                  block->pc, new_size);
1250
1251         return 0;
1252 }
1253
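/* An "impossible" branch is one whose delay slot cannot be recompiled
 * normally: it holds another branch, an RFE, or a load whose delayed
 * result would be observable. Such branches are flagged
 * LIGHTREC_EMULATE_BRANCH and handled by the emulation fallback. */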
1254 static int lightrec_detect_impossible_branches(struct lightrec_state *state,
1255                                                struct block *block)
1256 {
1257         struct opcode *op, *list = block->opcode_list, *next = &list[0];
1258         unsigned int i;
1259         int ret = 0;
1260         s16 offset;
1261
1262         for (i = 0; i < block->nb_ops - 1; i++) {
1263                 op = next;
1264                 next = &list[i + 1];
1265
1266                 if (!has_delay_slot(op->c) ||
1267                     (!load_in_delay_slot(next->c) &&
1268                      !has_delay_slot(next->c) &&
1269                      !(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE)))
1270                         continue;
1271
1272                 if (op->c.opcode == next->c.opcode) {
1273                         /* The delay slot is the exact same opcode as the branch
1274                          * opcode: this is effectively a NOP */
1275                         next->c.opcode = 0;
1276                         continue;
1277                 }
1278
1279                 offset = i + 1 + (s16)op->i.imm;
1280                 if (load_in_delay_slot(next->c) &&
1281                     (offset >= 0 && offset < block->nb_ops) &&
1282                     !opcode_reads_register(list[offset].c, next->c.i.rt)) {
1283                         /* The 'impossible' branch is a local branch - we can
1284                          * verify here that the first opcode of the target does
1285                          * not use the target register of the delay slot */
1286
1287                         pr_debug("Branch at offset 0x%x has load delay slot, "
1288                                  "but is local and dest opcode does not read "
1289                                  "dest register\n", i << 2);
1290                         continue;
1291                 }
1292
1293                 op->flags |= LIGHTREC_EMULATE_BRANCH;
1294
1295                 if (op == list) {
1296                         pr_debug("First opcode of block PC 0x%08x is an impossible branch\n",
1297                                  block->pc);
1298
1299                         /* If the first opcode is an 'impossible' branch, we
1300                          * only keep the first two opcodes of the block (the
1301                          * branch itself + its delay slot) */
1302                         if (block->nb_ops > 2)
1303                                 ret = shrink_opcode_list(state, block, 2);
1304                         break;
1305                 }
1306         }
1307
1308         return ret;
1309 }
1310
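/* Detect branches whose target lies inside the current block. The
 * target opcode is flagged SYNC (registers are written back there), and
 * the branch is flagged LOCAL_BRANCH so that it can be compiled as a
 * direct jump within the block. */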
1311 static int lightrec_local_branches(struct lightrec_state *state, struct block *block)
1312 {
1313         struct opcode *list;
1314         unsigned int i;
1315         s32 offset;
1316
1317         for (i = 0; i < block->nb_ops; i++) {
1318                 list = &block->opcode_list[i];
1319
1320                 if (should_emulate(list))
1321                         continue;
1322
1323                 switch (list->i.op) {
1324                 case OP_BEQ:
1325                 case OP_BNE:
1326                 case OP_BLEZ:
1327                 case OP_BGTZ:
1328                 case OP_REGIMM:
1329                         offset = i + 1 + (s16)list->i.imm;
1330                         if (offset >= 0 && offset < block->nb_ops)
1331                                 break;
1332                         fallthrough;
1333                 default:
1334                         continue;
1335                 }
1336
1337                 pr_debug("Found local branch to offset 0x%x\n", offset << 2);
1338
1339                 if (should_emulate(&block->opcode_list[offset])) {
1340                         pr_debug("Branch target must be emulated - skip\n");
1341                         continue;
1342                 }
1343
1344                 if (offset && has_delay_slot(block->opcode_list[offset - 1].c)) {
1345                         pr_debug("Branch target is a delay slot - skip\n");
1346                         continue;
1347                 }
1348
1349                 pr_debug("Adding sync at offset 0x%x\n", offset << 2);
1350
1351                 block->opcode_list[offset].flags |= LIGHTREC_SYNC;
1352                 list->flags |= LIGHTREC_LOCAL_BRANCH;
1353         }
1354
1355         return 0;
1356 }
1357
1358 bool has_delay_slot(union code op)
1359 {
1360         switch (op.i.op) {
1361         case OP_SPECIAL:
1362                 switch (op.r.op) {
1363                 case OP_SPECIAL_JR:
1364                 case OP_SPECIAL_JALR:
1365                         return true;
1366                 default:
1367                         return false;
1368                 }
1369         case OP_J:
1370         case OP_JAL:
1371         case OP_BEQ:
1372         case OP_BNE:
1373         case OP_BLEZ:
1374         case OP_BGTZ:
1375         case OP_REGIMM:
1376                 return true;
1377         default:
1378                 return false;
1379         }
1380 }
1381
1382 bool should_emulate(const struct opcode *list)
1383 {
1384         return op_flag_emulate_branch(list->flags) && has_delay_slot(list->c);
1385 }
1386
1387 static bool op_writes_rd(union code c)
1388 {
1389         switch (c.i.op) {
1390         case OP_SPECIAL:
1391         case OP_META_MOV:
1392                 return true;
1393         default:
1394                 return false;
1395         }
1396 }
1397
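/* Record a register lifetime hint (unload/clean/discard) on an opcode,
 * in the flag slot matching how the opcode names the register (rd, rs
 * or rt). */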
1398 static void lightrec_add_reg_op(struct opcode *op, u8 reg, u32 reg_op)
1399 {
1400         if (op_writes_rd(op->c) && reg == op->r.rd)
1401                 op->flags |= LIGHTREC_REG_RD(reg_op);
1402         else if (op->i.rs == reg)
1403                 op->flags |= LIGHTREC_REG_RS(reg_op);
1404         else if (op->i.rt == reg)
1405                 op->flags |= LIGHTREC_REG_RT(reg_op);
1406         else
1407                 pr_debug("Cannot add unload/clean/discard flag: "
1408                          "opcode does not touch register %s!\n",
1409                          lightrec_reg_name(reg));
1410 }
1411
1412 static void lightrec_add_unload(struct opcode *op, u8 reg)
1413 {
1414         lightrec_add_reg_op(op, reg, LIGHTREC_REG_UNLOAD);
1415 }
1416
1417 static void lightrec_add_discard(struct opcode *op, u8 reg)
1418 {
1419         lightrec_add_reg_op(op, reg, LIGHTREC_REG_DISCARD);
1420 }
1421
1422 static void lightrec_add_clean(struct opcode *op, u8 reg)
1423 {
1424         lightrec_add_reg_op(op, reg, LIGHTREC_REG_CLEAN);
1425 }
1426
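/* At a sync point, every tracked register (32 GPRs plus LO/HI, hence
 * 34) is unloaded at the site of its last use, then the last-read and
 * last-write tables are reset to -1 (memset with 0xff). */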
1427 static void
1428 lightrec_early_unload_sync(struct opcode *list, s16 *last_r, s16 *last_w)
1429 {
1430         unsigned int reg;
1431         s16 offset;
1432
1433         for (reg = 0; reg < 34; reg++) {
1434                 offset = s16_max(last_w[reg], last_r[reg]);
1435
1436                 if (offset >= 0)
1437                         lightrec_add_unload(&list[offset], reg);
1438         }
1439
1440         memset(last_r, 0xff, sizeof(*last_r) * 34);
1441         memset(last_w, 0xff, sizeof(*last_w) * 34);
1442 }
1443
1444 static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
1445 {
1446         u16 i, offset;
1447         struct opcode *op;
1448         s16 last_r[34], last_w[34], last_sync = 0, next_sync = 0;
1449         u64 mask_r, mask_w, dirty = 0, loaded = 0;
1450         u8 reg;
1451
1452         memset(last_r, 0xff, sizeof(last_r));
1453         memset(last_w, 0xff, sizeof(last_w));
1454
1455         /*
1456          * Clean if:
1457          * - the register is dirty, and is read again after a branch opcode
1458          *
1459          * Unload if:
1460          * - the register is dirty or loaded, and is not read again
1461          * - the register is dirty or loaded, and is written again after a branch opcode
1462          * - the next opcode has the SYNC flag set
1463          *
1464          * Discard if:
1465          * - the register is dirty or loaded, and is written again
1466          */
1467
1468         for (i = 0; i < block->nb_ops; i++) {
1469                 op = &block->opcode_list[i];
1470
1471                 if (op_flag_sync(op->flags) || should_emulate(op)) {
1472                         /* The next opcode has the SYNC flag set, or is a branch
1473                          * that should be emulated: unload all registers. */
1474                         lightrec_early_unload_sync(block->opcode_list, last_r, last_w);
1475                         dirty = 0;
1476                         loaded = 0;
1477                 }
1478
1479                 if (next_sync == i) {
1480                         last_sync = i;
1481                         pr_debug("Last sync: 0x%x\n", last_sync << 2);
1482                 }
1483
1484                 if (has_delay_slot(op->c)) {
1485                         next_sync = i + 1 + !op_flag_no_ds(op->flags);
1486                         pr_debug("Next sync: 0x%x\n", next_sync << 2);
1487                 }
1488
1489                 mask_r = opcode_read_mask(op->c);
1490                 mask_w = opcode_write_mask(op->c);
1491
1492                 for (reg = 0; reg < 34; reg++) {
1493                         if (mask_r & BIT(reg)) {
1494                                 if (dirty & BIT(reg) && last_w[reg] < last_sync) {
1495                                         /* The register is dirty, and is read
1496                                          * again after a branch: clean it */
1497
1498                                         lightrec_add_clean(&block->opcode_list[last_w[reg]], reg);
1499                                         dirty &= ~BIT(reg);
1500                                         loaded |= BIT(reg);
1501                                 }
1502
1503                                 last_r[reg] = i;
1504                         }
1505
1506                         if (mask_w & BIT(reg)) {
1507                                 if ((dirty & BIT(reg) && last_w[reg] < last_sync) ||
1508                                     (loaded & BIT(reg) && last_r[reg] < last_sync)) {
1509                                         /* The register is dirty or loaded, and
1510                                          * is written again after a branch:
1511                                          * unload it */
1512
1513                                         offset = s16_max(last_w[reg], last_r[reg]);
1514                                         lightrec_add_unload(&block->opcode_list[offset], reg);
1515                                         dirty &= ~BIT(reg);
1516                                         loaded &= ~BIT(reg);
1517                                 } else if (!(mask_r & BIT(reg)) &&
1518                                            ((dirty & BIT(reg) && last_w[reg] > last_sync) ||
1519                                            (loaded & BIT(reg) && last_r[reg] > last_sync))) {
1520                                         /* The register is dirty or loaded, and
1521                                          * is written again: discard it */
1522
1523                                         offset = s16_max(last_w[reg], last_r[reg]);
1524                                         lightrec_add_discard(&block->opcode_list[offset], reg);
1525                                         dirty &= ~BIT(reg);
1526                                         loaded &= ~BIT(reg);
1527                                 }
1528
1529                                 last_w[reg] = i;
1530                         }
1531
1532                 }
1533
1534                 dirty |= mask_w;
1535                 loaded |= mask_r;
1536         }
1537
1538         /* Unload all registers that are dirty or loaded at the end of the block. */
1539         lightrec_early_unload_sync(block->opcode_list, last_r, last_w);
1540
1541         return 0;
1542 }
1543
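/* Classify memory accesses using the constant propagation performed below.
 * A sketch of the idea: given the pair
 *
 *   lui  v0, 0x1f80
 *   lhu  v1, 0x1070(v0)
 *
 * the effective address 0x1f801070 (the I_STAT register) is known at
 * compile time; the load falls into PSX_MAP_HW_REGISTERS and can be
 * flagged as a hardware I/O access, avoiding the generic memory lookup at
 * runtime. */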
1544 static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
1545 {
1546         struct opcode *prev = NULL, *list = NULL;
1547         enum psx_map psx_map;
1548         u32 known = BIT(0);
1549         u32 values[32] = { 0 };
1550         unsigned int i;
1551         u32 val, kunseg_val;
1552         bool no_mask;
1553
1554         for (i = 0; i < block->nb_ops; i++) {
1555                 prev = list;
1556                 list = &block->opcode_list[i];
1557
1558                 if (prev)
1559                         known = lightrec_propagate_consts(list, prev, known, values);
1560
1561                 switch (list->i.op) {
1562                 case OP_SB:
1563                 case OP_SH:
1564                 case OP_SW:
1565                         if (OPT_FLAG_STORES) {
1566                                 /* Mark all store operations that target $sp or $gp
1567                                  * as not requiring code invalidation. This is based
1568                                  * on the heuristic that stores using one of these
1569                                  * registers as the address will never hit a code page. */
1570                                 if (list->i.rs >= 28 && list->i.rs <= 29 &&
1571                                     !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
1572                                         pr_debug("Flagging opcode 0x%08x as not "
1573                                                  "requiring invalidation\n",
1574                                                  list->opcode);
1575                                         list->flags |= LIGHTREC_NO_INVALIDATE;
1576                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
1577                                 }
1578
1579                                 /* Detect writes whose destination address is inside the
1580                                  * current block, using constant propagation. When these
1581                                  * occur, we mark the block as not compilable. */
1582                                 if ((known & BIT(list->i.rs)) &&
1583                                     kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
1584                                     kunseg(values[list->i.rs]) < (kunseg(block->pc) +
1585                                                                   block->nb_ops * 4)) {
1586                                         pr_debug("Self-modifying block detected\n");
1587                                         block_set_flags(block, BLOCK_NEVER_COMPILE);
1588                                         list->flags |= LIGHTREC_SMC;
1589                                 }
1590                         }
1591                         fallthrough;
1592                 case OP_SWL:
1593                 case OP_SWR:
1594                 case OP_SWC2:
1595                 case OP_LB:
1596                 case OP_LBU:
1597                 case OP_LH:
1598                 case OP_LHU:
1599                 case OP_LW:
1600                 case OP_LWL:
1601                 case OP_LWR:
1602                 case OP_LWC2:
1603                         if (OPT_FLAG_IO && (known & BIT(list->i.rs))) {
1604                                 val = values[list->i.rs] + (s16) list->i.imm;
1605                                 kunseg_val = kunseg(val);
1606                                 psx_map = lightrec_get_map_idx(state, kunseg_val);
1607
1608                                 list->flags &= ~LIGHTREC_IO_MASK;
1609                                 no_mask = val == kunseg_val;
1610
1611                                 switch (psx_map) {
1612                                 case PSX_MAP_KERNEL_USER_RAM:
1613                                         if (no_mask)
1614                                                 list->flags |= LIGHTREC_NO_MASK;
1615                                         fallthrough;
1616                                 case PSX_MAP_MIRROR1:
1617                                 case PSX_MAP_MIRROR2:
1618                                 case PSX_MAP_MIRROR3:
1619                                         pr_debug("Flagging opcode %u as RAM access\n", i);
1620                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
1621                                         if (no_mask && state->mirrors_mapped)
1622                                                 list->flags |= LIGHTREC_NO_MASK;
1623                                         break;
1624                                 case PSX_MAP_BIOS:
1625                                         pr_debug("Flagging opcode %u as BIOS access\n", i);
1626                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS);
1627                                         if (no_mask)
1628                                                 list->flags |= LIGHTREC_NO_MASK;
1629                                         break;
1630                                 case PSX_MAP_SCRATCH_PAD:
1631                                         pr_debug("Flagging opcode %u as scratchpad access\n", i);
1632                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH);
1633                                         if (no_mask)
1634                                                 list->flags |= LIGHTREC_NO_MASK;
1635
1636                                         /* Assume that we're never going to run code from
1637                                          * the scratchpad. */
1638                                         list->flags |= LIGHTREC_NO_INVALIDATE;
1639                                         break;
1640                                 case PSX_MAP_HW_REGISTERS:
1641                                         if (state->ops.hw_direct &&
1642                                             state->ops.hw_direct(kunseg_val,
1643                                                                  opcode_is_store(list->c),
1644                                                                  opcode_get_io_size(list->c))) {
1645                                                 pr_debug("Flagging opcode %u as direct I/O access\n",
1646                                                          i);
1647                                                 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT_HW);
1648                                                 break;
1649                                         }
1650                                         fallthrough;
1651                                 default:
1652                                         pr_debug("Flagging opcode %u as I/O access\n",
1653                                                  i);
1654                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
1655                                         break;
1656                                 }
1657                         }
1658                         fallthrough;
1659                 default:
1660                         break;
1661                 }
1662         }
1663
1664         return 0;
1665 }
1666
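/* Follow the code flow from "offset" to determine where the HI or LO value
 * (selected by "mflo") ends up. As used by the callers below, the return
 * value is: 0 when the value can be proven dead (e.g. overwritten before
 * being read), REG_HI/REG_LO when the value must be kept in the real HI/LO
 * register, or the destination GPR of the single MFHI/MFLO opcode that
 * consumes it, in which case the result can be written to that GPR
 * directly. */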
1667 static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
1668                             const struct opcode *last,
1669                             u32 mask, bool sync, bool mflo, bool another)
1670 {
1671         const struct opcode *op, *next = &block->opcode_list[offset];
1672         u32 old_mask;
1673         u8 reg2, reg = mflo ? REG_LO : REG_HI;
1674         u16 branch_offset;
1675         unsigned int i;
1676
1677         for (i = offset; i < block->nb_ops; i++) {
1678                 op = next;
1679                 next = &block->opcode_list[i + 1];
1680                 old_mask = mask;
1681
1682                 /* If any other opcode reads or writes the register
1683                  * we'd use, then we cannot use it anymore. */
1684                 mask |= opcode_read_mask(op->c);
1685                 mask |= opcode_write_mask(op->c);
1686
1687                 if (op_flag_sync(op->flags))
1688                         sync = true;
1689
1690                 switch (op->i.op) {
1691                 case OP_BEQ:
1692                 case OP_BNE:
1693                 case OP_BLEZ:
1694                 case OP_BGTZ:
1695                 case OP_REGIMM:
1696                         /* TODO: handle backwards branches too */
1697                         if (!last && op_flag_local_branch(op->flags) &&
1698                             (s16)op->c.i.imm >= 0) {
1699                                 branch_offset = i + 1 + (s16)op->c.i.imm
1700                                         - !!op_flag_no_ds(op->flags);
1701
1702                                 reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
1703                                                         mask, sync, mflo, false);
1704                                 reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
1705                                                          mask, sync, mflo, false);
1706                                 if (reg > 0 && reg == reg2)
1707                                         return reg;
1708                                 if (!reg && !reg2)
1709                                         return 0;
1710                         }
1711
1712                         return mflo ? REG_LO : REG_HI;
1713                 case OP_META_MULT2:
1714                 case OP_META_MULTU2:
1715                         return 0;
1716                 case OP_SPECIAL:
1717                         switch (op->r.op) {
1718                         case OP_SPECIAL_MULT:
1719                         case OP_SPECIAL_MULTU:
1720                         case OP_SPECIAL_DIV:
1721                         case OP_SPECIAL_DIVU:
1722                                 return 0;
1723                         case OP_SPECIAL_MTHI:
1724                                 if (!mflo)
1725                                         return 0;
1726                                 continue;
1727                         case OP_SPECIAL_MTLO:
1728                                 if (mflo)
1729                                         return 0;
1730                                 continue;
1731                         case OP_SPECIAL_JR:
1732                                 if (op->r.rs != 31)
1733                                         return reg;
1734
1735                                 if (!sync && !op_flag_no_ds(op->flags) &&
1736                                     (next->i.op == OP_SPECIAL) &&
1737                                     ((!mflo && next->r.op == OP_SPECIAL_MFHI) ||
1738                                     (mflo && next->r.op == OP_SPECIAL_MFLO)))
1739                                         return next->r.rd;
1740
1741                                 return 0;
1742                         case OP_SPECIAL_JALR:
1743                                 return reg;
1744                         case OP_SPECIAL_MFHI:
1745                                 if (!mflo) {
1746                                         if (another)
1747                                                 return op->r.rd;
1748                                         /* Must use REG_HI if there is another MFHI target */
1749                                         reg2 = get_mfhi_mflo_reg(block, i + 1, next,
1750                                                          0, sync, mflo, true);
1751                                         if (reg2 > 0 && reg2 != REG_HI)
1752                                                 return REG_HI;
1753
1754                                         if (!sync && !(old_mask & BIT(op->r.rd)))
1755                                                 return op->r.rd;
1756                                         else
1757                                                 return REG_HI;
1758                                 }
1759                                 continue;
1760                         case OP_SPECIAL_MFLO:
1761                                 if (mflo) {
1762                                         if (another)
1763                                                 return op->r.rd;
1764                                         /* Must use REG_LO if there is another MFLO target */
1765                                         reg2 = get_mfhi_mflo_reg(block, i + 1, next,
1766                                                          0, sync, mflo, true);
1767                                         if (reg2 > 0 && reg2 != REG_LO)
1768                                                 return REG_LO;
1769
1770                                         if (!sync && !(old_mask & BIT(op->r.rd)))
1771                                                 return op->r.rd;
1772                                         else
1773                                                 return REG_LO;
1774                                 }
1775                                 continue;
1776                         default:
1777                                 break;
1778                         }
1779
1780                         fallthrough;
1781                 default:
1782                         continue;
1783                 }
1784         }
1785
1786         return reg;
1787 }
1788
1789 static void lightrec_replace_lo_hi(struct block *block, u16 offset,
1790                                    u16 last, bool lo)
1791 {
1792         unsigned int i;
1793         u32 branch_offset;
1794
1795         /* This function removes the next MFLO/MFHI opcode found. It must only
1796          * be called if get_mfhi_mflo_reg() returned a non-zero value. */
1797
1798         for (i = offset; i < last; i++) {
1799                 struct opcode *op = &block->opcode_list[i];
1800
1801                 switch (op->i.op) {
1802                 case OP_BEQ:
1803                 case OP_BNE:
1804                 case OP_BLEZ:
1805                 case OP_BGTZ:
1806                 case OP_REGIMM:
1807                         /* TODO: handle backwards branches too */
1808                         if (op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) {
1809                                 branch_offset = i + 1 + (s16)op->c.i.imm
1810                                         - !!op_flag_no_ds(op->flags);
1811
1812                                 lightrec_replace_lo_hi(block, branch_offset, last, lo);
1813                                 lightrec_replace_lo_hi(block, i + 1, branch_offset, lo);
1814                         }
1815                         break;
1816
1817                 case OP_SPECIAL:
1818                         if (lo && op->r.op == OP_SPECIAL_MFLO) {
1819                                 pr_debug("Removing MFLO opcode at offset 0x%x\n",
1820                                          i << 2);
1821                                 op->opcode = 0;
1822                                 return;
1823                         } else if (!lo && op->r.op == OP_SPECIAL_MFHI) {
1824                                 pr_debug("Removing MFHI opcode at offset 0x%x\n",
1825                                          i << 2);
1826                                 op->opcode = 0;
1827                                 return;
1828                         }
1829
1830                         fallthrough;
1831                 default:
1832                         break;
1833                 }
1834         }
1835 }
1836
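/* On a MIPS host the guest DIV/DIVU can map directly onto the native
 * instruction, which, like the R3000A's, does not trap on a division by
 * zero (the result is simply unpredictable), so the div-by-zero check can
 * always be skipped. */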
1837 static bool lightrec_always_skip_div_check(void)
1838 {
1839 #ifdef __mips__
1840         return true;
1841 #else
1842         return false;
1843 #endif
1844 }
1845
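/* When the HI or LO result of a MULT/DIV has a single MFHI/MFLO consumer,
 * write the result directly into the consumer's destination register and
 * drop the MFHI/MFLO opcode. A sketch of the transformation:
 *
 *   mult  a0, a1             mult  a0, a1   ; rd = v0
 *   ...                ->    ...
 *   mflo  v0                 nop
 *
 * The substitute register for LO is stored in r.rd and the one for HI in
 * r.imm. If HI (resp. LO) is provably never read, the opcode is instead
 * flagged LIGHTREC_NO_HI (resp. LIGHTREC_NO_LO). */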
1846 static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
1847 {
1848         struct opcode *prev, *list = NULL;
1849         u8 reg_hi, reg_lo;
1850         unsigned int i;
1851         u32 known = BIT(0);
1852         u32 values[32] = { 0 };
1853
1854         for (i = 0; i < block->nb_ops - 1; i++) {
1855                 prev = list;
1856                 list = &block->opcode_list[i];
1857
1858                 if (prev)
1859                         known = lightrec_propagate_consts(list, prev, known, values);
1860
1861                 switch (list->i.op) {
1862                 case OP_SPECIAL:
1863                         switch (list->r.op) {
1864                         case OP_SPECIAL_DIV:
1865                         case OP_SPECIAL_DIVU:
1866                                 /* If we are dividing by a non-zero constant, don't
1867                                  * emit the div-by-zero check. */
1868                                 if (lightrec_always_skip_div_check() ||
1869                                     ((known & BIT(list->c.r.rt)) && values[list->c.r.rt]))
1870                                         list->flags |= LIGHTREC_NO_DIV_CHECK;
1871                                 fallthrough;
1872                         case OP_SPECIAL_MULT:
1873                         case OP_SPECIAL_MULTU:
1874                                 break;
1875                         default:
1876                                 continue;
1877                         }
1878                         fallthrough;
1879                 case OP_META_MULT2:
1880                 case OP_META_MULTU2:
1881                         break;
1882                 default:
1883                         continue;
1884                 }
1885
1886                 /* Don't support opcodes in delay slots */
1887                 if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
1888                     op_flag_no_ds(list->flags)) {
1889                         continue;
1890                 }
1891
1892                 reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
1893                 if (reg_lo == 0) {
1894                         pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
1895                                  " not writing LO\n", i << 2);
1896                         list->flags |= LIGHTREC_NO_LO;
1897                 }
1898
1899                 reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false);
1900                 if (reg_hi == 0) {
1901                         pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
1902                                  " not writing HI\n", i << 2);
1903                         list->flags |= LIGHTREC_NO_HI;
1904                 }
1905
1906                 if (!reg_lo && !reg_hi) {
1907                         pr_debug("Both LO and HI are unused in this block; they "
1908                                  "will probably be used in a parent block - "
1909                                  "removing flags.\n");
1910                         list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI);
1911                 }
1912
1913                 if (reg_lo > 0 && reg_lo != REG_LO) {
1914                         pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n",
1915                                  lightrec_reg_name(reg_lo), list->r.rs, list->r.rt);
1916
1917                         lightrec_replace_lo_hi(block, i + 1, block->nb_ops, true);
1918                         list->r.rd = reg_lo;
1919                 } else {
1920                         list->r.rd = 0;
1921                 }
1922
1923                 if (reg_hi > 0 && reg_hi != REG_HI) {
1924                         pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n",
1925                                  lightrec_reg_name(reg_hi), list->r.rs, list->r.rt);
1926
1927                         lightrec_replace_lo_hi(block, i + 1, block->nb_ops, false);
1928                         list->r.imm = reg_hi;
1929                 } else {
1930                         list->r.imm = 0;
1931                 }
1932         }
1933
1934         return 0;
1935 }
1936
1937 static bool remove_div_sequence(struct block *block, unsigned int offset)
1938 {
1939         struct opcode *op;
1940         unsigned int i, found = 0;
1941
1942         /*
1943          * Scan for the zero-checking sequence that GCC automatically introduced
1944          * after most DIV/DIVU opcodes. This sequence checks the value of the
1945          * divisor and, if it is zero, executes a BREAK opcode, which causes the
1946          * BIOS exception handler to crash the PS1.
1947          *
1948          * For DIV opcodes, the sequence additionally checks that the signed
1949          * operation does not overflow.
1950          *
1951          * Since shipped games did not crash the PS1, we can assume that they
1952          * never divided by zero or overflowed, and therefore that these
1953          * sequences can safely be removed.
1954          */
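
        /*
         * The complete sequence, matched step by step below ("rs" and "rt"
         * are the DIV operands; a DIVU guard ends after the first BREAK):
         *
         *        bne    rt, zero, 1f
         *        nop
         *        break  0x1c00          ; division by zero
         * 1:     li     at, -1
         *        bne    rt, at, 2f
         *        lui    at, 0x8000
         *        bne    rs, at, 2f
         *        nop
         *        break  0x1800          ; overflow (0x80000000 / -1)
         * 2:
         */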
1955
1956         for (i = offset; i < block->nb_ops; i++) {
1957                 op = &block->opcode_list[i];
1958
1959                 if (!found) {
1960                         if (op->i.op == OP_SPECIAL &&
1961                             (op->r.op == OP_SPECIAL_DIV || op->r.op == OP_SPECIAL_DIVU))
1962                                 break;
1963
1964                         if ((op->opcode & 0xfc1fffff) == 0x14000002) {
1965                                 /* BNE ???, zero, +8 */
1966                                 found++;
1967                         } else {
1968                                 offset++;
1969                         }
1970                 } else if (found == 1 && !op->opcode) {
1971                         /* NOP */
1972                         found++;
1973                 } else if (found == 2 && op->opcode == 0x0007000d) {
1974                         /* BREAK 0x1c00 */
1975                         found++;
1976                 } else if (found == 3 && op->opcode == 0x2401ffff) {
1977                         /* LI at, -1 */
1978                         found++;
1979                 } else if (found == 4 && (op->opcode & 0xfc1fffff) == 0x14010004) {
1980                         /* BNE ???, at, +16 */
1981                         found++;
1982                 } else if (found == 5 && op->opcode == 0x3c018000) {
1983                         /* LUI at, 0x8000 */
1984                         found++;
1985                 } else if (found == 6 && (op->opcode & 0xfc1fffff) == 0x14010002) {
1986                         /* BNE ???, at, +8 */
1987                         found++;
1988                 } else if (found == 7 && !op->opcode) {
1989                         /* NOP */
1990                         found++;
1991                 } else if (found == 8 && op->opcode == 0x0006000d) {
1992                         /* BREAK 0x1800 */
1993                         found++;
1994                         break;
1995                 } else {
1996                         break;
1997                 }
1998         }
1999
2000         if (found >= 3) {
2001                 if (found != 9)
2002                         found = 3;
2003
2004                 pr_debug("Removing DIV%s sequence at offset 0x%x\n",
2005                          found == 9 ? "" : "U", offset << 2);
2006
2007                 for (i = 0; i < found; i++)
2008                         block->opcode_list[offset + i].opcode = 0;
2009
2010                 return true;
2011         }
2012
2013         return false;
2014 }
2015
2016 static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state,
2017                                                       struct block *block)
2018 {
2019         struct opcode *op;
2020         unsigned int i;
2021
2022         for (i = 0; i < block->nb_ops; i++) {
2023                 op = &block->opcode_list[i];
2024
2025                 if (op->i.op == OP_SPECIAL &&
2026                     (op->r.op == OP_SPECIAL_DIVU || op->r.op == OP_SPECIAL_DIV) &&
2027                     remove_div_sequence(block, i + 1))
2028                         op->flags |= LIGHTREC_NO_DIV_CHECK;
2029         }
2030
2031         return 0;
2032 }
2033
2034 static const u32 memset_code[] = {
2035         0x10a00006,     // beqz         a1, 2f
2036         0x24a2ffff,     // addiu        v0,a1,-1
2037         0x2403ffff,     // li           v1,-1
2038         0xac800000,     // 1: sw        zero,0(a0)
2039         0x2442ffff,     // addiu        v0,v0,-1
2040         0x1443fffd,     // bne          v0,v1, 1b
2041         0x24840004,     // addiu        a0,a0,4
2042         0x03e00008,     // 2: jr        ra
2043         0x00000000,     // nop
2044 };
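
/* Roughly the equivalent C of the sequence above (a0 = destination
 * pointer, a1 = number of 32-bit words to clear; the function name is
 * illustrative):
 *
 *   void memset_words(u32 *a0, u32 a1)
 *   {
 *           while (a1--)
 *                   *a0++ = 0;
 *   }
 */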
2045
2046 static int lightrec_replace_memset(struct lightrec_state *state, struct block *block)
2047 {
2048         unsigned int i;
2049         union code c;
2050
2051         for (i = 0; i < block->nb_ops; i++) {
2052                 c = block->opcode_list[i].c;
2053
2054                 if (c.opcode != memset_code[i])
2055                         return 0;
2056
2057                 if (i == ARRAY_SIZE(memset_code) - 1) {
2058                         /* success! */
2059                         pr_debug("Block at PC 0x%x is a memset\n", block->pc);
2060                         block_set_flags(block,
2061                                         BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE);
2062
2063                         /* Return non-zero to skip other optimizers. */
2064                         return 1;
2065                 }
2066         }
2067
2068         return 0;
2069 }
2070
2071 static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
2072         IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
2073         IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
2074         IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
2075         IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_branches),
2076         IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches),
2077         IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops),
2078         IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots),
2079         IF_OPT(OPT_FLAG_IO || OPT_FLAG_STORES, &lightrec_flag_io),
2080         IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
2081         IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
2082 };
2083
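/* Run each enabled pass in the order listed above. A non-zero return value
 * stops the chain: either an error, or the deliberate short-circuit
 * returned by lightrec_replace_memset(). */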
2084 int lightrec_optimize(struct lightrec_state *state, struct block *block)
2085 {
2086         unsigned int i;
2087         int ret;
2088
2089         for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
2090                 if (lightrec_optimizers[i]) {
2091                         ret = (*lightrec_optimizers[i])(state, block);
2092                         if (ret)
2093                                 return ret;
2094                 }
2095         }
2096
2097         return 0;
2098 }