// SPDX-License-Identifier: LGPL-2.1-or-later
/*
 * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
 */

#include "lightrec-config.h"
#include "disassembler.h"
#include "lightrec.h"
#include "memmanager.h"
#include "optimizer.h"
#include "regcache.h"

#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#define IF_OPT(opt, ptr) ((opt) ? (ptr) : NULL)

struct optimizer_list {
        void (**optimizers)(struct opcode *);
        unsigned int nb_optimizers;
};

static bool is_nop(union code op);

bool is_unconditional_jump(union code c)
{
        switch (c.i.op) {
        case OP_SPECIAL:
                return c.r.op == OP_SPECIAL_JR || c.r.op == OP_SPECIAL_JALR;
        case OP_J:
        case OP_JAL:
                return true;
        case OP_BEQ:
        case OP_BLEZ:
                return c.i.rs == c.i.rt;
        case OP_REGIMM:
                return (c.r.rt == OP_REGIMM_BGEZ ||
                        c.r.rt == OP_REGIMM_BGEZAL) && c.i.rs == 0;
        default:
                return false;
        }
}

bool is_syscall(union code c)
{
        return (c.i.op == OP_SPECIAL && c.r.op == OP_SPECIAL_SYSCALL) ||
                (c.i.op == OP_CP0 && (c.r.rs == OP_CP0_MTC0 ||
                                        c.r.rs == OP_CP0_CTC0) &&
                 (c.r.rd == 12 || c.r.rd == 13));
}
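
/* Presumably, MTC0/CTC0 to the Status (12) or Cause (13) CP0 registers can
 * unmask or raise an interrupt, so they are classified like SYSCALL here
 * and must end the block, e.g.:
 *
 *   mtc0 $t0, $12   <- may trigger an exception right away
 */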

static u64 opcode_read_mask(union code op)
{
        switch (op.i.op) {
        case OP_SPECIAL:
                switch (op.r.op) {
                case OP_SPECIAL_SYSCALL:
                case OP_SPECIAL_BREAK:
                        return 0;
                case OP_SPECIAL_JR:
                case OP_SPECIAL_JALR:
                case OP_SPECIAL_MTHI:
                case OP_SPECIAL_MTLO:
                        return BIT(op.r.rs);
                case OP_SPECIAL_MFHI:
                        return BIT(REG_HI);
                case OP_SPECIAL_MFLO:
                        return BIT(REG_LO);
                case OP_SPECIAL_SLL:
                        if (!op.r.imm)
                                return 0;
                        fallthrough;
                case OP_SPECIAL_SRL:
                case OP_SPECIAL_SRA:
                        return BIT(op.r.rt);
                default:
                        return BIT(op.r.rs) | BIT(op.r.rt);
                }
        case OP_CP0:
                switch (op.r.rs) {
                case OP_CP0_MTC0:
                case OP_CP0_CTC0:
                        return BIT(op.r.rt);
                default:
                        return 0;
                }
        case OP_CP2:
                if (op.r.op == OP_CP2_BASIC) {
                        switch (op.r.rs) {
                        case OP_CP2_BASIC_MTC2:
                        case OP_CP2_BASIC_CTC2:
                                return BIT(op.r.rt);
                        default:
                                break;
                        }
                }
                return 0;
        case OP_J:
        case OP_JAL:
        case OP_LUI:
                return 0;
        case OP_BEQ:
                if (op.i.rs == op.i.rt)
                        return 0;
                fallthrough;
        case OP_BNE:
        case OP_LWL:
        case OP_LWR:
        case OP_SB:
        case OP_SH:
        case OP_SWL:
        case OP_SW:
        case OP_SWR:
                return BIT(op.i.rs) | BIT(op.i.rt);
        default:
                return BIT(op.i.rs);
        }
}
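
/* Example: for "addu $v0, $a0, $a1" (SPECIAL/ADDU, rs = $a0 = 4,
 * rt = $a1 = 5), opcode_read_mask() returns BIT(4) | BIT(5); for
 * "sw $t0, 0($sp)" (rs = $sp = 29, rt = $t0 = 8) it returns
 * BIT(29) | BIT(8), since a store reads both the base address and
 * the value to be stored. */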

static u64 mult_div_write_mask(union code op)
{
        u64 flags;

        if (!OPT_FLAG_MULT_DIV)
                return BIT(REG_LO) | BIT(REG_HI);

        if (op.r.rd)
                flags = BIT(op.r.rd);
        else
                flags = BIT(REG_LO);
        if (op.r.imm)
                flags |= BIT(op.r.imm);
        else
                flags |= BIT(REG_HI);

        return flags;
}
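
/* With OPT_FLAG_MULT_DIV enabled, the (normally zero) rd and imm fields
 * of a MULT/DIV-style opcode may name alternative destination registers
 * for the LO and HI results; a zero field keeps the result in the
 * architectural LO/HI register. For example, rd == 2 and imm == 0 mean
 * LO goes directly into $v0 while HI stays in REG_HI. */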

static u64 opcode_write_mask(union code op)
{
        switch (op.i.op) {
        case OP_META_MULT2:
        case OP_META_MULTU2:
                return mult_div_write_mask(op);
        case OP_SPECIAL:
                switch (op.r.op) {
                case OP_SPECIAL_JR:
                case OP_SPECIAL_SYSCALL:
                case OP_SPECIAL_BREAK:
                        return 0;
                case OP_SPECIAL_MULT:
                case OP_SPECIAL_MULTU:
                case OP_SPECIAL_DIV:
                case OP_SPECIAL_DIVU:
                        return mult_div_write_mask(op);
                case OP_SPECIAL_MTHI:
                        return BIT(REG_HI);
                case OP_SPECIAL_MTLO:
                        return BIT(REG_LO);
                case OP_SPECIAL_SLL:
                        if (!op.r.imm)
                                return 0;
                        fallthrough;
                default:
                        return BIT(op.r.rd);
                }
        case OP_ADDI:
        case OP_ADDIU:
        case OP_SLTI:
        case OP_SLTIU:
        case OP_ANDI:
        case OP_ORI:
        case OP_XORI:
        case OP_LUI:
        case OP_LB:
        case OP_LH:
        case OP_LWL:
        case OP_LW:
        case OP_LBU:
        case OP_LHU:
        case OP_LWR:
                return BIT(op.i.rt);
        case OP_JAL:
                return BIT(31);
        case OP_CP0:
                switch (op.r.rs) {
                case OP_CP0_MFC0:
                case OP_CP0_CFC0:
                        return BIT(op.i.rt);
                default:
                        return 0;
                }
        case OP_CP2:
                if (op.r.op == OP_CP2_BASIC) {
                        switch (op.r.rs) {
                        case OP_CP2_BASIC_MFC2:
                        case OP_CP2_BASIC_CFC2:
                                return BIT(op.i.rt);
                        default:
                                break;
                        }
                }
                return 0;
        case OP_REGIMM:
                switch (op.r.rt) {
                case OP_REGIMM_BLTZAL:
                case OP_REGIMM_BGEZAL:
                        return BIT(31);
                default:
                        return 0;
                }
        case OP_META_MOV:
                return BIT(op.r.rd);
        default:
                return 0;
        }
}

bool opcode_reads_register(union code op, u8 reg)
{
        return opcode_read_mask(op) & BIT(reg);
}

bool opcode_writes_register(union code op, u8 reg)
{
        return opcode_write_mask(op) & BIT(reg);
}

static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 reg)
{
        union code c;
        unsigned int i;

        if (op_flag_sync(list[offset].flags))
                return -1;

        for (i = offset; i > 0; i--) {
                c = list[i - 1].c;

                if (opcode_writes_register(c, reg)) {
                        if (i > 1 && has_delay_slot(list[i - 2].c))
                                break;

                        return i - 1;
                }

                if (op_flag_sync(list[i - 1].flags) ||
                    has_delay_slot(c) ||
                    opcode_reads_register(c, reg))
                        break;
        }

        return -1;
}

static int find_next_reader(const struct opcode *list, unsigned int offset, u8 reg)
{
        unsigned int i;
        union code c;

        if (op_flag_sync(list[offset].flags))
                return -1;

        for (i = offset; ; i++) {
                c = list[i].c;

                if (opcode_reads_register(c, reg)) {
                        if (i > 0 && has_delay_slot(list[i - 1].c))
                                break;

                        return i;
                }

                if (op_flag_sync(list[i].flags) ||
                    has_delay_slot(c) || opcode_writes_register(c, reg))
                        break;
        }

        return -1;
}

static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg)
{
        unsigned int i;

        if (op_flag_sync(list[offset].flags))
                return false;

        for (i = offset + 1; ; i++) {
                if (opcode_reads_register(list[i].c, reg))
                        return false;

                if (opcode_writes_register(list[i].c, reg))
                        return true;

                if (has_delay_slot(list[i].c)) {
                        if (op_flag_no_ds(list[i].flags) ||
                            opcode_reads_register(list[i + 1].c, reg))
                                return false;

                        return opcode_writes_register(list[i + 1].c, reg);
                }
        }
}
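
/* Example: in the sequence
 *
 *   lw   $t0, 0($a0)
 *   addu $v0, $t0, $t1
 *   ori  $t0, $zero, 1    <- $t0 is written without being read again
 *
 * $t0 is dead right after the ADDU, since the next access is a write. */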

static bool reg_is_read(const struct opcode *list,
                        unsigned int a, unsigned int b, u8 reg)
{
        /* Return true if reg is read in one of the opcodes of the interval
         * [a, b[ */
        for (; a < b; a++) {
                if (!is_nop(list[a].c) && opcode_reads_register(list[a].c, reg))
                        return true;
        }

        return false;
}

static bool reg_is_written(const struct opcode *list,
                           unsigned int a, unsigned int b, u8 reg)
{
        /* Return true if reg is written in one of the opcodes of the interval
         * [a, b[ */

        for (; a < b; a++) {
                if (!is_nop(list[a].c) && opcode_writes_register(list[a].c, reg))
                        return true;
        }

        return false;
}

static bool reg_is_read_or_written(const struct opcode *list,
                                   unsigned int a, unsigned int b, u8 reg)
{
        return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
}

static bool opcode_is_load(union code op)
{
        switch (op.i.op) {
        case OP_LB:
        case OP_LH:
        case OP_LWL:
        case OP_LW:
        case OP_LBU:
        case OP_LHU:
        case OP_LWR:
        case OP_LWC2:
                return true;
        default:
                return false;
        }
}

static bool opcode_is_store(union code op)
{
        switch (op.i.op) {
        case OP_SB:
        case OP_SH:
        case OP_SW:
        case OP_SWL:
        case OP_SWR:
        case OP_SWC2:
                return true;
        default:
                return false;
        }
}

static u8 opcode_get_io_size(union code op)
{
        switch (op.i.op) {
        case OP_LB:
        case OP_LBU:
        case OP_SB:
                return 8;
        case OP_LH:
        case OP_LHU:
        case OP_SH:
                return 16;
        default:
                return 32;
        }
}

bool opcode_is_io(union code op)
{
        return opcode_is_load(op) || opcode_is_store(op);
}

/* TODO: Complete */
static bool is_nop(union code op)
{
        if (opcode_writes_register(op, 0)) {
                switch (op.i.op) {
                case OP_CP0:
                        return op.r.rs != OP_CP0_MFC0;
                case OP_LB:
                case OP_LH:
                case OP_LWL:
                case OP_LW:
                case OP_LBU:
                case OP_LHU:
                case OP_LWR:
                        return false;
                default:
                        return true;
                }
        }

        switch (op.i.op) {
        case OP_SPECIAL:
                switch (op.r.op) {
                case OP_SPECIAL_AND:
                        return op.r.rd == op.r.rt && op.r.rd == op.r.rs;
                case OP_SPECIAL_ADD:
                case OP_SPECIAL_ADDU:
                        return (op.r.rd == op.r.rt && op.r.rs == 0) ||
                                (op.r.rd == op.r.rs && op.r.rt == 0);
                case OP_SPECIAL_SUB:
                case OP_SPECIAL_SUBU:
                        return op.r.rd == op.r.rs && op.r.rt == 0;
                case OP_SPECIAL_OR:
                        if (op.r.rd == op.r.rt)
                                return op.r.rd == op.r.rs || op.r.rs == 0;
                        else
                                return (op.r.rd == op.r.rs) && op.r.rt == 0;
                case OP_SPECIAL_SLL:
                case OP_SPECIAL_SRA:
                case OP_SPECIAL_SRL:
                        return op.r.rd == op.r.rt && op.r.imm == 0;
                case OP_SPECIAL_MFHI:
                case OP_SPECIAL_MFLO:
                        return op.r.rd == 0;
                default:
                        return false;
                }
        case OP_ORI:
        case OP_ADDI:
        case OP_ADDIU:
                return op.i.rt == op.i.rs && op.i.imm == 0;
        case OP_BGTZ:
                return (op.i.rs == 0 || op.i.imm == 1);
        case OP_REGIMM:
                return (op.r.rt == OP_REGIMM_BLTZ ||
                                op.r.rt == OP_REGIMM_BLTZAL) &&
                        (op.i.rs == 0 || op.i.imm == 1);
        case OP_BNE:
                return (op.i.rs == op.i.rt || op.i.imm == 1);
        default:
                return false;
        }
}
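
/* Examples of opcodes the above detects as NOPs:
 *
 *   or   $t0, $t0, $zero   (OR with rd == rs, rt == 0)
 *   addu $t0, $t0, $zero   (ADDU with rd == rs, rt == 0)
 *   ori  $t0, $t0, 0       (ORI with rt == rs, imm == 0)
 *   sll  $zero, $zero, 0   (the canonical NOP, only writes $zero)
 */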

bool load_in_delay_slot(union code op)
{
        switch (op.i.op) {
        case OP_CP0:
                switch (op.r.rs) {
                case OP_CP0_MFC0:
                case OP_CP0_CFC0:
                        return true;
                default:
                        break;
                }

                break;
        case OP_CP2:
                if (op.r.op == OP_CP2_BASIC) {
                        switch (op.r.rs) {
                        case OP_CP2_BASIC_MFC2:
                        case OP_CP2_BASIC_CFC2:
                                return true;
                        default:
                                break;
                        }
                }

                break;
        case OP_LB:
        case OP_LH:
        case OP_LW:
        case OP_LWL:
        case OP_LWR:
        case OP_LBU:
        case OP_LHU:
                return true;
        default:
                break;
        }

        return false;
}

static u32 lightrec_propagate_consts(const struct opcode *op,
                                     const struct opcode *prev,
                                     u32 known, u32 *v)
{
        union code c = prev->c;

        /* Register $zero is always, well, zero */
        known |= BIT(0);
        v[0] = 0;

        if (op_flag_sync(op->flags))
                return BIT(0);

        switch (c.i.op) {
        case OP_SPECIAL:
                switch (c.r.op) {
                case OP_SPECIAL_SLL:
                        if (known & BIT(c.r.rt)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = v[c.r.rt] << c.r.imm;
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_SRL:
                        if (known & BIT(c.r.rt)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = v[c.r.rt] >> c.r.imm;
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_SRA:
                        if (known & BIT(c.r.rt)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm;
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_SLLV:
                        if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f);
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_SRLV:
                        if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f);
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_SRAV:
                        if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = (s32)v[c.r.rt]
                                          >> (v[c.r.rs] & 0x1f);
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_ADD:
                case OP_SPECIAL_ADDU:
                        if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs];
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_SUB:
                case OP_SPECIAL_SUBU:
                        if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = v[c.r.rs] - v[c.r.rt];
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_AND:
                        if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = v[c.r.rt] & v[c.r.rs];
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_OR:
                        if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = v[c.r.rt] | v[c.r.rs];
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_XOR:
                        if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs];
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_NOR:
                        if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]);
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_SLT:
                        if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt];
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_SLTU:
                        if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
                                known |= BIT(c.r.rd);
                                v[c.r.rd] = v[c.r.rs] < v[c.r.rt];
                        } else {
                                known &= ~BIT(c.r.rd);
                        }
                        break;
                case OP_SPECIAL_MULT:
                case OP_SPECIAL_MULTU:
                case OP_SPECIAL_DIV:
                case OP_SPECIAL_DIVU:
                        if (OPT_FLAG_MULT_DIV && c.r.rd)
                                known &= ~BIT(c.r.rd);
                        if (OPT_FLAG_MULT_DIV && c.r.imm)
                                known &= ~BIT(c.r.imm);
                        break;
                case OP_SPECIAL_MFLO:
                case OP_SPECIAL_MFHI:
                        known &= ~BIT(c.r.rd);
                        break;
                default:
                        break;
                }
                break;
        case OP_META_MULT2:
        case OP_META_MULTU2:
                if (OPT_FLAG_MULT_DIV && (known & BIT(c.r.rs))) {
                        if (c.r.rd) {
                                known |= BIT(c.r.rd);

                                if (c.r.op < 32)
                                        v[c.r.rd] = v[c.r.rs] << c.r.op;
                                else
                                        v[c.r.rd] = 0;
                        }

                        if (c.r.imm) {
                                known |= BIT(c.r.imm);

                                if (c.r.op >= 32)
                                        v[c.r.imm] = v[c.r.rs] << (c.r.op - 32);
                                else if (c.i.op == OP_META_MULT2)
                                        v[c.r.imm] = (s32) v[c.r.rs] >> (32 - c.r.op);
                                else
                                        v[c.r.imm] = v[c.r.rs] >> (32 - c.r.op);
                        }
                } else {
                        if (OPT_FLAG_MULT_DIV && c.r.rd)
                                known &= ~BIT(c.r.rd);
                        if (OPT_FLAG_MULT_DIV && c.r.imm)
                                known &= ~BIT(c.r.imm);
                }
                break;
        case OP_REGIMM:
                break;
        case OP_ADDI:
        case OP_ADDIU:
                if (known & BIT(c.i.rs)) {
                        known |= BIT(c.i.rt);
                        v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm;
                } else {
                        known &= ~BIT(c.i.rt);
                }
                break;
        case OP_SLTI:
                if (known & BIT(c.i.rs)) {
                        known |= BIT(c.i.rt);
                        v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm;
                } else {
                        known &= ~BIT(c.i.rt);
                }
                break;
        case OP_SLTIU:
                if (known & BIT(c.i.rs)) {
                        known |= BIT(c.i.rt);
                        v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm;
                } else {
                        known &= ~BIT(c.i.rt);
                }
                break;
        case OP_ANDI:
                if (known & BIT(c.i.rs)) {
                        known |= BIT(c.i.rt);
                        v[c.i.rt] = v[c.i.rs] & c.i.imm;
                } else {
                        known &= ~BIT(c.i.rt);
                }
                break;
        case OP_ORI:
                if (known & BIT(c.i.rs)) {
                        known |= BIT(c.i.rt);
                        v[c.i.rt] = v[c.i.rs] | c.i.imm;
                } else {
                        known &= ~BIT(c.i.rt);
                }
                break;
        case OP_XORI:
                if (known & BIT(c.i.rs)) {
                        known |= BIT(c.i.rt);
                        v[c.i.rt] = v[c.i.rs] ^ c.i.imm;
                } else {
                        known &= ~BIT(c.i.rt);
                }
                break;
        case OP_LUI:
                known |= BIT(c.i.rt);
                v[c.i.rt] = c.i.imm << 16;
                break;
        case OP_CP0:
                switch (c.r.rs) {
                case OP_CP0_MFC0:
                case OP_CP0_CFC0:
                        known &= ~BIT(c.r.rt);
                        break;
                }
                break;
        case OP_CP2:
                if (c.r.op == OP_CP2_BASIC) {
                        switch (c.r.rs) {
                        case OP_CP2_BASIC_MFC2:
                        case OP_CP2_BASIC_CFC2:
                                known &= ~BIT(c.r.rt);
                                break;
                        }
                }
                break;
        case OP_LB:
        case OP_LH:
        case OP_LWL:
        case OP_LW:
        case OP_LBU:
        case OP_LHU:
        case OP_LWR:
        case OP_LWC2:
                known &= ~BIT(c.i.rt);
                break;
        case OP_META_MOV:
                if (known & BIT(c.r.rs)) {
                        known |= BIT(c.r.rd);
                        v[c.r.rd] = v[c.r.rs];
                } else {
                        known &= ~BIT(c.r.rd);
                }
                break;
        default:
                break;
        }

        return known;
}
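
/* Example: after the pair
 *
 *   lui  $t0, 0x1f80
 *   ori  $t0, $t0, 0x1814
 *
 * the LUI marks $t0 as known with value 0x1f800000, and the ORI then
 * updates it to 0x1f801814; later passes can use these known values,
 * e.g. to detect direct hardware I/O addresses. */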

static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset)
{
        struct opcode *prev, *prev2 = NULL, *curr = &list[offset];
        struct opcode *to_change, *to_nop;
        int idx, idx2;

        if (curr->r.imm != 24 && curr->r.imm != 16)
                return;

        idx = find_prev_writer(list, offset, curr->r.rt);
        if (idx < 0)
                return;

        prev = &list[idx];

        if (prev->i.op != OP_SPECIAL || prev->r.op != OP_SPECIAL_SLL ||
            prev->r.imm != curr->r.imm || prev->r.rd != curr->r.rt)
                return;

        if (prev->r.rd != prev->r.rt && curr->r.rd != curr->r.rt) {
                /* sll rY, rX, 16
                 * ...
                 * sra rZ, rY, 16 */

                if (!reg_is_dead(list, offset, curr->r.rt) ||
                    reg_is_read_or_written(list, idx, offset, curr->r.rd))
                        return;

                /* If rY is dead after the SRA, and rZ is not used after the SLL,
                 * we can change rY to rZ */

                pr_debug("Detected SLL/SRA with middle temp register\n");
                prev->r.rd = curr->r.rd;
                curr->r.rt = prev->r.rd;
        }

        /* We got a SLL/SRA combo. If imm #16, that's a cast to s16.
         * If imm #24, that's a cast to s8.
         *
         * First of all, make sure that the target register of the SLL is not
         * read before the SRA. */

        if (prev->r.rd == prev->r.rt) {
                /* sll rX, rX, 16
                 * ...
                 * sra rY, rX, 16 */
                to_change = curr;
                to_nop = prev;

                /* rX is used after the SRA - we cannot convert it. */
                if (prev->r.rd != curr->r.rd && !reg_is_dead(list, offset, prev->r.rd))
                        return;
        } else {
                /* sll rY, rX, 16
                 * ...
                 * sra rY, rY, 16 */
                to_change = prev;
                to_nop = curr;
        }

        idx2 = find_prev_writer(list, idx, prev->r.rt);
        if (idx2 >= 0) {
                /* Note that PSX games sometimes do casts after
                 * a LHU or LBU; in this case we can change the
                 * load opcode to a LH or LB, and the cast can
                 * be changed to a MOV or a simple NOP. */

                prev2 = &list[idx2];

                if (curr->r.rd != prev2->i.rt &&
                    !reg_is_dead(list, offset, prev2->i.rt))
                        prev2 = NULL;
                else if (curr->r.imm == 16 && prev2->i.op == OP_LHU)
                        prev2->i.op = OP_LH;
                else if (curr->r.imm == 24 && prev2->i.op == OP_LBU)
                        prev2->i.op = OP_LB;
                else
                        prev2 = NULL;

                if (prev2) {
                        if (curr->r.rd == prev2->i.rt) {
                                to_change->opcode = 0;
                        } else if (reg_is_dead(list, offset, prev2->i.rt) &&
                                   !reg_is_read_or_written(list, idx2 + 1, offset, curr->r.rd)) {
                                /* The target register of the SRA is dead after the
                                 * LBU/LHU; we can change the target register of the
                                 * LBU/LHU to the one of the SRA. */
                                prev2->i.rt = curr->r.rd;
                                to_change->opcode = 0;
                        } else {
                                to_change->i.op = OP_META_MOV;
                                to_change->r.rd = curr->r.rd;
                                to_change->r.rs = prev2->i.rt;
                        }

                        if (to_nop->r.imm == 24)
                                pr_debug("Convert LBU+SLL+SRA to LB\n");
                        else
                                pr_debug("Convert LHU+SLL+SRA to LH\n");
                }
        }

        if (!prev2) {
                pr_debug("Convert SLL/SRA #%u to EXT%c\n",
                         prev->r.imm,
                         prev->r.imm == 24 ? 'C' : 'S');

                if (to_change == prev) {
                        to_change->i.rs = prev->r.rt;
                        to_change->i.rt = curr->r.rd;
                } else {
                        to_change->i.rt = curr->r.rd;
                        to_change->i.rs = prev->r.rt;
                }

                if (to_nop->r.imm == 24)
                        to_change->i.op = OP_META_EXTC;
                else
                        to_change->i.op = OP_META_EXTS;
        }

        to_nop->opcode = 0;
}
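
/* Worked example of the LHU/LBU absorption above:
 *
 *   lhu $t0, 0($a0)       -->   lh  $v0, 0($a0)
 *   sll $t0, $t0, 16      -->   nop
 *   sra $v0, $t0, 16      -->   nop
 *
 * SLL+SRA by 16 sign-extends the low halfword, which a plain LH
 * already does, so the load absorbs the whole sequence. */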

static void lightrec_remove_useless_lui(struct block *block, unsigned int offset,
                                        u32 known, u32 *values)
{
        struct opcode *list = block->opcode_list,
                      *op = &block->opcode_list[offset];
        int reader;

        if (!op_flag_sync(op->flags) && (known & BIT(op->i.rt)) &&
            values[op->i.rt] == op->i.imm << 16) {
                pr_debug("Converting duplicated LUI to NOP\n");
                op->opcode = 0x0;
                return;
        }

        if (op->i.imm != 0 || op->i.rt == 0)
                return;

        reader = find_next_reader(list, offset + 1, op->i.rt);
        if (reader <= 0)
                return;

        if (opcode_writes_register(list[reader].c, op->i.rt) ||
            reg_is_dead(list, reader, op->i.rt)) {
                pr_debug("Removing useless LUI 0x0\n");

                if (list[reader].i.rs == op->i.rt)
                        list[reader].i.rs = 0;
                if (list[reader].i.op == OP_SPECIAL &&
                    list[reader].i.rt == op->i.rt)
                        list[reader].i.rt = 0;
                op->opcode = 0x0;
        }
}

static void lightrec_modify_lui(struct block *block, unsigned int offset)
{
        union code c, *lui = &block->opcode_list[offset].c;
        bool stop = false, stop_next = false;
        unsigned int i;

        for (i = offset + 1; !stop && i < block->nb_ops; i++) {
                c = block->opcode_list[i].c;
                stop = stop_next;

                if ((opcode_is_store(c) && c.i.rt == lui->i.rt)
                    || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt)))
                        break;

                if (opcode_writes_register(c, lui->i.rt)) {
                        pr_debug("Convert LUI at offset 0x%x to kunseg\n",
                                 (i - 1) << 2);
                        lui->i.imm = kunseg(lui->i.imm << 16) >> 16;
                        break;
                }

                if (has_delay_slot(c))
                        stop_next = true;
        }
}

static int lightrec_transform_branches(struct lightrec_state *state,
                                       struct block *block)
{
        struct opcode *op;
        unsigned int i;
        s32 offset;

        for (i = 0; i < block->nb_ops; i++) {
                op = &block->opcode_list[i];

                switch (op->i.op) {
                case OP_J:
                        /* Transform J opcode into BEQ $zero, $zero if possible. */
                        offset = (s32)((block->pc & 0xf0000000) >> 2 | op->j.imm)
                                - (s32)(block->pc >> 2) - (s32)i - 1;

                        if (offset == (s16)offset) {
                                pr_debug("Transform J into BEQ $zero, $zero\n");
                                op->i.op = OP_BEQ;
                                op->i.rs = 0;
                                op->i.rt = 0;
                                op->i.imm = offset;
                        }
                        fallthrough;
                default:
                        break;
                }
        }

        return 0;
}
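
/* Example: BEQ $zero, $zero is always taken, so a J whose target lies
 * within the signed 16-bit range of a branch offset:
 *
 *   j    <absolute target>   -->   beq $zero, $zero, <relative offset>
 *
 * becomes a PC-relative branch, which the local-branch pass below may
 * then resolve entirely inside the block. */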

static inline bool is_power_of_two(u32 value)
{
        return popcount32(value) == 1;
}

static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
        struct opcode *list = block->opcode_list;
        struct opcode *prev, *op = NULL;
        u32 known = BIT(0);
        u32 values[32] = { 0 };
        unsigned int i;
        u8 tmp;

        for (i = 0; i < block->nb_ops; i++) {
                prev = op;
                op = &list[i];

                if (prev)
                        known = lightrec_propagate_consts(op, prev, known, values);

                /* Transform all opcodes detected as useless to real NOPs
                 * (0x0: SLL r0, r0, #0) */
                if (op->opcode != 0 && is_nop(op->c)) {
                        pr_debug("Converting useless opcode 0x%08x to NOP\n",
                                        op->opcode);
                        op->opcode = 0x0;
                }

                if (!op->opcode)
                        continue;

                switch (op->i.op) {
                case OP_BEQ:
                        if (op->i.rs == op->i.rt) {
                                op->i.rs = 0;
                                op->i.rt = 0;
                        } else if (op->i.rs == 0) {
                                op->i.rs = op->i.rt;
                                op->i.rt = 0;
                        }
                        break;

                case OP_BNE:
                        if (op->i.rs == 0) {
                                op->i.rs = op->i.rt;
                                op->i.rt = 0;
                        }
                        break;

                case OP_LUI:
                        if (!prev || !has_delay_slot(prev->c))
                                lightrec_modify_lui(block, i);
                        lightrec_remove_useless_lui(block, i, known, values);
                        break;

                /* Transform ORI/ADDI/ADDIU with imm #0 or OR/ADD/ADDU/SUB/SUBU
                 * with register $zero to the MOV meta-opcode */
                case OP_ORI:
                case OP_ADDI:
                case OP_ADDIU:
                        if (op->i.imm == 0) {
                                pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
                                op->i.op = OP_META_MOV;
                                op->r.rd = op->i.rt;
                        }
                        break;
                case OP_SPECIAL:
                        switch (op->r.op) {
                        case OP_SPECIAL_SRA:
                                if (op->r.imm == 0) {
                                        pr_debug("Convert SRA #0 to MOV\n");
                                        op->i.op = OP_META_MOV;
                                        op->r.rs = op->r.rt;
                                        break;
                                }

                                lightrec_optimize_sll_sra(block->opcode_list, i);
                                break;
                        case OP_SPECIAL_SLL:
                        case OP_SPECIAL_SRL:
                                if (op->r.imm == 0) {
                                        pr_debug("Convert SLL/SRL #0 to MOV\n");
                                        op->i.op = OP_META_MOV;
                                        op->r.rs = op->r.rt;
                                }
                                break;
                        case OP_SPECIAL_MULT:
                        case OP_SPECIAL_MULTU:
                                if ((known & BIT(op->r.rs)) &&
                                    is_power_of_two(values[op->r.rs])) {
                                        tmp = op->c.i.rs;
                                        op->c.i.rs = op->c.i.rt;
                                        op->c.i.rt = tmp;
                                } else if (!(known & BIT(op->r.rt)) ||
                                           !is_power_of_two(values[op->r.rt])) {
                                        break;
                                }

                                pr_debug("Multiply by power-of-two: %u\n",
                                         values[op->r.rt]);

                                if (op->r.op == OP_SPECIAL_MULT)
                                        op->i.op = OP_META_MULT2;
                                else
                                        op->i.op = OP_META_MULTU2;

                                op->r.op = ffs32(values[op->r.rt]);
                                break;
                        case OP_SPECIAL_OR:
                        case OP_SPECIAL_ADD:
                        case OP_SPECIAL_ADDU:
                                if (op->r.rs == 0) {
                                        pr_debug("Convert OR/ADD $zero to MOV\n");
                                        op->i.op = OP_META_MOV;
                                        op->r.rs = op->r.rt;
                                }
                                fallthrough;
                        case OP_SPECIAL_SUB:
                        case OP_SPECIAL_SUBU:
                                if (op->r.rt == 0) {
                                        pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
                                        op->i.op = OP_META_MOV;
                                }
                                fallthrough;
                        default:
                                break;
                        }
                        fallthrough;
                default:
                        break;
                }
        }

        return 0;
}

static bool lightrec_can_switch_delay_slot(union code op, union code next_op)
{
        switch (op.i.op) {
        case OP_SPECIAL:
                switch (op.r.op) {
                case OP_SPECIAL_JALR:
                        if (opcode_reads_register(next_op, op.r.rd) ||
                            opcode_writes_register(next_op, op.r.rd))
                                return false;
                        fallthrough;
                case OP_SPECIAL_JR:
                        if (opcode_writes_register(next_op, op.r.rs))
                                return false;
                        fallthrough;
                default:
                        break;
                }
                fallthrough;
        case OP_J:
                break;
        case OP_JAL:
                if (opcode_reads_register(next_op, 31) ||
                    opcode_writes_register(next_op, 31))
                        return false;

                break;
        case OP_BEQ:
        case OP_BNE:
                if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
                        return false;
                fallthrough;
        case OP_BLEZ:
        case OP_BGTZ:
                if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
                        return false;
                break;
        case OP_REGIMM:
                switch (op.r.rt) {
                case OP_REGIMM_BLTZAL:
                case OP_REGIMM_BGEZAL:
                        if (opcode_reads_register(next_op, 31) ||
                            opcode_writes_register(next_op, 31))
                                return false;
                        fallthrough;
                case OP_REGIMM_BLTZ:
                case OP_REGIMM_BGEZ:
                        if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
                                return false;
                        break;
                }
                fallthrough;
        default:
                break;
        }

        return true;
}

static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
{
        struct opcode *list, *next = &block->opcode_list[0];
        unsigned int i;
        union code op, next_op;
        u32 flags;

        for (i = 0; i < block->nb_ops - 1; i++) {
                list = next;
                next = &block->opcode_list[i + 1];
                next_op = next->c;
                op = list->c;

                if (!has_delay_slot(op) || op_flag_no_ds(list->flags) ||
                    op_flag_emulate_branch(list->flags) ||
                    op.opcode == 0 || next_op.opcode == 0)
                        continue;

                if (i && has_delay_slot(block->opcode_list[i - 1].c) &&
                    !op_flag_no_ds(block->opcode_list[i - 1].flags))
                        continue;

                if (op_flag_sync(next->flags))
                        continue;

                if (!lightrec_can_switch_delay_slot(list->c, next_op))
                        continue;

                pr_debug("Swap branch and delay slot opcodes "
                         "at offsets 0x%x / 0x%x\n",
                         i << 2, (i + 1) << 2);

                flags = next->flags | (list->flags & LIGHTREC_SYNC);
                list->c = next_op;
                next->c = op;
                next->flags = (list->flags | LIGHTREC_NO_DS) & ~LIGHTREC_SYNC;
                list->flags = flags | LIGHTREC_NO_DS;
        }

        return 0;
}
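
/* Example of a legal swap performed above:
 *
 *   beq  $t0, $t1, target          addu $v0, $a0, $a1
 *   addu $v0, $a0, $a1       -->   beq  $t0, $t1, target
 *
 * The ADDU writes neither $t0 nor $t1, so evaluating it before the
 * branch condition cannot change the outcome; both opcodes are then
 * flagged LIGHTREC_NO_DS since the delay slot has already run. */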

static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
{
        struct opcode_list *list, *old_list;

        if (new_size >= block->nb_ops) {
                pr_err("Invalid shrink size (%u vs %u)\n",
                       new_size, block->nb_ops);
                return -EINVAL;
        }

        list = lightrec_malloc(state, MEM_FOR_IR,
                               sizeof(*list) + sizeof(struct opcode) * new_size);
        if (!list) {
                pr_err("Unable to allocate memory\n");
                return -ENOMEM;
        }

        old_list = container_of(block->opcode_list, struct opcode_list, ops);
        memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size);

        lightrec_free_opcode_list(state, block->opcode_list);
        list->nb_ops = new_size;
        block->nb_ops = new_size;
        block->opcode_list = list->ops;

        pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
                 block->pc, new_size);

        return 0;
}

static int lightrec_detect_impossible_branches(struct lightrec_state *state,
                                               struct block *block)
{
        struct opcode *op, *list = block->opcode_list, *next = &list[0];
        unsigned int i;
        int ret = 0;
        s16 offset;

        for (i = 0; i < block->nb_ops - 1; i++) {
                op = next;
                next = &list[i + 1];

                if (!has_delay_slot(op->c) ||
                    (!load_in_delay_slot(next->c) &&
                     !has_delay_slot(next->c) &&
                     !(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE)))
                        continue;

                if (op->c.opcode == next->c.opcode) {
                        /* The delay slot is the exact same opcode as the branch
                         * opcode: this is effectively a NOP */
                        next->c.opcode = 0;
                        continue;
                }

                offset = i + 1 + (s16)op->i.imm;
                if (load_in_delay_slot(next->c) &&
                    (offset >= 0 && offset < block->nb_ops) &&
                    !opcode_reads_register(list[offset].c, next->c.i.rt)) {
                        /* The 'impossible' branch is a local branch - we can
                         * verify here that the first opcode of the target does
                         * not use the target register of the delay slot */

                        pr_debug("Branch at offset 0x%x has load delay slot, "
                                 "but is local and dest opcode does not read "
                                 "dest register\n", i << 2);
                        continue;
                }

                op->flags |= LIGHTREC_EMULATE_BRANCH;

                if (op == list) {
                        pr_debug("First opcode of block PC 0x%08x is an impossible branch\n",
                                 block->pc);

                        /* If the first opcode is an 'impossible' branch, we
                         * only keep the first two opcodes of the block (the
                         * branch itself + its delay slot) */
                        if (block->nb_ops > 2)
                                ret = shrink_opcode_list(state, block, 2);
                        break;
                }
        }

        return ret;
}
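
/* Example of an 'impossible' sequence handled above: a delay slot that
 * is itself a branch, contains an RFE, or contains a load whose
 * load-delay behaviour cannot be honoured across the block boundary:
 *
 *   bne $t0, $zero, target
 *   j   elsewhere           <- branch in the delay slot
 *
 * Such branches are flagged LIGHTREC_EMULATE_BRANCH and taken through
 * the emulation fallback instead of being recompiled directly. */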

static int lightrec_local_branches(struct lightrec_state *state, struct block *block)
{
        struct opcode *list;
        unsigned int i;
        s32 offset;

        for (i = 0; i < block->nb_ops; i++) {
                list = &block->opcode_list[i];

                if (should_emulate(list))
                        continue;

                switch (list->i.op) {
                case OP_BEQ:
                case OP_BNE:
                case OP_BLEZ:
                case OP_BGTZ:
                case OP_REGIMM:
                        offset = i + 1 + (s16)list->i.imm;
                        if (offset >= 0 && offset < block->nb_ops)
                                break;
                        fallthrough;
                default:
                        continue;
                }

                pr_debug("Found local branch to offset 0x%x\n", offset << 2);

                if (should_emulate(&block->opcode_list[offset])) {
                        pr_debug("Branch target must be emulated - skip\n");
                        continue;
                }

                if (offset && has_delay_slot(block->opcode_list[offset - 1].c)) {
                        pr_debug("Branch target is a delay slot - skip\n");
                        continue;
                }

                pr_debug("Adding sync at offset 0x%x\n", offset << 2);

                block->opcode_list[offset].flags |= LIGHTREC_SYNC;
                list->flags |= LIGHTREC_LOCAL_BRANCH;
        }

        return 0;
}

bool has_delay_slot(union code op)
{
        switch (op.i.op) {
        case OP_SPECIAL:
                switch (op.r.op) {
                case OP_SPECIAL_JR:
                case OP_SPECIAL_JALR:
                        return true;
                default:
                        return false;
                }
        case OP_J:
        case OP_JAL:
        case OP_BEQ:
        case OP_BNE:
        case OP_BLEZ:
        case OP_BGTZ:
        case OP_REGIMM:
                return true;
        default:
                return false;
        }
}

bool should_emulate(const struct opcode *list)
{
        return op_flag_emulate_branch(list->flags) && has_delay_slot(list->c);
}

static bool op_writes_rd(union code c)
{
        switch (c.i.op) {
        case OP_SPECIAL:
        case OP_META_MOV:
                return true;
        default:
                return false;
        }
}

static void lightrec_add_reg_op(struct opcode *op, u8 reg, u32 reg_op)
{
        if (op_writes_rd(op->c) && reg == op->r.rd)
                op->flags |= LIGHTREC_REG_RD(reg_op);
        else if (op->i.rs == reg)
                op->flags |= LIGHTREC_REG_RS(reg_op);
        else if (op->i.rt == reg)
                op->flags |= LIGHTREC_REG_RT(reg_op);
        else
                pr_debug("Cannot add unload/clean/discard flag: "
                         "opcode does not touch register %s!\n",
                         lightrec_reg_name(reg));
}

static void lightrec_add_unload(struct opcode *op, u8 reg)
{
        lightrec_add_reg_op(op, reg, LIGHTREC_REG_UNLOAD);
}

static void lightrec_add_discard(struct opcode *op, u8 reg)
{
        lightrec_add_reg_op(op, reg, LIGHTREC_REG_DISCARD);
}

static void lightrec_add_clean(struct opcode *op, u8 reg)
{
        lightrec_add_reg_op(op, reg, LIGHTREC_REG_CLEAN);
}

static void
lightrec_early_unload_sync(struct opcode *list, s16 *last_r, s16 *last_w)
{
        unsigned int reg;
        s16 offset;

        for (reg = 0; reg < 34; reg++) {
                offset = s16_max(last_w[reg], last_r[reg]);

                if (offset >= 0)
                        lightrec_add_unload(&list[offset], reg);
        }

        memset(last_r, 0xff, sizeof(*last_r) * 34);
        memset(last_w, 0xff, sizeof(*last_w) * 34);
}

static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
{
        u16 i, offset;
        struct opcode *op;
        s16 last_r[34], last_w[34], last_sync = 0, next_sync = 0;
        u64 mask_r, mask_w, dirty = 0, loaded = 0;
        u8 reg;

        memset(last_r, 0xff, sizeof(last_r));
        memset(last_w, 0xff, sizeof(last_w));

        /*
         * Clean if:
         * - the register is dirty, and is read again after a branch opcode
         *
         * Unload if:
         * - the register is dirty or loaded, and is not read again
         * - the register is dirty or loaded, and is written again after a branch opcode
         * - the next opcode has the SYNC flag set
         *
         * Discard if:
         * - the register is dirty or loaded, and is written again
         */
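
        /* Example: if $t0 is written (dirty) before a branch and read
         * after it, the writing opcode is flagged CLEAN so the native
         * register is stored back early; if $t0 is instead simply
         * overwritten later in the same straight-line run, the stale
         * value is DISCARDed without a store. */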

        for (i = 0; i < block->nb_ops; i++) {
                op = &block->opcode_list[i];

                if (op_flag_sync(op->flags) || should_emulate(op)) {
                        /* The next opcode has the SYNC flag set, or is a branch
                         * that should be emulated: unload all registers. */
                        lightrec_early_unload_sync(block->opcode_list, last_r, last_w);
                        dirty = 0;
                        loaded = 0;
                }

                if (next_sync == i) {
                        last_sync = i;
                        pr_debug("Last sync: 0x%x\n", last_sync << 2);
                }

                if (has_delay_slot(op->c)) {
                        next_sync = i + 1 + !op_flag_no_ds(op->flags);
                        pr_debug("Next sync: 0x%x\n", next_sync << 2);
                }

                mask_r = opcode_read_mask(op->c);
                mask_w = opcode_write_mask(op->c);

                for (reg = 0; reg < 34; reg++) {
                        if (mask_r & BIT(reg)) {
                                if (dirty & BIT(reg) && last_w[reg] < last_sync) {
                                        /* The register is dirty, and is read
                                         * again after a branch: clean it */

                                        lightrec_add_clean(&block->opcode_list[last_w[reg]], reg);
                                        dirty &= ~BIT(reg);
                                        loaded |= BIT(reg);
                                }

                                last_r[reg] = i;
                        }

                        if (mask_w & BIT(reg)) {
                                if ((dirty & BIT(reg) && last_w[reg] < last_sync) ||
                                    (loaded & BIT(reg) && last_r[reg] < last_sync)) {
                                        /* The register is dirty or loaded, and
                                         * is written again after a branch:
                                         * unload it */

                                        offset = s16_max(last_w[reg], last_r[reg]);
                                        lightrec_add_unload(&block->opcode_list[offset], reg);
                                        dirty &= ~BIT(reg);
                                        loaded &= ~BIT(reg);
                                } else if (!(mask_r & BIT(reg)) &&
                                           ((dirty & BIT(reg) && last_w[reg] > last_sync) ||
                                           (loaded & BIT(reg) && last_r[reg] > last_sync))) {
                                        /* The register is dirty or loaded, and
                                         * is written again: discard it */

                                        offset = s16_max(last_w[reg], last_r[reg]);
                                        lightrec_add_discard(&block->opcode_list[offset], reg);
                                        dirty &= ~BIT(reg);
                                        loaded &= ~BIT(reg);
                                }

                                last_w[reg] = i;
                        }
                }

                dirty |= mask_w;
                loaded |= mask_r;
        }

        /* Unload all registers that are dirty or loaded at the end of the block. */
        lightrec_early_unload_sync(block->opcode_list, last_r, last_w);

        return 0;
}

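/* Two jobs, both driven by constant propagation: flag stores that can skip
 * code invalidation (and detect self-modifying code), and, when a load/store
 * target address is known, tag the opcode with the PSX memory region it hits
 * (RAM, BIOS, scratchpad, hardware I/O...) so the code generator can emit a
 * direct access instead of going through the generic memory handlers. */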
static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
{
        struct opcode *prev = NULL, *list = NULL;
        enum psx_map psx_map;
        u32 known = BIT(0);
        u32 values[32] = { 0 };
        unsigned int i;
        u32 val, kunseg_val;
        bool no_mask;

        for (i = 0; i < block->nb_ops; i++) {
                prev = list;
                list = &block->opcode_list[i];

                if (prev)
                        known = lightrec_propagate_consts(list, prev, known, values);

                switch (list->i.op) {
                case OP_SB:
                case OP_SH:
                case OP_SW:
                        if (OPT_FLAG_STORES) {
                                /* Mark all store operations that target $sp or $gp
                                 * as not requiring code invalidation. This is based
                                 * on the heuristic that stores using one of these
                                 * registers as the address will never hit a code page. */
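                                /* (A typical example would be register spills
                                 * such as "sw ra, 16($sp)" in a function
                                 * prologue.) */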
                                if (list->i.rs >= 28 && list->i.rs <= 29 &&
                                    !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
                                        pr_debug("Flagging opcode 0x%08x as not "
                                                 "requiring invalidation\n",
                                                 list->opcode);
                                        list->flags |= LIGHTREC_NO_INVALIDATE;
                                        list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
                                }

                                /* Detect writes whose destination address is inside the
                                 * current block, using constant propagation. When these
                                 * occur, we mark the block as not compilable. */
                                if ((known & BIT(list->i.rs)) &&
                                    kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
                                    kunseg(values[list->i.rs]) < (kunseg(block->pc) +
                                                                  block->nb_ops * 4)) {
                                        pr_debug("Self-modifying block detected\n");
                                        block_set_flags(block, BLOCK_NEVER_COMPILE);
                                        list->flags |= LIGHTREC_SMC;
                                }
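                                /* A BLOCK_NEVER_COMPILE block stays on the
                                 * interpreter path, so the self-modifying
                                 * store takes effect before the following
                                 * opcodes run. */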
                        }
                        fallthrough;
                case OP_SWL:
                case OP_SWR:
                case OP_SWC2:
                case OP_LB:
                case OP_LBU:
                case OP_LH:
                case OP_LHU:
                case OP_LW:
                case OP_LWL:
                case OP_LWR:
                case OP_LWC2:
                        if (OPT_FLAG_IO && (known & BIT(list->i.rs))) {
                                val = values[list->i.rs] + (s16) list->i.imm;
                                kunseg_val = kunseg(val);
                                psx_map = lightrec_get_map_idx(state, kunseg_val);

                                list->flags &= ~LIGHTREC_IO_MASK;
                                no_mask = val == kunseg_val;
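                                /* If kunseg() left the address unchanged, it
                                 * already points into the unmapped segment,
                                 * so no address masking is needed at
                                 * runtime. */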

                                switch (psx_map) {
                                case PSX_MAP_KERNEL_USER_RAM:
                                        if (no_mask)
                                                list->flags |= LIGHTREC_NO_MASK;
                                        fallthrough;
                                case PSX_MAP_MIRROR1:
                                case PSX_MAP_MIRROR2:
                                case PSX_MAP_MIRROR3:
                                        pr_debug("Flagging opcode %u as RAM access\n", i);
                                        list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
                                        if (no_mask && state->mirrors_mapped)
                                                list->flags |= LIGHTREC_NO_MASK;
                                        break;
                                case PSX_MAP_BIOS:
                                        pr_debug("Flagging opcode %u as BIOS access\n", i);
                                        list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS);
                                        if (no_mask)
                                                list->flags |= LIGHTREC_NO_MASK;
                                        break;
                                case PSX_MAP_SCRATCH_PAD:
                                        pr_debug("Flagging opcode %u as scratchpad access\n", i);
                                        list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH);
                                        if (no_mask)
                                                list->flags |= LIGHTREC_NO_MASK;

                                        /* Consider that we're never going to run code from
                                         * the scratchpad. */
                                        list->flags |= LIGHTREC_NO_INVALIDATE;
                                        break;
                                case PSX_MAP_HW_REGISTERS:
                                        if (state->ops.hw_direct &&
                                            state->ops.hw_direct(kunseg_val,
                                                                 opcode_is_store(list->c),
                                                                 opcode_get_io_size(list->c))) {
                                                pr_debug("Flagging opcode %u as direct I/O access\n",
                                                         i);
                                                list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT_HW);

                                                if (no_mask)
                                                        list->flags |= LIGHTREC_NO_MASK;
                                                break;
                                        }
                                        fallthrough;
                                default:
                                        pr_debug("Flagging opcode %u as I/O access\n",
                                                 i);
                                        list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
                                        break;
                                }
                        }
                        fallthrough;
                default:
                        break;
                }
        }

        return 0;
}

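/* Look for the register that will receive the HI (or LO) result of a
 * multiplication or division. Returns 0 if HI/LO is overwritten or never
 * read before the end of the block, REG_HI/REG_LO if the value must be
 * kept in the HI/LO register itself, or the GPR targeted by a later
 * MFHI/MFLO that the result can be written to directly. */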
static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
                            const struct opcode *last,
                            u32 mask, bool sync, bool mflo, bool another)
{
        const struct opcode *op, *next = &block->opcode_list[offset];
        u32 old_mask;
        u8 reg2, reg = mflo ? REG_LO : REG_HI;
        u16 branch_offset;
        unsigned int i;

        for (i = offset; i < block->nb_ops; i++) {
                op = next;
                next = &block->opcode_list[i + 1];
                old_mask = mask;

                /* If any other opcode writes to or reads from the register
                 * we'd use, then we cannot use it anymore. */
                mask |= opcode_read_mask(op->c);
                mask |= opcode_write_mask(op->c);

                if (op_flag_sync(op->flags))
                        sync = true;

                switch (op->i.op) {
                case OP_BEQ:
                case OP_BNE:
                case OP_BLEZ:
                case OP_BGTZ:
                case OP_REGIMM:
                        /* TODO: handle backwards branches too */
                        if (!last && op_flag_local_branch(op->flags) &&
                            (s16)op->c.i.imm >= 0) {
                                branch_offset = i + 1 + (s16)op->c.i.imm
                                        - !!op_flag_no_ds(op->flags);

                                reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
                                                        mask, sync, mflo, false);
                                reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
                                                         mask, sync, mflo, false);
                                if (reg > 0 && reg == reg2)
                                        return reg;
                                if (!reg && !reg2)
                                        return 0;
                        }

                        return mflo ? REG_LO : REG_HI;
                case OP_META_MULT2:
                case OP_META_MULTU2:
                        return 0;
                case OP_SPECIAL:
                        switch (op->r.op) {
                        case OP_SPECIAL_MULT:
                        case OP_SPECIAL_MULTU:
                        case OP_SPECIAL_DIV:
                        case OP_SPECIAL_DIVU:
                                return 0;
                        case OP_SPECIAL_MTHI:
                                if (!mflo)
                                        return 0;
                                continue;
                        case OP_SPECIAL_MTLO:
                                if (mflo)
                                        return 0;
                                continue;
                        case OP_SPECIAL_JR:
                                if (op->r.rs != 31)
                                        return reg;

                                if (!sync && !op_flag_no_ds(op->flags) &&
                                    (next->i.op == OP_SPECIAL) &&
                                    ((!mflo && next->r.op == OP_SPECIAL_MFHI) ||
                                    (mflo && next->r.op == OP_SPECIAL_MFLO)))
                                        return next->r.rd;

                                return 0;
                        case OP_SPECIAL_JALR:
                                return reg;
                        case OP_SPECIAL_MFHI:
                                if (!mflo) {
                                        if (another)
                                                return op->r.rd;
                                        /* Must use REG_HI if there is another MFHI target */
                                        reg2 = get_mfhi_mflo_reg(block, i + 1, next,
                                                         0, sync, mflo, true);
                                        if (reg2 > 0 && reg2 != REG_HI)
                                                return REG_HI;

                                        if (!sync && !(old_mask & BIT(op->r.rd)))
                                                return op->r.rd;
                                        else
                                                return REG_HI;
                                }
                                continue;
                        case OP_SPECIAL_MFLO:
                                if (mflo) {
                                        if (another)
                                                return op->r.rd;
                                        /* Must use REG_LO if there is another MFLO target */
                                        reg2 = get_mfhi_mflo_reg(block, i + 1, next,
                                                         0, sync, mflo, true);
                                        if (reg2 > 0 && reg2 != REG_LO)
                                                return REG_LO;

                                        if (!sync && !(old_mask & BIT(op->r.rd)))
                                                return op->r.rd;
                                        else
                                                return REG_LO;
                                }
                                continue;
                        default:
                                break;
                        }

                        fallthrough;
                default:
                        continue;
                }
        }

        return reg;
}

static void lightrec_replace_lo_hi(struct block *block, u16 offset,
                                   u16 last, bool lo)
{
        unsigned int i;
        u32 branch_offset;

        /* This function will remove the following MFLO/MFHI. It must be called
         * only if get_mfhi_mflo_reg() returned a non-zero value. */

        for (i = offset; i < last; i++) {
                struct opcode *op = &block->opcode_list[i];

                switch (op->i.op) {
                case OP_BEQ:
                case OP_BNE:
                case OP_BLEZ:
                case OP_BGTZ:
                case OP_REGIMM:
                        /* TODO: handle backwards branches too */
                        if (op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) {
                                branch_offset = i + 1 + (s16)op->c.i.imm
                                        - !!op_flag_no_ds(op->flags);

                                lightrec_replace_lo_hi(block, branch_offset, last, lo);
                                lightrec_replace_lo_hi(block, i + 1, branch_offset, lo);
                        }
                        break;

                case OP_SPECIAL:
                        if (lo && op->r.op == OP_SPECIAL_MFLO) {
                                pr_debug("Removing MFLO opcode at offset 0x%x\n",
                                         i << 2);
                                op->opcode = 0;
                                return;
                        } else if (!lo && op->r.op == OP_SPECIAL_MFHI) {
                                pr_debug("Removing MFHI opcode at offset 0x%x\n",
                                         i << 2);
                                op->opcode = 0;
                                return;
                        }

                        fallthrough;
                default:
                        break;
                }
        }
}

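/* On a MIPS host, native div/divu, like the R3000A's, does not trap on a
 * zero divisor, so the emitted division can presumably always run
 * unguarded. */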
static bool lightrec_always_skip_div_check(void)
{
#ifdef __mips__
        return true;
#else
        return false;
#endif
}

static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
{
        struct opcode *prev, *list = NULL;
        u8 reg_hi, reg_lo;
        unsigned int i;
        u32 known = BIT(0);
        u32 values[32] = { 0 };

        for (i = 0; i < block->nb_ops - 1; i++) {
                prev = list;
                list = &block->opcode_list[i];

                if (prev)
                        known = lightrec_propagate_consts(list, prev, known, values);

                switch (list->i.op) {
                case OP_SPECIAL:
                        switch (list->r.op) {
                        case OP_SPECIAL_DIV:
                        case OP_SPECIAL_DIVU:
                                /* If we are dividing by a non-zero constant, don't
                                 * emit the div-by-zero check. */
                                if (lightrec_always_skip_div_check() ||
                                    ((known & BIT(list->c.r.rt)) && values[list->c.r.rt]))
                                        list->flags |= LIGHTREC_NO_DIV_CHECK;
                                fallthrough;
                        case OP_SPECIAL_MULT:
                        case OP_SPECIAL_MULTU:
                                break;
                        default:
                                continue;
                        }
                        fallthrough;
                case OP_META_MULT2:
                case OP_META_MULTU2:
                        break;
                default:
                        continue;
                }

                /* Don't handle opcodes located in delay slots */
                if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
                    op_flag_no_ds(list->flags)) {
                        continue;
                }
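
                /* Find a GPR that can receive the LO/HI result directly; if
                 * neither half is ever read, flag the opcode so the code
                 * generator skips computing it. The chosen LO target is
                 * stashed in the (otherwise unused) rd field, the HI target
                 * in the imm field. */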
                reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
                if (reg_lo == 0) {
                        pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
                                 " not writing LO\n", i << 2);
                        list->flags |= LIGHTREC_NO_LO;
                }

                reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false);
                if (reg_hi == 0) {
                        pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
                                 " not writing HI\n", i << 2);
                        list->flags |= LIGHTREC_NO_HI;
                }

                if (!reg_lo && !reg_hi) {
                        pr_debug("Both LO and HI are unused in this block; they "
                                 "will probably be used in the parent block - "
                                 "removing the flags.\n");
                        list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI);
                }

                if (reg_lo > 0 && reg_lo != REG_LO) {
                        pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n",
                                 lightrec_reg_name(reg_lo), list->r.rs, list->r.rt);

                        lightrec_replace_lo_hi(block, i + 1, block->nb_ops, true);
                        list->r.rd = reg_lo;
                } else {
                        list->r.rd = 0;
                }

                if (reg_hi > 0 && reg_hi != REG_HI) {
                        pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n",
                                 lightrec_reg_name(reg_hi), list->r.rs, list->r.rt);

                        lightrec_replace_lo_hi(block, i + 1, block->nb_ops, false);
                        list->r.imm = reg_hi;
                } else {
                        list->r.imm = 0;
                }
        }

        return 0;
}

static bool remove_div_sequence(struct block *block, unsigned int offset)
{
        struct opcode *op;
        unsigned int i, found = 0;

        /*
         * Scan for the zero-checking sequence that GCC automatically
         * introduces after most DIV/DIVU opcodes. This sequence checks the
         * value of the divisor, and if zero, executes a BREAK opcode, causing
         * the BIOS handler to crash the PS1.
         *
         * For DIV opcodes, this sequence additionally checks that the signed
         * operation does not overflow.
         *
         * Since the games presumably never crashed the PS1, we can assume
         * that they never divided by zero or overflowed, and these sequences
         * can be removed.
         */

        for (i = offset; i < block->nb_ops; i++) {
                op = &block->opcode_list[i];

                if (!found) {
                        if (op->i.op == OP_SPECIAL &&
                            (op->r.op == OP_SPECIAL_DIV || op->r.op == OP_SPECIAL_DIVU))
                                break;

                        if ((op->opcode & 0xfc1fffff) == 0x14000002) {
                                /* BNE ???, zero, +8 */
                                found++;
                        } else {
                                offset++;
                        }
                } else if (found == 1 && !op->opcode) {
                        /* NOP */
                        found++;
                } else if (found == 2 && op->opcode == 0x0007000d) {
                        /* BREAK 0x1c00 */
                        found++;
                } else if (found == 3 && op->opcode == 0x2401ffff) {
                        /* LI at, -1 */
                        found++;
                } else if (found == 4 && (op->opcode & 0xfc1fffff) == 0x14010004) {
                        /* BNE ???, at, +16 */
                        found++;
                } else if (found == 5 && op->opcode == 0x3c018000) {
                        /* LUI at, 0x8000 */
                        found++;
                } else if (found == 6 && (op->opcode & 0xfc1fffff) == 0x14010002) {
                        /* BNE ???, at, +8 */
                        found++;
                } else if (found == 7 && !op->opcode) {
                        /* NOP */
                        found++;
                } else if (found == 8 && op->opcode == 0x0006000d) {
                        /* BREAK 0x1800 */
                        found++;
                        break;
                } else {
                        break;
                }
        }

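        /* found == 9 means the full signed-DIV guard matched; any shorter
         * match only covers the three-opcode divide-by-zero check
         * (BNE/NOP/BREAK), so only those three opcodes are NOPed out. */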
        if (found >= 3) {
                if (found != 9)
                        found = 3;

                pr_debug("Removing DIV%s sequence at offset 0x%x\n",
                         found == 9 ? "" : "U", offset << 2);

                for (i = 0; i < found; i++)
                        block->opcode_list[offset + i].opcode = 0;

                return true;
        }

        return false;
}

static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state,
                                                      struct block *block)
{
        struct opcode *op;
        unsigned int i;

        for (i = 0; i < block->nb_ops; i++) {
                op = &block->opcode_list[i];

                if (op->i.op == OP_SPECIAL &&
                    (op->r.op == OP_SPECIAL_DIVU || op->r.op == OP_SPECIAL_DIV) &&
                    remove_div_sequence(block, i + 1))
                        op->flags |= LIGHTREC_NO_DIV_CHECK;
        }

        return 0;
}

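/* The opcodes below form a memset-style word-fill routine that zero-fills
 * "a1" words starting at "a0" (note: not the standard memset() signature).
 * Blocks matching it opcode for opcode are flagged so they can be handled
 * natively instead of being compiled. */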
static const u32 memset_code[] = {
        0x10a00006,     // beqz         a1, 2f
        0x24a2ffff,     // addiu        v0,a1,-1
        0x2403ffff,     // li           v1,-1
        0xac800000,     // 1: sw        zero,0(a0)
        0x2442ffff,     // addiu        v0,v0,-1
        0x1443fffd,     // bne          v0,v1, 1b
        0x24840004,     // addiu        a0,a0,4
        0x03e00008,     // 2: jr        ra
        0x00000000,     // nop
};

static int lightrec_replace_memset(struct lightrec_state *state, struct block *block)
{
        unsigned int i;
        union code c;

        for (i = 0; i < block->nb_ops; i++) {
                c = block->opcode_list[i].c;

                if (c.opcode != memset_code[i])
                        return 0;

                if (i == ARRAY_SIZE(memset_code) - 1) {
                        /* success! */
                        pr_debug("Block at PC 0x%x is a memset\n", block->pc);
                        block_set_flags(block,
                                        BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE);

                        /* Return non-zero to skip other optimizers. */
                        return 1;
                }
        }

        return 0;
}

static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
        IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
        IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
        IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
        IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_branches),
        IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches),
        IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops),
        IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots),
        IF_OPT(OPT_FLAG_IO || OPT_FLAG_STORES, &lightrec_flag_io),
        IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
        IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
};

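/* Run every enabled pass in order; entries compiled out by IF_OPT() are NULL
 * and skipped. A non-zero return value (e.g. from lightrec_replace_memset()
 * after a successful match) aborts the remaining passes. */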
int lightrec_optimize(struct lightrec_state *state, struct block *block)
{
        unsigned int i;
        int ret;

        for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
                if (lightrec_optimizers[i]) {
                        ret = (*lightrec_optimizers[i])(state, block);
                        if (ret)
                                return ret;
                }
        }

        return 0;
}