3e94cff362dca56f08c0b2593eff44aed3bc1dc9
[pcsx_rearmed.git] / deps / lightrec / optimizer.c
1 // SPDX-License-Identifier: LGPL-2.1-or-later
2 /*
3  * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
4  */
5
6 #include "lightrec-config.h"
7 #include "disassembler.h"
8 #include "lightrec.h"
9 #include "memmanager.h"
10 #include "optimizer.h"
11 #include "regcache.h"
12
13 #include <errno.h>
14 #include <stdbool.h>
15 #include <stdlib.h>
16 #include <string.h>
17
/* Evaluate to 'ptr' when the optimization flag 'opt' is enabled, and to NULL
 * otherwise, so disabled code paths can be compiled out. */
#define IF_OPT(opt, ptr) ((opt) ? (ptr) : NULL)

/* A list of optimizer passes; each pass rewrites the opcode list in place. */
struct optimizer_list {
	void (**optimizers)(struct opcode *);
	unsigned int nb_optimizers;
};

/* Forward declaration: is_nop() is needed by reg_is_read()/reg_is_written()
 * before its definition further down. */
static bool is_nop(union code op);
26
27 bool is_unconditional_jump(union code c)
28 {
29         switch (c.i.op) {
30         case OP_SPECIAL:
31                 return c.r.op == OP_SPECIAL_JR || c.r.op == OP_SPECIAL_JALR;
32         case OP_J:
33         case OP_JAL:
34                 return true;
35         case OP_BEQ:
36         case OP_BLEZ:
37                 return c.i.rs == c.i.rt;
38         case OP_REGIMM:
39                 return (c.r.rt == OP_REGIMM_BGEZ ||
40                         c.r.rt == OP_REGIMM_BGEZAL) && c.i.rs == 0;
41         default:
42                 return false;
43         }
44 }
45
46 bool is_syscall(union code c)
47 {
48         return (c.i.op == OP_SPECIAL && c.r.op == OP_SPECIAL_SYSCALL) ||
49                 (c.i.op == OP_CP0 && (c.r.rs == OP_CP0_MTC0 ||
50                                         c.r.rs == OP_CP0_CTC0) &&
51                  (c.r.rd == 12 || c.r.rd == 13));
52 }
53
54 static u64 opcode_read_mask(union code op)
55 {
56         switch (op.i.op) {
57         case OP_SPECIAL:
58                 switch (op.r.op) {
59                 case OP_SPECIAL_SYSCALL:
60                 case OP_SPECIAL_BREAK:
61                         return 0;
62                 case OP_SPECIAL_JR:
63                 case OP_SPECIAL_JALR:
64                 case OP_SPECIAL_MTHI:
65                 case OP_SPECIAL_MTLO:
66                         return BIT(op.r.rs);
67                 case OP_SPECIAL_MFHI:
68                         return BIT(REG_HI);
69                 case OP_SPECIAL_MFLO:
70                         return BIT(REG_LO);
71                 case OP_SPECIAL_SLL:
72                         if (!op.r.imm)
73                                 return 0;
74                         fallthrough;
75                 case OP_SPECIAL_SRL:
76                 case OP_SPECIAL_SRA:
77                         return BIT(op.r.rt);
78                 default:
79                         return BIT(op.r.rs) | BIT(op.r.rt);
80                 }
81         case OP_CP0:
82                 switch (op.r.rs) {
83                 case OP_CP0_MTC0:
84                 case OP_CP0_CTC0:
85                         return BIT(op.r.rt);
86                 default:
87                         return 0;
88                 }
89         case OP_CP2:
90                 if (op.r.op == OP_CP2_BASIC) {
91                         switch (op.r.rs) {
92                         case OP_CP2_BASIC_MTC2:
93                         case OP_CP2_BASIC_CTC2:
94                                 return BIT(op.r.rt);
95                         default:
96                                 break;
97                         }
98                 }
99                 return 0;
100         case OP_J:
101         case OP_JAL:
102         case OP_LUI:
103                 return 0;
104         case OP_BEQ:
105                 if (op.i.rs == op.i.rt)
106                         return 0;
107                 fallthrough;
108         case OP_BNE:
109         case OP_LWL:
110         case OP_LWR:
111         case OP_SB:
112         case OP_SH:
113         case OP_SWL:
114         case OP_SW:
115         case OP_SWR:
116                 return BIT(op.i.rs) | BIT(op.i.rt);
117         default:
118                 return BIT(op.i.rs);
119         }
120 }
121
122 static u64 mult_div_write_mask(union code op)
123 {
124         u64 flags;
125
126         if (!OPT_FLAG_MULT_DIV)
127                 return BIT(REG_LO) | BIT(REG_HI);
128
129         if (op.r.rd)
130                 flags = BIT(op.r.rd);
131         else
132                 flags = BIT(REG_LO);
133         if (op.r.imm)
134                 flags |= BIT(op.r.imm);
135         else
136                 flags |= BIT(REG_HI);
137
138         return flags;
139 }
140
141 static u64 opcode_write_mask(union code op)
142 {
143         switch (op.i.op) {
144         case OP_META_MULT2:
145         case OP_META_MULTU2:
146                 return mult_div_write_mask(op);
147         case OP_SPECIAL:
148                 switch (op.r.op) {
149                 case OP_SPECIAL_JR:
150                 case OP_SPECIAL_SYSCALL:
151                 case OP_SPECIAL_BREAK:
152                         return 0;
153                 case OP_SPECIAL_MULT:
154                 case OP_SPECIAL_MULTU:
155                 case OP_SPECIAL_DIV:
156                 case OP_SPECIAL_DIVU:
157                         return mult_div_write_mask(op);
158                 case OP_SPECIAL_MTHI:
159                         return BIT(REG_HI);
160                 case OP_SPECIAL_MTLO:
161                         return BIT(REG_LO);
162                 case OP_SPECIAL_SLL:
163                         if (!op.r.imm)
164                                 return 0;
165                         fallthrough;
166                 default:
167                         return BIT(op.r.rd);
168                 }
169         case OP_ADDI:
170         case OP_ADDIU:
171         case OP_SLTI:
172         case OP_SLTIU:
173         case OP_ANDI:
174         case OP_ORI:
175         case OP_XORI:
176         case OP_LUI:
177         case OP_LB:
178         case OP_LH:
179         case OP_LWL:
180         case OP_LW:
181         case OP_LBU:
182         case OP_LHU:
183         case OP_LWR:
184         case OP_META_EXTC:
185         case OP_META_EXTS:
186                 return BIT(op.i.rt);
187         case OP_JAL:
188                 return BIT(31);
189         case OP_CP0:
190                 switch (op.r.rs) {
191                 case OP_CP0_MFC0:
192                 case OP_CP0_CFC0:
193                         return BIT(op.i.rt);
194                 default:
195                         return 0;
196                 }
197         case OP_CP2:
198                 if (op.r.op == OP_CP2_BASIC) {
199                         switch (op.r.rs) {
200                         case OP_CP2_BASIC_MFC2:
201                         case OP_CP2_BASIC_CFC2:
202                                 return BIT(op.i.rt);
203                         default:
204                                 break;
205                         }
206                 }
207                 return 0;
208         case OP_REGIMM:
209                 switch (op.r.rt) {
210                 case OP_REGIMM_BLTZAL:
211                 case OP_REGIMM_BGEZAL:
212                         return BIT(31);
213                 default:
214                         return 0;
215                 }
216         case OP_META_MOV:
217                 return BIT(op.r.rd);
218         default:
219                 return 0;
220         }
221 }
222
223 bool opcode_reads_register(union code op, u8 reg)
224 {
225         return opcode_read_mask(op) & BIT(reg);
226 }
227
228 bool opcode_writes_register(union code op, u8 reg)
229 {
230         return opcode_write_mask(op) & BIT(reg);
231 }
232
/* Walk backwards from 'offset' and return the index of the opcode that last
 * wrote register 'reg', or -1 if no writer can be safely identified.
 * The search aborts at sync points, at opcodes with a delay slot, or when
 * an intermediate opcode reads 'reg'. */
static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 reg)
{
	union code c;
	unsigned int i;

	/* A sync point invalidates any knowledge about earlier opcodes. */
	if (op_flag_sync(list[offset].flags))
		return -1;

	for (i = offset; i > 0; i--) {
		c = list[i - 1].c;

		if (opcode_writes_register(c, reg)) {
			/* A writer sitting in a delay slot cannot be used:
			 * whether it executes depends on the branch. */
			if (i > 1 && has_delay_slot(list[i - 2].c))
				break;

			return i - 1;
		}

		/* Stop at sync points, branches, or an intermediate reader
		 * of 'reg'. */
		if (op_flag_sync(list[i - 1].flags) ||
		    has_delay_slot(c) ||
		    opcode_reads_register(c, reg))
			break;
	}

	return -1;
}
259
/* Walk forward from 'offset' and return the index of the next opcode that
 * reads register 'reg', or -1 if no reader can be safely identified.
 * The search aborts at sync points, at opcodes with a delay slot, or when
 * an opcode overwrites 'reg' first.
 * NOTE(review): the loop has no explicit upper bound — it relies on the
 * opcode list always ending with a branch (has_delay_slot) or a sync point;
 * confirm against the block builder. */
static int find_next_reader(const struct opcode *list, unsigned int offset, u8 reg)
{
	unsigned int i;
	union code c;

	if (op_flag_sync(list[offset].flags))
		return -1;

	for (i = offset; ; i++) {
		c = list[i].c;

		if (opcode_reads_register(c, reg)) {
			/* A reader sitting in a delay slot cannot be used:
			 * whether it executes depends on the branch. */
			if (i > 0 && has_delay_slot(list[i - 1].c))
				break;

			return i;
		}

		/* Stop at sync points, branches, or when 'reg' is
		 * overwritten. */
		if (op_flag_sync(list[i].flags) ||
		    has_delay_slot(c) || opcode_writes_register(c, reg))
			break;
	}

	return -1;
}
285
/* Return true if the value of register 'reg' at 'offset' is dead, i.e. it is
 * overwritten before being read again. Returns false conservatively at sync
 * points or when the register may be read after a branch.
 * NOTE(review): like find_next_reader(), the unbounded loop relies on the
 * opcode list always terminating with a branch or sync point. */
static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg)
{
	unsigned int i;

	if (op_flag_sync(list[offset].flags))
		return false;

	for (i = offset + 1; ; i++) {
		if (opcode_reads_register(list[i].c, reg))
			return false;

		if (opcode_writes_register(list[i].c, reg))
			return true;

		if (has_delay_slot(list[i].c)) {
			/* At a branch, only the delay slot opcode is still
			 * visible; the value is dead only if the delay slot
			 * overwrites it without reading it first. */
			if (op_flag_no_ds(list[i].flags) ||
			    opcode_reads_register(list[i + 1].c, reg))
				return false;

			return opcode_writes_register(list[i + 1].c, reg);
		}
	}
}
309
310 static bool reg_is_read(const struct opcode *list,
311                         unsigned int a, unsigned int b, u8 reg)
312 {
313         /* Return true if reg is read in one of the opcodes of the interval
314          * [a, b[ */
315         for (; a < b; a++) {
316                 if (!is_nop(list[a].c) && opcode_reads_register(list[a].c, reg))
317                         return true;
318         }
319
320         return false;
321 }
322
323 static bool reg_is_written(const struct opcode *list,
324                            unsigned int a, unsigned int b, u8 reg)
325 {
326         /* Return true if reg is written in one of the opcodes of the interval
327          * [a, b[ */
328
329         for (; a < b; a++) {
330                 if (!is_nop(list[a].c) && opcode_writes_register(list[a].c, reg))
331                         return true;
332         }
333
334         return false;
335 }
336
337 static bool reg_is_read_or_written(const struct opcode *list,
338                                    unsigned int a, unsigned int b, u8 reg)
339 {
340         return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
341 }
342
343 static bool opcode_is_load(union code op)
344 {
345         switch (op.i.op) {
346         case OP_LB:
347         case OP_LH:
348         case OP_LWL:
349         case OP_LW:
350         case OP_LBU:
351         case OP_LHU:
352         case OP_LWR:
353         case OP_LWC2:
354                 return true;
355         default:
356                 return false;
357         }
358 }
359
360 static bool opcode_is_store(union code op)
361 {
362         switch (op.i.op) {
363         case OP_SB:
364         case OP_SH:
365         case OP_SW:
366         case OP_SWL:
367         case OP_SWR:
368         case OP_SWC2:
369                 return true;
370         default:
371                 return false;
372         }
373 }
374
375 static u8 opcode_get_io_size(union code op)
376 {
377         switch (op.i.op) {
378         case OP_LB:
379         case OP_LBU:
380         case OP_SB:
381                 return 8;
382         case OP_LH:
383         case OP_LHU:
384         case OP_SH:
385                 return 16;
386         default:
387                 return 32;
388         }
389 }
390
391 bool opcode_is_io(union code op)
392 {
393         return opcode_is_load(op) || opcode_is_store(op);
394 }
395
396 /* TODO: Complete */
397 static bool is_nop(union code op)
398 {
399         if (opcode_writes_register(op, 0)) {
400                 switch (op.i.op) {
401                 case OP_CP0:
402                         return op.r.rs != OP_CP0_MFC0;
403                 case OP_LB:
404                 case OP_LH:
405                 case OP_LWL:
406                 case OP_LW:
407                 case OP_LBU:
408                 case OP_LHU:
409                 case OP_LWR:
410                         return false;
411                 default:
412                         return true;
413                 }
414         }
415
416         switch (op.i.op) {
417         case OP_SPECIAL:
418                 switch (op.r.op) {
419                 case OP_SPECIAL_AND:
420                         return op.r.rd == op.r.rt && op.r.rd == op.r.rs;
421                 case OP_SPECIAL_ADD:
422                 case OP_SPECIAL_ADDU:
423                         return (op.r.rd == op.r.rt && op.r.rs == 0) ||
424                                 (op.r.rd == op.r.rs && op.r.rt == 0);
425                 case OP_SPECIAL_SUB:
426                 case OP_SPECIAL_SUBU:
427                         return op.r.rd == op.r.rs && op.r.rt == 0;
428                 case OP_SPECIAL_OR:
429                         if (op.r.rd == op.r.rt)
430                                 return op.r.rd == op.r.rs || op.r.rs == 0;
431                         else
432                                 return (op.r.rd == op.r.rs) && op.r.rt == 0;
433                 case OP_SPECIAL_SLL:
434                 case OP_SPECIAL_SRA:
435                 case OP_SPECIAL_SRL:
436                         return op.r.rd == op.r.rt && op.r.imm == 0;
437                 case OP_SPECIAL_MFHI:
438                 case OP_SPECIAL_MFLO:
439                         return op.r.rd == 0;
440                 default:
441                         return false;
442                 }
443         case OP_ORI:
444         case OP_ADDI:
445         case OP_ADDIU:
446                 return op.i.rt == op.i.rs && op.i.imm == 0;
447         case OP_BGTZ:
448                 return (op.i.rs == 0 || op.i.imm == 1);
449         case OP_REGIMM:
450                 return (op.i.op == OP_REGIMM_BLTZ ||
451                                 op.i.op == OP_REGIMM_BLTZAL) &&
452                         (op.i.rs == 0 || op.i.imm == 1);
453         case OP_BNE:
454                 return (op.i.rs == op.i.rt || op.i.imm == 1);
455         default:
456                 return false;
457         }
458 }
459
460 bool load_in_delay_slot(union code op)
461 {
462         switch (op.i.op) {
463         case OP_CP0:
464                 switch (op.r.rs) {
465                 case OP_CP0_MFC0:
466                 case OP_CP0_CFC0:
467                         return true;
468                 default:
469                         break;
470                 }
471
472                 break;
473         case OP_CP2:
474                 if (op.r.op == OP_CP2_BASIC) {
475                         switch (op.r.rs) {
476                         case OP_CP2_BASIC_MFC2:
477                         case OP_CP2_BASIC_CFC2:
478                                 return true;
479                         default:
480                                 break;
481                         }
482                 }
483
484                 break;
485         case OP_LB:
486         case OP_LH:
487         case OP_LW:
488         case OP_LWL:
489         case OP_LWR:
490         case OP_LBU:
491         case OP_LHU:
492                 return true;
493         default:
494                 break;
495         }
496
497         return false;
498 }
499
500 static u32 lightrec_propagate_consts(const struct opcode *op,
501                                      const struct opcode *prev,
502                                      u32 known, u32 *v)
503 {
504         union code c = prev->c;
505
506         /* Register $zero is always, well, zero */
507         known |= BIT(0);
508         v[0] = 0;
509
510         if (op_flag_sync(op->flags))
511                 return BIT(0);
512
513         switch (c.i.op) {
514         case OP_SPECIAL:
515                 switch (c.r.op) {
516                 case OP_SPECIAL_SLL:
517                         if (known & BIT(c.r.rt)) {
518                                 known |= BIT(c.r.rd);
519                                 v[c.r.rd] = v[c.r.rt] << c.r.imm;
520                         } else {
521                                 known &= ~BIT(c.r.rd);
522                         }
523                         break;
524                 case OP_SPECIAL_SRL:
525                         if (known & BIT(c.r.rt)) {
526                                 known |= BIT(c.r.rd);
527                                 v[c.r.rd] = v[c.r.rt] >> c.r.imm;
528                         } else {
529                                 known &= ~BIT(c.r.rd);
530                         }
531                         break;
532                 case OP_SPECIAL_SRA:
533                         if (known & BIT(c.r.rt)) {
534                                 known |= BIT(c.r.rd);
535                                 v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm;
536                         } else {
537                                 known &= ~BIT(c.r.rd);
538                         }
539                         break;
540                 case OP_SPECIAL_SLLV:
541                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
542                                 known |= BIT(c.r.rd);
543                                 v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f);
544                         } else {
545                                 known &= ~BIT(c.r.rd);
546                         }
547                         break;
548                 case OP_SPECIAL_SRLV:
549                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
550                                 known |= BIT(c.r.rd);
551                                 v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f);
552                         } else {
553                                 known &= ~BIT(c.r.rd);
554                         }
555                         break;
556                 case OP_SPECIAL_SRAV:
557                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
558                                 known |= BIT(c.r.rd);
559                                 v[c.r.rd] = (s32)v[c.r.rt]
560                                           >> (v[c.r.rs] & 0x1f);
561                         } else {
562                                 known &= ~BIT(c.r.rd);
563                         }
564                         break;
565                 case OP_SPECIAL_ADD:
566                 case OP_SPECIAL_ADDU:
567                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
568                                 known |= BIT(c.r.rd);
569                                 v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs];
570                         } else {
571                                 known &= ~BIT(c.r.rd);
572                         }
573                         break;
574                 case OP_SPECIAL_SUB:
575                 case OP_SPECIAL_SUBU:
576                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
577                                 known |= BIT(c.r.rd);
578                                 v[c.r.rd] = v[c.r.rt] - v[c.r.rs];
579                         } else {
580                                 known &= ~BIT(c.r.rd);
581                         }
582                         break;
583                 case OP_SPECIAL_AND:
584                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
585                                 known |= BIT(c.r.rd);
586                                 v[c.r.rd] = v[c.r.rt] & v[c.r.rs];
587                         } else {
588                                 known &= ~BIT(c.r.rd);
589                         }
590                         break;
591                 case OP_SPECIAL_OR:
592                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
593                                 known |= BIT(c.r.rd);
594                                 v[c.r.rd] = v[c.r.rt] | v[c.r.rs];
595                         } else {
596                                 known &= ~BIT(c.r.rd);
597                         }
598                         break;
599                 case OP_SPECIAL_XOR:
600                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
601                                 known |= BIT(c.r.rd);
602                                 v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs];
603                         } else {
604                                 known &= ~BIT(c.r.rd);
605                         }
606                         break;
607                 case OP_SPECIAL_NOR:
608                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
609                                 known |= BIT(c.r.rd);
610                                 v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]);
611                         } else {
612                                 known &= ~BIT(c.r.rd);
613                         }
614                         break;
615                 case OP_SPECIAL_SLT:
616                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
617                                 known |= BIT(c.r.rd);
618                                 v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt];
619                         } else {
620                                 known &= ~BIT(c.r.rd);
621                         }
622                         break;
623                 case OP_SPECIAL_SLTU:
624                         if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
625                                 known |= BIT(c.r.rd);
626                                 v[c.r.rd] = v[c.r.rs] < v[c.r.rt];
627                         } else {
628                                 known &= ~BIT(c.r.rd);
629                         }
630                         break;
631                 case OP_SPECIAL_MULT:
632                 case OP_SPECIAL_MULTU:
633                 case OP_SPECIAL_DIV:
634                 case OP_SPECIAL_DIVU:
635                         if (OPT_FLAG_MULT_DIV && c.r.rd)
636                                 known &= ~BIT(c.r.rd);
637                         if (OPT_FLAG_MULT_DIV && c.r.imm)
638                                 known &= ~BIT(c.r.imm);
639                         break;
640                 case OP_SPECIAL_MFLO:
641                 case OP_SPECIAL_MFHI:
642                         known &= ~BIT(c.r.rd);
643                         break;
644                 default:
645                         break;
646                 }
647                 break;
648         case OP_META_MULT2:
649         case OP_META_MULTU2:
650                 if (OPT_FLAG_MULT_DIV && (known & BIT(c.r.rs))) {
651                         if (c.r.rd) {
652                                 known |= BIT(c.r.rd);
653
654                                 if (c.r.op < 32)
655                                         v[c.r.rd] = v[c.r.rs] << c.r.op;
656                                 else
657                                         v[c.r.rd] = 0;
658                         }
659
660                         if (c.r.imm) {
661                                 known |= BIT(c.r.imm);
662
663                                 if (c.r.op >= 32)
664                                         v[c.r.imm] = v[c.r.rs] << (c.r.op - 32);
665                                 else if (c.i.op == OP_META_MULT2)
666                                         v[c.r.imm] = (s32) v[c.r.rs] >> (32 - c.r.op);
667                                 else
668                                         v[c.r.imm] = v[c.r.rs] >> (32 - c.r.op);
669                         }
670                 } else {
671                         if (OPT_FLAG_MULT_DIV && c.r.rd)
672                                 known &= ~BIT(c.r.rd);
673                         if (OPT_FLAG_MULT_DIV && c.r.imm)
674                                 known &= ~BIT(c.r.imm);
675                 }
676                 break;
677         case OP_REGIMM:
678                 break;
679         case OP_ADDI:
680         case OP_ADDIU:
681                 if (known & BIT(c.i.rs)) {
682                         known |= BIT(c.i.rt);
683                         v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm;
684                 } else {
685                         known &= ~BIT(c.i.rt);
686                 }
687                 break;
688         case OP_SLTI:
689                 if (known & BIT(c.i.rs)) {
690                         known |= BIT(c.i.rt);
691                         v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm;
692                 } else {
693                         known &= ~BIT(c.i.rt);
694                 }
695                 break;
696         case OP_SLTIU:
697                 if (known & BIT(c.i.rs)) {
698                         known |= BIT(c.i.rt);
699                         v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm;
700                 } else {
701                         known &= ~BIT(c.i.rt);
702                 }
703                 break;
704         case OP_ANDI:
705                 if (known & BIT(c.i.rs)) {
706                         known |= BIT(c.i.rt);
707                         v[c.i.rt] = v[c.i.rs] & c.i.imm;
708                 } else {
709                         known &= ~BIT(c.i.rt);
710                 }
711                 break;
712         case OP_ORI:
713                 if (known & BIT(c.i.rs)) {
714                         known |= BIT(c.i.rt);
715                         v[c.i.rt] = v[c.i.rs] | c.i.imm;
716                 } else {
717                         known &= ~BIT(c.i.rt);
718                 }
719                 break;
720         case OP_XORI:
721                 if (known & BIT(c.i.rs)) {
722                         known |= BIT(c.i.rt);
723                         v[c.i.rt] = v[c.i.rs] ^ c.i.imm;
724                 } else {
725                         known &= ~BIT(c.i.rt);
726                 }
727                 break;
728         case OP_LUI:
729                 known |= BIT(c.i.rt);
730                 v[c.i.rt] = c.i.imm << 16;
731                 break;
732         case OP_CP0:
733                 switch (c.r.rs) {
734                 case OP_CP0_MFC0:
735                 case OP_CP0_CFC0:
736                         known &= ~BIT(c.r.rt);
737                         break;
738                 }
739                 break;
740         case OP_CP2:
741                 if (c.r.op == OP_CP2_BASIC) {
742                         switch (c.r.rs) {
743                         case OP_CP2_BASIC_MFC2:
744                         case OP_CP2_BASIC_CFC2:
745                                 known &= ~BIT(c.r.rt);
746                                 break;
747                         }
748                 }
749                 break;
750         case OP_LB:
751         case OP_LH:
752         case OP_LWL:
753         case OP_LW:
754         case OP_LBU:
755         case OP_LHU:
756         case OP_LWR:
757         case OP_LWC2:
758                 known &= ~BIT(c.i.rt);
759                 break;
760         case OP_META_MOV:
761                 if (known & BIT(c.r.rs)) {
762                         known |= BIT(c.r.rd);
763                         v[c.r.rd] = v[c.r.rs];
764                 } else {
765                         known &= ~BIT(c.r.rd);
766                 }
767                 break;
768         case OP_META_EXTC:
769                 if (known & BIT(c.i.rs)) {
770                         known |= BIT(c.i.rt);
771                         v[c.i.rt] = (s32)(s8)v[c.i.rs];
772                 } else {
773                         known &= ~BIT(c.i.rt);
774                 }
775                 break;
776         case OP_META_EXTS:
777                 if (known & BIT(c.i.rs)) {
778                         known |= BIT(c.i.rt);
779                         v[c.i.rt] = (s32)(s16)v[c.i.rs];
780                 } else {
781                         known &= ~BIT(c.i.rt);
782                 }
783                 break;
784         default:
785                 break;
786         }
787
788         return known;
789 }
790
/* Detect SLL + SRA/SRL pairs with a shift amount of 16 or 24 — the classic
 * MIPS idioms for truncating/sign-extending to 16-bit or 8-bit — and convert
 * them to the cheaper META_EXTS/META_EXTC opcodes, or fold them into a
 * preceding LHU/LBU load (turning it into LH/LB). 'offset' must point to the
 * SRA/SRL opcode; the matching SLL is searched for backwards. */
static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset)
{
	struct opcode *prev, *prev2 = NULL, *curr = &list[offset];
	struct opcode *to_change, *to_nop;
	int idx, idx2;

	/* Only shift amounts of 16 (16-bit cast) and 24 (8-bit cast) form
	 * the idiom. */
	if (curr->r.imm != 24 && curr->r.imm != 16)
		return;

	/* Locate the opcode that produced the register shifted by 'curr'. */
	idx = find_prev_writer(list, offset, curr->r.rt);
	if (idx < 0)
		return;

	prev = &list[idx];

	/* It must be a SLL with the same shift amount, feeding 'curr'. */
	if (prev->i.op != OP_SPECIAL || prev->r.op != OP_SPECIAL_SLL ||
	    prev->r.imm != curr->r.imm || prev->r.rd != curr->r.rt)
		return;

	if (prev->r.rd != prev->r.rt && curr->r.rd != curr->r.rt) {
		/* sll rY, rX, 16
		 * ...
		 * srl rZ, rY, 16 */

		if (!reg_is_dead(list, offset, curr->r.rt) ||
		    reg_is_read_or_written(list, idx, offset, curr->r.rd))
			return;

		/* If rY is dead after the SRL, and rZ is not used after the SLL,
		 * we can change rY to rZ */

		pr_debug("Detected SLL/SRA with middle temp register\n");
		/* After these two assignments, both opcodes use rZ
		 * (curr->r.rt becomes equal to curr->r.rd). */
		prev->r.rd = curr->r.rd;
		curr->r.rt = prev->r.rd;
	}

	/* We got a SLL/SRA combo. If imm #16, that's a cast to u16.
	 * If imm #24 that's a cast to u8.
	 *
	 * First of all, make sure that the target register of the SLL is not
	 * read before the SRA. */

	if (prev->r.rd == prev->r.rt) {
		/* sll rX, rX, 16
		 * ...
		 * srl rY, rX, 16 */
		to_change = curr;
		to_nop = prev;

		/* rX is used after the SRA - we cannot convert it. */
		if (prev->r.rd != curr->r.rd && !reg_is_dead(list, offset, prev->r.rd))
			return;
	} else {
		/* sll rY, rX, 16
		 * ...
		 * srl rY, rY, 16 */
		to_change = prev;
		to_nop = curr;
	}

	/* Look one step further back: the value being cast may come straight
	 * from a load. */
	idx2 = find_prev_writer(list, idx, prev->r.rt);
	if (idx2 >= 0) {
		/* Note that PSX games sometimes do casts after
		 * a LHU or LBU; in this case we can change the
		 * load opcode to a LH or LB, and the cast can
		 * be changed to a MOV or a simple NOP. */

		prev2 = &list[idx2];

		if (curr->r.rd != prev2->i.rt &&
		    !reg_is_dead(list, offset, prev2->i.rt))
			prev2 = NULL;
		else if (curr->r.imm == 16 && prev2->i.op == OP_LHU)
			prev2->i.op = OP_LH;
		else if (curr->r.imm == 24 && prev2->i.op == OP_LBU)
			prev2->i.op = OP_LB;
		else
			prev2 = NULL;

		if (prev2) {
			if (curr->r.rd == prev2->i.rt) {
				/* The load already targets the right
				 * register: drop the cast entirely. */
				to_change->opcode = 0;
			} else if (reg_is_dead(list, offset, prev2->i.rt) &&
				   !reg_is_read_or_written(list, idx2 + 1, offset, curr->r.rd)) {
				/* The target register of the SRA is dead after the
				 * LBU/LHU; we can change the target register of the
				 * LBU/LHU to the one of the SRA. */
				prev2->i.rt = curr->r.rd;
				to_change->opcode = 0;
			} else {
				/* Keep a register-to-register move from the
				 * load's target to the cast's target. */
				to_change->i.op = OP_META_MOV;
				to_change->r.rd = curr->r.rd;
				to_change->r.rs = prev2->i.rt;
			}

			if (to_nop->r.imm == 24)
				pr_debug("Convert LBU+SLL+SRA to LB\n");
			else
				pr_debug("Convert LHU+SLL+SRA to LH\n");
		}
	}

	if (!prev2) {
		/* No foldable load: replace the pair with a single
		 * META_EXTC/META_EXTS opcode. */
		pr_debug("Convert SLL/SRA #%u to EXT%c\n",
			 prev->r.imm,
			 prev->r.imm == 24 ? 'C' : 'S');

		/* NOTE(review): the i.rs/i.rt fields overlap the r.rt/r.rd
		 * bitfields being read here, so the assignment order below
		 * matters — do not reorder. */
		if (to_change == prev) {
			to_change->i.rs = prev->r.rt;
			to_change->i.rt = curr->r.rd;
		} else {
			to_change->i.rt = curr->r.rd;
			to_change->i.rs = prev->r.rt;
		}

		if (to_nop->r.imm == 24)
			to_change->i.op = OP_META_EXTC;
		else
			to_change->i.op = OP_META_EXTS;
	}

	/* The remaining opcode of the pair becomes a NOP. */
	to_nop->opcode = 0;
}
914
915 static void lightrec_remove_useless_lui(struct block *block, unsigned int offset,
916                                         u32 known, u32 *values)
917 {
918         struct opcode *list = block->opcode_list,
919                       *op = &block->opcode_list[offset];
920         int reader;
921
922         if (!op_flag_sync(op->flags) && (known & BIT(op->i.rt)) &&
923             values[op->i.rt] == op->i.imm << 16) {
924                 pr_debug("Converting duplicated LUI to NOP\n");
925                 op->opcode = 0x0;
926                 return;
927         }
928
929         if (op->i.imm != 0 || op->i.rt == 0)
930                 return;
931
932         reader = find_next_reader(list, offset + 1, op->i.rt);
933         if (reader <= 0)
934                 return;
935
936         if (opcode_writes_register(list[reader].c, op->i.rt) ||
937             reg_is_dead(list, reader, op->i.rt)) {
938                 pr_debug("Removing useless LUI 0x0\n");
939
940                 if (list[reader].i.rs == op->i.rt)
941                         list[reader].i.rs = 0;
942                 if (list[reader].i.op == OP_SPECIAL &&
943                     list[reader].i.rt == op->i.rt)
944                         list[reader].i.rt = 0;
945                 op->opcode = 0x0;
946         }
947 }
948
949 static void lightrec_modify_lui(struct block *block, unsigned int offset)
950 {
951         union code c, *lui = &block->opcode_list[offset].c;
952         bool stop = false, stop_next = false;
953         unsigned int i;
954
955         for (i = offset + 1; !stop && i < block->nb_ops; i++) {
956                 c = block->opcode_list[i].c;
957                 stop = stop_next;
958
959                 if ((opcode_is_store(c) && c.i.rt == lui->i.rt)
960                     || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt)))
961                         break;
962
963                 if (opcode_writes_register(c, lui->i.rt)) {
964                         pr_debug("Convert LUI at offset 0x%x to kuseg\n",
965                                  i - 1 << 2);
966                         lui->i.imm = kunseg(lui->i.imm << 16) >> 16;
967                         break;
968                 }
969
970                 if (has_delay_slot(c))
971                         stop_next = true;
972         }
973 }
974
975 static int lightrec_transform_branches(struct lightrec_state *state,
976                                        struct block *block)
977 {
978         struct opcode *op;
979         unsigned int i;
980         s32 offset;
981
982         for (i = 0; i < block->nb_ops; i++) {
983                 op = &block->opcode_list[i];
984
985                 switch (op->i.op) {
986                 case OP_J:
987                         /* Transform J opcode into BEQ $zero, $zero if possible. */
988                         offset = (s32)((block->pc & 0xf0000000) >> 2 | op->j.imm)
989                                 - (s32)(block->pc >> 2) - (s32)i - 1;
990
991                         if (offset == (s16)offset) {
992                                 pr_debug("Transform J into BEQ $zero, $zero\n");
993                                 op->i.op = OP_BEQ;
994                                 op->i.rs = 0;
995                                 op->i.rt = 0;
996                                 op->i.imm = offset;
997
998                         }
999                         fallthrough;
1000                 default:
1001                         break;
1002                 }
1003         }
1004
1005         return 0;
1006 }
1007
1008 static inline bool is_power_of_two(u32 value)
1009 {
1010         return popcount32(value) == 1;
1011 }
1012
/* Peephole pass: walk the opcode list once, propagating known constant
 * register values, and rewrite opcodes into cheaper canonical or meta forms
 * (NOP, META_MOV, META_MULT2/MULTU2). Branches with a $zero operand are also
 * normalized so that $zero, when present, is always in the rt slot. */
static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
	struct opcode *list = block->opcode_list;
	struct opcode *prev, *op = NULL;
	/* known/values: constant-propagation state. Bit N of 'known' set means
	 * values[N] holds register N's value; $zero is always known. */
	u32 known = BIT(0);
	u32 values[32] = { 0 };
	unsigned int i;
	u8 tmp;

	for (i = 0; i < block->nb_ops; i++) {
		prev = op;
		op = &list[i];

		if (prev)
			known = lightrec_propagate_consts(op, prev, known, values);

		/* Transform all opcodes detected as useless to real NOPs
		 * (0x0: SLL r0, r0, #0) */
		if (op->opcode != 0 && is_nop(op->c)) {
			pr_debug("Converting useless opcode 0x%08x to NOP\n",
					op->opcode);
			op->opcode = 0x0;
		}

		if (!op->opcode)
			continue;

		switch (op->i.op) {
		case OP_BEQ:
			/* BEQ rX, rX is always taken: canonicalize to
			 * BEQ $zero, $zero. Otherwise move $zero to rt. */
			if (op->i.rs == op->i.rt) {
				op->i.rs = 0;
				op->i.rt = 0;
			} else if (op->i.rs == 0) {
				op->i.rs = op->i.rt;
				op->i.rt = 0;
			}
			break;

		case OP_BNE:
			/* Normalize so $zero, if used, sits in rt. */
			if (op->i.rs == 0) {
				op->i.rs = op->i.rt;
				op->i.rt = 0;
			}
			break;

		case OP_LUI:
			/* Don't move a LUI that sits in a delay slot; the
			 * kuseg rewrite scans forward from it. */
			if (!prev || !has_delay_slot(prev->c))
				lightrec_modify_lui(block, i);
			lightrec_remove_useless_lui(block, i, known, values);
			break;

		/* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
		 * with register $zero to the MOV meta-opcode */
		case OP_ORI:
		case OP_ADDI:
		case OP_ADDIU:
			if (op->i.imm == 0) {
				pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
				op->i.op = OP_META_MOV;
				op->r.rd = op->i.rt;
			}
			break;
		case OP_SPECIAL:
			switch (op->r.op) {
			case OP_SPECIAL_SRA:
				if (op->r.imm == 0) {
					pr_debug("Convert SRA #0 to MOV\n");
					op->i.op = OP_META_MOV;
					op->r.rs = op->r.rt;
					break;
				}

				/* SLL+SRA pairs may encode a sign extension;
				 * try to fuse them. */
				lightrec_optimize_sll_sra(block->opcode_list, i);
				break;
			case OP_SPECIAL_SLL:
			case OP_SPECIAL_SRL:
				if (op->r.imm == 0) {
					pr_debug("Convert SLL/SRL #0 to MOV\n");
					op->i.op = OP_META_MOV;
					op->r.rs = op->r.rt;
				}
				break;
			case OP_SPECIAL_MULT:
			case OP_SPECIAL_MULTU:
				/* Multiply by a known power of two becomes a
				 * shift meta-op. If the constant is in rs,
				 * swap rs/rt first so it always ends up in rt. */
				if ((known & BIT(op->r.rs)) &&
				    is_power_of_two(values[op->r.rs])) {
					tmp = op->c.i.rs;
					op->c.i.rs = op->c.i.rt;
					op->c.i.rt = tmp;
				} else if (!(known & BIT(op->r.rt)) ||
					   !is_power_of_two(values[op->r.rt])) {
					break;
				}

				pr_debug("Multiply by power-of-two: %u\n",
					 values[op->r.rt]);

				if (op->r.op == OP_SPECIAL_MULT)
					op->i.op = OP_META_MULT2;
				else
					op->i.op = OP_META_MULTU2;

				/* Store the shift amount in the sub-opcode
				 * field (ffs32 gives the bit index). */
				op->r.op = ffs32(values[op->r.rt]);
				break;
			case OP_SPECIAL_OR:
			case OP_SPECIAL_ADD:
			case OP_SPECIAL_ADDU:
				if (op->r.rs == 0) {
					pr_debug("Convert OR/ADD $zero to MOV\n");
					op->i.op = OP_META_MOV;
					op->r.rs = op->r.rt;
				}
				fallthrough;
			case OP_SPECIAL_SUB:
			case OP_SPECIAL_SUBU:
				if (op->r.rt == 0) {
					pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
					op->i.op = OP_META_MOV;
				}
				fallthrough;
			default:
				break;
			}
			fallthrough;
		default:
			break;
		}
	}

	return 0;
}
1144
1145 static bool lightrec_can_switch_delay_slot(union code op, union code next_op)
1146 {
1147         switch (op.i.op) {
1148         case OP_SPECIAL:
1149                 switch (op.r.op) {
1150                 case OP_SPECIAL_JALR:
1151                         if (opcode_reads_register(next_op, op.r.rd) ||
1152                             opcode_writes_register(next_op, op.r.rd))
1153                                 return false;
1154                         fallthrough;
1155                 case OP_SPECIAL_JR:
1156                         if (opcode_writes_register(next_op, op.r.rs))
1157                                 return false;
1158                         fallthrough;
1159                 default:
1160                         break;
1161                 }
1162                 fallthrough;
1163         case OP_J:
1164                 break;
1165         case OP_JAL:
1166                 if (opcode_reads_register(next_op, 31) ||
1167                     opcode_writes_register(next_op, 31))
1168                         return false;;
1169
1170                 break;
1171         case OP_BEQ:
1172         case OP_BNE:
1173                 if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
1174                         return false;
1175                 fallthrough;
1176         case OP_BLEZ:
1177         case OP_BGTZ:
1178                 if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
1179                         return false;
1180                 break;
1181         case OP_REGIMM:
1182                 switch (op.r.rt) {
1183                 case OP_REGIMM_BLTZAL:
1184                 case OP_REGIMM_BGEZAL:
1185                         if (opcode_reads_register(next_op, 31) ||
1186                             opcode_writes_register(next_op, 31))
1187                                 return false;
1188                         fallthrough;
1189                 case OP_REGIMM_BLTZ:
1190                 case OP_REGIMM_BGEZ:
1191                         if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
1192                                 return false;
1193                         break;
1194                 }
1195                 fallthrough;
1196         default:
1197                 break;
1198         }
1199
1200         return true;
1201 }
1202
/* Swap each branch opcode with its delay slot when provably safe, so the
 * recompiler can emit the pair in straight program order and flag both with
 * LIGHTREC_NO_DS (no delay-slot handling needed at emit time). */
static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
{
	struct opcode *list, *next = &block->opcode_list[0];
	unsigned int i;
	union code op, next_op;
	u32 flags;

	for (i = 0; i < block->nb_ops - 1; i++) {
		list = next;
		next = &block->opcode_list[i + 1];
		next_op = next->c;
		op = list->c;

		/* Skip non-branches, branches already processed or emulated,
		 * and pairs where either opcode is a NOP. */
		if (!has_delay_slot(op) || op_flag_no_ds(list->flags) ||
		    op_flag_emulate_branch(list->flags) ||
		    op.opcode == 0 || next_op.opcode == 0)
			continue;

		/* Don't touch a branch that itself sits in the (unswapped)
		 * delay slot of the previous opcode. */
		if (i && has_delay_slot(block->opcode_list[i - 1].c) &&
		    !op_flag_no_ds(block->opcode_list[i - 1].flags))
			continue;

		/* The delay slot is a sync point (branch target): moving it
		 * before the branch would break incoming local branches. */
		if (op_flag_sync(next->flags))
			continue;

		if (!lightrec_can_switch_delay_slot(list->c, next_op))
			continue;

		pr_debug("Swap branch and delay slot opcodes "
			 "at offsets 0x%x / 0x%x\n",
			 i << 2, (i + 1) << 2);

		/* Exchange the two opcodes; the SYNC flag must stay attached
		 * to the first slot, so it is carved out of the flag swap. */
		flags = next->flags | (list->flags & LIGHTREC_SYNC);
		list->c = next_op;
		next->c = op;
		next->flags = (list->flags | LIGHTREC_NO_DS) & ~LIGHTREC_SYNC;
		list->flags = flags | LIGHTREC_NO_DS;
	}

	return 0;
}
1244
1245 static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
1246 {
1247         struct opcode_list *list, *old_list;
1248
1249         if (new_size >= block->nb_ops) {
1250                 pr_err("Invalid shrink size (%u vs %u)\n",
1251                        new_size, block->nb_ops);
1252                 return -EINVAL;
1253         }
1254
1255         list = lightrec_malloc(state, MEM_FOR_IR,
1256                                sizeof(*list) + sizeof(struct opcode) * new_size);
1257         if (!list) {
1258                 pr_err("Unable to allocate memory\n");
1259                 return -ENOMEM;
1260         }
1261
1262         old_list = container_of(block->opcode_list, struct opcode_list, ops);
1263         memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size);
1264
1265         lightrec_free_opcode_list(state, block->opcode_list);
1266         list->nb_ops = new_size;
1267         block->nb_ops = new_size;
1268         block->opcode_list = list->ops;
1269
1270         pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
1271                  block->pc, new_size);
1272
1273         return 0;
1274 }
1275
/* Detect branches whose delay slot cannot be recompiled directly (a load in
 * the delay slot, a nested branch, or an RFE) and flag them with
 * LIGHTREC_EMULATE_BRANCH so they are interpreted instead. If the very first
 * opcode of the block is such a branch, the block is truncated to just the
 * branch and its delay slot. */
static int lightrec_detect_impossible_branches(struct lightrec_state *state,
					       struct block *block)
{
	struct opcode *op, *list = block->opcode_list, *next = &list[0];
	unsigned int i;
	int ret = 0;
	s16 offset;

	for (i = 0; i < block->nb_ops - 1; i++) {
		op = next;
		next = &list[i + 1];

		/* Only branches with a problematic delay slot are of
		 * interest: a load, another branch, or a CP0 RFE. */
		if (!has_delay_slot(op->c) ||
		    (!load_in_delay_slot(next->c) &&
		     !has_delay_slot(next->c) &&
		     !(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE)))
			continue;

		if (op->c.opcode == next->c.opcode) {
			/* The delay slot is the exact same opcode as the branch
			 * opcode: this is effectively a NOP */
			next->c.opcode = 0;
			continue;
		}

		/* Branch target, as an opcode index within this block. */
		offset = i + 1 + (s16)op->i.imm;
		if (load_in_delay_slot(next->c) &&
		    (offset >= 0 && offset < block->nb_ops) &&
		    !opcode_reads_register(list[offset].c, next->c.i.rt)) {
			/* The 'impossible' branch is a local branch - we can
			 * verify here that the first opcode of the target does
			 * not use the target register of the delay slot */

			pr_debug("Branch at offset 0x%x has load delay slot, "
				 "but is local and dest opcode does not read "
				 "dest register\n", i << 2);
			continue;
		}

		op->flags |= LIGHTREC_EMULATE_BRANCH;

		if (op == list) {
			pr_debug("First opcode of block PC 0x%08x is an impossible branch\n",
				 block->pc);

			/* If the first opcode is an 'impossible' branch, we
			 * only keep the first two opcodes of the block (the
			 * branch itself + its delay slot) */
			if (block->nb_ops > 2)
				ret = shrink_opcode_list(state, block, 2);
			break;
		}
	}

	return ret;
}
1332
1333 static int lightrec_local_branches(struct lightrec_state *state, struct block *block)
1334 {
1335         struct opcode *list;
1336         unsigned int i;
1337         s32 offset;
1338
1339         for (i = 0; i < block->nb_ops; i++) {
1340                 list = &block->opcode_list[i];
1341
1342                 if (should_emulate(list))
1343                         continue;
1344
1345                 switch (list->i.op) {
1346                 case OP_BEQ:
1347                 case OP_BNE:
1348                 case OP_BLEZ:
1349                 case OP_BGTZ:
1350                 case OP_REGIMM:
1351                         offset = i + 1 + (s16)list->i.imm;
1352                         if (offset >= 0 && offset < block->nb_ops)
1353                                 break;
1354                         fallthrough;
1355                 default:
1356                         continue;
1357                 }
1358
1359                 pr_debug("Found local branch to offset 0x%x\n", offset << 2);
1360
1361                 if (should_emulate(&block->opcode_list[offset])) {
1362                         pr_debug("Branch target must be emulated - skip\n");
1363                         continue;
1364                 }
1365
1366                 if (offset && has_delay_slot(block->opcode_list[offset - 1].c)) {
1367                         pr_debug("Branch target is a delay slot - skip\n");
1368                         continue;
1369                 }
1370
1371                 pr_debug("Adding sync at offset 0x%x\n", offset << 2);
1372
1373                 block->opcode_list[offset].flags |= LIGHTREC_SYNC;
1374                 list->flags |= LIGHTREC_LOCAL_BRANCH;
1375         }
1376
1377         return 0;
1378 }
1379
1380 bool has_delay_slot(union code op)
1381 {
1382         switch (op.i.op) {
1383         case OP_SPECIAL:
1384                 switch (op.r.op) {
1385                 case OP_SPECIAL_JR:
1386                 case OP_SPECIAL_JALR:
1387                         return true;
1388                 default:
1389                         return false;
1390                 }
1391         case OP_J:
1392         case OP_JAL:
1393         case OP_BEQ:
1394         case OP_BNE:
1395         case OP_BLEZ:
1396         case OP_BGTZ:
1397         case OP_REGIMM:
1398                 return true;
1399         default:
1400                 return false;
1401         }
1402 }
1403
1404 bool should_emulate(const struct opcode *list)
1405 {
1406         return op_flag_emulate_branch(list->flags) && has_delay_slot(list->c);
1407 }
1408
1409 static bool op_writes_rd(union code c)
1410 {
1411         switch (c.i.op) {
1412         case OP_SPECIAL:
1413         case OP_META_MOV:
1414                 return true;
1415         default:
1416                 return false;
1417         }
1418 }
1419
1420 static void lightrec_add_reg_op(struct opcode *op, u8 reg, u32 reg_op)
1421 {
1422         if (op_writes_rd(op->c) && reg == op->r.rd)
1423                 op->flags |= LIGHTREC_REG_RD(reg_op);
1424         else if (op->i.rs == reg)
1425                 op->flags |= LIGHTREC_REG_RS(reg_op);
1426         else if (op->i.rt == reg)
1427                 op->flags |= LIGHTREC_REG_RT(reg_op);
1428         else
1429                 pr_debug("Cannot add unload/clean/discard flag: "
1430                          "opcode does not touch register %s!\n",
1431                          lightrec_reg_name(reg));
1432 }
1433
/* Flag 'reg' to be unloaded from its host register after 'op' executes. */
static void lightrec_add_unload(struct opcode *op, u8 reg)
{
	lightrec_add_reg_op(op, reg, LIGHTREC_REG_UNLOAD);
}
1438
/* Flag 'reg' so its cached value can be discarded (not written back) after
 * 'op' executes. */
static void lightrec_add_discard(struct opcode *op, u8 reg)
{
	lightrec_add_reg_op(op, reg, LIGHTREC_REG_DISCARD);
}
1443
/* Flag 'reg' to be written back (cleaned) to memory after 'op' executes. */
static void lightrec_add_clean(struct opcode *op, u8 reg)
{
	lightrec_add_reg_op(op, reg, LIGHTREC_REG_CLEAN);
}
1448
1449 static void
1450 lightrec_early_unload_sync(struct opcode *list, s16 *last_r, s16 *last_w)
1451 {
1452         unsigned int reg;
1453         s16 offset;
1454
1455         for (reg = 0; reg < 34; reg++) {
1456                 offset = s16_max(last_w[reg], last_r[reg]);
1457
1458                 if (offset >= 0)
1459                         lightrec_add_unload(&list[offset], reg);
1460         }
1461
1462         memset(last_r, 0xff, sizeof(*last_r) * 34);
1463         memset(last_w, 0xff, sizeof(*last_w) * 34);
1464 }
1465
/* Register-lifetime pass: track the last read/write of each of the 34
 * tracked registers and insert clean/unload/discard flags so the register
 * cache releases host registers as early as possible. */
static int lightrec_early_unload(struct lightrec_state *state, struct block *block)
{
	u16 i, offset;
	struct opcode *op;
	/* last_r/last_w: opcode index of each register's last read/write,
	 * or -1. last_sync/next_sync: indices bounding the current
	 * branch-free region. */
	s16 last_r[34], last_w[34], last_sync = 0, next_sync = 0;
	/* dirty: registers written since the last sync; loaded: registers
	 * read since the last sync. Bit N = register N. */
	u64 mask_r, mask_w, dirty = 0, loaded = 0;
	u8 reg;

	/* 0xff bytes make each s16 entry -1 ("never accessed"). */
	memset(last_r, 0xff, sizeof(last_r));
	memset(last_w, 0xff, sizeof(last_w));

	/*
	 * Clean if:
	 * - the register is dirty, and is read again after a branch opcode
	 *
	 * Unload if:
	 * - the register is dirty or loaded, and is not read again
	 * - the register is dirty or loaded, and is written again after a branch opcode
	 * - the next opcode has the SYNC flag set
	 *
	 * Discard if:
	 * - the register is dirty or loaded, and is written again
	 */

	for (i = 0; i < block->nb_ops; i++) {
		op = &block->opcode_list[i];

		if (op_flag_sync(op->flags) || should_emulate(op)) {
			/* The next opcode has the SYNC flag set, or is a branch
			 * that should be emulated: unload all registers. */
			lightrec_early_unload_sync(block->opcode_list, last_r, last_w);
			dirty = 0;
			loaded = 0;
		}

		if (next_sync == i) {
			last_sync = i;
			pr_debug("Last sync: 0x%x\n", last_sync << 2);
		}

		if (has_delay_slot(op->c)) {
			/* The sync point lands after the delay slot, unless
			 * the branch was flagged NO_DS (slot already swapped). */
			next_sync = i + 1 + !op_flag_no_ds(op->flags);
			pr_debug("Next sync: 0x%x\n", next_sync << 2);
		}

		mask_r = opcode_read_mask(op->c);
		mask_w = opcode_write_mask(op->c);

		for (reg = 0; reg < 34; reg++) {
			if (mask_r & BIT(reg)) {
				if (dirty & BIT(reg) && last_w[reg] < last_sync) {
					/* The register is dirty, and is read
					 * again after a branch: clean it */

					lightrec_add_clean(&block->opcode_list[last_w[reg]], reg);
					dirty &= ~BIT(reg);
					loaded |= BIT(reg);
				}

				last_r[reg] = i;
			}

			if (mask_w & BIT(reg)) {
				if ((dirty & BIT(reg) && last_w[reg] < last_sync) ||
				    (loaded & BIT(reg) && last_r[reg] < last_sync)) {
					/* The register is dirty or loaded, and
					 * is written again after a branch:
					 * unload it */

					offset = s16_max(last_w[reg], last_r[reg]);
					lightrec_add_unload(&block->opcode_list[offset], reg);
					dirty &= ~BIT(reg);
					loaded &= ~BIT(reg);
				} else if (!(mask_r & BIT(reg)) &&
					   ((dirty & BIT(reg) && last_w[reg] > last_sync) ||
					   (loaded & BIT(reg) && last_r[reg] > last_sync))) {
					/* The register is dirty or loaded, and
					 * is written again: discard it */

					offset = s16_max(last_w[reg], last_r[reg]);
					lightrec_add_discard(&block->opcode_list[offset], reg);
					dirty &= ~BIT(reg);
					loaded &= ~BIT(reg);
				}

				last_w[reg] = i;
			}

		}

		dirty |= mask_w;
		loaded |= mask_r;
	}

	/* Unload all registers that are dirty or loaded at the end of block. */
	lightrec_early_unload_sync(block->opcode_list, last_r, last_w);

	return 0;
}
1565
1566 static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
1567 {
1568         struct opcode *prev = NULL, *list = NULL;
1569         enum psx_map psx_map;
1570         u32 known = BIT(0);
1571         u32 values[32] = { 0 };
1572         unsigned int i;
1573         u32 val, kunseg_val;
1574         bool no_mask;
1575
1576         for (i = 0; i < block->nb_ops; i++) {
1577                 prev = list;
1578                 list = &block->opcode_list[i];
1579
1580                 if (prev)
1581                         known = lightrec_propagate_consts(list, prev, known, values);
1582
1583                 switch (list->i.op) {
1584                 case OP_SB:
1585                 case OP_SH:
1586                 case OP_SW:
1587                         if (OPT_FLAG_STORES) {
1588                                 /* Mark all store operations that target $sp or $gp
1589                                  * as not requiring code invalidation. This is based
1590                                  * on the heuristic that stores using one of these
1591                                  * registers as address will never hit a code page. */
1592                                 if (list->i.rs >= 28 && list->i.rs <= 29 &&
1593                                     !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
1594                                         pr_debug("Flaging opcode 0x%08x as not "
1595                                                  "requiring invalidation\n",
1596                                                  list->opcode);
1597                                         list->flags |= LIGHTREC_NO_INVALIDATE;
1598                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
1599                                 }
1600
1601                                 /* Detect writes whose destination address is inside the
1602                                  * current block, using constant propagation. When these
1603                                  * occur, we mark the blocks as not compilable. */
1604                                 if ((known & BIT(list->i.rs)) &&
1605                                     kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
1606                                     kunseg(values[list->i.rs]) < (kunseg(block->pc) +
1607                                                                   block->nb_ops * 4)) {
1608                                         pr_debug("Self-modifying block detected\n");
1609                                         block_set_flags(block, BLOCK_NEVER_COMPILE);
1610                                         list->flags |= LIGHTREC_SMC;
1611                                 }
1612                         }
1613                         fallthrough;
1614                 case OP_SWL:
1615                 case OP_SWR:
1616                 case OP_SWC2:
1617                 case OP_LB:
1618                 case OP_LBU:
1619                 case OP_LH:
1620                 case OP_LHU:
1621                 case OP_LW:
1622                 case OP_LWL:
1623                 case OP_LWR:
1624                 case OP_LWC2:
1625                         if (OPT_FLAG_IO && (known & BIT(list->i.rs))) {
1626                                 val = values[list->i.rs] + (s16) list->i.imm;
1627                                 kunseg_val = kunseg(val);
1628                                 psx_map = lightrec_get_map_idx(state, kunseg_val);
1629
1630                                 list->flags &= ~LIGHTREC_IO_MASK;
1631                                 no_mask = val == kunseg_val;
1632
1633                                 switch (psx_map) {
1634                                 case PSX_MAP_KERNEL_USER_RAM:
1635                                         if (no_mask)
1636                                                 list->flags |= LIGHTREC_NO_MASK;
1637                                         fallthrough;
1638                                 case PSX_MAP_MIRROR1:
1639                                 case PSX_MAP_MIRROR2:
1640                                 case PSX_MAP_MIRROR3:
1641                                         pr_debug("Flaging opcode %u as RAM access\n", i);
1642                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
1643                                         if (no_mask && state->mirrors_mapped)
1644                                                 list->flags |= LIGHTREC_NO_MASK;
1645                                         break;
1646                                 case PSX_MAP_BIOS:
1647                                         pr_debug("Flaging opcode %u as BIOS access\n", i);
1648                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS);
1649                                         if (no_mask)
1650                                                 list->flags |= LIGHTREC_NO_MASK;
1651                                         break;
1652                                 case PSX_MAP_SCRATCH_PAD:
1653                                         pr_debug("Flaging opcode %u as scratchpad access\n", i);
1654                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH);
1655                                         if (no_mask)
1656                                                 list->flags |= LIGHTREC_NO_MASK;
1657
1658                                         /* Consider that we're never going to run code from
1659                                          * the scratchpad. */
1660                                         list->flags |= LIGHTREC_NO_INVALIDATE;
1661                                         break;
1662                                 case PSX_MAP_HW_REGISTERS:
1663                                         if (state->ops.hw_direct &&
1664                                             state->ops.hw_direct(kunseg_val,
1665                                                                  opcode_is_store(list->c),
1666                                                                  opcode_get_io_size(list->c))) {
1667                                                 pr_debug("Flagging opcode %u as direct I/O access\n",
1668                                                          i);
1669                                                 list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT_HW);
1670
1671                                                 if (no_mask)
1672                                                         list->flags |= LIGHTREC_NO_MASK;
1673                                                 break;
1674                                         }
1675                                         fallthrough;
1676                                 default:
1677                                         pr_debug("Flagging opcode %u as I/O access\n",
1678                                                  i);
1679                                         list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
1680                                         break;
1681                                 }
1682                         }
1683                         fallthrough;
1684                 default:
1685                         break;
1686                 }
1687         }
1688
1689         return 0;
1690 }
1691
/* Determine which register can receive the HI (mflo == false) or LO
 * (mflo == true) result of the MULT/MULTU/DIV/DIVU opcode located just
 * before 'offset', by scanning forward for the matching MFHI/MFLO.
 *
 * Return value:
 *  - 0 if HI/LO is overwritten before ever being read, i.e. the result
 *    does not need to be computed at all;
 *  - the MFHI/MFLO destination register if the result can be written
 *    there directly (the MFHI/MFLO can then be removed, see
 *    lightrec_replace_lo_hi());
 *  - REG_HI/REG_LO when the value must stay in the real HI/LO register
 *    (unresolvable control flow, clobbered target, etc.).
 *
 * 'last' is non-NULL when recursing over the fall-through path of a local
 * branch; in that case nested branches are not followed.
 * 'mask' accumulates the registers read or written since the MULT/DIV.
 * 'sync' becomes true once an opcode flagged SYNC has been crossed.
 * 'another' is true when checking whether a second MFHI/MFLO consumer
 * exists further down. */
static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
			    const struct opcode *last,
			    u32 mask, bool sync, bool mflo, bool another)
{
	const struct opcode *op, *next = &block->opcode_list[offset];
	u32 old_mask;
	u8 reg2, reg = mflo ? REG_LO : REG_HI;
	u16 branch_offset;
	unsigned int i;

	for (i = offset; i < block->nb_ops; i++) {
		op = next;
		next = &block->opcode_list[i + 1];
		/* Mask as it was *before* the current opcode; used by the
		 * MFHI/MFLO cases below, which must not treat their own
		 * register accesses as conflicts. */
		old_mask = mask;

		/* If any other opcode writes or reads to the register
		 * we'd use, then we cannot use it anymore. */
		mask |= opcode_read_mask(op->c);
		mask |= opcode_write_mask(op->c);

		if (op_flag_sync(op->flags))
			sync = true;

		switch (op->i.op) {
		case OP_BEQ:
		case OP_BNE:
		case OP_BLEZ:
		case OP_BGTZ:
		case OP_REGIMM:
			/* TODO: handle backwards branches too */
			if (!last && op_flag_local_branch(op->flags) &&
			    (s16)op->c.i.imm >= 0) {
				branch_offset = i + 1 + (s16)op->c.i.imm
					- !!op_flag_no_ds(op->flags);

				/* Scan both the taken and the fall-through
				 * paths; only usable if they agree. */
				reg = get_mfhi_mflo_reg(block, branch_offset, NULL,
							mask, sync, mflo, false);
				reg2 = get_mfhi_mflo_reg(block, offset + 1, next,
							 mask, sync, mflo, false);
				if (reg > 0 && reg == reg2)
					return reg;
				if (!reg && !reg2)
					return 0;
			}

			/* Unresolvable branch: keep the value in HI/LO. */
			return mflo ? REG_LO : REG_HI;
		case OP_META_MULT2:
		case OP_META_MULTU2:
			return 0;
		case OP_SPECIAL:
			switch (op->r.op) {
			case OP_SPECIAL_MULT:
			case OP_SPECIAL_MULTU:
			case OP_SPECIAL_DIV:
			case OP_SPECIAL_DIVU:
				/* HI/LO overwritten before being read. */
				return 0;
			case OP_SPECIAL_MTHI:
				if (!mflo)
					return 0;
				continue;
			case OP_SPECIAL_MTLO:
				if (mflo)
					return 0;
				continue;
			case OP_SPECIAL_JR:
				if (op->r.rs != 31)
					return reg;

				/* JR $ra with a MFHI/MFLO in the delay slot:
				 * the delay-slot opcode's target can be used
				 * directly. */
				if (!sync && !op_flag_no_ds(op->flags) &&
				    (next->i.op == OP_SPECIAL) &&
				    ((!mflo && next->r.op == OP_SPECIAL_MFHI) ||
				    (mflo && next->r.op == OP_SPECIAL_MFLO)))
					return next->r.rd;

				return 0;
			case OP_SPECIAL_JALR:
				return reg;
			case OP_SPECIAL_MFHI:
				if (!mflo) {
					if (another)
						return op->r.rd;
					/* Must use REG_HI if there is another MFHI target*/
					reg2 = get_mfhi_mflo_reg(block, i + 1, next,
							 0, sync, mflo, true);
					if (reg2 > 0 && reg2 != REG_HI)
						return REG_HI;

					/* Safe only if nothing in between
					 * touched the target register. */
					if (!sync && !(old_mask & BIT(op->r.rd)))
						return op->r.rd;
					else
						return REG_HI;
				}
				continue;
			case OP_SPECIAL_MFLO:
				if (mflo) {
					if (another)
						return op->r.rd;
					/* Must use REG_LO if there is another MFLO target*/
					reg2 = get_mfhi_mflo_reg(block, i + 1, next,
							 0, sync, mflo, true);
					if (reg2 > 0 && reg2 != REG_LO)
						return REG_LO;

					if (!sync && !(old_mask & BIT(op->r.rd)))
						return op->r.rd;
					else
						return REG_LO;
				}
				continue;
			default:
				break;
			}

			fallthrough;
		default:
			continue;
		}
	}

	return reg;
}
1813
1814 static void lightrec_replace_lo_hi(struct block *block, u16 offset,
1815                                    u16 last, bool lo)
1816 {
1817         unsigned int i;
1818         u32 branch_offset;
1819
1820         /* This function will remove the following MFLO/MFHI. It must be called
1821          * only if get_mfhi_mflo_reg() returned a non-zero value. */
1822
1823         for (i = offset; i < last; i++) {
1824                 struct opcode *op = &block->opcode_list[i];
1825
1826                 switch (op->i.op) {
1827                 case OP_BEQ:
1828                 case OP_BNE:
1829                 case OP_BLEZ:
1830                 case OP_BGTZ:
1831                 case OP_REGIMM:
1832                         /* TODO: handle backwards branches too */
1833                         if (op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) {
1834                                 branch_offset = i + 1 + (s16)op->c.i.imm
1835                                         - !!op_flag_no_ds(op->flags);
1836
1837                                 lightrec_replace_lo_hi(block, branch_offset, last, lo);
1838                                 lightrec_replace_lo_hi(block, i + 1, branch_offset, lo);
1839                         }
1840                         break;
1841
1842                 case OP_SPECIAL:
1843                         if (lo && op->r.op == OP_SPECIAL_MFLO) {
1844                                 pr_debug("Removing MFLO opcode at offset 0x%x\n",
1845                                          i << 2);
1846                                 op->opcode = 0;
1847                                 return;
1848                         } else if (!lo && op->r.op == OP_SPECIAL_MFHI) {
1849                                 pr_debug("Removing MFHI opcode at offset 0x%x\n",
1850                                          i << 2);
1851                                 op->opcode = 0;
1852                                 return;
1853                         }
1854
1855                         fallthrough;
1856                 default:
1857                         break;
1858                 }
1859         }
1860 }
1861
1862 static bool lightrec_always_skip_div_check(void)
1863 {
1864 #ifdef __mips__
1865         return true;
1866 #else
1867         return false;
1868 #endif
1869 }
1870
/* Optimizer pass: for each MULT/MULTU/DIV/DIVU (and META_MULT2/MULTU2)
 * opcode, use get_mfhi_mflo_reg() to find out where its HI/LO results are
 * actually consumed.  Unused halves are flagged LIGHTREC_NO_LO /
 * LIGHTREC_NO_HI; when a single MFLO/MFHI consumer with a safe target
 * register is found, that MFLO/MFHI is removed and the target register is
 * stashed in the opcode's rd (for LO) / imm (for HI) fields, which are
 * repurposed to carry this information.  Constant propagation is also used
 * to drop the div-by-zero check when the divisor is a known non-zero
 * constant.  Always returns 0. */
static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
{
	struct opcode *prev, *list = NULL;
	u8 reg_hi, reg_lo;
	unsigned int i;
	/* BIT(0): register $zero is always a known constant (value 0). */
	u32 known = BIT(0);
	u32 values[32] = { 0 };

	for (i = 0; i < block->nb_ops - 1; i++) {
		prev = list;
		list = &block->opcode_list[i];

		if (prev)
			known = lightrec_propagate_consts(list, prev, known, values);

		/* Filter: only process MULT/MULTU/DIV/DIVU and the META
		 * multiply variants; everything else is skipped. */
		switch (list->i.op) {
		case OP_SPECIAL:
			switch (list->r.op) {
			case OP_SPECIAL_DIV:
			case OP_SPECIAL_DIVU:
				/* If we are dividing by a non-zero constant, don't
				 * emit the div-by-zero check. */
				if (lightrec_always_skip_div_check() ||
				    ((known & BIT(list->c.r.rt)) && values[list->c.r.rt]))
					list->flags |= LIGHTREC_NO_DIV_CHECK;
				fallthrough;
			case OP_SPECIAL_MULT:
			case OP_SPECIAL_MULTU:
				break;
			default:
				continue;
			}
			fallthrough;
		case OP_META_MULT2:
		case OP_META_MULTU2:
			break;
		default:
			continue;
		}

		/* Don't support opcodes in delay slots */
		if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
		    op_flag_no_ds(list->flags)) {
			continue;
		}

		reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
		if (reg_lo == 0) {
			pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
				 " not writing LO\n", i << 2);
			list->flags |= LIGHTREC_NO_LO;
		}

		reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false);
		if (reg_hi == 0) {
			pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as"
				 " not writing HI\n", i << 2);
			list->flags |= LIGHTREC_NO_HI;
		}

		/* If neither half is used locally, the results are likely
		 * consumed by a parent block, so keep both. */
		if (!reg_lo && !reg_hi) {
			pr_debug("Both LO/HI unused in this block, they will "
				 "probably be used in parent block - removing "
				 "flags.\n");
			list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI);
		}

		/* rd is repurposed to hold the LO target register (0 = use
		 * the real LO register). */
		if (reg_lo > 0 && reg_lo != REG_LO) {
			pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n",
				 lightrec_reg_name(reg_lo), list->r.rs, list->r.rt);

			lightrec_replace_lo_hi(block, i + 1, block->nb_ops, true);
			list->r.rd = reg_lo;
		} else {
			list->r.rd = 0;
		}

		/* imm is repurposed to hold the HI target register (0 = use
		 * the real HI register). */
		if (reg_hi > 0 && reg_hi != REG_HI) {
			pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n",
				 lightrec_reg_name(reg_hi), list->r.rs, list->r.rt);

			lightrec_replace_lo_hi(block, i + 1, block->nb_ops, false);
			list->r.imm = reg_hi;
		} else {
			list->r.imm = 0;
		}
	}

	return 0;
}
1961
1962 static bool remove_div_sequence(struct block *block, unsigned int offset)
1963 {
1964         struct opcode *op;
1965         unsigned int i, found = 0;
1966
1967         /*
1968          * Scan for the zero-checking sequence that GCC automatically introduced
1969          * after most DIV/DIVU opcodes. This sequence checks the value of the
1970          * divisor, and if zero, executes a BREAK opcode, causing the BIOS
1971          * handler to crash the PS1.
1972          *
1973          * For DIV opcodes, this sequence additionally checks that the signed
1974          * operation does not overflow.
1975          *
1976          * With the assumption that the games never crashed the PS1, we can
1977          * therefore assume that the games never divided by zero or overflowed,
1978          * and these sequences can be removed.
1979          */
1980
1981         for (i = offset; i < block->nb_ops; i++) {
1982                 op = &block->opcode_list[i];
1983
1984                 if (!found) {
1985                         if (op->i.op == OP_SPECIAL &&
1986                             (op->r.op == OP_SPECIAL_DIV || op->r.op == OP_SPECIAL_DIVU))
1987                                 break;
1988
1989                         if ((op->opcode & 0xfc1fffff) == 0x14000002) {
1990                                 /* BNE ???, zero, +8 */
1991                                 found++;
1992                         } else {
1993                                 offset++;
1994                         }
1995                 } else if (found == 1 && !op->opcode) {
1996                         /* NOP */
1997                         found++;
1998                 } else if (found == 2 && op->opcode == 0x0007000d) {
1999                         /* BREAK 0x1c00 */
2000                         found++;
2001                 } else if (found == 3 && op->opcode == 0x2401ffff) {
2002                         /* LI at, -1 */
2003                         found++;
2004                 } else if (found == 4 && (op->opcode & 0xfc1fffff) == 0x14010004) {
2005                         /* BNE ???, at, +16 */
2006                         found++;
2007                 } else if (found == 5 && op->opcode == 0x3c018000) {
2008                         /* LUI at, 0x8000 */
2009                         found++;
2010                 } else if (found == 6 && (op->opcode & 0x141fffff) == 0x14010002) {
2011                         /* BNE ???, at, +16 */
2012                         found++;
2013                 } else if (found == 7 && !op->opcode) {
2014                         /* NOP */
2015                         found++;
2016                 } else if (found == 8 && op->opcode == 0x0006000d) {
2017                         /* BREAK 0x1800 */
2018                         found++;
2019                         break;
2020                 } else {
2021                         break;
2022                 }
2023         }
2024
2025         if (found >= 3) {
2026                 if (found != 9)
2027                         found = 3;
2028
2029                 pr_debug("Removing DIV%s sequence at offset 0x%x\n",
2030                          found == 9 ? "" : "U", offset << 2);
2031
2032                 for (i = 0; i < found; i++)
2033                         block->opcode_list[offset + i].opcode = 0;
2034
2035                 return true;
2036         }
2037
2038         return false;
2039 }
2040
2041 static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state,
2042                                                       struct block *block)
2043 {
2044         struct opcode *op;
2045         unsigned int i;
2046
2047         for (i = 0; i < block->nb_ops; i++) {
2048                 op = &block->opcode_list[i];
2049
2050                 if (op->i.op == OP_SPECIAL &&
2051                     (op->r.op == OP_SPECIAL_DIVU || op->r.op == OP_SPECIAL_DIV) &&
2052                     remove_div_sequence(block, i + 1))
2053                         op->flags |= LIGHTREC_NO_DIV_CHECK;
2054         }
2055
2056         return 0;
2057 }
2058
/* Word-for-word machine-code pattern of a memset loop (store zero words
 * until the counter underflows, then return).  lightrec_replace_memset()
 * compares a block against this table opcode-by-opcode to flag it as
 * BLOCK_IS_MEMSET. */
static const u32 memset_code[] = {
	0x10a00006,	// beqz		a1, 2f
	0x24a2ffff,	// addiu	v0,a1,-1
	0x2403ffff,	// li		v1,-1
	0xac800000,	// 1: sw	zero,0(a0)
	0x2442ffff,	// addiu	v0,v0,-1
	0x1443fffd,	// bne		v0,v1, 1b
	0x24840004,	// addiu	a0,a0,4
	0x03e00008,	// 2: jr	ra
	0x00000000,	// nop
};
2070
2071 static int lightrec_replace_memset(struct lightrec_state *state, struct block *block)
2072 {
2073         unsigned int i;
2074         union code c;
2075
2076         for (i = 0; i < block->nb_ops; i++) {
2077                 c = block->opcode_list[i].c;
2078
2079                 if (c.opcode != memset_code[i])
2080                         return 0;
2081
2082                 if (i == ARRAY_SIZE(memset_code) - 1) {
2083                         /* success! */
2084                         pr_debug("Block at PC 0x%x is a memset\n", block->pc);
2085                         block_set_flags(block,
2086                                         BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE);
2087
2088                         /* Return non-zero to skip other optimizers. */
2089                         return 1;
2090                 }
2091         }
2092
2093         return 0;
2094 }
2095
/* Optimizer passes, executed in this order by lightrec_optimize().
 * An entry is NULL (and thus skipped) when its OPT_* compile-time switch
 * is disabled — see IF_OPT(). */
static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
	IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
	IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
	IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches),
	IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_branches),
	IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches),
	IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops),
	IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots),
	IF_OPT(OPT_FLAG_IO || OPT_FLAG_STORES, &lightrec_flag_io),
	IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
	IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
};
2108
2109 int lightrec_optimize(struct lightrec_state *state, struct block *block)
2110 {
2111         unsigned int i;
2112         int ret;
2113
2114         for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
2115                 if (lightrec_optimizers[i]) {
2116                         ret = (*lightrec_optimizers[i])(state, block);
2117                         if (ret)
2118                                 return ret;
2119                 }
2120         }
2121
2122         return 0;
2123 }